/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 2023-2025                                                **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/


#include "config.h"
#include <stdint.h>
#include <algorithm>
#include <cmath>
#include <future>
#include <iostream>
#include <list>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include <CubeCnode.h>
#include "CubeIoProxy.h"
#include "POPCalculation.h"
#include "POPServerRequest.h"
#include "PerformanceAnalysis.h"
#include "PerformanceTest.h"
#include "POPAuditAnalysis.h"
#include "POPHybridAuditAnalysisAdd.h"
#include "POPHybridAuditAnalysis.h"
#include "BSPOPHybridAuditAnalysis.h"


using json = nlohmann::json;

struct CnodeCmp
{
    bool
    operator()( const cube::Cnode* lhs, const cube::Cnode* rhs ) const
    {
        return lhs->get_id() < rhs->get_id();
    }
};


/**
 * Collects the kernels that belong to the call sites found in the subtree
 * rooted at @p _c.
 *
 * For every numeric parameter of @p _c that is named "callsite id", each
 * cnode of @p additional_cnodes_pool that is not yet part of @p new_cnodes
 * and that carries a numeric parameter with the same value is appended to
 * @p new_cnodes with the inclusive calculation flavour. The function then
 * recurses into all children of @p _c.
 *
 * @param _c                      root of the subtree to inspect; must not be null
 * @param new_cnodes              list receiving the matching kernels
 * @param additional_cnodes_pool  candidate kernel cnodes
 */
static
void
add_kernel( const cube::Cnode* _c, cube::list_of_cnodes& new_cnodes, const std::set<const cube::Cnode*, CnodeCmp >& additional_cnodes_pool )
{
    const std::vector < std::pair < std::string, double> >& launcher_parameters = _c->numeric_parameters();
    for ( const std::pair < std::string, double>& launcher_parameter : launcher_parameters )
    {
        if ( launcher_parameter.first != "callsite id" )
        {
            continue;
        }
        // Call site ids are compared for exact equality, as they are identifiers and not measurements.
        const double callsite_id = launcher_parameter.second;

        for ( const cube::Cnode* kernel : additional_cnodes_pool )
        {
            // The duplicate check has to look for the kernel candidate itself,
            // not for the cnode whose call sites are being resolved.
            const bool already_added = std::any_of( new_cnodes.begin(), new_cnodes.end(),
                                                    [ kernel ]( const cube::cnode_pair& entry ) {
                return entry.first == kernel;
            } );
            if ( already_added )
            {
                continue;
            }

            // any_of stops at the first hit, so a kernel is appended at most once
            // even if several of its parameters carry the same value.
            const std::vector < std::pair < std::string, double> >& kernel_parameters = kernel->numeric_parameters();
            const bool                                              matches           = std::any_of( kernel_parameters.begin(), kernel_parameters.end(),
                                                                                                     [ callsite_id ]( const std::pair < std::string, double>& kernel_parameter ) {
                return kernel_parameter.second == callsite_id;
            } );
            if ( matches )
            {
                // list_of_cnodes stores non-const pointers; the pool only holds const ones.
                new_cnodes.push_back( std::make_pair( const_cast<cube::Cnode*>( kernel ), cube::CUBE_CALCULATE_INCLUSIVE ) );
            }
        }
    }

    // Bind the child list once, so that begin and end always refer to the same container.
    const auto& children = _c->get_children();
    for ( const cube::Vertex* child : children )
    {
        add_kernel( static_cast<const cube::Cnode*>( child ), new_cnodes, additional_cnodes_pool );
    }
}

/*
   Completes the list of selected cnodes before the calculation:
    1. Find the cnode whose callee is named "KERNELS". If there is none,
       the list is left untouched.
    2. Every selected cnode with the role "artificial" is replaced by its
       non-artificial children (taken inclusively).
    3. For every inclusively selected cnode, the kernels matching the call
       sites in its subtree are collected via add_kernel().
    4. The collected cnodes are appended to the list, each one only once.
    5. All cnodes with the role "artificial" are removed from the list.
 */
void
popcalculation::POPCalculation::correctCnodes( cube::CubeProxy*      cube,
                                               cube::list_of_cnodes& _cnodes )
{
    const auto has_artificial_role = [ ]( const cube::Cnode* cnode ) {
                                         return cnode->get_callee()->get_role() == "artificial";
                                     };

    // No early exit from this loop: if several cnodes are named "KERNELS", the last one is used.
    const std::vector<cube::Cnode*> every_cnode  = cube->getCnodes();
    const cube::Cnode*              kernels_root = nullptr;
    for ( const cube::Cnode* candidate : every_cnode )
    {
        if ( candidate->get_callee()->get_name() == "KERNELS" )
        {
            kernels_root = candidate;
        }
    }
    if ( kernels_root == nullptr )
    {
        return; // profile without kernels
    }

    // The children of "KERNELS" are the candidates add_kernel() chooses from.
    std::set<const cube::Cnode*, CnodeCmp > kernel_pool;
    for ( const cube::Vertex* kernel_vertex : kernels_root->get_children() )
    {
        kernel_pool.insert( static_cast<const cube::Cnode*>( kernel_vertex ) );
    }

    // Stand-ins for selected artificial cnodes: their non-artificial children.
    cube::list_of_cnodes extra_cnodes;
    for ( const cube::cnode_pair& selected : _cnodes )
    {
        if ( !has_artificial_role( selected.first ) )
        {
            continue;
        }
        std::vector<cube::Vertex*> children = selected.first->get_children();
        for ( cube::Vertex* child_vertex : children )
        {
            cube::Cnode* child = static_cast<cube::Cnode*>( child_vertex );
            if ( !has_artificial_role( child ) )
            {
                extra_cnodes.push_back( std::make_pair( child, cube::CUBE_CALCULATE_INCLUSIVE ) );
            }
        }
    }

    // Kernels are only looked up for inclusively selected cnodes.
    for ( const cube::cnode_pair& selected : _cnodes )
    {
        if ( selected.second == cube::CUBE_CALCULATE_INCLUSIVE )
        {
            add_kernel( selected.first, extra_cnodes, kernel_pool );
        }
    }

    // Merge into the selection; the lookup also sees entries appended by earlier iterations.
    for ( const cube::cnode_pair& extra : extra_cnodes )
    {
        const bool listed = std::any_of( _cnodes.begin(), _cnodes.end(),
                                         [ &extra ]( const cube::cnode_pair& present ) {
            return extra.first == present.first;
        } );
        if ( listed || has_artificial_role( extra.first ) )
        {
            continue;
        }
        _cnodes.push_back( extra );
    }

    // Finally drop the artificial cnodes that were part of the original selection.
    _cnodes.erase( std::remove_if( _cnodes.begin(),
                                   _cnodes.end(),
                                   [ &has_artificial_role ]( const cube::cnode_pair& entry ) {
        return has_artificial_role( entry.first );
    } ),
                   _cnodes.end() );
}




std::vector<unsigned char>
popcalculation::POPCalculation::calculate( const std::vector<unsigned char>& request,
                                           cube::CubeIoProxy*                cube )
{
    popcalculation::PerformanceAnalysis* performance_analysis = nullptr;

    std::vector<cube::Cnode*> cnodev_ = cube->getCnodes();
    std::string               stringFromClient { request.begin(), request.end() };

    json j           = json::parse( stringFromClient );
    auto pop_request = j.template get<popserver_request::POPServerRequest>();



    switch ( pop_request.pop_analysis )
    {
        case HYBRID_ADD:
            performance_analysis = new hybaddanalysis::POPHybridAuditPerformanceAnalysisAdd( cube );
            break;
        case HYBRID_MULT:
            performance_analysis = new hybanalysis::POPHybridAuditPerformanceAnalysis( cube );
            break;
        case HYBRID_BSC:
            performance_analysis = new bscanalysis::BSPOPHybridAuditPerformanceAnalysis( cube );
            break;
        case PURE_MPI:
        default:
            performance_analysis = new mpianalysis::POPAuditPerformanceAnalysis( cube );
            break;
    }




    if ( pop_request.operation == popserver_request::TEST )
    {
        std::stringstream mstr;
        std::string       result_message;
        if ( performance_analysis->isActive() )
        {
            mstr << performance_analysis->name()  << " is active.";
        }
        else
        {
            mstr <<  "[WARNING] Seems that POP Analysis \"" << performance_analysis->name() << "\" cannot be used with this profile. Interpret the result with care.";
        }
        result_message = mstr.str();

        popserver_request::POPServerAnswer pop_answer;
        pop_answer.analysis_active  = ( performance_analysis->isActive() ) ? popserver_request::OK : popserver_request::NOOP;
        pop_answer.analysis_message = result_message;

        std::stringstream sstr;

        json j3 = pop_answer;
        sstr << j3;
        std::string json_answer = sstr.str();

        std::vector<unsigned char> messageFromServer;
        messageFromServer.insert( messageFromServer.begin(), json_answer.begin(), json_answer.end() );

        delete performance_analysis;
        return messageFromServer;
    }




    std::list<popcalculation::PerformanceTest*> pop_metrics     = performance_analysis->getPOPTests();
    std::list<popcalculation::PerformanceTest*> gpu_metrics     = performance_analysis->getGPUTests();
    std::list<popcalculation::PerformanceTest*> io_metrics      = performance_analysis->getIOTests();
    std::list<popcalculation::PerformanceTest*> add_metrics     = performance_analysis->getAdditionalTests();
    std::list<popcalculation::PerformanceTest*> control_metrics = performance_analysis->getControlTests();

    cube::list_of_cnodes _cnodes;
    for ( size_t i = 0; i < pop_request.cnodes.size(); ++i )
    {
        _cnodes.push_back(   std::make_pair( cnodev_[ pop_request.cnodes[ i ] ], cube::CalculationFlavour( pop_request.state[ i ] ) ) );
    }

    correctCnodes( cube, _cnodes );



// ----   ASYNC CALCULATION
    std::list<popcalculation::PerformanceTest*> all_tests = performance_analysis->getAllTestsForCalculation();

    std::vector<std::future<void> > calculation_done;
    auto                            asynx_calculate_metric = [ ]( const cube::list_of_cnodes& c,  popcalculation::PerformanceTest* t )
                                                             {
                                                                 if ( t->isActive() )
                                                                 {
                                                                     t->apply( c );
                                                                 }
                                                             };

    std::for_each( all_tests.begin(), all_tests.end(), [ &_cnodes, &calculation_done, &asynx_calculate_metric ]( popcalculation::PerformanceTest* t )
    {
        calculation_done.push_back(  std::async( std::launch::async, asynx_calculate_metric, _cnodes, t )  );
    } );
    std::for_each( calculation_done.begin(), calculation_done.end(), [ ]( std::future<void>& _f )
    {
        _f.get();     // waiting for the result
    } );
// ---- DONE, NOW COLLECT RESULTS






    popserver_request::POPServerAnswer pop_answer;
    pop_answer.pop_analysis_help = performance_analysis->getAnchorHowToMeasure();

    auto calculate_pop_metric = [ &pop_answer ]( popcalculation::PerformanceTest* t )
                                {
                                    pop_answer.pop_metric_names.push_back( t->name() );
                                    pop_answer.pop_metrics_helps.push_back( t->getHelpUrl() );
                                    if ( t != nullptr &&  t->isActive() )
                                    {
                                        double v = t->value();
                                        pop_answer.pop_values.push_back( v );
                                    }
                                    else
                                    {
                                        pop_answer.pop_values.push_back( -1 );
                                    }
                                };

    std::for_each( pop_metrics.crbegin(), pop_metrics.crend(), calculate_pop_metric );

    auto calculate_gpu_metric = [ &pop_answer ]( popcalculation::PerformanceTest* t )
                                {
                                    pop_answer.gpu_metric_names.push_back( t->name() );
                                    pop_answer.gpu_metrics_helps.push_back( t->getHelpUrl() );
                                    if ( t != nullptr &&  t->isActive() )
                                    {
                                        double v = t->value();

                                        if ( std::isnan( v ) )
                                        {
                                            pop_answer.gpu_values.push_back( -1 );
                                        }
                                        else
                                        {
                                            pop_answer.gpu_values.push_back( v );
                                        }
                                    }
                                    else
                                    {
                                        pop_answer.gpu_values.push_back( -1 );
                                    }
                                };

    std::for_each( gpu_metrics.crbegin(), gpu_metrics.crend(), calculate_gpu_metric );


    auto calculate_io_metric = [ &pop_answer ]( popcalculation::PerformanceTest* t )
                               {
                                   pop_answer.io_metric_names.push_back( t->name() );
                                   pop_answer.io_metrics_helps.push_back( t->getHelpUrl() );
                                   if ( t->isActive() )
                                   {
                                       double v = t->value();
                                       pop_answer.io_values.push_back( v );
                                   }
                                   else
                                   {
                                       pop_answer.io_values.push_back( -1 );
                                   }
                               };

    std::for_each( io_metrics.crbegin(), io_metrics.crend(), calculate_io_metric );
    auto calculate_add_metric = [ &pop_answer ]( popcalculation::PerformanceTest* t )
                                {
                                    pop_answer.additional_metric_names.push_back( t->name() );
                                    pop_answer.additional_metrics_helps.push_back( t->getHelpUrl() );
                                    if ( t->isActive() )
                                    {
                                        double v = t->value();
                                        pop_answer.additional_values.push_back( v );
                                    }
                                    else
                                    {
                                        pop_answer.additional_values.push_back( -1 );
                                    }
                                };

    std::for_each( add_metrics.crbegin(), add_metrics.crend(), calculate_add_metric );
    auto calculate_control_metric = [ &pop_answer ]( popcalculation::PerformanceTest* t )
                                    {
                                        pop_answer.control_metric_names.push_back( t->name() + ", min" );
                                        pop_answer.control_metric_names.push_back( t->name() + ", avg" );
                                        pop_answer.control_metric_names.push_back( t->name() + ", max" );
                                        pop_answer.control_metrics_helps.push_back( t->getHelpUrl() );
                                        pop_answer.control_metrics_helps.push_back( "" );
                                        pop_answer.control_metrics_helps.push_back( "" );
                                        if ( t->isActive() )
                                        {
                                            double min_v = t->min_value();
                                            double v     = t->value();
                                            double max_v = t->max_value();
                                            pop_answer.control_values.push_back( min_v );
                                            pop_answer.control_values.push_back( v );
                                            pop_answer.control_values.push_back( max_v );
                                        }
                                        else
                                        {
                                            pop_answer.control_values.push_back( -1 );
                                            pop_answer.control_values.push_back( -1 );
                                            pop_answer.control_values.push_back( -1 );
                                        }
                                    };

    std::for_each( control_metrics.crbegin(), control_metrics.crend(), calculate_control_metric );

    // return corrected list of cnodes
    std::for_each(  _cnodes.begin(), _cnodes.end(), [ &pop_answer ]( cube::cnode_pair& cp ){
        pop_answer.cnodes.push_back( cp.first->get_id() );
        pop_answer.state.push_back( cp.second );
    } );

    json j3 = pop_answer;

    std::stringstream sstr;
    sstr << j3;
    std::string                json_answer = sstr.str();
    std::vector<unsigned char> messageFromServer;
    messageFromServer.insert( messageFromServer.begin(), json_answer.begin(), json_answer.end() );


    delete performance_analysis;
    return messageFromServer;
}

/**
 * Counts the direct children of the location group @p _s whose location
 * type is cube::CUBE_LOCATION_TYPE_CPU_THREAD.
 *
 * @param _s  location group to inspect; must not be null
 * @return number of CPU thread locations among the children
 */
size_t
popcalculation::POPCalculation::get_num_thread_children( const cube::LocationGroup* _s )
{
    size_t      cpu_threads = 0;
    const auto& children    = _s->get_children();
    for ( const cube::Vertex* child : children )
    {
        // The children are treated as locations, hence the downcast.
        const cube::Location* location = static_cast<const cube::Location*>( child );
        if ( location->get_type() == cube::CUBE_LOCATION_TYPE_CPU_THREAD )
        {
            ++cpu_threads;
        }
    }
    return cpu_threads;
}
