/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 2015-2024                                                **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/


#include <config.h>
#include <algorithm>
#include <future>
#include <thread>
#include "POPHybridProcessEfficiencyTest.h"
#include "POPCalculation.h"

using namespace hybanalysis;

/**
 * Creates the " * Process Efficiency" test.
 *
 * The test is built on top of two sub-tests (load balance and communication
 * efficiency).  If either of them is missing, the test gets weight 0.2 and
 * value 0 and no metric is looked up.  If the "max_runtime" metric cannot be
 * obtained, the test gets weight 0.1 and value 0.
 *
 * @param cube          proxy used to look up the metrics
 * @param _pop_lb       load balance sub-test, may be nullptr
 * @param _pop_commeff  communication efficiency sub-test, may be nullptr
 */
POPHybridProcessEfficiencyTest::POPHybridProcessEfficiencyTest( cube::CubeProxy*                      cube,
                                                                POPHybridImbalanceTest*               _pop_lb,
                                                                POPHybridCommunicationEfficiencyTest* _pop_commeff ) :
    popcalculation::PerformanceTest( cube ),
    pop_lb( _pop_lb ),
    pop_commeff( _pop_commeff )

{
    // Give the metric handles a defined value before any early return:
    // calculate( cnodes ) tests max_runtime against nullptr, which must not
    // read an unassigned pointer.
    max_runtime      = nullptr;
    pop_avg_omp      = nullptr;
    pop_avg_ser_comp = nullptr;

    setName( " * Process Efficiency" );
    setWeight( 1 );   // needs to be adjusted
    if ( pop_lb == nullptr || pop_commeff == nullptr )
    {
        setWeight( 0.2 );
        setValue( 0. );
        return;
    }

    max_runtime = cube->getMetric( "max_runtime" );
    if ( max_runtime == nullptr )
    {
        // Let adjustForTest() prepare the cube, then retry the lookup once.
        adjustForTest( cube );
        max_runtime = cube->getMetric( "max_runtime" );
    }
    if ( max_runtime == nullptr )
    {
        setWeight( 0.1 );
        setValue( 0. );
        return;
    }

    // NOTE(review): these lookups are not checked; a missing metric is stored
    // as nullptr in the corresponding metric list below.
    pop_avg_omp      = cube->getMetric( "max_omp_time" );
    pop_avg_ser_comp = cube->getMetric( "ser_comp_time" );

    // Each metric is requested with its inclusive value.
    cube::metric_pair metric;
    metric.first  = max_runtime;
    metric.second = cube::CUBE_CALCULATE_INCLUSIVE;
    lmetrics.push_back( metric );

    metric.first  = pop_avg_omp;
    metric.second = cube::CUBE_CALCULATE_INCLUSIVE;
    lavg_omp_metrics.push_back( metric );

    metric.first  = pop_avg_ser_comp;
    metric.second = cube::CUBE_CALCULATE_INCLUSIVE;
    lavg_ser_metrics.push_back( metric );
}


/**
 * Direct calculation of the process efficiency for the given call-tree nodes.
 *
 * For every location group of type process, the system-tree values of the
 * "ser_comp_time" and "max_omp_time" metrics are multiplied by the result of
 * POPCalculation::get_num_thread_children() and summed up.  Both sums are
 * divided by the number of CPU locations, added, and divided by the
 * "max_runtime" value; the result becomes the value of this test.
 *
 * Nothing is calculated (the current value is kept) if one of the required
 * metrics is missing or if no runtime value was delivered.
 *
 * @param cnodes  call-tree node selection the values are requested for
 */
void
POPHybridProcessEfficiencyTest::calculate( const cube::list_of_cnodes& cnodes )
{
    // A nullptr metric must not be handed to the value query.
    // NOTE(review): the query is assumed not to accept nullptr metrics - confirm.
    if ( max_runtime == nullptr || pop_avg_omp == nullptr || pop_avg_ser_comp == nullptr )
    {
        return;
    }

    cube::value_container runtime_inclusive;
    cube::value_container runtime_exclusive;
    cube->getSystemTreeValues( lmetrics,
                               cnodes,
                               runtime_inclusive,
                               runtime_exclusive );

    cube::value_container omp_inclusive;
    cube::value_container omp_exclusive;
    cube->getSystemTreeValues( lavg_omp_metrics,
                               cnodes,
                               omp_inclusive,
                               omp_exclusive );

    cube::value_container ser_inclusive;
    cube::value_container ser_exclusive;
    cube->getSystemTreeValues( lavg_ser_metrics,
                               cnodes,
                               ser_inclusive,
                               ser_exclusive );

    // The returned values have to be deleted by this function.
    const auto release_values = [ ]( cube::value_container& values ){
        for ( cube::Value* element : values )
        {
            delete element;
        }
        values.clear();
    };
    const auto release_all = [ & ](){
        release_values( runtime_inclusive );
        release_values( runtime_exclusive );
        release_values( omp_inclusive );
        release_values( omp_exclusive );
        release_values( ser_inclusive );
        release_values( ser_exclusive );
    };

    // Without any runtime value there is nothing to relate the sums to;
    // indexing element 0 of an empty container would be undefined behavior.
    if ( runtime_inclusive.empty() )
    {
        release_all();
        return;
    }

    // Named differently from the member "max_runtime" to avoid shadowing it.
    const double max_runtime_value = runtime_inclusive[ 0 ]->getDouble();

    const std::vector<cube::LocationGroup*>& _lgs             = cube->getLocationGroups();
    double                                   avg_ser_comp_sum = 0.;
    double                                   avg_omp_sum      = 0.;
    for ( cube::LocationGroup* group : _lgs )
    {
        if ( group->get_type() != cube::CUBE_LOCATION_GROUP_TYPE_PROCESS )
        {
            continue;
        }
        const auto id          = group->get_sys_id();
        const auto num_threads = popcalculation::POPCalculation::get_num_thread_children( group );

        avg_ser_comp_sum += ser_inclusive[ id ]->getDouble() * num_threads;
        avg_omp_sum      += omp_inclusive[ id ]->getDouble() * num_threads;
    }

    // NOTE(review): neither cpu_locs_num nor max_runtime_value is checked
    // against zero; the divisions below are kept as they were.
    const size_t cpu_locs_num = get_number_of_cpu_locations();

    const double pop_avg_ser_comp_value = avg_ser_comp_sum / cpu_locs_num;
    const double pop_avg_omp_value      = avg_omp_sum / cpu_locs_num;

    release_all();

    setValue( ( pop_avg_omp_value + pop_avg_ser_comp_value ) / ( max_runtime_value ) );
}


/**
 * Combines the results of the two sub-tests into the value of this test.
 *
 * The value is the product of the load balance value and the communication
 * efficiency value; a sub-test that is not active contributes the factor 1.
 * Nothing is done if one of the sub-tests is missing.
 */
void
POPHybridProcessEfficiencyTest::calculate()
{
    const bool has_both_tests = ( pop_lb != nullptr ) && ( pop_commeff != nullptr );
    if ( !has_both_tests )
    {
        return;
    }

    // Both values are queried first, independent of the active state.
    const double load_balance  = pop_lb->value();
    const double communication = pop_commeff->value();

    const double lb_factor   = pop_lb->isActive() ? load_balance : 1.;
    const double comm_factor = pop_commeff->isActive() ? communication : 1.;

    setValue( lb_factor * comm_factor );
}


/**
 * Task body: applies the cnode selection to the load balance test.
 * Does nothing if no test is given.
 *
 * @param _pop_lb  load balance test, may be nullptr
 * @param cnodes   call-tree node selection passed on to the test
 */
static
void
lb_task_full_( POPHybridImbalanceTest*     _pop_lb,
               const cube::list_of_cnodes& cnodes )
{
    if ( _pop_lb == nullptr )
    {
        return;
    }
    _pop_lb->applyCnode( cnodes );
}

/**
 * Task body: applies the cnode selection to the communication efficiency
 * test.  Does nothing if no test is given.
 *
 * @param _pop_commeff  communication efficiency test, may be nullptr
 * @param cnodes        call-tree node selection passed on to the test
 */
static
void
comm_task_full_( POPHybridCommunicationEfficiencyTest* _pop_commeff,
                 const cube::list_of_cnodes&           cnodes  )
{
    if ( _pop_commeff == nullptr )
    {
        return;
    }
    _pop_commeff->applyCnode( cnodes );
}


/**
 * Evaluates the test for the given call-tree node selection.
 *
 * @param cnodes              call-tree node selection
 * @param direct_calculation  if true, the value is calculated directly from
 *                            the metrics; otherwise both sub-tests are
 *                            evaluated and their results are combined
 */
void
POPHybridProcessEfficiencyTest::applyCnode( const cube::list_of_cnodes& cnodes,
                                            const bool                  direct_calculation  )
{
    if ( direct_calculation )
    {
        calculate( cnodes );
        return;
    }

    // Both sub-tests run concurrently; std::async hands each task its own
    // copy of the cnode selection.
    std::future<void> load_balance_done = std::async( std::launch::async, lb_task_full_, pop_lb, cnodes );
    std::future<void> communication_done = std::async( std::launch::async, comm_task_full_, pop_commeff, cnodes );

    // Wait for both tasks before their results are combined.
    load_balance_done.get();
    communication_done.get();

    calculate();
}


/**
 * Returns the comment text of this test.
 *
 * @return reference to "no_comment", which is declared outside this file
 *         (presumably the shared placeholder for tests without a comment -
 *         TODO confirm)
 */
const std::string&
POPHybridProcessEfficiencyTest::getCommentText() const
{
    return no_comment;
}


// ------ overview tests ---------

/**
 * Tells whether this test can deliver a value.
 *
 * @return true only if both sub-tests (load balance and communication
 *         efficiency) were provided
 */
bool
POPHybridProcessEfficiencyTest::isActive() const
{
    return !( pop_lb == nullptr || pop_commeff == nullptr );
}

/**
 * Tells whether the result of this test is to be reported as an issue.
 *
 * @return always false
 */
bool
POPHybridProcessEfficiencyTest::isIssue() const
{
    return false;
}



/**
 * Hook for preparing the cube before the metrics of this test are looked up.
 * This implementation does nothing.
 *
 * @param cube  unused
 */
void
POPHybridProcessEfficiencyTest::adjustForTest( cube::CubeProxy* cube ) const
{
    // Explicitly discard the parameter to avoid an "unused parameter" warning.
    static_cast<void>( cube );
}
