/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 2015-2025                                                **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/


#include <config.h>
#include <limits>
#include <algorithm>

#include "POPGPUImbalanceTest.h"


using namespace popcalculation;

/**
 * Sets up the "GPU Load Balance Efficiency" test for the given cube.
 *
 * The test is based on the metric "all_kernels_executions". If the cube does
 * not provide it, adjustForTest() is invoked once and the lookup is repeated.
 * If the metric is still unavailable, the test is given a weight of 0.1 and
 * a value of 0 and no metric is registered in lmetrics.
 *
 * @param cube  cube proxy the test operates on
 */
POPGPUImbalanceTest::POPGPUImbalanceTest( cube::CubeProxy* cube ) : popcalculation::PerformanceTest( cube )
{
    setName(  " * GPU Load Balance Efficiency" );
    setWeight( 1 );   // needs to be adjusted
    all_kernels_executions = cube->getMetric( "all_kernels_executions" );
    if ( all_kernels_executions == nullptr )
    {
        // Metric is missing: give adjustForTest() a chance to provide it and
        // repeat the lookup only in this case.
        adjustForTest( cube );
        all_kernels_executions = cube->getMetric( "all_kernels_executions" );
    }
    if ( all_kernels_executions == nullptr )
    {
        // Still unavailable: the test cannot be evaluated for this cube.
        setWeight( 0.1 );
        setValue( 0. );
        return;
    }
    // The metric is requested with its inclusive value.
    cube::metric_pair metric;
    metric.first  = all_kernels_executions;
    metric.second = cube::CUBE_CALCULATE_INCLUSIVE;
    lmetrics.push_back( metric );
}



/**
 * Computes the GPU load balance efficiency for the given call-tree nodes.
 *
 * The inclusive values of the metrics registered in lmetrics are collected
 * over the system tree. For every location group of type
 * CUBE_LOCATION_GROUP_TYPE_ACCELERATOR the value is added to a sum and
 * compared against the running maximum. The result is
 *
 *     sum / number_of_gpu_location_groups / maximum
 *
 * @param cnodes  call-tree nodes the values are calculated for
 *                (the second, unnamed parameter is not used)
 * @return 0 if the metric "all_kernels_executions" is not available;
 *         NaN if the maximum is not larger than the smallest positive
 *         normalized double or if there are no GPU location groups;
 *         the ratio described above otherwise.
 */
double
POPGPUImbalanceTest::analyze( const cube::list_of_cnodes& cnodes,
                              cube::LocationGroup*          ) const
{
    if ( all_kernels_executions == nullptr )
    {
        return 0.;
    }
    cube::value_container inclusive_values;
    cube::value_container exclusive_values;
    cube->getSystemTreeValues( lmetrics,
                               cnodes,
                               inclusive_values,
                               exclusive_values );

    const std::vector<cube::LocationGroup*>& location_groups = cube->getLocationGroups();
    double                                   gpu_value_sum   = 0.;
    double                                   gpu_value_max   = std::numeric_limits<double>::lowest();
    for ( cube::LocationGroup* group : location_groups )
    {
        if ( group->get_type() != cube::CUBE_LOCATION_GROUP_TYPE_ACCELERATOR )
        {
            continue;
        }
        // Fetch the value once and reuse it for both the sum and the maximum.
        const double group_value = inclusive_values[ group->get_sys_id() ]->getDouble();
        gpu_value_sum += group_value;
        gpu_value_max  = std::max( gpu_value_max, group_value );
    }
    // NOTE(review): presumably the number of accelerator location groups
    // visited above; the helper is defined outside this file section.
    const size_t gpu_group_count = get_number_of_gpu_location_groups();

    // The containers hold raw pointers which have to be released here.
    for ( cube::Value* value : inclusive_values )
    {
        delete value;
    }
    for ( cube::Value* value : exclusive_values )
    {
        delete value;
    }

    /* If the values are meaningless or there are no location groups related
       to a GPU, return NaN to signal that the result is unavailable. */
    const bool no_meaningful_values = gpu_value_max <= std::numeric_limits<double>::min();
    if ( no_meaningful_values || gpu_group_count == 0 )
    {
        return std::numeric_limits<double>::quiet_NaN();
    }
    return gpu_value_sum / gpu_group_count / gpu_value_max;
}




/**
 * Evaluates the test for the given call-tree nodes and stores the result
 * as the value of this test. Nothing happens if the metric
 * "all_kernels_executions" is not available.
 *
 * @param cnodes              call-tree nodes to evaluate the test for
 * @param direct_calculation  ignored by this test
 */
void
POPGPUImbalanceTest::applyCnode( const cube::list_of_cnodes& cnodes,
                                 const bool                  direct_calculation  )
{
    static_cast<void>( direct_calculation ); // intentionally unused
    if ( all_kernels_executions != nullptr )
    {
        const double efficiency = analyze( cnodes );
        setValue( efficiency );
    }
}


/**
 * Returns the comment text of this test.
 *
 * @return reference to `no_comment` (declared outside this file section);
 *         this test does not provide a comment text of its own.
 */
const std::string&
POPGPUImbalanceTest::getCommentText() const
{
    return no_comment;
}


// ------ overview tests ---------

bool
POPGPUImbalanceTest::isActive() const
{
    cube::Metric* _cuda_kernel_executions       = cube->getMetric( "cuda_kernel_executions" );
    cube::Metric* _hip_kernel_executions        = cube->getMetric( "hip_kernel_executions" );
    cube::Metric* _omp_target_kernel_executions = cube->getMetric( "omp_target_kernel_executions" );
    return ( _cuda_kernel_executions != nullptr && !_cuda_kernel_executions->isInactive() ) ||
           ( _hip_kernel_executions != nullptr && !_hip_kernel_executions->isInactive() ) ||
           ( _omp_target_kernel_executions != nullptr && !_omp_target_kernel_executions->isInactive() );
};

/**
 * Tells whether the result of this test is reported as an issue.
 *
 * @return always false
 */
bool
POPGPUImbalanceTest::isIssue() const
{
    return false;
}

/**
 * Prepares the given cube for this test: if the metric
 * "all_kernels_executions" cannot be found, add_kernels_execution_time()
 * is called on the cube. Nothing is done if the metric already exists.
 *
 * @param cube  cube proxy to be adjusted
 */
void
POPGPUImbalanceTest::adjustForTest( cube::CubeProxy* cube ) const
{
    if ( cube->getMetric( "all_kernels_executions" ) != nullptr )
    {
        return; // metric already present, nothing to adjust
    }
    add_kernels_execution_time( cube );
}
