/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 2023-2025                                                **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/


#include <config.h>
#include <algorithm>
#include <limits>
#include "POPGPUCommunicationEfficiencyTest.h"


using namespace popcalculation;


/**
 * Sets up the "GPU Communication Efficiency" test.
 *
 * Looks up the "all_kernels_executions" metric; if it is missing,
 * adjustForTest() is called once and the lookup is repeated. When the metric
 * is still unavailable the test gets weight 0.2 and value 0 and no metric
 * lists are filled. Otherwise the "time" metric (inclusive) and the kernel
 * metric (exclusive) are stored for later use by analyze().
 *
 * @param cube  proxy of the cube report the test operates on
 */
POPGPUCommunicationEfficiencyTest::POPGPUCommunicationEfficiencyTest( cube::CubeProxy* cube ) : popcalculation::PerformanceTest( cube )
{
    const char* const kernels_metric_name = "all_kernels_executions";

    setName(  " * GPU Communication Efficiency" );
    setWeight( 1 );   // need to be adjusted
    max_runtime = nullptr;

    max_all_kernels_executions = cube->getMetric( kernels_metric_name );
    if ( max_all_kernels_executions == nullptr )
    {
        // metric not present yet: let adjustForTest() try to provide it
        adjustForTest( cube );
    }
    // repeat the lookup so that a metric provided above is picked up
    max_all_kernels_executions = cube->getMetric( kernels_metric_name );
    if ( max_all_kernels_executions == nullptr )
    {
        // test cannot be evaluated on this report
        setWeight( 0.2 );
        setValue( 0. );
        return;
    }
    max_runtime = cube->getMetric( "time" );

    // run time is queried inclusively
    cube::metric_pair runtime_entry;
    runtime_entry.first  = max_runtime;
    runtime_entry.second = cube::CUBE_CALCULATE_INCLUSIVE;
    lmetrics.push_back( runtime_entry );

    // kernel executions are queried exclusively
    cube::metric_pair kernels_entry;
    kernels_entry.first  = max_all_kernels_executions;
    kernels_entry.second = cube::CUBE_CALCULATE_EXCLUSIVE;
    lmax_all_kernels_executions.push_back( kernels_entry );
}

/**
 * Evaluates the test for the given call-tree nodes and stores the result as
 * the test value. Does nothing when the "all_kernels_executions" metric was
 * not found at construction time.
 *
 * @param cnodes              call-tree nodes to evaluate
 * @param direct_calculation  ignored by this test
 */
void
POPGPUCommunicationEfficiencyTest::applyCnode( const cube::list_of_cnodes& cnodes,
                                               const bool                  direct_calculation  )
{
    ( void )direct_calculation; // intentionally unused
    if ( max_all_kernels_executions != nullptr )
    {
        const double efficiency = analyze( cnodes );
        setValue( efficiency );
    }
}


/**
 * Computes the GPU communication efficiency for the given call-tree nodes.
 *
 * The result is the largest "all_kernels_executions" value found on any
 * accelerator location group divided by the largest "time" value found on
 * any process location group.
 *
 * @param cnodes  call-tree nodes the system-tree values are evaluated for
 * @param (unnamed location group)  ignored by this test
 * @return the ratio described above; 0. if one of the two metrics is not
 *         available; quiet NaN if the report contains no process or no
 *         accelerator location group
 */
double
POPGPUCommunicationEfficiencyTest::analyze( const cube::list_of_cnodes& cnodes,
                                            cube::LocationGroup*          ) const
{
    if ( max_runtime == nullptr || max_all_kernels_executions == nullptr )
    {
        return 0.;
    }

    // system-tree values of the "time" metric
    cube::value_container runtime_inclusive;
    cube::value_container runtime_exclusive;
    cube->getSystemTreeValues( lmetrics,
                               cnodes,
                               runtime_inclusive,
                               runtime_exclusive );

    // system-tree values of the "all_kernels_executions" metric
    cube::value_container kernels_inclusive;
    cube::value_container kernels_exclusive;
    cube->getSystemTreeValues( lmax_all_kernels_executions,
                               cnodes,
                               kernels_inclusive,
                               kernels_exclusive );

    double max_process_runtime = std::numeric_limits<double>::lowest();
    double max_kernels_runtime = std::numeric_limits<double>::lowest();
    bool   found_process       = false;
    bool   found_accelerator   = false;

    const std::vector<cube::LocationGroup*>& location_groups = cube->getLocationGroups();
    for ( cube::LocationGroup* group : location_groups )
    {
        const auto group_type = group->get_type();
        if ( group_type == cube::CUBE_LOCATION_GROUP_TYPE_PROCESS )
        {
            found_process       = true;
            max_process_runtime = std::max( max_process_runtime,
                                            runtime_inclusive[ group->get_sys_id() ]->getDouble() );
        }
        else if ( group_type == cube::CUBE_LOCATION_GROUP_TYPE_ACCELERATOR )
        {
            // The accelerator side must be taken from the kernel-execution
            // values, not from the "time" values; otherwise the kernel query
            // above would be computed without ever being used.
            found_accelerator   = true;
            max_kernels_runtime = std::max( max_kernels_runtime,
                                            kernels_inclusive[ group->get_sys_id() ]->getDouble() );
        }
    }

    const double comm_value = ( found_process && found_accelerator )
                              ? max_kernels_runtime / max_process_runtime
                              : std::numeric_limits<double>::quiet_NaN();

    // the containers hold raw cube::Value pointers that are deleted here
    const auto release = [ ]( cube::value_container& values ){
                             for ( cube::Value* value : values )
                             {
                                 delete value;
                             }
                         };
    release( runtime_inclusive );
    release( runtime_exclusive );
    release( kernels_inclusive );
    release( kernels_exclusive );

    return comm_value;
}



/**
 * Returns the comment text of this test.
 *
 * This test always hands back a reference to `no_comment`, which is declared
 * outside this file (presumably a shared placeholder string -- confirm in the
 * base class).
 *
 * @return reference to `no_comment`
 */
const std::string&
POPGPUCommunicationEfficiencyTest::getCommentText() const
{
    return no_comment;
}


// ------ overview tests ---------

bool
POPGPUCommunicationEfficiencyTest::isActive() const
{
    cube::Metric* _cuda_kernels_executions       = cube->getMetric( "cuda_kernel_executions" );
    cube::Metric* _hip_kernels_executions        = cube->getMetric( "hip_kernel_executions" );
    cube::Metric* _omp_target_kernels_executions = cube->getMetric( "omp_target_kernel_executions" );
    return ( _cuda_kernels_executions != nullptr && !_cuda_kernels_executions->isInactive() ) ||
           ( _hip_kernels_executions != nullptr && !_hip_kernels_executions->isInactive() ) ||
           ( _omp_target_kernels_executions != nullptr && !_omp_target_kernels_executions->isInactive() );
};

/**
 * Tells whether the result of this test is to be reported as an issue.
 *
 * @return always false
 */
bool
POPGPUCommunicationEfficiencyTest::isIssue() const
{
    return false;
}

/**
 * Prepares the report for this test: if the "all_kernels_executions" metric
 * is not present, add_kernels_execution_time() is invoked on the report
 * (presumably this registers the metric -- the helper is defined elsewhere).
 *
 * @param cube  proxy of the cube report to prepare
 */
void
POPGPUCommunicationEfficiencyTest::adjustForTest( cube::CubeProxy* cube ) const
{
    if ( cube->getMetric( "all_kernels_executions" ) != nullptr )
    {
        return; // metric already available, nothing to do
    }
    add_kernels_execution_time( cube );
}
