/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 2015-2025                                                **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/


#include <config.h>
#include <list>
#include "POPHybridAuditAnalysisAdd.h"
#include "PerformanceTest.h"


using namespace cube;
using namespace hybaddanalysis;



/**
 * Builds every test object used by the hybrid ("additive") POP analysis.
 *
 * The tests are allocated with plain `new`; the matching `delete` calls live in the
 * destructor. Composite tests receive pointers to the tests they are built from, so
 * the components are always created before the composite that refers to them.
 *
 * @param _cube Cube proxy handed to the PerformanceAnalysis base class and to
 *              PerformanceTest::finalizePrepsForTest(). The tests themselves are
 *              constructed from `cube` (presumably the base-class member initialised
 *              from `_cube` -- confirm in PerformanceAnalysis).
 */
POPHybridAuditPerformanceAnalysisAdd::POPHybridAuditPerformanceAnalysisAdd( cube::CubeProxy* _cube ) : popcalculation::PerformanceAnalysis( _cube )
{
    // Stand-alone tests; posix_io and mpi_io additionally feed the I/O efficiency test.
    stalled_resources = new popcalculation::POPStalledResourcesTest( cube );
    ipc               = new popcalculation::POPIPCTest( cube );
    wall              = new popcalculation::POPWallTimeTest( cube );
    no_wait_ins       = new popcalculation::POPNoWaitINSTest( cube );
    comp              = new popcalculation::POPComputationTime( cube );
    gpu_comp          = new popcalculation::POPGPUComputationTime( cube );
    posix_io          = new popcalculation::POPPosixIOTime( cube );
    mpi_io            = new popcalculation::POPMpiIOTime( cube );
    io_eff            = new popcalculation::POPIOEfficiencyTest( posix_io, mpi_io );

    // Process-level branch: serialisation + transfer -> communication efficiency;
    // load balance + communication efficiency -> process efficiency.
    pop_ser_eff      = new POPHybridSerialisationTestAdd( cube );
    pop_transfer_eff = new POPHybridTransferTestAdd( cube );
    comm_eff         = new POPHybridCommunicationEfficiencyTestAdd( cube, pop_ser_eff, pop_transfer_eff );
    lb_eff           = new POPHybridImbalanceTestAdd( cube );
    proc_eff         = new POPHybridProcessEfficiencyTestAdd( cube, lb_eff, comm_eff );

    // Thread-level branch components.
    omp_region_eff = new POPHybridOmpRegionEfficiencyTestAdd( cube );
    pop_amdahl_eff = new POPHybridAmdahlTestAdd( cube );

    // Thread efficiency is built from the two thread-level tests; parallel efficiency
    // is built from process and thread efficiency.
    thread_eff = new POPHybridThreadEfficiencyTestAdd( cube, pop_amdahl_eff,  omp_region_eff );
    par_eff    = new POPHybridParallelEfficiencyTestAdd( cube, proc_eff, thread_eff );


    // GPU branch: parallel efficiency built from GPU load balance and communication efficiency.
    gpu_comm_eff = new popcalculation::POPGPUCommunicationEfficiencyTest( cube );
    gpu_lb_eff   = new popcalculation::POPGPUImbalanceTest( cube );
    gpu_par_eff  = new popcalculation::POPGPUParallelEfficiencyTest( gpu_lb_eff, gpu_comm_eff );

    // Called once, after all tests above have been constructed.
    // NOTE(review): its exact effect on the cube is not visible in this file.
    popcalculation::PerformanceTest::finalizePrepsForTest( _cube );

    // Keep the maximum reported by the IPC test in max_ipc.
    max_ipc = ipc->getMaximum();
}


/**
 * Releases every test object allocated by the constructor.
 *
 * Composite tests were handed raw pointers to other tests when they were created.
 * They are therefore deleted before the tests they refer to, so that no composite
 * is ever left holding a pointer to an already destroyed component while it is
 * being destroyed itself.
 * NOTE(review): whether the composite destructors actually touch their components
 * is not visible in this file; this order is safe either way.
 */
POPHybridAuditPerformanceAnalysisAdd::~POPHybridAuditPerformanceAnalysisAdd()
{
    // GPU branch: composite first, then its two components.
    delete gpu_par_eff;
    delete gpu_lb_eff;
    delete gpu_comm_eff;

    // Hybrid POP hierarchy, from the top-level composite down to the leaves.
    delete par_eff;
    delete thread_eff;
    delete proc_eff;
    delete comm_eff;
    delete omp_region_eff;
    delete pop_amdahl_eff;
    delete lb_eff;
    delete pop_ser_eff;
    delete pop_transfer_eff;

    // I/O efficiency before the two I/O time tests it was built from.
    delete io_eff;
    delete mpi_io;
    delete posix_io;

    // Stand-alone tests.
    delete gpu_comp;
    delete comp;
    delete no_wait_ins;
    delete wall;
    delete ipc;
    delete stalled_resources;
}


std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysisAdd::getAllTestsForCalculation()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( wall );
    to_return.push_back( stalled_resources );
    to_return.push_back( ipc );
    to_return.push_back( no_wait_ins );
    to_return.push_back( comp );
    to_return.push_back( gpu_comp );
    to_return.push_back( io_eff );
    to_return.push_back( gpu_par_eff );
    to_return.push_back( par_eff );
    return to_return;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysisAdd::getPOPTests()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( par_eff );
    to_return.push_back( proc_eff );
    to_return.push_back( lb_eff );
    to_return.push_back( comm_eff );
    to_return.push_back( pop_ser_eff );
    to_return.push_back( pop_transfer_eff );
    to_return.push_back( thread_eff );
    to_return.push_back( pop_amdahl_eff );
    to_return.push_back( omp_region_eff );
    return to_return;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysisAdd::getGPUTests()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( gpu_par_eff );
    to_return.push_back( gpu_lb_eff );
    to_return.push_back( gpu_comm_eff );
    return to_return;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysisAdd::getIOTests()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( io_eff );
    to_return.push_back( posix_io );
    to_return.push_back( mpi_io );
    return to_return;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysisAdd::getAdditionalTests()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( stalled_resources );
    to_return.push_back( ipc );
    to_return.push_back( no_wait_ins );
    to_return.push_back( comp );
    to_return.push_back( gpu_comp );
    return to_return;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysisAdd::getControlTests()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( wall );
    return to_return;
}


// ------ overview tests ---------

/**
 * Tells whether this analysis is applicable to the loaded profile.
 *
 * The analysis is active only when the cube provides the metric "omp_time" and that
 * metric is not reported as inactive. In builds with HAVE_CUBELIB_DEBUG a warning is
 * written to std::cerr when this is not the case.
 *
 * @return true if "omp_time" is available and active, false otherwise.
 */
bool
POPHybridAuditPerformanceAnalysisAdd::isActive() const
{
    cube::Metric* omp_met = cube->getMetric( "omp_time" );
    // A failed lookup is treated like an inactive metric instead of being dereferenced.
    const bool omp_available = ( omp_met != nullptr ) && !omp_met->isInactive();
#ifdef HAVE_CUBELIB_DEBUG
    if ( !omp_available )
    {
        std::cerr << "[WARNING] Profile doesn't contain OpenMP metrics ('omp_time'). "
                  << "Hybrid-POP Analysis results may be inaccurate. "
                  << "Consider using \"mpi\" or \"bsc\" metrics instead." << std::endl;
    }
#endif
    return omp_available;
}

/**
 * Returns the help text describing this analysis: how Parallel Efficiency is split
 * into Process Efficiency and Thread Efficiency, what each of them is broken down
 * into, and the additive formula that combines them.
 *
 * @return the complete, multi-line help text.
 */
std::string
POPHybridAuditPerformanceAnalysisAdd::getAnchorHowToMeasure()
{
    // Introduction.
    std::string help_text =
        "This approach extends POP metrics (see cube_pop_metrics -h or -?) for hybrid (MPI+OpenMP) applications.\n"
        "In this method, Parallel Efficiency is decomposed into two components:\n\n";

    // Process-level component.
    help_text +=
        "\t- Process Efficiency\n"
        "\t  Shows MPI-level inefficiencies, broken down into:\n"
        "\t  - Computation Load Balance\n"
        "\t  - MPI Communication Efficiency\n\n";

    // Thread-level component.
    help_text +=
        "\t- Thread Efficiency\n"
        "\t  Shows OpenMP-level inefficiencies, broken down into:\n"
        "\t  - Amdahl's Efficiency\n"
        "\t  - OpenMP Region Efficiency\n\n";

    // How the two components are combined.
    help_text +=
        "In this analysis, Parallel Efficiency (PE) combines these sub-metrics as:\n"
        "\tPE = Process Efficiency + Thread Efficiency - 1\n\n"
        "Note: PE is normalized between 0 (worst case) and 1 (ideal parallelization).";

    return help_text;
}
