/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 2015-2025                                                **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/


#include <config.h>
#include <list>
#include "POPHybridAuditAnalysis.h"
#include "PerformanceTest.h"


using namespace cube;
using namespace hybanalysis;



/**
 * Creates every performance test used by the hybrid (MPI+OpenMP) POP audit
 * analysis and wires each composite test to the sub-tests it is built from.
 *
 * All tests are allocated with `new` and are released in the destructor.
 *
 * @param _cube cube proxy forwarded to the PerformanceAnalysis base class and
 *              to PerformanceTest::finalizePrepsForTest()
 */
POPHybridAuditPerformanceAnalysis::POPHybridAuditPerformanceAnalysis( cube::CubeProxy* _cube ) : popcalculation::PerformanceAnalysis( _cube )
{
    // NOTE(review): `cube` is not declared in this block; presumably it is the
    // member initialised by the PerformanceAnalysis base from `_cube` - confirm.

    // Stand-alone tests: each one is constructed from the cube proxy only.
    stalled_resources = new popcalculation::POPStalledResourcesTest( cube );
    ipc               = new popcalculation::POPIPCTest( cube );
    wall              = new popcalculation::POPWallTimeTest( cube );
    no_wait_ins       = new popcalculation::POPNoWaitINSTest( cube );
    comp              = new popcalculation::POPComputationTime( cube );
    gpu_comp          = new popcalculation::POPGPUComputationTime( cube );
    posix_io          = new popcalculation::POPPosixIOTime( cube );
    mpi_io            = new popcalculation::POPMpiIOTime( cube );
    // The I/O efficiency test is composed of the two I/O time tests created above.
    io_eff            = new popcalculation::POPIOEfficiencyTest( posix_io, mpi_io );

    // Process level: communication efficiency is built from serialisation and
    // transfer, process efficiency from load balance and communication efficiency.
    pop_ser_eff      = new POPHybridSerialisationTest( cube );
    pop_transfer_eff = new POPHybridTransferTest( cube );
    comm_eff         = new POPHybridCommunicationEfficiencyTest( cube, pop_ser_eff, pop_transfer_eff );
    lb_eff           = new POPHybridImbalanceTest( cube );
    proc_eff         = new POPHybridProcessEfficiencyTest( cube, lb_eff, comm_eff );

    // Thread level: the two components handed to the thread efficiency test below.
    omp_region_eff = new POPHybridOmpRegionEfficiencyTest( cube );
    pop_amdahl_eff = new POPHybridAmdahlTest( cube );

    // Parallel efficiency sits on top of process efficiency and thread efficiency.
    thread_eff = new POPHybridThreadEfficiencyTest( cube, pop_amdahl_eff,  omp_region_eff );
    par_eff    = new POPHybridParallelEfficiencyTest( cube, proc_eff, thread_eff );

    // GPU branch: parallel efficiency is built from load balance and communication efficiency.
    gpu_comm_eff = new popcalculation::POPGPUCommunicationEfficiencyTest( cube );
    gpu_lb_eff   = new popcalculation::POPGPUImbalanceTest( cube );
    gpu_par_eff  = new popcalculation::POPGPUParallelEfficiencyTest( gpu_lb_eff, gpu_comm_eff );

    // Called once, after all tests have been constructed.
    // NOTE(review): presumably completes preparations the test constructors started - confirm.
    popcalculation::PerformanceTest::finalizePrepsForTest( _cube );

    // Remember the maximum reported by the IPC test.
    max_ipc = ipc->getMaximum();
}


/**
 * Releases every test object allocated in the constructor.
 *
 * NOTE(review): several composite tests (gpu_par_eff, io_eff, comm_eff,
 * proc_eff, thread_eff, par_eff) are deleted after tests that were passed to
 * their constructors. This is only safe as long as the composite destructors
 * do not dereference those sub-tests - confirm before changing either side.
 */
POPHybridAuditPerformanceAnalysis::~POPHybridAuditPerformanceAnalysis()
{
    // GPU efficiency tests
    delete gpu_comm_eff;
    delete gpu_lb_eff;
    delete gpu_par_eff;

    // stand-alone tests and the I/O tests
    delete stalled_resources;
    delete ipc;
    delete wall;
    delete no_wait_ins;
    delete comp;
    delete gpu_comp;
    delete posix_io;
    delete mpi_io;
    delete io_eff;

    // process-level tests
    delete comm_eff;
    delete lb_eff;
    delete pop_ser_eff;
    delete pop_transfer_eff;
    delete proc_eff;

    // thread-level tests
    delete omp_region_eff;
    delete pop_amdahl_eff;

    delete thread_eff;

    // top-level parallel efficiency
    delete par_eff;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysis::getAllTestsForCalculation()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( wall );
    to_return.push_back( stalled_resources );
    to_return.push_back( ipc );
    to_return.push_back( no_wait_ins );
    to_return.push_back( comp );
    to_return.push_back( gpu_comp );
    to_return.push_back( io_eff );
    to_return.push_back( gpu_par_eff );
    to_return.push_back( par_eff );
    return to_return;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysis::getPOPTests()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( par_eff );
    to_return.push_back( proc_eff );
    to_return.push_back( lb_eff );
    to_return.push_back( comm_eff );
    to_return.push_back( pop_ser_eff );
    to_return.push_back( pop_transfer_eff );
    to_return.push_back( thread_eff );
    to_return.push_back( pop_amdahl_eff );
    to_return.push_back( omp_region_eff );
    return to_return;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysis::getGPUTests()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( gpu_par_eff );
    to_return.push_back( gpu_lb_eff );
    to_return.push_back( gpu_comm_eff );
    return to_return;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysis::getIOTests()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( io_eff );
    to_return.push_back( posix_io );
    to_return.push_back( mpi_io );
    return to_return;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysis::getAdditionalTests()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( stalled_resources );
    to_return.push_back( ipc );
    to_return.push_back( no_wait_ins );
    to_return.push_back( comp );
    to_return.push_back( gpu_comp );
    return to_return;
}

std::list<popcalculation::PerformanceTest*>
POPHybridAuditPerformanceAnalysis::getControlTests()
{
    std::list<popcalculation::PerformanceTest*> to_return;
    to_return.push_back( wall );
    return to_return;
}

// ------ overview tests ---------

/**
 * Tells whether this analysis can be applied to the loaded profile.
 *
 * The analysis counts as active when the metric with the unique name
 * "omp_time" exists and is not marked inactive. A missing metric (null
 * lookup result) is treated like an inactive one instead of being
 * dereferenced.
 *
 * @return true if "omp_time" is present and not inactive, false otherwise
 */
bool
POPHybridAuditPerformanceAnalysis::isActive() const
{
    cube::Metric* omp_met = cube->getMetric( "omp_time" );
    // Guard the lookup result before use: it is a plain pointer and may be null.
    const bool    active = ( omp_met != nullptr ) && !omp_met->isInactive();
#ifdef HAVE_CUBELIB_DEBUG
    if ( !active )
    {
        std::cerr << "[WARNING] Profile doesn't contain OpenMP metrics. Hybrid-POP Analysis won't deliver correct result. Please use \"mpi\" or \"bsc\" instead. " << std::endl;
    }
#endif
    return active;
}

/**
 * Provides the help text that explains how this hybrid (MPI+OpenMP) variant
 * splits Parallel Efficiency into Process Efficiency and Thread Efficiency.
 *
 * @return the explanatory text as a newly constructed string
 */
std::string
POPHybridAuditPerformanceAnalysis::getAnchorHowToMeasure()
{
    // Adjacent literals are concatenated by the compiler into one string.
    return
        "This is one approach to extend POP metrics (see: <cube_pop_metrics -? mpi>) for hybrid (MPI+OpenMP) applications. In this approach Parallel Efficiency split into two components:\n\n"
        "\tProcess Efficiency shows the inefficiencies on MPI level, and can be broken down into \n"
        "\t\tComputation Load Balance and MPI Communication Efficiency\n"
        "\tThread Efficiency shows the inefficiencies on OpenMP level, and can be broken down into \n"
        "\t\tAmdahl's Efficiency and OpenMP Region Efficiency\n\n"
        "\tIn this analysis Parallel Efficiency (PE) can be computed as a product of these two sub-metrics:\n"
        "\t\tPE = Process Efficiency x Thread Efficiency ";
}
