/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 2023-2025                                                **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/


#ifndef CUBELIB_POP_GPU_IMBALANCE_TEST_H
#define CUBELIB_POP_GPU_IMBALANCE_TEST_H

#include <string>
#include "PerformanceTest.h"


namespace popcalculation
{
class POPGPUParallelEfficiencyTest;

/**
 * POP test for the "GPU Load Balance Efficiency" metric.
 *
 * According to its own help text, the metric is
 *     average(computation time on GPU kernels) / maximum(computation time on GPU kernels).
 *
 * Only the interface and the help text live in this header; the constructor,
 * getCommentText(), adjustForTest(), applyCnode(), analyze(), isActive() and
 * isIssue() are defined elsewhere.
 */
class POPGPUImbalanceTest : public popcalculation::PerformanceTest
{
    // The GPU parallel-efficiency test is granted access to the private and
    // protected members of this test.
    friend class POPGPUParallelEfficiencyTest;


private:
    // Metric handle used by this test. Not initialised or owned in this header;
    // presumably looked up in the constructor/adjustForTest() -- see the .cpp.
    cube::Metric* all_kernels_executions;


protected:


    /// Returns the comment text of this test (defined out of line).
    virtual
    const std::string&
    getCommentText() const;

    /**
     * Returns the explanatory help text for this test.
     *
     * Despite the name, the value returned is a plain multi-line description,
     * not a URL.
     *
     * @return the description of the metric when the test is active
     *         (isActive() is true); otherwise the explanation of why the
     *         analysis cannot be performed on the current report.
     */
    virtual
    std::string
    getHelpUrl() const
    {
        // Pick the text that matches the state of the test; previously the
        // "not active" explanation was built but never returned.
        if ( isActive() )
        {
            return
                "GPU Load Balance Efficiency evaluates how evenly GPU computational workload is distributed\n"
                "across all GPU kernels in parallel execution.\n"
                "It is calculated as the ratio between:\n"
                "\tGPU Load Balance Efficiency = average(computation time on GPU kernels) / maximum(computation time on GPU kernels)\n\n"
                "This metric quantifies the relative difference between the most and least utilized GPU kernels,\n"
                "with values closer to 1 indicating better load balancing across all GPU resources.";
        }

        return
            "GPU Load Balance Efficiency analysis requires precise timing information collected specifically for GPU operations.\n"
            "This metric is fully supported only in Score-P/Scalasca measurements that include:\n"
            "\t- Comprehensive Time metric instrumentation\n"
            "\t- GPU kernel-level timing resolution\n\n"
            "When Cube Reports originate from non-Score-P/Scalasca tools or lack:\n"
            "\t- Detailed timing metrics for GPU operations\n"
            "\t- Proper GPU instrumentation data\n"
            "the POP analysis for GPU load balancing becomes impossible to perform.\n"
            "In such cases, the analysis cannot be conducted and appropriate warnings should be presented to users.";
    }

    /// Prepares the given cube for this test (defined out of line).
    virtual
    void
    adjustForTest( cube::CubeProxy* cube ) const;

public:
    /// Creates the test for the given cube proxy (defined out of line).
    POPGPUImbalanceTest( cube::CubeProxy* );

    virtual
    ~POPGPUImbalanceTest()
    {
    }

    /**
     * Applies the test to the given call-tree nodes (defined out of line).
     *
     * @param cnodes             call-tree nodes the test is applied to
     * @param direct_calculation defaults to false; its effect is defined in
     *                           the implementation file
     */
    void
    applyCnode( const cube::list_of_cnodes& cnodes,
                const bool                  direct_calculation = false  );

    /**
     * Analyzes the given call-tree nodes (defined out of line).
     *
     * @param cnodes call-tree nodes to analyze
     * @param _lg    optional location group, nullptr by default; presumably
     *               restricts the analysis to that group -- confirm in the .cpp
     * @return the result of the analysis as a double
     */
    virtual
    double
    analyze( const cube::list_of_cnodes& cnodes,
             cube::LocationGroup*        _lg = nullptr ) const;

// ------ overview tests ---------

    /// Tells whether this test is active; also selects the text returned by getHelpUrl().
    bool
    isActive() const;

    /// Tells whether the result of this test is reported as an issue (defined out of line).
    bool
    isIssue() const;
};
} // namespace popcalculation
#endif // CUBELIB_POP_GPU_IMBALANCE_TEST_H
