/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 1998-2025                                                **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  Copyright (c) 2009-2015                                                **
**  German Research School for Simulation Sciences GmbH,                   **
**  Laboratory for Parallel Programming                                    **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/



/**
 * \file
 * \brief Defines an inclusive metric class that uses a built-in type as its data type.
 *
 */
#ifndef CUBELIB_INCLUSIVE_BUILDIN_METRIC_H
#define CUBELIB_INCLUSIVE_BUILDIN_METRIC_H


#include <iosfwd>
#include <map>
#include <cstring>
#include <limits>
#include <future>
#include <vector>

#include "CubeIDdeliverer.h"
#include "CubeWideSearchEnumerator.h"

#include "CubeMetricBuildInType.h"
#include "CubeServices.h"
#include "CubeSemaphore.h"
#include "CubeCriticalSection.h"
#include "CubeSimpleSemaphore.h"

/*
 *----------------------------------------------------------------------------
 *
 * class InclusiveBuildInTypeMetric
 *
 *----------------------------------------------------------------------------
 */


// Workload threshold used in this header: a child cnode is evaluated
// asynchronously only if ( number of its children * number of entries in sysv )
// exceeds this value.
#define MINWORKLOAD ( 16 * 1024 )

namespace cube
{
// Forward declarations: this header only uses references to these types.
class Connection;
class CubeProxy;
// Declared here, defined elsewhere. It is queried via isFree() before a child
// cnode is evaluated with std::async and released via unlock().
extern CubeSimpleSemaphore global_simple_semaphore;

/// @brief
///     Metric whose stored values are inclusive and of the built-in type @p T.
///
/// The class reports CUBE_METRIC_INCLUSIVE as its metric type, uses the dense
/// index format and derives exclusive values by subtracting the inclusive
/// values of the visible children from the value of the cnode itself.
///
template <class T>
class InclusiveBuildInTypeMetric : public BuildInTypeMetric<T>
{
public:

    /// @brief
    ///     Creates the metric; every argument is forwarded unchanged to the
    ///     BuildInTypeMetric<T> constructor.
    ///
    InclusiveBuildInTypeMetric( const std::string& disp_name,
                                const std::string& uniq_name,
                                const std::string& dtype,
                                const std::string& uom,
                                const std::string& val,
                                const std::string& url,
                                const std::string& descr,
                                FileFinder*        ffinder,
                                Metric*            parent,
                                uint32_t           id = 0,
                                const std::string& cubepl_expression = "",
                                const std::string& cubepl_init_expression = "",
                                const std::string& cubepl_aggr_plus_expression = "",
                                const std::string& cubepl_aggr_minus_expression = "",
                                const std::string& cubepl_aggr_aggr_expression = "",
                                bool               row_wise = true,
                                VizTypeOfMetric    is_ghost = CUBE_METRIC_NORMAL
                                )
        :
        BuildInTypeMetric< T >( disp_name,
                                uniq_name,
                                dtype,
                                uom,
                                val,
                                url,
                                descr,
                                ffinder,
                                parent,
                                id,
                                cubepl_expression,
                                cubepl_init_expression,
                                cubepl_aggr_plus_expression,
                                cubepl_aggr_minus_expression,
                                cubepl_aggr_aggr_expression,
                                row_wise,
                                is_ghost
                                )
    {
    }

    /// @brief
    ///     Factory method to create an intrinsic-type inclusive metric via a Cube connection.
    /// @param connection
    ///     Active Cube connection.
    /// @param cubeProxy
    ///     Cube proxy for internal cross-referencing.
    /// @return
    ///     New metric.
    ///
    static Serializable*
    create( Connection&      connection,
            const CubeProxy& cubeProxy );

    // Note: the destructor is named without a template argument list; the
    // form "~InclusiveBuildInTypeMetric<T>()" is rejected by C++20 compilers.
    virtual
    ~InclusiveBuildInTypeMetric()
    {
    }

    /// @copydoc cube::Serializable::get_serialization_key
    ///
    virtual std::string
    get_serialization_key()  const override;

    /// @brief
    ///     Serialization key of this metric class, usable without an instance.
    /// @return
    ///     "Metric|Inclusive|" followed by the type string of BuildInTypeMetric<T>.
    ///
    static std::string
    get_static_serialization_key();

    /// @return Always CUBE_METRIC_INCLUSIVE.
    virtual TypeOfMetric
    get_type_of_metric() const override
    {
        return CUBE_METRIC_INCLUSIVE;
    }

    /// @return Always CUBE_INDEX_FORMAT_DENSE.
    virtual IndexFormat
    get_index_format() override
    {
        return CUBE_INDEX_FORMAT_DENSE;
    }

    /// @return The literal string "INCLUSIVE".
    virtual std::string
    get_metric_kind() const override
    {
        return "INCLUSIVE";
    }

    /// @brief
    ///     Enumerates the call tree with a WideSearchEnumerator and records,
    ///     for every enumerated object, the next id delivered by the IDdeliverer.
    /// @return
    ///     Row of enumerated objects as returned by the enumerator.
    ///
    virtual row_of_objects_t*
    create_calltree_id_maps( IDdeliverer*,
                             Cnode*,
                             row_of_objects_t*  ) override;



protected:

    /// @brief
    ///     Constructs a Metric object via a Cube connection.
    /// @param connection
    ///     Active Cube connection.
    /// @param cubeProxy
    ///     Cube proxy for internal cross-referencing.
    ///
    /// @note
    ///     This constructor is called by the factory method.
    ///
    InclusiveBuildInTypeMetric( Connection&      connection,
                                const CubeProxy& cubeProxy )
        : BuildInTypeMetric<T>( connection, cubeProxy )
    {
        /// @note The full packing and unpacking is done by the Metric base class;
    }

    // ----------------- calculations of pointlike parameters

    /// @brief
    ///     Value for a cnode as double. Without a system resource the value is
    ///     aggregated over the system dimension, otherwise it is computed for
    ///     the given system resource.
    ///
    virtual double
    get_sev_native(
        const cube::Cnode*,
        const CalculationFlavour cnf,
        const cube::Sysres* = nullptr,
        const CalculationFlavour snf = CUBE_CALCULATE_NONE
        ) override;

    // -----------------row wise ----------------------------------------

    /// @brief
    ///     Row of values (one per entry of sysv) for a cnode, converted to double.
    ///
    virtual double*
    get_sevs_native(
        const cube::Cnode*,
        const CalculationFlavour cnf
        ) override;


    /// @brief
    ///     Row-wise reduction used to speed up get_sev_aggregated. It is only
    ///     the reduction of the row of @p cnode, hence no flavour is needed.
    ///
    virtual T
    get_row_reduction( const cube::Cnode* cnode ) override;


private:
    /// Value of @p cnode reduced over the system dimension, honouring @p cnf.
    T
    get_sev_aggregated( const cube::Cnode*       cnode,
                        const CalculationFlavour cnf );

    /// Value of @p cnode for the system resource @p sys, honouring @p cnf and @p sf.
    T
    get_sev_pointlike( const Cnode*             cnode,
                       const CalculationFlavour cnf,
                       const Sysres*            sys,
                       const CalculationFlavour sf );

    /// Raw row (sysv.size() values of type T) for @p cnode, honouring @p cnf.
    char*
    get_sevs_raw(
        const cube::Cnode*,
        const CalculationFlavour cnf
        );
};




/// @brief
///     Assigns a local id to every call-tree object enumerated from @p root.
///
/// The id source is reset, the objects are collected with a
/// WideSearchEnumerator and each of them receives the next id from @p ids in
/// enumeration order. calltree_local_ids is grown on demand so that it can be
/// indexed by the id of the object.
///
/// @param ids   Id source; reset before use.
/// @param root  Root handed to the enumerator.
/// @param _row  Row handed to the enumerator.
/// @return      Row returned by the enumerator.
///
template <class T>
row_of_objects_t*
InclusiveBuildInTypeMetric<T>::create_calltree_id_maps( IDdeliverer* ids, Cnode* root, row_of_objects_t* _row  )
{
    WideSearchEnumerator walker;
    ids->reset();
    row_of_objects_t* enumerated = walker.get_objects_to_enumerate( root, _row );

    for ( const auto& object : *enumerated )
    {
        const auto object_id = object->get_id();
        if ( this->calltree_local_ids.size() <= object_id )
        {
            // make room so the table can be indexed by this id
            this->calltree_local_ids.resize( object_id + 1 );
        }
        this->calltree_local_ids[ object_id ] = ids->get_next_id();
    }

    return enumerated;
}




/// @brief
///     Value of the metric for @p cnode as double.
///
/// If no system resource is given, the value is aggregated over the system
/// dimension (get_sev_aggregated); otherwise it is computed for the given
/// system resource (get_sev_pointlike).
///
template <class T>
double
InclusiveBuildInTypeMetric<T>::get_sev_native(
    const cube::Cnode*       cnode,
    const CalculationFlavour cnf,
    const cube::Sysres*      sys,
    const CalculationFlavour sf
    )
{
    const T value = ( sys != nullptr )
                    ? get_sev_pointlike( cnode, cnf, sys, sf )
                    : get_sev_aggregated( cnode, cnf );
    return static_cast<double>( value );
}


/// @brief
///     Sums the stored values of @p cnode over all entries of sysv.
///
/// Non-clustered cnode: the row of the remapping cnode is fetched, its first
/// sysv.size() values are summed and the row is dropped afterwards.
///
/// Clustered cnode: for every location the cnode mapped to the rank of the
/// parent process is looked up; its value for that location is divided by the
/// cluster normalization (if positive) and added. Locations without a mapped
/// cnode contribute nothing.
///
/// @param cnode  Cnode whose row is reduced.
/// @return       Sum over the row; zero if no data is available.
///
// NOLINTNEXTLINE (suppress code checker warning)
template <class T>
T
InclusiveBuildInTypeMetric<T>::get_row_reduction( const Cnode* cnode )
{
    T            v         = static_cast<T>( 0 );
    const size_t sysv_size = this->sysv.size();

    if ( !cnode->is_clustered() )
    {
        const auto row_id = this->calltree_local_ids[ cnode->get_remapping_cnode()->get_id() ];
        char*      row    = ( this->adv_sev_mat )->getRow( row_id );
        if ( row != nullptr )
        {
            const T* t_row = reinterpret_cast<const T*>( row );
            for ( size_t i = 0; i < sysv_size; i++ )
            {
                v += t_row[ i ];
            }
        }
        // earliest moment the metric calculation does not need this row any more;
        // depending on the strategy it actually gets dropped or not
        ( this->adv_sev_mat )->dropRow( row_id );
    }
    else
    {
        for ( size_t i = 0; i < sysv_size; i++ )
        {
            const Location*    _loc         = this->sysv[ i ];
            Process*           _proc        = _loc->get_parent();
            int64_t            process_rank = _proc->get_rank();
            const cube::Cnode* _mapped      = cnode->get_remapping_cnode( process_rank );
            if ( _mapped == nullptr )
            {
                // No counterpart for this rank: the contribution is zero and
                // there is no row to drop. (Previously the row id was read
                // through the null pointer here.)
                continue;
            }

            const auto row_id = this->calltree_local_ids[ _mapped->get_id() ];
            T          tmp    = ( this->adv_sev_mat )->template getBuildInValue<T>( row_id, _loc->get_id() );
            int64_t    _norm  = cnode->get_cluster_normalization( process_rank );
            if ( _norm > 0 )
            {
                tmp /= static_cast<uint64_t>( _norm );
            }
            v += tmp;
            // earliest moment the metric calculation does not need this row any more;
            // depending on the strategy it actually gets dropped or not
            ( this->adv_sev_mat )->dropRow( row_id );
        }
    }
    return v;
}




/// @brief
///     Value of @p cnode reduced over the system dimension.
///
/// Returns zero for an inactive metric or when no data matrix is attached.
/// A cached value is returned if available. Otherwise the row of the cnode is
/// reduced; for the exclusive flavour the inclusive values of all non-hidden
/// children are subtracted. A child is evaluated via std::async when it is
/// not the last visible child, its workload exceeds MINWORKLOAD and
/// global_simple_semaphore.isFree() succeeds; all other children are
/// evaluated inline.
///
/// @param cnode  Cnode to evaluate.
/// @param cnf    Calculation flavour for the call-tree dimension.
/// @return       Resulting value (also stored in the cache if cacheable).
///
template <class T>
T
InclusiveBuildInTypeMetric<T>::get_sev_aggregated( const Cnode* cnode, const CalculationFlavour cnf )
{
    const T zero = static_cast<T>( 0 );

    if ( !( this->active ) )
    {
        return zero;
    }
    if ( this->adv_sev_mat == nullptr && get_type_of_metric() == CUBE_METRIC_INCLUSIVE )
    {
        return zero;
    }

    T result = zero;
    if ( this->isCacheable() && this->t_cache->testAndGetTCachedValue( result, cnode, cnf ) )
    {
        return result;
    }

    result = this->get_row_reduction( cnode );

    const bool subtract_children = ( cnf == cube::CUBE_CALCULATE_EXCLUSIVE ) && ( cnode->num_children() > 0 );
    if ( subtract_children )
    {
        // collect the children that take part in the calculation
        std::vector<cube::Cnode*> visible_children;
        for ( cnode_id_t idx = 0; idx < cnode->num_children(); ++idx )
        {
            cube::Cnode* child = cnode->get_child( idx );
            if ( !child->isHidden() )
            {
                visible_children.push_back( child );
            }
        }

        T                            children_sum = zero;
        std::vector<std::future<T> > pending;
        const size_t                 locations = this->sysv.size();
        size_t                       position  = 0;

        // start asynchronous tasks for heavy children, evaluate the others inline
        for ( cube::Cnode* child : visible_children )
        {
            const bool is_last = ( position + 1 == visible_children.size() );
            ++position;

            if ( !is_last
                 && ( child->num_children() * locations > MINWORKLOAD )
                 && global_simple_semaphore.isFree() )
            {
                pending.push_back( std::async( std::launch::async,
                                               [ this, child ]()
                    {
                        return this->get_sev_aggregated( child, cube::CUBE_CALCULATE_INCLUSIVE );
                    } ) );
            }
            else
            {
                T inline_value = this->get_sev_aggregated( child, cube::CUBE_CALCULATE_INCLUSIVE );
                children_sum = this->plus_operator( children_sum, inline_value );
            }
        }

        // collect the results of the asynchronous tasks
        for ( std::future<T>& outstanding : pending )
        {
            global_simple_semaphore.unlock();
            T async_value = outstanding.get();
            children_sum = this->plus_operator( children_sum, async_value );
        }

        result = this->minus_operator( result, children_sum );
    }

    if ( this->isCacheable() )
    {
        this->t_cache->setTCachedValue( result, cnode, cnf );
    }
    return result;
}








/// @brief
///     Value of @p cnode for the system resource @p sys.
///
/// Returns zero for an inactive metric, when no data matrix is attached, or
/// when an exclusive system flavour is requested for a system tree node or a
/// location group. A cached value is returned if available. Otherwise the
/// elementary values of all entries of sys->get_whole_subtree() are combined
/// with aggr_operator; for the exclusive call-tree flavour the inclusive
/// values of all non-hidden children are subtracted.
///
/// @param cnode  Cnode to evaluate.
/// @param cnf    Calculation flavour for the call-tree dimension.
/// @param sys    System resource to evaluate.
/// @param sf     Calculation flavour for the system dimension.
/// @return       Resulting value (also stored in the cache if cacheable).
///
template <class T>
T
InclusiveBuildInTypeMetric<T>::get_sev_pointlike( const Cnode* cnode, const CalculationFlavour cnf, const Sysres* sys,  const CalculationFlavour sf )
{
    const T zero = static_cast<T>( 0 );

    if ( !( this->active ) )
    {
        return zero;
    }

    const bool is_group_level = sys->isSystemTreeNode() || sys->isLocationGroup();
    if ( is_group_level && ( sf == cube::CUBE_CALCULATE_EXCLUSIVE ) )
    {
        return zero;
    }
    if ( this->adv_sev_mat == nullptr && get_type_of_metric() == CUBE_METRIC_INCLUSIVE )
    {
        return zero;
    }

    T result = zero;
    if ( this->isCacheable() && this->t_cache->testAndGetTCachedValue( result, cnode, cnf, sys, sf ) )
    {
        return result;
    }

    // every entry of the subtree is treated as a location
    const std::vector<Sysres*> subtree = sys->get_whole_subtree();
    for ( Sysres* entry : subtree )
    {
        const Location* location   = ( Location* )entry;
        T               elementary = this->get_sev_elementary( cnode, location );
        result = this->aggr_operator( result, elementary );
    }

    if ( ( cnf == CUBE_CALCULATE_EXCLUSIVE ) && ( cnode->num_children() > 0 ) )
    {
        T children_sum = zero;
        for ( cnode_id_t idx = 0; idx < cnode->num_children(); ++idx )
        {
            cube::Cnode* child = cnode->get_child( idx );
            if ( child->isHidden() )
            {
                continue;
            }
            T child_value = this->get_sev_pointlike( child, cube::CUBE_CALCULATE_INCLUSIVE, sys, sf );
            children_sum = this->plus_operator( children_sum, child_value );
        }
        result = this->minus_operator( result, children_sum );
    }

    if ( this->isCacheable() )
    {
        this->t_cache->setTCachedValue( result, cnode, cnf, sys, sf );
    }
    return result;
}

/// Factory: builds a new metric from a Cube connection using the protected
/// ( Connection&, const CubeProxy& ) constructor. The object is allocated
/// with new and returned as a raw pointer.
template < class T >
inline Serializable*
InclusiveBuildInTypeMetric< T >::create( Connection&      connection,
                                         const CubeProxy& cubeProxy )
{
    Serializable* metric = new InclusiveBuildInTypeMetric<T>( connection, cubeProxy );
    return metric;
}

/// Serialization key of this object; identical to the static key of the class.
template < class T >
inline std::string
InclusiveBuildInTypeMetric< T >::get_serialization_key() const
{
    return InclusiveBuildInTypeMetric< T >::get_static_serialization_key();
}

/// Serialization key of the class: the prefix "Metric|Inclusive|" followed by
/// the type string reported by BuildInTypeMetric<T>.
template < class T >
inline std::string
InclusiveBuildInTypeMetric< T >::get_static_serialization_key()
{
    std::string key( "Metric|Inclusive|" );
    key += BuildInTypeMetric<T>::get_type_string();
    return key;
}

/// @brief
///     Raw row with one value of type T per entry of sysv for @p cnode.
///
/// Returns a null pointer for an inactive metric or when no data matrix is
/// attached. A cached row is returned if available. Otherwise the elementary
/// value of every location is collected; for the exclusive flavour the
/// inclusive rows of all non-hidden children are summed element-wise and
/// subtracted. A child is evaluated via std::async when it is not the last
/// visible child, its workload exceeds MINWORKLOAD and
/// global_simple_semaphore.isFree() succeeds; all other children are
/// evaluated inline.
///
/// @param cnode  Cnode to evaluate.
/// @param cnf    Calculation flavour for the call-tree dimension.
/// @return       Row created with services::create_raw_row, or a null pointer.
///
/// @note NOTE(review): the row passed to setCachedRow is also returned, and
///       get_sevs_native deletes the row it receives - confirm that the cache
///       keeps its own copy.
///
template <class T>
char*
InclusiveBuildInTypeMetric<T>::get_sevs_raw( const Cnode* cnode, const CalculationFlavour cnf )
{
    if ( !( this->active ) )
    {
        return nullptr;
    }

    // Same guard as in get_sev_aggregated / get_sev_pointlike. The previous
    // comparison against CUBE_METRIC_EXCLUSIVE could never match for this
    // class, so a missing data matrix was not detected here.
    if ( this->adv_sev_mat == nullptr && get_type_of_metric() == CUBE_METRIC_INCLUSIVE )
    {
        return nullptr;
    }
    if ( this->isCacheable() )
    {
        char* cached = ( this->t_cache )->getCachedRow( cnode, cnf );
        if ( cached != nullptr )
        {
            return cached;
        }
    }

    const size_t sysv_size  = this->sysv.size();
    const size_t row_bytes  = sysv_size * this->metric_value->getSize();
    char*        to_return  = services::create_raw_row( row_bytes );
    T*           _to_return = reinterpret_cast< T* >( to_return );

    for ( size_t i = 0; i < sysv_size; i++ )
    {
        Location* _loc = this->sysv[ i ];
        _to_return[ i ] = this->get_sev_elementary( cnode, _loc );
    }

    if ( cnf == CUBE_CALCULATE_EXCLUSIVE  &&  ( cnode->num_children() > 0 )  )
    {
        std::vector<cube::Cnode*> local_cnodes;
        for ( cnode_id_t cid = 0; cid < cnode->num_children(); cid++  )
        {
            cube::Cnode* tmp_c = cnode->get_child( cid );
            if ( !tmp_c->isHidden() )
            {
                local_cnodes.push_back( tmp_c );
            }
        }

        // Element-wise sum of the children's inclusive rows.
        // assumes create_raw_row returns a zero-filled row - TODO confirm
        char* _local_to_return   = services::create_raw_row( row_bytes );
        T*    _t_local_to_return = reinterpret_cast< T* >( _local_to_return );

        // Adds one child row to the sum and releases it; a missing row is skipped.
        auto add_child_row = [ & ]( char* child_row )
                             {
                                 if ( child_row == nullptr )
                                 {
                                     return;
                                 }
                                 T* child_values = reinterpret_cast<T*>( child_row );
                                 for ( size_t i = 0; i < sysv_size; i++ )
                                 {
                                     _t_local_to_return[ i ] = this->plus_operator( _t_local_to_return[ i ], child_values[ i ] );
                                 }
                                 services::delete_raw_row( child_row );
                             };

        // start asynchronous tasks for heavy children, evaluate the others inline
        std::vector<std::future<char*> > task_results;
        size_t                           last_cnode = 0;
        for ( cube::Cnode* tmp_c : local_cnodes )
        {
            if (  ( last_cnode != ( local_cnodes.size() - 1 ) )   && ( tmp_c->num_children() * sysv_size > MINWORKLOAD ) && ( global_simple_semaphore.isFree() )  )
            {
                std::future<char*> _task_result = std::async( std::launch::async, &InclusiveBuildInTypeMetric<T>::get_sevs_raw, this, tmp_c, cube::CUBE_CALCULATE_INCLUSIVE );
                task_results.push_back( std::move( _task_result ) );
            }
            else
            {
                add_child_row( this->get_sevs_raw( tmp_c, cube::CUBE_CALCULATE_INCLUSIVE ) );
            }
            last_cnode++;
        }

        // collect the results of the asynchronous tasks
        for ( std::future<char* >& a3 : task_results )
        {
            // Release the semaphore once per started task, mirroring
            // get_sev_aggregated; without it every asynchronous task here
            // would permanently consume a slot.
            // NOTE(review): presumes isFree() acquires a slot - confirm
            // against CubeSimpleSemaphore.
            global_simple_semaphore.unlock();
            add_child_row( a3.get() );
        }

        for ( size_t i = 0; i < sysv_size; i++ )
        {
            _to_return[ i ] = this->minus_operator( _to_return[ i ], _t_local_to_return[ i ] );
        }
        services::delete_raw_row( _local_to_return );
    }
    if ( this->isCacheable() )
    {
        ( this->t_cache )->setCachedRow( to_return,  cnode, cnf );
    }
    return to_return;
}


/// @brief
///     Row of values for @p cnode (one per entry of sysv), converted to double.
///
/// The raw row is obtained from get_sevs_raw, every element is converted with
/// static_cast<double> and the raw row is deleted afterwards. If no raw row
/// is available, the freshly created row of doubles is returned unmodified.
///
/// @param cnode  Cnode to evaluate.
/// @param cfv    Calculation flavour for the call-tree dimension.
/// @return       Row created with services::create_row_of_doubles.
///
template <class T>
double*
InclusiveBuildInTypeMetric<T>::get_sevs_native( const Cnode*             cnode,
                                                const CalculationFlavour cfv
                                                )
{
    char*        raw_row   = this->get_sevs_raw( cnode, cfv );
    const size_t sysv_size = this->sysv.size();
    double*      to_return = services::create_row_of_doubles( sysv_size );
    if ( raw_row == nullptr )
    {
        return to_return;
    }

    const T* typed_row = reinterpret_cast<const T*>( raw_row );
    // size_t index: matches the type of the bound, so the counter cannot wrap
    // before reaching it
    for ( size_t i = 0; i < sysv_size; ++i )
    {
        to_return[ i ] = static_cast< double >( typed_row[ i ] );
    }
    services::delete_raw_row( raw_row );
    return to_return;
}
}


#endif
