/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 1998-2023                                                **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  Copyright (c) 2009-2015                                                **
**  German Research School for Simulation Sciences GmbH,                   **
**  Laboratory for Parallel Programming                                    **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/



/**
 * \file cube_remap2.cpp
 * \brief Remaps a cube and saves it in a new "remap.cube|.gz" file.
 */
/******************************************

   Performance Algebra Operation: REMAP

 *******************************************/
#include "config.h"

#include <cstdlib>
#include <iostream>
#include <string>
#include <sstream>
#include <unistd.h>
#include <set>

#include "Cube.h"
#include "AggrCube.h"
#include "CubeCnode.h"
#include "CubeMachine.h"
#include "CubeMetric.h"
#include "CubeRegion.h"
#include "CubeServices.h"
#include "CubeFileFinder.h"
#include "CubeLayoutDetector.h"
#include "algebra4.h"
#include <chrono>

#include "remap2_calls.h"

using namespace std;
using namespace cube;
using namespace remapper;
using namespace services;

/**
 * Generates the default global cartesian topologies for a cube and reports
 * on stdout how many CPU and GPU topologies were created.
 *
 * CPU-thread topologies are always requested. GPU topologies are requested
 * only if the cube contains at least one location of type GPU.
 *
 * \param inCube  Cube that receives the generated topologies.
 * \return Number of topologies the cube gained during this call.
 */
int
generate_default_topologies( Cube* inCube )
{
    cout << "Generating global cartesian topologies." << endl;

    const bool useSingularDims  = false;
    const int  topologiesBefore = inCube->get_cartv().size();

    // Multiple topologies are requested as soon as there is more than one
    // system-tree root, or more than one distinctly named "module" node
    // directly below the roots.
    set<string> moduleNames;
    const auto& roots = inCube->get_root_stnv();
    for ( const auto& root : roots )
    {
        const size_t childCount = root->get_children().size();
        for ( size_t c = 0; c < childCount; ++c )
        {
            if ( root->get_child( c )->get_class() == "module" )
            {
                moduleNames.insert( root->get_child( c )->get_name() );
            }
        }
    }
    const bool createMultiple = ( moduleNames.size() > 1 ) || ( roots.size() > 1 );

    // Prints the singular or plural message; stays silent if nothing was generated.
    const auto report = []( int count, const char* singular, const char* plural )
                        {
                            if ( count == 1 )
                            {
                                cout << " " << count << singular << endl;
                            }
                            else if ( count > 1 )
                            {
                                cout << " " << count << plural << endl;
                            }
                        };

    // Assumption: all applications have at least one CPU thread.
    gen_top( *inCube, CUBE_LOCATION_TYPE_CPU_THREAD, useSingularDims, createMultiple, false );

    const int cpuTopologies = inCube->get_cartv().size() - topologiesBefore;
    report( cpuTopologies, " CPU topology generated.", " CPU topologies generated." );

    // The first GPU location found decides how GPU topologies are generated;
    // without any GPU location this step is skipped entirely.
    const vector<Location*>& locations = inCube->get_locationv();
    for ( Location* location : locations )
    {
        if ( location->get_type() != CUBE_LOCATION_TYPE_GPU )
        {
            continue;
        }

        // Pre Score-P 8.0 streams are children of the process.
        const bool oldStyleStreams =
            location->get_parent()->get_type() == CUBE_LOCATION_GROUP_TYPE_PROCESS;
        gen_top( *inCube, CUBE_LOCATION_TYPE_GPU, useSingularDims, createMultiple, oldStyleStreams );

        const int gpuTopologies = inCube->get_cartv().size() - topologiesBefore - cpuTopologies;
        report( gpuTopologies, " GPU topology generated.", " GPU topologies generated." );
        break;
    }

    return inCube->get_cartv().size() - topologiesBefore;
}

/**
 * Removes every two-dimensional topology named "Process x Thread" from the
 * cube and announces each removal on stdout.
 *
 * The topologies are inspected on a snapshot of the cube's topology vector
 * and dropped by index. The walk goes from the back to the front so that
 * dropping an entry never shifts the index of an entry that still has to be
 * inspected. A front-to-back walk would address the wrong topology after
 * the first removal.
 *
 * \param inCube  Cube whose topologies are cleaned up.
 */
void
remove_old_processxthread_topology( Cube* inCube )
{
    const vector<Cartesian*> topologies = inCube->get_cartv();
    for ( size_t i = topologies.size(); i-- > 0; )
    {
        if ( ( topologies[ i ]->get_name() == "Process x Thread" ) && ( topologies[ i ]->get_ndims() == 2 ) )
        {
            cout << "Removing old:" << topologies[ i ]->get_name() << endl;
            // All indices below i are unaffected by this removal.
            inCube->drop_cart( i );
        }
    }
}

/* Drops every pre-existing topology that equals one of the last newTopoCount
 * topologies of the cube.
 * This keeps generated topologies from piling up when the remapper is run
 * repeatedly on the same data, while topologies that differ from the
 * generated ones are left alone.
 * The assumption is: new topologies are added at the end.
 *
 * \param inCube        Cube whose topologies are checked.
 * \param newTopoCount  Number of topologies at the end of the topology
 *                      vector that count as newly generated.
 */
void
remove_duplicate_topologies( Cube* inCube, int newTopoCount )
{
    vector<Cartesian*> carts     = inCube->get_cartv();
    const int          cartCount = carts.size();

    // Nothing to compare against if no topology was added, or if the new
    // topologies are the only ones present.
    if ( newTopoCount == 0 || newTopoCount == cartCount )
    {
        return;
    }

    // Indices [0, firstNew) are the old topologies, [firstNew, cartCount) the new ones.
    const int firstNew = cartCount - newTopoCount;

    // Collect the indices of old topologies that match any new topology.
    set<int> duplicates;
    for ( int newIdx = firstNew; newIdx < cartCount; ++newIdx )
    {
        for ( int oldIdx = 0; oldIdx < firstNew; ++oldIdx )
        {
            if ( *carts[ newIdx ] == *carts[ oldIdx ] )
            {
                duplicates.insert( oldIdx );
            }
        }
    }

    // Drop from the highest index downwards, so that the remaining (lower)
    // indices still address the intended topologies.
    set<int>::const_reverse_iterator idx = duplicates.rbegin();
    while ( idx != duplicates.rend() )
    {
        inCube->drop_cart( *idx );
        ++idx;
    }
}

/**
 * Main program.
 * - Check calling arguments.
 * - Check early that a remapping specification is available, either as the
 *   file given with "-r" or as "remapping.spec" inside the cube experiment.
 * - Read the .cube input file.
 * - Calls cube_remap(...) to create a clone of input cube and calculate exclusive
 *   and inclusive values.
 * - Replaces old "Process x Thread" topologies by freshly generated default
 *   topologies and removes duplicates of the generated ones.
 * - Saves the result in "-o outputfile" or "remap.cube|.gz" file.
 * - Opens the written file once more as a check.
 * - end.
 *
 * The program always terminates through exit(): EXIT_SUCCESS after a
 * successful run or a help request, EXIT_FAILURE on wrong arguments, a
 * missing specification, or a RuntimeError during processing.
 */
int
main( int argc, char* argv[] )
{
    int         ch;
    const char* output               = "remap";
    std::string spec                 = "__NO_SPEC__";
    bool        copy_structure       = false;
    bool        convert              = false;
    bool        add_scalasca_metrics = false;

    const string USAGE = "Usage: " + string( argv[ 0 ] ) + " [-r <remap specification file>] [-c] [-o output] [-d] [-s] [-h] <cube experiment>\n"
                         "  -r     Name of the remapping specification file. By omitting this option the specification file from the \n"
                         "        cube experiment is taken if present. \n"
                         "  -c     Create output file with the same structure as an input file. It overrides option \"-r\"\n"
                         "  -o     Name of the output file (default: " + output + ")\n"
                         "  -d     Convert all prederived metrics into usual metrics, calculate and store their values as a data. \n"
                         "  -s     Add hardcoded Scalasca metrics \"Idle threads\" and \"Limited parallelizm\"\n"
                         "  -h     Help; Output a brief help message.\n\n"
                         "Report bugs to <" PACKAGE_BUGREPORT ">\n";

    while ( ( ch = getopt( argc, argv, "csdr:o:h?" ) ) != -1 )
    {
        switch ( ch )
        {
            case 's':
                add_scalasca_metrics = true;
                break;
            case 'd':
                convert = true;
                break;
            case 'c':
                copy_structure = true;
                break;
            case 'r':
                spec = optarg;
                break;
            case 'o':
                output = optarg;
                break;
            case 'h':
            case '?':
                // getopt() also returns '?' for unrecognized options and
                // missing option arguments, so those end up here as well.
                cerr << USAGE << endl;
                exit( EXIT_SUCCESS );
                break;
            default:
                cerr << USAGE << "\nError: Wrong arguments.\n";
                exit( EXIT_FAILURE );
        }
    }

    // Exactly one positional argument (the cube experiment) is expected.
    if ( argc - optind != 1 )
    {
        cerr << USAGE << "\nError: Wrong arguments.\n";
        exit( EXIT_FAILURE );
    }

    // epikname is the directory part of the input path; it is kept only if
    // the name of that directory starts with "epik_".
    string filename = argv[ optind ];
    string epikname = filename.substr( 0, filename.rfind( '/' ) );
    if ( epikname.substr( epikname.rfind( '/' ) + 1, 5 ) != "epik_" )
    {
        epikname.clear();
    }

    if ( spec == "__NO_SPEC__" ) // early test if there is .spec file inside of the cube
    {
        cube::FileFinder* filefinder          = cube::LayoutDetector::getFileFinder( filename );
        std::string       remapping_spec_name = "remapping.spec";
        fileplace_t       specplace           = filefinder->getMiscData( remapping_spec_name );
        delete filefinder;
        // An all-ones value in this field is treated as "not found".
        if ( specplace.second.first == static_cast<uint64_t>( -1 ) )
        {
            std::cerr <<  "No remapping specification found inside the cube." << std::endl;
            exit( EXIT_FAILURE );
        }
    }
    else
    {
        // Only probe whether the given specification file can be opened; the
        // stream is closed again when it goes out of scope.
        std::ifstream specin( spec.c_str(), std::ios::in );
        if ( specin.fail() )
        {
            std::cerr <<  "No remapping specification file (" << spec << ") found."  << std::endl;
            exit( EXIT_FAILURE );
        }
    }


    // NOTE(review): inCube is never deleted in this function; it is left to
    // process teardown as before. Confirm whether it can be deleted safely
    // next to outCube.
    Cube* inCube = new Cube();
    // Create new CUBE object
    AggrCube* outCube = NULL;
    if ( copy_structure && !convert )
    {
        // NOTE(review): inCube has not been read yet at this point (that
        // happens inside the try block below) - confirm that copying the
        // still empty cube here is intended.
        outCube = new AggrCube( *inCube );
    }
    else
    {
        outCube = new AggrCube();
    }

    cout << "++++++++++++ Remapping operation begins ++++++++++++++++++++++++++" << endl;
    try
    {
        chrono::high_resolution_clock::time_point t1 = chrono::high_resolution_clock::now();
        cout << "Reading " << filename << " ... " << flush;
        inCube->openCubeReport( filename );
        cout << " done." << endl;
        cube_remap( outCube, inCube, spec, epikname, convert, add_scalasca_metrics, false, copy_structure );

        cout << "++++++++++++ Remapping operation ends successfully ++++++++++++++++" << endl;

        // Replace the old-style topology by the generated defaults and make
        // sure repeated remapping does not accumulate identical topologies.
        remove_old_processxthread_topology( outCube );
        int generated = generate_default_topologies( outCube );
        remove_duplicate_topologies( outCube, generated );

        cout << "Writing " << output << " ... " << flush;
        outCube->writeCubeReport( get_cube4_name( output ), true );
        cout << "done." << endl;
        chrono::high_resolution_clock::time_point t2 = chrono::high_resolution_clock::now();
        cout << "\nRemapping done in "
             << chrono::duration_cast<chrono::duration<double> >( t2 - t1 ).count() << " seconds." << endl;

        string testname = get_cube4_name( output ) + ".cubex";
        cout << "++++++++++++ Running Checks on >" << testname << "< ++++++++++++++++" << endl;
        t1 = chrono::high_resolution_clock::now();
        // Automatic object: released at the end of the try block and also
        // when openCubeReport() throws.
        Cube testCube;
        testCube.openCubeReport( testname );
        t2 = chrono::high_resolution_clock::now();
        cout << "\nFile opened and checked in "
             << chrono::duration_cast<chrono::duration<double> >( t2 - t1 ).count() << " seconds." << endl;
    }
    catch ( const RuntimeError& e )
    {
        std::cerr << e.what() << std::endl;
        delete outCube;
        exit( EXIT_FAILURE );
    }
    delete outCube;
    exit( EXIT_SUCCESS );
}
