/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 1998-2022                                                **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/



#include "config.h"

#include "StatisticalInformation.h"
#include "Globals.h"
#include "Environment.h"
#include "CubeMetric.h"
#include "CubeCnode.h"
#include "CubeServices.h"

#include <cmath>
#include <iomanip>
#include <sstream>
#include <cassert>
#include <cstdlib>
#include <stdexcept>
#include <limits>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <algorithm>
#include <QDebug>

using namespace std;
using namespace cubegui;

namespace
{
/**
 * Skips whitespace at the current read position of a stream.
 *
 * Consumes consecutive blanks, tabs, carriage returns and newlines, so that
 * the next peek() on the stream yields the first significant character.
 * Nothing is consumed once such a character or the end of the stream is seen.
 *
 * @param theFile input stream to advance
 */
void
ReadWhitespaces( std::istream& theFile )
{
    // peek() returns an int; keeping it as int prevents the end-of-file
    // marker from being confused with a real character after truncation.
    int next = theFile.peek();
    while ( next == ' ' || next == '\t' || next == '\r' || next == '\n' )
    {
        theFile.ignore();
        next = theFile.peek();
    }
}
}

/**
 * Reads one statistics record from an input file stream.
 *
 * The record starts with the count. If the next significant character is '-',
 * the record carries no statistical values and statInformation is set to
 * false. Otherwise mean, median, minimum, maximum and sum are read, followed
 * by the variance when count >= 2 and by q1 and q3 when count >= 5.
 * Values that are not read stay 0. The result is checked with checkValidity().
 *
 * @param theFile stream positioned at the beginning of a record
 */
StatisticalInformation::StatisticalInformation( ifstream& theFile )
{
    // Give count a defined value first: if the stream is already in a failed
    // state the extraction below does not write to it, and the count
    // comparisons further down would read an indeterminate value.
    count = 0;
    mean  = median = minimum = maximum = sum = variance = q1 = q3 = 0;
    theFile >> count;
    ReadWhitespaces( theFile );
    if ( theFile.peek() != '-' )
    {
        theFile >> mean;
        theFile >> median;
        theFile >> minimum;
        theFile >> maximum;
        theFile >> sum;
        if ( count >= 2 )
        {
            // variance is only stored for at least two samples
            theFile >> variance;
        }
        if ( count >= 5 )
        {
            // quartiles are only stored for at least five samples
            theFile >> q1;
            theFile >> q3;
        }
        statInformation = true;
    }
    else
    {
        // '-' marks a record without statistical values
        statInformation = false;
    }

    checkValidity();
}

/**
 * Creates an empty record: all numeric values are zero, name and unit are
 * empty, and both the statInformation and valid_values flags are false.
 */
StatisticalInformation::StatisticalInformation()
{
    // flags
    statInformation = false;
    valid_values    = false;

    // numeric values
    count   = 0;
    sum     = mean = variance = 0;
    minimum = q1 = median = q3 = maximum = 0;

    // textual values
    name.clear();
    uom.clear();
    data_status_description.clear();
}


/**
 * Creates a record from already computed statistical values.
 *
 * All arguments are stored as given; checkValidity() then decides whether
 * they are consistent with each other. statInformation is always set to true.
 *
 * @param theCount    number of samples
 * @param theSum      sum of the samples
 * @param theMean     mean value
 * @param min         minimum
 * @param q25         lower quartile (stored as q1)
 * @param theMedian   median
 * @param q75         upper quartile (stored as q3)
 * @param max         maximum
 * @param theVariance variance
 * @param theName     name of the record
 * @param theUom      unit string of the values
 */
StatisticalInformation::StatisticalInformation( int         theCount,
                                                double      theSum,
                                                double      theMean,
                                                double      min,
                                                double      q25,
                                                double      theMedian,
                                                double      q75,
                                                double      max,
                                                double      theVariance,
                                                std::string theName,
                                                std::string theUom )
    : count( theCount ),
    sum( theSum ),
    mean( theMean ),
    minimum( min ),
    q1( q25 ),
    median( theMedian ),
    q3( q75 ),
    maximum( max ),
    variance( theVariance ),
    data_status_description(),
    name( theName ),
    uom( theUom )
{
    statInformation = true;
    checkValidity();
}


/**
 * Computes the statistics of a list of samples.
 *
 * A sorted copy of the samples is kept in the member data. The variance is
 * the mean squared deviation (divided by the number of samples). Median, q1
 * and q3 are the sorted elements at index size/2, size/4 and size*3/4.
 *
 * For an empty list all values stay zero and both statInformation and
 * valid_values are false.
 *
 * @param dataVector samples to evaluate; the argument itself is not modified
 */
StatisticalInformation::StatisticalInformation( const vector<double>& dataVector )

{
    count    = 0;
    sum      = 0;
    mean     = 0;
    minimum  = 0;
    q1       = 0;
    median   = 0;
    q3       = 0;
    maximum  = 0;
    variance = 0;
    name     = "";
    uom      = "";

    // Define the flags up front so that the early return for empty input
    // leaves the object in the same state as the default constructor.
    valid_values    = false;
    statInformation = false;

    this->data = dataVector;

    if ( data.empty() )
    {
        return;
    }

    for ( const double value : data )
    {
        sum += value;
    }
    count = static_cast<int>( data.size() );
    mean  = sum / data.size();

    for ( const double value : data )
    {
        const double deviation = value - mean;
        variance += deviation * deviation;
    }
    variance /= data.size();

    // order statistics are read from the sorted copy
    std::sort( data.begin(), data.end() );
    minimum = data.front();
    maximum = data.back();
    median  = data[ data.size() / 2 ];
    q1      = data[ data.size() / 4 ];
    q3      = data[ data.size() * 3 / 4 ];

    valid_values    = true;
    statInformation = true;
}

/**
 * Counts the stored samples that fall between min and max.
 *
 * The search walks the member data from the front and therefore assumes it
 * is in ascending order (the constructor taking a sample vector sorts it).
 * Objects without stored samples always yield 0.
 *
 * @param min lower bound of the range
 * @param max upper bound of the range
 * @return number of samples from the first one >= min up to (excluding) the
 *         first later one > max; 0 if the range does not overlap
 *         [minimum, maximum] or if the first sample >= min is already >= max
 */
int
StatisticalInformation::countRange( double min, double max ) const
{
    if ( ( min >= this->maximum ) || ( max <= this->minimum ) )
    {
        return 0; // invalid range
    }

    // find first value at least as big as min
    const auto first = std::find_if( data.cbegin(), data.cend(), [ min ]( double val ){
        return val >= min;
    } );

    // No sample is stored (or none reaches min): there is nothing to count,
    // and the iterator must not be dereferenced or advanced.
    if ( first == data.cend() || *first >= max )
    {
        return 0; // empty range
    }

    // find first value bigger than max
    const auto last = std::find_if( first + 1, data.cend(), [ max ]( double val ){
        return val > max;
    } );

    return static_cast<int>( last - first );
}

/**
 * Performs a sequence of checks on whether the statistical data looks meaningful.
 *
 * Every failed check appends a translated message to data_status_description
 * and clears valid_values; if no check fails, valid_values is true and the
 * description is empty. The variance check is only applied for count >= 2,
 * the quartile checks only for count >= 5.
 */
void
StatisticalInformation::checkValidity()
{
    data_status_description.clear();
    valid_values = true;
    if ( minimum > maximum )
    {
        data_status_description.append( QObject::tr( "Min value is larger than Max." ) );
        valid_values = false;                   // the world is upside down
    }
    // A value is out of range if it lies above the maximum OR below the minimum.
    if ( mean > maximum || mean < minimum )
    {
        data_status_description.append( QObject::tr( "Mean value is out of range [minimum, maximum]." ) );
        valid_values = false;
    } // mean is out of range
    if ( median > maximum || median < minimum )
    {
        data_status_description.append( QObject::tr( "Median value is out of range [minimum, maximum]." ) );
        valid_values = false;
    } // median is out of range
    if ( maximum * count  < sum )
    {
        data_status_description.append( QObject::tr( "Sum is larger than \"count * maximum\"." ) );
        valid_values = false;
    } // sum is bigger than count times maximum
    if ( ( minimum * count )  > sum )
    {
        data_status_description.append( QObject::tr( "Sum is smaller than \"count * minimum\"." ) );
        valid_values = false;
    } // sum is lower than count times minimum
    if ( count >= 2 )
    {
        if ( variance < 0 )
        {
            data_status_description.append( QObject::tr( "Variance is negative." ) );
            valid_values = false;
        } // variance cannot be negative

        if ( count >= 5 )
        {
            if ( q1 > q3 )
            {
                data_status_description.append( QObject::tr( "25% quantile is larger than 75% quantile." ) );
                valid_values = false;
            } // quantile 25% is bigger than 75%
            // NOTE(review): the two messages below say "Medium" where "Median" is meant;
            // the literals are left unchanged because they are also the translation source text.
            if ( median > q3 )
            {
                data_status_description.append( QObject::tr( "Medium (50% quantile) is larger than 75% quantile." ) );
                valid_values = false;
            } // the median is the 50% quantile and cannot be bigger than the 75% quantile
            if ( median < q1 )
            {
                data_status_description.append( QObject::tr( "Medium (50% quantile) is smaller than 25% quantile." ) );
                valid_values = false;
            } // the median is the 50% quantile and cannot be smaller than the 25% quantile
        }
    }
}

const std::string&
StatisticalInformation::getUom() const
{
    return uom;
}

int
StatisticalInformation::getCount() const
{
    return count;
}

double
StatisticalInformation::getVariance() const
{
    return variance;
}

/** @return a copy of the messages collected by the validity checks */
QStringList
StatisticalInformation::getDataStatusDescription() const
{
    return this->data_status_description;
}

const std::string&
StatisticalInformation::getName() const
{
    return name;
}

bool
StatisticalInformation::isValid() const
{
    return valid_values;
}

double
StatisticalInformation::getMaximum() const
{
    return maximum;
}

double
StatisticalInformation::getQ3() const
{
    return q3;
}

double
StatisticalInformation::getMedian() const
{
    return median;
}

double
StatisticalInformation::getQ1() const
{
    return q1;
}

double
StatisticalInformation::getMinimum() const
{
    return minimum;
}

double
StatisticalInformation::getMean() const
{
    return mean;
}

double
StatisticalInformation::getSum() const
{
    return sum;
}

/**
 * Builds a pair from two separate statistics records.
 *
 * @param current  record copied into the member current
 * @param absolute record copied into the member absolute
 */
StatisticPair::StatisticPair( const StatisticalInformation& current, const StatisticalInformation& absolute ) : absolute( absolute ),
    current( current )
{
    // The single-argument constructor sets this flag to true; set it here as
    // well so that it never depends on a default that is not visible in this file.
    // NOTE(review): assumes the intended value for a two-record pair is false - confirm against the header.
    _isAbsolute = false;
}

/**
 * Builds a pair from a single record, which is used for both members,
 * and marks the pair as absolute.
 *
 * @param absolute record copied into both absolute and current
 */
StatisticPair::StatisticPair( const StatisticalInformation& absolute )
    : absolute( absolute ),
    current( absolute )
{
    _isAbsolute = true;
}

/**
 * Builds one html table row from a list of cell texts.
 * Empty entries are skipped; the first emitted cell is left-aligned,
 * all following cells are right-aligned.
 */
static QString
getTableRow( QStringList list )
{
    QString row         = "<tr>";
    bool    isFirstCell = true;
    for ( const QString& cell : list )
    {
        if ( cell.isEmpty() )
        {
            continue; // empty entries produce no cell
        }
        const QString alignment = isFirstCell ? "left" : "right";
        row        += "<td align=" + alignment + ">" + cell + "</td>";
        isFirstCell = false;
    }
    row += "</tr>";
    return row;
}

/**
 * Returns an html table row consisting of a label, a formatted value with
 * unit and, optionally, a percentage.
 *
 * If the pair is absolute and the maximum of current is not near zero, the
 * row shows currentValue together with its rounded percentage of that
 * maximum. Otherwise the row shows absoluteValue and no percentage.
 */
QString
StatisticPair::getInfoLine( const QString& label, double currentValue, double absoluteValue ) const
{
    QString    percentage     = "";
    const bool showPercentage = isAbsolute() && fabs( current.getMaximum() ) > 10e-6;
    if ( showPercentage )
    {
        // the value itself goes into the value column, its share of the maximum into the last one
        absoluteValue = currentValue;
        currentValue  = lround( absoluteValue * 100 / current.getMaximum() );
        percentage    = Globals::formatNumber( currentValue, true ) + "%";
    }

    QString     unit = current.getUom().c_str();
    QStringList cells;
    cells << label + ":";
    cells << Globals::formatNumber( absoluteValue, false, FORMAT_DOUBLE ) + unit;
    cells << percentage;
    return getTableRow( cells );
}

/**
 * Renders the statistics as an html fragment.
 *
 * The output starts with the style header and, if patternName is not empty,
 * a line naming the pattern. If current is valid, a table with sum, count,
 * mean, standard deviation, maximum, median and minimum follows; the two
 * quartile rows are only added when current holds at least five samples.
 *
 * @param patternName name shown above the table; may be empty
 */
QString
StatisticPair::toHtml( const QString& patternName ) const
{
    const QString table_css = "table, th, td { padding: 0px 10px 0px 0px }";
    QString       html      = "<html><style>" + table_css + "</style><body>";
    if ( !patternName.isEmpty() )
    {
        html += QObject::tr( "Pattern" ) + ":" + patternName + "<p>";
    }
    if ( !current.isValid() )
    {
        return html; // no table for inconsistent values
    }

    const bool withQuartiles = current.getCount() >= 5;

    html += "<table>";
    html += getTableRow( QStringList() << QObject::tr( "Sum:" ) << Globals::formatNumber( current.getSum() ) );
    html += getTableRow( QStringList() << QObject::tr( "Count:" ) << Globals::formatNumber( current.getCount() ) );
    html += getInfoLine( QObject::tr( "Mean" ), current.getMean(), absolute.getMean() );
    html += getInfoLine( QObject::tr( "Standard deviation" ), sqrt( current.getVariance() ), sqrt( absolute.getVariance() ) );
    html += getInfoLine( QObject::tr( "Maximum" ), current.getMaximum(), absolute.getMaximum() );
    if ( withQuartiles )
    {
        html += getInfoLine( QObject::tr( "Upper quartile (Q3)" ), current.getQ3(), absolute.getQ3() );
    }
    html += getInfoLine( QObject::tr( "Median" ), current.getMedian(), absolute.getMedian() );
    if ( withQuartiles )
    {
        html += getInfoLine( QObject::tr( "Lower quartile (Q1)" ), current.getQ1(), absolute.getQ1() );
    }
    html += getInfoLine( QObject::tr( "Minimum" ), current.getMinimum(), absolute.getMinimum() );
    html += "</table>";
    return html;
}

/** @return the record stored in the member absolute */
const StatisticalInformation&
StatisticPair::getAbsolute() const
{
    return this->absolute;
}

/** @return the record stored in the member current */
const StatisticalInformation&
StatisticPair::getCurrent() const
{
    return this->current;
}
