/****************************************************************************
**  CUBE        http://www.scalasca.org/                                   **
*****************************************************************************
**  Copyright (c) 2025                                                     **
**  Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre          **
**                                                                         **
**  This software may be modified and distributed under the terms of       **
**  a BSD-style license.  See the COPYING file in the package base         **
**  directory for details.                                                 **
****************************************************************************/


#ifndef CUBELIB_LIMITED_CACHE_H
#define CUBELIB_LIMITED_CACHE_H

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstdint> // For std::int64_t
#include <cstdlib> // For getenv
#include <cstring> // For memcpy
#include <iostream> // For std::cerr
#include <limits>
#include <list>
#include <map>
#include <memory> // For std::unique_ptr
#include <mutex>
#include <stdexcept> // For std::logic_error, std::invalid_argument, std::out_of_range
#include <stdint.h>
#include <string> // For std::stoul, std::stoull

#include "CubeCache.h"
#include "CubeCnode.h"
#include "CubeSysres.h"
#include "CubeValue.h"

namespace cube
{
/**
 * Class "LimitedCache" stores calculated values up to a certain capacity
 * using a Least Recently Used (LRU) eviction policy.
 *
 * Two environment variables, read once in the constructor, bound the cache:
 *  - CUBE_CACHE_CAPACITY   : maximal number of entries per LRU list. It is
 *                            applied to the Value cache, the T value cache
 *                            and the row cache alike. If it is unset or cannot
 *                            be parsed, 10000 entries are used for values and
 *                            1000 for rows.
 *  - CUBE_CACHE_MAX_MEMORY : limit in bytes for the accounted size of all
 *                            cached data. If it is unset or cannot be parsed,
 *                            there is effectively no limit.
 */
template <class T>
class LimitedCache : public Cache
{
private:
    // Per key flag: set to true by get_key() when a reader asks for the key,
    // set to false by the setters once a value has been stored.
    typedef std::map<const simple_cache_key_t, bool> dirty_values_map;

    // Use std::unique_ptr for automatic memory management of stored values
    typedef std::map<const simple_cache_key_t, std::unique_ptr<Value> >    cache_map;
    typedef std::map<const simple_cache_key_t, std::unique_ptr<char[]> >   row_cache_map;
    typedef std::map<const simple_cache_key_t, T>                          t_cache_map; // T is usually copyable/movable

    // LRU bookkeeping: the list is ordered from most (front) to least (back)
    // recently used key, the map locates the list node of a key.
    typedef std::list<simple_cache_key_t>                                  lru_list_type;
    typedef std::map<simple_cache_key_t, typename lru_list_type::iterator> lru_map_type;

    // For Values (both containers share one LRU list)
    cache_map     stn_container; // values requested with a system resource
    cache_map     sum_container; // values requested without a system resource
    lru_list_type lru_list_values;
    lru_map_type  lru_map_values;

    // For T values (both containers share one LRU list)
    t_cache_map   t_stn_container;
    t_cache_map   t_sum_container;
    lru_list_type lru_list_t_values;
    lru_map_type  lru_map_t_values;

    // For Rows
    row_cache_map row_container;
    lru_list_type lru_list_rows;
    lru_map_type  lru_map_rows;

    dirty_values_map dirty_values;
    dirty_values_map dirty_rows;

    std::mutex              access_mutex;          // guards the containers and the LRU bookkeeping
    std::condition_variable value_ready;
    std::condition_variable row_value_ready;
    std::mutex              value_key_calc_mutex;
    std::mutex              row_key_calc_mutex;
    std::mutex              value_key_mod_mutex;
    std::mutex              row_key_mod_mutex;
    std::int64_t            waiting_for_value = 0;
    std::int64_t            waiting_for_row   = 0;

    size_t capacity_values = 10000; // Capacity for Value* and T type values (default)
    size_t capacity_rows   = 1000;  // Capacity for rows (default)

    // Members for the memory-based limit
    size_t              max_memory_capacity = std::numeric_limits<size_t>::max(); // Total memory limit in bytes
    std::atomic<size_t> current_memory_usage{ 0 };                                // Current memory usage in bytes

    /**
     * Reads an unsigned number from the environment variable @p name.
     *
     * @param name   name of the environment variable
     * @param result receives the parsed number; untouched on failure
     * @return true if the variable is set and holds a number that fits into
     *         size_t, false otherwise (unset, not a number, out of range)
     */
    static bool
    read_size_from_env( const char* name,
                        size_t&     result )
    {
        const char* text = std::getenv( name );
        if ( text == nullptr )
        {
            return false;
        }
        try
        {
            const unsigned long long parsed = std::stoull( text );
            // Reject instead of truncating where size_t is narrower than unsigned long long.
            if ( parsed > std::numeric_limits<size_t>::max() )
            {
                return false;
            }
            result = static_cast<size_t>( parsed );
            return true;
        }
        catch ( const std::logic_error& )
        {
            // std::invalid_argument (no number) or std::out_of_range (too large)
            return false;
        }
    }

protected:
    cnode_id_t  number_cnodes;
    sysres_id_t number_loc;
    size_t      loc_size;

    CalculationFlavour myf;
    TypeOfMetric       type_of_metric;

    simple_cache_key_t threshold;

    simple_cache_key_t
    get_key( const Cnode*,
             const CalculationFlavour,
             const Sysres*            sysres = nullptr,
             const CalculationFlavour sf = CUBE_CALCULATE_INCLUSIVE,
             bool                     setting = false );

    simple_cache_key_t
    get_key_for_row( const Cnode*             c,
                     const CalculationFlavour cf,
                     bool                     setting = false )
    {
        // misusing  pointer c as sysres pointer, to signalize that we are looking for key for a row
        return get_key( c, cf, reinterpret_cast<Sysres*>( const_cast<Cnode*>( c ) ), cf, setting );
    }

    void
    empty();

    void
    evict_lru_value();

    void
    evict_lru_t_value();

    void
    evict_lru_row();

public:
    /**
     * Creates an empty cache and reads its limits from the environment.
     *
     * @param _n_cnode        number of cnodes; 70% of it is used as threshold in get_key()
     * @param _n_loc          number of locations; a row holds _n_loc * _loc_size bytes
     * @param _loc_size       size in bytes of the value of one location
     * @param metric_flavor   calculation flavour of the metric owning this cache
     * @param _type_of_metric type of the metric owning this cache
     */
    LimitedCache( cnode_id_t         _n_cnode,
                  sysres_id_t        _n_loc,
                  size_t             _loc_size,
                  CalculationFlavour metric_flavor = CUBE_CALCULATE_EXCLUSIVE,
                  TypeOfMetric       _type_of_metric = CUBE_METRIC_EXCLUSIVE )
        : number_cnodes( _n_cnode )
        , number_loc( _n_loc )
        , loc_size( _loc_size )
        , myf( metric_flavor )
        , type_of_metric( _type_of_metric )
        , threshold( static_cast<simple_cache_key_t>( 0.7 * number_cnodes ) )
    {
        size_t from_env = 0;

        // One number sets the capacity of values and rows alike; the defaults differ.
        if ( read_size_from_env( "CUBE_CACHE_CAPACITY", from_env ) )
        {
            capacity_values = from_env;
            capacity_rows   = from_env;
        }
        // Without a usable value the memory limit stays effectively unlimited.
        if ( read_size_from_env( "CUBE_CACHE_MAX_MEMORY", from_env ) )
        {
            max_memory_capacity = from_env;
        }
    }

    virtual ~LimitedCache()
    {
        empty();
    }

    /// Returns a copy of the cached Value or nullptr if there is none.
    virtual Value*
    getCachedValue( const Cnode*             cnode,
                    const CalculationFlavour cf,
                    const Sysres*            sysres = nullptr,
                    const CalculationFlavour sf = CUBE_CALCULATE_INCLUSIVE );

    /// Stores a copy of the given Value; the caller keeps ownership of it.
    virtual void
    setCachedValue( Value*,
                    const Cnode*             cnode,
                    const CalculationFlavour cf,
                    const Sysres*            sysres = nullptr,
                    const CalculationFlavour sf = CUBE_CALCULATE_INCLUSIVE );

    /// Copies the cached T value into the first argument and returns true, or returns false if there is none.
    bool
    testAndGetTCachedValue( T&,
                            const Cnode*             cnode,
                            const CalculationFlavour cf,
                            const Sysres*            sysres = nullptr,
                            const CalculationFlavour sf = CUBE_CALCULATE_INCLUSIVE );

    /// Stores the given T value.
    void
    setTCachedValue( T                        value,
                     const Cnode*             cnode,
                     const CalculationFlavour cf,
                     const Sysres*            sysres = nullptr,
                     const CalculationFlavour sf = CUBE_CALCULATE_INCLUSIVE );

    /// Returns a newly allocated copy of the cached row or nullptr if there is none.
    char*
    getCachedRow( const Cnode*             cnode,
                  const CalculationFlavour cf );

    /// Stores a copy of the given row; the caller keeps ownership of it.
    void
    setCachedRow( char*                    value,
                  const Cnode*             cnode,
                  const CalculationFlavour cf );

    /// Removes everything cached under the key of the given request.
    virtual void
    invalidateCachedValue( const Cnode*             cnode,
                           const CalculationFlavour cf,
                           const Sysres*            sysres = nullptr,
                           const CalculationFlavour sf = CUBE_CALCULATE_INCLUSIVE );

    /// Removes all cached data.
    virtual void
    invalidate();
};

/**
 * Removes the least recently used key of the Value cache.
 *
 * The key at the back of lru_list_values is dropped from the LRU bookkeeping,
 * from the containers and from dirty_values; current_memory_usage is reduced
 * by the size of every removed Value. Does nothing if the LRU list is empty.
 *
 * Called by setCachedValue() while access_mutex is held; the function itself
 * does not lock access_mutex.
 */
template <class T>
void
LimitedCache<T>::evict_lru_value()
{
    if ( lru_list_values.empty() )
    {
        return;
    }
    const simple_cache_key_t victim = lru_list_values.back();
    lru_list_values.pop_back();
    lru_map_values.erase( victim );

    // stn_container and sum_container share this LRU list and the key formulas
    // of get_key() are not disjoint, so the key may live in both containers.
    // Both are cleaned: an entry left behind would have no LRU node any more.
    // unique_ptr automatically handles deletion.
    typename cache_map::iterator stn_it = stn_container.find( victim );
    if ( stn_it != stn_container.end() )
    {
        current_memory_usage -= stn_it->second->getSize(); // Subtract size before erase
        stn_container.erase( stn_it );
    }
    typename cache_map::iterator sum_it = sum_container.find( victim );
    if ( sum_it != sum_container.end() )
    {
        current_memory_usage -= sum_it->second->getSize(); // Subtract size before erase
        sum_container.erase( sum_it );
    }

    // dirty_values is read and written by get_key() and the setters under
    // value_key_calc_mutex, so the very same mutex has to be used here.
    // Lock order (access_mutex, then value_key_calc_mutex) matches setCachedValue().
    std::lock_guard<std::mutex> lk( value_key_calc_mutex );
    dirty_values.erase( victim );
}

/**
 * Removes the least recently used key of the T value cache.
 *
 * The key at the back of lru_list_t_values is dropped from the LRU
 * bookkeeping, from the containers and from dirty_values;
 * current_memory_usage is reduced by sizeof( T ) per removed value.
 * Does nothing if the LRU list is empty.
 *
 * Called by setTCachedValue() while access_mutex is held; the function itself
 * does not lock access_mutex.
 */
template <class T>
void
LimitedCache<T>::evict_lru_t_value()
{
    if ( lru_list_t_values.empty() )
    {
        return;
    }
    const simple_cache_key_t victim = lru_list_t_values.back();
    lru_list_t_values.pop_back();
    lru_map_t_values.erase( victim );

    // t_stn_container and t_sum_container share this LRU list and the key
    // formulas of get_key() are not disjoint, so the key may live in both
    // containers. Both are cleaned: an entry left behind would have no LRU
    // node any more.
    if ( t_stn_container.erase( victim ) != 0 )
    {
        current_memory_usage -= sizeof( T ); // Subtract size
    }
    if ( t_sum_container.erase( victim ) != 0 )
    {
        current_memory_usage -= sizeof( T ); // Subtract size
    }

    // dirty_values is read and written by get_key() and the setters under
    // value_key_calc_mutex, so the very same mutex has to be used here.
    // Lock order (access_mutex, then value_key_calc_mutex) matches setTCachedValue().
    std::lock_guard<std::mutex> lk( value_key_calc_mutex );
    dirty_values.erase( victim );
}

/**
 * Removes the least recently used row.
 *
 * The key at the back of lru_list_rows is dropped from the LRU bookkeeping,
 * from row_container and from dirty_rows; current_memory_usage is reduced by
 * the size of one row (number_loc * loc_size bytes). Does nothing if the LRU
 * list is empty.
 *
 * Called by setCachedRow() while access_mutex is held; the function itself
 * does not lock access_mutex.
 */
template <class T>
void
LimitedCache<T>::evict_lru_row()
{
    if ( lru_list_rows.empty() )
    {
        return;
    }
    const simple_cache_key_t victim = lru_list_rows.back();
    lru_list_rows.pop_back();
    lru_map_rows.erase( victim );

    // unique_ptr automatically handles deletion
    if ( row_container.erase( victim ) != 0 )
    {
        current_memory_usage -= ( number_loc * loc_size ); // Subtract size
    }

    // dirty_rows is read and written by get_key() and setCachedRow() under
    // row_key_calc_mutex, so the very same mutex has to be used here.
    // Lock order (access_mutex, then row_key_calc_mutex) matches setCachedRow().
    std::lock_guard<std::mutex> lk( row_key_calc_mutex );
    dirty_rows.erase( victim );
}

/**
 * Looks up a cached T value and marks it as most recently used.
 *
 * @param value  receives the cached value on a hit, untouched otherwise
 * @param cnode  call tree node of the request
 * @param cf     calculation flavour of the call tree node
 * @param sysres system resource of the request; nullptr selects the
 *               container of values stored without a system resource
 * @param sf     calculation flavour of the system resource
 * @return true on a cache hit; false if get_key() yields no key (negative)
 *         or nothing is stored under the key
 */
template <class T>
bool
LimitedCache<T>::testAndGetTCachedValue( T&                       value,
                                         const Cnode*             cnode,
                                         const CalculationFlavour cf,
                                         const Sysres*            sysres,
                                         const CalculationFlavour sf )
{
    const simple_cache_key_t key = get_key( cnode, cf, sysres, sf );
    if ( key < 0 )
    {
        return false;
    }

    std::lock_guard<std::mutex> lock( access_mutex ); // Protect cache access

    const t_cache_map&                   container = sysres ? t_stn_container : t_sum_container;
    typename t_cache_map::const_iterator hit       = container.find( key );
    if ( hit == container.end() )
    {
        return false;
    }
    value = hit->second;

    // Update LRU: move the key to the front of the list. Both T containers
    // share one LRU list, so the node is looked up instead of using
    // operator[]: a missing entry would be default-inserted as a singular
    // iterator and erasing that is undefined behaviour.
    typename lru_map_type::iterator node = lru_map_t_values.find( key );
    if ( node != lru_map_t_values.end() )
    {
        // splice relinks the node; the iterator stored in the map stays valid
        lru_list_t_values.splice( lru_list_t_values.begin(), lru_list_t_values, node->second );
    }
    else
    {
        lru_list_t_values.push_front( key );
        lru_map_t_values[ key ] = lru_list_t_values.begin();
    }
    return true;
}

/**
 * Stores a T value and marks it as most recently used.
 *
 * An already cached value is overwritten. A new value may evict the least
 * recently used one when capacity_values is reached. Afterwards the key is
 * flagged as calculated and threads waiting on value_ready are notified.
 * If the accounted memory exceeds max_memory_capacity, a warning is printed
 * and the whole cache is emptied.
 *
 * @param value  value to store
 * @param cnode  call tree node of the request
 * @param cf     calculation flavour of the call tree node
 * @param sysres system resource of the request; nullptr selects the
 *               container of values stored without a system resource
 * @param sf     calculation flavour of the system resource
 */
template <class T>
void
LimitedCache<T>::setTCachedValue( T                        value,
                                  const Cnode*             cnode,
                                  const CalculationFlavour cf,
                                  const Sysres*            sysres,
                                  const CalculationFlavour sf )
{
    const simple_cache_key_t key = get_key( cnode, cf, sysres, sf, true );
    if ( key < 0 )
    {
        // get_key() yields no key for this request: nothing to store.
        return;
    }
    {
        std::lock_guard<std::mutex> lock( access_mutex ); // Protect cache access

        t_cache_map&                    container = sysres ? t_stn_container : t_sum_container;
        typename t_cache_map::iterator  stored    = container.find( key );
        typename lru_map_type::iterator node      = lru_map_t_values.find( key );

        if ( stored != container.end() )
        {
            stored->second = value; // Update existing value
        }
        else
        {
            // Evict only if a new LRU node is going to be added. If the key
            // already has a node (it is stored in the other T container, both
            // share one LRU list), the list does not grow. Eviction erases
            // another map entry only, so 'node' (end()) stays valid.
            if ( node == lru_map_t_values.end()
                 && lru_list_t_values.size() >= capacity_values )
            {
                evict_lru_t_value();
            }
            container.insert( typename t_cache_map::value_type( key, value ) );
            // Update memory usage for new item (size of T is fixed)
            current_memory_usage += sizeof( T );
        }

        // Move the key to the front of the LRU list, reusing its node if there
        // is one; this keeps exactly one node per key.
        if ( node != lru_map_t_values.end() )
        {
            lru_list_t_values.splice( lru_list_t_values.begin(), lru_list_t_values, node->second );
        }
        else
        {
            lru_list_t_values.push_front( key );
            lru_map_t_values[ key ] = lru_list_t_values.begin();
        }

        {
            std::lock_guard<std::mutex> lk( value_key_calc_mutex );
            dirty_values[ key ] = false;
            // Notify unconditionally: it is harmless without waiters.
            value_ready.notify_all();
        }
    }
    // Check memory limit after insertion (outside the lock, empty() locks itself)
    if ( current_memory_usage > max_memory_capacity )
    {
        std::cerr << "WARNING: LimitedCache hits memory size limit ("
                  << max_memory_capacity << " bytes). Emptying cache." << std::endl;
        empty();
    }
}

/**
 * Drops every cached Value, T value and row together with the LRU
 * bookkeeping and resets the accounted memory usage to zero.
 * The flags in dirty_values and dirty_rows are left untouched.
 */
template <class T>
void
LimitedCache<T>::empty()
{
    std::lock_guard<std::mutex> guard( access_mutex ); // serialises with getters and setters

    // Cached data; the unique_ptr elements release their memory on clear().
    row_container.clear();
    t_sum_container.clear();
    t_stn_container.clear();
    sum_container.clear();
    stn_container.clear();

    // Usage order of the keys
    lru_list_rows.clear();
    lru_list_t_values.clear();
    lru_list_values.clear();

    // Key -> list node lookup
    lru_map_rows.clear();
    lru_map_t_values.clear();
    lru_map_values.clear();

    // Nothing is cached any more
    current_memory_usage.store( 0 );
}

/**
 * Looks up a cached Value and marks it as most recently used.
 *
 * @param cnode  call tree node of the request
 * @param cf     calculation flavour of the call tree node
 * @param sysres system resource of the request; nullptr selects the
 *               container of values stored without a system resource
 * @param sf     calculation flavour of the system resource
 * @return a copy of the cached Value (created with Value::copy(), to be
 *         released by the caller), or nullptr if get_key() yields no key
 *         (negative) or nothing is stored under the key
 */
template <class T>
Value*
LimitedCache<T>::getCachedValue( const Cnode*             cnode,
                                 const CalculationFlavour cf,
                                 const Sysres*            sysres,
                                 const CalculationFlavour sf )
{
    const simple_cache_key_t key = get_key( cnode, cf, sysres, sf );
    if ( key < 0 )
    {
        return nullptr;
    }

    std::lock_guard<std::mutex> lock( access_mutex ); // Protect cache access

    const cache_map&                   container = sysres ? stn_container : sum_container;
    typename cache_map::const_iterator hit       = container.find( key );
    if ( hit == container.end() )
    {
        return nullptr;
    }
    Value* duplicate = hit->second->copy(); // Return a copy of the Value object

    // Update LRU: move the key to the front of the list. Both Value containers
    // share one LRU list, so the node is looked up instead of using
    // operator[]: a missing entry would be default-inserted as a singular
    // iterator and erasing that is undefined behaviour.
    typename lru_map_type::iterator node = lru_map_values.find( key );
    if ( node != lru_map_values.end() )
    {
        // splice relinks the node; the iterator stored in the map stays valid
        lru_list_values.splice( lru_list_values.begin(), lru_list_values, node->second );
    }
    else
    {
        lru_list_values.push_front( key );
        lru_map_values[ key ] = lru_list_values.begin();
    }
    return duplicate;
}

/**
 * Stores a copy of a Value and marks it as most recently used.
 *
 * An already cached Value is replaced. A new Value may evict the least
 * recently used one when capacity_values is reached. Afterwards the key is
 * flagged as calculated and threads waiting on value_ready are notified.
 * If the accounted memory exceeds max_memory_capacity, a warning is printed
 * and the whole cache is emptied.
 *
 * @param value  Value to copy into the cache; the caller keeps ownership.
 *               A nullptr is ignored.
 * @param cnode  call tree node of the request
 * @param cf     calculation flavour of the call tree node
 * @param sysres system resource of the request; nullptr selects the
 *               container of values stored without a system resource
 * @param sf     calculation flavour of the system resource
 */
template <class T>
void
LimitedCache<T>::setCachedValue( Value*                   value,
                                 const Cnode*             cnode,
                                 const CalculationFlavour cf,
                                 const Sysres*            sysres,
                                 const CalculationFlavour sf )
{
    if ( value == nullptr )
    {
        // Nothing to copy; dereferencing it below would crash.
        return;
    }
    const simple_cache_key_t key = get_key( cnode, cf, sysres, sf, true );
    if ( key < 0 )
    {
        // No deletion of 'value' here. Caller is responsible for 'value's lifetime.
        return;
    }

    // Copy before taking the lock: keeps the critical section short and the
    // memory accounting untouched should copy() throw.
    std::unique_ptr<Value> fresh( value->copy() );
    const size_t           fresh_size = fresh->getSize();
    {
        std::lock_guard<std::mutex> lock( access_mutex ); // Protect cache access

        cache_map&                      container = sysres ? stn_container : sum_container;
        typename cache_map::iterator    stored    = container.find( key );
        typename lru_map_type::iterator node      = lru_map_values.find( key );

        if ( stored != container.end() )
        {
            current_memory_usage -= stored->second->getSize(); // Subtract old value's size
            stored->second        = std::move( fresh );        // Old Value is released here
        }
        else
        {
            // Evict only if a new LRU node is going to be added. If the key
            // already has a node (it is stored in the other Value container,
            // both share one LRU list), the list does not grow. Eviction
            // erases another map entry only, so 'node' (end()) stays valid.
            if ( node == lru_map_values.end()
                 && lru_list_values.size() >= capacity_values )
            {
                evict_lru_value();
            }
            container.insert( typename cache_map::value_type( key, std::move( fresh ) ) );
        }
        current_memory_usage += fresh_size; // Add new value's size

        // Move the key to the front of the LRU list, reusing its node if there
        // is one; this keeps exactly one node per key.
        if ( node != lru_map_values.end() )
        {
            lru_list_values.splice( lru_list_values.begin(), lru_list_values, node->second );
        }
        else
        {
            lru_list_values.push_front( key );
            lru_map_values[ key ] = lru_list_values.begin();
        }

        {
            std::lock_guard<std::mutex> lk( value_key_calc_mutex );
            dirty_values[ key ] = false;
            // Notify unconditionally: it is harmless without waiters.
            value_ready.notify_all();
        }
    }
    // Check memory limit after insertion/update (outside the lock, empty() locks itself)
    if ( current_memory_usage > max_memory_capacity )
    {
        std::cerr << "WARNING: LimitedCache hits memory size limit ("
                  << max_memory_capacity << " bytes). Emptying cache." << std::endl;
        empty();
    }
}

/**
 * Computes the cache key of a request and, for reading requests, updates the
 * per key "dirty" flag.
 *
 * Without HAVE_INTERNAL_CACHE the function always returns -1, i.e. nothing is
 * ever cached.
 *
 * @param cnode   call tree node of the request
 * @param cf      calculation flavour of the call tree node
 * @param sysres  system resource of the request, nullptr for an aggregated
 *                value, or the cnode pointer itself (see get_key_for_row())
 *                to request the key of a row
 * @param sf      calculation flavour of the system resource
 * @param setting true if called by a setter or for invalidation; the dirty
 *                flags are neither read nor written then
 * @return the key, or -1 if the request is not to be cached
 */
template <class T>
simple_cache_key_t
LimitedCache<T>::get_key( const Cnode*             cnode,
                          const CalculationFlavour cf,
                          const Sysres*            sysres,
                          const CalculationFlavour sf,
                          bool                     setting )
{
#ifndef HAVE_INTERNAL_CACHE
    // Caching is compiled out: silence "unused parameter" warnings, report "no key".
    ( void )cnode;
    ( void )cf;
    ( void )sysres;
    ( void )sf;
    ( void )setting;
    return static_cast<simple_cache_key_t>( -1 );
#else
    simple_cache_key_t key;
    // Either an aggregated value (no sysres) or a row of a non-simple metric
    // in a flavour different from the metric's own one: key depends on cnode and cf only.
    if ( sysres == nullptr
         || ( (
                  type_of_metric != CUBE_METRIC_SIMPLE )
              && ( ( void* )cnode == ( void* )sysres ) // sysres aliases cnode: request for a row key
              && ( cf != myf ) ) )
    {
        key = ( 2 * cnode->get_id() + cf );
    }
    else
    {
        // Requests in the metric's own flavour are not cached.
        if ( cf == myf )
        {
            return static_cast<simple_cache_key_t>( -1 );
        }
        // NOTE(review): a row request of a CUBE_METRIC_SIMPLE metric with cf != myf
        // gets here as well; sysres is then the cnode pointer reinterpreted as
        // Sysres* by get_key_for_row() - confirm that this call is intended.
        if ( !sysres->isSystemTreeNode() )
        {
            return static_cast<simple_cache_key_t>( -1 );
        }

        // Cache only for cnodes with more children than the threshold (70% of the number of cnodes).
        simple_cache_key_t Nc;
        if ( myf == CUBE_CALCULATE_EXCLUSIVE )
        {
            Nc = static_cast<simple_cache_key_t>( cnode->total_num_children() );
        }
        else   // myf == CUBE_CALCULATE_INCLUSIVE
        {
            Nc = static_cast<simple_cache_key_t>( cnode->num_children() );
        }
        if ( Nc <= threshold )
        {
            return static_cast<simple_cache_key_t>( -1 );
        }
        key = ( 2 * number_loc * ( 2 * cnode->get_id() + cf ) + 2 * sysres->get_id() + sf );
    }
    // Row requests are recognised by sysres aliasing cnode (see get_key_for_row())
    if ( ( void* )cnode != ( void* )sysres ) // not a row call: bookkeeping in dirty_values
    {
        if ( !setting )
        {
            std::unique_lock<std::mutex>     lock_( value_key_calc_mutex );
            dirty_values_map::const_iterator iter = dirty_values.find( key );

            if ( iter == dirty_values.end() || iter->second ) // No flag yet, or flag is true
            {
                dirty_values[ key ] = true;                   // Flag the key and hand it out
                return key;
            }
            // Flag is false here, i.e. a setter has stored a value under this key.
            // NOTE(review): the predicate below holds for a false flag, so the wait
            // is expected to return without blocking - confirm the intended protocol.
            waiting_for_value++;
            value_ready.wait( lock_, [ & ]() {
                    // Predicate: proceed once the flag of the key is missing or false
                    dirty_values_map::const_iterator current_iter = dirty_values.find( key );
                    return current_iter == dirty_values.end() || !current_iter->second;
                } );
            waiting_for_value--;
        }
    }
    else   // row call (sysres was cast from cnode by get_key_for_row): bookkeeping in dirty_rows
    {
        if ( !setting )
        {
            std::unique_lock<std::mutex>     lock_( row_key_calc_mutex );
            dirty_values_map::const_iterator iter = dirty_rows.find( key );
            if ( iter == dirty_rows.end() || iter->second ) // No flag yet, or flag is true
            {
                dirty_rows[ key ] = true;                   // Flag the key and hand it out
                return key;
            }
            // Flag is false here, i.e. setCachedRow() has stored a row under this key.
            // NOTE(review): as above, the wait is expected to return without blocking.
            waiting_for_row++;
            row_value_ready.wait( lock_, [ & ]() {
                    // Predicate: proceed once the flag of the key is missing or false
                    dirty_values_map::const_iterator current_iter = dirty_rows.find( key );
                    return current_iter == dirty_rows.end() || !current_iter->second;
                } );
            waiting_for_row--;
        }
    }
    return key;
#endif
}

/**
 * Removes everything that is cached under the key of the given request:
 * Values, T values and the row, their LRU bookkeeping and the dirty flags.
 * current_memory_usage is reduced by the size of each removed item.
 *
 * @param cnode  call tree node of the request
 * @param cf     calculation flavour of the call tree node
 * @param sysres system resource of the request, nullptr for an aggregated value
 * @param sf     calculation flavour of the system resource
 */
template <class T>
void
LimitedCache<T>::invalidateCachedValue( const Cnode*             cnode,
                                        const CalculationFlavour cf,
                                        const Sysres*            sysres,
                                        const CalculationFlavour sf )
{
    const simple_cache_key_t key = get_key( cnode, cf, sysres, sf, true );
    if ( key < 0 )
    {
        // The setters never store anything under a negative key.
        return;
    }

    // Removes the LRU node of 'key' (if any) from a list and its lookup map.
    auto forget_usage = [ key ]( lru_list_type& order, lru_map_type& lookup ) {
                            typename lru_map_type::iterator node = lookup.find( key );
                            if ( node != lookup.end() )
                            {
                                order.erase( node->second );
                                lookup.erase( node );
                            }
                        };

    std::lock_guard<std::mutex> lock( access_mutex ); // Protect cache modification

    // Invalidate from Value* containers (unique_ptr handles deallocation)
    typename cache_map::iterator stn_iter = stn_container.find( key );
    if ( stn_iter != stn_container.end() )
    {
        current_memory_usage -= stn_iter->second->getSize(); // Subtract size
        stn_container.erase( stn_iter );
    }
    typename cache_map::iterator sum_iter = sum_container.find( key );
    if ( sum_iter != sum_container.end() )
    {
        current_memory_usage -= sum_iter->second->getSize(); // Subtract size
        sum_container.erase( sum_iter );
    }
    forget_usage( lru_list_values, lru_map_values );

    // Invalidate from T type containers
    if ( t_stn_container.erase( key ) != 0 )
    {
        current_memory_usage -= sizeof( T ); // Subtract size
    }
    if ( t_sum_container.erase( key ) != 0 )
    {
        current_memory_usage -= sizeof( T ); // Subtract size
    }
    forget_usage( lru_list_t_values, lru_map_t_values );

    // Invalidate from row container (unique_ptr handles deallocation)
    if ( row_container.erase( key ) != 0 )
    {
        current_memory_usage -= ( number_loc * loc_size ); // Subtract size
    }
    forget_usage( lru_list_rows, lru_map_rows );

    // The dirty maps are read and written by get_key() and the setters under
    // the *_calc_mutex mutexes, so the very same mutexes have to be used here.
    // Each one is taken after access_mutex, like in the setters, and released
    // before the next one is taken.
    {
        std::lock_guard<std::mutex> lk( value_key_calc_mutex );
        dirty_values.erase( key );
    }
    {
        std::lock_guard<std::mutex> lk( row_key_calc_mutex );
        dirty_rows.erase( key );
    }
}

/**
 * Looks up a cached row and marks it as most recently used.
 *
 * @param cnode call tree node of the request
 * @param cf    calculation flavour of the call tree node
 * @return a newly allocated copy (new char[]) of number_loc * loc_size bytes,
 *         or nullptr if get_key_for_row() yields no key (negative) or no row
 *         is stored under the key
 */
template <class T>
char*
LimitedCache<T>::getCachedRow( const Cnode*             cnode,
                               const CalculationFlavour cf )
{
    const simple_cache_key_t row_key = get_key_for_row( cnode, cf );
    if ( row_key < 0 )
    {
        return nullptr;
    }

    std::lock_guard<std::mutex> guard( access_mutex ); // Protect cache access

    typename row_cache_map::const_iterator found = row_container.find( row_key );
    if ( found == row_container.end() )
    {
        return nullptr;
    }

    // Hand out a private copy; the cached buffer stays owned by the cache.
    char* duplicate = new char[ number_loc * loc_size ];
    memcpy( duplicate, found->second.get(), number_loc * loc_size );

    // Update LRU: relink the node of the key to the front of the list.
    // The iterator stored in the lookup map stays valid and now refers to the front.
    lru_list_rows.splice( lru_list_rows.begin(), lru_list_rows, lru_map_rows[ row_key ] );

    return duplicate;
}

/**
 * Stores a copy of a row and marks it as most recently used.
 *
 * An already cached row is overwritten in place. A new row may evict the
 * least recently used one when capacity_rows is reached. Afterwards the key
 * is flagged as calculated and threads waiting on row_value_ready are
 * notified. If the accounted memory exceeds max_memory_capacity, a warning is
 * printed and the whole cache is emptied.
 *
 * @param value row to copy, number_loc * loc_size bytes are read; the caller
 *              keeps ownership. A nullptr is ignored.
 * @param cnode call tree node of the request
 * @param cf    calculation flavour of the call tree node
 */
template <class T>
void
LimitedCache<T>::setCachedRow( char*                    value,
                               const Cnode*             cnode,
                               const CalculationFlavour cf )
{
    if ( value == nullptr )
    {
        // Nothing to copy; passing a null pointer to memcpy is undefined behaviour.
        return;
    }
    const simple_cache_key_t key = get_key_for_row( cnode, cf, true );
    if ( key < 0 )
    {
        // No deletion of 'value' here. Caller is responsible for 'value's lifetime.
        return;
    }
    const size_t row_bytes = number_loc * loc_size; // size of a row is fixed
    {
        std::lock_guard<std::mutex> lock( access_mutex ); // Protect cache access

        typename row_cache_map::iterator stored = row_container.find( key );
        if ( stored != row_container.end() )
        {
            // Same size as before: overwrite the cached buffer in place, no
            // allocation and no change to current_memory_usage.
            std::memcpy( stored->second.get(), value, row_bytes );

            // Move to front of LRU list
            typename lru_map_type::iterator node = lru_map_rows.find( key );
            if ( node != lru_map_rows.end() )
            {
                // splice relinks the node; the iterator stored in the map stays valid
                lru_list_rows.splice( lru_list_rows.begin(), lru_list_rows, node->second );
            }
            else
            {
                lru_list_rows.push_front( key );
                lru_map_rows[ key ] = lru_list_rows.begin();
            }
        }
        else
        {
            // Evict if capacity is reached
            if ( lru_list_rows.size() >= capacity_rows )
            {
                evict_lru_row();
            }

            // Insert new value (create unique_ptr from a copy)
            std::unique_ptr<char[]> fresh( new char[ row_bytes ] );
            std::memcpy( fresh.get(), value, row_bytes );
            row_container.insert( typename row_cache_map::value_type( key, std::move( fresh ) ) );

            // Add to front of LRU list
            lru_list_rows.push_front( key );
            lru_map_rows[ key ] = lru_list_rows.begin();

            // Update memory usage for new item
            current_memory_usage += row_bytes;
        }

        {
            std::lock_guard<std::mutex> lk( row_key_calc_mutex );
            dirty_rows[ key ] = false;
            // Notify unconditionally: it is harmless without waiters.
            row_value_ready.notify_all();
        }
    }
    // Check memory limit after insertion (outside the lock, empty() locks itself)
    if ( current_memory_usage > max_memory_capacity )
    {
        std::cerr << "WARNING: LimitedCache hits memory size limit ("
                  << max_memory_capacity << " bytes). Emptying cache." << std::endl;
        empty();
    }
}

/**
 * Invalidates the whole cache: all cached Values, T values and rows are
 * dropped by delegating to empty().
 */
template <class T>
void
LimitedCache<T>::invalidate()
{
    this->empty();
}
}

#endif
