//! Copyright (c) 2022-2026 GitDataAi All rights reserved.

//! In-memory response cache layer for GitKS.
//!
//! Two-tier architecture:
//! 1. **Moka in-memory cache** (this module) — sub-microsecond lookups for hot data
//! 2. **Disk cache** (disk_cache.rs) — persistent cache for pack-objects / info-refs
//!
//! # Cache Key Format
//!
//! Keys are structured to enable efficient repo-scoped invalidation:
//!
//! ```text
//! [namespace_len: u8][namespace: &[u8]][0xFF][repo_path_len: u16 LE][repo_path: &[u8]][0xFF][request_proto: &[u8]]
//! ```
//!
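//! The length prefixes let the namespace and repo_path be read straight out of
//! a key without protobuf decoding or substring scanning. `invalidate_repo`
//! does not scan keys at all: it looks them up in a per-repository key index.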
//!
//! # Eviction Policy
//!
//! - **Weight-based**: total memory capped at 256 MB (weighed by key+value capacity)
//! - **TTI** (time-to-idle): 2 minutes — frequently accessed entries stay hot
//! - **TTL** (time-to-live): 10 minutes — hard upper bound for safety
//! - Evictions are tracked via metrics for observability

use dashmap::DashMap;
use std::sync::{Arc, OnceLock};

use moka::sync::Cache;
use prost::Message;

use crate::config::{
    CACHE_ENTRY_OVERHEAD as ENTRY_OVERHEAD, CACHE_MAX_TTL, CACHE_MAX_WEIGHT, CACHE_TTI,
};

/// Process-wide cache state stored in [`CACHE`].
struct CacheState {
    /// Moka cache mapping encoded cache keys to encoded response bytes.
    store: Cache<Vec<u8>, Vec<u8>>,
}

/// Lazily-initialized singleton; filled in by the first call to [`state`].
static CACHE: OnceLock<CacheState> = OnceLock::new();

/// Return the process-wide cache state, building it on first use.
///
/// The cache is configured with:
/// - a weigher charging `key.capacity() + value.capacity() + ENTRY_OVERHEAD`
///   per entry, saturating at `u32::MAX`,
/// - `CACHE_MAX_WEIGHT` as the total weight budget,
/// - `CACHE_MAX_TTL` / `CACHE_TTI` as time-to-live / time-to-idle,
/// - an eviction listener that reports the removal cause per namespace.
///
/// Initialization also emits one `info` log line with the effective limits.
fn state() -> &'static CacheState {
    CACHE.get_or_init(|| {
        let store = Cache::builder()
            .weigher(|key: &Vec<u8>, value: &Vec<u8>| -> u32 {
                // Saturate instead of truncating/wrapping: an `as u32` cast
                // silently drops high bits and a plain `+` can overflow, both
                // of which would make a huge entry look almost free.
                let payload = key.capacity().saturating_add(value.capacity());
                u32::try_from(payload)
                    .unwrap_or(u32::MAX)
                    .saturating_add(ENTRY_OVERHEAD)
            })
            .max_capacity(CACHE_MAX_WEIGHT)
            .time_to_live(CACHE_MAX_TTL)
            .time_to_idle(CACHE_TTI)
            .eviction_listener(|key: Arc<Vec<u8>>, _value: Vec<u8>, cause| {
                // Deliberately no wildcard arm: a new removal cause should be
                // a compile error here rather than an unlabeled metric.
                let cause_str = match cause {
                    moka::notification::RemovalCause::Expired => "expired",
                    moka::notification::RemovalCause::Explicit => "explicit",
                    moka::notification::RemovalCause::Replaced => "replaced",
                    moka::notification::RemovalCause::Size => "size",
                };
                let namespace = decode_namespace(&key);
                crate::metrics::record_cache_eviction(namespace, cause_str);
            })
            .build();

        tracing::info!(
            max_weight_mb = CACHE_MAX_WEIGHT / (1024 * 1024),
            ttl_secs = CACHE_MAX_TTL.as_secs(),
            tti_secs = CACHE_TTI.as_secs(),
            "Moka in-memory cache initialized"
        );

        CacheState { store }
    })
}

#[inline]
fn cache() -> &'static Cache<Vec<u8>, Vec<u8>> {
    &state().store
}

/// Secondary index from repository path to the cache keys inserted for it.
struct RepoKeyIndex {
    /// Repository path → keys recorded by `track_cache_key`; an entry is
    /// removed wholesale by `invalidate_repo`.
    repo_to_keys: DashMap<String, Vec<Arc<Vec<u8>>>>,
}

/// Lazily-initialized singleton; filled in by the first call to
/// `repo_key_index`.
static REPO_KEY_INDEX: OnceLock<RepoKeyIndex> = OnceLock::new();

/// Return the process-wide repo→keys index, creating an empty one on first use.
fn repo_key_index() -> &'static RepoKeyIndex {
    REPO_KEY_INDEX.get_or_init(|| {
        let repo_to_keys = DashMap::new();
        RepoKeyIndex { repo_to_keys }
    })
}

fn track_cache_key(repo_path: &str, key: Arc<Vec<u8>>) {
    repo_key_index()
        .repo_to_keys
        .entry(repo_path.to_string())
        .or_default()
        .push(key);
}

/// Encode a structured cache key.
///
/// Layout:
///
/// ```text
/// namespace_len(u8) + namespace + 0xFF + repo_path_len(u16 LE) + repo_path + 0xFF + request_proto
/// ```
///
/// The two `0xFF` bytes are fixed separators written after the namespace and
/// after the repo path.
///
/// Returns `None` (after logging a warning) when `namespace` is longer than
/// `u8::MAX` bytes or `repo_path` is longer than `u16::MAX` bytes, because the
/// length prefixes could not represent them.
fn encode_key(namespace: &str, repo_path: &str, request_bytes: &[u8]) -> Option<Vec<u8>> {
    const SEPARATOR: u8 = 0xFF;

    let ns = namespace.as_bytes();
    let rp = repo_path.as_bytes();

    // Checked narrowing: the conversions double as the length guard, so the
    // prefixes written below can never be silently truncated.
    let (Ok(ns_len), Ok(rp_len)) = (u8::try_from(ns.len()), u16::try_from(rp.len())) else {
        tracing::warn!(
            namespace_len = ns.len(),
            repo_path_len = rp.len(),
            "cache key too long, bypassing cache"
        );
        return None;
    };

    // u8 prefix + namespace + separator + u16 prefix + repo path + separator + request.
    let total = 1 + ns.len() + 1 + 2 + rp.len() + 1 + request_bytes.len();
    let mut key = Vec::with_capacity(total);
    key.push(ns_len);
    key.extend_from_slice(ns);
    key.push(SEPARATOR);
    key.extend_from_slice(&rp_len.to_le_bytes());
    key.extend_from_slice(rp);
    key.push(SEPARATOR);
    key.extend_from_slice(request_bytes);
    Some(key)
}

/// Extract the namespace string from a cache key.
///
/// The first byte of `key` is the namespace length; the namespace bytes follow
/// immediately. Returns `"unknown"` when the key is empty, when it is shorter
/// than the declared namespace length (so a truncated key never yields a
/// partial namespace label), or when the namespace bytes are not valid UTF-8.
fn decode_namespace(key: &[u8]) -> &str {
    key.split_first()
        .and_then(|(&ns_len, rest)| rest.get(..usize::from(ns_len)))
        .and_then(|ns| std::str::from_utf8(ns).ok())
        .unwrap_or("unknown")
}

/// Look up, or build and store, a single protobuf response.
///
/// The cache key is derived from `namespace`, `repo_path` and the encoded
/// `request`. If a key cannot be formed, `build` is called directly and
/// nothing is cached.
///
/// - **Hit**: the cached bytes decode as `Res`; hit metrics are recorded and
///   the decoded value is returned without calling `build`.
/// - **Miss** (no entry, or the entry does not decode): `build` is called. Its
///   error, if any, is returned unchanged. On success the response is encoded,
///   inserted, and its key recorded for `repo_path` so it can be invalidated
///   later. An encode failure is logged and the response is still returned.
///
/// `repo_path` should be the repository's relative path (used for scoped
/// invalidation).
pub(crate) fn cached_response<Req, Res, E, F>(
    namespace: &'static str,
    repo_path: &str,
    request: &Req,
    build: F,
) -> Result<Res, E>
where
    Req: Message,
    Res: Message + Default,
    F: FnOnce() -> Result<Res, E>,
{
    let request_bytes = encode_request(request);
    let key = match encode_key(namespace, repo_path, &request_bytes) {
        Some(key) => key,
        // No usable key: bypass the cache entirely.
        None => return build(),
    };

    if let Some(cached) = cache().get(&key) {
        if let Ok(decoded) = Res::decode(cached.as_slice()) {
            crate::metrics::record_cache_op("moka", "hit", std::time::Duration::ZERO);
            crate::metrics::record_cache_hit_ns(namespace);
            crate::metrics::record_cache_value_size(namespace, cached.len());
            tracing::debug!(
                namespace = %namespace,
                repo = %repo_path,
                key_len = key.len(),
                value_len = cached.len(),
                "cache hit"
            );
            return Ok(decoded);
        }
        // Undecodable bytes are treated as a miss; the insert below replaces them.
    }

    crate::metrics::record_cache_miss_ns(namespace);

    tracing::debug!(
        namespace = %namespace,
        repo = %repo_path,
        key_len = key.len(),
        "cache miss, building response"
    );

    let build_started = std::time::Instant::now();
    let response = build()?;
    let build_elapsed = build_started.elapsed();

    let mut encoded = Vec::with_capacity(response.encoded_len());
    match response.encode(&mut encoded) {
        Ok(()) => {
            crate::metrics::record_cache_value_size(namespace, encoded.len());
            // The index needs its own copy of the key; the cache takes ownership of `key`.
            let tracked_key = Arc::new(key.clone());
            cache().insert(key, encoded);
            track_cache_key(repo_path, tracked_key);
        }
        Err(err) => {
            tracing::warn!(
                namespace = %namespace,
                repo = %repo_path,
                error = %err,
                "failed to encode cache response"
            );
        }
    }

    crate::metrics::record_cache_op("moka", "miss", build_elapsed);
    Ok(response)
}

/// Cache a `Vec<Item>` protobuf response using length-delimited encoding.
///
/// Stored value layout:
///
/// ```text
/// [crc32(payload): u32 LE][payload]
/// ```
///
/// where `payload` is every item encoded back to back with protobuf
/// length-delimited framing. An empty list is stored as the CRC alone.
///
/// A cached entry is only served when it is at least 4 bytes long, its CRC
/// matches, and the whole payload decodes as a sequence of `Item`s. Any other
/// entry is invalidated and the response is rebuilt via `build`; there is no
/// partial reuse of an entry.
///
/// If a cache key cannot be formed, `build` is called directly and nothing is
/// cached. Errors from `build` are returned unchanged. If encoding an item
/// fails, the failure is logged and the response is returned uncached.
pub(crate) fn cached_vec_response<Req, Item, E, F>(
    namespace: &'static str,
    repo_path: &str,
    request: &Req,
    build: F,
) -> Result<Vec<Item>, E>
where
    Req: Message,
    Item: Message + Default,
    F: FnOnce() -> Result<Vec<Item>, E>,
{
    let req_bytes = encode_request(request);
    let Some(key) = encode_key(namespace, repo_path, &req_bytes) else {
        return build();
    };

    if let Some(bytes) = cache().get(&key) {
        if bytes.len() < 4 {
            // Too short to even hold the CRC prefix.
            cache().invalidate(&key);
        } else {
            let stored_crc = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
            let actual_crc = crc32fast::hash(&bytes[4..]);
            if stored_crc != actual_crc {
                tracing::warn!(
                    namespace = %namespace,
                    repo = %repo_path,
                    "cache entry corrupted (CRC mismatch), invalidating"
                );
                cache().invalidate(&key);
            } else {
                let mut items = Vec::new();
                let mut remaining = &bytes[4..];
                let mut valid = true;

                // An empty payload is a valid empty list. Otherwise every byte
                // must belong to a well-formed frame. The first frame gets no
                // special treatment: a failed decode may already have consumed
                // the rest of the buffer, so "nothing left" after an error must
                // not be read as "valid and empty".
                while !remaining.is_empty() {
                    match Item::decode_length_delimited(&mut remaining) {
                        Ok(item) => items.push(item),
                        Err(_) => {
                            valid = false;
                            break;
                        }
                    }
                }

                if valid {
                    crate::metrics::record_cache_op("moka", "hit", std::time::Duration::ZERO);
                    crate::metrics::record_cache_hit_ns(namespace);
                    crate::metrics::record_cache_value_size(namespace, bytes.len());
                    tracing::debug!(
                        namespace = %namespace,
                        repo = %repo_path,
                        item_count = items.len(),
                        "vec cache hit"
                    );
                    return Ok(items);
                }

                tracing::warn!(
                    namespace = %namespace,
                    repo = %repo_path,
                    "vec cache decode failed, rebuilding"
                );
                cache().invalidate(&key);
            }
        }
    }

    crate::metrics::record_cache_miss_ns(namespace);
    tracing::debug!(
        namespace = %namespace,
        repo = %repo_path,
        "vec cache miss, building response"
    );

    let start = std::time::Instant::now();
    let response = build()?;
    let build_elapsed = start.elapsed();

    // Upper-bound estimate: each frame is the message plus a varint length
    // prefix of at most 10 bytes.
    let total_est: usize = response.iter().map(|item| item.encoded_len() + 10).sum();
    let mut data = Vec::with_capacity(total_est);
    let mut encode_ok = true;
    for item in &response {
        if let Err(err) = item.encode_length_delimited(&mut data) {
            tracing::warn!(
                namespace = %namespace,
                repo = %repo_path,
                error = %err,
                "failed to encode vec cache item"
            );
            encode_ok = false;
            break;
        }
    }

    if encode_ok {
        // Copy into an exactly-sized buffer so the stored value does not carry
        // the over-estimated capacity of `data`.
        let crc = crc32fast::hash(&data);
        let mut bytes = Vec::with_capacity(4 + data.len());
        bytes.extend_from_slice(&crc.to_le_bytes());
        bytes.extend_from_slice(&data);
        crate::metrics::record_cache_value_size(namespace, bytes.len());
        let key_arc = Arc::new(key.clone());
        cache().insert(key, bytes);
        track_cache_key(repo_path, key_arc);
    }
    crate::metrics::record_cache_op("moka", "miss", build_elapsed);
    Ok(response)
}

/// Encode a protobuf request into a byte vector.
#[inline]
fn encode_request<Req: Message>(request: &Req) -> Vec<u8> {
    let mut buf = Vec::with_capacity(request.encoded_len());
    if let Err(err) = request.encode(&mut buf) {
        tracing::warn!(error = %err, "failed to encode cache request");
    }
    buf
}

/// Invalidate every cache entry recorded for a repository.
///
/// Removes the repository's entry from the repo→keys index and invalidates
/// each key listed there in the Moka cache. Cost is proportional to the number
/// of keys recorded for `relative_path`, not to the total number of cached
/// entries. Does nothing if the repository has no index entry.
///
/// Called by `notify_ref_update` after any mutator RPC (create commit,
/// create branch, etc.) to prevent serving stale data.
pub(crate) fn invalidate_repo(relative_path: &str) {
    let store = cache();
    let index = repo_key_index();

    let Some((_repo, tracked_keys)) = index.repo_to_keys.remove(relative_path) else {
        return;
    };

    for tracked in tracked_keys.iter() {
        store.invalidate(tracked.as_ref());
    }

    let removed = tracked_keys.len();
    if removed == 0 {
        return;
    }

    tracing::debug!(
        relative_path = %relative_path,
        entries_removed = removed,
        "cache invalidated for repository (indexed)"
    );
}

use crate::pb::{ObjectSelector, object_selector};

/// Returns true if the selector is present and is the OID variant.
/// OID-based selectors are cacheable because they are immutable.
pub(crate) fn selector_is_oid(selector: &Option<ObjectSelector>) -> bool {
    selector
        .as_ref()
        .and_then(|outer| outer.selector.as_ref())
        .is_some_and(|inner| matches!(inner, object_selector::Selector::Oid(_)))
}

/// Returns true if both selectors are OID-based.
pub(crate) fn selectors_are_oid(
    left: &Option<ObjectSelector>,
    right: &Option<ObjectSelector>,
) -> bool {
    selector_is_oid(left) && selector_is_oid(right)
}