refactor(bare): enhance security and performance optimizations
- Remove unnecessary sorting in advertise_refs for deterministic output
- Add path traversal detection and validation in bare_dir construction
- Implement symlink resolution checks to prevent security vulnerabilities
- Refactor cache system with CRC validation and improved metrics
- Integrate repo-specific cache invalidation using indexed keys
- Add comprehensive unit tests for commit operations and diff functionality
- Move configuration constants to centralized config module
- Optimize string operations in disk cache random value generation
- Enhance license detection algorithm with cleaner matching logic
- Streamline argument processing in various git operations
- Update dependencies including crc32fast and flate2 for performance
- Add signal handling capability to tokio runtime configuration
This commit is contained in:
+107
-104
@@ -22,25 +22,13 @@
|
||||
//! - **TTL** (time-to-live): 10 minutes — hard upper bound for safety
|
||||
//! - Evictions are tracked via metrics for observability
|
||||
|
||||
use std::sync::OnceLock;
|
||||
use std::time::Duration;
|
||||
use dashmap::DashMap;
|
||||
use std::sync::{Arc, OnceLock};
|
||||
|
||||
use moka::sync::Cache;
|
||||
use prost::Message;
|
||||
|
||||
/// Maximum total cache weight (key + value allocated bytes): 256 MB.
|
||||
const CACHE_MAX_WEIGHT: u64 = 256 * 1024 * 1024;
|
||||
|
||||
/// Hard time-to-live: entries older than this are unconditionally evicted.
|
||||
const CACHE_MAX_TTL: Duration = Duration::from_secs(600); // 10 min
|
||||
|
||||
/// Time-to-idle: entries not accessed within this window are evicted.
|
||||
/// Frequently accessed entries survive up to TTL, cold entries expire quickly.
|
||||
const CACHE_TTI: Duration = Duration::from_secs(120); // 2 min
|
||||
|
||||
/// Estimated per-entry overhead (Moka internal Arc + metadata).
|
||||
/// Added to the weigher result to prevent underestimation.
|
||||
const ENTRY_OVERHEAD: u32 = 128;
|
||||
use crate::config::{CACHE_ENTRY_OVERHEAD as ENTRY_OVERHEAD, CACHE_MAX_TTL, CACHE_MAX_WEIGHT, CACHE_TTI};
|
||||
|
||||
struct CacheState {
|
||||
store: Cache<Vec<u8>, Vec<u8>>,
|
||||
@@ -52,7 +40,6 @@ fn state() -> &'static CacheState {
|
||||
CACHE.get_or_init(|| {
|
||||
let store = Cache::builder()
|
||||
.weigher(|key: &Vec<u8>, value: &Vec<u8>| -> u32 {
|
||||
// capacity() reflects actual allocation including spare capacity
|
||||
key.capacity() as u32 + value.capacity() as u32 + ENTRY_OVERHEAD
|
||||
})
|
||||
.max_capacity(CACHE_MAX_WEIGHT)
|
||||
@@ -65,7 +52,6 @@ fn state() -> &'static CacheState {
|
||||
moka::notification::RemovalCause::Replaced => "replaced",
|
||||
moka::notification::RemovalCause::Size => "size",
|
||||
};
|
||||
// Extract namespace for per-namespace metrics
|
||||
let namespace = decode_namespace(&key);
|
||||
crate::metrics::record_cache_eviction(namespace, cause_str);
|
||||
})
|
||||
@@ -87,7 +73,27 @@ fn cache() -> &'static Cache<Vec<u8>, Vec<u8>> {
|
||||
&state().store
|
||||
}
|
||||
|
||||
// Key encoding
|
||||
|
||||
struct RepoKeyIndex {
|
||||
repo_to_keys: DashMap<String, Vec<Arc<Vec<u8>>>>,
|
||||
}
|
||||
|
||||
static REPO_KEY_INDEX: OnceLock<RepoKeyIndex> = OnceLock::new();
|
||||
|
||||
fn repo_key_index() -> &'static RepoKeyIndex {
|
||||
REPO_KEY_INDEX.get_or_init(|| RepoKeyIndex {
|
||||
repo_to_keys: DashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
fn track_cache_key(repo_path: &str, key: Arc<Vec<u8>>) {
|
||||
repo_key_index()
|
||||
.repo_to_keys
|
||||
.entry(repo_path.to_string())
|
||||
.or_default()
|
||||
.push(key);
|
||||
}
|
||||
|
||||
|
||||
/// Encode a structured cache key.
|
||||
///
|
||||
@@ -105,12 +111,15 @@ fn encode_key(namespace: &str, repo_path: &str, request_bytes: &[u8]) -> Option<
|
||||
return None;
|
||||
}
|
||||
|
||||
let total = 1 + ns.len() + 2 + rp.len() + request_bytes.len();
|
||||
const SEPARATOR: u8 = 0xFF;
|
||||
let total = 1 + ns.len() + 1 + 2 + rp.len() + 1 + request_bytes.len();
|
||||
let mut key = Vec::with_capacity(total);
|
||||
key.push(ns.len() as u8);
|
||||
key.extend_from_slice(ns);
|
||||
key.push(SEPARATOR);
|
||||
key.extend_from_slice(&(rp.len() as u16).to_le_bytes());
|
||||
key.extend_from_slice(rp);
|
||||
key.push(SEPARATOR);
|
||||
key.extend_from_slice(request_bytes);
|
||||
Some(key)
|
||||
}
|
||||
@@ -125,31 +134,6 @@ fn decode_namespace(key: &[u8]) -> &str {
|
||||
std::str::from_utf8(&key[1..end]).unwrap_or("unknown")
|
||||
}
|
||||
|
||||
/// Extract the repo_path bytes from an encoded cache key.
///
/// The layout read here is
/// `[ns_len: u8][namespace][rp_len: u16 little-endian][repo_path][...]`.
///
/// Returns a slice borrowed from `key`, or `None` when the key is too short
/// for the lengths it declares. An empty repo_path yields `Some(&[])`.
fn extract_repo_path_bytes(key: &[u8]) -> Option<&[u8]> {
    // First byte is the namespace length; everything after it is payload.
    let (&ns_len, rest) = key.split_first()?;

    // Skip the namespace. `get` returns None if it runs past the end.
    let after_namespace = rest.get(usize::from(ns_len)..)?;

    // Two-byte little-endian repo_path length prefix.
    let len_prefix = after_namespace.get(..2)?;
    let rp_len = usize::from(u16::from_le_bytes([len_prefix[0], len_prefix[1]]));

    // The repo_path itself; None if the declared length exceeds what is left.
    after_namespace.get(2..)?.get(..rp_len)
}
|
||||
|
||||
/// Check if a cache key belongs to the given repository.
|
||||
fn key_matches_repo(key: &[u8], target_repo: &[u8]) -> bool {
|
||||
extract_repo_path_bytes(key).is_some_and(|rp| rp == target_repo)
|
||||
}
|
||||
|
||||
// Single-message cache
|
||||
|
||||
/// Cache a single protobuf response.
|
||||
///
|
||||
@@ -176,8 +160,10 @@ where
|
||||
if let Some(bytes) = cache().get(&key)
|
||||
&& let Ok(response) = Res::decode(bytes.as_slice())
|
||||
{
|
||||
let elapsed = std::time::Duration::ZERO; // Moka get is memory-only, effectively instant
|
||||
let elapsed = std::time::Duration::ZERO;
|
||||
crate::metrics::record_cache_op("moka", "hit", elapsed);
|
||||
crate::metrics::record_cache_hit_ns(namespace);
|
||||
crate::metrics::record_cache_value_size(namespace, bytes.len());
|
||||
tracing::debug!(
|
||||
namespace = %namespace,
|
||||
repo = %repo_path,
|
||||
@@ -188,6 +174,8 @@ where
|
||||
return Ok(response);
|
||||
}
|
||||
|
||||
crate::metrics::record_cache_miss_ns(namespace);
|
||||
|
||||
tracing::debug!(
|
||||
namespace = %namespace,
|
||||
repo = %repo_path,
|
||||
@@ -208,14 +196,16 @@ where
|
||||
"failed to encode cache response"
|
||||
);
|
||||
} else {
|
||||
crate::metrics::record_cache_value_size(namespace, bytes.len());
|
||||
let key_arc = Arc::new(key.clone());
|
||||
cache().insert(key, bytes);
|
||||
track_cache_key(repo_path, key_arc);
|
||||
}
|
||||
|
||||
crate::metrics::record_cache_op("moka", "miss", build_elapsed);
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
// Vec-message cache
|
||||
|
||||
/// Cache a `Vec<Item>` protobuf response using length-delimited encoding.
|
||||
///
|
||||
@@ -238,48 +228,63 @@ where
|
||||
return build();
|
||||
};
|
||||
|
||||
// Try cache hit
|
||||
if let Some(bytes) = cache().get(&key) {
|
||||
let mut items = Vec::new();
|
||||
let mut remaining = bytes.as_slice();
|
||||
let mut valid = true;
|
||||
if bytes.len() < 4 {
|
||||
cache().invalidate(&key);
|
||||
} else {
|
||||
let stored_crc = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
|
||||
let actual_crc = crc32fast::hash(&bytes[4..]);
|
||||
if stored_crc != actual_crc {
|
||||
tracing::warn!(
|
||||
namespace = %namespace,
|
||||
repo = %repo_path,
|
||||
"cache entry corrupted (CRC mismatch), invalidating"
|
||||
);
|
||||
cache().invalidate(&key);
|
||||
} else {
|
||||
let mut items = Vec::new();
|
||||
let mut remaining = &bytes[4..];
|
||||
let mut valid = true;
|
||||
|
||||
// Pre-allocate based on first size hint
|
||||
if let Ok(first) = Item::decode_length_delimited(&mut remaining) {
|
||||
items.push(first);
|
||||
while !remaining.is_empty() {
|
||||
match Item::decode_length_delimited(&mut remaining) {
|
||||
Ok(item) => items.push(item),
|
||||
Err(_) => {
|
||||
valid = false;
|
||||
break;
|
||||
if let Ok(first) = Item::decode_length_delimited(&mut remaining) {
|
||||
items.push(first);
|
||||
while !remaining.is_empty() {
|
||||
match Item::decode_length_delimited(&mut remaining) {
|
||||
Ok(item) => items.push(item),
|
||||
Err(_) => {
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if !remaining.is_empty() {
|
||||
valid = false;
|
||||
}
|
||||
|
||||
if valid {
|
||||
crate::metrics::record_cache_op("moka", "hit", std::time::Duration::ZERO);
|
||||
crate::metrics::record_cache_hit_ns(namespace);
|
||||
crate::metrics::record_cache_value_size(namespace, bytes.len());
|
||||
tracing::debug!(
|
||||
namespace = %namespace,
|
||||
repo = %repo_path,
|
||||
item_count = items.len(),
|
||||
"vec cache hit"
|
||||
);
|
||||
return Ok(items);
|
||||
}
|
||||
|
||||
tracing::warn!(
|
||||
namespace = %namespace,
|
||||
repo = %repo_path,
|
||||
"vec cache decode failed, rebuilding"
|
||||
);
|
||||
cache().invalidate(&key);
|
||||
}
|
||||
} else if !remaining.is_empty() {
|
||||
valid = false;
|
||||
}
|
||||
|
||||
if valid {
|
||||
crate::metrics::record_cache_op("moka", "hit", std::time::Duration::ZERO);
|
||||
tracing::debug!(
|
||||
namespace = %namespace,
|
||||
repo = %repo_path,
|
||||
item_count = items.len(),
|
||||
"vec cache hit"
|
||||
);
|
||||
return Ok(items);
|
||||
}
|
||||
|
||||
tracing::warn!(
|
||||
namespace = %namespace,
|
||||
repo = %repo_path,
|
||||
"vec cache decode failed, rebuilding"
|
||||
);
|
||||
// Invalidate the corrupt entry
|
||||
cache().invalidate(&key);
|
||||
}
|
||||
|
||||
crate::metrics::record_cache_miss_ns(namespace);
|
||||
tracing::debug!(
|
||||
namespace = %namespace,
|
||||
repo = %repo_path,
|
||||
@@ -290,15 +295,14 @@ where
|
||||
let response = build()?;
|
||||
let build_elapsed = start.elapsed();
|
||||
|
||||
// Encode all items into a single buffer with length-delimited framing
|
||||
let total_est: usize = response
|
||||
.iter()
|
||||
.map(|item| item.encoded_len() + 10) // 10 = prost length-delimited overhead
|
||||
.map(|item| item.encoded_len() + 10)
|
||||
.sum();
|
||||
let mut bytes = Vec::with_capacity(total_est);
|
||||
let mut data = Vec::with_capacity(total_est);
|
||||
let mut encode_ok = true;
|
||||
for item in &response {
|
||||
if let Err(err) = item.encode_length_delimited(&mut bytes) {
|
||||
if let Err(err) = item.encode_length_delimited(&mut data) {
|
||||
tracing::warn!(
|
||||
namespace = %namespace,
|
||||
repo = %repo_path,
|
||||
@@ -311,13 +315,19 @@ where
|
||||
}
|
||||
|
||||
if encode_ok {
|
||||
let crc = crc32fast::hash(&data);
|
||||
let mut bytes = Vec::with_capacity(4 + data.len());
|
||||
bytes.extend_from_slice(&crc.to_le_bytes());
|
||||
bytes.extend_from_slice(&data);
|
||||
crate::metrics::record_cache_value_size(namespace, bytes.len());
|
||||
let key_arc = Arc::new(key.clone());
|
||||
cache().insert(key, bytes);
|
||||
track_cache_key(repo_path, key_arc);
|
||||
}
|
||||
crate::metrics::record_cache_op("moka", "miss", build_elapsed);
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
// Request encoding helpers
|
||||
|
||||
/// Encode a protobuf request into a byte vector.
|
||||
#[inline]
|
||||
@@ -329,7 +339,6 @@ fn encode_request<Req: Message>(request: &Req) -> Vec<u8> {
|
||||
buf
|
||||
}
|
||||
|
||||
// Repository-scoped invalidation
|
||||
|
||||
/// Invalidate all cache entries for a specific repository.
|
||||
///
|
||||
@@ -341,30 +350,24 @@ fn encode_request<Req: Message>(request: &Req) -> Vec<u8> {
|
||||
/// create branch, etc.) to prevent serving stale data.
|
||||
pub(crate) fn invalidate_repo(relative_path: &str) {
|
||||
let c = cache();
|
||||
let target = relative_path.as_bytes();
|
||||
let mut keys_to_remove: Vec<std::sync::Arc<Vec<u8>>> = Vec::with_capacity(64);
|
||||
let idx = repo_key_index();
|
||||
|
||||
for (key, _value) in c.iter() {
|
||||
if key_matches_repo(&key, target) {
|
||||
keys_to_remove.push(key);
|
||||
if let Some((_key, keys)) = idx.repo_to_keys.remove(relative_path) {
|
||||
let removed = keys.len();
|
||||
for key in &keys {
|
||||
c.invalidate(key.as_ref());
|
||||
}
|
||||
}
|
||||
|
||||
let removed = keys_to_remove.len();
|
||||
for key in &keys_to_remove {
|
||||
c.invalidate(key.as_ref());
|
||||
}
|
||||
|
||||
if removed > 0 {
|
||||
tracing::debug!(
|
||||
relative_path = %relative_path,
|
||||
entries_removed = removed,
|
||||
"cache invalidated for repository"
|
||||
);
|
||||
if removed > 0 {
|
||||
tracing::debug!(
|
||||
relative_path = %relative_path,
|
||||
entries_removed = removed,
|
||||
"cache invalidated for repository (indexed)"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Selector helpers
|
||||
|
||||
use crate::pb::{ObjectSelector, object_selector};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user