refactor(bare): enhance security and performance optimizations
- Remove unnecessary sorting in advertise_refs for deterministic output
- Add path traversal detection and validation in bare_dir construction
- Implement symlink resolution checks to prevent security vulnerabilities
- Refactor cache system with CRC validation and improved metrics
- Integrate repo-specific cache invalidation using indexed keys
- Add comprehensive unit tests for commit operations and diff functionality
- Move configuration constants to centralized config module
- Optimize string operations in disk cache random value generation
- Enhance license detection algorithm with cleaner matching logic
- Streamline argument processing in various git operations
- Update dependencies including crc32fast and flate2 for performance
- Add signal handling capability to tokio runtime configuration
This commit is contained in:
+94
-20
@@ -10,15 +10,22 @@
|
||||
|
||||
use dashmap::DashMap;
|
||||
use std::sync::{Arc, OnceLock, RwLock};
|
||||
use std::time::Instant;
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
/// Default max concurrent operations per repository.
|
||||
const DEFAULT_MAX_CONCURRENT: usize = 5;
|
||||
use crate::config::{DEFAULT_MAX_CONCURRENT_OPS, SEMAPHORE_IDLE_THRESHOLD_SECS};
|
||||
|
||||
/// Per-repository rate limiter entry with usage tracking.
///
/// Bundles the repository's semaphore with the bookkeeping the limiter
/// needs to decide whether the entry is idle and how long ago it was used.
|
||||
struct SemaphoreEntry {
|
||||
// Semaphore bounding concurrent operations for this repository; the
// `Arc` is cloned out of the map so callers can wait on it without
// holding a map reference.
sem: Arc<Semaphore>,
|
||||
// Number of permits `sem` was created with. Stored because the semaphore
// itself only reports the permits that are currently available.
max_permits: usize,
|
||||
// Time of the most recent lookup of this entry; refreshed on acquire and
// read by the idle-semaphore cleanup to detect stale entries.
last_accessed: RwLock<Instant>,
|
||||
}
|
||||
|
||||
/// Global rate limiter state.
|
||||
struct RateLimiter {
|
||||
/// Per-repository semaphores. Key = repository relative_path.
|
||||
semaphores: DashMap<String, Arc<Semaphore>>,
|
||||
semaphores: DashMap<String, SemaphoreEntry>,
|
||||
/// Max concurrent operations per repository (protected by RwLock for runtime updates).
|
||||
max_concurrent: RwLock<usize>,
|
||||
}
|
||||
@@ -30,7 +37,7 @@ fn limiter() -> &'static RateLimiter {
|
||||
let max = std::env::var("GITKS_RATE_LIMIT_MAX_CONCURRENT")
|
||||
.ok()
|
||||
.and_then(|v| v.parse().ok())
|
||||
.unwrap_or(DEFAULT_MAX_CONCURRENT);
|
||||
.unwrap_or(DEFAULT_MAX_CONCURRENT_OPS);
|
||||
|
||||
tracing::info!(
|
||||
max_concurrent = max,
|
||||
@@ -52,6 +59,8 @@ fn get_max_concurrent() -> usize {
|
||||
.unwrap_or_else(|e| e.into_inner())
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// A guard that holds a rate-limit permit. The permit is released on drop.
|
||||
pub struct RateLimitGuard {
|
||||
/// The semaphore permit. Dropping this releases the permit.
|
||||
@@ -71,18 +80,24 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
|
||||
}
|
||||
let max_concurrent = get_max_concurrent();
|
||||
if max_concurrent == 0 {
|
||||
// Unlimited
|
||||
return None;
|
||||
}
|
||||
|
||||
let sem = limiter()
|
||||
.semaphores
|
||||
.entry(repo.to_string())
|
||||
.or_insert_with(|| Arc::new(Semaphore::new(max_concurrent)))
|
||||
.value()
|
||||
.clone();
|
||||
let sem = {
|
||||
let entry = limiter()
|
||||
.semaphores
|
||||
.entry(repo.to_string())
|
||||
.or_insert_with(|| SemaphoreEntry {
|
||||
sem: Arc::new(Semaphore::new(max_concurrent)),
|
||||
max_permits: max_concurrent,
|
||||
last_accessed: RwLock::new(Instant::now()),
|
||||
});
|
||||
if let Ok(mut last) = entry.last_accessed.write() {
|
||||
*last = Instant::now();
|
||||
}
|
||||
entry.sem.clone()
|
||||
};
|
||||
|
||||
// Release DashMap reference before awaiting
|
||||
let _ = repo;
|
||||
|
||||
match tokio::time::timeout(
|
||||
@@ -97,6 +112,7 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
|
||||
available = sem.available_permits(),
|
||||
"rate limit permit acquired"
|
||||
);
|
||||
crate::metrics::record_rate_limit_acquire(repo_relative_path.unwrap_or(""));
|
||||
Some(RateLimitGuard { _permit: permit })
|
||||
}
|
||||
Ok(Err(_closed)) => {
|
||||
@@ -105,7 +121,8 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
|
||||
repo = %repo_relative_path.unwrap_or(""),
|
||||
"rate limit semaphore closed, recreating"
|
||||
);
|
||||
let new_sem = Arc::new(Semaphore::new(get_max_concurrent()));
|
||||
let max = get_max_concurrent();
|
||||
let new_sem = Arc::new(Semaphore::new(max));
|
||||
let permit = match new_sem.clone().acquire_owned().await {
|
||||
Ok(permit) => permit,
|
||||
Err(_closed) => {
|
||||
@@ -116,9 +133,14 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
|
||||
return None;
|
||||
}
|
||||
};
|
||||
limiter()
|
||||
.semaphores
|
||||
.insert(repo_relative_path.unwrap_or("").to_string(), new_sem);
|
||||
limiter().semaphores.insert(
|
||||
repo_relative_path.unwrap_or("").to_string(),
|
||||
SemaphoreEntry {
|
||||
sem: new_sem,
|
||||
max_permits: get_max_concurrent(),
|
||||
last_accessed: RwLock::new(Instant::now()),
|
||||
},
|
||||
);
|
||||
Some(RateLimitGuard { _permit: permit })
|
||||
}
|
||||
Err(_elapsed) => {
|
||||
@@ -146,7 +168,7 @@ pub async fn acquire_or_reject(
|
||||
if get_max_concurrent() == 0 {
|
||||
return Ok(None);
|
||||
}
|
||||
// Timeout — reject with resource exhausted
|
||||
crate::metrics::record_rate_limit_reject(repo);
|
||||
Err(tonic::Status::resource_exhausted(format!(
|
||||
"rate limit exceeded for repository '{repo}': max {max} concurrent operations",
|
||||
max = get_max_concurrent()
|
||||
@@ -161,6 +183,52 @@ pub fn remove_repository(repo_relative_path: &str) {
|
||||
tracing::debug!(repo = %repo_relative_path, "rate limit semaphore removed");
|
||||
}
|
||||
|
||||
/// Clean up idle semaphores that have no active permits and haven't been
|
||||
/// accessed within the idle threshold.
|
||||
///
|
||||
/// Call this periodically (e.g., from a background task) to prevent
|
||||
/// unbounded growth of the semaphore map.
|
||||
pub fn cleanup_idle_semaphores() {
|
||||
let threshold = std::time::Duration::from_secs(SEMAPHORE_IDLE_THRESHOLD_SECS);
|
||||
let now = Instant::now();
|
||||
let max_concurrent = get_max_concurrent();
|
||||
let mut removed = 0u64;
|
||||
|
||||
limiter().semaphores.retain(|_key, entry| {
|
||||
let is_idle = entry.sem.available_permits() == max_concurrent;
|
||||
let is_stale = entry
|
||||
.last_accessed
|
||||
.read()
|
||||
.map(|last| now.duration_since(*last) > threshold)
|
||||
.unwrap_or(false);
|
||||
|
||||
let keep = !(is_idle && is_stale);
|
||||
if !keep {
|
||||
removed += 1;
|
||||
}
|
||||
keep
|
||||
});
|
||||
|
||||
if removed > 0 {
|
||||
tracing::info!(
|
||||
removed = removed,
|
||||
"cleaned up idle rate-limit semaphores"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Start a background task to periodically clean up idle semaphores.
|
||||
pub fn start_semaphore_cleanup_task() -> tokio::task::JoinHandle<()> {
|
||||
let interval = std::time::Duration::from_secs(60);
|
||||
tokio::spawn(async move {
|
||||
let mut ticker = tokio::time::interval(interval);
|
||||
loop {
|
||||
ticker.tick().await;
|
||||
cleanup_idle_semaphores();
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Update the max concurrent limit at runtime.
|
||||
///
|
||||
/// Only replaces semaphores that have no active permits (idle repos).
|
||||
@@ -187,8 +255,7 @@ pub fn set_max_concurrent(max: usize) {
|
||||
.semaphores
|
||||
.iter()
|
||||
.filter_map(|entry| {
|
||||
let sem = entry.value();
|
||||
if sem.available_permits() == old_max {
|
||||
if entry.value().max_permits == old_max {
|
||||
Some(entry.key().clone())
|
||||
} else {
|
||||
None
|
||||
@@ -197,7 +264,14 @@ pub fn set_max_concurrent(max: usize) {
|
||||
.collect();
|
||||
|
||||
for key in keys {
|
||||
l.semaphores.insert(key, Arc::new(Semaphore::new(max)));
|
||||
l.semaphores.insert(
|
||||
key,
|
||||
SemaphoreEntry {
|
||||
sem: Arc::new(Semaphore::new(max)),
|
||||
max_permits: max,
|
||||
last_accessed: RwLock::new(Instant::now()),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
tracing::info!(max_concurrent = max, "rate limit max_concurrent updated");
|
||||
|
||||
Reference in New Issue
Block a user