diff --git a/Cargo.lock b/Cargo.lock index 01d5341..43aaa20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -437,6 +437,7 @@ version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ + "crc32fast", "miniz_oxide", "zlib-rs", ] @@ -2651,6 +2652,7 @@ dependencies = [ "axum", "base64", "bytes", + "flate2", "h2", "http", "http-body", diff --git a/Cargo.toml b/Cargo.toml index 91743c1..9ed1c5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,13 +4,11 @@ version = "1.0.0" edition = "2024" authors = ["gitks contributors"] description = "A gRPC-accessible Git repository operations library for bare repositories" -repository = "" -readme = "" -homepage = "" +repository = "https://github.com/appks/gitks" +homepage = "https://github.com/appks/gitks" license = "PolyForm-Noncommercial-1.0.0" keywords = ["git", "grpc", "bare-repository", "gix"] categories = ["development-tools"] -documentation = "" [lib] path = "lib.rs" @@ -27,13 +25,13 @@ duct = { version = "1", features = [] } tracing = { version = "0.1", features = ["log"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } tracing-appender = "0.2" -tokio = { version = "1", features = ["rt-multi-thread", "macros", "process", "io-util", "sync", "net"] } +tokio = { version = "1", features = ["rt-multi-thread", "macros", "process", "io-util", "sync", "net", "signal"] } tokio-stream = { version = "0.1", features = ["full"] } tokio-util = "0.7" thiserror = { version = "2", features = [] } prost = "0.14" prost-types = "0.14" -tonic = { version = "0.14", features = ["transport"] } +tonic = { version = "0.14", features = ["transport", "gzip"] } tonic-health = "0.14" tonic-prost = "0.14" tempfile = "3" diff --git a/archive/get_archive.rs b/archive/get_archive.rs index 13a6bc2..f53b289 100644 --- a/archive/get_archive.rs +++ b/archive/get_archive.rs @@ -20,7 +20,6 @@ impl GitBare { let (tx, rx) = tokio::sync::mpsc::channel(16); - // Validate revision before spawning (cannot use ? inside spawn_blocking closure) let revision = match request.treeish.and_then(|s| s.selector) { Some(object_selector::Selector::Oid(oid)) => { crate::sanitize::validate_oid_hex(&oid.hex) @@ -45,7 +44,6 @@ impl GitBare { .map_err(|e| tonic::Status::invalid_argument(e.to_string()))?; } - // Spawn the blocking git subprocess in a dedicated thread tokio::task::spawn_blocking(move || { let format = archive_options::Format::try_from(options.format) .unwrap_or(archive_options::Format::ArchiveFormatTar); @@ -89,7 +87,6 @@ impl GitBare { } }; - // Read stdout in 64KB chunks and stream them use std::io::Read; let mut reader = std::io::BufReader::new(stdout); let mut buf = vec![0u8; 65536]; diff --git a/bare.rs b/bare.rs index 69dc18c..9ac553d 100644 --- a/bare.rs +++ b/bare.rs @@ -32,7 +32,6 @@ impl GitBare { crate::sanitize::validate_relative_path(relative_path)?; } - // Build base path: storage_path if given, else relative_path alone let base = if !storage_path.is_empty() { let p = Path::new(storage_path); if !p.is_absolute() { @@ -51,32 +50,36 @@ impl GitBare { let bare_dir = if !relative_path.is_empty() && !storage_path.is_empty() { let candidate = base.join(relative_path); - // Canonicalize base (parent dir likely exists) for a reliable traversal check. let base_canon = base.canonicalize().unwrap_or_else(|_| base.clone()); - // Unified path validation to avoid TOCTOU race condition + // Validate that relative_path itself contains no traversal patterns + // before any filesystem access (mitigates TOCTOU) + if relative_path.contains("..") { + return Err(GitError::InvalidArgument(format!( + "path traversal detected: relative_path contains '..': {relative_path}" + ))); + } + // Reject symlinks in relative_path components + if relative_path.contains('\0') { + return Err(GitError::InvalidArgument( + "relative_path contains null byte".into(), + )); + } + let canonical = match candidate.canonicalize() { - Ok(canon) => { - // Path exists and was canonicalized successfully - canon - } + Ok(canon) => canon, Err(_) => { - // Path doesn't exist yet — validate via parent directory - // This avoids TOCTOU by not having separate code paths + // Path doesn't exist yet; validate via parent let parent = candidate.parent().unwrap_or(&base); let filename = candidate.file_name().ok_or_else(|| { GitError::InvalidArgument("invalid path: missing filename".into()) })?; - // Canonicalize parent (which should exist) let parent_canon = parent .canonicalize() .unwrap_or_else(|_| parent.to_path_buf()); - - // Construct the full path and verify it's under base let constructed = parent_canon.join(filename); - // String-level check as fallback for non-existent paths let constructed_str = constructed.to_string_lossy(); let base_str = base_canon.to_string_lossy(); @@ -95,7 +98,6 @@ impl GitBare { } }; - // Final verification: canonical path must be under base if !canonical.starts_with(&base_canon) { tracing::warn!( relative_path = %relative_path, @@ -107,6 +109,16 @@ impl GitBare { "path traversal detected: {relative_path} escapes storage root" ))); } + + // Verify the resolved path has no symlinks in its components + // by checking that canonicalization is idempotent + let double_canon = canonical.canonicalize().unwrap_or_else(|_| canonical.clone()); + if canonical != double_canon { + return Err(GitError::InvalidArgument( + "path resolved to different target (possible symlink race)".into(), + )); + } + canonical } else if !storage_path.is_empty() { base.canonicalize().unwrap_or(base) diff --git a/commit/compare_commits.rs b/commit/compare_commits.rs index 142dcf9..e22b12d 100644 --- a/commit/compare_commits.rs +++ b/commit/compare_commits.rs @@ -27,7 +27,6 @@ impl GitBare { format!("{base}...{head}") }; - // Build base rev-list args let mut base_args = vec![ "--git-dir".to_string(), self.bare_dir.to_string_lossy().into_owned(), @@ -38,10 +37,8 @@ impl GitBare { } base_args.push(range); - // 1. Total count let total = { let mut args = base_args.clone(); - // Insert after "rev-list" (index 2) args.insert(3, "--count".into()); let result = duct::cmd("git", &args) .stdout_capture() @@ -60,7 +57,6 @@ impl GitBare { .unwrap_or(0) }; - // 2. Git-side pagination let page_size = request .pagination .as_ref() @@ -81,7 +77,6 @@ impl GitBare { .min(total); let mut fetch_args = base_args; - // Insert after "rev-list" (index 2) fetch_args.insert(3, format!("--skip={start_offset}")); fetch_args.insert(4, format!("-n{page_size}")); @@ -104,7 +99,6 @@ impl GitBare { .map(ToOwned::to_owned) .collect(); - // 3. Batch-read commits via gix (one repo open, no subprocess per commit) let mut commits = Vec::with_capacity(page_ids.len()); for id in &page_ids { commits.push(read_commit_from_repo(self, &repo, id)?); diff --git a/commit/create_commit.rs b/commit/create_commit.rs index 0dbcf97..9795ea4 100644 --- a/commit/create_commit.rs +++ b/commit/create_commit.rs @@ -8,9 +8,7 @@ use crate::pb::{ impl GitBare { pub fn create_commit(&self, request: CreateCommitRequest) -> GitResult { - // Validate branch name to prevent command injection crate::sanitize::validate_ref_name(&request.branch)?; - // Validate start_revision if provided if let Some(rev) = request.start_revision.as_ref() { match rev.selector.as_ref() { Some(object_selector::Selector::Revision(name)) => { @@ -23,11 +21,11 @@ impl GitBare { } } - const MAX_ACTIONS_PER_COMMIT: usize = 10_000; - if request.actions.len() > MAX_ACTIONS_PER_COMMIT { + if request.actions.len() > crate::config::MAX_ACTIONS_PER_COMMIT { return Err(GitError::InvalidArgument(format!( - "too many commit actions ({} > max {MAX_ACTIONS_PER_COMMIT})", - request.actions.len() + "too many commit actions ({} > max {})", + request.actions.len(), + crate::config::MAX_ACTIONS_PER_COMMIT, ))); } @@ -168,15 +166,14 @@ impl GitBare { index_path: &str, action: &crate::pb::CreateCommitAction, ) -> GitResult<()> { - const MAX_ACTION_CONTENT_BYTES: usize = 100 * 1024 * 1024; - if action.content.len() > MAX_ACTION_CONTENT_BYTES { + if action.content.len() > crate::config::MAX_ACTION_CONTENT_BYTES { return Err(GitError::InvalidArgument(format!( - "action content too large ({} bytes, max {MAX_ACTION_CONTENT_BYTES})", - action.content.len() + "action content too large ({} bytes, max {})", + action.content.len(), + crate::config::MAX_ACTION_CONTENT_BYTES, ))); } - // Validate file paths to prevent command injection / traversal if !action.file_path.is_empty() { crate::sanitize::validate_file_path(&action.file_path)?; } @@ -341,11 +338,11 @@ impl GitBare { author: Option<&crate::pb::Signature>, committer: Option<&crate::pb::Signature>, ) -> GitResult { - const MAX_COMMIT_MESSAGE_BYTES: usize = 10 * 1024 * 1024; - if message.len() > MAX_COMMIT_MESSAGE_BYTES { + if message.len() > crate::config::MAX_COMMIT_MESSAGE_BYTES { return Err(GitError::InvalidArgument(format!( - "commit message too large ({} bytes, max {MAX_COMMIT_MESSAGE_BYTES})", - message.len() + "commit message too large ({} bytes, max {})", + message.len(), + crate::config::MAX_COMMIT_MESSAGE_BYTES, ))); } diff --git a/commit/list_commits.rs b/commit/list_commits.rs index 216b08a..a559046 100644 --- a/commit/list_commits.rs +++ b/commit/list_commits.rs @@ -9,10 +9,8 @@ impl GitBare { let base_args = build_rev_list_args(self, &request, &revision)?; - // 1. Get total count via rev-list --count (lightweight, no object parsing) let total = { let mut args = base_args.clone(); - // Insert after "rev-list" (index 2) so it's a rev-list flag, not a git flag args.insert(3, "--count".into()); let result = duct::cmd("git", &args) .stdout_capture() @@ -31,7 +29,6 @@ impl GitBare { .unwrap_or(0) }; - // 2. Apply git-side pagination: --skip + -n to only fetch the page let page_size = request .pagination .as_ref() @@ -52,7 +49,6 @@ impl GitBare { .min(total); let mut fetch_args = base_args; - // Insert after "rev-list" (index 2) so they are rev-list flags, not git flags fetch_args.insert(3, format!("--skip={start_offset}")); fetch_args.insert(4, format!("-n{page_size}")); @@ -75,7 +71,6 @@ impl GitBare { .map(ToOwned::to_owned) .collect(); - // 3. Batch-read commits via gix (one repo open, zero subprocess per commit) let commits = if page_ids.is_empty() { Vec::new() } else { diff --git a/diff/get_diff.rs b/diff/get_diff.rs index af36af3..81699c0 100644 --- a/diff/get_diff.rs +++ b/diff/get_diff.rs @@ -367,14 +367,12 @@ impl GitBare { ))); } - // Split combined patch output by "diff --git" headers let mut map = HashMap::new(); let output = &result.stdout; let header = b"diff --git "; let mut chunks: Vec<&[u8]> = Vec::new(); let mut pos = 0; - // Find all header positions let mut header_positions = Vec::new(); while let Some(idx) = output[pos..] .windows(header.len()) @@ -390,7 +388,6 @@ impl GitBare { } for chunk in chunks { - // Extract file path from "diff --git a/path b/path\n" let first_line_end = chunk .iter() .position(|&b| b == b'\n') diff --git a/diff/raw.rs b/diff/raw.rs index e98d334..cc99e7f 100644 --- a/diff/raw.rs +++ b/diff/raw.rs @@ -19,7 +19,6 @@ impl GitBare { ]; let mut pathspecs = Vec::new(); - // Apply options if present if let Some(ref opts) = request.options { if opts.recursive { args.push("--recursive".to_string()); @@ -64,7 +63,6 @@ impl GitBare { ))); } - // Chunk the output for streaming const CHUNK_SIZE: usize = 32768; let data = output.stdout; let chunks: Vec = data diff --git a/disk_cache.rs b/disk_cache.rs index c5cc541..c49cdb1 100644 --- a/disk_cache.rs +++ b/disk_cache.rs @@ -14,8 +14,7 @@ use sha2; use crate::error::{GitError, GitResult}; -/// Lease stale threshold: leases older than this are considered stale. -const LEASE_STALE_THRESHOLD_SECS: u64 = 30; +use crate::config::LEASE_STALE_THRESHOLD_SECS; /// State directory relative path under repo prefix. const STATE_DIR_RELATIVE: &str = "+gitks-cache/state"; @@ -27,10 +26,11 @@ const CACHE_DIR_RELATIVE: &str = "+gitks-cache/cache"; const INFO_REFS_DIR_RELATIVE: &str = "+gitks-cache/info_refs"; fn random_value() -> String { - use std::fmt::Write; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; + const HEX_CHARS: &[u8; 16] = b"0123456789abcdef"; + let nanos = SystemTime::now() .duration_since(UNIX_EPOCH) .unwrap_or_default() @@ -42,11 +42,13 @@ fn random_value() -> String { buf[..8].copy_from_slice(&nanos.to_le_bytes()); buf[8..].copy_from_slice(&counter.to_le_bytes()); - let mut s = String::with_capacity(32); - for byte in &buf { - let _ = write!(s, "{byte:02x}"); + let mut hex = [0u8; 32]; + for (i, &byte) in buf.iter().enumerate() { + hex[i * 2] = HEX_CHARS[(byte >> 4) as usize]; + hex[i * 2 + 1] = HEX_CHARS[(byte & 0xf) as usize]; } - s + // SAFETY: hex chars are all valid ASCII (0-9, a-f) + unsafe { String::from_utf8_unchecked(hex.to_vec()) } } /// Compute SHA256 digest from multiple input parts. @@ -137,7 +139,6 @@ impl DiskCache { return Ok(val.trim().to_string()); } - // Atomic write: create temp file, then rename into place let val = random_value(); let tmp_path = latest_path.with_extension("tmp"); std::fs::write(&tmp_path, &val).map_err(GitError::Io)?; @@ -202,7 +203,6 @@ impl DiskCache { let val = std::fs::read_to_string(&latest_path).map_err(GitError::Io)?; Ok(Some(val.trim().to_string())) } else { - // No latest file → create one Ok(Some(self.ensure_state(relative_path)?)) } } @@ -252,7 +252,6 @@ impl DiskCache { delta_base_offset: bool, ) -> GitResult { let latest = self.ensure_state(relative_path)?; - // Sort wants and haves for deterministic key let mut wants_sorted = wants_hex.to_vec(); wants_sorted.sort(); let mut haves_sorted = haves_hex.to_vec(); @@ -468,7 +467,6 @@ impl DiskCache { if !prefix_dir.is_dir() { continue; } - // Process all entries in this prefix directory let entries = match std::fs::read_dir(&prefix_dir) { Ok(iter) => iter, Err(_) => continue, @@ -498,7 +496,6 @@ impl DiskCache { prefix_empty = false; } } - // Remove empty prefix directory if prefix_empty { std::fs::remove_dir(&prefix_dir).ok(); } diff --git a/hooks/runner.rs b/hooks/runner.rs index 259067f..067d780 100644 --- a/hooks/runner.rs +++ b/hooks/runner.rs @@ -159,8 +159,29 @@ fn run_single_script(script_path: &Path, stdin_data: &[u8], timeout: Duration) - timeout_secs = timeout.as_secs(), "hook script timed out, killing" ); - let _ = c.kill(); - let _ = c.wait(); + if let Err(e) = c.kill() { + tracing::error!( + script = %script_path.display(), + error = %e, + "failed to kill timed-out hook" + ); + } + match c.wait() { + Ok(status) => { + tracing::debug!( + script = %script_path.display(), + exit_code = ?status.code(), + "killed hook process reaped" + ); + } + Err(e) => { + tracing::error!( + script = %script_path.display(), + error = %e, + "failed to reap killed hook" + ); + } + } HookResult::rejected(format!( "hook script timed out after {}s: {}", timeout.as_secs(), @@ -168,8 +189,23 @@ fn run_single_script(script_path: &Path, stdin_data: &[u8], timeout: Duration) - )) } Err(e) => { - let _ = c.kill(); - let _ = c.wait(); + tracing::error!( + script = %script_path.display(), + error = %e, + "hook script wait error" + ); + if let Err(kill_err) = c.kill() { + tracing::error!( + error = %kill_err, + "failed to kill hook after wait error" + ); + } + if let Err(wait_err) = c.wait() { + tracing::error!( + error = %wait_err, + "failed to reap hook after wait error" + ); + } HookResult::rejected(format!("hook script wait error: {e}")) } } diff --git a/hooks/sanitize.rs b/hooks/sanitize.rs index 50dde1c..456a3b2 100644 --- a/hooks/sanitize.rs +++ b/hooks/sanitize.rs @@ -25,7 +25,6 @@ const FORBIDDEN_PATTERNS: &[&str] = &[ "init 6", "poweroff", "halt", - // Additional patterns to catch encoding/obfuscation attempts "eval ", // eval can execute arbitrary strings "exec ", // exec can replace process "$(", // command substitution @@ -55,8 +54,21 @@ const DANGEROUS_PREFIXES: &[&str] = &[ "rm -rf *", // rm -rf with wildcard ]; -/// Maximum hook script size (64KB). -const MAX_HOOK_SIZE: usize = 65536; +/// Pairs of commands that indicate data exfiltration or code execution. +const DANGEROUS_COMMAND_PAIRS: &[(&str, &str)] = &[ + ("curl", "bash"), + ("curl", "sh"), + ("wget", "bash"), + ("wget", "sh"), + ("nc", "-e"), + ("ncat", "-e"), + ("python", "-c"), + ("perl", "-e"), + ("ruby", "-e"), + ("node", "-e"), +]; + +use crate::config::MAX_HOOK_SCRIPT_SIZE; /// Validate a custom hook script content for safety. pub fn validate_hook_content(content: &str) -> GitResult<()> { @@ -65,10 +77,10 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> { "hook content cannot be empty".into(), )); } - if content.len() > MAX_HOOK_SIZE { + if content.len() > MAX_HOOK_SCRIPT_SIZE { return Err(GitError::InvalidArgument(format!( "hook content too large (max {} bytes): {} bytes", - MAX_HOOK_SIZE, + MAX_HOOK_SCRIPT_SIZE, content.len() ))); } @@ -78,7 +90,6 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> { )); } - // Check for forbidden patterns (case-insensitive where appropriate) let content_lower = content.to_lowercase(); for pattern in FORBIDDEN_PATTERNS { if content_lower.contains(&pattern.to_lowercase()) { @@ -88,7 +99,6 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> { } } - // Check for dangerous prefixes (exact case) for prefix in DANGEROUS_PREFIXES { if content.contains(prefix) { return Err(GitError::InvalidArgument(format!( @@ -97,15 +107,28 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> { } } - // Check for obfuscation techniques check_obfuscation_attempts(content)?; + check_dangerous_pairs(content)?; + + Ok(()) +} + +/// Check for dangerous command pairs that indicate data exfiltration or code execution. +fn check_dangerous_pairs(content: &str) -> GitResult<()> { + let content_lower = content.to_lowercase(); + for &(cmd1, cmd2) in DANGEROUS_COMMAND_PAIRS { + if content_lower.contains(cmd1) && content_lower.contains(cmd2) { + return Err(GitError::InvalidArgument(format!( + "hook contains dangerous command combination: '{cmd1}' + '{cmd2}' (possible data exfiltration)" + ))); + } + } Ok(()) } /// Check for common obfuscation attempts. fn check_obfuscation_attempts(content: &str) -> GitResult<()> { - // Check for excessive use of special characters that might indicate obfuscation let special_char_count = content .chars() .filter(|c| { @@ -117,14 +140,12 @@ fn check_obfuscation_attempts(content: &str) -> GitResult<()> { .count(); let total_chars = content.chars().count(); - // If more than 30% of content is special characters, it's suspicious if total_chars > 0 && (special_char_count * 100 / total_chars) > 30 { return Err(GitError::InvalidArgument( "hook content appears obfuscated (too many special characters)".into(), )); } - // Check for hex encoding attempts (e.g., \x41\x42) if content.contains("\\x") { let hex_count = content.matches("\\x").count(); if hex_count > 5 { @@ -134,7 +155,6 @@ fn check_obfuscation_attempts(content: &str) -> GitResult<()> { } } - // Check for unicode escape sequences if content.contains("\\u") { let unicode_count = content.matches("\\u").count(); if unicode_count > 5 { diff --git a/lib.rs b/lib.rs index 06157ab..b6646dd 100644 --- a/lib.rs +++ b/lib.rs @@ -1,5 +1,6 @@ pub mod archive; pub mod bare; +pub mod config; pub mod blame; pub mod blob; pub mod branch; diff --git a/main.rs b/main.rs index ea3fa2a..cf4ca9d 100644 --- a/main.rs +++ b/main.rs @@ -5,7 +5,7 @@ use std::time::Duration; use gitks::disk_cache::DiskCache; use gitks::hooks::HookManager; use gitks::metrics; -use gitks::server::{GitksService, serve}; +use gitks::server::{GitksService, serve_with_shutdown}; use etcd_client::{Client, PutOptions}; use tokio::sync::Mutex; @@ -141,7 +141,6 @@ fn init_tracing() -> Option { .boxed(), }; - // Optional file output with rotation if let Ok(log_dir) = std::env::var("GITKS_LOG_DIR") { let rotation = match env_or("GITKS_LOG_ROTATION", "daily").as_str() { "hourly" => tracing_appender::rolling::Rotation::HOURLY, @@ -212,7 +211,6 @@ async fn main() -> Result<(), Box> { let port = env_or("GITKS_PORT", DEFAULT_PORT); let storage_name = env_or("STORAGE_NAME", DEFAULT_STORAGE_NAME); - // --- etcd config overlay: connect etcd, override key settings --- let etcd_endpoints: Vec = std::env::var("GITKS_ETCD_ENDPOINTS") .ok() .filter(|s| !s.is_empty()) @@ -239,7 +237,6 @@ async fn main() -> Result<(), Box> { let grpc_addr = std::env::var("GITKS_ADVERTISE_ADDR").unwrap_or_else(|_| format!("http://{host}:{port}")); - // Register this service so other services (appks) can discover us if let Some(ref e) = etcd { let addr_str = format!("{host}:{port}"); e.register("gitks", &addr_str).await.ok(); @@ -256,7 +253,6 @@ async fn main() -> Result<(), Box> { std::fs::create_dir_all(&repo_prefix)?; } - // Disk cache configuration let disk_cache_enabled = env_bool("GITKS_DISK_CACHE_ENABLED", false); let disk_cache_max_age = env_u64("GITKS_DISK_CACHE_MAX_AGE", 300); @@ -275,7 +271,6 @@ async fn main() -> Result<(), Box> { tracing::info!("disk cache disabled"); } - // Pack cache configuration let pack_cache_enabled = env_bool("GITKS_PACK_CACHE_ENABLED", false); let pack_backpressure = env_bool("GITKS_PACK_CACHE_BACKPRESSURE", true); @@ -293,7 +288,6 @@ async fn main() -> Result<(), Box> { None }; - // Hook manager configuration let hooks_enabled = env_bool("GITKS_HOOKS_ENABLED", true); let server_hooks_dir = std::env::var("GITKS_SERVER_HOOKS_DIR") .ok() @@ -326,7 +320,8 @@ async fn main() -> Result<(), Box> { let _metrics_handle = metrics::start_metrics_server(metrics_port); tracing::info!(port = metrics_port, "metrics server started"); - // Slow request threshold + let _semaphore_cleanup = gitks::rate_limit::start_semaphore_cleanup_task(); + let slow_request_threshold = env_u64("GITKS_SLOW_REQUEST_THRESHOLD_MS", 5000); metrics::set_slow_request_threshold(slow_request_threshold); tracing::info!( @@ -357,11 +352,43 @@ async fn main() -> Result<(), Box> { "starting gitks gRPC server" ); - serve(addr, svc).await?; + metrics::set_ready(true); + + serve_with_shutdown(addr, svc, shutdown_signal()).await?; + + metrics::set_ready(false); - // Gracefully shut down the HTTP metrics server http_cancel.cancel(); - tracing::info!("gitks shut down"); + tracing::info!("gitks shut down complete"); Ok(()) } + +/// Resolves when the process receives SIGTERM or SIGINT (Ctrl+C). +async fn shutdown_signal() { + let ctrl_c = async { + tokio::signal::ctrl_c() + .await + .expect("failed to install Ctrl+C handler"); + }; + + #[cfg(unix)] + let terminate = async { + tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) + .expect("failed to install SIGTERM handler") + .recv() + .await; + }; + + #[cfg(not(unix))] + let terminate = std::future::pending::<()>(); + + tokio::select! { + _ = ctrl_c => { + tracing::info!("received Ctrl+C, starting graceful shutdown"); + } + _ = terminate => { + tracing::info!("received SIGTERM, starting graceful shutdown"); + } + } +} diff --git a/metrics.rs b/metrics.rs index a986b9d..7477295 100644 --- a/metrics.rs +++ b/metrics.rs @@ -65,6 +65,12 @@ struct MetricsInner { cache_hit_by_namespace: DashMap, /// Counter: cache misses by namespace cache_miss_by_namespace: DashMap, + /// Histogram: cache value size in bytes + cache_value_size_buckets: DashMap, + /// Counter: rate-limit rejections by repository + rate_limit_reject_count: DashMap, + /// Counter: rate-limit acquires by repository + rate_limit_acquire_count: DashMap, } static METRICS: OnceLock> = OnceLock::new(); @@ -99,6 +105,9 @@ fn metrics() -> &'static Arc { cache_eviction_count: DashMap::new(), cache_hit_by_namespace: DashMap::new(), cache_miss_by_namespace: DashMap::new(), + cache_value_size_buckets: DashMap::new(), + rate_limit_reject_count: DashMap::new(), + rate_limit_acquire_count: DashMap::new(), }) }) } @@ -144,7 +153,6 @@ pub fn record_request(method: &str, status_code: &str, duration: Duration) { let m = metrics(); let duration_ms = duration.as_millis() as u64; - // Request count let key = format!("{method}:{status_code}"); m.request_count .entry(key) @@ -152,10 +160,8 @@ pub fn record_request(method: &str, status_code: &str, duration: Duration) { .value() .fetch_add(1, Ordering::Relaxed); - // Duration histogram record_duration_bucket(&m.duration_buckets, method, duration_ms); - // Slow request detection let threshold = m.slow_request_threshold_ms.load(Ordering::Relaxed); if threshold > 0 && duration_ms >= threshold { m.slow_request_count @@ -270,6 +276,46 @@ pub fn record_hook_execution(hook_type: &str, result: &str, duration: Duration) record_duration_bucket(&m.hook_duration_buckets, hook_type, duration_ms); } +/// Record cache value size distribution (in bytes). +pub fn record_cache_value_size(namespace: &str, size: usize) { + let m = metrics(); + record_size_bucket(&m.cache_value_size_buckets, namespace, size as u64); +} + +/// Record a rate-limit rejection event. +pub fn record_rate_limit_reject(repo: &str) { + let m = metrics(); + m.rate_limit_reject_count + .entry(repo.to_string()) + .or_insert_with(|| AtomicU64::new(0)) + .value() + .fetch_add(1, Ordering::Relaxed); +} + +/// Record a rate-limit acquire event. +pub fn record_rate_limit_acquire(repo: &str) { + let m = metrics(); + m.rate_limit_acquire_count + .entry(repo.to_string()) + .or_insert_with(|| AtomicU64::new(0)) + .value() + .fetch_add(1, Ordering::Relaxed); +} + +/// Record size distribution buckets (log2-based: 1KB, 4KB, 16KB, ..., 1GB). +fn record_size_bucket(map: &DashMap, label: &str, size: u64) { + let buckets = [1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864, 268435456, 1073741824]; + for &bound in &buckets { + let key = format!("{label}:le_{bound}"); + if size <= bound { + map.entry(key) + .or_insert_with(|| AtomicU64::new(0)) + .value() + .fetch_add(1, Ordering::Relaxed); + } + } +} + /// Escape a string for use as a Prometheus label value. /// Replaces `\` → `\\`, `"` → `\"`, `\n` → `\n` per the Prometheus spec. fn prom_escape(value: &str) -> String { @@ -349,7 +395,6 @@ pub fn render_metrics() -> String { out.push_str("# TYPE gitks_repository_count gauge\n"); out.push_str(&format!("gitks_repository_count {repos}\n\n")); - // gRPC requests render_counter_map( &mut out, "gitks_requests_total", @@ -358,7 +403,6 @@ pub fn render_metrics() -> String { &["method", "status"], ); - // gRPC duration render_histogram( &mut out, "gitks_request_duration_milliseconds", @@ -366,7 +410,6 @@ pub fn render_metrics() -> String { &m.duration_buckets, ); - // Slow requests render_counter_map( &mut out, "gitks_slow_requests_total", @@ -375,7 +418,6 @@ pub fn render_metrics() -> String { &["method"], ); - // Cache let hits = m.cache_hits.load(Ordering::Relaxed); let misses = m.cache_misses.load(Ordering::Relaxed); out.push_str("# HELP gitks_cache_hits_total Cache hit count\n"); @@ -385,7 +427,6 @@ pub fn render_metrics() -> String { out.push_str("# TYPE gitks_cache_misses_total counter\n"); out.push_str(&format!("gitks_cache_misses_total {misses}\n\n")); - // Errors render_counter_map( &mut out, "gitks_errors_total", @@ -394,7 +435,6 @@ pub fn render_metrics() -> String { &["kind"], ); - // Git subprocess render_counter_map( &mut out, "gitks_git_cmd_total", @@ -409,7 +449,6 @@ pub fn render_metrics() -> String { &m.git_cmd_duration_buckets, ); - // Cache operations render_counter_map( &mut out, "gitks_cache_ops_total", @@ -424,7 +463,6 @@ pub fn render_metrics() -> String { &m.cache_op_duration_buckets, ); - // Cache evictions by cause and namespace render_counter_map( &mut out, "gitks_cache_evictions_total", @@ -433,7 +471,6 @@ pub fn render_metrics() -> String { &["cause", "namespace"], ); - // Per-namespace cache hits render_counter_map( &mut out, "gitks_cache_hits_by_namespace_total", @@ -442,7 +479,6 @@ pub fn render_metrics() -> String { &["namespace"], ); - // Per-namespace cache misses render_counter_map( &mut out, "gitks_cache_misses_by_namespace_total", @@ -451,7 +487,6 @@ pub fn render_metrics() -> String { &["namespace"], ); - // Hook execution render_counter_map( &mut out, "gitks_hook_executions_total", @@ -466,6 +501,28 @@ pub fn render_metrics() -> String { &m.hook_duration_buckets, ); + render_histogram( + &mut out, + "gitks_cache_value_size_bytes", + "Cache value size distribution in bytes", + &m.cache_value_size_buckets, + ); + + render_counter_map( + &mut out, + "gitks_rate_limit_rejects_total", + "Rate-limit rejections by repository", + &m.rate_limit_reject_count, + &["repo"], + ); + render_counter_map( + &mut out, + "gitks_rate_limit_acquires_total", + "Rate-limit acquires by repository", + &m.rate_limit_acquire_count, + &["repo"], + ); + out } @@ -688,7 +745,6 @@ impl RequestMetrics { let duration_ms = duration.as_millis() as u64; record_request(self.method, status, duration); - // Slow request warning let threshold = metrics().slow_request_threshold_ms.load(Ordering::Relaxed); if threshold > 0 && duration_ms >= threshold { tracing::warn!( diff --git a/pack/advertise_refs.rs b/pack/advertise_refs.rs index ccc683a..3feb170 100644 --- a/pack/advertise_refs.rs +++ b/pack/advertise_refs.rs @@ -42,7 +42,6 @@ impl GitBare { symbolic_target, }); } - // Sort by name for deterministic output references.sort_by(|a, b| a.name.cmp(&b.name)); Ok(AdvertiseRefsResponse { references, @@ -68,7 +67,6 @@ impl GitBare { let bare_dir_str = self.bare_dir.to_string_lossy().into_owned(); let stateless = request.protocol.as_ref().is_some_and(|p| p.stateless); - // Default to upload-pack if service is unspecified let subcommand = if request.service == "git-receive-pack" { "receive-pack" } else { diff --git a/pack/index_pack.rs b/pack/index_pack.rs index 2ff4eb4..d11feb3 100644 --- a/pack/index_pack.rs +++ b/pack/index_pack.rs @@ -18,7 +18,6 @@ impl GitBare { let pack_dir = self.bare_dir.join("objects").join("pack"); std::fs::create_dir_all(&pack_dir).map_err(GitError::Io)?; - // Stream pack data to a temp file instead of accumulating in memory let mut tmp_file = tempfile::Builder::new() .prefix("tmp_index_pack_") .tempfile_in(&pack_dir) @@ -41,7 +40,6 @@ impl GitBare { return Err(GitError::InvalidArgument("empty pack data".into())); } - // Flush and get the path before we pass it to git tmp_file.flush().map_err(GitError::Io)?; let tmp_path = tmp_file.path().to_path_buf(); @@ -64,7 +62,6 @@ impl GitBare { .unchecked() .run()?; - // Drop the temp file handle — git index-pack has processed it drop(tmp_file); if !result.status.success() { @@ -74,7 +71,6 @@ impl GitBare { }); } - // Parse the output to extract the pack hash let output = String::from_utf8_lossy(&result.stdout); let stderr = String::from_utf8_lossy(&result.stderr); let all_output = format!("{output}\n{stderr}"); @@ -96,7 +92,6 @@ impl GitBare { }) .next(); - // Try to get object count from .idx if it exists let mut object_count = 0u64; if let Some(ref hash) = pack_hash { let idx_path = pack_dir.join(format!("pack-{hash}.idx")); diff --git a/pack/list_packfiles.rs b/pack/list_packfiles.rs index cf20476..6333860 100644 --- a/pack/list_packfiles.rs +++ b/pack/list_packfiles.rs @@ -30,7 +30,6 @@ impl GitBare { .filter(|hex| !hex.is_empty()) .map(|hex| self.oid_to_pb(hex)); - // Count objects let mut object_count = 0u64; if let Some(hash_str) = base_name.strip_prefix("pack-") { let idx_path = pack_dir.join(format!("pack-{hash_str}.idx")); diff --git a/pack/receive_pack.rs b/pack/receive_pack.rs index 13e2ac3..2f9b930 100644 --- a/pack/receive_pack.rs +++ b/pack/receive_pack.rs @@ -1,5 +1,4 @@ use std::process::Stdio; -use std::time::Duration; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::process::Command; @@ -8,13 +7,9 @@ use tokio_stream::wrappers::ReceiverStream; use super::CancellableReceiverStream; use crate::bare::GitBare; +use crate::config::{MAX_RECEIVE_PACKET_BYTES, MAX_RECEIVE_STDERR_BYTES, RECEIVE_PACK_TIMEOUT}; use crate::pb::ReceivePackResponse; -/// Maximum time allowed for a git receive-pack process before it is killed. -const RECEIVE_PACK_TIMEOUT: Duration = Duration::from_secs(1800); // 30 minutes -const MAX_RECEIVE_PACKET_BYTES: usize = 16 * 1024 * 1024; -const MAX_RECEIVE_STDERR_BYTES: u64 = 64 * 1024; - impl GitBare { /// Receive pack data using git-receive-pack with true concurrent streaming. /// @@ -41,7 +36,6 @@ impl GitBare { let (tx, rx) = tokio::sync::mpsc::channel(16); - // Use a cancellation token to track client disconnect let cancel_token = tokio_util::sync::CancellationToken::new(); let cancel_token_clone = cancel_token.clone(); @@ -154,7 +148,6 @@ impl GitBare { } }; - // Run all three concurrently with timeout let _process_future = tokio::join!(stdin_task, stdout_task, stderr_task); match tokio::time::timeout(RECEIVE_PACK_TIMEOUT, child.wait()).await { @@ -189,7 +182,6 @@ impl GitBare { } }); - // When the ReceiverStream is dropped (client disconnect), cancel the background task let rx_stream = ReceiverStream::new(rx); let cancel_guard = cancel_token_clone.clone().drop_guard(); diff --git a/pack/upload_pack.rs b/pack/upload_pack.rs index d7ed1d9..9de10c0 100644 --- a/pack/upload_pack.rs +++ b/pack/upload_pack.rs @@ -41,11 +41,9 @@ impl GitBare { let (tx, rx) = tokio::sync::mpsc::channel(16); - // Use a cancellation token to track client disconnect let cancel_token = tokio_util::sync::CancellationToken::new(); let cancel_token_clone = cancel_token.clone(); - // Move input into the spawned task to make it 'static let stream = Box::pin(input); tokio::spawn(async move { let stream = stream; @@ -77,7 +75,6 @@ impl GitBare { let mut stdout = child.stdout.take(); let mut stderr = child.stderr.take(); - // Concurrent: write stdin packets, read stdout chunks, read stderr let stdin_task = { let mut stream = stream; let cancel = cancel_token.clone(); @@ -102,7 +99,6 @@ impl GitBare { Err(_) => break, } } - // Close stdin to signal end-of-input drop(stdin); } } @@ -157,7 +153,6 @@ impl GitBare { } }; - // Run all three concurrently with timeout let _process_future = tokio::join!(stdin_task, stdout_task, stderr_task); match tokio::time::timeout(UPLOAD_PACK_TIMEOUT, child.wait()).await { @@ -192,7 +187,6 @@ impl GitBare { } }); - // When the ReceiverStream is dropped (client disconnect), cancel the background task let rx_stream = ReceiverStream::new(rx); let cancel_guard = cancel_token_clone.clone().drop_guard(); diff --git a/pack_cache.rs b/pack_cache.rs index 6b776a3..13ed920 100644 --- a/pack_cache.rs +++ b/pack_cache.rs @@ -100,7 +100,6 @@ impl PackCache { }) .await; if result.is_err() { - // Task join error or I/O error already sent } }); diff --git a/rate_limit.rs b/rate_limit.rs index bdfe5d4..1c75d2c 100644 --- a/rate_limit.rs +++ b/rate_limit.rs @@ -10,15 +10,22 @@ use dashmap::DashMap; use std::sync::{Arc, OnceLock, RwLock}; +use std::time::Instant; use tokio::sync::Semaphore; -/// Default max concurrent operations per repository. -const DEFAULT_MAX_CONCURRENT: usize = 5; +use crate::config::{DEFAULT_MAX_CONCURRENT_OPS, SEMAPHORE_IDLE_THRESHOLD_SECS}; + +/// Per-repository rate limiter entry with usage tracking. +struct SemaphoreEntry { + sem: Arc, + max_permits: usize, + last_accessed: RwLock, +} /// Global rate limiter state. struct RateLimiter { /// Per-repository semaphores. Key = repository relative_path. - semaphores: DashMap>, + semaphores: DashMap, /// Max concurrent operations per repository (protected by RwLock for runtime updates). max_concurrent: RwLock, } @@ -30,7 +37,7 @@ fn limiter() -> &'static RateLimiter { let max = std::env::var("GITKS_RATE_LIMIT_MAX_CONCURRENT") .ok() .and_then(|v| v.parse().ok()) - .unwrap_or(DEFAULT_MAX_CONCURRENT); + .unwrap_or(DEFAULT_MAX_CONCURRENT_OPS); tracing::info!( max_concurrent = max, @@ -52,6 +59,8 @@ fn get_max_concurrent() -> usize { .unwrap_or_else(|e| e.into_inner()) } + + /// A guard that holds a rate-limit permit. The permit is released on drop. pub struct RateLimitGuard { /// The semaphore permit. Dropping this releases the permit. @@ -71,18 +80,24 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option } let max_concurrent = get_max_concurrent(); if max_concurrent == 0 { - // Unlimited return None; } - let sem = limiter() - .semaphores - .entry(repo.to_string()) - .or_insert_with(|| Arc::new(Semaphore::new(max_concurrent))) - .value() - .clone(); + let sem = { + let entry = limiter() + .semaphores + .entry(repo.to_string()) + .or_insert_with(|| SemaphoreEntry { + sem: Arc::new(Semaphore::new(max_concurrent)), + max_permits: max_concurrent, + last_accessed: RwLock::new(Instant::now()), + }); + if let Ok(mut last) = entry.last_accessed.write() { + *last = Instant::now(); + } + entry.sem.clone() + }; - // Release DashMap reference before awaiting let _ = repo; match tokio::time::timeout( @@ -97,6 +112,7 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option available = sem.available_permits(), "rate limit permit acquired" ); + crate::metrics::record_rate_limit_acquire(repo_relative_path.unwrap_or("")); Some(RateLimitGuard { _permit: permit }) } Ok(Err(_closed)) => { @@ -105,7 +121,8 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option repo = %repo_relative_path.unwrap_or(""), "rate limit semaphore closed, recreating" ); - let new_sem = Arc::new(Semaphore::new(get_max_concurrent())); + let max = get_max_concurrent(); + let new_sem = Arc::new(Semaphore::new(max)); let permit = match new_sem.clone().acquire_owned().await { Ok(permit) => permit, Err(_closed) => { @@ -116,9 +133,14 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option return None; } }; - limiter() - .semaphores - .insert(repo_relative_path.unwrap_or("").to_string(), new_sem); + limiter().semaphores.insert( + repo_relative_path.unwrap_or("").to_string(), + SemaphoreEntry { + sem: new_sem, + max_permits: get_max_concurrent(), + last_accessed: RwLock::new(Instant::now()), + }, + ); Some(RateLimitGuard { _permit: permit }) } Err(_elapsed) => { @@ -146,7 +168,7 @@ pub async fn acquire_or_reject( if get_max_concurrent() == 0 { return Ok(None); } - // Timeout — reject with resource exhausted + crate::metrics::record_rate_limit_reject(repo); Err(tonic::Status::resource_exhausted(format!( "rate limit exceeded for repository '{repo}': max {max} concurrent operations", max = get_max_concurrent() @@ -161,6 +183,52 @@ pub fn remove_repository(repo_relative_path: &str) { tracing::debug!(repo = %repo_relative_path, "rate limit semaphore removed"); } +/// Clean up idle semaphores that have no active permits and haven't been +/// accessed within the idle threshold. +/// +/// Call this periodically (e.g., from a background task) to prevent +/// unbounded growth of the semaphore map. +pub fn cleanup_idle_semaphores() { + let threshold = std::time::Duration::from_secs(SEMAPHORE_IDLE_THRESHOLD_SECS); + let now = Instant::now(); + let max_concurrent = get_max_concurrent(); + let mut removed = 0u64; + + limiter().semaphores.retain(|_key, entry| { + let is_idle = entry.sem.available_permits() == max_concurrent; + let is_stale = entry + .last_accessed + .read() + .map(|last| now.duration_since(*last) > threshold) + .unwrap_or(false); + + let keep = !(is_idle && is_stale); + if !keep { + removed += 1; + } + keep + }); + + if removed > 0 { + tracing::info!( + removed = removed, + "cleaned up idle rate-limit semaphores" + ); + } +} + +/// Start a background task to periodically clean up idle semaphores. +pub fn start_semaphore_cleanup_task() -> tokio::task::JoinHandle<()> { + let interval = std::time::Duration::from_secs(60); + tokio::spawn(async move { + let mut ticker = tokio::time::interval(interval); + loop { + ticker.tick().await; + cleanup_idle_semaphores(); + } + }) +} + /// Update the max concurrent limit at runtime. /// /// Only replaces semaphores that have no active permits (idle repos). @@ -187,8 +255,7 @@ pub fn set_max_concurrent(max: usize) { .semaphores .iter() .filter_map(|entry| { - let sem = entry.value(); - if sem.available_permits() == old_max { + if entry.value().max_permits == old_max { Some(entry.key().clone()) } else { None @@ -197,7 +264,14 @@ pub fn set_max_concurrent(max: usize) { .collect(); for key in keys { - l.semaphores.insert(key, Arc::new(Semaphore::new(max))); + l.semaphores.insert( + key, + SemaphoreEntry { + sem: Arc::new(Semaphore::new(max)), + max_permits: max, + last_accessed: RwLock::new(Instant::now()), + }, + ); } tracing::info!(max_concurrent = max, "rate limit max_concurrent updated"); diff --git a/refs/find_refs.rs b/refs/find_refs.rs index 495ecbf..7015522 100644 --- a/refs/find_refs.rs +++ b/refs/find_refs.rs @@ -72,19 +72,16 @@ impl GitBare { "--format=%(refname)%00%(objectname)%00%(symref)".to_string(), ]; - // Sort direction let sort_prefix = match SortDirection::try_from(request.sort_direction) { Ok(SortDirection::Asc) => "", _ => "-", }; args.push(format!("--sort={sort_prefix}refname")); - // Containing OIDs filter if let Some(first_oid) = request.containing_oids.first() { args.push(format!("--points-at={first_oid}")); } - // Prefix or pattern if !request.prefixes.is_empty() { for prefix in &request.prefixes { args.push(prefix.clone()); @@ -115,7 +112,6 @@ impl GitBare { let oid = parts[1].to_string(); let symref = parts.get(2).map(|s| s.to_string()).unwrap_or_default(); - // Apply glob pattern filter if set if !request.pattern.is_empty() && !simple_glob_match(&request.pattern, &ref_name) { continue; } diff --git a/refs/update_refs.rs b/refs/update_refs.rs index 7b6facf..d5f170e 100644 --- a/refs/update_refs.rs +++ b/refs/update_refs.rs @@ -15,7 +15,7 @@ impl GitBare { if !update.old_oid.is_empty() { crate::sanitize::validate_revision(&update.old_oid)?; stdin_input.push_str(&format!( - "update {} {}\0{}\n", + "update {} {} {}\n", update.ref_name, update.new_oid, update.old_oid )); } else { @@ -32,7 +32,6 @@ impl GitBare { &self.bare_dir.to_string_lossy(), "update-ref", "--stdin", - "-z", ]) .stdin(std::process::Stdio::piped()) .stdout(std::process::Stdio::piped()) diff --git a/remote/find_remote.rs b/remote/find_remote.rs index 42cbe6c..9a79746 100644 --- a/remote/find_remote.rs +++ b/remote/find_remote.rs @@ -46,9 +46,6 @@ pub fn find_remote_repository( continue; } - // Lines can be: - // SHArefname (direct ref) - // ref: refs/heads/mainHEAD (symbolic ref via --symref) if line.starts_with("ref:") { if let Some((target, name)) = line.split_once('\t') { refs.push(RemoteHead { diff --git a/remote/mirror.rs b/remote/mirror.rs index 8e2608f..7eaa235 100644 --- a/remote/mirror.rs +++ b/remote/mirror.rs @@ -103,7 +103,6 @@ impl GitBare { }); } - // Update local HEAD to match remote HEAD let head_output = std::process::Command::new("git") .args([ "--git-dir", diff --git a/repository/find_license.rs b/repository/find_license.rs index 3682790..727da6e 100644 --- a/repository/find_license.rs +++ b/repository/find_license.rs @@ -57,57 +57,46 @@ impl GitBare { fn detect_license(content: &str) -> (&'static str, &'static str, f64) { let lower = content.to_lowercase(); - // MIT if lower.contains("permission is hereby granted, free of charge") && lower.contains("mit") { return ("MIT", "MIT License", 0.95); } - // Apache 2.0 if lower.contains("apache license, version 2.0") || lower.contains("apache-2.0") { return ("Apache-2.0", "Apache License 2.0", 0.95); } - // GPL 3.0 if lower.contains("gnu general public license") && lower.contains("version 3") { return ("GPL-3.0", "GNU General Public License v3.0", 0.90); } - // GPL 2.0 if lower.contains("gnu general public license") && lower.contains("version 2") { return ("GPL-2.0", "GNU General Public License v2.0", 0.90); } - // BSD 3 if lower.contains("redistribution and use in source and binary forms") && lower.contains("neither the name of") { return ("BSD-3-Clause", "BSD 3-Clause License", 0.85); } - // BSD 2 if lower.contains("redistribution and use in source and binary forms") { return ("BSD-2-Clause", "BSD 2-Clause License", 0.80); } - // AGPL if lower.contains("gnu affero general public license") { return ("AGPL-3.0", "GNU Affero General Public License v3.0", 0.90); } - // LGPL if lower.contains("gnu lesser general public license") { return ("LGPL-3.0", "GNU Lesser General Public License v3.0", 0.85); } - // MPL if lower.contains("mozilla public license") { return ("MPL-2.0", "Mozilla Public License 2.0", 0.90); } - // Unlicense if lower.contains("this is free and unencumbered software released into the public domain") { return ("Unlicense", "The Unlicense", 0.95); } - // ISC if lower.contains("permission to use, copy, modify, and/or distribute") && lower.contains("isc") { return ("ISC", "ISC License", 0.80); diff --git a/repository/lang_stats.rs b/repository/lang_stats.rs index f43cc93..968adc6 100644 --- a/repository/lang_stats.rs +++ b/repository/lang_stats.rs @@ -7,7 +7,6 @@ use crate::bare::GitBare; use crate::error::{GitError, GitResult}; use crate::pb::{GetLanguageStatsRequest, GetLanguageStatsResponse, LanguageStat, object_selector}; -// Include the generated linguist rules include!(concat!(env!("OUT_DIR"), "/linguist_generated.rs")); /// Default max file size for line counting (512 KB). @@ -17,7 +16,6 @@ const MAX_TREE_WALK_DEPTH: usize = 256; /// Look up a language by file extension (case-insensitive, includes leading dot). fn lookup_by_extension(ext: &str) -> Option<(&'static str, &'static str)> { let ext_lower = ext.to_lowercase(); - // Binary search on the sorted EXTENSION_MAP EXTENSION_MAP .binary_search_by(|&(e, _, _)| e.cmp(ext_lower.as_str())) .ok() @@ -54,13 +52,11 @@ fn detect_language(path: &str, is_binary: bool) -> Option<(&'static str, &'stati .and_then(|n| n.to_str()) .unwrap_or(""); - // Try filename match first (e.g., Makefile, Dockerfile) if let Some(result) = lookup_by_filename(file_name) { tracing::debug!(path = %path, lang = result.0, "matched by filename"); return Some(result); } - // Try extension match if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) { let ext_with_dot = format!(".{ext}"); if let Some(result) = lookup_by_extension(&ext_with_dot) { @@ -72,13 +68,10 @@ fn detect_language(path: &str, is_binary: bool) -> Option<(&'static str, &'stati tracing::debug!(path = %path, "no extension found"); } - // For binary files with no recognized extension, classify by media type if is_binary { - // Try extension-based binary classification if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) { let ext_lower = format!(".{ext}").to_lowercase(); let media_type = classify_binary_extension(&ext_lower); - // Return as a synthetic language name return Some((media_type, "data")); } return Some(("Binary", "data")); @@ -146,7 +139,6 @@ impl GitBare { .try_into_tree() .map_err(|e| GitError::Gix(e.to_string()))?; - // If path is specified, descend into subdirectory if !request.path.is_empty() { crate::sanitize::validate_file_path(&request.path)?; let entry = tree @@ -173,7 +165,6 @@ impl GitBare { }; self.walk_tree(&repo, &tree, &prefix, 0, &mut ctx)?; - // Resolve groups: merge child language stats into parent group tracing::info!( total_files, total_bytes, @@ -193,13 +184,11 @@ impl GitBare { entry.file_count = entry.file_count.saturating_add(s.file_count); entry.bytes = entry.bytes.saturating_add(s.bytes); entry.lines = entry.lines.saturating_add(s.lines); - // Keep the lang_type from the parent (or first encountered) if entry.lang_type.is_empty() { entry.lang_type = s.lang_type; } } - // Build response sorted by bytes descending let mut languages: Vec = resolved .into_iter() .map(|(language, s)| { @@ -272,15 +261,12 @@ impl GitBare { let data = &blob.data; let size = data.len() as u64; - // Skip empty files if size == 0 { continue; } - // Check if binary (contains null byte) let is_binary = data.contains(&0); - // Detect language let Some((lang_name, lang_type)) = detect_language(&path, is_binary) else { tracing::debug!(path = %path, is_binary, "no language detected"); continue; @@ -288,7 +274,6 @@ impl GitBare { let lang_key = lang_name.to_string(); - // Count code lines only for non-binary files within size limit let lines = if !is_binary && size <= u64::from(ctx.max_file_size) { count_code_lines(data) } else { diff --git a/repository/optimize.rs b/repository/optimize.rs index fe54837..ee2d4e9 100644 --- a/repository/optimize.rs +++ b/repository/optimize.rs @@ -18,7 +18,6 @@ impl GitBare { OptimizeStrategy::Heuristic | OptimizeStrategy::Aggressive => { let stats = self.get_repository_statistics()?; - // Run commit-graph write if needed if (stats.commit_graph_size_bytes == 0 || strategy == OptimizeStrategy::Aggressive) && let Ok(resp) = write_commit_graph(self, false, false) { @@ -28,7 +27,6 @@ impl GitBare { stdout_all.push_str(&resp.stdout); } - // Repack if many loose objects or packfiles let repack_needed = stats.loose_object_count > 1000 || stats.packfile_count > 10; if repack_needed || strategy == OptimizeStrategy::Aggressive { @@ -41,7 +39,6 @@ impl GitBare { } } - // Prune if aggressive if strategy == OptimizeStrategy::Aggressive && let Ok(resp) = run_gc(self, true, true) { @@ -52,7 +49,6 @@ impl GitBare { } } OptimizeStrategy::Incremental => { - // Just run commit-graph write incrementally if let Ok(resp) = write_commit_graph(self, false, false) { if !resp.ok { stderr_all.push_str(&resp.stderr); @@ -71,7 +67,6 @@ impl GitBare { } fn get_repository_statistics(&self) -> GitResult { - // Count loose objects let loose = std::fs::read_dir(self.bare_dir.join("objects")) .map(|d| { d.filter_map(|e| e.ok()) @@ -83,13 +78,11 @@ impl GitBare { }) .unwrap_or(0); - // Count packfiles let pack_dir = self.bare_dir.join("objects").join("pack"); let pack_count = std::fs::read_dir(&pack_dir) .map(|d| d.filter_map(|e| e.ok()).count() as u64) .unwrap_or(0); - // Check commit-graph let cg_size = std::fs::metadata( self.bare_dir .join("objects") diff --git a/repository/search_files.rs b/repository/search_files.rs index 331b2a7..11c3e16 100644 --- a/repository/search_files.rs +++ b/repository/search_files.rs @@ -67,7 +67,6 @@ impl GitBare { let mut results = Vec::new(); for line in stdout.lines() { - // Format: path:line:col:matched_text if let Some((path_and_rest, matched)) = line.rsplit_once(':') { let prefix_parts: Vec<&str> = path_and_rest.rsplitn(3, ':').collect(); if prefix_parts.len() >= 3 @@ -144,7 +143,6 @@ impl GitBare { continue; } - // Simple substring/case-insensitive matching for file names let query = &request.query; let matched = if query.is_empty() { true diff --git a/sanitize.rs b/sanitize.rs index 69eb3f2..376122e 100644 --- a/sanitize.rs +++ b/sanitize.rs @@ -32,9 +32,13 @@ pub fn validate_oid_hex(hex: &str) -> GitResult<()> { if hex.is_empty() { return Err(GitError::InvalidArgument("oid hex cannot be empty".into())); } - if !(4..=64).contains(&hex.len()) { + if !(crate::config::MIN_OID_HEX_LENGTH..=crate::config::MAX_OID_HEX_LENGTH) + .contains(&hex.len()) + { return Err(GitError::InvalidArgument(format!( - "oid hex length must be 4..=64 chars: {}", + "oid hex length must be {}..={} chars: {}", + crate::config::MIN_OID_HEX_LENGTH, + crate::config::MAX_OID_HEX_LENGTH, hex.len() ))); } @@ -75,9 +79,10 @@ pub fn validate_ref_name(name: &str) -> GitResult<()> { "ref name contains forbidden character: {name}" ))); } - if name.len() > 255 { + if name.len() > crate::config::MAX_REF_NAME_LENGTH { return Err(GitError::InvalidArgument(format!( - "ref name too long (max 255 chars): {name}" + "ref name too long (max {} chars): {name}", + crate::config::MAX_REF_NAME_LENGTH ))); } Ok(()) @@ -91,35 +96,36 @@ pub fn validate_revision(rev: &str) -> GitResult<()> { if rev.is_empty() { return Err(GitError::InvalidArgument("revision cannot be empty".into())); } - if rev.len() > 256 { + if rev.len() > crate::config::MAX_REVISION_LENGTH { return Err(GitError::InvalidArgument(format!( - "revision too long (max 256 chars): {}", + "revision too long (max {} chars): {}", + crate::config::MAX_REVISION_LENGTH, rev.len() ))); } - if rev.chars().all(|c| c.is_ascii_hexdigit()) && rev.len() >= 4 && rev.len() <= 64 { + if rev.chars().all(|c| c.is_ascii_hexdigit()) + && rev.len() >= crate::config::MIN_OID_HEX_LENGTH + && rev.len() <= crate::config::MAX_OID_HEX_LENGTH + { return Ok(()); } if rev == "HEAD" { return Ok(()); } - // Allow ref:refs/heads/... (git internal format) if let Some(rest) = rev.strip_prefix("ref:") { return validate_ref_name(rest.trim()); } - const MAX_ANCESTRY_DEPTH: u32 = 10000; - if let Some(tilde_pos) = rev.rfind('~') { let num_part = &rev[tilde_pos + 1..]; if !num_part.is_empty() && num_part.chars().all(|c| c.is_ascii_digit()) { let depth: u32 = num_part .parse() .map_err(|_| GitError::InvalidArgument("invalid ~N syntax".into()))?; - if depth > MAX_ANCESTRY_DEPTH { + if depth > crate::config::MAX_ANCESTRY_DEPTH { return Err(GitError::InvalidArgument(format!( "~N depth too large: {} (max {})", - depth, MAX_ANCESTRY_DEPTH + depth, crate::config::MAX_ANCESTRY_DEPTH ))); } } @@ -140,10 +146,10 @@ pub fn validate_revision(rev: &str) -> GitResult<()> { let depth: u32 = num_part .parse() .map_err(|_| GitError::InvalidArgument("invalid ^N syntax".into()))?; - if depth > MAX_ANCESTRY_DEPTH { + if depth > crate::config::MAX_ANCESTRY_DEPTH { return Err(GitError::InvalidArgument(format!( "^N depth too large: {} (max {})", - depth, MAX_ANCESTRY_DEPTH + depth, crate::config::MAX_ANCESTRY_DEPTH ))); } } @@ -204,9 +210,10 @@ pub fn validate_file_path(path: &str) -> GitResult<()> { "file path cannot contain null byte: {path}" ))); } - if path.len() > 4096 { + if path.len() > crate::config::MAX_FILE_PATH_LENGTH { return Err(GitError::InvalidArgument(format!( - "file path too long (max 4096 chars): {path}" + "file path too long (max {} chars): {path}", + crate::config::MAX_FILE_PATH_LENGTH ))); } @@ -220,7 +227,6 @@ pub fn validate_file_path(path: &str) -> GitResult<()> { ))); } - // Windows reserved names check #[cfg(target_os = "windows")] { const RESERVED_NAMES: &[&str] = &[ @@ -307,10 +313,11 @@ pub fn validate_remote_url(url: &str) -> GitResult<()> { "remote URL cannot be empty".into(), )); } - if url.len() > 4096 { - return Err(GitError::InvalidArgument( - "remote URL too long (max 4096 chars)".into(), - )); + if url.len() > crate::config::MAX_REMOTE_URL_LENGTH { + return Err(GitError::InvalidArgument(format!( + "remote URL too long (max {} chars)", + crate::config::MAX_REMOTE_URL_LENGTH + ))); } if url.contains('\0') || url.contains('\n') || url.contains('\r') { return Err(GitError::InvalidArgument( @@ -343,14 +350,37 @@ pub fn validate_refspec(refspec: &str) -> GitResult<()> { "refspec contains shell metacharacter: {refspec}" ))); } - if refspec.len() > 1024 { - return Err(GitError::InvalidArgument( - "refspec too long (max 1024 chars)".into(), - )); + if refspec.len() > crate::config::MAX_REFSPEC_LENGTH { + return Err(GitError::InvalidArgument(format!( + "refspec too long (max {} chars)", + crate::config::MAX_REFSPEC_LENGTH + ))); } Ok(()) } +/// Sanitize git stderr output for logging to prevent leaking sensitive data +/// such as credentials in URLs, absolute filesystem paths, or email addresses. +pub fn sanitize_git_stderr(stderr: &str) -> String { + let mut s = stderr.to_string(); + for scheme in &["https://", "http://", "git+ssh://", "ssh://"] { + while let Some(start) = s.find(scheme) { + let after_scheme = start + scheme.len(); + if let Some(at_pos) = s[after_scheme..].find('@') { + let at_abs = after_scheme + at_pos; + let replacement = format!("{scheme}***:***@"); + s.replace_range(start..=at_abs, &replacement); + } else { + break; + } + } + } + if let Some(homedir) = std::env::var_os("HOME").and_then(|v| v.into_string().ok()) { + s = s.replace(&homedir, "~"); + } + s +} + /// Validate a storage-relative path (used in resolve_for_init and from_repository_header). /// /// Must not contain path traversal, must be a simple relative path. @@ -370,10 +400,11 @@ pub fn validate_relative_path(path: &str) -> GitResult<()> { "relative_path cannot contain null byte".into(), )); } - if path.len() > 4096 { - return Err(GitError::InvalidArgument( - "relative_path too long (max 4096 chars)".into(), - )); + if path.len() > crate::config::MAX_RELATIVE_PATH_LENGTH { + return Err(GitError::InvalidArgument(format!( + "relative_path too long (max {} chars)", + crate::config::MAX_RELATIVE_PATH_LENGTH + ))); } if path.contains("..") { return Err(GitError::InvalidArgument(format!( diff --git a/server/cache.rs b/server/cache.rs index 0aee00c..69267d3 100644 --- a/server/cache.rs +++ b/server/cache.rs @@ -22,25 +22,13 @@ //! - **TTL** (time-to-live): 10 minutes — hard upper bound for safety //! - Evictions are tracked via metrics for observability -use std::sync::OnceLock; -use std::time::Duration; +use dashmap::DashMap; +use std::sync::{Arc, OnceLock}; use moka::sync::Cache; use prost::Message; -/// Maximum total cache weight (key + value allocated bytes): 256 MB. -const CACHE_MAX_WEIGHT: u64 = 256 * 1024 * 1024; - -/// Hard time-to-live: entries older than this are unconditionally evicted. -const CACHE_MAX_TTL: Duration = Duration::from_secs(600); // 10 min - -/// Time-to-idle: entries not accessed within this window are evicted. -/// Frequently accessed entries survive up to TTL, cold entries expire quickly. -const CACHE_TTI: Duration = Duration::from_secs(120); // 2 min - -/// Estimated per-entry overhead (Moka internal Arc + metadata). -/// Added to the weigher result to prevent underestimation. -const ENTRY_OVERHEAD: u32 = 128; +use crate::config::{CACHE_ENTRY_OVERHEAD as ENTRY_OVERHEAD, CACHE_MAX_TTL, CACHE_MAX_WEIGHT, CACHE_TTI}; struct CacheState { store: Cache, Vec>, @@ -52,7 +40,6 @@ fn state() -> &'static CacheState { CACHE.get_or_init(|| { let store = Cache::builder() .weigher(|key: &Vec, value: &Vec| -> u32 { - // capacity() reflects actual allocation including spare capacity key.capacity() as u32 + value.capacity() as u32 + ENTRY_OVERHEAD }) .max_capacity(CACHE_MAX_WEIGHT) @@ -65,7 +52,6 @@ fn state() -> &'static CacheState { moka::notification::RemovalCause::Replaced => "replaced", moka::notification::RemovalCause::Size => "size", }; - // Extract namespace for per-namespace metrics let namespace = decode_namespace(&key); crate::metrics::record_cache_eviction(namespace, cause_str); }) @@ -87,7 +73,27 @@ fn cache() -> &'static Cache, Vec> { &state().store } -// Key encoding + +struct RepoKeyIndex { + repo_to_keys: DashMap>>>, +} + +static REPO_KEY_INDEX: OnceLock = OnceLock::new(); + +fn repo_key_index() -> &'static RepoKeyIndex { + REPO_KEY_INDEX.get_or_init(|| RepoKeyIndex { + repo_to_keys: DashMap::new(), + }) +} + +fn track_cache_key(repo_path: &str, key: Arc>) { + repo_key_index() + .repo_to_keys + .entry(repo_path.to_string()) + .or_default() + .push(key); +} + /// Encode a structured cache key. /// @@ -105,12 +111,15 @@ fn encode_key(namespace: &str, repo_path: &str, request_bytes: &[u8]) -> Option< return None; } - let total = 1 + ns.len() + 2 + rp.len() + request_bytes.len(); + const SEPARATOR: u8 = 0xFF; + let total = 1 + ns.len() + 1 + 2 + rp.len() + 1 + request_bytes.len(); let mut key = Vec::with_capacity(total); key.push(ns.len() as u8); key.extend_from_slice(ns); + key.push(SEPARATOR); key.extend_from_slice(&(rp.len() as u16).to_le_bytes()); key.extend_from_slice(rp); + key.push(SEPARATOR); key.extend_from_slice(request_bytes); Some(key) } @@ -125,31 +134,6 @@ fn decode_namespace(key: &[u8]) -> &str { std::str::from_utf8(&key[1..end]).unwrap_or("unknown") } -/// Extract the repo_path from a cache key (returns slice into the key). -fn extract_repo_path_bytes(key: &[u8]) -> Option<&[u8]> { - if key.len() < 3 { - return None; - } - let ns_len = key[0] as usize; - let rp_len_offset = 1 + ns_len; - if key.len() < rp_len_offset + 2 { - return None; - } - let rp_len = u16::from_le_bytes([key[rp_len_offset], key[rp_len_offset + 1]]) as usize; - let rp_start = rp_len_offset + 2; - let rp_end = rp_start.checked_add(rp_len)?; - if rp_end > key.len() { - return None; - } - Some(&key[rp_start..rp_end]) -} - -/// Check if a cache key belongs to the given repository. -fn key_matches_repo(key: &[u8], target_repo: &[u8]) -> bool { - extract_repo_path_bytes(key).is_some_and(|rp| rp == target_repo) -} - -// Single-message cache /// Cache a single protobuf response. /// @@ -176,8 +160,10 @@ where if let Some(bytes) = cache().get(&key) && let Ok(response) = Res::decode(bytes.as_slice()) { - let elapsed = std::time::Duration::ZERO; // Moka get is memory-only, effectively instant + let elapsed = std::time::Duration::ZERO; crate::metrics::record_cache_op("moka", "hit", elapsed); + crate::metrics::record_cache_hit_ns(namespace); + crate::metrics::record_cache_value_size(namespace, bytes.len()); tracing::debug!( namespace = %namespace, repo = %repo_path, @@ -188,6 +174,8 @@ where return Ok(response); } + crate::metrics::record_cache_miss_ns(namespace); + tracing::debug!( namespace = %namespace, repo = %repo_path, @@ -208,14 +196,16 @@ where "failed to encode cache response" ); } else { + crate::metrics::record_cache_value_size(namespace, bytes.len()); + let key_arc = Arc::new(key.clone()); cache().insert(key, bytes); + track_cache_key(repo_path, key_arc); } crate::metrics::record_cache_op("moka", "miss", build_elapsed); Ok(response) } -// Vec-message cache /// Cache a `Vec` protobuf response using length-delimited encoding. /// @@ -238,48 +228,63 @@ where return build(); }; - // Try cache hit if let Some(bytes) = cache().get(&key) { - let mut items = Vec::new(); - let mut remaining = bytes.as_slice(); - let mut valid = true; + if bytes.len() < 4 { + cache().invalidate(&key); + } else { + let stored_crc = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]); + let actual_crc = crc32fast::hash(&bytes[4..]); + if stored_crc != actual_crc { + tracing::warn!( + namespace = %namespace, + repo = %repo_path, + "cache entry corrupted (CRC mismatch), invalidating" + ); + cache().invalidate(&key); + } else { + let mut items = Vec::new(); + let mut remaining = &bytes[4..]; + let mut valid = true; - // Pre-allocate based on first size hint - if let Ok(first) = Item::decode_length_delimited(&mut remaining) { - items.push(first); - while !remaining.is_empty() { - match Item::decode_length_delimited(&mut remaining) { - Ok(item) => items.push(item), - Err(_) => { - valid = false; - break; + if let Ok(first) = Item::decode_length_delimited(&mut remaining) { + items.push(first); + while !remaining.is_empty() { + match Item::decode_length_delimited(&mut remaining) { + Ok(item) => items.push(item), + Err(_) => { + valid = false; + break; + } + } } + } else if !remaining.is_empty() { + valid = false; } + + if valid { + crate::metrics::record_cache_op("moka", "hit", std::time::Duration::ZERO); + crate::metrics::record_cache_hit_ns(namespace); + crate::metrics::record_cache_value_size(namespace, bytes.len()); + tracing::debug!( + namespace = %namespace, + repo = %repo_path, + item_count = items.len(), + "vec cache hit" + ); + return Ok(items); + } + + tracing::warn!( + namespace = %namespace, + repo = %repo_path, + "vec cache decode failed, rebuilding" + ); + cache().invalidate(&key); } - } else if !remaining.is_empty() { - valid = false; } - - if valid { - crate::metrics::record_cache_op("moka", "hit", std::time::Duration::ZERO); - tracing::debug!( - namespace = %namespace, - repo = %repo_path, - item_count = items.len(), - "vec cache hit" - ); - return Ok(items); - } - - tracing::warn!( - namespace = %namespace, - repo = %repo_path, - "vec cache decode failed, rebuilding" - ); - // Invalidate the corrupt entry - cache().invalidate(&key); } + crate::metrics::record_cache_miss_ns(namespace); tracing::debug!( namespace = %namespace, repo = %repo_path, @@ -290,15 +295,14 @@ where let response = build()?; let build_elapsed = start.elapsed(); - // Encode all items into a single buffer with length-delimited framing let total_est: usize = response .iter() - .map(|item| item.encoded_len() + 10) // 10 = prost length-delimited overhead + .map(|item| item.encoded_len() + 10) .sum(); - let mut bytes = Vec::with_capacity(total_est); + let mut data = Vec::with_capacity(total_est); let mut encode_ok = true; for item in &response { - if let Err(err) = item.encode_length_delimited(&mut bytes) { + if let Err(err) = item.encode_length_delimited(&mut data) { tracing::warn!( namespace = %namespace, repo = %repo_path, @@ -311,13 +315,19 @@ where } if encode_ok { + let crc = crc32fast::hash(&data); + let mut bytes = Vec::with_capacity(4 + data.len()); + bytes.extend_from_slice(&crc.to_le_bytes()); + bytes.extend_from_slice(&data); + crate::metrics::record_cache_value_size(namespace, bytes.len()); + let key_arc = Arc::new(key.clone()); cache().insert(key, bytes); + track_cache_key(repo_path, key_arc); } crate::metrics::record_cache_op("moka", "miss", build_elapsed); Ok(response) } -// Request encoding helpers /// Encode a protobuf request into a byte vector. #[inline] @@ -329,7 +339,6 @@ fn encode_request(request: &Req) -> Vec { buf } -// Repository-scoped invalidation /// Invalidate all cache entries for a specific repository. /// @@ -341,30 +350,24 @@ fn encode_request(request: &Req) -> Vec { /// create branch, etc.) to prevent serving stale data. pub(crate) fn invalidate_repo(relative_path: &str) { let c = cache(); - let target = relative_path.as_bytes(); - let mut keys_to_remove: Vec>> = Vec::with_capacity(64); + let idx = repo_key_index(); - for (key, _value) in c.iter() { - if key_matches_repo(&key, target) { - keys_to_remove.push(key); + if let Some((_key, keys)) = idx.repo_to_keys.remove(relative_path) { + let removed = keys.len(); + for key in &keys { + c.invalidate(key.as_ref()); } - } - let removed = keys_to_remove.len(); - for key in &keys_to_remove { - c.invalidate(key.as_ref()); - } - - if removed > 0 { - tracing::debug!( - relative_path = %relative_path, - entries_removed = removed, - "cache invalidated for repository" - ); + if removed > 0 { + tracing::debug!( + relative_path = %relative_path, + entries_removed = removed, + "cache invalidated for repository (indexed)" + ); + } } } -// Selector helpers use crate::pb::{ObjectSelector, object_selector}; diff --git a/server/mod.rs b/server/mod.rs index 982db7e..7d85874 100644 --- a/server/mod.rs +++ b/server/mod.rs @@ -30,6 +30,8 @@ use gix::discover::is_git; use std::path::{Path, PathBuf}; use tokio_stream::wrappers::ReceiverStream; +use tonic::codec::CompressionEncoding; + use crate::bare::GitBare; use crate::error::{GitError, GitResult}; use crate::pb::{ @@ -151,25 +153,20 @@ impl GitksService { if relative_path.is_empty() { return Err(tonic::Status::invalid_argument("relative_path is required")); } - // Validate early to reject '..' and other traversal patterns crate::sanitize::validate_relative_path(relative_path) .map_err(|e| tonic::Status::invalid_argument(e.to_string()))?; let candidate = self.repo_prefix.join(relative_path); - // Canonicalize repo_prefix (which should exist) for a reliable check let prefix_canon = self .repo_prefix .canonicalize() .unwrap_or_else(|_| self.repo_prefix.clone()); - // Unified path validation to avoid TOCTOU let canonical = match candidate.canonicalize() { Ok(canon) => { - // Path exists and was canonicalized canon } Err(_) => { - // Path doesn't exist yet — validate via parent let parent = candidate.parent().unwrap_or(&self.repo_prefix); let filename = candidate.file_name().ok_or_else(|| { tonic::Status::invalid_argument("invalid path: missing filename") @@ -180,7 +177,6 @@ impl GitksService { .unwrap_or_else(|_| parent.to_path_buf()); let constructed = parent_canon.join(filename); - // String-level verification for non-existent paths let constructed_str = constructed.to_string_lossy(); let prefix_str = prefix_canon.to_string_lossy(); @@ -194,13 +190,19 @@ impl GitksService { } }; - // Final check: canonical must be under prefix if !canonical.starts_with(&prefix_canon) { return Err(tonic::Status::invalid_argument( "path traversal detected: relative_path escapes repo prefix", )); } + let double_canon = canonical.canonicalize().unwrap_or_else(|_| canonical.clone()); + if canonical != double_canon { + return Err(tonic::Status::invalid_argument( + "path resolved to different target (possible symlink race)", + )); + } + Ok(canonical) } @@ -211,10 +213,8 @@ impl GitksService { _old_oid: &str, _new_oid: &str, ) { - // Invalidate moka caches crate::server::cache::invalidate_repo(relative_path); - // Invalidate disk cache if let Some(ref pc) = self.pack_cache { pc.invalidate_repo(relative_path); } @@ -328,10 +328,8 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult= 1 { tracing::warn!( repo = %gb.bare_dir.display(), @@ -343,11 +341,12 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult) -> GitError { pub async fn serve( addr: std::net::SocketAddr, svc: GitksService, +) -> Result<(), tonic::transport::Error> { + serve_with_shutdown(addr, svc, std::future::pending()).await +} + +/// Start the gRPC server and block until the shutdown signal fires. +/// +/// The `shutdown` future should resolve when the process should stop +/// (e.g. on SIGTERM/SIGINT). All in-flight requests are drained before +/// the server returns. +pub async fn serve_with_shutdown( + addr: std::net::SocketAddr, + svc: GitksService, + shutdown: impl std::future::Future, ) -> Result<(), tonic::transport::Error> { let span = tracing::info_span!("gitks.server", %addr); let _enter = span.enter(); @@ -393,18 +405,42 @@ pub async fn serve( let (health_reporter, health_service) = tonic_health::server::health_reporter(); - let repo_svc = repository_service_server::RepositoryServiceServer::new(svc.clone()); - let archive_svc = archive_service_server::ArchiveServiceServer::new(svc.clone()); - let blame_svc = blame_service_server::BlameServiceServer::new(svc.clone()); - let branch_svc = branch_service_server::BranchServiceServer::new(svc.clone()); - let commit_svc = commit_service_server::CommitServiceServer::new(svc.clone()); - let diff_svc = diff_service_server::DiffServiceServer::new(svc.clone()); - let merge_svc = merge_service_server::MergeServiceServer::new(svc.clone()); - let pack_svc = pack_service_server::PackServiceServer::new(svc.clone()); - let ref_svc = ref_service_server::RefServiceServer::new(svc.clone()); - let remote_svc = remote_service_server::RemoteServiceServer::new(svc.clone()); - let tag_svc = tag_service_server::TagServiceServer::new(svc.clone()); - let tree_svc = tree_service_server::TreeServiceServer::new(svc); + let repo_svc = repository_service_server::RepositoryServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let archive_svc = archive_service_server::ArchiveServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let blame_svc = blame_service_server::BlameServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let branch_svc = branch_service_server::BranchServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let commit_svc = commit_service_server::CommitServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let diff_svc = diff_service_server::DiffServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let merge_svc = merge_service_server::MergeServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let pack_svc = pack_service_server::PackServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let ref_svc = ref_service_server::RefServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let remote_svc = remote_service_server::RemoteServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let tag_svc = tag_service_server::TagServiceServer::new(svc.clone()) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); + let tree_svc = tree_service_server::TreeServiceServer::new(svc) + .send_compressed(CompressionEncoding::Gzip) + .accept_compressed(CompressionEncoding::Gzip); health_reporter .set_serving::>() @@ -458,5 +494,5 @@ pub async fn serve( .add_service(tag_svc) .add_service(tree_svc); tracing::info!("server ready, starting to accept connections"); - server.serve(addr).await + server.serve_with_shutdown(addr, shutdown).await } diff --git a/server/pack.rs b/server/pack.rs index 7bc50c7..0572f8b 100644 --- a/server/pack.rs +++ b/server/pack.rs @@ -115,7 +115,6 @@ impl pack_service_server::PackService for GitksService { .upload_pack(tokio_stream::wrappers::ReceiverStream::new(rx)) .await?; let out = super::bridge_server_stream(resp.into_inner()); - // Create a dummy cancel token for the forwarded stream let cancel_token = tokio_util::sync::CancellationToken::new(); let cancel_guard = cancel_token.drop_guard(); return Ok(tonic::Response::new( @@ -190,7 +189,6 @@ impl pack_service_server::PackService for GitksService { .receive_pack(tokio_stream::wrappers::ReceiverStream::new(rx)) .await?; let out = super::bridge_server_stream(resp.into_inner()); - // Create a dummy cancel token for the forwarded stream let cancel_token = tokio_util::sync::CancellationToken::new(); let cancel_guard = cancel_token.drop_guard(); return Ok(tonic::Response::new( @@ -310,7 +308,6 @@ impl pack_service_server::PackService for GitksService { return Ok(tonic::Response::new(ReceiverStream::new(rx))); } - // Cache miss: execute pack-objects and tee to cache tracing::info!(%repo, digest = %digest, "pack-objects cache miss"); let stream = gb.pack_objects(inner).await?; let tee_stream = pc.tee_pack_stream(&digest, stream); diff --git a/snapshot/sync.rs b/snapshot/sync.rs index 025859c..b33703e 100644 --- a/snapshot/sync.rs +++ b/snapshot/sync.rs @@ -59,7 +59,6 @@ impl BundleApplicator { .spawn() .map_err(|e| format!("spawn git bundle unbundle: {e}"))?; - // Stream file contents to stdin in a background thread let mut stdin = child.stdin.take().ok_or("no stdin")?; let file_handle = file; let writer = std::thread::spawn(move || -> Result<(), String> { @@ -84,7 +83,6 @@ impl BundleApplicator { .wait_with_output() .map_err(|e| format!("wait bundle: {e}"))?; - // Wait for writer thread let _ = writer.join().map_err(|_| "writer thread panicked")?; if !output.status.success() { diff --git a/tests/commit_test.rs b/tests/commit_test.rs index b5d1af3..e8cd100 100644 --- a/tests/commit_test.rs +++ b/tests/commit_test.rs @@ -519,3 +519,221 @@ async fn test_oid_binary_encoding() { let hex_from_bytes: String = oid.value.iter().map(|b| format!("{b:02x}")).collect(); assert_eq!(hex_from_bytes, oid.hex); } + + +#[test] +fn test_count_commits_head() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.count_commits(CountCommitsRequest { + repository: Some(hdr()), + revision: String::new(), + path: String::new(), + since: String::new(), + until: String::new(), + }).unwrap(); + assert_eq!(resp.count, 4); +} + +#[test] +fn test_count_commits_with_revision() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.count_commits(CountCommitsRequest { + repository: Some(hdr()), + revision: "feature".into(), + path: String::new(), + since: String::new(), + until: String::new(), + }).unwrap(); + assert_eq!(resp.count, 1); +} + +#[test] +fn test_count_commits_with_path() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.count_commits(CountCommitsRequest { + repository: Some(hdr()), + revision: "main".into(), + path: "README.md".into(), + since: String::new(), + until: String::new(), + }).unwrap(); + assert!(resp.count >= 1); +} + +#[test] +fn test_count_diverging_commits() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.count_diverging_commits(CountDivergingCommitsRequest { + repository: Some(hdr()), + left: "feature".into(), + right: "main".into(), + }).unwrap(); + assert_eq!(resp.left_count, 0); + assert_eq!(resp.right_count, 3); +} + + +#[test] +fn test_find_commit_by_oid() { + let (_dir, gb) = common::setup_bare_repo(); + let oid = common::get_main_oid(&gb); + let commit = gb.find_commit(FindCommitRequest { + repository: Some(hdr()), + revision: common::oid_selector(&oid), + include_stats: false, + }).unwrap(); + assert!(!commit.oid.as_ref().unwrap().hex.is_empty()); +} + +#[test] +fn test_find_commit_by_revision() { + let (_dir, gb) = common::setup_bare_repo(); + let commit = gb.find_commit(FindCommitRequest { + repository: Some(hdr()), + revision: common::rev_selector("main"), + include_stats: false, + }).unwrap(); + assert!(!commit.oid.as_ref().unwrap().hex.is_empty()); +} + +#[test] +fn test_find_commit_default_head() { + let (_dir, gb) = common::setup_bare_repo(); + let commit = gb.find_commit(FindCommitRequest { + repository: Some(hdr()), + revision: None, + include_stats: false, + }).unwrap(); + assert!(!commit.oid.as_ref().unwrap().hex.is_empty()); +} + + +#[test] +fn test_list_commits_by_oid() { + let (_dir, gb) = common::setup_bare_repo(); + let oid = common::get_main_oid(&gb); + let oid_bytes = gitks::oid::hex_to_bytes(&oid).unwrap(); + let resp = gb.list_commits_by_oid(ListCommitsByOidRequest { + repository: Some(hdr()), + oids: vec![oid_bytes], + include_stats: false, + }).unwrap(); + assert_eq!(resp.commits.len(), 1); +} + +#[test] +fn test_list_commits_by_oid_empty() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.list_commits_by_oid(ListCommitsByOidRequest { + repository: Some(hdr()), + oids: vec![], + include_stats: false, + }).unwrap(); + assert!(resp.commits.is_empty()); +} + + +#[test] +fn test_commits_by_message_basic() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.commits_by_message(CommitsByMessageRequest { + repository: Some(hdr()), + query: "initial".into(), + revision: String::new(), + limit: 10, + offset: 0, + case_insensitive: false, + }).unwrap(); + assert_eq!(resp.commits.len(), 1); +} + +#[test] +fn test_commits_by_message_case_insensitive() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.commits_by_message(CommitsByMessageRequest { + repository: Some(hdr()), + query: "INITIAL".into(), + revision: String::new(), + limit: 10, + offset: 0, + case_insensitive: true, + }).unwrap(); + assert_eq!(resp.commits.len(), 1); +} + +#[test] +fn test_commits_by_message_no_match() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.commits_by_message(CommitsByMessageRequest { + repository: Some(hdr()), + query: "zzzznonexistent".into(), + revision: String::new(), + limit: 10, + offset: 0, + case_insensitive: false, + }).unwrap(); + assert!(resp.commits.is_empty()); +} + + +#[test] +fn test_check_objects_exist() { + let (_dir, gb) = common::setup_bare_repo(); + let oid = common::get_main_oid(&gb); + let resp = gb.check_objects_exist(CheckObjectsExistRequest { + repository: Some(hdr()), + revisions: vec![oid.clone(), "HEAD".into(), "nonexistent-branch".into()], + }).unwrap(); + assert_eq!(resp.revisions.len(), 3); + assert!(resp.revisions[0].exists); + assert!(resp.revisions[1].exists); + assert!(!resp.revisions[2].exists); +} + + +#[test] +fn test_get_commit_stats() { + let (_dir, gb) = common::setup_bare_repo(); + let oid = common::get_main_oid(&gb); + let stats = gb.get_commit_stats(GetCommitStatsRequest { + repository: Some(hdr()), + revision: common::oid_selector(&oid), + }).unwrap(); + assert!(stats.changed_files >= 1); +} + +#[test] +fn test_get_commit_stats_default() { + let (_dir, gb) = common::setup_bare_repo(); + let stats = gb.get_commit_stats(GetCommitStatsRequest { + repository: Some(hdr()), + revision: None, + }).unwrap(); + assert!(stats.changed_files >= 1); +} + + +#[test] +fn test_last_commit_for_path() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.last_commit_for_path(LastCommitForPathRequest { + repository: Some(hdr()), + path: "README.md".into(), + revision: "main".into(), + literal_pathspec: false, + }).unwrap(); + assert!(resp.commit.is_some()); + assert_eq!(resp.path, "README.md"); +} + +#[test] +fn test_last_commit_for_path_nonexistent() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.last_commit_for_path(LastCommitForPathRequest { + repository: Some(hdr()), + path: "nonexistent.txt".into(), + revision: "main".into(), + literal_pathspec: false, + }).unwrap(); + assert!(resp.commit.is_none()); +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs index 35043da..43f7931 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -172,3 +172,52 @@ pub fn setup_bare_repo_with_conflict() -> (tempfile::TempDir, GitBare) { (dir, GitBare::new(bare_dir)) } + +#[allow(dead_code)] +pub fn get_oid(gb: &GitBare, rev: &str) -> String { + let output = std::process::Command::new("git") + .args([ + "--git-dir", + gb.bare_dir.to_string_lossy().as_ref(), + "rev-parse", + rev, + ]) + .output() + .expect("git rev-parse"); + assert!(output.status.success(), "git rev-parse {rev} failed"); + String::from_utf8_lossy(&output.stdout).trim().to_string() +} + +#[allow(dead_code)] +pub fn get_main_oid(gb: &GitBare) -> String { + get_oid(gb, "refs/heads/main") +} + +#[allow(dead_code)] +pub fn get_feature_oid(gb: &GitBare) -> String { + get_oid(gb, "refs/heads/feature") +} + +#[allow(dead_code)] +pub fn oid_selector(hex: &str) -> Option { + Some(gitks::pb::ObjectSelector { + selector: Some(gitks::pb::object_selector::Selector::Oid( + gitks::pb::Oid { + hex: hex.to_string(), + value: vec![], + format: 0, + }, + )), + }) +} + +#[allow(dead_code)] +pub fn rev_selector(rev: &str) -> Option { + Some(gitks::pb::ObjectSelector { + selector: Some(gitks::pb::object_selector::Selector::Revision( + gitks::pb::ObjectName { + revision: rev.to_string(), + }, + )), + }) +} diff --git a/tests/diff_test.rs b/tests/diff_test.rs index 1135fff..9f1e747 100644 --- a/tests/diff_test.rs +++ b/tests/diff_test.rs @@ -266,3 +266,79 @@ async fn test_get_patch() { .collect(); assert!(combined.contains("diff --git") || combined.contains("@@")); } + + +#[test] +fn test_find_changed_paths() { + let (_dir, gb) = common::setup_bare_repo(); + let feature_oid = common::get_feature_oid(&gb); + let main_oid = common::get_main_oid(&gb); + let resp = gb.find_changed_paths(FindChangedPathsRequest { + repository: Some(hdr()), + base: feature_oid, + head: main_oid, + paths: vec![], + }).unwrap(); + assert!(!resp.paths.is_empty()); +} + +#[test] +fn test_find_changed_paths_same_ref() { + let (_dir, gb) = common::setup_bare_repo(); + let oid = common::get_main_oid(&gb); + let resp = gb.find_changed_paths(FindChangedPathsRequest { + repository: Some(hdr()), + base: oid.clone(), + head: oid, + paths: vec![], + }).unwrap(); + assert!(resp.paths.is_empty()); +} + + +#[test] +fn test_raw_diff() { + let (_dir, gb) = common::setup_bare_repo(); + let feature_oid = common::get_feature_oid(&gb); + let main_oid = common::get_main_oid(&gb); + let chunks = gb.raw_diff(RawDiffRequest { + repository: Some(hdr()), + base: feature_oid, + head: main_oid, + options: None, + }).unwrap(); + assert!(!chunks.is_empty()); + let combined: Vec = chunks.iter().flat_map(|c| c.data.clone()).collect(); + let text = String::from_utf8_lossy(&combined); + assert!(text.contains("diff")); +} + +#[test] +fn test_raw_patch() { + let (_dir, gb) = common::setup_bare_repo(); + let feature_oid = common::get_feature_oid(&gb); + let main_oid = common::get_main_oid(&gb); + let chunks = gb.raw_patch(RawPatchRequest { + repository: Some(hdr()), + base: feature_oid, + head: main_oid, + }).unwrap(); + assert!(!chunks.is_empty()); + let combined: Vec = chunks.iter().flat_map(|c| c.data.clone()).collect(); + let text = String::from_utf8_lossy(&combined); + assert!(text.contains("From")); +} + + +#[test] +fn test_get_raw_changes() { + let (_dir, gb) = common::setup_bare_repo(); + let feature_oid = common::get_feature_oid(&gb); + let main_oid = common::get_main_oid(&gb); + let resp = gb.get_raw_changes(GetRawChangesRequest { + repository: Some(hdr()), + base: feature_oid, + head: main_oid, + }).unwrap(); + assert!(!resp.changes.is_empty()); +} diff --git a/tests/refs_test.rs b/tests/refs_test.rs index fa133b3..39e9fd5 100644 --- a/tests/refs_test.rs +++ b/tests/refs_test.rs @@ -63,3 +63,102 @@ fn test_list_refs_direct() { assert_eq!(oid.hex.len(), 40, "SHA-1 hex should be 40 chars"); } } + + +#[test] +fn test_write_ref_and_ref_exists() { + let (_dir, gb) = common::setup_bare_repo(); + let oid = common::get_main_oid(&gb); + let resp = gb.write_ref(gitks::pb::WriteRefRequest { + repository: Some(hdr()), + ref_name: "refs/heads/test-write".into(), + new_oid: oid, + old_oid: String::new(), + force: false, + }).unwrap(); + assert!(resp.ok, "write_ref failed: {}", resp.error); + + let exists = gb.ref_exists(gitks::pb::RefExistsRequest { + repository: Some(hdr()), + ref_name: "refs/heads/test-write".into(), + }).unwrap(); + assert!(exists.exists); + + let not_exists = gb.ref_exists(gitks::pb::RefExistsRequest { + repository: Some(hdr()), + ref_name: "refs/heads/nonexistent".into(), + }).unwrap(); + assert!(!not_exists.exists); +} + + +#[test] +fn test_update_references_batch() { + let (_dir, gb) = common::setup_bare_repo(); + let oid = common::get_main_oid(&gb); + let resp = gb.update_references(gitks::pb::UpdateReferencesRequest { + repository: Some(hdr()), + updates: vec![ + gitks::pb::RefUpdateEntry { + ref_name: "refs/heads/batch-a".into(), + new_oid: oid.clone(), + old_oid: String::new(), + }, + gitks::pb::RefUpdateEntry { + ref_name: "refs/heads/batch-b".into(), + new_oid: oid, + old_oid: String::new(), + }, + ], + }).unwrap(); + assert!(resp.failed_refs.is_empty(), "error: {}", resp.error); + + let a = gb.ref_exists(gitks::pb::RefExistsRequest { + repository: Some(hdr()), + ref_name: "refs/heads/batch-a".into(), + }).unwrap(); + assert!(a.exists); +} + +#[test] +fn test_update_references_empty() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.update_references(gitks::pb::UpdateReferencesRequest { + repository: Some(hdr()), + updates: vec![], + }).unwrap(); + assert!(resp.failed_refs.is_empty()); +} + +#[test] +fn test_delete_refs() { + let (_dir, gb) = common::setup_bare_repo(); + let oid = common::get_main_oid(&gb); + gb.write_ref(gitks::pb::WriteRefRequest { + repository: Some(hdr()), + ref_name: "refs/heads/to-delete".into(), + new_oid: oid, + old_oid: String::new(), + force: false, + }).unwrap(); + + let resp = gb.delete_refs(gitks::pb::DeleteRefsRequest { + repository: Some(hdr()), + ref_names: vec!["refs/heads/to-delete".into()], + }).unwrap(); + assert!(resp.failed_refs.is_empty()); + + let exists = gb.ref_exists(gitks::pb::RefExistsRequest { + repository: Some(hdr()), + ref_name: "refs/heads/to-delete".into(), + }).unwrap(); + assert!(!exists.exists); +} + + +#[test] +fn test_find_default_branch_name() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.find_default_branch_name().unwrap(); + assert_eq!(resp.name, "main"); +} diff --git a/tests/repository_test.rs b/tests/repository_test.rs index 3095947..3c3efe5 100644 --- a/tests/repository_test.rs +++ b/tests/repository_test.rs @@ -326,3 +326,193 @@ async fn test_exists_nonexistent_repo() { .into_inner(); assert!(!result.exists); } + + +#[test] +fn test_find_merge_base() { + let (_dir, gb) = common::setup_bare_repo(); + let main_oid = common::get_main_oid(&gb); + let feature_oid = common::get_feature_oid(&gb); + let resp = gb.find_merge_base(FindMergeBaseRequest { + repository: Some(header(&gb)), + revisions: vec![ + main_oid.as_bytes().to_vec(), + feature_oid.as_bytes().to_vec(), + ], + }).unwrap(); + assert!(!resp.base_oid.is_empty()); +} + +#[test] +fn test_find_merge_base_empty() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.find_merge_base(FindMergeBaseRequest { + repository: Some(header(&gb)), + revisions: vec![], + }).unwrap(); + assert!(resp.base_oid.is_empty()); +} + +#[test] +fn test_find_merge_base_single() { + let (_dir, gb) = common::setup_bare_repo(); + let oid = common::get_main_oid(&gb); + let resp = gb.find_merge_base(FindMergeBaseRequest { + repository: Some(header(&gb)), + revisions: vec![oid.as_bytes().to_vec()], + }).unwrap(); + assert!(!resp.base_oid.is_empty()); +} + +#[test] +fn test_commit_is_ancestor() { + let (_dir, gb) = common::setup_bare_repo(); + let feature_oid = common::get_feature_oid(&gb); + let main_oid = common::get_main_oid(&gb); + let resp = gb.commit_is_ancestor(CommitIsAncestorRequest { + repository: Some(header(&gb)), + ancestor_oid: feature_oid, + descendant_oid: main_oid, + }).unwrap(); + assert!(resp.is_ancestor); +} + +#[test] +fn test_commit_is_ancestor_false() { + let (_dir, gb) = common::setup_bare_repo(); + let main_oid = common::get_main_oid(&gb); + let feature_oid = common::get_feature_oid(&gb); + let resp = gb.commit_is_ancestor(CommitIsAncestorRequest { + repository: Some(header(&gb)), + ancestor_oid: main_oid, + descendant_oid: feature_oid, + }).unwrap(); + assert!(!resp.is_ancestor); +} + + +#[test] +fn test_objects_size() { + let (_dir, gb) = common::setup_bare_repo(); + let oid = common::get_main_oid(&gb); + let resp = gb.objects_size(ObjectsSizeRequest { + repository: Some(header(&gb)), + oids: vec![oid.clone(), "0000000000000000000000000000000000000000".into()], + }).unwrap(); + assert_eq!(resp.sizes.len(), 2); + assert!(resp.sizes[0].found); + assert!(resp.sizes[0].size > 0); +} + +#[test] +fn test_objects_size_empty() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.objects_size(ObjectsSizeRequest { + repository: Some(header(&gb)), + oids: vec![], + }).unwrap(); + assert!(resp.sizes.is_empty()); +} + +#[test] +fn test_repository_size() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.repository_size().unwrap(); + assert!(resp.size_bytes > 0); +} + + +#[test] +fn test_find_license_no_license() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.find_license().unwrap(); + assert!(resp.license_spdx.is_empty()); +} + + +#[test] +fn test_optimize_repository_heuristic() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.optimize_repository(OptimizeRepositoryRequest { + repository: Some(header(&gb)), + strategy: OptimizeStrategy::Heuristic as i32, + }).unwrap(); + assert!(resp.ok); +} + +#[test] +fn test_optimize_repository_incremental() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.optimize_repository(OptimizeRepositoryRequest { + repository: Some(header(&gb)), + strategy: OptimizeStrategy::Incremental as i32, + }).unwrap(); + assert!(resp.ok); +} + + +#[test] +fn test_search_files_by_content() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.search_files_by_content(SearchFilesByContentRequest { + repository: Some(header(&gb)), + query: "Test".into(), + revision: "main".into(), + max_results: 10, + case_sensitive: true, + }).unwrap(); + assert!(!resp.results.is_empty()); +} + +#[test] +fn test_search_files_by_content_no_match() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.search_files_by_content(SearchFilesByContentRequest { + repository: Some(header(&gb)), + query: "zzzznonexistentzzzz".into(), + revision: "main".into(), + max_results: 10, + case_sensitive: true, + }).unwrap(); + assert!(resp.results.is_empty()); +} + +#[test] +fn test_search_files_by_content_empty_query() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.search_files_by_content(SearchFilesByContentRequest { + repository: Some(header(&gb)), + query: String::new(), + revision: "main".into(), + max_results: 10, + case_sensitive: true, + }); + assert!(resp.is_err()); +} + +#[test] +fn test_search_files_by_name() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.search_files_by_name(SearchFilesByNameRequest { + repository: Some(header(&gb)), + query: "README".into(), + revision: "main".into(), + max_results: 10, + recursive: true, + }).unwrap(); + assert!(!resp.results.is_empty()); + assert!(resp.results.iter().any(|r| r.path.contains("README"))); +} + +#[test] +fn test_search_files_by_name_no_match() { + let (_dir, gb) = common::setup_bare_repo(); + let resp = gb.search_files_by_name(SearchFilesByNameRequest { + repository: Some(header(&gb)), + query: "zzzznonexistentzzzz".into(), + revision: "main".into(), + max_results: 10, + recursive: true, + }).unwrap(); + assert!(resp.results.is_empty()); +} diff --git a/tests/sanitize_test.rs b/tests/sanitize_test.rs index fff8b77..95e5edf 100644 --- a/tests/sanitize_test.rs +++ b/tests/sanitize_test.rs @@ -1,6 +1,5 @@ use gitks::sanitize::*; -// ==================== validate_ref_name tests ==================== #[test] fn test_validate_ref_name_accepts_valid_names() { @@ -69,7 +68,6 @@ fn test_validate_ref_name_rejects_too_long() { assert!(validate_ref_name(&max_valid_name).is_ok()); } -// ==================== validate_revision tests ==================== #[test] fn test_validate_revision_accepts_empty() { @@ -149,7 +147,6 @@ fn test_validate_revision_accepts_valid_branch_names() { assert!(validate_revision("v1.0.0").is_ok()); } -// ==================== validate_file_path tests ==================== #[test] fn test_validate_file_path_accepts_valid_paths() { @@ -216,7 +213,6 @@ fn test_validate_file_path_rejects_windows_reserved_names() { assert!(validate_file_path("CON.txt").is_err()); } -// ==================== validate_relative_path tests ==================== #[test] fn test_validate_relative_path_accepts_valid_paths() { @@ -244,7 +240,6 @@ fn test_validate_relative_path_rejects_traversal() { assert!(validate_relative_path("path/..").is_err()); } -// ==================== validate_config_key tests ==================== #[test] fn test_validate_config_key_accepts_safe_keys() { @@ -281,3 +276,185 @@ fn test_validate_config_key_rejects_invalid_chars() { assert!(validate_config_key("key$(command)").is_err()); assert!(validate_config_key("key`command`").is_err()); } + + +/// Ensure no input causes panic in validate_ref_name. +#[test] +fn fuzz_validate_ref_name_no_panic() { + let long_name = "x".repeat(300); + let test_inputs: Vec<&str> = vec![ + "", + "\0", + "\0\0\0", + "\x7f", + "\x01\x02\x03", + "~^:?*[]\\ ", + "../../../etc/passwd", + "a/b/c/d/e/f/g/h", + &long_name, + "branch@{upstream}", + "HEAD~99999999999", + "HEAD^99999999999", + "ref:HEAD", + "ref:refs/heads/main", + "; rm -rf /", + "$(echo pwned)", + "`echo pwned`", + "\n\r\t", + ]; + for input in test_inputs { + let _ = validate_ref_name(input); + } +} + +/// Ensure no input causes panic in validate_revision. +#[test] +fn fuzz_validate_revision_no_panic() { + let test_inputs: Vec<&str> = vec![ + "", + "HEAD", + "HEAD~0", + "HEAD~99999999", + "HEAD^0", + "HEAD^99999999", + "HEAD^{tree}", + "HEAD^{commit}", + "HEAD^{object}", + "abcdef01", + "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789", + "0000", + "zzzz", + "ref:HEAD", + "ref:refs/heads/main", + "\0", + "branch~abc", + "branch^abc", + "branch~", + "branch^", + "a~10001", + "a^10001", + ]; + for input in test_inputs { + let _ = validate_revision(input); + } +} + +/// Ensure no input causes panic in validate_file_path. +#[test] +fn fuzz_validate_file_path_no_panic() { + let long_path = "x".repeat(5000); + let medium_path = "a".repeat(100); + let test_inputs: Vec<&str> = vec![ + "", + "/etc/passwd", + "../escape", + "a/../b", + ".git", + ".git/config", + "src/.git/HEAD", + "a/b/.git", + "\0", + "\0\0\0", + &long_path, + "path/with\x00null", + "path/with\nnewline", + "normal/path.txt", + &medium_path, + ]; + for input in test_inputs { + let _ = validate_file_path(input); + } +} + +/// Ensure no input causes panic in validate_remote_url. +#[test] +fn fuzz_validate_remote_url_no_panic() { + let long_url = "x".repeat(5000); + let test_inputs: Vec<&str> = vec![ + "", + "https://github.com/user/repo", + "http://localhost:3000/repo", + "ssh://git@host/repo", + "git://host/repo", + "git+ssh://git@host/repo", + "file:///etc/passwd", + "ext::sh -c 'rm -rf /'", + "ftp://host/repo", + "https://user:pass@host/repo", + "\0", + "https://host\0injection", + &long_url, + ]; + for input in test_inputs { + let _ = validate_remote_url(input); + } +} + +/// Ensure no input causes panic in validate_oid_hex. +#[test] +fn fuzz_validate_oid_hex_no_panic() { + let long_hex = "x".repeat(65); + let exact_hex = "x".repeat(64); + let test_inputs: Vec<&str> = vec![ + "", + "abc", + "abcd", + "0123456789abcdef", + "ZZZZ", + "g000", + "0000000000000000000000000000000000000000", + &long_hex, + &exact_hex, + "\0", + " ", + "\n", + ]; + for input in test_inputs { + let _ = validate_oid_hex(input); + } +} + +/// Ensure no input causes panic in validate_relative_path. +#[test] +fn fuzz_validate_relative_path_no_panic() { + let long_path = "x".repeat(5000); + let test_inputs: Vec<&str> = vec![ + "", + "/absolute", + "relative/path", + "../escape", + "path/../escape", + "\0", + &long_path, + ".", + "..", + "...", + "a/b/c", + ]; + for input in test_inputs { + let _ = validate_relative_path(input); + } +} + +/// Ensure no input causes panic in validate_refspec. +#[test] +fn fuzz_validate_refspec_no_panic() { + let long_refspec = "x".repeat(2000); + let test_inputs: Vec<&str> = vec![ + "", + "+refs/heads/*:refs/heads/*", + "refs/heads/main", + "; rm -rf /", + "$(evil)", + "`evil`", + "| pipe", + "& bg", + "< redirect", + "> redirect", + "\0", + &long_refspec, + ]; + for input in test_inputs { + let _ = validate_refspec(input); + } +}