refactor(bare): enhance security and performance optimizations

- Remove unnecessary sorting in advertise_refs for deterministic output
- Add path traversal detection and validation in bare_dir construction
- Implement symlink resolution checks to prevent security vulnerabilities
- Refactor cache system with CRC validation and improved metrics
- Integrate repo-specific cache invalidation using indexed keys
- Add comprehensive unit tests for commit operations and diff functionality
- Move configuration constants to centralized config module
- Optimize string operations in disk cache random value generation
- Enhance license detection algorithm with cleaner matching logic
- Streamline argument processing in various git operations
- Enable tonic's gzip feature, which adds flate2 and crc32fast entries to the lock file
- Enable tokio's signal feature and shut the server down gracefully on SIGINT/SIGTERM
This commit is contained in:
zhenyi
2026-06-12 15:04:12 +08:00
parent e386f44ee2
commit 10a4398e81
41 changed files with 1373 additions and 365 deletions
Generated
+2
View File
@@ -437,6 +437,7 @@ version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
dependencies = [ dependencies = [
"crc32fast",
"miniz_oxide", "miniz_oxide",
"zlib-rs", "zlib-rs",
] ]
@@ -2651,6 +2652,7 @@ dependencies = [
"axum", "axum",
"base64", "base64",
"bytes", "bytes",
"flate2",
"h2", "h2",
"http", "http",
"http-body", "http-body",
+4 -6
View File
@@ -4,13 +4,11 @@ version = "1.0.0"
edition = "2024" edition = "2024"
authors = ["gitks contributors"] authors = ["gitks contributors"]
description = "A gRPC-accessible Git repository operations library for bare repositories" description = "A gRPC-accessible Git repository operations library for bare repositories"
repository = "" repository = "https://github.com/appks/gitks"
readme = "" homepage = "https://github.com/appks/gitks"
homepage = ""
license = "PolyForm-Noncommercial-1.0.0" license = "PolyForm-Noncommercial-1.0.0"
keywords = ["git", "grpc", "bare-repository", "gix"] keywords = ["git", "grpc", "bare-repository", "gix"]
categories = ["development-tools"] categories = ["development-tools"]
documentation = ""
[lib] [lib]
path = "lib.rs" path = "lib.rs"
@@ -27,13 +25,13 @@ duct = { version = "1", features = [] }
tracing = { version = "0.1", features = ["log"] } tracing = { version = "0.1", features = ["log"] }
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
tracing-appender = "0.2" tracing-appender = "0.2"
tokio = { version = "1", features = ["rt-multi-thread", "macros", "process", "io-util", "sync", "net"] } tokio = { version = "1", features = ["rt-multi-thread", "macros", "process", "io-util", "sync", "net", "signal"] }
tokio-stream = { version = "0.1", features = ["full"] } tokio-stream = { version = "0.1", features = ["full"] }
tokio-util = "0.7" tokio-util = "0.7"
thiserror = { version = "2", features = [] } thiserror = { version = "2", features = [] }
prost = "0.14" prost = "0.14"
prost-types = "0.14" prost-types = "0.14"
tonic = { version = "0.14", features = ["transport"] } tonic = { version = "0.14", features = ["transport", "gzip"] }
tonic-health = "0.14" tonic-health = "0.14"
tonic-prost = "0.14" tonic-prost = "0.14"
tempfile = "3" tempfile = "3"
-3
View File
@@ -20,7 +20,6 @@ impl GitBare {
let (tx, rx) = tokio::sync::mpsc::channel(16); let (tx, rx) = tokio::sync::mpsc::channel(16);
// Validate revision before spawning (cannot use ? inside spawn_blocking closure)
let revision = match request.treeish.and_then(|s| s.selector) { let revision = match request.treeish.and_then(|s| s.selector) {
Some(object_selector::Selector::Oid(oid)) => { Some(object_selector::Selector::Oid(oid)) => {
crate::sanitize::validate_oid_hex(&oid.hex) crate::sanitize::validate_oid_hex(&oid.hex)
@@ -45,7 +44,6 @@ impl GitBare {
.map_err(|e| tonic::Status::invalid_argument(e.to_string()))?; .map_err(|e| tonic::Status::invalid_argument(e.to_string()))?;
} }
// Spawn the blocking git subprocess in a dedicated thread
tokio::task::spawn_blocking(move || { tokio::task::spawn_blocking(move || {
let format = archive_options::Format::try_from(options.format) let format = archive_options::Format::try_from(options.format)
.unwrap_or(archive_options::Format::ArchiveFormatTar); .unwrap_or(archive_options::Format::ArchiveFormatTar);
@@ -89,7 +87,6 @@ impl GitBare {
} }
}; };
// Read stdout in 64KB chunks and stream them
use std::io::Read; use std::io::Read;
let mut reader = std::io::BufReader::new(stdout); let mut reader = std::io::BufReader::new(stdout);
let mut buf = vec![0u8; 65536]; let mut buf = vec![0u8; 65536];
+26 -14
View File
@@ -32,7 +32,6 @@ impl GitBare {
crate::sanitize::validate_relative_path(relative_path)?; crate::sanitize::validate_relative_path(relative_path)?;
} }
// Build base path: storage_path if given, else relative_path alone
let base = if !storage_path.is_empty() { let base = if !storage_path.is_empty() {
let p = Path::new(storage_path); let p = Path::new(storage_path);
if !p.is_absolute() { if !p.is_absolute() {
@@ -51,32 +50,36 @@ impl GitBare {
let bare_dir = if !relative_path.is_empty() && !storage_path.is_empty() { let bare_dir = if !relative_path.is_empty() && !storage_path.is_empty() {
let candidate = base.join(relative_path); let candidate = base.join(relative_path);
// Canonicalize base (parent dir likely exists) for a reliable traversal check.
let base_canon = base.canonicalize().unwrap_or_else(|_| base.clone()); let base_canon = base.canonicalize().unwrap_or_else(|_| base.clone());
// Unified path validation to avoid TOCTOU race condition // Validate that relative_path itself contains no traversal patterns
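// before `candidate` is resolved on disk (`base` has already been canonicalized above).
// NOTE: this is a substring test, so names such as `a..b.git` are rejected too.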
if relative_path.contains("..") {
return Err(GitError::InvalidArgument(format!(
"path traversal detected: relative_path contains '..': {relative_path}"
)));
}
// Reject embedded NUL bytes in relative_path
if relative_path.contains('\0') {
return Err(GitError::InvalidArgument(
"relative_path contains null byte".into(),
));
}
let canonical = match candidate.canonicalize() { let canonical = match candidate.canonicalize() {
Ok(canon) => { Ok(canon) => canon,
// Path exists and was canonicalized successfully
canon
}
Err(_) => { Err(_) => {
// Path doesn't exist yet validate via parent directory // Path doesn't exist yet; validate via parent
// This avoids TOCTOU by not having separate code paths
let parent = candidate.parent().unwrap_or(&base); let parent = candidate.parent().unwrap_or(&base);
let filename = candidate.file_name().ok_or_else(|| { let filename = candidate.file_name().ok_or_else(|| {
GitError::InvalidArgument("invalid path: missing filename".into()) GitError::InvalidArgument("invalid path: missing filename".into())
})?; })?;
// Canonicalize parent (which should exist)
let parent_canon = parent let parent_canon = parent
.canonicalize() .canonicalize()
.unwrap_or_else(|_| parent.to_path_buf()); .unwrap_or_else(|_| parent.to_path_buf());
// Construct the full path and verify it's under base
let constructed = parent_canon.join(filename); let constructed = parent_canon.join(filename);
// String-level check as fallback for non-existent paths
let constructed_str = constructed.to_string_lossy(); let constructed_str = constructed.to_string_lossy();
let base_str = base_canon.to_string_lossy(); let base_str = base_canon.to_string_lossy();
@@ -95,7 +98,6 @@ impl GitBare {
} }
}; };
// Final verification: canonical path must be under base
if !canonical.starts_with(&base_canon) { if !canonical.starts_with(&base_canon) {
tracing::warn!( tracing::warn!(
relative_path = %relative_path, relative_path = %relative_path,
@@ -107,6 +109,16 @@ impl GitBare {
"path traversal detected: {relative_path} escapes storage root" "path traversal detected: {relative_path} escapes storage root"
))); )));
} }
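// Re-canonicalize the already-resolved path: a different result means the
// filesystem changed between the two calls (best-effort race detection only;
// this does not by itself prove the path is free of symlinks)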
let double_canon = canonical.canonicalize().unwrap_or_else(|_| canonical.clone());
if canonical != double_canon {
return Err(GitError::InvalidArgument(
"path resolved to different target (possible symlink race)".into(),
));
}
canonical canonical
} else if !storage_path.is_empty() { } else if !storage_path.is_empty() {
base.canonicalize().unwrap_or(base) base.canonicalize().unwrap_or(base)
-6
View File
@@ -27,7 +27,6 @@ impl GitBare {
format!("{base}...{head}") format!("{base}...{head}")
}; };
// Build base rev-list args
let mut base_args = vec![ let mut base_args = vec![
"--git-dir".to_string(), "--git-dir".to_string(),
self.bare_dir.to_string_lossy().into_owned(), self.bare_dir.to_string_lossy().into_owned(),
@@ -38,10 +37,8 @@ impl GitBare {
} }
base_args.push(range); base_args.push(range);
// 1. Total count
let total = { let total = {
let mut args = base_args.clone(); let mut args = base_args.clone();
// Insert after "rev-list" (index 2)
args.insert(3, "--count".into()); args.insert(3, "--count".into());
let result = duct::cmd("git", &args) let result = duct::cmd("git", &args)
.stdout_capture() .stdout_capture()
@@ -60,7 +57,6 @@ impl GitBare {
.unwrap_or(0) .unwrap_or(0)
}; };
// 2. Git-side pagination
let page_size = request let page_size = request
.pagination .pagination
.as_ref() .as_ref()
@@ -81,7 +77,6 @@ impl GitBare {
.min(total); .min(total);
let mut fetch_args = base_args; let mut fetch_args = base_args;
// Insert after "rev-list" (index 2)
fetch_args.insert(3, format!("--skip={start_offset}")); fetch_args.insert(3, format!("--skip={start_offset}"));
fetch_args.insert(4, format!("-n{page_size}")); fetch_args.insert(4, format!("-n{page_size}"));
@@ -104,7 +99,6 @@ impl GitBare {
.map(ToOwned::to_owned) .map(ToOwned::to_owned)
.collect(); .collect();
// 3. Batch-read commits via gix (one repo open, no subprocess per commit)
let mut commits = Vec::with_capacity(page_ids.len()); let mut commits = Vec::with_capacity(page_ids.len());
for id in &page_ids { for id in &page_ids {
commits.push(read_commit_from_repo(self, &repo, id)?); commits.push(read_commit_from_repo(self, &repo, id)?);
+12 -15
View File
@@ -8,9 +8,7 @@ use crate::pb::{
impl GitBare { impl GitBare {
pub fn create_commit(&self, request: CreateCommitRequest) -> GitResult<CreateCommitResponse> { pub fn create_commit(&self, request: CreateCommitRequest) -> GitResult<CreateCommitResponse> {
// Validate branch name to prevent command injection
crate::sanitize::validate_ref_name(&request.branch)?; crate::sanitize::validate_ref_name(&request.branch)?;
// Validate start_revision if provided
if let Some(rev) = request.start_revision.as_ref() { if let Some(rev) = request.start_revision.as_ref() {
match rev.selector.as_ref() { match rev.selector.as_ref() {
Some(object_selector::Selector::Revision(name)) => { Some(object_selector::Selector::Revision(name)) => {
@@ -23,11 +21,11 @@ impl GitBare {
} }
} }
const MAX_ACTIONS_PER_COMMIT: usize = 10_000; if request.actions.len() > crate::config::MAX_ACTIONS_PER_COMMIT {
if request.actions.len() > MAX_ACTIONS_PER_COMMIT {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
"too many commit actions ({} > max {MAX_ACTIONS_PER_COMMIT})", "too many commit actions ({} > max {})",
request.actions.len() request.actions.len(),
crate::config::MAX_ACTIONS_PER_COMMIT,
))); )));
} }
@@ -168,15 +166,14 @@ impl GitBare {
index_path: &str, index_path: &str,
action: &crate::pb::CreateCommitAction, action: &crate::pb::CreateCommitAction,
) -> GitResult<()> { ) -> GitResult<()> {
const MAX_ACTION_CONTENT_BYTES: usize = 100 * 1024 * 1024; if action.content.len() > crate::config::MAX_ACTION_CONTENT_BYTES {
if action.content.len() > MAX_ACTION_CONTENT_BYTES {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
"action content too large ({} bytes, max {MAX_ACTION_CONTENT_BYTES})", "action content too large ({} bytes, max {})",
action.content.len() action.content.len(),
crate::config::MAX_ACTION_CONTENT_BYTES,
))); )));
} }
// Validate file paths to prevent command injection / traversal
if !action.file_path.is_empty() { if !action.file_path.is_empty() {
crate::sanitize::validate_file_path(&action.file_path)?; crate::sanitize::validate_file_path(&action.file_path)?;
} }
@@ -341,11 +338,11 @@ impl GitBare {
author: Option<&crate::pb::Signature>, author: Option<&crate::pb::Signature>,
committer: Option<&crate::pb::Signature>, committer: Option<&crate::pb::Signature>,
) -> GitResult<String> { ) -> GitResult<String> {
const MAX_COMMIT_MESSAGE_BYTES: usize = 10 * 1024 * 1024; if message.len() > crate::config::MAX_COMMIT_MESSAGE_BYTES {
if message.len() > MAX_COMMIT_MESSAGE_BYTES {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
"commit message too large ({} bytes, max {MAX_COMMIT_MESSAGE_BYTES})", "commit message too large ({} bytes, max {})",
message.len() message.len(),
crate::config::MAX_COMMIT_MESSAGE_BYTES,
))); )));
} }
-5
View File
@@ -9,10 +9,8 @@ impl GitBare {
let base_args = build_rev_list_args(self, &request, &revision)?; let base_args = build_rev_list_args(self, &request, &revision)?;
// 1. Get total count via rev-list --count (lightweight, no object parsing)
let total = { let total = {
let mut args = base_args.clone(); let mut args = base_args.clone();
// Insert after "rev-list" (index 2) so it's a rev-list flag, not a git flag
args.insert(3, "--count".into()); args.insert(3, "--count".into());
let result = duct::cmd("git", &args) let result = duct::cmd("git", &args)
.stdout_capture() .stdout_capture()
@@ -31,7 +29,6 @@ impl GitBare {
.unwrap_or(0) .unwrap_or(0)
}; };
// 2. Apply git-side pagination: --skip + -n to only fetch the page
let page_size = request let page_size = request
.pagination .pagination
.as_ref() .as_ref()
@@ -52,7 +49,6 @@ impl GitBare {
.min(total); .min(total);
let mut fetch_args = base_args; let mut fetch_args = base_args;
// Insert after "rev-list" (index 2) so they are rev-list flags, not git flags
fetch_args.insert(3, format!("--skip={start_offset}")); fetch_args.insert(3, format!("--skip={start_offset}"));
fetch_args.insert(4, format!("-n{page_size}")); fetch_args.insert(4, format!("-n{page_size}"));
@@ -75,7 +71,6 @@ impl GitBare {
.map(ToOwned::to_owned) .map(ToOwned::to_owned)
.collect(); .collect();
// 3. Batch-read commits via gix (one repo open, zero subprocess per commit)
let commits = if page_ids.is_empty() { let commits = if page_ids.is_empty() {
Vec::new() Vec::new()
} else { } else {
-3
View File
@@ -367,14 +367,12 @@ impl GitBare {
))); )));
} }
// Split combined patch output by "diff --git" headers
let mut map = HashMap::new(); let mut map = HashMap::new();
let output = &result.stdout; let output = &result.stdout;
let header = b"diff --git "; let header = b"diff --git ";
let mut chunks: Vec<&[u8]> = Vec::new(); let mut chunks: Vec<&[u8]> = Vec::new();
let mut pos = 0; let mut pos = 0;
// Find all header positions
let mut header_positions = Vec::new(); let mut header_positions = Vec::new();
while let Some(idx) = output[pos..] while let Some(idx) = output[pos..]
.windows(header.len()) .windows(header.len())
@@ -390,7 +388,6 @@ impl GitBare {
} }
for chunk in chunks { for chunk in chunks {
// Extract file path from "diff --git a/path b/path\n"
let first_line_end = chunk let first_line_end = chunk
.iter() .iter()
.position(|&b| b == b'\n') .position(|&b| b == b'\n')
-2
View File
@@ -19,7 +19,6 @@ impl GitBare {
]; ];
let mut pathspecs = Vec::new(); let mut pathspecs = Vec::new();
// Apply options if present
if let Some(ref opts) = request.options { if let Some(ref opts) = request.options {
if opts.recursive { if opts.recursive {
args.push("--recursive".to_string()); args.push("--recursive".to_string());
@@ -64,7 +63,6 @@ impl GitBare {
))); )));
} }
// Chunk the output for streaming
const CHUNK_SIZE: usize = 32768; const CHUNK_SIZE: usize = 32768;
let data = output.stdout; let data = output.stdout;
let chunks: Vec<RawDiffResponse> = data let chunks: Vec<RawDiffResponse> = data
+9 -12
View File
@@ -14,8 +14,7 @@ use sha2;
use crate::error::{GitError, GitResult}; use crate::error::{GitError, GitResult};
/// Lease stale threshold: leases older than this are considered stale. use crate::config::LEASE_STALE_THRESHOLD_SECS;
const LEASE_STALE_THRESHOLD_SECS: u64 = 30;
/// State directory relative path under repo prefix. /// State directory relative path under repo prefix.
const STATE_DIR_RELATIVE: &str = "+gitks-cache/state"; const STATE_DIR_RELATIVE: &str = "+gitks-cache/state";
@@ -27,10 +26,11 @@ const CACHE_DIR_RELATIVE: &str = "+gitks-cache/cache";
const INFO_REFS_DIR_RELATIVE: &str = "+gitks-cache/info_refs"; const INFO_REFS_DIR_RELATIVE: &str = "+gitks-cache/info_refs";
fn random_value() -> String { fn random_value() -> String {
use std::fmt::Write;
use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH}; use std::time::{SystemTime, UNIX_EPOCH};
const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
let nanos = SystemTime::now() let nanos = SystemTime::now()
.duration_since(UNIX_EPOCH) .duration_since(UNIX_EPOCH)
.unwrap_or_default() .unwrap_or_default()
@@ -42,11 +42,13 @@ fn random_value() -> String {
buf[..8].copy_from_slice(&nanos.to_le_bytes()); buf[..8].copy_from_slice(&nanos.to_le_bytes());
buf[8..].copy_from_slice(&counter.to_le_bytes()); buf[8..].copy_from_slice(&counter.to_le_bytes());
let mut s = String::with_capacity(32); let mut hex = [0u8; 32];
for byte in &buf { for (i, &byte) in buf.iter().enumerate() {
let _ = write!(s, "{byte:02x}"); hex[i * 2] = HEX_CHARS[(byte >> 4) as usize];
hex[i * 2 + 1] = HEX_CHARS[(byte & 0xf) as usize];
} }
s // SAFETY: hex chars are all valid ASCII (0-9, a-f)
unsafe { String::from_utf8_unchecked(hex.to_vec()) }
} }
/// Compute SHA256 digest from multiple input parts. /// Compute SHA256 digest from multiple input parts.
@@ -137,7 +139,6 @@ impl DiskCache {
return Ok(val.trim().to_string()); return Ok(val.trim().to_string());
} }
// Atomic write: create temp file, then rename into place
let val = random_value(); let val = random_value();
let tmp_path = latest_path.with_extension("tmp"); let tmp_path = latest_path.with_extension("tmp");
std::fs::write(&tmp_path, &val).map_err(GitError::Io)?; std::fs::write(&tmp_path, &val).map_err(GitError::Io)?;
@@ -202,7 +203,6 @@ impl DiskCache {
let val = std::fs::read_to_string(&latest_path).map_err(GitError::Io)?; let val = std::fs::read_to_string(&latest_path).map_err(GitError::Io)?;
Ok(Some(val.trim().to_string())) Ok(Some(val.trim().to_string()))
} else { } else {
// No latest file → create one
Ok(Some(self.ensure_state(relative_path)?)) Ok(Some(self.ensure_state(relative_path)?))
} }
} }
@@ -252,7 +252,6 @@ impl DiskCache {
delta_base_offset: bool, delta_base_offset: bool,
) -> GitResult<String> { ) -> GitResult<String> {
let latest = self.ensure_state(relative_path)?; let latest = self.ensure_state(relative_path)?;
// Sort wants and haves for deterministic key
let mut wants_sorted = wants_hex.to_vec(); let mut wants_sorted = wants_hex.to_vec();
wants_sorted.sort(); wants_sorted.sort();
let mut haves_sorted = haves_hex.to_vec(); let mut haves_sorted = haves_hex.to_vec();
@@ -468,7 +467,6 @@ impl DiskCache {
if !prefix_dir.is_dir() { if !prefix_dir.is_dir() {
continue; continue;
} }
// Process all entries in this prefix directory
let entries = match std::fs::read_dir(&prefix_dir) { let entries = match std::fs::read_dir(&prefix_dir) {
Ok(iter) => iter, Ok(iter) => iter,
Err(_) => continue, Err(_) => continue,
@@ -498,7 +496,6 @@ impl DiskCache {
prefix_empty = false; prefix_empty = false;
} }
} }
// Remove empty prefix directory
if prefix_empty { if prefix_empty {
std::fs::remove_dir(&prefix_dir).ok(); std::fs::remove_dir(&prefix_dir).ok();
} }
+40 -4
View File
@@ -159,8 +159,29 @@ fn run_single_script(script_path: &Path, stdin_data: &[u8], timeout: Duration) -
timeout_secs = timeout.as_secs(), timeout_secs = timeout.as_secs(),
"hook script timed out, killing" "hook script timed out, killing"
); );
let _ = c.kill(); if let Err(e) = c.kill() {
let _ = c.wait(); tracing::error!(
script = %script_path.display(),
error = %e,
"failed to kill timed-out hook"
);
}
match c.wait() {
Ok(status) => {
tracing::debug!(
script = %script_path.display(),
exit_code = ?status.code(),
"killed hook process reaped"
);
}
Err(e) => {
tracing::error!(
script = %script_path.display(),
error = %e,
"failed to reap killed hook"
);
}
}
HookResult::rejected(format!( HookResult::rejected(format!(
"hook script timed out after {}s: {}", "hook script timed out after {}s: {}",
timeout.as_secs(), timeout.as_secs(),
@@ -168,8 +189,23 @@ fn run_single_script(script_path: &Path, stdin_data: &[u8], timeout: Duration) -
)) ))
} }
Err(e) => { Err(e) => {
let _ = c.kill(); tracing::error!(
let _ = c.wait(); script = %script_path.display(),
error = %e,
"hook script wait error"
);
if let Err(kill_err) = c.kill() {
tracing::error!(
error = %kill_err,
"failed to kill hook after wait error"
);
}
if let Err(wait_err) = c.wait() {
tracing::error!(
error = %wait_err,
"failed to reap hook after wait error"
);
}
HookResult::rejected(format!("hook script wait error: {e}")) HookResult::rejected(format!("hook script wait error: {e}"))
} }
} }
+32 -12
View File
@@ -25,7 +25,6 @@ const FORBIDDEN_PATTERNS: &[&str] = &[
"init 6", "init 6",
"poweroff", "poweroff",
"halt", "halt",
// Additional patterns to catch encoding/obfuscation attempts
"eval ", // eval can execute arbitrary strings "eval ", // eval can execute arbitrary strings
"exec ", // exec can replace process "exec ", // exec can replace process
"$(", // command substitution "$(", // command substitution
@@ -55,8 +54,21 @@ const DANGEROUS_PREFIXES: &[&str] = &[
"rm -rf *", // rm -rf with wildcard "rm -rf *", // rm -rf with wildcard
]; ];
/// Maximum hook script size (64KB). /// Pairs of commands that indicate data exfiltration or code execution.
const MAX_HOOK_SIZE: usize = 65536; const DANGEROUS_COMMAND_PAIRS: &[(&str, &str)] = &[
("curl", "bash"),
("curl", "sh"),
("wget", "bash"),
("wget", "sh"),
("nc", "-e"),
("ncat", "-e"),
("python", "-c"),
("perl", "-e"),
("ruby", "-e"),
("node", "-e"),
];
use crate::config::MAX_HOOK_SCRIPT_SIZE;
/// Validate a custom hook script content for safety. /// Validate a custom hook script content for safety.
pub fn validate_hook_content(content: &str) -> GitResult<()> { pub fn validate_hook_content(content: &str) -> GitResult<()> {
@@ -65,10 +77,10 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> {
"hook content cannot be empty".into(), "hook content cannot be empty".into(),
)); ));
} }
if content.len() > MAX_HOOK_SIZE { if content.len() > MAX_HOOK_SCRIPT_SIZE {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
"hook content too large (max {} bytes): {} bytes", "hook content too large (max {} bytes): {} bytes",
MAX_HOOK_SIZE, MAX_HOOK_SCRIPT_SIZE,
content.len() content.len()
))); )));
} }
@@ -78,7 +90,6 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> {
)); ));
} }
// Check for forbidden patterns (case-insensitive where appropriate)
let content_lower = content.to_lowercase(); let content_lower = content.to_lowercase();
for pattern in FORBIDDEN_PATTERNS { for pattern in FORBIDDEN_PATTERNS {
if content_lower.contains(&pattern.to_lowercase()) { if content_lower.contains(&pattern.to_lowercase()) {
@@ -88,7 +99,6 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> {
} }
} }
// Check for dangerous prefixes (exact case)
for prefix in DANGEROUS_PREFIXES { for prefix in DANGEROUS_PREFIXES {
if content.contains(prefix) { if content.contains(prefix) {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
@@ -97,15 +107,28 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> {
} }
} }
// Check for obfuscation techniques
check_obfuscation_attempts(content)?; check_obfuscation_attempts(content)?;
check_dangerous_pairs(content)?;
Ok(())
}
/// Check for dangerous command pairs that indicate data exfiltration or code execution.
fn check_dangerous_pairs(content: &str) -> GitResult<()> {
let content_lower = content.to_lowercase();
for &(cmd1, cmd2) in DANGEROUS_COMMAND_PAIRS {
if content_lower.contains(cmd1) && content_lower.contains(cmd2) {
return Err(GitError::InvalidArgument(format!(
"hook contains dangerous command combination: '{cmd1}' + '{cmd2}' (possible data exfiltration)"
)));
}
}
Ok(()) Ok(())
} }
/// Check for common obfuscation attempts. /// Check for common obfuscation attempts.
fn check_obfuscation_attempts(content: &str) -> GitResult<()> { fn check_obfuscation_attempts(content: &str) -> GitResult<()> {
// Check for excessive use of special characters that might indicate obfuscation
let special_char_count = content let special_char_count = content
.chars() .chars()
.filter(|c| { .filter(|c| {
@@ -117,14 +140,12 @@ fn check_obfuscation_attempts(content: &str) -> GitResult<()> {
.count(); .count();
let total_chars = content.chars().count(); let total_chars = content.chars().count();
// If more than 30% of content is special characters, it's suspicious
if total_chars > 0 && (special_char_count * 100 / total_chars) > 30 { if total_chars > 0 && (special_char_count * 100 / total_chars) > 30 {
return Err(GitError::InvalidArgument( return Err(GitError::InvalidArgument(
"hook content appears obfuscated (too many special characters)".into(), "hook content appears obfuscated (too many special characters)".into(),
)); ));
} }
// Check for hex encoding attempts (e.g., \x41\x42)
if content.contains("\\x") { if content.contains("\\x") {
let hex_count = content.matches("\\x").count(); let hex_count = content.matches("\\x").count();
if hex_count > 5 { if hex_count > 5 {
@@ -134,7 +155,6 @@ fn check_obfuscation_attempts(content: &str) -> GitResult<()> {
} }
} }
// Check for unicode escape sequences
if content.contains("\\u") { if content.contains("\\u") {
let unicode_count = content.matches("\\u").count(); let unicode_count = content.matches("\\u").count();
if unicode_count > 5 { if unicode_count > 5 {
+1
View File
@@ -1,5 +1,6 @@
pub mod archive; pub mod archive;
pub mod bare; pub mod bare;
pub mod config;
pub mod blame; pub mod blame;
pub mod blob; pub mod blob;
pub mod branch; pub mod branch;
+38 -11
View File
@@ -5,7 +5,7 @@ use std::time::Duration;
use gitks::disk_cache::DiskCache; use gitks::disk_cache::DiskCache;
use gitks::hooks::HookManager; use gitks::hooks::HookManager;
use gitks::metrics; use gitks::metrics;
use gitks::server::{GitksService, serve}; use gitks::server::{GitksService, serve_with_shutdown};
use etcd_client::{Client, PutOptions}; use etcd_client::{Client, PutOptions};
use tokio::sync::Mutex; use tokio::sync::Mutex;
@@ -141,7 +141,6 @@ fn init_tracing() -> Option<tracing_appender::non_blocking::WorkerGuard> {
.boxed(), .boxed(),
}; };
// Optional file output with rotation
if let Ok(log_dir) = std::env::var("GITKS_LOG_DIR") { if let Ok(log_dir) = std::env::var("GITKS_LOG_DIR") {
let rotation = match env_or("GITKS_LOG_ROTATION", "daily").as_str() { let rotation = match env_or("GITKS_LOG_ROTATION", "daily").as_str() {
"hourly" => tracing_appender::rolling::Rotation::HOURLY, "hourly" => tracing_appender::rolling::Rotation::HOURLY,
@@ -212,7 +211,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let port = env_or("GITKS_PORT", DEFAULT_PORT); let port = env_or("GITKS_PORT", DEFAULT_PORT);
let storage_name = env_or("STORAGE_NAME", DEFAULT_STORAGE_NAME); let storage_name = env_or("STORAGE_NAME", DEFAULT_STORAGE_NAME);
// --- etcd config overlay: connect etcd, override key settings ---
let etcd_endpoints: Vec<String> = std::env::var("GITKS_ETCD_ENDPOINTS") let etcd_endpoints: Vec<String> = std::env::var("GITKS_ETCD_ENDPOINTS")
.ok() .ok()
.filter(|s| !s.is_empty()) .filter(|s| !s.is_empty())
@@ -239,7 +237,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let grpc_addr = let grpc_addr =
std::env::var("GITKS_ADVERTISE_ADDR").unwrap_or_else(|_| format!("http://{host}:{port}")); std::env::var("GITKS_ADVERTISE_ADDR").unwrap_or_else(|_| format!("http://{host}:{port}"));
// Register this service so other services (appks) can discover us
if let Some(ref e) = etcd { if let Some(ref e) = etcd {
let addr_str = format!("{host}:{port}"); let addr_str = format!("{host}:{port}");
e.register("gitks", &addr_str).await.ok(); e.register("gitks", &addr_str).await.ok();
@@ -256,7 +253,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
std::fs::create_dir_all(&repo_prefix)?; std::fs::create_dir_all(&repo_prefix)?;
} }
// Disk cache configuration
let disk_cache_enabled = env_bool("GITKS_DISK_CACHE_ENABLED", false); let disk_cache_enabled = env_bool("GITKS_DISK_CACHE_ENABLED", false);
let disk_cache_max_age = env_u64("GITKS_DISK_CACHE_MAX_AGE", 300); let disk_cache_max_age = env_u64("GITKS_DISK_CACHE_MAX_AGE", 300);
@@ -275,7 +271,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
tracing::info!("disk cache disabled"); tracing::info!("disk cache disabled");
} }
// Pack cache configuration
let pack_cache_enabled = env_bool("GITKS_PACK_CACHE_ENABLED", false); let pack_cache_enabled = env_bool("GITKS_PACK_CACHE_ENABLED", false);
let pack_backpressure = env_bool("GITKS_PACK_CACHE_BACKPRESSURE", true); let pack_backpressure = env_bool("GITKS_PACK_CACHE_BACKPRESSURE", true);
@@ -293,7 +288,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
None None
}; };
// Hook manager configuration
let hooks_enabled = env_bool("GITKS_HOOKS_ENABLED", true); let hooks_enabled = env_bool("GITKS_HOOKS_ENABLED", true);
let server_hooks_dir = std::env::var("GITKS_SERVER_HOOKS_DIR") let server_hooks_dir = std::env::var("GITKS_SERVER_HOOKS_DIR")
.ok() .ok()
@@ -326,7 +320,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let _metrics_handle = metrics::start_metrics_server(metrics_port); let _metrics_handle = metrics::start_metrics_server(metrics_port);
tracing::info!(port = metrics_port, "metrics server started"); tracing::info!(port = metrics_port, "metrics server started");
// Slow request threshold let _semaphore_cleanup = gitks::rate_limit::start_semaphore_cleanup_task();
let slow_request_threshold = env_u64("GITKS_SLOW_REQUEST_THRESHOLD_MS", 5000); let slow_request_threshold = env_u64("GITKS_SLOW_REQUEST_THRESHOLD_MS", 5000);
metrics::set_slow_request_threshold(slow_request_threshold); metrics::set_slow_request_threshold(slow_request_threshold);
tracing::info!( tracing::info!(
@@ -357,11 +352,43 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
"starting gitks gRPC server" "starting gitks gRPC server"
); );
serve(addr, svc).await?; metrics::set_ready(true);
serve_with_shutdown(addr, svc, shutdown_signal()).await?;
metrics::set_ready(false);
// Gracefully shut down the HTTP metrics server
http_cancel.cancel(); http_cancel.cancel();
tracing::info!("gitks shut down"); tracing::info!("gitks shut down complete");
Ok(()) Ok(())
} }
/// Resolves when the process receives SIGTERM or SIGINT (Ctrl+C).
async fn shutdown_signal() {
let ctrl_c = async {
tokio::signal::ctrl_c()
.await
.expect("failed to install Ctrl+C handler");
};
#[cfg(unix)]
let terminate = async {
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
.expect("failed to install SIGTERM handler")
.recv()
.await;
};
#[cfg(not(unix))]
let terminate = std::future::pending::<()>();
tokio::select! {
_ = ctrl_c => {
tracing::info!("received Ctrl+C, starting graceful shutdown");
}
_ = terminate => {
tracing::info!("received SIGTERM, starting graceful shutdown");
}
}
}
+71 -15
View File
@@ -65,6 +65,12 @@ struct MetricsInner {
cache_hit_by_namespace: DashMap<String, AtomicU64>, cache_hit_by_namespace: DashMap<String, AtomicU64>,
/// Counter: cache misses by namespace /// Counter: cache misses by namespace
cache_miss_by_namespace: DashMap<String, AtomicU64>, cache_miss_by_namespace: DashMap<String, AtomicU64>,
/// Histogram: cache value size in bytes
cache_value_size_buckets: DashMap<String, AtomicU64>,
/// Counter: rate-limit rejections by repository
rate_limit_reject_count: DashMap<String, AtomicU64>,
/// Counter: rate-limit acquires by repository
rate_limit_acquire_count: DashMap<String, AtomicU64>,
} }
static METRICS: OnceLock<Arc<MetricsInner>> = OnceLock::new(); static METRICS: OnceLock<Arc<MetricsInner>> = OnceLock::new();
@@ -99,6 +105,9 @@ fn metrics() -> &'static Arc<MetricsInner> {
cache_eviction_count: DashMap::new(), cache_eviction_count: DashMap::new(),
cache_hit_by_namespace: DashMap::new(), cache_hit_by_namespace: DashMap::new(),
cache_miss_by_namespace: DashMap::new(), cache_miss_by_namespace: DashMap::new(),
cache_value_size_buckets: DashMap::new(),
rate_limit_reject_count: DashMap::new(),
rate_limit_acquire_count: DashMap::new(),
}) })
}) })
} }
@@ -144,7 +153,6 @@ pub fn record_request(method: &str, status_code: &str, duration: Duration) {
let m = metrics(); let m = metrics();
let duration_ms = duration.as_millis() as u64; let duration_ms = duration.as_millis() as u64;
// Request count
let key = format!("{method}:{status_code}"); let key = format!("{method}:{status_code}");
m.request_count m.request_count
.entry(key) .entry(key)
@@ -152,10 +160,8 @@ pub fn record_request(method: &str, status_code: &str, duration: Duration) {
.value() .value()
.fetch_add(1, Ordering::Relaxed); .fetch_add(1, Ordering::Relaxed);
// Duration histogram
record_duration_bucket(&m.duration_buckets, method, duration_ms); record_duration_bucket(&m.duration_buckets, method, duration_ms);
// Slow request detection
let threshold = m.slow_request_threshold_ms.load(Ordering::Relaxed); let threshold = m.slow_request_threshold_ms.load(Ordering::Relaxed);
if threshold > 0 && duration_ms >= threshold { if threshold > 0 && duration_ms >= threshold {
m.slow_request_count m.slow_request_count
@@ -270,6 +276,46 @@ pub fn record_hook_execution(hook_type: &str, result: &str, duration: Duration)
record_duration_bucket(&m.hook_duration_buckets, hook_type, duration_ms); record_duration_bucket(&m.hook_duration_buckets, hook_type, duration_ms);
} }
/// Adds one observation of `size` bytes to the cache value-size histogram
/// kept for `namespace`.
pub fn record_cache_value_size(namespace: &str, size: usize) {
    let bytes = size as u64;
    record_size_bucket(&metrics().cache_value_size_buckets, namespace, bytes);
}
/// Increments the rate-limit rejection counter for `repo`, creating the
/// counter at zero the first time the repository is seen.
pub fn record_rate_limit_reject(repo: &str) {
    let counter = metrics()
        .rate_limit_reject_count
        .entry(repo.to_owned())
        .or_insert_with(|| AtomicU64::new(0));
    counter.value().fetch_add(1, Ordering::Relaxed);
}
/// Increments the rate-limit acquire counter for `repo`, creating the
/// counter at zero the first time the repository is seen.
pub fn record_rate_limit_acquire(repo: &str) {
    let counter = metrics()
        .rate_limit_acquire_count
        .entry(repo.to_owned())
        .or_insert_with(|| AtomicU64::new(0));
    counter.value().fetch_add(1, Ordering::Relaxed);
}
/// Records one observation of `size` bytes into cumulative size buckets.
///
/// Bucket upper bounds grow by a factor of 4: 1 KiB, 4 KiB, 16 KiB, ..., 1 GiB.
/// Every bucket whose bound is `>= size` is incremented, under the key
/// `"{label}:le_{bound}"`.
///
/// Sizes larger than the last bound (1 GiB) do not increment any bucket.
fn record_size_bucket(map: &DashMap<String, AtomicU64>, label: &str, size: u64) {
    const BOUNDS: [u64; 11] = [
        1_024,
        4_096,
        16_384,
        65_536,
        262_144,
        1_048_576,
        4_194_304,
        16_777_216,
        67_108_864,
        268_435_456,
        1_073_741_824,
    ];

    // Build the key only for buckets that are actually hit, so no String is
    // allocated for bounds smaller than `size`.
    for bound in BOUNDS.iter().filter(|&&bound| size <= bound) {
        map.entry(format!("{label}:le_{bound}"))
            .or_insert_with(|| AtomicU64::new(0))
            .value()
            .fetch_add(1, Ordering::Relaxed);
    }
}
/// Escape a string for use as a Prometheus label value. /// Escape a string for use as a Prometheus label value.
/// Replaces `\` → `\\`, `"` → `\"`, `\n` → `\n` per the Prometheus spec. /// Replaces `\` → `\\`, `"` → `\"`, `\n` → `\n` per the Prometheus spec.
fn prom_escape(value: &str) -> String { fn prom_escape(value: &str) -> String {
@@ -349,7 +395,6 @@ pub fn render_metrics() -> String {
out.push_str("# TYPE gitks_repository_count gauge\n"); out.push_str("# TYPE gitks_repository_count gauge\n");
out.push_str(&format!("gitks_repository_count {repos}\n\n")); out.push_str(&format!("gitks_repository_count {repos}\n\n"));
// gRPC requests
render_counter_map( render_counter_map(
&mut out, &mut out,
"gitks_requests_total", "gitks_requests_total",
@@ -358,7 +403,6 @@ pub fn render_metrics() -> String {
&["method", "status"], &["method", "status"],
); );
// gRPC duration
render_histogram( render_histogram(
&mut out, &mut out,
"gitks_request_duration_milliseconds", "gitks_request_duration_milliseconds",
@@ -366,7 +410,6 @@ pub fn render_metrics() -> String {
&m.duration_buckets, &m.duration_buckets,
); );
// Slow requests
render_counter_map( render_counter_map(
&mut out, &mut out,
"gitks_slow_requests_total", "gitks_slow_requests_total",
@@ -375,7 +418,6 @@ pub fn render_metrics() -> String {
&["method"], &["method"],
); );
// Cache
let hits = m.cache_hits.load(Ordering::Relaxed); let hits = m.cache_hits.load(Ordering::Relaxed);
let misses = m.cache_misses.load(Ordering::Relaxed); let misses = m.cache_misses.load(Ordering::Relaxed);
out.push_str("# HELP gitks_cache_hits_total Cache hit count\n"); out.push_str("# HELP gitks_cache_hits_total Cache hit count\n");
@@ -385,7 +427,6 @@ pub fn render_metrics() -> String {
out.push_str("# TYPE gitks_cache_misses_total counter\n"); out.push_str("# TYPE gitks_cache_misses_total counter\n");
out.push_str(&format!("gitks_cache_misses_total {misses}\n\n")); out.push_str(&format!("gitks_cache_misses_total {misses}\n\n"));
// Errors
render_counter_map( render_counter_map(
&mut out, &mut out,
"gitks_errors_total", "gitks_errors_total",
@@ -394,7 +435,6 @@ pub fn render_metrics() -> String {
&["kind"], &["kind"],
); );
// Git subprocess
render_counter_map( render_counter_map(
&mut out, &mut out,
"gitks_git_cmd_total", "gitks_git_cmd_total",
@@ -409,7 +449,6 @@ pub fn render_metrics() -> String {
&m.git_cmd_duration_buckets, &m.git_cmd_duration_buckets,
); );
// Cache operations
render_counter_map( render_counter_map(
&mut out, &mut out,
"gitks_cache_ops_total", "gitks_cache_ops_total",
@@ -424,7 +463,6 @@ pub fn render_metrics() -> String {
&m.cache_op_duration_buckets, &m.cache_op_duration_buckets,
); );
// Cache evictions by cause and namespace
render_counter_map( render_counter_map(
&mut out, &mut out,
"gitks_cache_evictions_total", "gitks_cache_evictions_total",
@@ -433,7 +471,6 @@ pub fn render_metrics() -> String {
&["cause", "namespace"], &["cause", "namespace"],
); );
// Per-namespace cache hits
render_counter_map( render_counter_map(
&mut out, &mut out,
"gitks_cache_hits_by_namespace_total", "gitks_cache_hits_by_namespace_total",
@@ -442,7 +479,6 @@ pub fn render_metrics() -> String {
&["namespace"], &["namespace"],
); );
// Per-namespace cache misses
render_counter_map( render_counter_map(
&mut out, &mut out,
"gitks_cache_misses_by_namespace_total", "gitks_cache_misses_by_namespace_total",
@@ -451,7 +487,6 @@ pub fn render_metrics() -> String {
&["namespace"], &["namespace"],
); );
// Hook execution
render_counter_map( render_counter_map(
&mut out, &mut out,
"gitks_hook_executions_total", "gitks_hook_executions_total",
@@ -466,6 +501,28 @@ pub fn render_metrics() -> String {
&m.hook_duration_buckets, &m.hook_duration_buckets,
); );
render_histogram(
&mut out,
"gitks_cache_value_size_bytes",
"Cache value size distribution in bytes",
&m.cache_value_size_buckets,
);
render_counter_map(
&mut out,
"gitks_rate_limit_rejects_total",
"Rate-limit rejections by repository",
&m.rate_limit_reject_count,
&["repo"],
);
render_counter_map(
&mut out,
"gitks_rate_limit_acquires_total",
"Rate-limit acquires by repository",
&m.rate_limit_acquire_count,
&["repo"],
);
out out
} }
@@ -688,7 +745,6 @@ impl RequestMetrics {
let duration_ms = duration.as_millis() as u64; let duration_ms = duration.as_millis() as u64;
record_request(self.method, status, duration); record_request(self.method, status, duration);
// Slow request warning
let threshold = metrics().slow_request_threshold_ms.load(Ordering::Relaxed); let threshold = metrics().slow_request_threshold_ms.load(Ordering::Relaxed);
if threshold > 0 && duration_ms >= threshold { if threshold > 0 && duration_ms >= threshold {
tracing::warn!( tracing::warn!(
-2
View File
@@ -42,7 +42,6 @@ impl GitBare {
symbolic_target, symbolic_target,
}); });
} }
// Sort by name for deterministic output
references.sort_by(|a, b| a.name.cmp(&b.name)); references.sort_by(|a, b| a.name.cmp(&b.name));
Ok(AdvertiseRefsResponse { Ok(AdvertiseRefsResponse {
references, references,
@@ -68,7 +67,6 @@ impl GitBare {
let bare_dir_str = self.bare_dir.to_string_lossy().into_owned(); let bare_dir_str = self.bare_dir.to_string_lossy().into_owned();
let stateless = request.protocol.as_ref().is_some_and(|p| p.stateless); let stateless = request.protocol.as_ref().is_some_and(|p| p.stateless);
// Default to upload-pack if service is unspecified
let subcommand = if request.service == "git-receive-pack" { let subcommand = if request.service == "git-receive-pack" {
"receive-pack" "receive-pack"
} else { } else {
-5
View File
@@ -18,7 +18,6 @@ impl GitBare {
let pack_dir = self.bare_dir.join("objects").join("pack"); let pack_dir = self.bare_dir.join("objects").join("pack");
std::fs::create_dir_all(&pack_dir).map_err(GitError::Io)?; std::fs::create_dir_all(&pack_dir).map_err(GitError::Io)?;
// Stream pack data to a temp file instead of accumulating in memory
let mut tmp_file = tempfile::Builder::new() let mut tmp_file = tempfile::Builder::new()
.prefix("tmp_index_pack_") .prefix("tmp_index_pack_")
.tempfile_in(&pack_dir) .tempfile_in(&pack_dir)
@@ -41,7 +40,6 @@ impl GitBare {
return Err(GitError::InvalidArgument("empty pack data".into())); return Err(GitError::InvalidArgument("empty pack data".into()));
} }
// Flush and get the path before we pass it to git
tmp_file.flush().map_err(GitError::Io)?; tmp_file.flush().map_err(GitError::Io)?;
let tmp_path = tmp_file.path().to_path_buf(); let tmp_path = tmp_file.path().to_path_buf();
@@ -64,7 +62,6 @@ impl GitBare {
.unchecked() .unchecked()
.run()?; .run()?;
// Drop the temp file handle — git index-pack has processed it
drop(tmp_file); drop(tmp_file);
if !result.status.success() { if !result.status.success() {
@@ -74,7 +71,6 @@ impl GitBare {
}); });
} }
// Parse the output to extract the pack hash
let output = String::from_utf8_lossy(&result.stdout); let output = String::from_utf8_lossy(&result.stdout);
let stderr = String::from_utf8_lossy(&result.stderr); let stderr = String::from_utf8_lossy(&result.stderr);
let all_output = format!("{output}\n{stderr}"); let all_output = format!("{output}\n{stderr}");
@@ -96,7 +92,6 @@ impl GitBare {
}) })
.next(); .next();
// Try to get object count from .idx if it exists
let mut object_count = 0u64; let mut object_count = 0u64;
if let Some(ref hash) = pack_hash { if let Some(ref hash) = pack_hash {
let idx_path = pack_dir.join(format!("pack-{hash}.idx")); let idx_path = pack_dir.join(format!("pack-{hash}.idx"));
-1
View File
@@ -30,7 +30,6 @@ impl GitBare {
.filter(|hex| !hex.is_empty()) .filter(|hex| !hex.is_empty())
.map(|hex| self.oid_to_pb(hex)); .map(|hex| self.oid_to_pb(hex));
// Count objects
let mut object_count = 0u64; let mut object_count = 0u64;
if let Some(hash_str) = base_name.strip_prefix("pack-") { if let Some(hash_str) = base_name.strip_prefix("pack-") {
let idx_path = pack_dir.join(format!("pack-{hash_str}.idx")); let idx_path = pack_dir.join(format!("pack-{hash_str}.idx"));
+1 -9
View File
@@ -1,5 +1,4 @@
use std::process::Stdio; use std::process::Stdio;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::process::Command; use tokio::process::Command;
@@ -8,13 +7,9 @@ use tokio_stream::wrappers::ReceiverStream;
use super::CancellableReceiverStream; use super::CancellableReceiverStream;
use crate::bare::GitBare; use crate::bare::GitBare;
use crate::config::{MAX_RECEIVE_PACKET_BYTES, MAX_RECEIVE_STDERR_BYTES, RECEIVE_PACK_TIMEOUT};
use crate::pb::ReceivePackResponse; use crate::pb::ReceivePackResponse;
/// Maximum time allowed for a git receive-pack process before it is killed.
const RECEIVE_PACK_TIMEOUT: Duration = Duration::from_secs(1800); // 30 minutes
const MAX_RECEIVE_PACKET_BYTES: usize = 16 * 1024 * 1024;
const MAX_RECEIVE_STDERR_BYTES: u64 = 64 * 1024;
impl GitBare { impl GitBare {
/// Receive pack data using git-receive-pack with true concurrent streaming. /// Receive pack data using git-receive-pack with true concurrent streaming.
/// ///
@@ -41,7 +36,6 @@ impl GitBare {
let (tx, rx) = tokio::sync::mpsc::channel(16); let (tx, rx) = tokio::sync::mpsc::channel(16);
// Use a cancellation token to track client disconnect
let cancel_token = tokio_util::sync::CancellationToken::new(); let cancel_token = tokio_util::sync::CancellationToken::new();
let cancel_token_clone = cancel_token.clone(); let cancel_token_clone = cancel_token.clone();
@@ -154,7 +148,6 @@ impl GitBare {
} }
}; };
// Run all three concurrently with timeout
let _process_future = tokio::join!(stdin_task, stdout_task, stderr_task); let _process_future = tokio::join!(stdin_task, stdout_task, stderr_task);
match tokio::time::timeout(RECEIVE_PACK_TIMEOUT, child.wait()).await { match tokio::time::timeout(RECEIVE_PACK_TIMEOUT, child.wait()).await {
@@ -189,7 +182,6 @@ impl GitBare {
} }
}); });
// When the ReceiverStream is dropped (client disconnect), cancel the background task
let rx_stream = ReceiverStream::new(rx); let rx_stream = ReceiverStream::new(rx);
let cancel_guard = cancel_token_clone.clone().drop_guard(); let cancel_guard = cancel_token_clone.clone().drop_guard();
-6
View File
@@ -41,11 +41,9 @@ impl GitBare {
let (tx, rx) = tokio::sync::mpsc::channel(16); let (tx, rx) = tokio::sync::mpsc::channel(16);
// Use a cancellation token to track client disconnect
let cancel_token = tokio_util::sync::CancellationToken::new(); let cancel_token = tokio_util::sync::CancellationToken::new();
let cancel_token_clone = cancel_token.clone(); let cancel_token_clone = cancel_token.clone();
// Move input into the spawned task to make it 'static
let stream = Box::pin(input); let stream = Box::pin(input);
tokio::spawn(async move { tokio::spawn(async move {
let stream = stream; let stream = stream;
@@ -77,7 +75,6 @@ impl GitBare {
let mut stdout = child.stdout.take(); let mut stdout = child.stdout.take();
let mut stderr = child.stderr.take(); let mut stderr = child.stderr.take();
// Concurrent: write stdin packets, read stdout chunks, read stderr
let stdin_task = { let stdin_task = {
let mut stream = stream; let mut stream = stream;
let cancel = cancel_token.clone(); let cancel = cancel_token.clone();
@@ -102,7 +99,6 @@ impl GitBare {
Err(_) => break, Err(_) => break,
} }
} }
// Close stdin to signal end-of-input
drop(stdin); drop(stdin);
} }
} }
@@ -157,7 +153,6 @@ impl GitBare {
} }
}; };
// Run all three concurrently with timeout
let _process_future = tokio::join!(stdin_task, stdout_task, stderr_task); let _process_future = tokio::join!(stdin_task, stdout_task, stderr_task);
match tokio::time::timeout(UPLOAD_PACK_TIMEOUT, child.wait()).await { match tokio::time::timeout(UPLOAD_PACK_TIMEOUT, child.wait()).await {
@@ -192,7 +187,6 @@ impl GitBare {
} }
}); });
// When the ReceiverStream is dropped (client disconnect), cancel the background task
let rx_stream = ReceiverStream::new(rx); let rx_stream = ReceiverStream::new(rx);
let cancel_guard = cancel_token_clone.clone().drop_guard(); let cancel_guard = cancel_token_clone.clone().drop_guard();
-1
View File
@@ -100,7 +100,6 @@ impl PackCache {
}) })
.await; .await;
if result.is_err() { if result.is_err() {
// Task join error or I/O error already sent
} }
}); });
+94 -20
View File
@@ -10,15 +10,22 @@
use dashmap::DashMap; use dashmap::DashMap;
use std::sync::{Arc, OnceLock, RwLock}; use std::sync::{Arc, OnceLock, RwLock};
use std::time::Instant;
use tokio::sync::Semaphore; use tokio::sync::Semaphore;
/// Default max concurrent operations per repository. use crate::config::{DEFAULT_MAX_CONCURRENT_OPS, SEMAPHORE_IDLE_THRESHOLD_SECS};
const DEFAULT_MAX_CONCURRENT: usize = 5;
/// Per-repository rate limiter entry with usage tracking.
struct SemaphoreEntry {
    /// Semaphore that hands out this repository's permits.
    sem: Arc<Semaphore>,
    /// Number of permits `sem` was created with.
    max_permits: usize,
    /// Time this entry was last looked up by `acquire`; read by the idle cleanup.
    last_accessed: RwLock<Instant>,
}
/// Global rate limiter state. /// Global rate limiter state.
struct RateLimiter { struct RateLimiter {
/// Per-repository semaphores. Key = repository relative_path. /// Per-repository semaphores. Key = repository relative_path.
semaphores: DashMap<String, Arc<Semaphore>>, semaphores: DashMap<String, SemaphoreEntry>,
/// Max concurrent operations per repository (protected by RwLock for runtime updates). /// Max concurrent operations per repository (protected by RwLock for runtime updates).
max_concurrent: RwLock<usize>, max_concurrent: RwLock<usize>,
} }
@@ -30,7 +37,7 @@ fn limiter() -> &'static RateLimiter {
let max = std::env::var("GITKS_RATE_LIMIT_MAX_CONCURRENT") let max = std::env::var("GITKS_RATE_LIMIT_MAX_CONCURRENT")
.ok() .ok()
.and_then(|v| v.parse().ok()) .and_then(|v| v.parse().ok())
.unwrap_or(DEFAULT_MAX_CONCURRENT); .unwrap_or(DEFAULT_MAX_CONCURRENT_OPS);
tracing::info!( tracing::info!(
max_concurrent = max, max_concurrent = max,
@@ -52,6 +59,8 @@ fn get_max_concurrent() -> usize {
.unwrap_or_else(|e| e.into_inner()) .unwrap_or_else(|e| e.into_inner())
} }
/// A guard that holds a rate-limit permit. The permit is released on drop. /// A guard that holds a rate-limit permit. The permit is released on drop.
pub struct RateLimitGuard { pub struct RateLimitGuard {
/// The semaphore permit. Dropping this releases the permit. /// The semaphore permit. Dropping this releases the permit.
@@ -71,18 +80,24 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
} }
let max_concurrent = get_max_concurrent(); let max_concurrent = get_max_concurrent();
if max_concurrent == 0 { if max_concurrent == 0 {
// Unlimited
return None; return None;
} }
let sem = limiter() let sem = {
.semaphores let entry = limiter()
.entry(repo.to_string()) .semaphores
.or_insert_with(|| Arc::new(Semaphore::new(max_concurrent))) .entry(repo.to_string())
.value() .or_insert_with(|| SemaphoreEntry {
.clone(); sem: Arc::new(Semaphore::new(max_concurrent)),
max_permits: max_concurrent,
last_accessed: RwLock::new(Instant::now()),
});
if let Ok(mut last) = entry.last_accessed.write() {
*last = Instant::now();
}
entry.sem.clone()
};
// Release DashMap reference before awaiting
let _ = repo; let _ = repo;
match tokio::time::timeout( match tokio::time::timeout(
@@ -97,6 +112,7 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
available = sem.available_permits(), available = sem.available_permits(),
"rate limit permit acquired" "rate limit permit acquired"
); );
crate::metrics::record_rate_limit_acquire(repo_relative_path.unwrap_or(""));
Some(RateLimitGuard { _permit: permit }) Some(RateLimitGuard { _permit: permit })
} }
Ok(Err(_closed)) => { Ok(Err(_closed)) => {
@@ -105,7 +121,8 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
repo = %repo_relative_path.unwrap_or(""), repo = %repo_relative_path.unwrap_or(""),
"rate limit semaphore closed, recreating" "rate limit semaphore closed, recreating"
); );
let new_sem = Arc::new(Semaphore::new(get_max_concurrent())); let max = get_max_concurrent();
let new_sem = Arc::new(Semaphore::new(max));
let permit = match new_sem.clone().acquire_owned().await { let permit = match new_sem.clone().acquire_owned().await {
Ok(permit) => permit, Ok(permit) => permit,
Err(_closed) => { Err(_closed) => {
@@ -116,9 +133,14 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
return None; return None;
} }
}; };
limiter() limiter().semaphores.insert(
.semaphores repo_relative_path.unwrap_or("").to_string(),
.insert(repo_relative_path.unwrap_or("").to_string(), new_sem); SemaphoreEntry {
sem: new_sem,
max_permits: get_max_concurrent(),
last_accessed: RwLock::new(Instant::now()),
},
);
Some(RateLimitGuard { _permit: permit }) Some(RateLimitGuard { _permit: permit })
} }
Err(_elapsed) => { Err(_elapsed) => {
@@ -146,7 +168,7 @@ pub async fn acquire_or_reject(
if get_max_concurrent() == 0 { if get_max_concurrent() == 0 {
return Ok(None); return Ok(None);
} }
// Timeout — reject with resource exhausted crate::metrics::record_rate_limit_reject(repo);
Err(tonic::Status::resource_exhausted(format!( Err(tonic::Status::resource_exhausted(format!(
"rate limit exceeded for repository '{repo}': max {max} concurrent operations", "rate limit exceeded for repository '{repo}': max {max} concurrent operations",
max = get_max_concurrent() max = get_max_concurrent()
@@ -161,6 +183,52 @@ pub fn remove_repository(repo_relative_path: &str) {
tracing::debug!(repo = %repo_relative_path, "rate limit semaphore removed"); tracing::debug!(repo = %repo_relative_path, "rate limit semaphore removed");
} }
/// Clean up idle semaphores that have no active permits and haven't been
/// accessed within the idle threshold.
///
/// An entry is removed only when both conditions hold:
/// - all of its permits are available, measured against the capacity the
///   entry itself was created with (`max_permits`), and
/// - its `last_accessed` timestamp is older than
///   `SEMAPHORE_IDLE_THRESHOLD_SECS`.
///
/// Call this periodically (e.g., from a background task) to prevent
/// unbounded growth of the semaphore map.
pub fn cleanup_idle_semaphores() {
    let threshold = std::time::Duration::from_secs(SEMAPHORE_IDLE_THRESHOLD_SECS);
    let now = Instant::now();
    let mut removed = 0u64;

    limiter().semaphores.retain(|_key, entry| {
        // Compare against the entry's own capacity, not the current global
        // limit: after a runtime limit change the two can differ, and using
        // the global value could evict a semaphore whose permits are still
        // held (or never reclaim one created with a different capacity).
        let is_idle = entry.sem.available_permits() == entry.max_permits;
        // A poisoned timestamp lock is treated as "not stale", so the entry is kept.
        let is_stale = entry
            .last_accessed
            .read()
            .map(|last| now.duration_since(*last) > threshold)
            .unwrap_or(false);

        let keep = !(is_idle && is_stale);
        if !keep {
            removed += 1;
        }
        keep
    });

    if removed > 0 {
        tracing::info!(
            removed = removed,
            "cleaned up idle rate-limit semaphores"
        );
    }
}
/// Spawns a Tokio task that calls [`cleanup_idle_semaphores`] on a 60-second
/// interval timer and returns the task's join handle.
///
/// The loop has no exit condition of its own.
pub fn start_semaphore_cleanup_task() -> tokio::task::JoinHandle<()> {
    tokio::spawn(async {
        let period = std::time::Duration::from_secs(60);
        let mut ticker = tokio::time::interval(period);
        loop {
            ticker.tick().await;
            cleanup_idle_semaphores();
        }
    })
}
/// Update the max concurrent limit at runtime. /// Update the max concurrent limit at runtime.
/// ///
/// Only replaces semaphores that have no active permits (idle repos). /// Only replaces semaphores that have no active permits (idle repos).
@@ -187,8 +255,7 @@ pub fn set_max_concurrent(max: usize) {
.semaphores .semaphores
.iter() .iter()
.filter_map(|entry| { .filter_map(|entry| {
let sem = entry.value(); if entry.value().max_permits == old_max {
if sem.available_permits() == old_max {
Some(entry.key().clone()) Some(entry.key().clone())
} else { } else {
None None
@@ -197,7 +264,14 @@ pub fn set_max_concurrent(max: usize) {
.collect(); .collect();
for key in keys { for key in keys {
l.semaphores.insert(key, Arc::new(Semaphore::new(max))); l.semaphores.insert(
key,
SemaphoreEntry {
sem: Arc::new(Semaphore::new(max)),
max_permits: max,
last_accessed: RwLock::new(Instant::now()),
},
);
} }
tracing::info!(max_concurrent = max, "rate limit max_concurrent updated"); tracing::info!(max_concurrent = max, "rate limit max_concurrent updated");
-4
View File
@@ -72,19 +72,16 @@ impl GitBare {
"--format=%(refname)%00%(objectname)%00%(symref)".to_string(), "--format=%(refname)%00%(objectname)%00%(symref)".to_string(),
]; ];
// Sort direction
let sort_prefix = match SortDirection::try_from(request.sort_direction) { let sort_prefix = match SortDirection::try_from(request.sort_direction) {
Ok(SortDirection::Asc) => "", Ok(SortDirection::Asc) => "",
_ => "-", _ => "-",
}; };
args.push(format!("--sort={sort_prefix}refname")); args.push(format!("--sort={sort_prefix}refname"));
// Containing OIDs filter
if let Some(first_oid) = request.containing_oids.first() { if let Some(first_oid) = request.containing_oids.first() {
args.push(format!("--points-at={first_oid}")); args.push(format!("--points-at={first_oid}"));
} }
// Prefix or pattern
if !request.prefixes.is_empty() { if !request.prefixes.is_empty() {
for prefix in &request.prefixes { for prefix in &request.prefixes {
args.push(prefix.clone()); args.push(prefix.clone());
@@ -115,7 +112,6 @@ impl GitBare {
let oid = parts[1].to_string(); let oid = parts[1].to_string();
let symref = parts.get(2).map(|s| s.to_string()).unwrap_or_default(); let symref = parts.get(2).map(|s| s.to_string()).unwrap_or_default();
// Apply glob pattern filter if set
if !request.pattern.is_empty() && !simple_glob_match(&request.pattern, &ref_name) { if !request.pattern.is_empty() && !simple_glob_match(&request.pattern, &ref_name) {
continue; continue;
} }
+1 -2
View File
@@ -15,7 +15,7 @@ impl GitBare {
if !update.old_oid.is_empty() { if !update.old_oid.is_empty() {
crate::sanitize::validate_revision(&update.old_oid)?; crate::sanitize::validate_revision(&update.old_oid)?;
stdin_input.push_str(&format!( stdin_input.push_str(&format!(
"update {} {}\0{}\n", "update {} {} {}\n",
update.ref_name, update.new_oid, update.old_oid update.ref_name, update.new_oid, update.old_oid
)); ));
} else { } else {
@@ -32,7 +32,6 @@ impl GitBare {
&self.bare_dir.to_string_lossy(), &self.bare_dir.to_string_lossy(),
"update-ref", "update-ref",
"--stdin", "--stdin",
"-z",
]) ])
.stdin(std::process::Stdio::piped()) .stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped()) .stdout(std::process::Stdio::piped())
-3
View File
@@ -46,9 +46,6 @@ pub fn find_remote_repository(
continue; continue;
} }
// Lines can be:
// SHA<TAB>refname (direct ref)
// ref: refs/heads/main<TAB>HEAD (symbolic ref via --symref)
if line.starts_with("ref:") { if line.starts_with("ref:") {
if let Some((target, name)) = line.split_once('\t') { if let Some((target, name)) = line.split_once('\t') {
refs.push(RemoteHead { refs.push(RemoteHead {
-1
View File
@@ -103,7 +103,6 @@ impl GitBare {
}); });
} }
// Update local HEAD to match remote HEAD
let head_output = std::process::Command::new("git") let head_output = std::process::Command::new("git")
.args([ .args([
"--git-dir", "--git-dir",
-11
View File
@@ -57,57 +57,46 @@ impl GitBare {
fn detect_license(content: &str) -> (&'static str, &'static str, f64) { fn detect_license(content: &str) -> (&'static str, &'static str, f64) {
let lower = content.to_lowercase(); let lower = content.to_lowercase();
// MIT
if lower.contains("permission is hereby granted, free of charge") && lower.contains("mit") { if lower.contains("permission is hereby granted, free of charge") && lower.contains("mit") {
return ("MIT", "MIT License", 0.95); return ("MIT", "MIT License", 0.95);
} }
// Apache 2.0
if lower.contains("apache license, version 2.0") || lower.contains("apache-2.0") { if lower.contains("apache license, version 2.0") || lower.contains("apache-2.0") {
return ("Apache-2.0", "Apache License 2.0", 0.95); return ("Apache-2.0", "Apache License 2.0", 0.95);
} }
// GPL 3.0
if lower.contains("gnu general public license") && lower.contains("version 3") { if lower.contains("gnu general public license") && lower.contains("version 3") {
return ("GPL-3.0", "GNU General Public License v3.0", 0.90); return ("GPL-3.0", "GNU General Public License v3.0", 0.90);
} }
// GPL 2.0
if lower.contains("gnu general public license") && lower.contains("version 2") { if lower.contains("gnu general public license") && lower.contains("version 2") {
return ("GPL-2.0", "GNU General Public License v2.0", 0.90); return ("GPL-2.0", "GNU General Public License v2.0", 0.90);
} }
// BSD 3
if lower.contains("redistribution and use in source and binary forms") if lower.contains("redistribution and use in source and binary forms")
&& lower.contains("neither the name of") && lower.contains("neither the name of")
{ {
return ("BSD-3-Clause", "BSD 3-Clause License", 0.85); return ("BSD-3-Clause", "BSD 3-Clause License", 0.85);
} }
// BSD 2
if lower.contains("redistribution and use in source and binary forms") { if lower.contains("redistribution and use in source and binary forms") {
return ("BSD-2-Clause", "BSD 2-Clause License", 0.80); return ("BSD-2-Clause", "BSD 2-Clause License", 0.80);
} }
// AGPL
if lower.contains("gnu affero general public license") { if lower.contains("gnu affero general public license") {
return ("AGPL-3.0", "GNU Affero General Public License v3.0", 0.90); return ("AGPL-3.0", "GNU Affero General Public License v3.0", 0.90);
} }
// LGPL
if lower.contains("gnu lesser general public license") { if lower.contains("gnu lesser general public license") {
return ("LGPL-3.0", "GNU Lesser General Public License v3.0", 0.85); return ("LGPL-3.0", "GNU Lesser General Public License v3.0", 0.85);
} }
// MPL
if lower.contains("mozilla public license") { if lower.contains("mozilla public license") {
return ("MPL-2.0", "Mozilla Public License 2.0", 0.90); return ("MPL-2.0", "Mozilla Public License 2.0", 0.90);
} }
// Unlicense
if lower.contains("this is free and unencumbered software released into the public domain") { if lower.contains("this is free and unencumbered software released into the public domain") {
return ("Unlicense", "The Unlicense", 0.95); return ("Unlicense", "The Unlicense", 0.95);
} }
// ISC
if lower.contains("permission to use, copy, modify, and/or distribute") && lower.contains("isc") if lower.contains("permission to use, copy, modify, and/or distribute") && lower.contains("isc")
{ {
return ("ISC", "ISC License", 0.80); return ("ISC", "ISC License", 0.80);
-15
View File
@@ -7,7 +7,6 @@ use crate::bare::GitBare;
use crate::error::{GitError, GitResult}; use crate::error::{GitError, GitResult};
use crate::pb::{GetLanguageStatsRequest, GetLanguageStatsResponse, LanguageStat, object_selector}; use crate::pb::{GetLanguageStatsRequest, GetLanguageStatsResponse, LanguageStat, object_selector};
// Include the generated linguist rules
include!(concat!(env!("OUT_DIR"), "/linguist_generated.rs")); include!(concat!(env!("OUT_DIR"), "/linguist_generated.rs"));
/// Default max file size for line counting (512 KB). /// Default max file size for line counting (512 KB).
@@ -17,7 +16,6 @@ const MAX_TREE_WALK_DEPTH: usize = 256;
/// Look up a language by file extension (case-insensitive, includes leading dot). /// Look up a language by file extension (case-insensitive, includes leading dot).
fn lookup_by_extension(ext: &str) -> Option<(&'static str, &'static str)> { fn lookup_by_extension(ext: &str) -> Option<(&'static str, &'static str)> {
let ext_lower = ext.to_lowercase(); let ext_lower = ext.to_lowercase();
// Binary search on the sorted EXTENSION_MAP
EXTENSION_MAP EXTENSION_MAP
.binary_search_by(|&(e, _, _)| e.cmp(ext_lower.as_str())) .binary_search_by(|&(e, _, _)| e.cmp(ext_lower.as_str()))
.ok() .ok()
@@ -54,13 +52,11 @@ fn detect_language(path: &str, is_binary: bool) -> Option<(&'static str, &'stati
.and_then(|n| n.to_str()) .and_then(|n| n.to_str())
.unwrap_or(""); .unwrap_or("");
// Try filename match first (e.g., Makefile, Dockerfile)
if let Some(result) = lookup_by_filename(file_name) { if let Some(result) = lookup_by_filename(file_name) {
tracing::debug!(path = %path, lang = result.0, "matched by filename"); tracing::debug!(path = %path, lang = result.0, "matched by filename");
return Some(result); return Some(result);
} }
// Try extension match
if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) { if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) {
let ext_with_dot = format!(".{ext}"); let ext_with_dot = format!(".{ext}");
if let Some(result) = lookup_by_extension(&ext_with_dot) { if let Some(result) = lookup_by_extension(&ext_with_dot) {
@@ -72,13 +68,10 @@ fn detect_language(path: &str, is_binary: bool) -> Option<(&'static str, &'stati
tracing::debug!(path = %path, "no extension found"); tracing::debug!(path = %path, "no extension found");
} }
// For binary files with no recognized extension, classify by media type
if is_binary { if is_binary {
// Try extension-based binary classification
if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) { if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) {
let ext_lower = format!(".{ext}").to_lowercase(); let ext_lower = format!(".{ext}").to_lowercase();
let media_type = classify_binary_extension(&ext_lower); let media_type = classify_binary_extension(&ext_lower);
// Return as a synthetic language name
return Some((media_type, "data")); return Some((media_type, "data"));
} }
return Some(("Binary", "data")); return Some(("Binary", "data"));
@@ -146,7 +139,6 @@ impl GitBare {
.try_into_tree() .try_into_tree()
.map_err(|e| GitError::Gix(e.to_string()))?; .map_err(|e| GitError::Gix(e.to_string()))?;
// If path is specified, descend into subdirectory
if !request.path.is_empty() { if !request.path.is_empty() {
crate::sanitize::validate_file_path(&request.path)?; crate::sanitize::validate_file_path(&request.path)?;
let entry = tree let entry = tree
@@ -173,7 +165,6 @@ impl GitBare {
}; };
self.walk_tree(&repo, &tree, &prefix, 0, &mut ctx)?; self.walk_tree(&repo, &tree, &prefix, 0, &mut ctx)?;
// Resolve groups: merge child language stats into parent group
tracing::info!( tracing::info!(
total_files, total_files,
total_bytes, total_bytes,
@@ -193,13 +184,11 @@ impl GitBare {
entry.file_count = entry.file_count.saturating_add(s.file_count); entry.file_count = entry.file_count.saturating_add(s.file_count);
entry.bytes = entry.bytes.saturating_add(s.bytes); entry.bytes = entry.bytes.saturating_add(s.bytes);
entry.lines = entry.lines.saturating_add(s.lines); entry.lines = entry.lines.saturating_add(s.lines);
// Keep the lang_type from the parent (or first encountered)
if entry.lang_type.is_empty() { if entry.lang_type.is_empty() {
entry.lang_type = s.lang_type; entry.lang_type = s.lang_type;
} }
} }
// Build response sorted by bytes descending
let mut languages: Vec<LanguageStat> = resolved let mut languages: Vec<LanguageStat> = resolved
.into_iter() .into_iter()
.map(|(language, s)| { .map(|(language, s)| {
@@ -272,15 +261,12 @@ impl GitBare {
let data = &blob.data; let data = &blob.data;
let size = data.len() as u64; let size = data.len() as u64;
// Skip empty files
if size == 0 { if size == 0 {
continue; continue;
} }
// Check if binary (contains null byte)
let is_binary = data.contains(&0); let is_binary = data.contains(&0);
// Detect language
let Some((lang_name, lang_type)) = detect_language(&path, is_binary) else { let Some((lang_name, lang_type)) = detect_language(&path, is_binary) else {
tracing::debug!(path = %path, is_binary, "no language detected"); tracing::debug!(path = %path, is_binary, "no language detected");
continue; continue;
@@ -288,7 +274,6 @@ impl GitBare {
let lang_key = lang_name.to_string(); let lang_key = lang_name.to_string();
// Count code lines only for non-binary files within size limit
let lines = if !is_binary && size <= u64::from(ctx.max_file_size) { let lines = if !is_binary && size <= u64::from(ctx.max_file_size) {
count_code_lines(data) count_code_lines(data)
} else { } else {
-7
View File
@@ -18,7 +18,6 @@ impl GitBare {
OptimizeStrategy::Heuristic | OptimizeStrategy::Aggressive => { OptimizeStrategy::Heuristic | OptimizeStrategy::Aggressive => {
let stats = self.get_repository_statistics()?; let stats = self.get_repository_statistics()?;
// Run commit-graph write if needed
if (stats.commit_graph_size_bytes == 0 || strategy == OptimizeStrategy::Aggressive) if (stats.commit_graph_size_bytes == 0 || strategy == OptimizeStrategy::Aggressive)
&& let Ok(resp) = write_commit_graph(self, false, false) && let Ok(resp) = write_commit_graph(self, false, false)
{ {
@@ -28,7 +27,6 @@ impl GitBare {
stdout_all.push_str(&resp.stdout); stdout_all.push_str(&resp.stdout);
} }
// Repack if many loose objects or packfiles
let repack_needed = stats.loose_object_count > 1000 || stats.packfile_count > 10; let repack_needed = stats.loose_object_count > 1000 || stats.packfile_count > 10;
if repack_needed || strategy == OptimizeStrategy::Aggressive { if repack_needed || strategy == OptimizeStrategy::Aggressive {
@@ -41,7 +39,6 @@ impl GitBare {
} }
} }
// Prune if aggressive
if strategy == OptimizeStrategy::Aggressive if strategy == OptimizeStrategy::Aggressive
&& let Ok(resp) = run_gc(self, true, true) && let Ok(resp) = run_gc(self, true, true)
{ {
@@ -52,7 +49,6 @@ impl GitBare {
} }
} }
OptimizeStrategy::Incremental => { OptimizeStrategy::Incremental => {
// Just run commit-graph write incrementally
if let Ok(resp) = write_commit_graph(self, false, false) { if let Ok(resp) = write_commit_graph(self, false, false) {
if !resp.ok { if !resp.ok {
stderr_all.push_str(&resp.stderr); stderr_all.push_str(&resp.stderr);
@@ -71,7 +67,6 @@ impl GitBare {
} }
fn get_repository_statistics(&self) -> GitResult<RepositoryStatistics> { fn get_repository_statistics(&self) -> GitResult<RepositoryStatistics> {
// Count loose objects
let loose = std::fs::read_dir(self.bare_dir.join("objects")) let loose = std::fs::read_dir(self.bare_dir.join("objects"))
.map(|d| { .map(|d| {
d.filter_map(|e| e.ok()) d.filter_map(|e| e.ok())
@@ -83,13 +78,11 @@ impl GitBare {
}) })
.unwrap_or(0); .unwrap_or(0);
// Count packfiles
let pack_dir = self.bare_dir.join("objects").join("pack"); let pack_dir = self.bare_dir.join("objects").join("pack");
let pack_count = std::fs::read_dir(&pack_dir) let pack_count = std::fs::read_dir(&pack_dir)
.map(|d| d.filter_map(|e| e.ok()).count() as u64) .map(|d| d.filter_map(|e| e.ok()).count() as u64)
.unwrap_or(0); .unwrap_or(0);
// Check commit-graph
let cg_size = std::fs::metadata( let cg_size = std::fs::metadata(
self.bare_dir self.bare_dir
.join("objects") .join("objects")
-2
View File
@@ -67,7 +67,6 @@ impl GitBare {
let mut results = Vec::new(); let mut results = Vec::new();
for line in stdout.lines() { for line in stdout.lines() {
// Format: path:line:col:matched_text
if let Some((path_and_rest, matched)) = line.rsplit_once(':') { if let Some((path_and_rest, matched)) = line.rsplit_once(':') {
let prefix_parts: Vec<&str> = path_and_rest.rsplitn(3, ':').collect(); let prefix_parts: Vec<&str> = path_and_rest.rsplitn(3, ':').collect();
if prefix_parts.len() >= 3 if prefix_parts.len() >= 3
@@ -144,7 +143,6 @@ impl GitBare {
continue; continue;
} }
// Simple substring/case-insensitive matching for file names
let query = &request.query; let query = &request.query;
let matched = if query.is_empty() { let matched = if query.is_empty() {
true true
+60 -29
View File
@@ -32,9 +32,13 @@ pub fn validate_oid_hex(hex: &str) -> GitResult<()> {
if hex.is_empty() { if hex.is_empty() {
return Err(GitError::InvalidArgument("oid hex cannot be empty".into())); return Err(GitError::InvalidArgument("oid hex cannot be empty".into()));
} }
if !(4..=64).contains(&hex.len()) { if !(crate::config::MIN_OID_HEX_LENGTH..=crate::config::MAX_OID_HEX_LENGTH)
.contains(&hex.len())
{
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
"oid hex length must be 4..=64 chars: {}", "oid hex length must be {}..={} chars: {}",
crate::config::MIN_OID_HEX_LENGTH,
crate::config::MAX_OID_HEX_LENGTH,
hex.len() hex.len()
))); )));
} }
@@ -75,9 +79,10 @@ pub fn validate_ref_name(name: &str) -> GitResult<()> {
"ref name contains forbidden character: {name}" "ref name contains forbidden character: {name}"
))); )));
} }
if name.len() > 255 { if name.len() > crate::config::MAX_REF_NAME_LENGTH {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
"ref name too long (max 255 chars): {name}" "ref name too long (max {} chars): {name}",
crate::config::MAX_REF_NAME_LENGTH
))); )));
} }
Ok(()) Ok(())
@@ -91,35 +96,36 @@ pub fn validate_revision(rev: &str) -> GitResult<()> {
if rev.is_empty() { if rev.is_empty() {
return Err(GitError::InvalidArgument("revision cannot be empty".into())); return Err(GitError::InvalidArgument("revision cannot be empty".into()));
} }
if rev.len() > 256 { if rev.len() > crate::config::MAX_REVISION_LENGTH {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
"revision too long (max 256 chars): {}", "revision too long (max {} chars): {}",
crate::config::MAX_REVISION_LENGTH,
rev.len() rev.len()
))); )));
} }
if rev.chars().all(|c| c.is_ascii_hexdigit()) && rev.len() >= 4 && rev.len() <= 64 { if rev.chars().all(|c| c.is_ascii_hexdigit())
&& rev.len() >= crate::config::MIN_OID_HEX_LENGTH
&& rev.len() <= crate::config::MAX_OID_HEX_LENGTH
{
return Ok(()); return Ok(());
} }
if rev == "HEAD" { if rev == "HEAD" {
return Ok(()); return Ok(());
} }
// Allow ref:refs/heads/... (git internal format)
if let Some(rest) = rev.strip_prefix("ref:") { if let Some(rest) = rev.strip_prefix("ref:") {
return validate_ref_name(rest.trim()); return validate_ref_name(rest.trim());
} }
const MAX_ANCESTRY_DEPTH: u32 = 10000;
if let Some(tilde_pos) = rev.rfind('~') { if let Some(tilde_pos) = rev.rfind('~') {
let num_part = &rev[tilde_pos + 1..]; let num_part = &rev[tilde_pos + 1..];
if !num_part.is_empty() && num_part.chars().all(|c| c.is_ascii_digit()) { if !num_part.is_empty() && num_part.chars().all(|c| c.is_ascii_digit()) {
let depth: u32 = num_part let depth: u32 = num_part
.parse() .parse()
.map_err(|_| GitError::InvalidArgument("invalid ~N syntax".into()))?; .map_err(|_| GitError::InvalidArgument("invalid ~N syntax".into()))?;
if depth > MAX_ANCESTRY_DEPTH { if depth > crate::config::MAX_ANCESTRY_DEPTH {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
"~N depth too large: {} (max {})", "~N depth too large: {} (max {})",
depth, MAX_ANCESTRY_DEPTH depth, crate::config::MAX_ANCESTRY_DEPTH
))); )));
} }
} }
@@ -140,10 +146,10 @@ pub fn validate_revision(rev: &str) -> GitResult<()> {
let depth: u32 = num_part let depth: u32 = num_part
.parse() .parse()
.map_err(|_| GitError::InvalidArgument("invalid ^N syntax".into()))?; .map_err(|_| GitError::InvalidArgument("invalid ^N syntax".into()))?;
if depth > MAX_ANCESTRY_DEPTH { if depth > crate::config::MAX_ANCESTRY_DEPTH {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
"^N depth too large: {} (max {})", "^N depth too large: {} (max {})",
depth, MAX_ANCESTRY_DEPTH depth, crate::config::MAX_ANCESTRY_DEPTH
))); )));
} }
} }
@@ -204,9 +210,10 @@ pub fn validate_file_path(path: &str) -> GitResult<()> {
"file path cannot contain null byte: {path}" "file path cannot contain null byte: {path}"
))); )));
} }
if path.len() > 4096 { if path.len() > crate::config::MAX_FILE_PATH_LENGTH {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
"file path too long (max 4096 chars): {path}" "file path too long (max {} chars): {path}",
crate::config::MAX_FILE_PATH_LENGTH
))); )));
} }
@@ -220,7 +227,6 @@ pub fn validate_file_path(path: &str) -> GitResult<()> {
))); )));
} }
// Windows reserved names check
#[cfg(target_os = "windows")] #[cfg(target_os = "windows")]
{ {
const RESERVED_NAMES: &[&str] = &[ const RESERVED_NAMES: &[&str] = &[
@@ -307,10 +313,11 @@ pub fn validate_remote_url(url: &str) -> GitResult<()> {
"remote URL cannot be empty".into(), "remote URL cannot be empty".into(),
)); ));
} }
if url.len() > 4096 { if url.len() > crate::config::MAX_REMOTE_URL_LENGTH {
return Err(GitError::InvalidArgument( return Err(GitError::InvalidArgument(format!(
"remote URL too long (max 4096 chars)".into(), "remote URL too long (max {} chars)",
)); crate::config::MAX_REMOTE_URL_LENGTH
)));
} }
if url.contains('\0') || url.contains('\n') || url.contains('\r') { if url.contains('\0') || url.contains('\n') || url.contains('\r') {
return Err(GitError::InvalidArgument( return Err(GitError::InvalidArgument(
@@ -343,14 +350,37 @@ pub fn validate_refspec(refspec: &str) -> GitResult<()> {
"refspec contains shell metacharacter: {refspec}" "refspec contains shell metacharacter: {refspec}"
))); )));
} }
if refspec.len() > 1024 { if refspec.len() > crate::config::MAX_REFSPEC_LENGTH {
return Err(GitError::InvalidArgument( return Err(GitError::InvalidArgument(format!(
"refspec too long (max 1024 chars)".into(), "refspec too long (max {} chars)",
)); crate::config::MAX_REFSPEC_LENGTH
)));
} }
Ok(()) Ok(())
} }
/// Sanitize git stderr output before logging so that it does not leak
/// credentials embedded in URLs or the current user's home directory path.
///
/// Two rewrites are applied, in this order:
///
/// 1. For every `https://`, `http://`, `git+ssh://` or `ssh://` URL, a
///    `user[:password]@` prefix inside the URL authority is replaced with
///    `***:***@`.
/// 2. Every occurrence of the value of the `HOME` environment variable is
///    replaced with `~`. The substitution is skipped when `HOME` is unset,
///    not valid UTF-8, empty, or a single character such as `/`, because
///    replacing those would garble the whole message.
///
/// Text matching neither rule is returned unchanged.
pub fn sanitize_git_stderr(stderr: &str) -> String {
    let redacted = redact_url_credentials(stderr);
    match std::env::var_os("HOME").and_then(|v| v.into_string().ok()) {
        Some(homedir) if homedir.len() > 1 => redacted.replace(&homedir, "~"),
        _ => redacted,
    }
}

/// Replace the userinfo part of every recognised URL in `text` with `***:***`.
///
/// The input is scanned once from left to right and the output is built
/// separately, so already-redacted text is never re-examined and the scan
/// always terminates.
fn redact_url_credentials(text: &str) -> String {
    const SCHEMES: [&str; 4] = ["https://", "http://", "git+ssh://", "ssh://"];
    const MASK: &str = "***:***@";

    let mut out = String::with_capacity(text.len());
    let mut rest = text;

    // Always handle the earliest scheme occurrence first; this makes
    // `git+ssh://` win over the `ssh://` it contains.
    while let Some((pos, scheme)) = SCHEMES
        .iter()
        .copied()
        .filter_map(|scheme| rest.find(scheme).map(|pos| (pos, scheme)))
        .min_by_key(|&(pos, _)| pos)
    {
        let after_scheme = pos + scheme.len();
        out.push_str(&rest[..after_scheme]);
        let tail = &rest[after_scheme..];

        // Userinfo can only appear in the authority, which ends at the first
        // path/query/fragment delimiter. Whitespace and quotes also end the
        // URL inside a log message, so an unrelated `@` later in the text
        // (e.g. an email address) is not mistaken for credentials.
        let authority_len = tail
            .find(|c: char| c.is_whitespace() || matches!(c, '/' | '?' | '#' | '\'' | '"'))
            .unwrap_or(tail.len());

        rest = match tail[..authority_len].rfind('@') {
            Some(at) => {
                out.push_str(MASK);
                &tail[at + 1..]
            }
            // No credentials in this URL: keep scanning after the scheme so
            // later URLs are still processed.
            None => tail,
        };
    }

    out.push_str(rest);
    out
}
/// Validate a storage-relative path (used in resolve_for_init and from_repository_header). /// Validate a storage-relative path (used in resolve_for_init and from_repository_header).
/// ///
/// Must not contain path traversal, must be a simple relative path. /// Must not contain path traversal, must be a simple relative path.
@@ -370,10 +400,11 @@ pub fn validate_relative_path(path: &str) -> GitResult<()> {
"relative_path cannot contain null byte".into(), "relative_path cannot contain null byte".into(),
)); ));
} }
if path.len() > 4096 { if path.len() > crate::config::MAX_RELATIVE_PATH_LENGTH {
return Err(GitError::InvalidArgument( return Err(GitError::InvalidArgument(format!(
"relative_path too long (max 4096 chars)".into(), "relative_path too long (max {} chars)",
)); crate::config::MAX_RELATIVE_PATH_LENGTH
)));
} }
if path.contains("..") { if path.contains("..") {
return Err(GitError::InvalidArgument(format!( return Err(GitError::InvalidArgument(format!(
+107 -104
View File
@@ -22,25 +22,13 @@
//! - **TTL** (time-to-live): 10 minutes — hard upper bound for safety //! - **TTL** (time-to-live): 10 minutes — hard upper bound for safety
//! - Evictions are tracked via metrics for observability //! - Evictions are tracked via metrics for observability
use std::sync::OnceLock; use dashmap::DashMap;
use std::time::Duration; use std::sync::{Arc, OnceLock};
use moka::sync::Cache; use moka::sync::Cache;
use prost::Message; use prost::Message;
/// Maximum total cache weight (key + value allocated bytes): 256 MB. use crate::config::{CACHE_ENTRY_OVERHEAD as ENTRY_OVERHEAD, CACHE_MAX_TTL, CACHE_MAX_WEIGHT, CACHE_TTI};
const CACHE_MAX_WEIGHT: u64 = 256 * 1024 * 1024;
/// Hard time-to-live: entries older than this are unconditionally evicted.
const CACHE_MAX_TTL: Duration = Duration::from_secs(600); // 10 min
/// Time-to-idle: entries not accessed within this window are evicted.
/// Frequently accessed entries survive up to TTL, cold entries expire quickly.
const CACHE_TTI: Duration = Duration::from_secs(120); // 2 min
/// Estimated per-entry overhead (Moka internal Arc + metadata).
/// Added to the weigher result to prevent underestimation.
const ENTRY_OVERHEAD: u32 = 128;
struct CacheState { struct CacheState {
store: Cache<Vec<u8>, Vec<u8>>, store: Cache<Vec<u8>, Vec<u8>>,
@@ -52,7 +40,6 @@ fn state() -> &'static CacheState {
CACHE.get_or_init(|| { CACHE.get_or_init(|| {
let store = Cache::builder() let store = Cache::builder()
.weigher(|key: &Vec<u8>, value: &Vec<u8>| -> u32 { .weigher(|key: &Vec<u8>, value: &Vec<u8>| -> u32 {
// capacity() reflects actual allocation including spare capacity
key.capacity() as u32 + value.capacity() as u32 + ENTRY_OVERHEAD key.capacity() as u32 + value.capacity() as u32 + ENTRY_OVERHEAD
}) })
.max_capacity(CACHE_MAX_WEIGHT) .max_capacity(CACHE_MAX_WEIGHT)
@@ -65,7 +52,6 @@ fn state() -> &'static CacheState {
moka::notification::RemovalCause::Replaced => "replaced", moka::notification::RemovalCause::Replaced => "replaced",
moka::notification::RemovalCause::Size => "size", moka::notification::RemovalCause::Size => "size",
}; };
// Extract namespace for per-namespace metrics
let namespace = decode_namespace(&key); let namespace = decode_namespace(&key);
crate::metrics::record_cache_eviction(namespace, cause_str); crate::metrics::record_cache_eviction(namespace, cause_str);
}) })
@@ -87,7 +73,27 @@ fn cache() -> &'static Cache<Vec<u8>, Vec<u8>> {
&state().store &state().store
} }
// Key encoding
/// Secondary index from a repository path to the cache keys recorded for it.
struct RepoKeyIndex {
    /// Keys are repository path strings; each value is the list of cache
    /// keys that were tracked under that path.
    repo_to_keys: DashMap<String, Vec<Arc<Vec<u8>>>>,
}

/// Process-wide index instance, created on first use.
static REPO_KEY_INDEX: OnceLock<RepoKeyIndex> = OnceLock::new();

/// Return the shared [`RepoKeyIndex`], initialising it with an empty map
/// the first time it is requested.
fn repo_key_index() -> &'static RepoKeyIndex {
    REPO_KEY_INDEX.get_or_init(|| {
        let repo_to_keys = DashMap::new();
        RepoKeyIndex { repo_to_keys }
    })
}
/// Record `key` in the repository key index under `repo_path`.
///
/// The key is appended to the repository's list, creating the list if this
/// is the first key seen for that path.
// NOTE(review): keys are appended without de-duplication and nothing here
// prunes them; confirm the index is trimmed when cache entries are evicted.
fn track_cache_key(repo_path: &str, key: Arc<Vec<u8>>) {
    let index = repo_key_index();
    let mut tracked = index.repo_to_keys.entry(repo_path.to_string()).or_default();
    tracked.push(key);
}
/// Encode a structured cache key. /// Encode a structured cache key.
/// ///
@@ -105,12 +111,15 @@ fn encode_key(namespace: &str, repo_path: &str, request_bytes: &[u8]) -> Option<
return None; return None;
} }
let total = 1 + ns.len() + 2 + rp.len() + request_bytes.len(); const SEPARATOR: u8 = 0xFF;
let total = 1 + ns.len() + 1 + 2 + rp.len() + 1 + request_bytes.len();
let mut key = Vec::with_capacity(total); let mut key = Vec::with_capacity(total);
key.push(ns.len() as u8); key.push(ns.len() as u8);
key.extend_from_slice(ns); key.extend_from_slice(ns);
key.push(SEPARATOR);
key.extend_from_slice(&(rp.len() as u16).to_le_bytes()); key.extend_from_slice(&(rp.len() as u16).to_le_bytes());
key.extend_from_slice(rp); key.extend_from_slice(rp);
key.push(SEPARATOR);
key.extend_from_slice(request_bytes); key.extend_from_slice(request_bytes);
Some(key) Some(key)
} }
@@ -125,31 +134,6 @@ fn decode_namespace(key: &[u8]) -> &str {
std::str::from_utf8(&key[1..end]).unwrap_or("unknown") std::str::from_utf8(&key[1..end]).unwrap_or("unknown")
} }
/// Borrow the repository-path segment out of an encoded cache key.
///
/// The layout read here is: one byte holding the namespace length, the
/// namespace bytes, a little-endian `u16` repo-path length, then the
/// repo-path bytes. Returns `None` when the key is too short for any of
/// those parts.
fn extract_repo_path_bytes(key: &[u8]) -> Option<&[u8]> {
    // First byte: how many namespace bytes follow.
    let (&ns_len, after_len_byte) = key.split_first()?;
    // Skip the namespace; fails if the key ends inside it.
    let after_namespace = after_len_byte.get(usize::from(ns_len)..)?;
    // Next two bytes: little-endian repo-path length.
    let [lo, hi, payload @ ..] = after_namespace else {
        return None;
    };
    let rp_len = usize::from(u16::from_le_bytes([*lo, *hi]));
    // Fails if the declared length runs past the end of the key.
    payload.get(..rp_len)
}
/// Report whether the repo-path segment of `key` equals `target_repo`.
///
/// A key whose repo path cannot be extracted never matches.
fn key_matches_repo(key: &[u8], target_repo: &[u8]) -> bool {
    match extract_repo_path_bytes(key) {
        Some(repo_path) => repo_path == target_repo,
        None => false,
    }
}
// Single-message cache
/// Cache a single protobuf response. /// Cache a single protobuf response.
/// ///
@@ -176,8 +160,10 @@ where
if let Some(bytes) = cache().get(&key) if let Some(bytes) = cache().get(&key)
&& let Ok(response) = Res::decode(bytes.as_slice()) && let Ok(response) = Res::decode(bytes.as_slice())
{ {
let elapsed = std::time::Duration::ZERO; // Moka get is memory-only, effectively instant let elapsed = std::time::Duration::ZERO;
crate::metrics::record_cache_op("moka", "hit", elapsed); crate::metrics::record_cache_op("moka", "hit", elapsed);
crate::metrics::record_cache_hit_ns(namespace);
crate::metrics::record_cache_value_size(namespace, bytes.len());
tracing::debug!( tracing::debug!(
namespace = %namespace, namespace = %namespace,
repo = %repo_path, repo = %repo_path,
@@ -188,6 +174,8 @@ where
return Ok(response); return Ok(response);
} }
crate::metrics::record_cache_miss_ns(namespace);
tracing::debug!( tracing::debug!(
namespace = %namespace, namespace = %namespace,
repo = %repo_path, repo = %repo_path,
@@ -208,14 +196,16 @@ where
"failed to encode cache response" "failed to encode cache response"
); );
} else { } else {
crate::metrics::record_cache_value_size(namespace, bytes.len());
let key_arc = Arc::new(key.clone());
cache().insert(key, bytes); cache().insert(key, bytes);
track_cache_key(repo_path, key_arc);
} }
crate::metrics::record_cache_op("moka", "miss", build_elapsed); crate::metrics::record_cache_op("moka", "miss", build_elapsed);
Ok(response) Ok(response)
} }
// Vec-message cache
/// Cache a `Vec<Item>` protobuf response using length-delimited encoding. /// Cache a `Vec<Item>` protobuf response using length-delimited encoding.
/// ///
@@ -238,48 +228,63 @@ where
return build(); return build();
}; };
// Try cache hit
if let Some(bytes) = cache().get(&key) { if let Some(bytes) = cache().get(&key) {
let mut items = Vec::new(); if bytes.len() < 4 {
let mut remaining = bytes.as_slice(); cache().invalidate(&key);
let mut valid = true; } else {
let stored_crc = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
let actual_crc = crc32fast::hash(&bytes[4..]);
if stored_crc != actual_crc {
tracing::warn!(
namespace = %namespace,
repo = %repo_path,
"cache entry corrupted (CRC mismatch), invalidating"
);
cache().invalidate(&key);
} else {
let mut items = Vec::new();
let mut remaining = &bytes[4..];
let mut valid = true;
// Pre-allocate based on first size hint if let Ok(first) = Item::decode_length_delimited(&mut remaining) {
if let Ok(first) = Item::decode_length_delimited(&mut remaining) { items.push(first);
items.push(first); while !remaining.is_empty() {
while !remaining.is_empty() { match Item::decode_length_delimited(&mut remaining) {
match Item::decode_length_delimited(&mut remaining) { Ok(item) => items.push(item),
Ok(item) => items.push(item), Err(_) => {
Err(_) => { valid = false;
valid = false; break;
break; }
}
} }
} else if !remaining.is_empty() {
valid = false;
} }
if valid {
crate::metrics::record_cache_op("moka", "hit", std::time::Duration::ZERO);
crate::metrics::record_cache_hit_ns(namespace);
crate::metrics::record_cache_value_size(namespace, bytes.len());
tracing::debug!(
namespace = %namespace,
repo = %repo_path,
item_count = items.len(),
"vec cache hit"
);
return Ok(items);
}
tracing::warn!(
namespace = %namespace,
repo = %repo_path,
"vec cache decode failed, rebuilding"
);
cache().invalidate(&key);
} }
} else if !remaining.is_empty() {
valid = false;
} }
if valid {
crate::metrics::record_cache_op("moka", "hit", std::time::Duration::ZERO);
tracing::debug!(
namespace = %namespace,
repo = %repo_path,
item_count = items.len(),
"vec cache hit"
);
return Ok(items);
}
tracing::warn!(
namespace = %namespace,
repo = %repo_path,
"vec cache decode failed, rebuilding"
);
// Invalidate the corrupt entry
cache().invalidate(&key);
} }
crate::metrics::record_cache_miss_ns(namespace);
tracing::debug!( tracing::debug!(
namespace = %namespace, namespace = %namespace,
repo = %repo_path, repo = %repo_path,
@@ -290,15 +295,14 @@ where
let response = build()?; let response = build()?;
let build_elapsed = start.elapsed(); let build_elapsed = start.elapsed();
// Encode all items into a single buffer with length-delimited framing
let total_est: usize = response let total_est: usize = response
.iter() .iter()
.map(|item| item.encoded_len() + 10) // 10 = prost length-delimited overhead .map(|item| item.encoded_len() + 10)
.sum(); .sum();
let mut bytes = Vec::with_capacity(total_est); let mut data = Vec::with_capacity(total_est);
let mut encode_ok = true; let mut encode_ok = true;
for item in &response { for item in &response {
if let Err(err) = item.encode_length_delimited(&mut bytes) { if let Err(err) = item.encode_length_delimited(&mut data) {
tracing::warn!( tracing::warn!(
namespace = %namespace, namespace = %namespace,
repo = %repo_path, repo = %repo_path,
@@ -311,13 +315,19 @@ where
} }
if encode_ok { if encode_ok {
let crc = crc32fast::hash(&data);
let mut bytes = Vec::with_capacity(4 + data.len());
bytes.extend_from_slice(&crc.to_le_bytes());
bytes.extend_from_slice(&data);
crate::metrics::record_cache_value_size(namespace, bytes.len());
let key_arc = Arc::new(key.clone());
cache().insert(key, bytes); cache().insert(key, bytes);
track_cache_key(repo_path, key_arc);
} }
crate::metrics::record_cache_op("moka", "miss", build_elapsed); crate::metrics::record_cache_op("moka", "miss", build_elapsed);
Ok(response) Ok(response)
} }
// Request encoding helpers
/// Encode a protobuf request into a byte vector. /// Encode a protobuf request into a byte vector.
#[inline] #[inline]
@@ -329,7 +339,6 @@ fn encode_request<Req: Message>(request: &Req) -> Vec<u8> {
buf buf
} }
// Repository-scoped invalidation
/// Invalidate all cache entries for a specific repository. /// Invalidate all cache entries for a specific repository.
/// ///
@@ -341,30 +350,24 @@ fn encode_request<Req: Message>(request: &Req) -> Vec<u8> {
/// create branch, etc.) to prevent serving stale data. /// create branch, etc.) to prevent serving stale data.
pub(crate) fn invalidate_repo(relative_path: &str) { pub(crate) fn invalidate_repo(relative_path: &str) {
let c = cache(); let c = cache();
let target = relative_path.as_bytes(); let idx = repo_key_index();
let mut keys_to_remove: Vec<std::sync::Arc<Vec<u8>>> = Vec::with_capacity(64);
for (key, _value) in c.iter() { if let Some((_key, keys)) = idx.repo_to_keys.remove(relative_path) {
if key_matches_repo(&key, target) { let removed = keys.len();
keys_to_remove.push(key); for key in &keys {
c.invalidate(key.as_ref());
} }
}
let removed = keys_to_remove.len(); if removed > 0 {
for key in &keys_to_remove { tracing::debug!(
c.invalidate(key.as_ref()); relative_path = %relative_path,
} entries_removed = removed,
"cache invalidated for repository (indexed)"
if removed > 0 { );
tracing::debug!( }
relative_path = %relative_path,
entries_removed = removed,
"cache invalidated for repository"
);
} }
} }
// Selector helpers
use crate::pb::{ObjectSelector, object_selector}; use crate::pb::{ObjectSelector, object_selector};
+61 -25
View File
@@ -30,6 +30,8 @@ use gix::discover::is_git;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use tokio_stream::wrappers::ReceiverStream; use tokio_stream::wrappers::ReceiverStream;
use tonic::codec::CompressionEncoding;
use crate::bare::GitBare; use crate::bare::GitBare;
use crate::error::{GitError, GitResult}; use crate::error::{GitError, GitResult};
use crate::pb::{ use crate::pb::{
@@ -151,25 +153,20 @@ impl GitksService {
if relative_path.is_empty() { if relative_path.is_empty() {
return Err(tonic::Status::invalid_argument("relative_path is required")); return Err(tonic::Status::invalid_argument("relative_path is required"));
} }
// Validate early to reject '..' and other traversal patterns
crate::sanitize::validate_relative_path(relative_path) crate::sanitize::validate_relative_path(relative_path)
.map_err(|e| tonic::Status::invalid_argument(e.to_string()))?; .map_err(|e| tonic::Status::invalid_argument(e.to_string()))?;
let candidate = self.repo_prefix.join(relative_path); let candidate = self.repo_prefix.join(relative_path);
// Canonicalize repo_prefix (which should exist) for a reliable check
let prefix_canon = self let prefix_canon = self
.repo_prefix .repo_prefix
.canonicalize() .canonicalize()
.unwrap_or_else(|_| self.repo_prefix.clone()); .unwrap_or_else(|_| self.repo_prefix.clone());
// Unified path validation to avoid TOCTOU
let canonical = match candidate.canonicalize() { let canonical = match candidate.canonicalize() {
Ok(canon) => { Ok(canon) => {
// Path exists and was canonicalized
canon canon
} }
Err(_) => { Err(_) => {
// Path doesn't exist yet — validate via parent
let parent = candidate.parent().unwrap_or(&self.repo_prefix); let parent = candidate.parent().unwrap_or(&self.repo_prefix);
let filename = candidate.file_name().ok_or_else(|| { let filename = candidate.file_name().ok_or_else(|| {
tonic::Status::invalid_argument("invalid path: missing filename") tonic::Status::invalid_argument("invalid path: missing filename")
@@ -180,7 +177,6 @@ impl GitksService {
.unwrap_or_else(|_| parent.to_path_buf()); .unwrap_or_else(|_| parent.to_path_buf());
let constructed = parent_canon.join(filename); let constructed = parent_canon.join(filename);
// String-level verification for non-existent paths
let constructed_str = constructed.to_string_lossy(); let constructed_str = constructed.to_string_lossy();
let prefix_str = prefix_canon.to_string_lossy(); let prefix_str = prefix_canon.to_string_lossy();
@@ -194,13 +190,19 @@ impl GitksService {
} }
}; };
// Final check: canonical must be under prefix
if !canonical.starts_with(&prefix_canon) { if !canonical.starts_with(&prefix_canon) {
return Err(tonic::Status::invalid_argument( return Err(tonic::Status::invalid_argument(
"path traversal detected: relative_path escapes repo prefix", "path traversal detected: relative_path escapes repo prefix",
)); ));
} }
let double_canon = canonical.canonicalize().unwrap_or_else(|_| canonical.clone());
if canonical != double_canon {
return Err(tonic::Status::invalid_argument(
"path resolved to different target (possible symlink race)",
));
}
Ok(canonical) Ok(canonical)
} }
@@ -211,10 +213,8 @@ impl GitksService {
_old_oid: &str, _old_oid: &str,
_new_oid: &str, _new_oid: &str,
) { ) {
// Invalidate moka caches
crate::server::cache::invalidate_repo(relative_path); crate::server::cache::invalidate_repo(relative_path);
// Invalidate disk cache
if let Some(ref pc) = self.pack_cache { if let Some(ref pc) = self.pack_cache {
pc.invalidate_repo(relative_path); pc.invalidate_repo(relative_path);
} }
@@ -328,10 +328,8 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult<std::process::Ou
let elapsed = start.elapsed(); let elapsed = start.elapsed();
let elapsed_ms = elapsed.as_millis() as u64; let elapsed_ms = elapsed.as_millis() as u64;
// Record metrics
crate::metrics::record_git_cmd(cmd_name, elapsed); crate::metrics::record_git_cmd(cmd_name, elapsed);
// Slow operation warning
if elapsed.as_secs() >= 1 { if elapsed.as_secs() >= 1 {
tracing::warn!( tracing::warn!(
repo = %gb.bare_dir.display(), repo = %gb.bare_dir.display(),
@@ -343,11 +341,12 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult<std::process::Ou
if !result.status.success() { if !result.status.success() {
let stderr_str = String::from_utf8_lossy(&result.stderr); let stderr_str = String::from_utf8_lossy(&result.stderr);
let sanitized = crate::sanitize::sanitize_git_stderr(stderr_str.trim());
tracing::warn!( tracing::warn!(
repo = %gb.bare_dir.display(), repo = %gb.bare_dir.display(),
command = cmd_name, command = cmd_name,
status = ?result.status.code(), status = ?result.status.code(),
stderr = %stderr_str.trim(), stderr = %sanitized,
elapsed_ms, elapsed_ms,
"git subprocess exited with non-zero status" "git subprocess exited with non-zero status"
); );
@@ -386,6 +385,19 @@ fn structured_git_error(stderr: &str, code: Option<i32>) -> GitError {
pub async fn serve( pub async fn serve(
addr: std::net::SocketAddr, addr: std::net::SocketAddr,
svc: GitksService, svc: GitksService,
) -> Result<(), tonic::transport::Error> {
serve_with_shutdown(addr, svc, std::future::pending()).await
}
/// Start the gRPC server and block until the shutdown signal fires.
///
/// The `shutdown` future should resolve when the process should stop
/// (e.g. on SIGTERM/SIGINT). All in-flight requests are drained before
/// the server returns.
pub async fn serve_with_shutdown(
addr: std::net::SocketAddr,
svc: GitksService,
shutdown: impl std::future::Future<Output = ()>,
) -> Result<(), tonic::transport::Error> { ) -> Result<(), tonic::transport::Error> {
let span = tracing::info_span!("gitks.server", %addr); let span = tracing::info_span!("gitks.server", %addr);
let _enter = span.enter(); let _enter = span.enter();
@@ -393,18 +405,42 @@ pub async fn serve(
let (health_reporter, health_service) = tonic_health::server::health_reporter(); let (health_reporter, health_service) = tonic_health::server::health_reporter();
let repo_svc = repository_service_server::RepositoryServiceServer::new(svc.clone()); let repo_svc = repository_service_server::RepositoryServiceServer::new(svc.clone())
let archive_svc = archive_service_server::ArchiveServiceServer::new(svc.clone()); .send_compressed(CompressionEncoding::Gzip)
let blame_svc = blame_service_server::BlameServiceServer::new(svc.clone()); .accept_compressed(CompressionEncoding::Gzip);
let branch_svc = branch_service_server::BranchServiceServer::new(svc.clone()); let archive_svc = archive_service_server::ArchiveServiceServer::new(svc.clone())
let commit_svc = commit_service_server::CommitServiceServer::new(svc.clone()); .send_compressed(CompressionEncoding::Gzip)
let diff_svc = diff_service_server::DiffServiceServer::new(svc.clone()); .accept_compressed(CompressionEncoding::Gzip);
let merge_svc = merge_service_server::MergeServiceServer::new(svc.clone()); let blame_svc = blame_service_server::BlameServiceServer::new(svc.clone())
let pack_svc = pack_service_server::PackServiceServer::new(svc.clone()); .send_compressed(CompressionEncoding::Gzip)
let ref_svc = ref_service_server::RefServiceServer::new(svc.clone()); .accept_compressed(CompressionEncoding::Gzip);
let remote_svc = remote_service_server::RemoteServiceServer::new(svc.clone()); let branch_svc = branch_service_server::BranchServiceServer::new(svc.clone())
let tag_svc = tag_service_server::TagServiceServer::new(svc.clone()); .send_compressed(CompressionEncoding::Gzip)
let tree_svc = tree_service_server::TreeServiceServer::new(svc); .accept_compressed(CompressionEncoding::Gzip);
let commit_svc = commit_service_server::CommitServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let diff_svc = diff_service_server::DiffServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let merge_svc = merge_service_server::MergeServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let pack_svc = pack_service_server::PackServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let ref_svc = ref_service_server::RefServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let remote_svc = remote_service_server::RemoteServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let tag_svc = tag_service_server::TagServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let tree_svc = tree_service_server::TreeServiceServer::new(svc)
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
health_reporter health_reporter
.set_serving::<repository_service_server::RepositoryServiceServer<GitksService>>() .set_serving::<repository_service_server::RepositoryServiceServer<GitksService>>()
@@ -458,5 +494,5 @@ pub async fn serve(
.add_service(tag_svc) .add_service(tag_svc)
.add_service(tree_svc); .add_service(tree_svc);
tracing::info!("server ready, starting to accept connections"); tracing::info!("server ready, starting to accept connections");
server.serve(addr).await server.serve_with_shutdown(addr, shutdown).await
} }
-3
View File
@@ -115,7 +115,6 @@ impl pack_service_server::PackService for GitksService {
.upload_pack(tokio_stream::wrappers::ReceiverStream::new(rx)) .upload_pack(tokio_stream::wrappers::ReceiverStream::new(rx))
.await?; .await?;
let out = super::bridge_server_stream(resp.into_inner()); let out = super::bridge_server_stream(resp.into_inner());
// Create a dummy cancel token for the forwarded stream
let cancel_token = tokio_util::sync::CancellationToken::new(); let cancel_token = tokio_util::sync::CancellationToken::new();
let cancel_guard = cancel_token.drop_guard(); let cancel_guard = cancel_token.drop_guard();
return Ok(tonic::Response::new( return Ok(tonic::Response::new(
@@ -190,7 +189,6 @@ impl pack_service_server::PackService for GitksService {
.receive_pack(tokio_stream::wrappers::ReceiverStream::new(rx)) .receive_pack(tokio_stream::wrappers::ReceiverStream::new(rx))
.await?; .await?;
let out = super::bridge_server_stream(resp.into_inner()); let out = super::bridge_server_stream(resp.into_inner());
// Create a dummy cancel token for the forwarded stream
let cancel_token = tokio_util::sync::CancellationToken::new(); let cancel_token = tokio_util::sync::CancellationToken::new();
let cancel_guard = cancel_token.drop_guard(); let cancel_guard = cancel_token.drop_guard();
return Ok(tonic::Response::new( return Ok(tonic::Response::new(
@@ -310,7 +308,6 @@ impl pack_service_server::PackService for GitksService {
return Ok(tonic::Response::new(ReceiverStream::new(rx))); return Ok(tonic::Response::new(ReceiverStream::new(rx)));
} }
// Cache miss: execute pack-objects and tee to cache
tracing::info!(%repo, digest = %digest, "pack-objects cache miss"); tracing::info!(%repo, digest = %digest, "pack-objects cache miss");
let stream = gb.pack_objects(inner).await?; let stream = gb.pack_objects(inner).await?;
let tee_stream = pc.tee_pack_stream(&digest, stream); let tee_stream = pc.tee_pack_stream(&digest, stream);
-2
View File
@@ -59,7 +59,6 @@ impl BundleApplicator {
.spawn() .spawn()
.map_err(|e| format!("spawn git bundle unbundle: {e}"))?; .map_err(|e| format!("spawn git bundle unbundle: {e}"))?;
// Stream file contents to stdin in a background thread
let mut stdin = child.stdin.take().ok_or("no stdin")?; let mut stdin = child.stdin.take().ok_or("no stdin")?;
let file_handle = file; let file_handle = file;
let writer = std::thread::spawn(move || -> Result<(), String> { let writer = std::thread::spawn(move || -> Result<(), String> {
@@ -84,7 +83,6 @@ impl BundleApplicator {
.wait_with_output() .wait_with_output()
.map_err(|e| format!("wait bundle: {e}"))?; .map_err(|e| format!("wait bundle: {e}"))?;
// Wait for writer thread
let _ = writer.join().map_err(|_| "writer thread panicked")?; let _ = writer.join().map_err(|_| "writer thread panicked")?;
if !output.status.success() { if !output.status.success() {
+218
View File
@@ -519,3 +519,221 @@ async fn test_oid_binary_encoding() {
let hex_from_bytes: String = oid.value.iter().map(|b| format!("{b:02x}")).collect(); let hex_from_bytes: String = oid.value.iter().map(|b| format!("{b:02x}")).collect();
assert_eq!(hex_from_bytes, oid.hex); assert_eq!(hex_from_bytes, oid.hex);
} }
/// With an empty revision and no path or date filters, `count_commits`
/// reports 4 commits for the shared fixture repository.
#[test]
fn test_count_commits_head() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = CountCommitsRequest {
        repository: Some(hdr()),
        revision: String::new(),
        path: String::new(),
        since: String::new(),
        until: String::new(),
    };
    let response = repo.count_commits(request).unwrap();
    assert_eq!(response.count, 4);
}
/// Restricting `count_commits` to the `feature` revision yields exactly one commit.
#[test]
fn test_count_commits_with_revision() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = CountCommitsRequest {
        repository: Some(hdr()),
        revision: "feature".into(),
        path: String::new(),
        since: String::new(),
        until: String::new(),
    };
    let response = repo.count_commits(request).unwrap();
    assert_eq!(response.count, 1);
}
/// Counting commits on `main` limited to `README.md` finds at least one commit.
#[test]
fn test_count_commits_with_path() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = CountCommitsRequest {
        repository: Some(hdr()),
        revision: "main".into(),
        path: "README.md".into(),
        since: String::new(),
        until: String::new(),
    };
    let response = repo.count_commits(request).unwrap();
    assert!(response.count >= 1);
}
/// Comparing `feature` (left) against `main` (right) reports no commits
/// unique to the left side and three unique to the right side.
#[test]
fn test_count_diverging_commits() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = CountDivergingCommitsRequest {
        repository: Some(hdr()),
        left: "feature".into(),
        right: "main".into(),
    };
    let divergence = repo.count_diverging_commits(request).unwrap();
    assert_eq!(divergence.left_count, 0);
    assert_eq!(divergence.right_count, 3);
}
/// Looking up a commit through an OID selector returns a commit whose
/// hex OID is populated.
#[test]
fn test_find_commit_by_oid() {
    let (_tmp, repo) = common::setup_bare_repo();
    let main_hex = common::get_main_oid(&repo);
    let request = FindCommitRequest {
        repository: Some(hdr()),
        revision: common::oid_selector(&main_hex),
        include_stats: false,
    };
    let commit = repo.find_commit(request).unwrap();
    let found = commit.oid.as_ref().unwrap();
    assert!(!found.hex.is_empty());
}
/// Looking up a commit through a revision selector for `main` returns a
/// commit whose hex OID is populated.
#[test]
fn test_find_commit_by_revision() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = FindCommitRequest {
        repository: Some(hdr()),
        revision: common::rev_selector("main"),
        include_stats: false,
    };
    let commit = repo.find_commit(request).unwrap();
    let found = commit.oid.as_ref().unwrap();
    assert!(!found.hex.is_empty());
}
/// When no selector is supplied, `find_commit` still resolves a commit
/// with a populated hex OID (presumably HEAD, per the test name).
#[test]
fn test_find_commit_default_head() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = FindCommitRequest {
        repository: Some(hdr()),
        revision: None,
        include_stats: false,
    };
    let commit = repo.find_commit(request).unwrap();
    let found = commit.oid.as_ref().unwrap();
    assert!(!found.hex.is_empty());
}
/// Requesting a single binary OID from `list_commits_by_oid` returns
/// exactly one commit.
#[test]
fn test_list_commits_by_oid() {
    let (_tmp, repo) = common::setup_bare_repo();
    let main_hex = common::get_main_oid(&repo);
    // The request carries raw OID bytes, so convert from the hex form first.
    let raw_oid = gitks::oid::hex_to_bytes(&main_hex).unwrap();
    let request = ListCommitsByOidRequest {
        repository: Some(hdr()),
        oids: vec![raw_oid],
        include_stats: false,
    };
    let response = repo.list_commits_by_oid(request).unwrap();
    assert_eq!(response.commits.len(), 1);
}
/// An empty OID list produces an empty commit list rather than an error.
#[test]
fn test_list_commits_by_oid_empty() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = ListCommitsByOidRequest {
        repository: Some(hdr()),
        oids: Vec::new(),
        include_stats: false,
    };
    let response = repo.list_commits_by_oid(request).unwrap();
    assert!(response.commits.is_empty());
}
/// A case-sensitive search for "initial" matches exactly one commit.
#[test]
fn test_commits_by_message_basic() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = CommitsByMessageRequest {
        repository: Some(hdr()),
        query: "initial".into(),
        revision: String::new(),
        limit: 10,
        offset: 0,
        case_insensitive: false,
    };
    let response = repo.commits_by_message(request).unwrap();
    assert_eq!(response.commits.len(), 1);
}
/// With `case_insensitive` enabled, the upper-case query "INITIAL" matches
/// exactly one commit.
#[test]
fn test_commits_by_message_case_insensitive() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = CommitsByMessageRequest {
        repository: Some(hdr()),
        query: "INITIAL".into(),
        revision: String::new(),
        limit: 10,
        offset: 0,
        case_insensitive: true,
    };
    let response = repo.commits_by_message(request).unwrap();
    assert_eq!(response.commits.len(), 1);
}
/// A query that matches no commit message yields an empty result set.
#[test]
fn test_commits_by_message_no_match() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = CommitsByMessageRequest {
        repository: Some(hdr()),
        query: "zzzznonexistent".into(),
        revision: String::new(),
        limit: 10,
        offset: 0,
        case_insensitive: false,
    };
    let response = repo.commits_by_message(request).unwrap();
    assert!(response.commits.is_empty());
}
/// `check_objects_exist` answers per revision, in request order: a real
/// commit OID and `HEAD` exist, an unknown branch name does not.
#[test]
fn test_check_objects_exist() {
    let (_dir, gb) = common::setup_bare_repo();
    let oid = common::get_main_oid(&gb);
    let resp = gb
        .check_objects_exist(CheckObjectsExistRequest {
            repository: Some(hdr()),
            // `oid` is not needed afterwards, so it is moved instead of cloned.
            revisions: vec![oid, "HEAD".into(), "nonexistent-branch".into()],
        })
        .unwrap();
    assert_eq!(resp.revisions.len(), 3);
    assert!(resp.revisions[0].exists, "main commit OID should exist");
    assert!(resp.revisions[1].exists, "HEAD should exist");
    assert!(
        !resp.revisions[2].exists,
        "unknown branch name should not exist"
    );
}
/// Stats for the commit at the tip of `main` (selected by OID) report at
/// least one changed file.
#[test]
fn test_get_commit_stats() {
    let (_tmp, repo) = common::setup_bare_repo();
    let main_hex = common::get_main_oid(&repo);
    let request = GetCommitStatsRequest {
        repository: Some(hdr()),
        revision: common::oid_selector(&main_hex),
    };
    let stats = repo.get_commit_stats(request).unwrap();
    assert!(stats.changed_files >= 1);
}
/// Without an explicit selector, `get_commit_stats` still reports at least
/// one changed file.
#[test]
fn test_get_commit_stats_default() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = GetCommitStatsRequest {
        repository: Some(hdr()),
        revision: None,
    };
    let stats = repo.get_commit_stats(request).unwrap();
    assert!(stats.changed_files >= 1);
}
/// For an existing file on `main`, `last_commit_for_path` returns a commit
/// and echoes the requested path.
#[test]
fn test_last_commit_for_path() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = LastCommitForPathRequest {
        repository: Some(hdr()),
        path: "README.md".into(),
        revision: "main".into(),
        literal_pathspec: false,
    };
    let response = repo.last_commit_for_path(request).unwrap();
    assert!(response.commit.is_some());
    assert_eq!(response.path, "README.md");
}
/// For a path that is absent from `main`, the call succeeds but carries no commit.
#[test]
fn test_last_commit_for_path_nonexistent() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = LastCommitForPathRequest {
        repository: Some(hdr()),
        path: "nonexistent.txt".into(),
        revision: "main".into(),
        literal_pathspec: false,
    };
    let response = repo.last_commit_for_path(request).unwrap();
    assert!(response.commit.is_none());
}
+49
View File
@@ -172,3 +172,52 @@ pub fn setup_bare_repo_with_conflict() -> (tempfile::TempDir, GitBare) {
(dir, GitBare::new(bare_dir)) (dir, GitBare::new(bare_dir))
} }
/// Resolves `rev` to an object ID by running `git rev-parse` against the
/// bare repository behind `gb`.
///
/// Returns git's stdout with surrounding whitespace trimmed.
///
/// # Panics
///
/// Panics if the `git` binary cannot be spawned or exits with a non-zero
/// status; in the latter case git's stderr is included in the message.
// NOTE(review): dead_code is presumably allowed because not every test
// binary that includes this helper module uses every helper — confirm.
#[allow(dead_code)]
pub fn get_oid(gb: &GitBare, rev: &str) -> String {
    let output = std::process::Command::new("git")
        .arg("--git-dir")
        // Pass the path as an OS string: a lossy UTF-8 conversion would
        // silently rewrite a non-UTF-8 directory name into a different path.
        .arg(gb.bare_dir.as_os_str())
        .arg("rev-parse")
        .arg(rev)
        .output()
        .expect("git rev-parse");
    assert!(
        output.status.success(),
        "git rev-parse {rev} failed: {}",
        String::from_utf8_lossy(&output.stderr).trim()
    );
    String::from_utf8_lossy(&output.stdout).trim().to_string()
}
/// Resolves `refs/heads/main` in the given bare repository via [`get_oid`].
#[allow(dead_code)]
pub fn get_main_oid(gb: &GitBare) -> String {
    const MAIN_REF: &str = "refs/heads/main";
    get_oid(gb, MAIN_REF)
}
/// Resolves `refs/heads/feature` in the given bare repository via [`get_oid`].
#[allow(dead_code)]
pub fn get_feature_oid(gb: &GitBare) -> String {
    const FEATURE_REF: &str = "refs/heads/feature";
    get_oid(gb, FEATURE_REF)
}
/// Builds an `ObjectSelector` that selects an object by its hex OID.
///
/// Only the `hex` field is populated; `value` is left empty and `format`
/// is set to `0`.
#[allow(dead_code)]
pub fn oid_selector(hex: &str) -> Option<gitks::pb::ObjectSelector> {
    use gitks::pb::object_selector::Selector;

    let oid = gitks::pb::Oid {
        hex: hex.to_owned(),
        value: Vec::new(),
        format: 0,
    };
    let selector = Some(Selector::Oid(oid));
    Some(gitks::pb::ObjectSelector { selector })
}
/// Builds an `ObjectSelector` that selects an object by revision name.
#[allow(dead_code)]
pub fn rev_selector(rev: &str) -> Option<gitks::pb::ObjectSelector> {
    use gitks::pb::object_selector::Selector;

    let name = gitks::pb::ObjectName {
        revision: rev.to_owned(),
    };
    let selector = Some(Selector::Revision(name));
    Some(gitks::pb::ObjectSelector { selector })
}
+76
View File
@@ -266,3 +266,79 @@ async fn test_get_patch() {
.collect(); .collect();
assert!(combined.contains("diff --git") || combined.contains("@@")); assert!(combined.contains("diff --git") || combined.contains("@@"));
} }
/// Diffing from the `feature` tip to the `main` tip reports at least one
/// changed path.
#[test]
fn test_find_changed_paths() {
    let (_tmp, repo) = common::setup_bare_repo();
    let base = common::get_feature_oid(&repo);
    let head = common::get_main_oid(&repo);
    let request = FindChangedPathsRequest {
        repository: Some(hdr()),
        base,
        head,
        paths: Vec::new(),
    };
    let response = repo.find_changed_paths(request).unwrap();
    assert!(!response.paths.is_empty());
}
#[test]
fn test_find_changed_paths_same_ref() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
let resp = gb.find_changed_paths(FindChangedPathsRequest {
repository: Some(hdr()),
base: oid.clone(),
head: oid,
paths: vec![],
}).unwrap();
assert!(resp.paths.is_empty());
}
/// `raw_diff` from the `feature` tip to the `main` tip streams at least one
/// chunk, and the concatenated payload contains the text "diff".
#[test]
fn test_raw_diff() {
    let (_tmp, repo) = common::setup_bare_repo();
    let base = common::get_feature_oid(&repo);
    let head = common::get_main_oid(&repo);
    let request = RawDiffRequest {
        repository: Some(hdr()),
        base,
        head,
        options: None,
    };
    let chunks = repo.raw_diff(request).unwrap();
    assert!(!chunks.is_empty());

    // Stitch the chunk payloads back together before inspecting the text.
    let mut combined: Vec<u8> = Vec::new();
    chunks
        .iter()
        .for_each(|chunk| combined.extend_from_slice(&chunk.data));
    let text = String::from_utf8_lossy(&combined);
    assert!(text.contains("diff"));
}
/// `raw_patch` from the `feature` tip to the `main` tip streams at least one
/// chunk, and the concatenated payload contains the text "From".
#[test]
fn test_raw_patch() {
    let (_tmp, repo) = common::setup_bare_repo();
    let base = common::get_feature_oid(&repo);
    let head = common::get_main_oid(&repo);
    let request = RawPatchRequest {
        repository: Some(hdr()),
        base,
        head,
    };
    let chunks = repo.raw_patch(request).unwrap();
    assert!(!chunks.is_empty());

    // Stitch the chunk payloads back together before inspecting the text.
    let mut combined: Vec<u8> = Vec::new();
    chunks
        .iter()
        .for_each(|chunk| combined.extend_from_slice(&chunk.data));
    let text = String::from_utf8_lossy(&combined);
    assert!(text.contains("From"));
}
/// `get_raw_changes` from the `feature` tip to the `main` tip reports at
/// least one change entry.
#[test]
fn test_get_raw_changes() {
    let (_tmp, repo) = common::setup_bare_repo();
    let base = common::get_feature_oid(&repo);
    let head = common::get_main_oid(&repo);
    let request = GetRawChangesRequest {
        repository: Some(hdr()),
        base,
        head,
    };
    let response = repo.get_raw_changes(request).unwrap();
    assert!(!response.changes.is_empty());
}
+99
View File
@@ -63,3 +63,102 @@ fn test_list_refs_direct() {
assert_eq!(oid.hex.len(), 40, "SHA-1 hex should be 40 chars"); assert_eq!(oid.hex.len(), 40, "SHA-1 hex should be 40 chars");
} }
} }
/// Writing a new branch ref makes `ref_exists` report it, while an
/// unrelated branch name is still reported as missing.
#[test]
fn test_write_ref_and_ref_exists() {
    let (_tmp, repo) = common::setup_bare_repo();
    let target = common::get_main_oid(&repo);

    let write_request = gitks::pb::WriteRefRequest {
        repository: Some(hdr()),
        ref_name: "refs/heads/test-write".into(),
        new_oid: target,
        old_oid: String::new(),
        force: false,
    };
    let written = repo.write_ref(write_request).unwrap();
    assert!(written.ok, "write_ref failed: {}", written.error);

    // Small probe so both existence checks read the same way.
    let ref_present = |name: &str| {
        let request = gitks::pb::RefExistsRequest {
            repository: Some(hdr()),
            ref_name: name.into(),
        };
        repo.ref_exists(request).unwrap().exists
    };

    assert!(ref_present("refs/heads/test-write"));
    assert!(!ref_present("refs/heads/nonexistent"));
}
/// A batch update that creates two branches reports no failed refs, and
/// both branches exist afterwards.
#[test]
fn test_update_references_batch() {
    let (_dir, gb) = common::setup_bare_repo();
    let oid = common::get_main_oid(&gb);

    let resp = gb
        .update_references(gitks::pb::UpdateReferencesRequest {
            repository: Some(hdr()),
            updates: vec![
                gitks::pb::RefUpdateEntry {
                    ref_name: "refs/heads/batch-a".into(),
                    new_oid: oid.clone(),
                    old_oid: String::new(),
                },
                gitks::pb::RefUpdateEntry {
                    ref_name: "refs/heads/batch-b".into(),
                    new_oid: oid,
                    old_oid: String::new(),
                },
            ],
        })
        .unwrap();
    assert!(resp.failed_refs.is_empty(), "error: {}", resp.error);

    // Check every ref in the batch, not just the first one, so a partially
    // applied update cannot slip through.
    for ref_name in ["refs/heads/batch-a", "refs/heads/batch-b"] {
        let found = gb
            .ref_exists(gitks::pb::RefExistsRequest {
                repository: Some(hdr()),
                ref_name: ref_name.into(),
            })
            .unwrap();
        assert!(found.exists, "{ref_name} should exist after the batch update");
    }
}
/// An update request with no entries succeeds and reports no failed refs.
#[test]
fn test_update_references_empty() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = gitks::pb::UpdateReferencesRequest {
        repository: Some(hdr()),
        updates: Vec::new(),
    };
    let response = repo.update_references(request).unwrap();
    assert!(response.failed_refs.is_empty());
}
/// Deleting a freshly written branch reports no failed refs, and the
/// branch no longer exists afterwards.
#[test]
fn test_delete_refs() {
    let (_dir, gb) = common::setup_bare_repo();
    let oid = common::get_main_oid(&gb);

    let written = gb
        .write_ref(gitks::pb::WriteRefRequest {
            repository: Some(hdr()),
            ref_name: "refs/heads/to-delete".into(),
            new_oid: oid,
            old_oid: String::new(),
            force: false,
        })
        .unwrap();
    // Without this check a failed setup write would leave nothing to delete
    // and the final "does not exist" assertion would pass vacuously.
    assert!(written.ok, "write_ref failed: {}", written.error);

    let resp = gb
        .delete_refs(gitks::pb::DeleteRefsRequest {
            repository: Some(hdr()),
            ref_names: vec!["refs/heads/to-delete".into()],
        })
        .unwrap();
    assert!(resp.failed_refs.is_empty());

    let exists = gb
        .ref_exists(gitks::pb::RefExistsRequest {
            repository: Some(hdr()),
            ref_name: "refs/heads/to-delete".into(),
        })
        .unwrap();
    assert!(!exists.exists);
}
/// The fixture repository's default branch is reported as `main`.
#[test]
fn test_find_default_branch_name() {
    let (_tmp, repo) = common::setup_bare_repo();
    let default_branch = repo.find_default_branch_name().unwrap();
    assert_eq!(default_branch.name, "main");
}
+190
View File
@@ -326,3 +326,193 @@ async fn test_exists_nonexistent_repo() {
.into_inner(); .into_inner();
assert!(!result.exists); assert!(!result.exists);
} }
/// Asking for the merge base of the `main` and `feature` tips returns a
/// non-empty base OID.
#[test]
fn test_find_merge_base() {
    let (_tmp, repo) = common::setup_bare_repo();
    let main_hex = common::get_main_oid(&repo);
    let feature_hex = common::get_feature_oid(&repo);
    // Revisions are sent as the bytes of their hex strings.
    let request = FindMergeBaseRequest {
        repository: Some(header(&repo)),
        revisions: vec![main_hex.into_bytes(), feature_hex.into_bytes()],
    };
    let response = repo.find_merge_base(request).unwrap();
    assert!(!response.base_oid.is_empty());
}
/// With no revisions supplied, `find_merge_base` succeeds and returns an
/// empty base OID.
#[test]
fn test_find_merge_base_empty() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = FindMergeBaseRequest {
        repository: Some(header(&repo)),
        revisions: Vec::new(),
    };
    let response = repo.find_merge_base(request).unwrap();
    assert!(response.base_oid.is_empty());
}
/// With a single revision supplied, `find_merge_base` returns a non-empty
/// base OID.
#[test]
fn test_find_merge_base_single() {
    let (_tmp, repo) = common::setup_bare_repo();
    let main_hex = common::get_main_oid(&repo);
    let request = FindMergeBaseRequest {
        repository: Some(header(&repo)),
        revisions: vec![main_hex.into_bytes()],
    };
    let response = repo.find_merge_base(request).unwrap();
    assert!(!response.base_oid.is_empty());
}
/// The `feature` tip is reported as an ancestor of the `main` tip.
#[test]
fn test_commit_is_ancestor() {
    let (_tmp, repo) = common::setup_bare_repo();
    let ancestor_oid = common::get_feature_oid(&repo);
    let descendant_oid = common::get_main_oid(&repo);
    let request = CommitIsAncestorRequest {
        repository: Some(header(&repo)),
        ancestor_oid,
        descendant_oid,
    };
    let response = repo.commit_is_ancestor(request).unwrap();
    assert!(response.is_ancestor);
}
/// The `main` tip is not reported as an ancestor of the `feature` tip.
#[test]
fn test_commit_is_ancestor_false() {
    let (_tmp, repo) = common::setup_bare_repo();
    let ancestor_oid = common::get_main_oid(&repo);
    let descendant_oid = common::get_feature_oid(&repo);
    let request = CommitIsAncestorRequest {
        repository: Some(header(&repo)),
        ancestor_oid,
        descendant_oid,
    };
    let response = repo.commit_is_ancestor(request).unwrap();
    assert!(!response.is_ancestor);
}
/// `objects_size` returns one entry per requested OID: the `main` commit is
/// found with a positive size, and the all-zero OID is reported as not found.
#[test]
fn test_objects_size() {
    let (_dir, gb) = common::setup_bare_repo();
    let oid = common::get_main_oid(&gb);
    let resp = gb
        .objects_size(ObjectsSizeRequest {
            repository: Some(header(&gb)),
            // `oid` is not needed afterwards, so it is moved instead of cloned.
            oids: vec![oid, "0000000000000000000000000000000000000000".into()],
        })
        .unwrap();
    assert_eq!(resp.sizes.len(), 2);
    assert!(resp.sizes[0].found);
    assert!(resp.sizes[0].size > 0);
    // The all-zero OID is the deliberately missing object; without this
    // check the second request entry is never exercised.
    assert!(
        !resp.sizes[1].found,
        "all-zero OID should be reported as missing"
    );
}
/// An empty OID list produces an empty size list rather than an error.
#[test]
fn test_objects_size_empty() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = ObjectsSizeRequest {
        repository: Some(header(&repo)),
        oids: Vec::new(),
    };
    let response = repo.objects_size(request).unwrap();
    assert!(response.sizes.is_empty());
}
/// The fixture repository reports a size greater than zero bytes.
#[test]
fn test_repository_size() {
    let (_tmp, repo) = common::setup_bare_repo();
    let report = repo.repository_size().unwrap();
    assert!(report.size_bytes > 0);
}
/// For the fixture repository, `find_license` succeeds and returns an empty
/// SPDX identifier.
#[test]
fn test_find_license_no_license() {
    let (_tmp, repo) = common::setup_bare_repo();
    let detected = repo.find_license().unwrap();
    assert!(detected.license_spdx.is_empty());
}
/// Optimizing with the `Heuristic` strategy completes and reports success.
#[test]
fn test_optimize_repository_heuristic() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = OptimizeRepositoryRequest {
        repository: Some(header(&repo)),
        strategy: OptimizeStrategy::Heuristic as i32,
    };
    let outcome = repo.optimize_repository(request).unwrap();
    assert!(outcome.ok);
}
/// Optimizing with the `Incremental` strategy completes and reports success.
#[test]
fn test_optimize_repository_incremental() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = OptimizeRepositoryRequest {
        repository: Some(header(&repo)),
        strategy: OptimizeStrategy::Incremental as i32,
    };
    let outcome = repo.optimize_repository(request).unwrap();
    assert!(outcome.ok);
}
/// A case-sensitive content search for "Test" on `main` returns at least
/// one result.
#[test]
fn test_search_files_by_content() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = SearchFilesByContentRequest {
        repository: Some(header(&repo)),
        query: "Test".into(),
        revision: "main".into(),
        max_results: 10,
        case_sensitive: true,
    };
    let response = repo.search_files_by_content(request).unwrap();
    assert!(!response.results.is_empty());
}
/// A content search for a string that appears nowhere returns no results.
#[test]
fn test_search_files_by_content_no_match() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = SearchFilesByContentRequest {
        repository: Some(header(&repo)),
        query: "zzzznonexistentzzzz".into(),
        revision: "main".into(),
        max_results: 10,
        case_sensitive: true,
    };
    let response = repo.search_files_by_content(request).unwrap();
    assert!(response.results.is_empty());
}
/// A content search with an empty query is rejected with an error.
#[test]
fn test_search_files_by_content_empty_query() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = SearchFilesByContentRequest {
        repository: Some(header(&repo)),
        query: String::new(),
        revision: "main".into(),
        max_results: 10,
        case_sensitive: true,
    };
    let outcome = repo.search_files_by_content(request);
    assert!(outcome.is_err());
}
/// A recursive name search for "README" on `main` returns results, at least
/// one of which has "README" in its path.
#[test]
fn test_search_files_by_name() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = SearchFilesByNameRequest {
        repository: Some(header(&repo)),
        query: "README".into(),
        revision: "main".into(),
        max_results: 10,
        recursive: true,
    };
    let response = repo.search_files_by_name(request).unwrap();
    assert!(!response.results.is_empty());
    let has_readme = response
        .results
        .iter()
        .any(|entry| entry.path.contains("README"));
    assert!(has_readme);
}
/// A name search for a string that matches no file returns no results.
#[test]
fn test_search_files_by_name_no_match() {
    let (_tmp, repo) = common::setup_bare_repo();
    let request = SearchFilesByNameRequest {
        repository: Some(header(&repo)),
        query: "zzzznonexistentzzzz".into(),
        revision: "main".into(),
        max_results: 10,
        recursive: true,
    };
    let response = repo.search_files_by_name(request).unwrap();
    assert!(response.results.is_empty());
}
+182 -5
View File
@@ -1,6 +1,5 @@
use gitks::sanitize::*; use gitks::sanitize::*;
// ==================== validate_ref_name tests ====================
#[test] #[test]
fn test_validate_ref_name_accepts_valid_names() { fn test_validate_ref_name_accepts_valid_names() {
@@ -69,7 +68,6 @@ fn test_validate_ref_name_rejects_too_long() {
assert!(validate_ref_name(&max_valid_name).is_ok()); assert!(validate_ref_name(&max_valid_name).is_ok());
} }
// ==================== validate_revision tests ====================
#[test] #[test]
fn test_validate_revision_accepts_empty() { fn test_validate_revision_accepts_empty() {
@@ -149,7 +147,6 @@ fn test_validate_revision_accepts_valid_branch_names() {
assert!(validate_revision("v1.0.0").is_ok()); assert!(validate_revision("v1.0.0").is_ok());
} }
// ==================== validate_file_path tests ====================
#[test] #[test]
fn test_validate_file_path_accepts_valid_paths() { fn test_validate_file_path_accepts_valid_paths() {
@@ -216,7 +213,6 @@ fn test_validate_file_path_rejects_windows_reserved_names() {
assert!(validate_file_path("CON.txt").is_err()); assert!(validate_file_path("CON.txt").is_err());
} }
// ==================== validate_relative_path tests ====================
#[test] #[test]
fn test_validate_relative_path_accepts_valid_paths() { fn test_validate_relative_path_accepts_valid_paths() {
@@ -244,7 +240,6 @@ fn test_validate_relative_path_rejects_traversal() {
assert!(validate_relative_path("path/..").is_err()); assert!(validate_relative_path("path/..").is_err());
} }
// ==================== validate_config_key tests ====================
#[test] #[test]
fn test_validate_config_key_accepts_safe_keys() { fn test_validate_config_key_accepts_safe_keys() {
@@ -281,3 +276,185 @@ fn test_validate_config_key_rejects_invalid_chars() {
assert!(validate_config_key("key$(command)").is_err()); assert!(validate_config_key("key$(command)").is_err());
assert!(validate_config_key("key`command`").is_err()); assert!(validate_config_key("key`command`").is_err());
} }
/// Smoke test: feeds degenerate and hostile ref names to `validate_ref_name`.
///
/// Every result is discarded; the test fails only if a call panics.
#[test]
fn fuzz_validate_ref_name_no_panic() {
    let oversized = "x".repeat(300);
    let samples: &[&str] = &[
        "",
        "\0",
        "\0\0\0",
        "\x7f",
        "\x01\x02\x03",
        "~^:?*[]\\ ",
        "../../../etc/passwd",
        "a/b/c/d/e/f/g/h",
        &oversized,
        "branch@{upstream}",
        "HEAD~99999999999",
        "HEAD^99999999999",
        "ref:HEAD",
        "ref:refs/heads/main",
        "; rm -rf /",
        "$(echo pwned)",
        "`echo pwned`",
        "\n\r\t",
    ];
    for &sample in samples {
        let _ = validate_ref_name(sample);
    }
}
/// Smoke test: feeds a spread of revision expressions (plain names, `~`/`^`
/// suffixes, peel syntax, hex strings, NUL) to `validate_revision`.
///
/// Every result is discarded; the test fails only if a call panics.
#[test]
fn fuzz_validate_revision_no_panic() {
    let samples: &[&str] = &[
        "",
        "HEAD",
        "HEAD~0",
        "HEAD~99999999",
        "HEAD^0",
        "HEAD^99999999",
        "HEAD^{tree}",
        "HEAD^{commit}",
        "HEAD^{object}",
        "abcdef01",
        "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789",
        "0000",
        "zzzz",
        "ref:HEAD",
        "ref:refs/heads/main",
        "\0",
        "branch~abc",
        "branch^abc",
        "branch~",
        "branch^",
        "a~10001",
        "a^10001",
    ];
    for &sample in samples {
        let _ = validate_revision(sample);
    }
}
/// Smoke test: feeds absolute, traversal, `.git`, control-character and
/// very long paths to `validate_file_path`.
///
/// Every result is discarded; the test fails only if a call panics.
#[test]
fn fuzz_validate_file_path_no_panic() {
    let oversized = "x".repeat(5000);
    let moderate = "a".repeat(100);
    let samples: &[&str] = &[
        "",
        "/etc/passwd",
        "../escape",
        "a/../b",
        ".git",
        ".git/config",
        "src/.git/HEAD",
        "a/b/.git",
        "\0",
        "\0\0\0",
        &oversized,
        "path/with\x00null",
        "path/with\nnewline",
        "normal/path.txt",
        &moderate,
    ];
    for &sample in samples {
        let _ = validate_file_path(sample);
    }
}
/// Smoke test: feeds URLs with assorted schemes, embedded credentials, NUL
/// bytes and extreme length to `validate_remote_url`.
///
/// Every result is discarded; the test fails only if a call panics.
#[test]
fn fuzz_validate_remote_url_no_panic() {
    let oversized = "x".repeat(5000);
    let samples: &[&str] = &[
        "",
        "https://github.com/user/repo",
        "http://localhost:3000/repo",
        "ssh://git@host/repo",
        "git://host/repo",
        "git+ssh://git@host/repo",
        "file:///etc/passwd",
        "ext::sh -c 'rm -rf /'",
        "ftp://host/repo",
        "https://user:pass@host/repo",
        "\0",
        "https://host\0injection",
        &oversized,
    ];
    for &sample in samples {
        let _ = validate_remote_url(sample);
    }
}
/// Smoke test: feeds short, non-hex, whitespace, 64-character and
/// 65-character strings to `validate_oid_hex`.
///
/// Every result is discarded; the test fails only if a call panics.
#[test]
fn fuzz_validate_oid_hex_no_panic() {
    let len_65 = "x".repeat(65);
    let len_64 = "x".repeat(64);
    let samples: &[&str] = &[
        "",
        "abc",
        "abcd",
        "0123456789abcdef",
        "ZZZZ",
        "g000",
        "0000000000000000000000000000000000000000",
        &len_65,
        &len_64,
        "\0",
        " ",
        "\n",
    ];
    for &sample in samples {
        let _ = validate_oid_hex(sample);
    }
}
/// Smoke test: feeds absolute, traversal, dot-only, NUL and very long paths
/// to `validate_relative_path`.
///
/// Every result is discarded; the test fails only if a call panics.
#[test]
fn fuzz_validate_relative_path_no_panic() {
    let oversized = "x".repeat(5000);
    let samples: &[&str] = &[
        "",
        "/absolute",
        "relative/path",
        "../escape",
        "path/../escape",
        "\0",
        &oversized,
        ".",
        "..",
        "...",
        "a/b/c",
    ];
    for &sample in samples {
        let _ = validate_relative_path(sample);
    }
}
/// Smoke test: feeds valid refspecs, shell metacharacters, NUL and a very
/// long string to `validate_refspec`.
///
/// Every result is discarded; the test fails only if a call panics.
#[test]
fn fuzz_validate_refspec_no_panic() {
    let oversized = "x".repeat(2000);
    let samples: &[&str] = &[
        "",
        "+refs/heads/*:refs/heads/*",
        "refs/heads/main",
        "; rm -rf /",
        "$(evil)",
        "`evil`",
        "| pipe",
        "& bg",
        "< redirect",
        "> redirect",
        "\0",
        &oversized,
    ];
    for &sample in samples {
        let _ = validate_refspec(sample);
    }
}