refactor(bare): enhance security and performance optimizations

- Remove unnecessary sorting in advertise_refs for deterministic output
- Add path traversal detection and validation in bare_dir construction
- Implement symlink resolution checks to prevent security vulnerabilities
- Refactor cache system with CRC validation and improved metrics
- Integrate repo-specific cache invalidation using indexed keys
- Add comprehensive unit tests for commit operations and diff functionality
- Move configuration constants to centralized config module
- Optimize string operations in disk cache random value generation
- Enhance license detection algorithm with cleaner matching logic
- Streamline argument processing in various git operations
- Update dependencies including crc32fast and flate2 for performance
- Add signal handling capability to tokio runtime configuration
This commit is contained in:
zhenyi
2026-06-12 15:04:12 +08:00
parent e386f44ee2
commit 10a4398e81
41 changed files with 1373 additions and 365 deletions
Generated
+2
View File
@@ -437,6 +437,7 @@ version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
dependencies = [
"crc32fast",
"miniz_oxide",
"zlib-rs",
]
@@ -2651,6 +2652,7 @@ dependencies = [
"axum",
"base64",
"bytes",
"flate2",
"h2",
"http",
"http-body",
+4 -6
View File
@@ -4,13 +4,11 @@ version = "1.0.0"
edition = "2024"
authors = ["gitks contributors"]
description = "A gRPC-accessible Git repository operations library for bare repositories"
repository = ""
readme = ""
homepage = ""
repository = "https://github.com/appks/gitks"
homepage = "https://github.com/appks/gitks"
license = "PolyForm-Noncommercial-1.0.0"
keywords = ["git", "grpc", "bare-repository", "gix"]
categories = ["development-tools"]
documentation = ""
[lib]
path = "lib.rs"
@@ -27,13 +25,13 @@ duct = { version = "1", features = [] }
tracing = { version = "0.1", features = ["log"] }
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
tracing-appender = "0.2"
tokio = { version = "1", features = ["rt-multi-thread", "macros", "process", "io-util", "sync", "net"] }
tokio = { version = "1", features = ["rt-multi-thread", "macros", "process", "io-util", "sync", "net", "signal"] }
tokio-stream = { version = "0.1", features = ["full"] }
tokio-util = "0.7"
thiserror = { version = "2", features = [] }
prost = "0.14"
prost-types = "0.14"
tonic = { version = "0.14", features = ["transport"] }
tonic = { version = "0.14", features = ["transport", "gzip"] }
tonic-health = "0.14"
tonic-prost = "0.14"
tempfile = "3"
-3
View File
@@ -20,7 +20,6 @@ impl GitBare {
let (tx, rx) = tokio::sync::mpsc::channel(16);
// Validate revision before spawning (cannot use ? inside spawn_blocking closure)
let revision = match request.treeish.and_then(|s| s.selector) {
Some(object_selector::Selector::Oid(oid)) => {
crate::sanitize::validate_oid_hex(&oid.hex)
@@ -45,7 +44,6 @@ impl GitBare {
.map_err(|e| tonic::Status::invalid_argument(e.to_string()))?;
}
// Spawn the blocking git subprocess in a dedicated thread
tokio::task::spawn_blocking(move || {
let format = archive_options::Format::try_from(options.format)
.unwrap_or(archive_options::Format::ArchiveFormatTar);
@@ -89,7 +87,6 @@ impl GitBare {
}
};
// Read stdout in 64KB chunks and stream them
use std::io::Read;
let mut reader = std::io::BufReader::new(stdout);
let mut buf = vec![0u8; 65536];
+26 -14
View File
@@ -32,7 +32,6 @@ impl GitBare {
crate::sanitize::validate_relative_path(relative_path)?;
}
// Build base path: storage_path if given, else relative_path alone
let base = if !storage_path.is_empty() {
let p = Path::new(storage_path);
if !p.is_absolute() {
@@ -51,32 +50,36 @@ impl GitBare {
let bare_dir = if !relative_path.is_empty() && !storage_path.is_empty() {
let candidate = base.join(relative_path);
// Canonicalize base (parent dir likely exists) for a reliable traversal check.
let base_canon = base.canonicalize().unwrap_or_else(|_| base.clone());
// Unified path validation to avoid TOCTOU race condition
let canonical = match candidate.canonicalize() {
Ok(canon) => {
// Path exists and was canonicalized successfully
canon
// Validate that relative_path itself contains no traversal patterns
// before any filesystem access (mitigates TOCTOU)
if relative_path.contains("..") {
return Err(GitError::InvalidArgument(format!(
"path traversal detected: relative_path contains '..': {relative_path}"
)));
}
// Reject symlinks in relative_path components
if relative_path.contains('\0') {
return Err(GitError::InvalidArgument(
"relative_path contains null byte".into(),
));
}
let canonical = match candidate.canonicalize() {
Ok(canon) => canon,
Err(_) => {
// Path doesn't exist yet validate via parent directory
// This avoids TOCTOU by not having separate code paths
// Path doesn't exist yet; validate via parent
let parent = candidate.parent().unwrap_or(&base);
let filename = candidate.file_name().ok_or_else(|| {
GitError::InvalidArgument("invalid path: missing filename".into())
})?;
// Canonicalize parent (which should exist)
let parent_canon = parent
.canonicalize()
.unwrap_or_else(|_| parent.to_path_buf());
// Construct the full path and verify it's under base
let constructed = parent_canon.join(filename);
// String-level check as fallback for non-existent paths
let constructed_str = constructed.to_string_lossy();
let base_str = base_canon.to_string_lossy();
@@ -95,7 +98,6 @@ impl GitBare {
}
};
// Final verification: canonical path must be under base
if !canonical.starts_with(&base_canon) {
tracing::warn!(
relative_path = %relative_path,
@@ -107,6 +109,16 @@ impl GitBare {
"path traversal detected: {relative_path} escapes storage root"
)));
}
// Verify the resolved path has no symlinks in its components
// by checking that canonicalization is idempotent
let double_canon = canonical.canonicalize().unwrap_or_else(|_| canonical.clone());
if canonical != double_canon {
return Err(GitError::InvalidArgument(
"path resolved to different target (possible symlink race)".into(),
));
}
canonical
} else if !storage_path.is_empty() {
base.canonicalize().unwrap_or(base)
-6
View File
@@ -27,7 +27,6 @@ impl GitBare {
format!("{base}...{head}")
};
// Build base rev-list args
let mut base_args = vec![
"--git-dir".to_string(),
self.bare_dir.to_string_lossy().into_owned(),
@@ -38,10 +37,8 @@ impl GitBare {
}
base_args.push(range);
// 1. Total count
let total = {
let mut args = base_args.clone();
// Insert after "rev-list" (index 2)
args.insert(3, "--count".into());
let result = duct::cmd("git", &args)
.stdout_capture()
@@ -60,7 +57,6 @@ impl GitBare {
.unwrap_or(0)
};
// 2. Git-side pagination
let page_size = request
.pagination
.as_ref()
@@ -81,7 +77,6 @@ impl GitBare {
.min(total);
let mut fetch_args = base_args;
// Insert after "rev-list" (index 2)
fetch_args.insert(3, format!("--skip={start_offset}"));
fetch_args.insert(4, format!("-n{page_size}"));
@@ -104,7 +99,6 @@ impl GitBare {
.map(ToOwned::to_owned)
.collect();
// 3. Batch-read commits via gix (one repo open, no subprocess per commit)
let mut commits = Vec::with_capacity(page_ids.len());
for id in &page_ids {
commits.push(read_commit_from_repo(self, &repo, id)?);
+12 -15
View File
@@ -8,9 +8,7 @@ use crate::pb::{
impl GitBare {
pub fn create_commit(&self, request: CreateCommitRequest) -> GitResult<CreateCommitResponse> {
// Validate branch name to prevent command injection
crate::sanitize::validate_ref_name(&request.branch)?;
// Validate start_revision if provided
if let Some(rev) = request.start_revision.as_ref() {
match rev.selector.as_ref() {
Some(object_selector::Selector::Revision(name)) => {
@@ -23,11 +21,11 @@ impl GitBare {
}
}
const MAX_ACTIONS_PER_COMMIT: usize = 10_000;
if request.actions.len() > MAX_ACTIONS_PER_COMMIT {
if request.actions.len() > crate::config::MAX_ACTIONS_PER_COMMIT {
return Err(GitError::InvalidArgument(format!(
"too many commit actions ({} > max {MAX_ACTIONS_PER_COMMIT})",
request.actions.len()
"too many commit actions ({} > max {})",
request.actions.len(),
crate::config::MAX_ACTIONS_PER_COMMIT,
)));
}
@@ -168,15 +166,14 @@ impl GitBare {
index_path: &str,
action: &crate::pb::CreateCommitAction,
) -> GitResult<()> {
const MAX_ACTION_CONTENT_BYTES: usize = 100 * 1024 * 1024;
if action.content.len() > MAX_ACTION_CONTENT_BYTES {
if action.content.len() > crate::config::MAX_ACTION_CONTENT_BYTES {
return Err(GitError::InvalidArgument(format!(
"action content too large ({} bytes, max {MAX_ACTION_CONTENT_BYTES})",
action.content.len()
"action content too large ({} bytes, max {})",
action.content.len(),
crate::config::MAX_ACTION_CONTENT_BYTES,
)));
}
// Validate file paths to prevent command injection / traversal
if !action.file_path.is_empty() {
crate::sanitize::validate_file_path(&action.file_path)?;
}
@@ -341,11 +338,11 @@ impl GitBare {
author: Option<&crate::pb::Signature>,
committer: Option<&crate::pb::Signature>,
) -> GitResult<String> {
const MAX_COMMIT_MESSAGE_BYTES: usize = 10 * 1024 * 1024;
if message.len() > MAX_COMMIT_MESSAGE_BYTES {
if message.len() > crate::config::MAX_COMMIT_MESSAGE_BYTES {
return Err(GitError::InvalidArgument(format!(
"commit message too large ({} bytes, max {MAX_COMMIT_MESSAGE_BYTES})",
message.len()
"commit message too large ({} bytes, max {})",
message.len(),
crate::config::MAX_COMMIT_MESSAGE_BYTES,
)));
}
-5
View File
@@ -9,10 +9,8 @@ impl GitBare {
let base_args = build_rev_list_args(self, &request, &revision)?;
// 1. Get total count via rev-list --count (lightweight, no object parsing)
let total = {
let mut args = base_args.clone();
// Insert after "rev-list" (index 2) so it's a rev-list flag, not a git flag
args.insert(3, "--count".into());
let result = duct::cmd("git", &args)
.stdout_capture()
@@ -31,7 +29,6 @@ impl GitBare {
.unwrap_or(0)
};
// 2. Apply git-side pagination: --skip + -n to only fetch the page
let page_size = request
.pagination
.as_ref()
@@ -52,7 +49,6 @@ impl GitBare {
.min(total);
let mut fetch_args = base_args;
// Insert after "rev-list" (index 2) so they are rev-list flags, not git flags
fetch_args.insert(3, format!("--skip={start_offset}"));
fetch_args.insert(4, format!("-n{page_size}"));
@@ -75,7 +71,6 @@ impl GitBare {
.map(ToOwned::to_owned)
.collect();
// 3. Batch-read commits via gix (one repo open, zero subprocess per commit)
let commits = if page_ids.is_empty() {
Vec::new()
} else {
-3
View File
@@ -367,14 +367,12 @@ impl GitBare {
)));
}
// Split combined patch output by "diff --git" headers
let mut map = HashMap::new();
let output = &result.stdout;
let header = b"diff --git ";
let mut chunks: Vec<&[u8]> = Vec::new();
let mut pos = 0;
// Find all header positions
let mut header_positions = Vec::new();
while let Some(idx) = output[pos..]
.windows(header.len())
@@ -390,7 +388,6 @@ impl GitBare {
}
for chunk in chunks {
// Extract file path from "diff --git a/path b/path\n"
let first_line_end = chunk
.iter()
.position(|&b| b == b'\n')
-2
View File
@@ -19,7 +19,6 @@ impl GitBare {
];
let mut pathspecs = Vec::new();
// Apply options if present
if let Some(ref opts) = request.options {
if opts.recursive {
args.push("--recursive".to_string());
@@ -64,7 +63,6 @@ impl GitBare {
)));
}
// Chunk the output for streaming
const CHUNK_SIZE: usize = 32768;
let data = output.stdout;
let chunks: Vec<RawDiffResponse> = data
+9 -12
View File
@@ -14,8 +14,7 @@ use sha2;
use crate::error::{GitError, GitResult};
/// Lease stale threshold: leases older than this are considered stale.
const LEASE_STALE_THRESHOLD_SECS: u64 = 30;
use crate::config::LEASE_STALE_THRESHOLD_SECS;
/// State directory relative path under repo prefix.
const STATE_DIR_RELATIVE: &str = "+gitks-cache/state";
@@ -27,10 +26,11 @@ const CACHE_DIR_RELATIVE: &str = "+gitks-cache/cache";
const INFO_REFS_DIR_RELATIVE: &str = "+gitks-cache/info_refs";
fn random_value() -> String {
use std::fmt::Write;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};
const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
let nanos = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
@@ -42,11 +42,13 @@ fn random_value() -> String {
buf[..8].copy_from_slice(&nanos.to_le_bytes());
buf[8..].copy_from_slice(&counter.to_le_bytes());
let mut s = String::with_capacity(32);
for byte in &buf {
let _ = write!(s, "{byte:02x}");
let mut hex = [0u8; 32];
for (i, &byte) in buf.iter().enumerate() {
hex[i * 2] = HEX_CHARS[(byte >> 4) as usize];
hex[i * 2 + 1] = HEX_CHARS[(byte & 0xf) as usize];
}
s
// SAFETY: hex chars are all valid ASCII (0-9, a-f)
unsafe { String::from_utf8_unchecked(hex.to_vec()) }
}
/// Compute SHA256 digest from multiple input parts.
@@ -137,7 +139,6 @@ impl DiskCache {
return Ok(val.trim().to_string());
}
// Atomic write: create temp file, then rename into place
let val = random_value();
let tmp_path = latest_path.with_extension("tmp");
std::fs::write(&tmp_path, &val).map_err(GitError::Io)?;
@@ -202,7 +203,6 @@ impl DiskCache {
let val = std::fs::read_to_string(&latest_path).map_err(GitError::Io)?;
Ok(Some(val.trim().to_string()))
} else {
// No latest file → create one
Ok(Some(self.ensure_state(relative_path)?))
}
}
@@ -252,7 +252,6 @@ impl DiskCache {
delta_base_offset: bool,
) -> GitResult<String> {
let latest = self.ensure_state(relative_path)?;
// Sort wants and haves for deterministic key
let mut wants_sorted = wants_hex.to_vec();
wants_sorted.sort();
let mut haves_sorted = haves_hex.to_vec();
@@ -468,7 +467,6 @@ impl DiskCache {
if !prefix_dir.is_dir() {
continue;
}
// Process all entries in this prefix directory
let entries = match std::fs::read_dir(&prefix_dir) {
Ok(iter) => iter,
Err(_) => continue,
@@ -498,7 +496,6 @@ impl DiskCache {
prefix_empty = false;
}
}
// Remove empty prefix directory
if prefix_empty {
std::fs::remove_dir(&prefix_dir).ok();
}
+40 -4
View File
@@ -159,8 +159,29 @@ fn run_single_script(script_path: &Path, stdin_data: &[u8], timeout: Duration) -
timeout_secs = timeout.as_secs(),
"hook script timed out, killing"
);
let _ = c.kill();
let _ = c.wait();
if let Err(e) = c.kill() {
tracing::error!(
script = %script_path.display(),
error = %e,
"failed to kill timed-out hook"
);
}
match c.wait() {
Ok(status) => {
tracing::debug!(
script = %script_path.display(),
exit_code = ?status.code(),
"killed hook process reaped"
);
}
Err(e) => {
tracing::error!(
script = %script_path.display(),
error = %e,
"failed to reap killed hook"
);
}
}
HookResult::rejected(format!(
"hook script timed out after {}s: {}",
timeout.as_secs(),
@@ -168,8 +189,23 @@ fn run_single_script(script_path: &Path, stdin_data: &[u8], timeout: Duration) -
))
}
Err(e) => {
let _ = c.kill();
let _ = c.wait();
tracing::error!(
script = %script_path.display(),
error = %e,
"hook script wait error"
);
if let Err(kill_err) = c.kill() {
tracing::error!(
error = %kill_err,
"failed to kill hook after wait error"
);
}
if let Err(wait_err) = c.wait() {
tracing::error!(
error = %wait_err,
"failed to reap hook after wait error"
);
}
HookResult::rejected(format!("hook script wait error: {e}"))
}
}
+32 -12
View File
@@ -25,7 +25,6 @@ const FORBIDDEN_PATTERNS: &[&str] = &[
"init 6",
"poweroff",
"halt",
// Additional patterns to catch encoding/obfuscation attempts
"eval ", // eval can execute arbitrary strings
"exec ", // exec can replace process
"$(", // command substitution
@@ -55,8 +54,21 @@ const DANGEROUS_PREFIXES: &[&str] = &[
"rm -rf *", // rm -rf with wildcard
];
/// Maximum hook script size (64KB).
const MAX_HOOK_SIZE: usize = 65536;
/// Pairs of commands that indicate data exfiltration or code execution.
const DANGEROUS_COMMAND_PAIRS: &[(&str, &str)] = &[
("curl", "bash"),
("curl", "sh"),
("wget", "bash"),
("wget", "sh"),
("nc", "-e"),
("ncat", "-e"),
("python", "-c"),
("perl", "-e"),
("ruby", "-e"),
("node", "-e"),
];
use crate::config::MAX_HOOK_SCRIPT_SIZE;
/// Validate a custom hook script content for safety.
pub fn validate_hook_content(content: &str) -> GitResult<()> {
@@ -65,10 +77,10 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> {
"hook content cannot be empty".into(),
));
}
if content.len() > MAX_HOOK_SIZE {
if content.len() > MAX_HOOK_SCRIPT_SIZE {
return Err(GitError::InvalidArgument(format!(
"hook content too large (max {} bytes): {} bytes",
MAX_HOOK_SIZE,
MAX_HOOK_SCRIPT_SIZE,
content.len()
)));
}
@@ -78,7 +90,6 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> {
));
}
// Check for forbidden patterns (case-insensitive where appropriate)
let content_lower = content.to_lowercase();
for pattern in FORBIDDEN_PATTERNS {
if content_lower.contains(&pattern.to_lowercase()) {
@@ -88,7 +99,6 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> {
}
}
// Check for dangerous prefixes (exact case)
for prefix in DANGEROUS_PREFIXES {
if content.contains(prefix) {
return Err(GitError::InvalidArgument(format!(
@@ -97,15 +107,28 @@ pub fn validate_hook_content(content: &str) -> GitResult<()> {
}
}
// Check for obfuscation techniques
check_obfuscation_attempts(content)?;
check_dangerous_pairs(content)?;
Ok(())
}
/// Check for dangerous command pairs that indicate data exfiltration or code execution.
fn check_dangerous_pairs(content: &str) -> GitResult<()> {
let content_lower = content.to_lowercase();
for &(cmd1, cmd2) in DANGEROUS_COMMAND_PAIRS {
if content_lower.contains(cmd1) && content_lower.contains(cmd2) {
return Err(GitError::InvalidArgument(format!(
"hook contains dangerous command combination: '{cmd1}' + '{cmd2}' (possible data exfiltration)"
)));
}
}
Ok(())
}
/// Check for common obfuscation attempts.
fn check_obfuscation_attempts(content: &str) -> GitResult<()> {
// Check for excessive use of special characters that might indicate obfuscation
let special_char_count = content
.chars()
.filter(|c| {
@@ -117,14 +140,12 @@ fn check_obfuscation_attempts(content: &str) -> GitResult<()> {
.count();
let total_chars = content.chars().count();
// If more than 30% of content is special characters, it's suspicious
if total_chars > 0 && (special_char_count * 100 / total_chars) > 30 {
return Err(GitError::InvalidArgument(
"hook content appears obfuscated (too many special characters)".into(),
));
}
// Check for hex encoding attempts (e.g., \x41\x42)
if content.contains("\\x") {
let hex_count = content.matches("\\x").count();
if hex_count > 5 {
@@ -134,7 +155,6 @@ fn check_obfuscation_attempts(content: &str) -> GitResult<()> {
}
}
// Check for unicode escape sequences
if content.contains("\\u") {
let unicode_count = content.matches("\\u").count();
if unicode_count > 5 {
+1
View File
@@ -1,5 +1,6 @@
pub mod archive;
pub mod bare;
pub mod config;
pub mod blame;
pub mod blob;
pub mod branch;
+38 -11
View File
@@ -5,7 +5,7 @@ use std::time::Duration;
use gitks::disk_cache::DiskCache;
use gitks::hooks::HookManager;
use gitks::metrics;
use gitks::server::{GitksService, serve};
use gitks::server::{GitksService, serve_with_shutdown};
use etcd_client::{Client, PutOptions};
use tokio::sync::Mutex;
@@ -141,7 +141,6 @@ fn init_tracing() -> Option<tracing_appender::non_blocking::WorkerGuard> {
.boxed(),
};
// Optional file output with rotation
if let Ok(log_dir) = std::env::var("GITKS_LOG_DIR") {
let rotation = match env_or("GITKS_LOG_ROTATION", "daily").as_str() {
"hourly" => tracing_appender::rolling::Rotation::HOURLY,
@@ -212,7 +211,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let port = env_or("GITKS_PORT", DEFAULT_PORT);
let storage_name = env_or("STORAGE_NAME", DEFAULT_STORAGE_NAME);
// --- etcd config overlay: connect etcd, override key settings ---
let etcd_endpoints: Vec<String> = std::env::var("GITKS_ETCD_ENDPOINTS")
.ok()
.filter(|s| !s.is_empty())
@@ -239,7 +237,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let grpc_addr =
std::env::var("GITKS_ADVERTISE_ADDR").unwrap_or_else(|_| format!("http://{host}:{port}"));
// Register this service so other services (appks) can discover us
if let Some(ref e) = etcd {
let addr_str = format!("{host}:{port}");
e.register("gitks", &addr_str).await.ok();
@@ -256,7 +253,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
std::fs::create_dir_all(&repo_prefix)?;
}
// Disk cache configuration
let disk_cache_enabled = env_bool("GITKS_DISK_CACHE_ENABLED", false);
let disk_cache_max_age = env_u64("GITKS_DISK_CACHE_MAX_AGE", 300);
@@ -275,7 +271,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
tracing::info!("disk cache disabled");
}
// Pack cache configuration
let pack_cache_enabled = env_bool("GITKS_PACK_CACHE_ENABLED", false);
let pack_backpressure = env_bool("GITKS_PACK_CACHE_BACKPRESSURE", true);
@@ -293,7 +288,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
None
};
// Hook manager configuration
let hooks_enabled = env_bool("GITKS_HOOKS_ENABLED", true);
let server_hooks_dir = std::env::var("GITKS_SERVER_HOOKS_DIR")
.ok()
@@ -326,7 +320,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let _metrics_handle = metrics::start_metrics_server(metrics_port);
tracing::info!(port = metrics_port, "metrics server started");
// Slow request threshold
let _semaphore_cleanup = gitks::rate_limit::start_semaphore_cleanup_task();
let slow_request_threshold = env_u64("GITKS_SLOW_REQUEST_THRESHOLD_MS", 5000);
metrics::set_slow_request_threshold(slow_request_threshold);
tracing::info!(
@@ -357,11 +352,43 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
"starting gitks gRPC server"
);
serve(addr, svc).await?;
metrics::set_ready(true);
serve_with_shutdown(addr, svc, shutdown_signal()).await?;
metrics::set_ready(false);
// Gracefully shut down the HTTP metrics server
http_cancel.cancel();
tracing::info!("gitks shut down");
tracing::info!("gitks shut down complete");
Ok(())
}
/// Resolves when the process receives SIGTERM or SIGINT (Ctrl+C).
async fn shutdown_signal() {
let ctrl_c = async {
tokio::signal::ctrl_c()
.await
.expect("failed to install Ctrl+C handler");
};
#[cfg(unix)]
let terminate = async {
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
.expect("failed to install SIGTERM handler")
.recv()
.await;
};
#[cfg(not(unix))]
let terminate = std::future::pending::<()>();
tokio::select! {
_ = ctrl_c => {
tracing::info!("received Ctrl+C, starting graceful shutdown");
}
_ = terminate => {
tracing::info!("received SIGTERM, starting graceful shutdown");
}
}
}
+71 -15
View File
@@ -65,6 +65,12 @@ struct MetricsInner {
cache_hit_by_namespace: DashMap<String, AtomicU64>,
/// Counter: cache misses by namespace
cache_miss_by_namespace: DashMap<String, AtomicU64>,
/// Histogram: cache value size in bytes
cache_value_size_buckets: DashMap<String, AtomicU64>,
/// Counter: rate-limit rejections by repository
rate_limit_reject_count: DashMap<String, AtomicU64>,
/// Counter: rate-limit acquires by repository
rate_limit_acquire_count: DashMap<String, AtomicU64>,
}
static METRICS: OnceLock<Arc<MetricsInner>> = OnceLock::new();
@@ -99,6 +105,9 @@ fn metrics() -> &'static Arc<MetricsInner> {
cache_eviction_count: DashMap::new(),
cache_hit_by_namespace: DashMap::new(),
cache_miss_by_namespace: DashMap::new(),
cache_value_size_buckets: DashMap::new(),
rate_limit_reject_count: DashMap::new(),
rate_limit_acquire_count: DashMap::new(),
})
})
}
@@ -144,7 +153,6 @@ pub fn record_request(method: &str, status_code: &str, duration: Duration) {
let m = metrics();
let duration_ms = duration.as_millis() as u64;
// Request count
let key = format!("{method}:{status_code}");
m.request_count
.entry(key)
@@ -152,10 +160,8 @@ pub fn record_request(method: &str, status_code: &str, duration: Duration) {
.value()
.fetch_add(1, Ordering::Relaxed);
// Duration histogram
record_duration_bucket(&m.duration_buckets, method, duration_ms);
// Slow request detection
let threshold = m.slow_request_threshold_ms.load(Ordering::Relaxed);
if threshold > 0 && duration_ms >= threshold {
m.slow_request_count
@@ -270,6 +276,46 @@ pub fn record_hook_execution(hook_type: &str, result: &str, duration: Duration)
record_duration_bucket(&m.hook_duration_buckets, hook_type, duration_ms);
}
/// Record cache value size distribution (in bytes).
pub fn record_cache_value_size(namespace: &str, size: usize) {
let m = metrics();
record_size_bucket(&m.cache_value_size_buckets, namespace, size as u64);
}
/// Record a rate-limit rejection event.
pub fn record_rate_limit_reject(repo: &str) {
let m = metrics();
m.rate_limit_reject_count
.entry(repo.to_string())
.or_insert_with(|| AtomicU64::new(0))
.value()
.fetch_add(1, Ordering::Relaxed);
}
/// Record a rate-limit acquire event.
pub fn record_rate_limit_acquire(repo: &str) {
let m = metrics();
m.rate_limit_acquire_count
.entry(repo.to_string())
.or_insert_with(|| AtomicU64::new(0))
.value()
.fetch_add(1, Ordering::Relaxed);
}
/// Record size distribution buckets (log2-based: 1KB, 4KB, 16KB, ..., 1GB).
fn record_size_bucket(map: &DashMap<String, AtomicU64>, label: &str, size: u64) {
let buckets = [1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864, 268435456, 1073741824];
for &bound in &buckets {
let key = format!("{label}:le_{bound}");
if size <= bound {
map.entry(key)
.or_insert_with(|| AtomicU64::new(0))
.value()
.fetch_add(1, Ordering::Relaxed);
}
}
}
/// Escape a string for use as a Prometheus label value.
/// Replaces `\` → `\\`, `"` → `\"`, `\n` → `\n` per the Prometheus spec.
fn prom_escape(value: &str) -> String {
@@ -349,7 +395,6 @@ pub fn render_metrics() -> String {
out.push_str("# TYPE gitks_repository_count gauge\n");
out.push_str(&format!("gitks_repository_count {repos}\n\n"));
// gRPC requests
render_counter_map(
&mut out,
"gitks_requests_total",
@@ -358,7 +403,6 @@ pub fn render_metrics() -> String {
&["method", "status"],
);
// gRPC duration
render_histogram(
&mut out,
"gitks_request_duration_milliseconds",
@@ -366,7 +410,6 @@ pub fn render_metrics() -> String {
&m.duration_buckets,
);
// Slow requests
render_counter_map(
&mut out,
"gitks_slow_requests_total",
@@ -375,7 +418,6 @@ pub fn render_metrics() -> String {
&["method"],
);
// Cache
let hits = m.cache_hits.load(Ordering::Relaxed);
let misses = m.cache_misses.load(Ordering::Relaxed);
out.push_str("# HELP gitks_cache_hits_total Cache hit count\n");
@@ -385,7 +427,6 @@ pub fn render_metrics() -> String {
out.push_str("# TYPE gitks_cache_misses_total counter\n");
out.push_str(&format!("gitks_cache_misses_total {misses}\n\n"));
// Errors
render_counter_map(
&mut out,
"gitks_errors_total",
@@ -394,7 +435,6 @@ pub fn render_metrics() -> String {
&["kind"],
);
// Git subprocess
render_counter_map(
&mut out,
"gitks_git_cmd_total",
@@ -409,7 +449,6 @@ pub fn render_metrics() -> String {
&m.git_cmd_duration_buckets,
);
// Cache operations
render_counter_map(
&mut out,
"gitks_cache_ops_total",
@@ -424,7 +463,6 @@ pub fn render_metrics() -> String {
&m.cache_op_duration_buckets,
);
// Cache evictions by cause and namespace
render_counter_map(
&mut out,
"gitks_cache_evictions_total",
@@ -433,7 +471,6 @@ pub fn render_metrics() -> String {
&["cause", "namespace"],
);
// Per-namespace cache hits
render_counter_map(
&mut out,
"gitks_cache_hits_by_namespace_total",
@@ -442,7 +479,6 @@ pub fn render_metrics() -> String {
&["namespace"],
);
// Per-namespace cache misses
render_counter_map(
&mut out,
"gitks_cache_misses_by_namespace_total",
@@ -451,7 +487,6 @@ pub fn render_metrics() -> String {
&["namespace"],
);
// Hook execution
render_counter_map(
&mut out,
"gitks_hook_executions_total",
@@ -466,6 +501,28 @@ pub fn render_metrics() -> String {
&m.hook_duration_buckets,
);
render_histogram(
&mut out,
"gitks_cache_value_size_bytes",
"Cache value size distribution in bytes",
&m.cache_value_size_buckets,
);
render_counter_map(
&mut out,
"gitks_rate_limit_rejects_total",
"Rate-limit rejections by repository",
&m.rate_limit_reject_count,
&["repo"],
);
render_counter_map(
&mut out,
"gitks_rate_limit_acquires_total",
"Rate-limit acquires by repository",
&m.rate_limit_acquire_count,
&["repo"],
);
out
}
@@ -688,7 +745,6 @@ impl RequestMetrics {
let duration_ms = duration.as_millis() as u64;
record_request(self.method, status, duration);
// Slow request warning
let threshold = metrics().slow_request_threshold_ms.load(Ordering::Relaxed);
if threshold > 0 && duration_ms >= threshold {
tracing::warn!(
-2
View File
@@ -42,7 +42,6 @@ impl GitBare {
symbolic_target,
});
}
// Sort by name for deterministic output
references.sort_by(|a, b| a.name.cmp(&b.name));
Ok(AdvertiseRefsResponse {
references,
@@ -68,7 +67,6 @@ impl GitBare {
let bare_dir_str = self.bare_dir.to_string_lossy().into_owned();
let stateless = request.protocol.as_ref().is_some_and(|p| p.stateless);
// Default to upload-pack if service is unspecified
let subcommand = if request.service == "git-receive-pack" {
"receive-pack"
} else {
-5
View File
@@ -18,7 +18,6 @@ impl GitBare {
let pack_dir = self.bare_dir.join("objects").join("pack");
std::fs::create_dir_all(&pack_dir).map_err(GitError::Io)?;
// Stream pack data to a temp file instead of accumulating in memory
let mut tmp_file = tempfile::Builder::new()
.prefix("tmp_index_pack_")
.tempfile_in(&pack_dir)
@@ -41,7 +40,6 @@ impl GitBare {
return Err(GitError::InvalidArgument("empty pack data".into()));
}
// Flush and get the path before we pass it to git
tmp_file.flush().map_err(GitError::Io)?;
let tmp_path = tmp_file.path().to_path_buf();
@@ -64,7 +62,6 @@ impl GitBare {
.unchecked()
.run()?;
// Drop the temp file handle — git index-pack has processed it
drop(tmp_file);
if !result.status.success() {
@@ -74,7 +71,6 @@ impl GitBare {
});
}
// Parse the output to extract the pack hash
let output = String::from_utf8_lossy(&result.stdout);
let stderr = String::from_utf8_lossy(&result.stderr);
let all_output = format!("{output}\n{stderr}");
@@ -96,7 +92,6 @@ impl GitBare {
})
.next();
// Try to get object count from .idx if it exists
let mut object_count = 0u64;
if let Some(ref hash) = pack_hash {
let idx_path = pack_dir.join(format!("pack-{hash}.idx"));
-1
View File
@@ -30,7 +30,6 @@ impl GitBare {
.filter(|hex| !hex.is_empty())
.map(|hex| self.oid_to_pb(hex));
// Count objects
let mut object_count = 0u64;
if let Some(hash_str) = base_name.strip_prefix("pack-") {
let idx_path = pack_dir.join(format!("pack-{hash_str}.idx"));
+1 -9
View File
@@ -1,5 +1,4 @@
use std::process::Stdio;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::process::Command;
@@ -8,13 +7,9 @@ use tokio_stream::wrappers::ReceiverStream;
use super::CancellableReceiverStream;
use crate::bare::GitBare;
use crate::config::{MAX_RECEIVE_PACKET_BYTES, MAX_RECEIVE_STDERR_BYTES, RECEIVE_PACK_TIMEOUT};
use crate::pb::ReceivePackResponse;
/// Maximum time allowed for a git receive-pack process before it is killed.
const RECEIVE_PACK_TIMEOUT: Duration = Duration::from_secs(1800); // 30 minutes
const MAX_RECEIVE_PACKET_BYTES: usize = 16 * 1024 * 1024;
const MAX_RECEIVE_STDERR_BYTES: u64 = 64 * 1024;
impl GitBare {
/// Receive pack data using git-receive-pack with true concurrent streaming.
///
@@ -41,7 +36,6 @@ impl GitBare {
let (tx, rx) = tokio::sync::mpsc::channel(16);
// Use a cancellation token to track client disconnect
let cancel_token = tokio_util::sync::CancellationToken::new();
let cancel_token_clone = cancel_token.clone();
@@ -154,7 +148,6 @@ impl GitBare {
}
};
// Run all three concurrently with timeout
let _process_future = tokio::join!(stdin_task, stdout_task, stderr_task);
match tokio::time::timeout(RECEIVE_PACK_TIMEOUT, child.wait()).await {
@@ -189,7 +182,6 @@ impl GitBare {
}
});
// When the ReceiverStream is dropped (client disconnect), cancel the background task
let rx_stream = ReceiverStream::new(rx);
let cancel_guard = cancel_token_clone.clone().drop_guard();
-6
View File
@@ -41,11 +41,9 @@ impl GitBare {
let (tx, rx) = tokio::sync::mpsc::channel(16);
// Use a cancellation token to track client disconnect
let cancel_token = tokio_util::sync::CancellationToken::new();
let cancel_token_clone = cancel_token.clone();
// Move input into the spawned task to make it 'static
let stream = Box::pin(input);
tokio::spawn(async move {
let stream = stream;
@@ -77,7 +75,6 @@ impl GitBare {
let mut stdout = child.stdout.take();
let mut stderr = child.stderr.take();
// Concurrent: write stdin packets, read stdout chunks, read stderr
let stdin_task = {
let mut stream = stream;
let cancel = cancel_token.clone();
@@ -102,7 +99,6 @@ impl GitBare {
Err(_) => break,
}
}
// Close stdin to signal end-of-input
drop(stdin);
}
}
@@ -157,7 +153,6 @@ impl GitBare {
}
};
// Run all three concurrently with timeout
let _process_future = tokio::join!(stdin_task, stdout_task, stderr_task);
match tokio::time::timeout(UPLOAD_PACK_TIMEOUT, child.wait()).await {
@@ -192,7 +187,6 @@ impl GitBare {
}
});
// When the ReceiverStream is dropped (client disconnect), cancel the background task
let rx_stream = ReceiverStream::new(rx);
let cancel_guard = cancel_token_clone.clone().drop_guard();
-1
View File
@@ -100,7 +100,6 @@ impl PackCache {
})
.await;
if result.is_err() {
// Task join error or I/O error already sent
}
});
+92 -18
View File
@@ -10,15 +10,22 @@
use dashmap::DashMap;
use std::sync::{Arc, OnceLock, RwLock};
use std::time::Instant;
use tokio::sync::Semaphore;
/// Default max concurrent operations per repository.
const DEFAULT_MAX_CONCURRENT: usize = 5;
use crate::config::{DEFAULT_MAX_CONCURRENT_OPS, SEMAPHORE_IDLE_THRESHOLD_SECS};
/// Per-repository rate limiter entry with usage tracking.
struct SemaphoreEntry {
sem: Arc<Semaphore>,
max_permits: usize,
last_accessed: RwLock<Instant>,
}
/// Global rate limiter state.
struct RateLimiter {
/// Per-repository semaphores. Key = repository relative_path.
semaphores: DashMap<String, Arc<Semaphore>>,
semaphores: DashMap<String, SemaphoreEntry>,
/// Max concurrent operations per repository (protected by RwLock for runtime updates).
max_concurrent: RwLock<usize>,
}
@@ -30,7 +37,7 @@ fn limiter() -> &'static RateLimiter {
let max = std::env::var("GITKS_RATE_LIMIT_MAX_CONCURRENT")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(DEFAULT_MAX_CONCURRENT);
.unwrap_or(DEFAULT_MAX_CONCURRENT_OPS);
tracing::info!(
max_concurrent = max,
@@ -52,6 +59,8 @@ fn get_max_concurrent() -> usize {
.unwrap_or_else(|e| e.into_inner())
}
/// A guard that holds a rate-limit permit. The permit is released on drop.
pub struct RateLimitGuard {
/// The semaphore permit. Dropping this releases the permit.
@@ -71,18 +80,24 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
}
let max_concurrent = get_max_concurrent();
if max_concurrent == 0 {
// Unlimited
return None;
}
let sem = limiter()
let sem = {
let entry = limiter()
.semaphores
.entry(repo.to_string())
.or_insert_with(|| Arc::new(Semaphore::new(max_concurrent)))
.value()
.clone();
.or_insert_with(|| SemaphoreEntry {
sem: Arc::new(Semaphore::new(max_concurrent)),
max_permits: max_concurrent,
last_accessed: RwLock::new(Instant::now()),
});
if let Ok(mut last) = entry.last_accessed.write() {
*last = Instant::now();
}
entry.sem.clone()
};
// Release DashMap reference before awaiting
let _ = repo;
match tokio::time::timeout(
@@ -97,6 +112,7 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
available = sem.available_permits(),
"rate limit permit acquired"
);
crate::metrics::record_rate_limit_acquire(repo_relative_path.unwrap_or(""));
Some(RateLimitGuard { _permit: permit })
}
Ok(Err(_closed)) => {
@@ -105,7 +121,8 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
repo = %repo_relative_path.unwrap_or(""),
"rate limit semaphore closed, recreating"
);
let new_sem = Arc::new(Semaphore::new(get_max_concurrent()));
let max = get_max_concurrent();
let new_sem = Arc::new(Semaphore::new(max));
let permit = match new_sem.clone().acquire_owned().await {
Ok(permit) => permit,
Err(_closed) => {
@@ -116,9 +133,14 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
return None;
}
};
limiter()
.semaphores
.insert(repo_relative_path.unwrap_or("").to_string(), new_sem);
limiter().semaphores.insert(
repo_relative_path.unwrap_or("").to_string(),
SemaphoreEntry {
sem: new_sem,
max_permits: get_max_concurrent(),
last_accessed: RwLock::new(Instant::now()),
},
);
Some(RateLimitGuard { _permit: permit })
}
Err(_elapsed) => {
@@ -146,7 +168,7 @@ pub async fn acquire_or_reject(
if get_max_concurrent() == 0 {
return Ok(None);
}
// Timeout — reject with resource exhausted
crate::metrics::record_rate_limit_reject(repo);
Err(tonic::Status::resource_exhausted(format!(
"rate limit exceeded for repository '{repo}': max {max} concurrent operations",
max = get_max_concurrent()
@@ -161,6 +183,52 @@ pub fn remove_repository(repo_relative_path: &str) {
tracing::debug!(repo = %repo_relative_path, "rate limit semaphore removed");
}
/// Clean up idle semaphores that have no active permits and haven't been
/// accessed within the idle threshold.
///
/// Call this periodically (e.g., from a background task) to prevent
/// unbounded growth of the semaphore map.
pub fn cleanup_idle_semaphores() {
let threshold = std::time::Duration::from_secs(SEMAPHORE_IDLE_THRESHOLD_SECS);
let now = Instant::now();
let max_concurrent = get_max_concurrent();
let mut removed = 0u64;
limiter().semaphores.retain(|_key, entry| {
let is_idle = entry.sem.available_permits() == max_concurrent;
let is_stale = entry
.last_accessed
.read()
.map(|last| now.duration_since(*last) > threshold)
.unwrap_or(false);
let keep = !(is_idle && is_stale);
if !keep {
removed += 1;
}
keep
});
if removed > 0 {
tracing::info!(
removed = removed,
"cleaned up idle rate-limit semaphores"
);
}
}
/// Start a background task to periodically clean up idle semaphores.
pub fn start_semaphore_cleanup_task() -> tokio::task::JoinHandle<()> {
let interval = std::time::Duration::from_secs(60);
tokio::spawn(async move {
let mut ticker = tokio::time::interval(interval);
loop {
ticker.tick().await;
cleanup_idle_semaphores();
}
})
}
/// Update the max concurrent limit at runtime.
///
/// Only replaces semaphores that have no active permits (idle repos).
@@ -187,8 +255,7 @@ pub fn set_max_concurrent(max: usize) {
.semaphores
.iter()
.filter_map(|entry| {
let sem = entry.value();
if sem.available_permits() == old_max {
if entry.value().max_permits == old_max {
Some(entry.key().clone())
} else {
None
@@ -197,7 +264,14 @@ pub fn set_max_concurrent(max: usize) {
.collect();
for key in keys {
l.semaphores.insert(key, Arc::new(Semaphore::new(max)));
l.semaphores.insert(
key,
SemaphoreEntry {
sem: Arc::new(Semaphore::new(max)),
max_permits: max,
last_accessed: RwLock::new(Instant::now()),
},
);
}
tracing::info!(max_concurrent = max, "rate limit max_concurrent updated");
-4
View File
@@ -72,19 +72,16 @@ impl GitBare {
"--format=%(refname)%00%(objectname)%00%(symref)".to_string(),
];
// Sort direction
let sort_prefix = match SortDirection::try_from(request.sort_direction) {
Ok(SortDirection::Asc) => "",
_ => "-",
};
args.push(format!("--sort={sort_prefix}refname"));
// Containing OIDs filter
if let Some(first_oid) = request.containing_oids.first() {
args.push(format!("--points-at={first_oid}"));
}
// Prefix or pattern
if !request.prefixes.is_empty() {
for prefix in &request.prefixes {
args.push(prefix.clone());
@@ -115,7 +112,6 @@ impl GitBare {
let oid = parts[1].to_string();
let symref = parts.get(2).map(|s| s.to_string()).unwrap_or_default();
// Apply glob pattern filter if set
if !request.pattern.is_empty() && !simple_glob_match(&request.pattern, &ref_name) {
continue;
}
+1 -2
View File
@@ -15,7 +15,7 @@ impl GitBare {
if !update.old_oid.is_empty() {
crate::sanitize::validate_revision(&update.old_oid)?;
stdin_input.push_str(&format!(
"update {} {}\0{}\n",
"update {} {} {}\n",
update.ref_name, update.new_oid, update.old_oid
));
} else {
@@ -32,7 +32,6 @@ impl GitBare {
&self.bare_dir.to_string_lossy(),
"update-ref",
"--stdin",
"-z",
])
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
-3
View File
@@ -46,9 +46,6 @@ pub fn find_remote_repository(
continue;
}
// Lines can be:
// SHA<TAB>refname (direct ref)
// ref: refs/heads/main<TAB>HEAD (symbolic ref via --symref)
if line.starts_with("ref:") {
if let Some((target, name)) = line.split_once('\t') {
refs.push(RemoteHead {
-1
View File
@@ -103,7 +103,6 @@ impl GitBare {
});
}
// Update local HEAD to match remote HEAD
let head_output = std::process::Command::new("git")
.args([
"--git-dir",
-11
View File
@@ -57,57 +57,46 @@ impl GitBare {
fn detect_license(content: &str) -> (&'static str, &'static str, f64) {
let lower = content.to_lowercase();
// MIT
if lower.contains("permission is hereby granted, free of charge") && lower.contains("mit") {
return ("MIT", "MIT License", 0.95);
}
// Apache 2.0
if lower.contains("apache license, version 2.0") || lower.contains("apache-2.0") {
return ("Apache-2.0", "Apache License 2.0", 0.95);
}
// GPL 3.0
if lower.contains("gnu general public license") && lower.contains("version 3") {
return ("GPL-3.0", "GNU General Public License v3.0", 0.90);
}
// GPL 2.0
if lower.contains("gnu general public license") && lower.contains("version 2") {
return ("GPL-2.0", "GNU General Public License v2.0", 0.90);
}
// BSD 3
if lower.contains("redistribution and use in source and binary forms")
&& lower.contains("neither the name of")
{
return ("BSD-3-Clause", "BSD 3-Clause License", 0.85);
}
// BSD 2
if lower.contains("redistribution and use in source and binary forms") {
return ("BSD-2-Clause", "BSD 2-Clause License", 0.80);
}
// AGPL
if lower.contains("gnu affero general public license") {
return ("AGPL-3.0", "GNU Affero General Public License v3.0", 0.90);
}
// LGPL
if lower.contains("gnu lesser general public license") {
return ("LGPL-3.0", "GNU Lesser General Public License v3.0", 0.85);
}
// MPL
if lower.contains("mozilla public license") {
return ("MPL-2.0", "Mozilla Public License 2.0", 0.90);
}
// Unlicense
if lower.contains("this is free and unencumbered software released into the public domain") {
return ("Unlicense", "The Unlicense", 0.95);
}
// ISC
if lower.contains("permission to use, copy, modify, and/or distribute") && lower.contains("isc")
{
return ("ISC", "ISC License", 0.80);
-15
View File
@@ -7,7 +7,6 @@ use crate::bare::GitBare;
use crate::error::{GitError, GitResult};
use crate::pb::{GetLanguageStatsRequest, GetLanguageStatsResponse, LanguageStat, object_selector};
// Include the generated linguist rules
include!(concat!(env!("OUT_DIR"), "/linguist_generated.rs"));
/// Default max file size for line counting (512 KB).
@@ -17,7 +16,6 @@ const MAX_TREE_WALK_DEPTH: usize = 256;
/// Look up a language by file extension (case-insensitive, includes leading dot).
fn lookup_by_extension(ext: &str) -> Option<(&'static str, &'static str)> {
let ext_lower = ext.to_lowercase();
// Binary search on the sorted EXTENSION_MAP
EXTENSION_MAP
.binary_search_by(|&(e, _, _)| e.cmp(ext_lower.as_str()))
.ok()
@@ -54,13 +52,11 @@ fn detect_language(path: &str, is_binary: bool) -> Option<(&'static str, &'stati
.and_then(|n| n.to_str())
.unwrap_or("");
// Try filename match first (e.g., Makefile, Dockerfile)
if let Some(result) = lookup_by_filename(file_name) {
tracing::debug!(path = %path, lang = result.0, "matched by filename");
return Some(result);
}
// Try extension match
if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) {
let ext_with_dot = format!(".{ext}");
if let Some(result) = lookup_by_extension(&ext_with_dot) {
@@ -72,13 +68,10 @@ fn detect_language(path: &str, is_binary: bool) -> Option<(&'static str, &'stati
tracing::debug!(path = %path, "no extension found");
}
// For binary files with no recognized extension, classify by media type
if is_binary {
// Try extension-based binary classification
if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) {
let ext_lower = format!(".{ext}").to_lowercase();
let media_type = classify_binary_extension(&ext_lower);
// Return as a synthetic language name
return Some((media_type, "data"));
}
return Some(("Binary", "data"));
@@ -146,7 +139,6 @@ impl GitBare {
.try_into_tree()
.map_err(|e| GitError::Gix(e.to_string()))?;
// If path is specified, descend into subdirectory
if !request.path.is_empty() {
crate::sanitize::validate_file_path(&request.path)?;
let entry = tree
@@ -173,7 +165,6 @@ impl GitBare {
};
self.walk_tree(&repo, &tree, &prefix, 0, &mut ctx)?;
// Resolve groups: merge child language stats into parent group
tracing::info!(
total_files,
total_bytes,
@@ -193,13 +184,11 @@ impl GitBare {
entry.file_count = entry.file_count.saturating_add(s.file_count);
entry.bytes = entry.bytes.saturating_add(s.bytes);
entry.lines = entry.lines.saturating_add(s.lines);
// Keep the lang_type from the parent (or first encountered)
if entry.lang_type.is_empty() {
entry.lang_type = s.lang_type;
}
}
// Build response sorted by bytes descending
let mut languages: Vec<LanguageStat> = resolved
.into_iter()
.map(|(language, s)| {
@@ -272,15 +261,12 @@ impl GitBare {
let data = &blob.data;
let size = data.len() as u64;
// Skip empty files
if size == 0 {
continue;
}
// Check if binary (contains null byte)
let is_binary = data.contains(&0);
// Detect language
let Some((lang_name, lang_type)) = detect_language(&path, is_binary) else {
tracing::debug!(path = %path, is_binary, "no language detected");
continue;
@@ -288,7 +274,6 @@ impl GitBare {
let lang_key = lang_name.to_string();
// Count code lines only for non-binary files within size limit
let lines = if !is_binary && size <= u64::from(ctx.max_file_size) {
count_code_lines(data)
} else {
-7
View File
@@ -18,7 +18,6 @@ impl GitBare {
OptimizeStrategy::Heuristic | OptimizeStrategy::Aggressive => {
let stats = self.get_repository_statistics()?;
// Run commit-graph write if needed
if (stats.commit_graph_size_bytes == 0 || strategy == OptimizeStrategy::Aggressive)
&& let Ok(resp) = write_commit_graph(self, false, false)
{
@@ -28,7 +27,6 @@ impl GitBare {
stdout_all.push_str(&resp.stdout);
}
// Repack if many loose objects or packfiles
let repack_needed = stats.loose_object_count > 1000 || stats.packfile_count > 10;
if repack_needed || strategy == OptimizeStrategy::Aggressive {
@@ -41,7 +39,6 @@ impl GitBare {
}
}
// Prune if aggressive
if strategy == OptimizeStrategy::Aggressive
&& let Ok(resp) = run_gc(self, true, true)
{
@@ -52,7 +49,6 @@ impl GitBare {
}
}
OptimizeStrategy::Incremental => {
// Just run commit-graph write incrementally
if let Ok(resp) = write_commit_graph(self, false, false) {
if !resp.ok {
stderr_all.push_str(&resp.stderr);
@@ -71,7 +67,6 @@ impl GitBare {
}
fn get_repository_statistics(&self) -> GitResult<RepositoryStatistics> {
// Count loose objects
let loose = std::fs::read_dir(self.bare_dir.join("objects"))
.map(|d| {
d.filter_map(|e| e.ok())
@@ -83,13 +78,11 @@ impl GitBare {
})
.unwrap_or(0);
// Count packfiles
let pack_dir = self.bare_dir.join("objects").join("pack");
let pack_count = std::fs::read_dir(&pack_dir)
.map(|d| d.filter_map(|e| e.ok()).count() as u64)
.unwrap_or(0);
// Check commit-graph
let cg_size = std::fs::metadata(
self.bare_dir
.join("objects")
-2
View File
@@ -67,7 +67,6 @@ impl GitBare {
let mut results = Vec::new();
for line in stdout.lines() {
// Format: path:line:col:matched_text
if let Some((path_and_rest, matched)) = line.rsplit_once(':') {
let prefix_parts: Vec<&str> = path_and_rest.rsplitn(3, ':').collect();
if prefix_parts.len() >= 3
@@ -144,7 +143,6 @@ impl GitBare {
continue;
}
// Simple substring/case-insensitive matching for file names
let query = &request.query;
let matched = if query.is_empty() {
true
+60 -29
View File
@@ -32,9 +32,13 @@ pub fn validate_oid_hex(hex: &str) -> GitResult<()> {
if hex.is_empty() {
return Err(GitError::InvalidArgument("oid hex cannot be empty".into()));
}
if !(4..=64).contains(&hex.len()) {
if !(crate::config::MIN_OID_HEX_LENGTH..=crate::config::MAX_OID_HEX_LENGTH)
.contains(&hex.len())
{
return Err(GitError::InvalidArgument(format!(
"oid hex length must be 4..=64 chars: {}",
"oid hex length must be {}..={} chars: {}",
crate::config::MIN_OID_HEX_LENGTH,
crate::config::MAX_OID_HEX_LENGTH,
hex.len()
)));
}
@@ -75,9 +79,10 @@ pub fn validate_ref_name(name: &str) -> GitResult<()> {
"ref name contains forbidden character: {name}"
)));
}
if name.len() > 255 {
if name.len() > crate::config::MAX_REF_NAME_LENGTH {
return Err(GitError::InvalidArgument(format!(
"ref name too long (max 255 chars): {name}"
"ref name too long (max {} chars): {name}",
crate::config::MAX_REF_NAME_LENGTH
)));
}
Ok(())
@@ -91,35 +96,36 @@ pub fn validate_revision(rev: &str) -> GitResult<()> {
if rev.is_empty() {
return Err(GitError::InvalidArgument("revision cannot be empty".into()));
}
if rev.len() > 256 {
if rev.len() > crate::config::MAX_REVISION_LENGTH {
return Err(GitError::InvalidArgument(format!(
"revision too long (max 256 chars): {}",
"revision too long (max {} chars): {}",
crate::config::MAX_REVISION_LENGTH,
rev.len()
)));
}
if rev.chars().all(|c| c.is_ascii_hexdigit()) && rev.len() >= 4 && rev.len() <= 64 {
if rev.chars().all(|c| c.is_ascii_hexdigit())
&& rev.len() >= crate::config::MIN_OID_HEX_LENGTH
&& rev.len() <= crate::config::MAX_OID_HEX_LENGTH
{
return Ok(());
}
if rev == "HEAD" {
return Ok(());
}
// Allow ref:refs/heads/... (git internal format)
if let Some(rest) = rev.strip_prefix("ref:") {
return validate_ref_name(rest.trim());
}
const MAX_ANCESTRY_DEPTH: u32 = 10000;
if let Some(tilde_pos) = rev.rfind('~') {
let num_part = &rev[tilde_pos + 1..];
if !num_part.is_empty() && num_part.chars().all(|c| c.is_ascii_digit()) {
let depth: u32 = num_part
.parse()
.map_err(|_| GitError::InvalidArgument("invalid ~N syntax".into()))?;
if depth > MAX_ANCESTRY_DEPTH {
if depth > crate::config::MAX_ANCESTRY_DEPTH {
return Err(GitError::InvalidArgument(format!(
"~N depth too large: {} (max {})",
depth, MAX_ANCESTRY_DEPTH
depth, crate::config::MAX_ANCESTRY_DEPTH
)));
}
}
@@ -140,10 +146,10 @@ pub fn validate_revision(rev: &str) -> GitResult<()> {
let depth: u32 = num_part
.parse()
.map_err(|_| GitError::InvalidArgument("invalid ^N syntax".into()))?;
if depth > MAX_ANCESTRY_DEPTH {
if depth > crate::config::MAX_ANCESTRY_DEPTH {
return Err(GitError::InvalidArgument(format!(
"^N depth too large: {} (max {})",
depth, MAX_ANCESTRY_DEPTH
depth, crate::config::MAX_ANCESTRY_DEPTH
)));
}
}
@@ -204,9 +210,10 @@ pub fn validate_file_path(path: &str) -> GitResult<()> {
"file path cannot contain null byte: {path}"
)));
}
if path.len() > 4096 {
if path.len() > crate::config::MAX_FILE_PATH_LENGTH {
return Err(GitError::InvalidArgument(format!(
"file path too long (max 4096 chars): {path}"
"file path too long (max {} chars): {path}",
crate::config::MAX_FILE_PATH_LENGTH
)));
}
@@ -220,7 +227,6 @@ pub fn validate_file_path(path: &str) -> GitResult<()> {
)));
}
// Windows reserved names check
#[cfg(target_os = "windows")]
{
const RESERVED_NAMES: &[&str] = &[
@@ -307,10 +313,11 @@ pub fn validate_remote_url(url: &str) -> GitResult<()> {
"remote URL cannot be empty".into(),
));
}
if url.len() > 4096 {
return Err(GitError::InvalidArgument(
"remote URL too long (max 4096 chars)".into(),
));
if url.len() > crate::config::MAX_REMOTE_URL_LENGTH {
return Err(GitError::InvalidArgument(format!(
"remote URL too long (max {} chars)",
crate::config::MAX_REMOTE_URL_LENGTH
)));
}
if url.contains('\0') || url.contains('\n') || url.contains('\r') {
return Err(GitError::InvalidArgument(
@@ -343,14 +350,37 @@ pub fn validate_refspec(refspec: &str) -> GitResult<()> {
"refspec contains shell metacharacter: {refspec}"
)));
}
if refspec.len() > 1024 {
return Err(GitError::InvalidArgument(
"refspec too long (max 1024 chars)".into(),
));
if refspec.len() > crate::config::MAX_REFSPEC_LENGTH {
return Err(GitError::InvalidArgument(format!(
"refspec too long (max {} chars)",
crate::config::MAX_REFSPEC_LENGTH
)));
}
Ok(())
}
/// Sanitize git stderr output for logging to prevent leaking sensitive data
/// such as credentials in URLs, absolute filesystem paths, or email addresses.
pub fn sanitize_git_stderr(stderr: &str) -> String {
let mut s = stderr.to_string();
for scheme in &["https://", "http://", "git+ssh://", "ssh://"] {
while let Some(start) = s.find(scheme) {
let after_scheme = start + scheme.len();
if let Some(at_pos) = s[after_scheme..].find('@') {
let at_abs = after_scheme + at_pos;
let replacement = format!("{scheme}***:***@");
s.replace_range(start..=at_abs, &replacement);
} else {
break;
}
}
}
if let Some(homedir) = std::env::var_os("HOME").and_then(|v| v.into_string().ok()) {
s = s.replace(&homedir, "~");
}
s
}
/// Validate a storage-relative path (used in resolve_for_init and from_repository_header).
///
/// Must not contain path traversal, must be a simple relative path.
@@ -370,10 +400,11 @@ pub fn validate_relative_path(path: &str) -> GitResult<()> {
"relative_path cannot contain null byte".into(),
));
}
if path.len() > 4096 {
return Err(GitError::InvalidArgument(
"relative_path too long (max 4096 chars)".into(),
));
if path.len() > crate::config::MAX_RELATIVE_PATH_LENGTH {
return Err(GitError::InvalidArgument(format!(
"relative_path too long (max {} chars)",
crate::config::MAX_RELATIVE_PATH_LENGTH
)));
}
if path.contains("..") {
return Err(GitError::InvalidArgument(format!(
+71 -68
View File
@@ -22,25 +22,13 @@
//! - **TTL** (time-to-live): 10 minutes — hard upper bound for safety
//! - Evictions are tracked via metrics for observability
use std::sync::OnceLock;
use std::time::Duration;
use dashmap::DashMap;
use std::sync::{Arc, OnceLock};
use moka::sync::Cache;
use prost::Message;
/// Maximum total cache weight (key + value allocated bytes): 256 MB.
const CACHE_MAX_WEIGHT: u64 = 256 * 1024 * 1024;
/// Hard time-to-live: entries older than this are unconditionally evicted.
const CACHE_MAX_TTL: Duration = Duration::from_secs(600); // 10 min
/// Time-to-idle: entries not accessed within this window are evicted.
/// Frequently accessed entries survive up to TTL, cold entries expire quickly.
const CACHE_TTI: Duration = Duration::from_secs(120); // 2 min
/// Estimated per-entry overhead (Moka internal Arc + metadata).
/// Added to the weigher result to prevent underestimation.
const ENTRY_OVERHEAD: u32 = 128;
use crate::config::{CACHE_ENTRY_OVERHEAD as ENTRY_OVERHEAD, CACHE_MAX_TTL, CACHE_MAX_WEIGHT, CACHE_TTI};
struct CacheState {
store: Cache<Vec<u8>, Vec<u8>>,
@@ -52,7 +40,6 @@ fn state() -> &'static CacheState {
CACHE.get_or_init(|| {
let store = Cache::builder()
.weigher(|key: &Vec<u8>, value: &Vec<u8>| -> u32 {
// capacity() reflects actual allocation including spare capacity
key.capacity() as u32 + value.capacity() as u32 + ENTRY_OVERHEAD
})
.max_capacity(CACHE_MAX_WEIGHT)
@@ -65,7 +52,6 @@ fn state() -> &'static CacheState {
moka::notification::RemovalCause::Replaced => "replaced",
moka::notification::RemovalCause::Size => "size",
};
// Extract namespace for per-namespace metrics
let namespace = decode_namespace(&key);
crate::metrics::record_cache_eviction(namespace, cause_str);
})
@@ -87,7 +73,27 @@ fn cache() -> &'static Cache<Vec<u8>, Vec<u8>> {
&state().store
}
// Key encoding
struct RepoKeyIndex {
repo_to_keys: DashMap<String, Vec<Arc<Vec<u8>>>>,
}
static REPO_KEY_INDEX: OnceLock<RepoKeyIndex> = OnceLock::new();
fn repo_key_index() -> &'static RepoKeyIndex {
REPO_KEY_INDEX.get_or_init(|| RepoKeyIndex {
repo_to_keys: DashMap::new(),
})
}
fn track_cache_key(repo_path: &str, key: Arc<Vec<u8>>) {
repo_key_index()
.repo_to_keys
.entry(repo_path.to_string())
.or_default()
.push(key);
}
/// Encode a structured cache key.
///
@@ -105,12 +111,15 @@ fn encode_key(namespace: &str, repo_path: &str, request_bytes: &[u8]) -> Option<
return None;
}
let total = 1 + ns.len() + 2 + rp.len() + request_bytes.len();
const SEPARATOR: u8 = 0xFF;
let total = 1 + ns.len() + 1 + 2 + rp.len() + 1 + request_bytes.len();
let mut key = Vec::with_capacity(total);
key.push(ns.len() as u8);
key.extend_from_slice(ns);
key.push(SEPARATOR);
key.extend_from_slice(&(rp.len() as u16).to_le_bytes());
key.extend_from_slice(rp);
key.push(SEPARATOR);
key.extend_from_slice(request_bytes);
Some(key)
}
@@ -125,31 +134,6 @@ fn decode_namespace(key: &[u8]) -> &str {
std::str::from_utf8(&key[1..end]).unwrap_or("unknown")
}
/// Extract the repo_path from a cache key (returns slice into the key).
fn extract_repo_path_bytes(key: &[u8]) -> Option<&[u8]> {
if key.len() < 3 {
return None;
}
let ns_len = key[0] as usize;
let rp_len_offset = 1 + ns_len;
if key.len() < rp_len_offset + 2 {
return None;
}
let rp_len = u16::from_le_bytes([key[rp_len_offset], key[rp_len_offset + 1]]) as usize;
let rp_start = rp_len_offset + 2;
let rp_end = rp_start.checked_add(rp_len)?;
if rp_end > key.len() {
return None;
}
Some(&key[rp_start..rp_end])
}
/// Check if a cache key belongs to the given repository.
fn key_matches_repo(key: &[u8], target_repo: &[u8]) -> bool {
extract_repo_path_bytes(key).is_some_and(|rp| rp == target_repo)
}
// Single-message cache
/// Cache a single protobuf response.
///
@@ -176,8 +160,10 @@ where
if let Some(bytes) = cache().get(&key)
&& let Ok(response) = Res::decode(bytes.as_slice())
{
let elapsed = std::time::Duration::ZERO; // Moka get is memory-only, effectively instant
let elapsed = std::time::Duration::ZERO;
crate::metrics::record_cache_op("moka", "hit", elapsed);
crate::metrics::record_cache_hit_ns(namespace);
crate::metrics::record_cache_value_size(namespace, bytes.len());
tracing::debug!(
namespace = %namespace,
repo = %repo_path,
@@ -188,6 +174,8 @@ where
return Ok(response);
}
crate::metrics::record_cache_miss_ns(namespace);
tracing::debug!(
namespace = %namespace,
repo = %repo_path,
@@ -208,14 +196,16 @@ where
"failed to encode cache response"
);
} else {
crate::metrics::record_cache_value_size(namespace, bytes.len());
let key_arc = Arc::new(key.clone());
cache().insert(key, bytes);
track_cache_key(repo_path, key_arc);
}
crate::metrics::record_cache_op("moka", "miss", build_elapsed);
Ok(response)
}
// Vec-message cache
/// Cache a `Vec<Item>` protobuf response using length-delimited encoding.
///
@@ -238,13 +228,24 @@ where
return build();
};
// Try cache hit
if let Some(bytes) = cache().get(&key) {
if bytes.len() < 4 {
cache().invalidate(&key);
} else {
let stored_crc = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
let actual_crc = crc32fast::hash(&bytes[4..]);
if stored_crc != actual_crc {
tracing::warn!(
namespace = %namespace,
repo = %repo_path,
"cache entry corrupted (CRC mismatch), invalidating"
);
cache().invalidate(&key);
} else {
let mut items = Vec::new();
let mut remaining = bytes.as_slice();
let mut remaining = &bytes[4..];
let mut valid = true;
// Pre-allocate based on first size hint
if let Ok(first) = Item::decode_length_delimited(&mut remaining) {
items.push(first);
while !remaining.is_empty() {
@@ -262,6 +263,8 @@ where
if valid {
crate::metrics::record_cache_op("moka", "hit", std::time::Duration::ZERO);
crate::metrics::record_cache_hit_ns(namespace);
crate::metrics::record_cache_value_size(namespace, bytes.len());
tracing::debug!(
namespace = %namespace,
repo = %repo_path,
@@ -276,10 +279,12 @@ where
repo = %repo_path,
"vec cache decode failed, rebuilding"
);
// Invalidate the corrupt entry
cache().invalidate(&key);
}
}
}
crate::metrics::record_cache_miss_ns(namespace);
tracing::debug!(
namespace = %namespace,
repo = %repo_path,
@@ -290,15 +295,14 @@ where
let response = build()?;
let build_elapsed = start.elapsed();
// Encode all items into a single buffer with length-delimited framing
let total_est: usize = response
.iter()
.map(|item| item.encoded_len() + 10) // 10 = prost length-delimited overhead
.map(|item| item.encoded_len() + 10)
.sum();
let mut bytes = Vec::with_capacity(total_est);
let mut data = Vec::with_capacity(total_est);
let mut encode_ok = true;
for item in &response {
if let Err(err) = item.encode_length_delimited(&mut bytes) {
if let Err(err) = item.encode_length_delimited(&mut data) {
tracing::warn!(
namespace = %namespace,
repo = %repo_path,
@@ -311,13 +315,19 @@ where
}
if encode_ok {
let crc = crc32fast::hash(&data);
let mut bytes = Vec::with_capacity(4 + data.len());
bytes.extend_from_slice(&crc.to_le_bytes());
bytes.extend_from_slice(&data);
crate::metrics::record_cache_value_size(namespace, bytes.len());
let key_arc = Arc::new(key.clone());
cache().insert(key, bytes);
track_cache_key(repo_path, key_arc);
}
crate::metrics::record_cache_op("moka", "miss", build_elapsed);
Ok(response)
}
// Request encoding helpers
/// Encode a protobuf request into a byte vector.
#[inline]
@@ -329,7 +339,6 @@ fn encode_request<Req: Message>(request: &Req) -> Vec<u8> {
buf
}
// Repository-scoped invalidation
/// Invalidate all cache entries for a specific repository.
///
@@ -341,17 +350,11 @@ fn encode_request<Req: Message>(request: &Req) -> Vec<u8> {
/// create branch, etc.) to prevent serving stale data.
pub(crate) fn invalidate_repo(relative_path: &str) {
let c = cache();
let target = relative_path.as_bytes();
let mut keys_to_remove: Vec<std::sync::Arc<Vec<u8>>> = Vec::with_capacity(64);
let idx = repo_key_index();
for (key, _value) in c.iter() {
if key_matches_repo(&key, target) {
keys_to_remove.push(key);
}
}
let removed = keys_to_remove.len();
for key in &keys_to_remove {
if let Some((_key, keys)) = idx.repo_to_keys.remove(relative_path) {
let removed = keys.len();
for key in &keys {
c.invalidate(key.as_ref());
}
@@ -359,12 +362,12 @@ pub(crate) fn invalidate_repo(relative_path: &str) {
tracing::debug!(
relative_path = %relative_path,
entries_removed = removed,
"cache invalidated for repository"
"cache invalidated for repository (indexed)"
);
}
}
}
// Selector helpers
use crate::pb::{ObjectSelector, object_selector};
+61 -25
View File
@@ -30,6 +30,8 @@ use gix::discover::is_git;
use std::path::{Path, PathBuf};
use tokio_stream::wrappers::ReceiverStream;
use tonic::codec::CompressionEncoding;
use crate::bare::GitBare;
use crate::error::{GitError, GitResult};
use crate::pb::{
@@ -151,25 +153,20 @@ impl GitksService {
if relative_path.is_empty() {
return Err(tonic::Status::invalid_argument("relative_path is required"));
}
// Validate early to reject '..' and other traversal patterns
crate::sanitize::validate_relative_path(relative_path)
.map_err(|e| tonic::Status::invalid_argument(e.to_string()))?;
let candidate = self.repo_prefix.join(relative_path);
// Canonicalize repo_prefix (which should exist) for a reliable check
let prefix_canon = self
.repo_prefix
.canonicalize()
.unwrap_or_else(|_| self.repo_prefix.clone());
// Unified path validation to avoid TOCTOU
let canonical = match candidate.canonicalize() {
Ok(canon) => {
// Path exists and was canonicalized
canon
}
Err(_) => {
// Path doesn't exist yet — validate via parent
let parent = candidate.parent().unwrap_or(&self.repo_prefix);
let filename = candidate.file_name().ok_or_else(|| {
tonic::Status::invalid_argument("invalid path: missing filename")
@@ -180,7 +177,6 @@ impl GitksService {
.unwrap_or_else(|_| parent.to_path_buf());
let constructed = parent_canon.join(filename);
// String-level verification for non-existent paths
let constructed_str = constructed.to_string_lossy();
let prefix_str = prefix_canon.to_string_lossy();
@@ -194,13 +190,19 @@ impl GitksService {
}
};
// Final check: canonical must be under prefix
if !canonical.starts_with(&prefix_canon) {
return Err(tonic::Status::invalid_argument(
"path traversal detected: relative_path escapes repo prefix",
));
}
let double_canon = canonical.canonicalize().unwrap_or_else(|_| canonical.clone());
if canonical != double_canon {
return Err(tonic::Status::invalid_argument(
"path resolved to different target (possible symlink race)",
));
}
Ok(canonical)
}
@@ -211,10 +213,8 @@ impl GitksService {
_old_oid: &str,
_new_oid: &str,
) {
// Invalidate moka caches
crate::server::cache::invalidate_repo(relative_path);
// Invalidate disk cache
if let Some(ref pc) = self.pack_cache {
pc.invalidate_repo(relative_path);
}
@@ -328,10 +328,8 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult<std::process::Ou
let elapsed = start.elapsed();
let elapsed_ms = elapsed.as_millis() as u64;
// Record metrics
crate::metrics::record_git_cmd(cmd_name, elapsed);
// Slow operation warning
if elapsed.as_secs() >= 1 {
tracing::warn!(
repo = %gb.bare_dir.display(),
@@ -343,11 +341,12 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult<std::process::Ou
if !result.status.success() {
let stderr_str = String::from_utf8_lossy(&result.stderr);
let sanitized = crate::sanitize::sanitize_git_stderr(stderr_str.trim());
tracing::warn!(
repo = %gb.bare_dir.display(),
command = cmd_name,
status = ?result.status.code(),
stderr = %stderr_str.trim(),
stderr = %sanitized,
elapsed_ms,
"git subprocess exited with non-zero status"
);
@@ -386,6 +385,19 @@ fn structured_git_error(stderr: &str, code: Option<i32>) -> GitError {
pub async fn serve(
addr: std::net::SocketAddr,
svc: GitksService,
) -> Result<(), tonic::transport::Error> {
serve_with_shutdown(addr, svc, std::future::pending()).await
}
/// Start the gRPC server and block until the shutdown signal fires.
///
/// The `shutdown` future should resolve when the process should stop
/// (e.g. on SIGTERM/SIGINT). All in-flight requests are drained before
/// the server returns.
pub async fn serve_with_shutdown(
addr: std::net::SocketAddr,
svc: GitksService,
shutdown: impl std::future::Future<Output = ()>,
) -> Result<(), tonic::transport::Error> {
let span = tracing::info_span!("gitks.server", %addr);
let _enter = span.enter();
@@ -393,18 +405,42 @@ pub async fn serve(
let (health_reporter, health_service) = tonic_health::server::health_reporter();
let repo_svc = repository_service_server::RepositoryServiceServer::new(svc.clone());
let archive_svc = archive_service_server::ArchiveServiceServer::new(svc.clone());
let blame_svc = blame_service_server::BlameServiceServer::new(svc.clone());
let branch_svc = branch_service_server::BranchServiceServer::new(svc.clone());
let commit_svc = commit_service_server::CommitServiceServer::new(svc.clone());
let diff_svc = diff_service_server::DiffServiceServer::new(svc.clone());
let merge_svc = merge_service_server::MergeServiceServer::new(svc.clone());
let pack_svc = pack_service_server::PackServiceServer::new(svc.clone());
let ref_svc = ref_service_server::RefServiceServer::new(svc.clone());
let remote_svc = remote_service_server::RemoteServiceServer::new(svc.clone());
let tag_svc = tag_service_server::TagServiceServer::new(svc.clone());
let tree_svc = tree_service_server::TreeServiceServer::new(svc);
let repo_svc = repository_service_server::RepositoryServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let archive_svc = archive_service_server::ArchiveServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let blame_svc = blame_service_server::BlameServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let branch_svc = branch_service_server::BranchServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let commit_svc = commit_service_server::CommitServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let diff_svc = diff_service_server::DiffServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let merge_svc = merge_service_server::MergeServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let pack_svc = pack_service_server::PackServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let ref_svc = ref_service_server::RefServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let remote_svc = remote_service_server::RemoteServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let tag_svc = tag_service_server::TagServiceServer::new(svc.clone())
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
let tree_svc = tree_service_server::TreeServiceServer::new(svc)
.send_compressed(CompressionEncoding::Gzip)
.accept_compressed(CompressionEncoding::Gzip);
health_reporter
.set_serving::<repository_service_server::RepositoryServiceServer<GitksService>>()
@@ -458,5 +494,5 @@ pub async fn serve(
.add_service(tag_svc)
.add_service(tree_svc);
tracing::info!("server ready, starting to accept connections");
server.serve(addr).await
server.serve_with_shutdown(addr, shutdown).await
}
-3
View File
@@ -115,7 +115,6 @@ impl pack_service_server::PackService for GitksService {
.upload_pack(tokio_stream::wrappers::ReceiverStream::new(rx))
.await?;
let out = super::bridge_server_stream(resp.into_inner());
// Create a dummy cancel token for the forwarded stream
let cancel_token = tokio_util::sync::CancellationToken::new();
let cancel_guard = cancel_token.drop_guard();
return Ok(tonic::Response::new(
@@ -190,7 +189,6 @@ impl pack_service_server::PackService for GitksService {
.receive_pack(tokio_stream::wrappers::ReceiverStream::new(rx))
.await?;
let out = super::bridge_server_stream(resp.into_inner());
// Create a dummy cancel token for the forwarded stream
let cancel_token = tokio_util::sync::CancellationToken::new();
let cancel_guard = cancel_token.drop_guard();
return Ok(tonic::Response::new(
@@ -310,7 +308,6 @@ impl pack_service_server::PackService for GitksService {
return Ok(tonic::Response::new(ReceiverStream::new(rx)));
}
// Cache miss: execute pack-objects and tee to cache
tracing::info!(%repo, digest = %digest, "pack-objects cache miss");
let stream = gb.pack_objects(inner).await?;
let tee_stream = pc.tee_pack_stream(&digest, stream);
-2
View File
@@ -59,7 +59,6 @@ impl BundleApplicator {
.spawn()
.map_err(|e| format!("spawn git bundle unbundle: {e}"))?;
// Stream file contents to stdin in a background thread
let mut stdin = child.stdin.take().ok_or("no stdin")?;
let file_handle = file;
let writer = std::thread::spawn(move || -> Result<(), String> {
@@ -84,7 +83,6 @@ impl BundleApplicator {
.wait_with_output()
.map_err(|e| format!("wait bundle: {e}"))?;
// Wait for writer thread
let _ = writer.join().map_err(|_| "writer thread panicked")?;
if !output.status.success() {
+218
View File
@@ -519,3 +519,221 @@ async fn test_oid_binary_encoding() {
let hex_from_bytes: String = oid.value.iter().map(|b| format!("{b:02x}")).collect();
assert_eq!(hex_from_bytes, oid.hex);
}
#[test]
fn test_count_commits_head() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.count_commits(CountCommitsRequest {
repository: Some(hdr()),
revision: String::new(),
path: String::new(),
since: String::new(),
until: String::new(),
}).unwrap();
assert_eq!(resp.count, 4);
}
#[test]
fn test_count_commits_with_revision() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.count_commits(CountCommitsRequest {
repository: Some(hdr()),
revision: "feature".into(),
path: String::new(),
since: String::new(),
until: String::new(),
}).unwrap();
assert_eq!(resp.count, 1);
}
#[test]
fn test_count_commits_with_path() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.count_commits(CountCommitsRequest {
repository: Some(hdr()),
revision: "main".into(),
path: "README.md".into(),
since: String::new(),
until: String::new(),
}).unwrap();
assert!(resp.count >= 1);
}
#[test]
fn test_count_diverging_commits() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.count_diverging_commits(CountDivergingCommitsRequest {
repository: Some(hdr()),
left: "feature".into(),
right: "main".into(),
}).unwrap();
assert_eq!(resp.left_count, 0);
assert_eq!(resp.right_count, 3);
}
#[test]
fn test_find_commit_by_oid() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
let commit = gb.find_commit(FindCommitRequest {
repository: Some(hdr()),
revision: common::oid_selector(&oid),
include_stats: false,
}).unwrap();
assert!(!commit.oid.as_ref().unwrap().hex.is_empty());
}
#[test]
fn test_find_commit_by_revision() {
let (_dir, gb) = common::setup_bare_repo();
let commit = gb.find_commit(FindCommitRequest {
repository: Some(hdr()),
revision: common::rev_selector("main"),
include_stats: false,
}).unwrap();
assert!(!commit.oid.as_ref().unwrap().hex.is_empty());
}
#[test]
fn test_find_commit_default_head() {
let (_dir, gb) = common::setup_bare_repo();
let commit = gb.find_commit(FindCommitRequest {
repository: Some(hdr()),
revision: None,
include_stats: false,
}).unwrap();
assert!(!commit.oid.as_ref().unwrap().hex.is_empty());
}
#[test]
fn test_list_commits_by_oid() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
let oid_bytes = gitks::oid::hex_to_bytes(&oid).unwrap();
let resp = gb.list_commits_by_oid(ListCommitsByOidRequest {
repository: Some(hdr()),
oids: vec![oid_bytes],
include_stats: false,
}).unwrap();
assert_eq!(resp.commits.len(), 1);
}
#[test]
fn test_list_commits_by_oid_empty() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.list_commits_by_oid(ListCommitsByOidRequest {
repository: Some(hdr()),
oids: vec![],
include_stats: false,
}).unwrap();
assert!(resp.commits.is_empty());
}
#[test]
fn test_commits_by_message_basic() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.commits_by_message(CommitsByMessageRequest {
repository: Some(hdr()),
query: "initial".into(),
revision: String::new(),
limit: 10,
offset: 0,
case_insensitive: false,
}).unwrap();
assert_eq!(resp.commits.len(), 1);
}
#[test]
fn test_commits_by_message_case_insensitive() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.commits_by_message(CommitsByMessageRequest {
repository: Some(hdr()),
query: "INITIAL".into(),
revision: String::new(),
limit: 10,
offset: 0,
case_insensitive: true,
}).unwrap();
assert_eq!(resp.commits.len(), 1);
}
#[test]
fn test_commits_by_message_no_match() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.commits_by_message(CommitsByMessageRequest {
repository: Some(hdr()),
query: "zzzznonexistent".into(),
revision: String::new(),
limit: 10,
offset: 0,
case_insensitive: false,
}).unwrap();
assert!(resp.commits.is_empty());
}
#[test]
fn test_check_objects_exist() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
let resp = gb.check_objects_exist(CheckObjectsExistRequest {
repository: Some(hdr()),
revisions: vec![oid.clone(), "HEAD".into(), "nonexistent-branch".into()],
}).unwrap();
assert_eq!(resp.revisions.len(), 3);
assert!(resp.revisions[0].exists);
assert!(resp.revisions[1].exists);
assert!(!resp.revisions[2].exists);
}
#[test]
fn test_get_commit_stats() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
let stats = gb.get_commit_stats(GetCommitStatsRequest {
repository: Some(hdr()),
revision: common::oid_selector(&oid),
}).unwrap();
assert!(stats.changed_files >= 1);
}
#[test]
fn test_get_commit_stats_default() {
let (_dir, gb) = common::setup_bare_repo();
let stats = gb.get_commit_stats(GetCommitStatsRequest {
repository: Some(hdr()),
revision: None,
}).unwrap();
assert!(stats.changed_files >= 1);
}
#[test]
fn test_last_commit_for_path() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.last_commit_for_path(LastCommitForPathRequest {
repository: Some(hdr()),
path: "README.md".into(),
revision: "main".into(),
literal_pathspec: false,
}).unwrap();
assert!(resp.commit.is_some());
assert_eq!(resp.path, "README.md");
}
#[test]
fn test_last_commit_for_path_nonexistent() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.last_commit_for_path(LastCommitForPathRequest {
repository: Some(hdr()),
path: "nonexistent.txt".into(),
revision: "main".into(),
literal_pathspec: false,
}).unwrap();
assert!(resp.commit.is_none());
}
+49
View File
@@ -172,3 +172,52 @@ pub fn setup_bare_repo_with_conflict() -> (tempfile::TempDir, GitBare) {
(dir, GitBare::new(bare_dir))
}
#[allow(dead_code)]
pub fn get_oid(gb: &GitBare, rev: &str) -> String {
let output = std::process::Command::new("git")
.args([
"--git-dir",
gb.bare_dir.to_string_lossy().as_ref(),
"rev-parse",
rev,
])
.output()
.expect("git rev-parse");
assert!(output.status.success(), "git rev-parse {rev} failed");
String::from_utf8_lossy(&output.stdout).trim().to_string()
}
#[allow(dead_code)]
pub fn get_main_oid(gb: &GitBare) -> String {
get_oid(gb, "refs/heads/main")
}
#[allow(dead_code)]
pub fn get_feature_oid(gb: &GitBare) -> String {
get_oid(gb, "refs/heads/feature")
}
#[allow(dead_code)]
pub fn oid_selector(hex: &str) -> Option<gitks::pb::ObjectSelector> {
Some(gitks::pb::ObjectSelector {
selector: Some(gitks::pb::object_selector::Selector::Oid(
gitks::pb::Oid {
hex: hex.to_string(),
value: vec![],
format: 0,
},
)),
})
}
#[allow(dead_code)]
pub fn rev_selector(rev: &str) -> Option<gitks::pb::ObjectSelector> {
Some(gitks::pb::ObjectSelector {
selector: Some(gitks::pb::object_selector::Selector::Revision(
gitks::pb::ObjectName {
revision: rev.to_string(),
},
)),
})
}
+76
View File
@@ -266,3 +266,79 @@ async fn test_get_patch() {
.collect();
assert!(combined.contains("diff --git") || combined.contains("@@"));
}
#[test]
fn test_find_changed_paths() {
let (_dir, gb) = common::setup_bare_repo();
let feature_oid = common::get_feature_oid(&gb);
let main_oid = common::get_main_oid(&gb);
let resp = gb.find_changed_paths(FindChangedPathsRequest {
repository: Some(hdr()),
base: feature_oid,
head: main_oid,
paths: vec![],
}).unwrap();
assert!(!resp.paths.is_empty());
}
#[test]
fn test_find_changed_paths_same_ref() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
let resp = gb.find_changed_paths(FindChangedPathsRequest {
repository: Some(hdr()),
base: oid.clone(),
head: oid,
paths: vec![],
}).unwrap();
assert!(resp.paths.is_empty());
}
#[test]
fn test_raw_diff() {
let (_dir, gb) = common::setup_bare_repo();
let feature_oid = common::get_feature_oid(&gb);
let main_oid = common::get_main_oid(&gb);
let chunks = gb.raw_diff(RawDiffRequest {
repository: Some(hdr()),
base: feature_oid,
head: main_oid,
options: None,
}).unwrap();
assert!(!chunks.is_empty());
let combined: Vec<u8> = chunks.iter().flat_map(|c| c.data.clone()).collect();
let text = String::from_utf8_lossy(&combined);
assert!(text.contains("diff"));
}
#[test]
fn test_raw_patch() {
let (_dir, gb) = common::setup_bare_repo();
let feature_oid = common::get_feature_oid(&gb);
let main_oid = common::get_main_oid(&gb);
let chunks = gb.raw_patch(RawPatchRequest {
repository: Some(hdr()),
base: feature_oid,
head: main_oid,
}).unwrap();
assert!(!chunks.is_empty());
let combined: Vec<u8> = chunks.iter().flat_map(|c| c.data.clone()).collect();
let text = String::from_utf8_lossy(&combined);
assert!(text.contains("From"));
}
#[test]
fn test_get_raw_changes() {
let (_dir, gb) = common::setup_bare_repo();
let feature_oid = common::get_feature_oid(&gb);
let main_oid = common::get_main_oid(&gb);
let resp = gb.get_raw_changes(GetRawChangesRequest {
repository: Some(hdr()),
base: feature_oid,
head: main_oid,
}).unwrap();
assert!(!resp.changes.is_empty());
}
+99
View File
@@ -63,3 +63,102 @@ fn test_list_refs_direct() {
assert_eq!(oid.hex.len(), 40, "SHA-1 hex should be 40 chars");
}
}
#[test]
fn test_write_ref_and_ref_exists() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
let resp = gb.write_ref(gitks::pb::WriteRefRequest {
repository: Some(hdr()),
ref_name: "refs/heads/test-write".into(),
new_oid: oid,
old_oid: String::new(),
force: false,
}).unwrap();
assert!(resp.ok, "write_ref failed: {}", resp.error);
let exists = gb.ref_exists(gitks::pb::RefExistsRequest {
repository: Some(hdr()),
ref_name: "refs/heads/test-write".into(),
}).unwrap();
assert!(exists.exists);
let not_exists = gb.ref_exists(gitks::pb::RefExistsRequest {
repository: Some(hdr()),
ref_name: "refs/heads/nonexistent".into(),
}).unwrap();
assert!(!not_exists.exists);
}
#[test]
fn test_update_references_batch() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
let resp = gb.update_references(gitks::pb::UpdateReferencesRequest {
repository: Some(hdr()),
updates: vec![
gitks::pb::RefUpdateEntry {
ref_name: "refs/heads/batch-a".into(),
new_oid: oid.clone(),
old_oid: String::new(),
},
gitks::pb::RefUpdateEntry {
ref_name: "refs/heads/batch-b".into(),
new_oid: oid,
old_oid: String::new(),
},
],
}).unwrap();
assert!(resp.failed_refs.is_empty(), "error: {}", resp.error);
let a = gb.ref_exists(gitks::pb::RefExistsRequest {
repository: Some(hdr()),
ref_name: "refs/heads/batch-a".into(),
}).unwrap();
assert!(a.exists);
}
#[test]
fn test_update_references_empty() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.update_references(gitks::pb::UpdateReferencesRequest {
repository: Some(hdr()),
updates: vec![],
}).unwrap();
assert!(resp.failed_refs.is_empty());
}
#[test]
fn test_delete_refs() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
gb.write_ref(gitks::pb::WriteRefRequest {
repository: Some(hdr()),
ref_name: "refs/heads/to-delete".into(),
new_oid: oid,
old_oid: String::new(),
force: false,
}).unwrap();
let resp = gb.delete_refs(gitks::pb::DeleteRefsRequest {
repository: Some(hdr()),
ref_names: vec!["refs/heads/to-delete".into()],
}).unwrap();
assert!(resp.failed_refs.is_empty());
let exists = gb.ref_exists(gitks::pb::RefExistsRequest {
repository: Some(hdr()),
ref_name: "refs/heads/to-delete".into(),
}).unwrap();
assert!(!exists.exists);
}
#[test]
fn test_find_default_branch_name() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.find_default_branch_name().unwrap();
assert_eq!(resp.name, "main");
}
+190
View File
@@ -326,3 +326,193 @@ async fn test_exists_nonexistent_repo() {
.into_inner();
assert!(!result.exists);
}
#[test]
fn test_find_merge_base() {
let (_dir, gb) = common::setup_bare_repo();
let main_oid = common::get_main_oid(&gb);
let feature_oid = common::get_feature_oid(&gb);
let resp = gb.find_merge_base(FindMergeBaseRequest {
repository: Some(header(&gb)),
revisions: vec![
main_oid.as_bytes().to_vec(),
feature_oid.as_bytes().to_vec(),
],
}).unwrap();
assert!(!resp.base_oid.is_empty());
}
#[test]
fn test_find_merge_base_empty() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.find_merge_base(FindMergeBaseRequest {
repository: Some(header(&gb)),
revisions: vec![],
}).unwrap();
assert!(resp.base_oid.is_empty());
}
#[test]
fn test_find_merge_base_single() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
let resp = gb.find_merge_base(FindMergeBaseRequest {
repository: Some(header(&gb)),
revisions: vec![oid.as_bytes().to_vec()],
}).unwrap();
assert!(!resp.base_oid.is_empty());
}
#[test]
fn test_commit_is_ancestor() {
let (_dir, gb) = common::setup_bare_repo();
let feature_oid = common::get_feature_oid(&gb);
let main_oid = common::get_main_oid(&gb);
let resp = gb.commit_is_ancestor(CommitIsAncestorRequest {
repository: Some(header(&gb)),
ancestor_oid: feature_oid,
descendant_oid: main_oid,
}).unwrap();
assert!(resp.is_ancestor);
}
#[test]
fn test_commit_is_ancestor_false() {
let (_dir, gb) = common::setup_bare_repo();
let main_oid = common::get_main_oid(&gb);
let feature_oid = common::get_feature_oid(&gb);
let resp = gb.commit_is_ancestor(CommitIsAncestorRequest {
repository: Some(header(&gb)),
ancestor_oid: main_oid,
descendant_oid: feature_oid,
}).unwrap();
assert!(!resp.is_ancestor);
}
#[test]
fn test_objects_size() {
let (_dir, gb) = common::setup_bare_repo();
let oid = common::get_main_oid(&gb);
let resp = gb.objects_size(ObjectsSizeRequest {
repository: Some(header(&gb)),
oids: vec![oid.clone(), "0000000000000000000000000000000000000000".into()],
}).unwrap();
assert_eq!(resp.sizes.len(), 2);
assert!(resp.sizes[0].found);
assert!(resp.sizes[0].size > 0);
}
#[test]
fn test_objects_size_empty() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.objects_size(ObjectsSizeRequest {
repository: Some(header(&gb)),
oids: vec![],
}).unwrap();
assert!(resp.sizes.is_empty());
}
#[test]
fn test_repository_size() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.repository_size().unwrap();
assert!(resp.size_bytes > 0);
}
#[test]
fn test_find_license_no_license() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.find_license().unwrap();
assert!(resp.license_spdx.is_empty());
}
#[test]
fn test_optimize_repository_heuristic() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.optimize_repository(OptimizeRepositoryRequest {
repository: Some(header(&gb)),
strategy: OptimizeStrategy::Heuristic as i32,
}).unwrap();
assert!(resp.ok);
}
#[test]
fn test_optimize_repository_incremental() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.optimize_repository(OptimizeRepositoryRequest {
repository: Some(header(&gb)),
strategy: OptimizeStrategy::Incremental as i32,
}).unwrap();
assert!(resp.ok);
}
#[test]
fn test_search_files_by_content() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.search_files_by_content(SearchFilesByContentRequest {
repository: Some(header(&gb)),
query: "Test".into(),
revision: "main".into(),
max_results: 10,
case_sensitive: true,
}).unwrap();
assert!(!resp.results.is_empty());
}
#[test]
fn test_search_files_by_content_no_match() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.search_files_by_content(SearchFilesByContentRequest {
repository: Some(header(&gb)),
query: "zzzznonexistentzzzz".into(),
revision: "main".into(),
max_results: 10,
case_sensitive: true,
}).unwrap();
assert!(resp.results.is_empty());
}
#[test]
fn test_search_files_by_content_empty_query() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.search_files_by_content(SearchFilesByContentRequest {
repository: Some(header(&gb)),
query: String::new(),
revision: "main".into(),
max_results: 10,
case_sensitive: true,
});
assert!(resp.is_err());
}
#[test]
fn test_search_files_by_name() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.search_files_by_name(SearchFilesByNameRequest {
repository: Some(header(&gb)),
query: "README".into(),
revision: "main".into(),
max_results: 10,
recursive: true,
}).unwrap();
assert!(!resp.results.is_empty());
assert!(resp.results.iter().any(|r| r.path.contains("README")));
}
#[test]
fn test_search_files_by_name_no_match() {
let (_dir, gb) = common::setup_bare_repo();
let resp = gb.search_files_by_name(SearchFilesByNameRequest {
repository: Some(header(&gb)),
query: "zzzznonexistentzzzz".into(),
revision: "main".into(),
max_results: 10,
recursive: true,
}).unwrap();
assert!(resp.results.is_empty());
}
+182 -5
View File
@@ -1,6 +1,5 @@
use gitks::sanitize::*;
// ==================== validate_ref_name tests ====================
#[test]
fn test_validate_ref_name_accepts_valid_names() {
@@ -69,7 +68,6 @@ fn test_validate_ref_name_rejects_too_long() {
assert!(validate_ref_name(&max_valid_name).is_ok());
}
// ==================== validate_revision tests ====================
#[test]
fn test_validate_revision_accepts_empty() {
@@ -149,7 +147,6 @@ fn test_validate_revision_accepts_valid_branch_names() {
assert!(validate_revision("v1.0.0").is_ok());
}
// ==================== validate_file_path tests ====================
#[test]
fn test_validate_file_path_accepts_valid_paths() {
@@ -216,7 +213,6 @@ fn test_validate_file_path_rejects_windows_reserved_names() {
assert!(validate_file_path("CON.txt").is_err());
}
// ==================== validate_relative_path tests ====================
#[test]
fn test_validate_relative_path_accepts_valid_paths() {
@@ -244,7 +240,6 @@ fn test_validate_relative_path_rejects_traversal() {
assert!(validate_relative_path("path/..").is_err());
}
// ==================== validate_config_key tests ====================
#[test]
fn test_validate_config_key_accepts_safe_keys() {
@@ -281,3 +276,185 @@ fn test_validate_config_key_rejects_invalid_chars() {
assert!(validate_config_key("key$(command)").is_err());
assert!(validate_config_key("key`command`").is_err());
}
/// Ensure no input causes panic in validate_ref_name.
#[test]
fn fuzz_validate_ref_name_no_panic() {
let long_name = "x".repeat(300);
let test_inputs: Vec<&str> = vec![
"",
"\0",
"\0\0\0",
"\x7f",
"\x01\x02\x03",
"~^:?*[]\\ ",
"../../../etc/passwd",
"a/b/c/d/e/f/g/h",
&long_name,
"branch@{upstream}",
"HEAD~99999999999",
"HEAD^99999999999",
"ref:HEAD",
"ref:refs/heads/main",
"; rm -rf /",
"$(echo pwned)",
"`echo pwned`",
"\n\r\t",
];
for input in test_inputs {
let _ = validate_ref_name(input);
}
}
/// Ensure no input causes panic in validate_revision.
#[test]
fn fuzz_validate_revision_no_panic() {
let test_inputs: Vec<&str> = vec![
"",
"HEAD",
"HEAD~0",
"HEAD~99999999",
"HEAD^0",
"HEAD^99999999",
"HEAD^{tree}",
"HEAD^{commit}",
"HEAD^{object}",
"abcdef01",
"abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789",
"0000",
"zzzz",
"ref:HEAD",
"ref:refs/heads/main",
"\0",
"branch~abc",
"branch^abc",
"branch~",
"branch^",
"a~10001",
"a^10001",
];
for input in test_inputs {
let _ = validate_revision(input);
}
}
/// Ensure no input causes panic in validate_file_path.
#[test]
fn fuzz_validate_file_path_no_panic() {
let long_path = "x".repeat(5000);
let medium_path = "a".repeat(100);
let test_inputs: Vec<&str> = vec![
"",
"/etc/passwd",
"../escape",
"a/../b",
".git",
".git/config",
"src/.git/HEAD",
"a/b/.git",
"\0",
"\0\0\0",
&long_path,
"path/with\x00null",
"path/with\nnewline",
"normal/path.txt",
&medium_path,
];
for input in test_inputs {
let _ = validate_file_path(input);
}
}
/// Ensure no input causes panic in validate_remote_url.
#[test]
fn fuzz_validate_remote_url_no_panic() {
let long_url = "x".repeat(5000);
let test_inputs: Vec<&str> = vec![
"",
"https://github.com/user/repo",
"http://localhost:3000/repo",
"ssh://git@host/repo",
"git://host/repo",
"git+ssh://git@host/repo",
"file:///etc/passwd",
"ext::sh -c 'rm -rf /'",
"ftp://host/repo",
"https://user:pass@host/repo",
"\0",
"https://host\0injection",
&long_url,
];
for input in test_inputs {
let _ = validate_remote_url(input);
}
}
/// Ensure no input causes panic in validate_oid_hex.
#[test]
fn fuzz_validate_oid_hex_no_panic() {
let long_hex = "x".repeat(65);
let exact_hex = "x".repeat(64);
let test_inputs: Vec<&str> = vec![
"",
"abc",
"abcd",
"0123456789abcdef",
"ZZZZ",
"g000",
"0000000000000000000000000000000000000000",
&long_hex,
&exact_hex,
"\0",
" ",
"\n",
];
for input in test_inputs {
let _ = validate_oid_hex(input);
}
}
/// Ensure no input causes panic in validate_relative_path.
#[test]
fn fuzz_validate_relative_path_no_panic() {
let long_path = "x".repeat(5000);
let test_inputs: Vec<&str> = vec![
"",
"/absolute",
"relative/path",
"../escape",
"path/../escape",
"\0",
&long_path,
".",
"..",
"...",
"a/b/c",
];
for input in test_inputs {
let _ = validate_relative_path(input);
}
}
/// Ensure no input causes panic in validate_refspec.
#[test]
fn fuzz_validate_refspec_no_panic() {
let long_refspec = "x".repeat(2000);
let test_inputs: Vec<&str> = vec![
"",
"+refs/heads/*:refs/heads/*",
"refs/heads/main",
"; rm -rf /",
"$(evil)",
"`evil`",
"| pipe",
"& bg",
"< redirect",
"> redirect",
"\0",
&long_refspec,
];
for input in test_inputs {
let _ = validate_refspec(input);
}
}