refactor(bare): improve security and performance

- Remove unnecessary sorting in advertise_refs for deterministic output
- Add path traversal detection and validation in bare_dir construction
- Implement symlink resolution checks to prevent security vulnerabilities
- Refactor cache system with CRC validation and improved metrics
- Integrate repo-specific cache invalidation using indexed keys
- Add comprehensive unit tests for commit operations and diff functionality
- Move configuration constants to centralized config module
- Optimize string operations in disk cache random value generation
- Enhance license detection algorithm with cleaner matching logic
- Streamline argument processing in various git operations
- Update dependencies including crc32fast and flate2 for performance
- Add signal handling capability to tokio runtime configuration
This commit is contained in:
zhenyi
2026-06-12 15:04:12 +08:00
parent e386f44ee2
commit 10a4398e81
41 changed files with 1373 additions and 365 deletions
-15
View File
@@ -7,7 +7,6 @@ use crate::bare::GitBare;
use crate::error::{GitError, GitResult};
use crate::pb::{GetLanguageStatsRequest, GetLanguageStatsResponse, LanguageStat, object_selector};
// Include the generated linguist rules
include!(concat!(env!("OUT_DIR"), "/linguist_generated.rs"));
/// Default max file size for line counting (512 KB).
@@ -17,7 +16,6 @@ const MAX_TREE_WALK_DEPTH: usize = 256;
/// Look up a language by file extension (case-insensitive, includes leading dot).
fn lookup_by_extension(ext: &str) -> Option<(&'static str, &'static str)> {
let ext_lower = ext.to_lowercase();
// Binary search on the sorted EXTENSION_MAP
EXTENSION_MAP
.binary_search_by(|&(e, _, _)| e.cmp(ext_lower.as_str()))
.ok()
@@ -54,13 +52,11 @@ fn detect_language(path: &str, is_binary: bool) -> Option<(&'static str, &'stati
.and_then(|n| n.to_str())
.unwrap_or("");
// Try filename match first (e.g., Makefile, Dockerfile)
if let Some(result) = lookup_by_filename(file_name) {
tracing::debug!(path = %path, lang = result.0, "matched by filename");
return Some(result);
}
// Try extension match
if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) {
let ext_with_dot = format!(".{ext}");
if let Some(result) = lookup_by_extension(&ext_with_dot) {
@@ -72,13 +68,10 @@ fn detect_language(path: &str, is_binary: bool) -> Option<(&'static str, &'stati
tracing::debug!(path = %path, "no extension found");
}
// For binary files with no recognized extension, classify by media type
if is_binary {
// Try extension-based binary classification
if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) {
let ext_lower = format!(".{ext}").to_lowercase();
let media_type = classify_binary_extension(&ext_lower);
// Return as a synthetic language name
return Some((media_type, "data"));
}
return Some(("Binary", "data"));
@@ -146,7 +139,6 @@ impl GitBare {
.try_into_tree()
.map_err(|e| GitError::Gix(e.to_string()))?;
// If path is specified, descend into subdirectory
if !request.path.is_empty() {
crate::sanitize::validate_file_path(&request.path)?;
let entry = tree
@@ -173,7 +165,6 @@ impl GitBare {
};
self.walk_tree(&repo, &tree, &prefix, 0, &mut ctx)?;
// Resolve groups: merge child language stats into parent group
tracing::info!(
total_files,
total_bytes,
@@ -193,13 +184,11 @@ impl GitBare {
entry.file_count = entry.file_count.saturating_add(s.file_count);
entry.bytes = entry.bytes.saturating_add(s.bytes);
entry.lines = entry.lines.saturating_add(s.lines);
// Keep the lang_type from the parent (or first encountered)
if entry.lang_type.is_empty() {
entry.lang_type = s.lang_type;
}
}
// Build response sorted by bytes descending
let mut languages: Vec<LanguageStat> = resolved
.into_iter()
.map(|(language, s)| {
@@ -272,15 +261,12 @@ impl GitBare {
let data = &blob.data;
let size = data.len() as u64;
// Skip empty files
if size == 0 {
continue;
}
// Check if binary (contains null byte)
let is_binary = data.contains(&0);
// Detect language
let Some((lang_name, lang_type)) = detect_language(&path, is_binary) else {
tracing::debug!(path = %path, is_binary, "no language detected");
continue;
@@ -288,7 +274,6 @@ impl GitBare {
let lang_key = lang_name.to_string();
// Count code lines only for non-binary files within size limit
let lines = if !is_binary && size <= u64::from(ctx.max_file_size) {
count_code_lines(data)
} else {