refactor(bare): enhance security and performance optimizations
- Remove unnecessary sorting in advertise_refs for deterministic output
- Add path traversal detection and validation in bare_dir construction
- Implement symlink resolution checks to prevent security vulnerabilities
- Refactor cache system with CRC validation and improved metrics
- Integrate repo-specific cache invalidation using indexed keys
- Add comprehensive unit tests for commit operations and diff functionality
- Move configuration constants to centralized config module
- Optimize string operations in disk cache random value generation
- Enhance license detection algorithm with cleaner matching logic
- Streamline argument processing in various git operations
- Update dependencies including crc32fast and flate2 for performance
- Add signal handling capability to tokio runtime configuration
This commit is contained in:
@@ -57,57 +57,46 @@ impl GitBare {
|
||||
fn detect_license(content: &str) -> (&'static str, &'static str, f64) {
|
||||
let lower = content.to_lowercase();
|
||||
|
||||
// MIT
|
||||
if lower.contains("permission is hereby granted, free of charge") && lower.contains("mit") {
|
||||
return ("MIT", "MIT License", 0.95);
|
||||
}
|
||||
|
||||
// Apache 2.0
|
||||
if lower.contains("apache license, version 2.0") || lower.contains("apache-2.0") {
|
||||
return ("Apache-2.0", "Apache License 2.0", 0.95);
|
||||
}
|
||||
|
||||
// GPL 3.0
|
||||
if lower.contains("gnu general public license") && lower.contains("version 3") {
|
||||
return ("GPL-3.0", "GNU General Public License v3.0", 0.90);
|
||||
}
|
||||
// GPL 2.0
|
||||
if lower.contains("gnu general public license") && lower.contains("version 2") {
|
||||
return ("GPL-2.0", "GNU General Public License v2.0", 0.90);
|
||||
}
|
||||
|
||||
// BSD 3
|
||||
if lower.contains("redistribution and use in source and binary forms")
|
||||
&& lower.contains("neither the name of")
|
||||
{
|
||||
return ("BSD-3-Clause", "BSD 3-Clause License", 0.85);
|
||||
}
|
||||
// BSD 2
|
||||
if lower.contains("redistribution and use in source and binary forms") {
|
||||
return ("BSD-2-Clause", "BSD 2-Clause License", 0.80);
|
||||
}
|
||||
|
||||
// AGPL
|
||||
if lower.contains("gnu affero general public license") {
|
||||
return ("AGPL-3.0", "GNU Affero General Public License v3.0", 0.90);
|
||||
}
|
||||
|
||||
// LGPL
|
||||
if lower.contains("gnu lesser general public license") {
|
||||
return ("LGPL-3.0", "GNU Lesser General Public License v3.0", 0.85);
|
||||
}
|
||||
|
||||
// MPL
|
||||
if lower.contains("mozilla public license") {
|
||||
return ("MPL-2.0", "Mozilla Public License 2.0", 0.90);
|
||||
}
|
||||
|
||||
// Unlicense
|
||||
if lower.contains("this is free and unencumbered software released into the public domain") {
|
||||
return ("Unlicense", "The Unlicense", 0.95);
|
||||
}
|
||||
|
||||
// ISC
|
||||
if lower.contains("permission to use, copy, modify, and/or distribute") && lower.contains("isc")
|
||||
{
|
||||
return ("ISC", "ISC License", 0.80);
|
||||
|
||||
@@ -7,7 +7,6 @@ use crate::bare::GitBare;
|
||||
use crate::error::{GitError, GitResult};
|
||||
use crate::pb::{GetLanguageStatsRequest, GetLanguageStatsResponse, LanguageStat, object_selector};
|
||||
|
||||
// Include the generated linguist rules
|
||||
include!(concat!(env!("OUT_DIR"), "/linguist_generated.rs"));
|
||||
|
||||
/// Default max file size for line counting (512 KB).
|
||||
@@ -17,7 +16,6 @@ const MAX_TREE_WALK_DEPTH: usize = 256;
|
||||
/// Look up a language by file extension (case-insensitive, includes leading dot).
|
||||
fn lookup_by_extension(ext: &str) -> Option<(&'static str, &'static str)> {
|
||||
let ext_lower = ext.to_lowercase();
|
||||
// Binary search on the sorted EXTENSION_MAP
|
||||
EXTENSION_MAP
|
||||
.binary_search_by(|&(e, _, _)| e.cmp(ext_lower.as_str()))
|
||||
.ok()
|
||||
@@ -54,13 +52,11 @@ fn detect_language(path: &str, is_binary: bool) -> Option<(&'static str, &'stati
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or("");
|
||||
|
||||
// Try filename match first (e.g., Makefile, Dockerfile)
|
||||
if let Some(result) = lookup_by_filename(file_name) {
|
||||
tracing::debug!(path = %path, lang = result.0, "matched by filename");
|
||||
return Some(result);
|
||||
}
|
||||
|
||||
// Try extension match
|
||||
if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) {
|
||||
let ext_with_dot = format!(".{ext}");
|
||||
if let Some(result) = lookup_by_extension(&ext_with_dot) {
|
||||
@@ -72,13 +68,10 @@ fn detect_language(path: &str, is_binary: bool) -> Option<(&'static str, &'stati
|
||||
tracing::debug!(path = %path, "no extension found");
|
||||
}
|
||||
|
||||
// For binary files with no recognized extension, classify by media type
|
||||
if is_binary {
|
||||
// Try extension-based binary classification
|
||||
if let Some(ext) = Path::new(path).extension().and_then(|e| e.to_str()) {
|
||||
let ext_lower = format!(".{ext}").to_lowercase();
|
||||
let media_type = classify_binary_extension(&ext_lower);
|
||||
// Return as a synthetic language name
|
||||
return Some((media_type, "data"));
|
||||
}
|
||||
return Some(("Binary", "data"));
|
||||
@@ -146,7 +139,6 @@ impl GitBare {
|
||||
.try_into_tree()
|
||||
.map_err(|e| GitError::Gix(e.to_string()))?;
|
||||
|
||||
// If path is specified, descend into subdirectory
|
||||
if !request.path.is_empty() {
|
||||
crate::sanitize::validate_file_path(&request.path)?;
|
||||
let entry = tree
|
||||
@@ -173,7 +165,6 @@ impl GitBare {
|
||||
};
|
||||
self.walk_tree(&repo, &tree, &prefix, 0, &mut ctx)?;
|
||||
|
||||
// Resolve groups: merge child language stats into parent group
|
||||
tracing::info!(
|
||||
total_files,
|
||||
total_bytes,
|
||||
@@ -193,13 +184,11 @@ impl GitBare {
|
||||
entry.file_count = entry.file_count.saturating_add(s.file_count);
|
||||
entry.bytes = entry.bytes.saturating_add(s.bytes);
|
||||
entry.lines = entry.lines.saturating_add(s.lines);
|
||||
// Keep the lang_type from the parent (or first encountered)
|
||||
if entry.lang_type.is_empty() {
|
||||
entry.lang_type = s.lang_type;
|
||||
}
|
||||
}
|
||||
|
||||
// Build response sorted by bytes descending
|
||||
let mut languages: Vec<LanguageStat> = resolved
|
||||
.into_iter()
|
||||
.map(|(language, s)| {
|
||||
@@ -272,15 +261,12 @@ impl GitBare {
|
||||
let data = &blob.data;
|
||||
let size = data.len() as u64;
|
||||
|
||||
// Skip empty files
|
||||
if size == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if binary (contains null byte)
|
||||
let is_binary = data.contains(&0);
|
||||
|
||||
// Detect language
|
||||
let Some((lang_name, lang_type)) = detect_language(&path, is_binary) else {
|
||||
tracing::debug!(path = %path, is_binary, "no language detected");
|
||||
continue;
|
||||
@@ -288,7 +274,6 @@ impl GitBare {
|
||||
|
||||
let lang_key = lang_name.to_string();
|
||||
|
||||
// Count code lines only for non-binary files within size limit
|
||||
let lines = if !is_binary && size <= u64::from(ctx.max_file_size) {
|
||||
count_code_lines(data)
|
||||
} else {
|
||||
|
||||
@@ -18,7 +18,6 @@ impl GitBare {
|
||||
OptimizeStrategy::Heuristic | OptimizeStrategy::Aggressive => {
|
||||
let stats = self.get_repository_statistics()?;
|
||||
|
||||
// Run commit-graph write if needed
|
||||
if (stats.commit_graph_size_bytes == 0 || strategy == OptimizeStrategy::Aggressive)
|
||||
&& let Ok(resp) = write_commit_graph(self, false, false)
|
||||
{
|
||||
@@ -28,7 +27,6 @@ impl GitBare {
|
||||
stdout_all.push_str(&resp.stdout);
|
||||
}
|
||||
|
||||
// Repack if many loose objects or packfiles
|
||||
let repack_needed = stats.loose_object_count > 1000 || stats.packfile_count > 10;
|
||||
|
||||
if repack_needed || strategy == OptimizeStrategy::Aggressive {
|
||||
@@ -41,7 +39,6 @@ impl GitBare {
|
||||
}
|
||||
}
|
||||
|
||||
// Prune if aggressive
|
||||
if strategy == OptimizeStrategy::Aggressive
|
||||
&& let Ok(resp) = run_gc(self, true, true)
|
||||
{
|
||||
@@ -52,7 +49,6 @@ impl GitBare {
|
||||
}
|
||||
}
|
||||
OptimizeStrategy::Incremental => {
|
||||
// Just run commit-graph write incrementally
|
||||
if let Ok(resp) = write_commit_graph(self, false, false) {
|
||||
if !resp.ok {
|
||||
stderr_all.push_str(&resp.stderr);
|
||||
@@ -71,7 +67,6 @@ impl GitBare {
|
||||
}
|
||||
|
||||
fn get_repository_statistics(&self) -> GitResult<RepositoryStatistics> {
|
||||
// Count loose objects
|
||||
let loose = std::fs::read_dir(self.bare_dir.join("objects"))
|
||||
.map(|d| {
|
||||
d.filter_map(|e| e.ok())
|
||||
@@ -83,13 +78,11 @@ impl GitBare {
|
||||
})
|
||||
.unwrap_or(0);
|
||||
|
||||
// Count packfiles
|
||||
let pack_dir = self.bare_dir.join("objects").join("pack");
|
||||
let pack_count = std::fs::read_dir(&pack_dir)
|
||||
.map(|d| d.filter_map(|e| e.ok()).count() as u64)
|
||||
.unwrap_or(0);
|
||||
|
||||
// Check commit-graph
|
||||
let cg_size = std::fs::metadata(
|
||||
self.bare_dir
|
||||
.join("objects")
|
||||
|
||||
@@ -67,7 +67,6 @@ impl GitBare {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for line in stdout.lines() {
|
||||
// Format: path:line:col:matched_text
|
||||
if let Some((path_and_rest, matched)) = line.rsplit_once(':') {
|
||||
let prefix_parts: Vec<&str> = path_and_rest.rsplitn(3, ':').collect();
|
||||
if prefix_parts.len() >= 3
|
||||
@@ -144,7 +143,6 @@ impl GitBare {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Simple substring/case-insensitive matching for file names
|
||||
let query = &request.query;
|
||||
let matched = if query.is_empty() {
|
||||
true
|
||||
|
||||
Reference in New Issue
Block a user