Files
gitks/sanitize.rs
T
zhenyi 10a4398e81 refactor(bare): enhance security and performance optimizations
- Remove unnecessary sorting in advertise_refs for deterministic output
- Add path traversal detection and validation in bare_dir construction
- Implement symlink resolution checks to prevent security vulnerabilities
- Refactor cache system with CRC validation and improved metrics
- Integrate repo-specific cache invalidation using indexed keys
- Add comprehensive unit tests for commit operations and diff functionality
- Move configuration constants to centralized config module
- Optimize string operations in disk cache random value generation
- Enhance license detection algorithm with cleaner matching logic
- Streamline argument processing in various git operations
- Update dependencies including crc32fast and flate2 for performance
- Add signal handling capability to tokio runtime configuration
2026-06-12 15:04:12 +08:00

416 lines
14 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Input sanitization for git subprocess arguments.
//!
//! Prevents command injection by validating user-supplied strings before
//! passing them to git commands.
use crate::error::GitError;
use crate::error::GitResult;
/// Characters that are never allowed in git ref names / revision strings.
///
/// Git disallows: space, `~`, `^`, `:`, `?`, `*`, `[`, `\`, and all ASCII
/// control characters (bytes 0-31 and 127). The control characters are not
/// listed in this table; they are checked separately via `is_ascii_control()`.
const FORBIDDEN_REF_CHARS: &[char] = &['~', '^', ':', '?', '*', '[', '\\', ' '];
/// Reports whether `c` is an ASCII control character, i.e. a code point in
/// `U+0000..=U+001F` (bytes 0-31) or `U+007F` (byte 127, DEL).
fn is_ascii_control(c: char) -> bool {
    matches!(c, '\u{00}'..='\u{1F}' | '\u{7F}')
}
/// Validate a hexadecimal object id string.
///
/// # Errors
///
/// Returns [`GitError::InvalidArgument`] when `hex`:
/// - is empty,
/// - has a byte length outside
///   `config::MIN_OID_HEX_LENGTH..=config::MAX_OID_HEX_LENGTH`, or
/// - contains a character that is not an ASCII hex digit.
pub fn validate_oid_hex(hex: &str) -> GitResult<()> {
    if hex.is_empty() {
        return Err(GitError::InvalidArgument("oid hex cannot be empty".into()));
    }

    let len = hex.len();
    let min_len = crate::config::MIN_OID_HEX_LENGTH;
    let max_len = crate::config::MAX_OID_HEX_LENGTH;
    let allowed = min_len..=max_len;
    if !allowed.contains(&len) {
        return Err(GitError::InvalidArgument(format!(
            "oid hex length must be {}..={} chars: {}",
            min_len, max_len, len
        )));
    }

    // Every byte of a multi-byte UTF-8 character is >= 0x80 and therefore not
    // a hex digit, so a byte scan rejects exactly the same inputs as a char scan.
    if hex.bytes().any(|b| !b.is_ascii_hexdigit()) {
        return Err(GitError::InvalidArgument(format!(
            "oid hex contains non-hex character: {hex}"
        )));
    }

    Ok(())
}
/// Validate a git reference name (branch, tag, etc.).
///
/// Enforces a subset of the `git check-ref-format` rules plus a guard against
/// option injection. A name is rejected when it:
/// - is empty
/// - starts with `-` (git would parse it as a command-line option)
/// - starts or ends with `.`
/// - ends with `/`
/// - contains `..` or `@{`
/// - contains a character from `FORBIDDEN_REF_CHARS` or an ASCII control
///   character
/// - is longer than `config::MAX_REF_NAME_LENGTH` bytes
///
/// # Errors
///
/// Returns [`GitError::InvalidArgument`] describing the first rule violated.
pub fn validate_ref_name(name: &str) -> GitResult<()> {
    if name.is_empty() {
        return Err(GitError::InvalidArgument("ref name cannot be empty".into()));
    }
    // Security: none of the other rules forbid '-', so without this check a
    // value such as "--upload-pack=..." would pass validation and then be
    // interpreted by git as an option instead of a ref.
    if name.starts_with('-') {
        return Err(GitError::InvalidArgument(format!(
            "ref name cannot start with '-': {name}"
        )));
    }
    if name.starts_with('.') || name.ends_with('.') {
        return Err(GitError::InvalidArgument(format!(
            "ref name cannot start or end with '.': {name}"
        )));
    }
    if name.ends_with('/') {
        return Err(GitError::InvalidArgument(format!(
            "ref name cannot end with '/': {name}"
        )));
    }
    if name.contains("..") {
        return Err(GitError::InvalidArgument(format!(
            "ref name cannot contain '..': {name}"
        )));
    }
    if name.contains("@{") {
        return Err(GitError::InvalidArgument(format!(
            "ref name cannot contain '@{{': {name}"
        )));
    }
    if name.contains(|c: char| FORBIDDEN_REF_CHARS.contains(&c) || is_ascii_control(c)) {
        return Err(GitError::InvalidArgument(format!(
            "ref name contains forbidden character: {name}"
        )));
    }
    if name.len() > crate::config::MAX_REF_NAME_LENGTH {
        return Err(GitError::InvalidArgument(format!(
            "ref name too long (max {} chars): {name}",
            crate::config::MAX_REF_NAME_LENGTH
        )));
    }
    Ok(())
}
/// Validate a revision string (object id, ref name, or short expression).
///
/// Accepted forms:
/// - a hex object id whose length lies within the configured OID bounds
/// - the literal `HEAD`
/// - `ref:<name>`, where `<name>` (trimmed) passes [`validate_ref_name`]
/// - a ref name optionally followed by one `~N` or `^N` suffix and then any
///   trailing `^{tree}` / `^{commit}` / `^{object}` operators; `N` may not
///   exceed `config::MAX_ANCESTRY_DEPTH`
///
/// A revision that starts with `-` is always rejected, because git would parse
/// it as a command-line option rather than as a revision.
///
/// # Errors
///
/// Returns [`GitError::InvalidArgument`] when the revision is empty, longer
/// than `config::MAX_REVISION_LENGTH`, starts with `-`, carries an oversized
/// or unparsable `~N` / `^N` count, or when its base name fails
/// [`validate_ref_name`].
pub fn validate_revision(rev: &str) -> GitResult<()> {
    if rev.is_empty() {
        return Err(GitError::InvalidArgument("revision cannot be empty".into()));
    }
    if rev.len() > crate::config::MAX_REVISION_LENGTH {
        return Err(GitError::InvalidArgument(format!(
            "revision too long (max {} chars): {}",
            crate::config::MAX_REVISION_LENGTH,
            rev.len()
        )));
    }
    // Security: '-' is not a forbidden ref character, so without this check
    // "--output=..." style values would be accepted and reach git as options.
    if rev.starts_with('-') {
        return Err(GitError::InvalidArgument(format!(
            "revision cannot start with '-': {rev}"
        )));
    }

    let oid_lengths = crate::config::MIN_OID_HEX_LENGTH..=crate::config::MAX_OID_HEX_LENGTH;
    if oid_lengths.contains(&rev.len()) && rev.bytes().all(|b| b.is_ascii_hexdigit()) {
        return Ok(());
    }
    if rev == "HEAD" {
        return Ok(());
    }
    if let Some(rest) = rev.strip_prefix("ref:") {
        return validate_ref_name(rest.trim());
    }

    // Bound the count of the last `~N`, when everything after '~' is digits.
    let tilde_count = rev
        .rsplit_once('~')
        .map(|(_, tail)| tail)
        .filter(|tail| is_all_ascii_digits(tail));
    if let Some(digits) = tilde_count {
        check_ancestry_depth('~', digits)?;
    }

    // Bound the count of the last `^N`; only the leading digit run after '^'
    // is considered, so `^{...}` operators are skipped here.
    if let Some((_, after_caret)) = rev.rsplit_once('^') {
        let digit_len = after_caret
            .bytes()
            .take_while(u8::is_ascii_digit)
            .count();
        if digit_len > 0 {
            check_ancestry_depth('^', &after_caret[..digit_len])?;
        }
    }

    // Strip the trailing peel operators, then at most one `~N` / `^N` suffix,
    // to obtain the base name. A '^' suffix is only considered when the
    // remaining text contains no '~' at all.
    let peeled = rev
        .trim_end_matches("^{tree}")
        .trim_end_matches("^{commit}")
        .trim_end_matches("^{object}");
    let base = match peeled.rsplit_once('~') {
        Some((head, digits)) if is_all_ascii_digits(digits) => head,
        Some(_) => peeled,
        None => match peeled.rsplit_once('^') {
            Some((head, digits)) if is_all_ascii_digits(digits) => head,
            _ => peeled,
        },
    };

    if base.is_empty() {
        return Ok(());
    }
    validate_ref_name(base)
}

/// Returns true when `s` is non-empty and consists solely of ASCII digits.
fn is_all_ascii_digits(s: &str) -> bool {
    !s.is_empty() && s.bytes().all(|b| b.is_ascii_digit())
}

/// Parses `digits` as the count of a `~N` / `^N` suffix (`op` is the operator
/// character, used in messages) and enforces `config::MAX_ANCESTRY_DEPTH`.
fn check_ancestry_depth(op: char, digits: &str) -> GitResult<()> {
    let depth: u32 = digits
        .parse()
        .map_err(|_| GitError::InvalidArgument(format!("invalid {op}N syntax")))?;
    if depth > crate::config::MAX_ANCESTRY_DEPTH {
        return Err(GitError::InvalidArgument(format!(
            "{op}N depth too large: {} (max {})",
            depth,
            crate::config::MAX_ANCESTRY_DEPTH
        )));
    }
    Ok(())
}
/// Validate a file path within a commit action.
///
/// The path must be relative (no leading `/`), must not contain `..` or a
/// null byte, must not exceed `config::MAX_FILE_PATH_LENGTH` bytes, and must
/// not have a `.git` component. The `.git` comparison is ASCII
/// case-insensitive: on case-insensitive filesystems `.GIT` or `.Git` refer
/// to the same directory as `.git`, so a case-sensitive check could be
/// bypassed. On Windows builds, components whose name before the first `.` is
/// a reserved device name are rejected as well.
///
/// # Errors
///
/// Returns [`GitError::InvalidArgument`] describing the first rule violated.
pub fn validate_file_path(path: &str) -> GitResult<()> {
    if path.is_empty() {
        return Err(GitError::InvalidArgument(
            "file path cannot be empty".into(),
        ));
    }
    if path.starts_with('/') {
        return Err(GitError::InvalidArgument(format!(
            "file path must be relative, not absolute: {path}"
        )));
    }
    if path.contains("..") {
        return Err(GitError::InvalidArgument(format!(
            "file path cannot contain '..': {path}"
        )));
    }
    if path.contains('\0') {
        return Err(GitError::InvalidArgument(format!(
            "file path cannot contain null byte: {path}"
        )));
    }
    if path.len() > crate::config::MAX_FILE_PATH_LENGTH {
        return Err(GitError::InvalidArgument(format!(
            "file path too long (max {} chars): {path}",
            crate::config::MAX_FILE_PATH_LENGTH
        )));
    }
    // Per-component comparison covers a leading, inner, or trailing ".git"
    // in any letter case, without matching names like ".gitignore".
    if path
        .split('/')
        .any(|component| component.eq_ignore_ascii_case(".git"))
    {
        return Err(GitError::InvalidArgument(format!(
            "cannot modify .git directory: {path}"
        )));
    }
    #[cfg(target_os = "windows")]
    {
        const RESERVED_NAMES: &[&str] = &[
            "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
            "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
        ];
        for component in path.split('/') {
            // Only the part before the first '.' is compared, so "NUL.txt"
            // is rejected just like "NUL".
            let name_part = component.split('.').next().unwrap_or(component);
            let name_upper = name_part.to_uppercase();
            if RESERVED_NAMES.contains(&name_upper.as_str()) {
                return Err(GitError::InvalidArgument(format!(
                    "Windows reserved device name: {component}"
                )));
            }
        }
    }
    Ok(())
}
/// Git config keys that are dangerous to set remotely.
///
/// Setting these could allow arbitrary command execution or bypass security.
/// Entries containing `*` are wildcard patterns (for example `remote.*.url`
/// stands for any `remote.<something>.url`); all other entries are exact
/// key names.
const DANGEROUS_CONFIG_KEYS: &[&str] = &[
"core.sshCommand",
"core.gitProxy",
"http.proxy",
"https.proxy",
"remote.*.url",
"credential.*",
"safe.directory",
"core.hooksPath",
"receive.fsckObjects",
"receive.denyCurrentBranch",
"receive.denyDeleteCurrent",
];
/// Check if a git config key is safe to set remotely.
///
/// The key is compared against every entry of `DANGEROUS_CONFIG_KEYS`.
/// Matching ignores ASCII case: git treats config section and variable names
/// case-insensitively, so `core.sshcommand` must be blocked exactly like
/// `core.sshCommand`. Entries containing `*` match any key that starts with
/// the text before the `*` and ends with the text after it.
///
/// Keys that pass the blocklist must additionally consist only of ASCII
/// alphanumerics, `.`, `-` and `_`.
///
/// # Errors
///
/// Returns [`GitError::InvalidArgument`] when the key is empty, matches a
/// dangerous key or pattern, or contains a character outside the allowed set.
pub fn validate_config_key(key: &str) -> GitResult<()> {
    if key.is_empty() {
        return Err(GitError::InvalidArgument(
            "config key cannot be empty".into(),
        ));
    }
    for pattern in DANGEROUS_CONFIG_KEYS {
        match pattern.split_once('*') {
            // e.g. "remote.*.url" — match any "remote.<something>.url"
            Some((prefix, suffix)) if key_has_affixes(key, prefix, suffix) => {
                return Err(GitError::InvalidArgument(format!(
                    "config key '{key}' matches dangerous pattern '{pattern}'"
                )));
            }
            None if key.eq_ignore_ascii_case(pattern) => {
                return Err(GitError::InvalidArgument(format!(
                    "config key '{key}' is not allowed to be set remotely"
                )));
            }
            _ => {}
        }
    }
    if !key
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_')
    {
        return Err(GitError::InvalidArgument(format!(
            "config key contains invalid characters: {key}"
        )));
    }
    Ok(())
}

/// Returns true when `key` starts with `prefix` and ends with `suffix`,
/// ignoring ASCII case. Works on bytes so it never slices inside a multi-byte
/// character of an arbitrary `key`.
fn key_has_affixes(key: &str, prefix: &str, suffix: &str) -> bool {
    let bytes = key.as_bytes();
    let starts = bytes
        .get(..prefix.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(prefix.as_bytes()));
    let ends = bytes
        .len()
        .checked_sub(suffix.len())
        .is_some_and(|at| bytes[at..].eq_ignore_ascii_case(suffix.as_bytes()));
    starts && ends
}
/// Allowed URL scheme prefixes for git remotes.
///
/// Each entry includes the trailing `://`; `validate_remote_url` accepts a
/// URL only when it starts with one of these prefixes.
const ALLOWED_REMOTE_SCHEMES: &[&str] = &["http://", "https://", "ssh://", "git://", "git+ssh://"];
/// Validate a remote URL for git operations.
///
/// Only URLs that begin with one of `ALLOWED_REMOTE_SCHEMES` are accepted, so
/// `file://`, `ext::` and other schemes that could access local resources or
/// execute commands are rejected.
///
/// # Errors
///
/// Returns [`GitError::InvalidArgument`] when the URL is empty, longer than
/// `config::MAX_REMOTE_URL_LENGTH`, contains a NUL, LF or CR character, or
/// does not start with an allowed scheme.
pub fn validate_remote_url(url: &str) -> GitResult<()> {
    let invalid = |message: String| -> GitResult<()> { Err(GitError::InvalidArgument(message)) };

    if url.is_empty() {
        return invalid("remote URL cannot be empty".into());
    }

    let max_len = crate::config::MAX_REMOTE_URL_LENGTH;
    if url.len() > max_len {
        return invalid(format!("remote URL too long (max {} chars)", max_len));
    }

    if url.contains(|c: char| matches!(c, '\0' | '\n' | '\r')) {
        return invalid("remote URL contains invalid characters".into());
    }

    let scheme_allowed = ALLOWED_REMOTE_SCHEMES
        .iter()
        .any(|&scheme| url.starts_with(scheme));
    if !scheme_allowed {
        return invalid(format!(
            "remote URL must start with one of: {}. Got: {url}",
            ALLOWED_REMOTE_SCHEMES.join(", ")
        ));
    }

    Ok(())
}
/// Validate a git refspec string (e.g. `+refs/heads/*:refs/heads/*`).
///
/// A refspec must not be empty, must not contain null bytes or newlines,
/// must not start with `-`, must not contain shell metacharacters, and must
/// not exceed `config::MAX_REFSPEC_LENGTH` bytes.
///
/// The leading-`-` rule prevents option injection: a value such as
/// `--upload-pack=...` contains none of the other rejected characters, yet
/// git would parse it as an option instead of a refspec.
///
/// # Errors
///
/// Returns [`GitError::InvalidArgument`] describing the first rule violated.
pub fn validate_refspec(refspec: &str) -> GitResult<()> {
    if refspec.is_empty() {
        return Err(GitError::InvalidArgument("refspec cannot be empty".into()));
    }
    if refspec.contains('\0') || refspec.contains('\n') || refspec.contains('\r') {
        return Err(GitError::InvalidArgument(
            "refspec contains invalid characters".into(),
        ));
    }
    // Checked after the control-character rule so the echoed value cannot
    // carry a newline into the message.
    if refspec.starts_with('-') {
        return Err(GitError::InvalidArgument(format!(
            "refspec cannot start with '-': {refspec}"
        )));
    }
    if refspec.contains(['$', '`', '(', ')', '{', '}', '|', ';', '&', '<', '>']) {
        return Err(GitError::InvalidArgument(format!(
            "refspec contains shell metacharacter: {refspec}"
        )));
    }
    if refspec.len() > crate::config::MAX_REFSPEC_LENGTH {
        return Err(GitError::InvalidArgument(format!(
            "refspec too long (max {} chars)",
            crate::config::MAX_REFSPEC_LENGTH
        )));
    }
    Ok(())
}
/// Sanitize git stderr output for logging so it does not leak credentials
/// embedded in URLs or the current user's home directory path.
///
/// For every `https://`, `http://`, `git+ssh://` and `ssh://` URL, the
/// userinfo part (the text before the last `@` inside the URL's authority) is
/// replaced with `***:***`. The authority is taken to end at the first `/`,
/// `?`, `#` or whitespace, so an `@` that appears later in the message (for
/// example in an e-mail address) is left untouched. Afterwards every
/// occurrence of the `HOME` environment variable's value is replaced with `~`;
/// this step is skipped when `HOME` is unset, not valid Unicode, empty, or `/`,
/// because replacing those values would mangle the whole message.
///
/// Returns the sanitized copy; the input is not modified.
pub fn sanitize_git_stderr(stderr: &str) -> String {
    const SCHEMES: [&str; 4] = ["https://", "http://", "git+ssh://", "ssh://"];
    const MASK: &str = "***:***@";

    let mut sanitized = stderr.to_string();
    for scheme in SCHEMES {
        // `cursor` only moves forward, which guarantees termination even
        // though the masked URL still contains both the scheme and an '@'.
        let mut cursor = 0;
        while let Some(offset) = sanitized[cursor..].find(scheme) {
            let authority_start = cursor + offset + scheme.len();
            let userinfo_end = {
                let rest = &sanitized[authority_start..];
                let authority_len = rest
                    .find(|c: char| matches!(c, '/' | '?' | '#') || c.is_whitespace())
                    .unwrap_or(rest.len());
                // Last '@' so that a raw '@' inside a password is masked too.
                rest[..authority_len].rfind('@')
            };
            cursor = match userinfo_end {
                Some(at) => {
                    sanitized.replace_range(authority_start..=authority_start + at, MASK);
                    authority_start + MASK.len()
                }
                None => authority_start,
            };
        }
    }

    match std::env::var("HOME") {
        Ok(home) if !home.is_empty() && home != "/" => sanitized.replace(&home, "~"),
        _ => sanitized,
    }
}
/// Validate a storage-relative path (used in resolve_for_init and
/// from_repository_header).
///
/// The checks run in this order and the first failure is reported: the path
/// must be non-empty, must not start with `/`, must not contain a null byte,
/// must not exceed `config::MAX_RELATIVE_PATH_LENGTH` bytes, and must not
/// contain `..`.
///
/// # Errors
///
/// Returns [`GitError::InvalidArgument`] describing the first rule violated.
pub fn validate_relative_path(path: &str) -> GitResult<()> {
    let max_len = crate::config::MAX_RELATIVE_PATH_LENGTH;

    // The first matching branch decides the message; `None` means valid.
    let problem: Option<String> = if path.is_empty() {
        Some("relative_path cannot be empty".into())
    } else if path.starts_with('/') {
        Some("relative_path must be relative, not absolute".into())
    } else if path.contains('\0') {
        Some("relative_path cannot contain null byte".into())
    } else if path.len() > max_len {
        Some(format!("relative_path too long (max {} chars)", max_len))
    } else if path.contains("..") {
        Some(format!(
            "path traversal detected: relative_path contains '..': {path}"
        ))
    } else {
        None
    };

    match problem {
        Some(message) => Err(GitError::InvalidArgument(message)),
        None => Ok(()),
    }
}