10a4398e81
- Remove unnecessary sorting in advertise_refs for deterministic output - Add path traversal detection and validation in bare_dir construction - Implement symlink resolution checks to prevent security vulnerabilities - Refactor cache system with CRC validation and improved metrics - Integrate repo-specific cache invalidation using indexed keys - Add comprehensive unit tests for commit operations and diff functionality - Move configuration constants to centralized config module - Optimize string operations in disk cache random value generation - Enhance license detection algorithm with cleaner matching logic - Streamline argument processing in various git operations - Update dependencies including crc32fast and flate2 for performance - Add signal handling capability to tokio runtime configuration
416 lines
14 KiB
Rust
416 lines
14 KiB
Rust
//! Input sanitization for git subprocess arguments.
//!
//! Prevents command injection by validating user-supplied strings before
//! passing them to git commands.
|
||
use crate::error::GitError;
|
||
use crate::error::GitResult;
|
||
|
||
/// Printable characters that may never appear in a git ref name or revision
/// string.
///
/// Git disallows space, `~`, `^`, `:`, `?`, `*`, `[` and `\`, as well as every
/// ASCII control character (bytes 0–31 and 127). Only the printable ones are
/// listed here; control characters are detected separately through
/// `is_ascii_control()`.
const FORBIDDEN_REF_CHARS: &[char] = &[
    '~',
    '^',
    ':',
    '?',
    '*',
    '[',
    '\\',
    ' ',
];
|
||
|
||
/// Returns `true` if `c` is an ASCII control character (U+0000–U+001F or
/// U+007F).
///
/// Delegates to the standard library's [`char::is_ascii_control`], which
/// covers exactly that range; non-ASCII characters (including the C1 control
/// block U+0080–U+009F) yield `false`.
fn is_ascii_control(c: char) -> bool {
    c.is_ascii_control()
}
|
||
|
||
/// Validate a git reference name (branch, tag, etc.).
|
||
///
|
||
/// Git ref rules (from `git check-ref-format`):
|
||
/// - Cannot contain forbidden chars
|
||
/// - Cannot start or end with '.'
|
||
/// - Cannot end with '/'
|
||
/// - Cannot contain '..'
|
||
/// - Cannot contain '@{'
|
||
/// - Cannot be empty
|
||
pub fn validate_oid_hex(hex: &str) -> GitResult<()> {
|
||
if hex.is_empty() {
|
||
return Err(GitError::InvalidArgument("oid hex cannot be empty".into()));
|
||
}
|
||
if !(crate::config::MIN_OID_HEX_LENGTH..=crate::config::MAX_OID_HEX_LENGTH)
|
||
.contains(&hex.len())
|
||
{
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"oid hex length must be {}..={} chars: {}",
|
||
crate::config::MIN_OID_HEX_LENGTH,
|
||
crate::config::MAX_OID_HEX_LENGTH,
|
||
hex.len()
|
||
)));
|
||
}
|
||
if !hex.chars().all(|c| c.is_ascii_hexdigit()) {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"oid hex contains non-hex character: {hex}"
|
||
)));
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
pub fn validate_ref_name(name: &str) -> GitResult<()> {
|
||
if name.is_empty() {
|
||
return Err(GitError::InvalidArgument("ref name cannot be empty".into()));
|
||
}
|
||
if name.starts_with('.') || name.ends_with('.') {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"ref name cannot start or end with '.': {name}"
|
||
)));
|
||
}
|
||
if name.ends_with('/') {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"ref name cannot end with '/': {name}"
|
||
)));
|
||
}
|
||
if name.contains("..") {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"ref name cannot contain '..': {name}"
|
||
)));
|
||
}
|
||
if name.contains("@{") {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"ref name cannot contain '@{{': {name}"
|
||
)));
|
||
}
|
||
if name.contains(|c: char| FORBIDDEN_REF_CHARS.contains(&c) || is_ascii_control(c)) {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"ref name contains forbidden character: {name}"
|
||
)));
|
||
}
|
||
if name.len() > crate::config::MAX_REF_NAME_LENGTH {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"ref name too long (max {} chars): {name}",
|
||
crate::config::MAX_REF_NAME_LENGTH
|
||
)));
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
/// Validate a revision string (branch name, tag name, or short expression).
|
||
///
|
||
/// Allows OID hex strings, ref names, and a small set of revision operators
|
||
/// (HEAD, ^{tree}, ~N, ^N) that are safe when passed as a single argument.
|
||
pub fn validate_revision(rev: &str) -> GitResult<()> {
|
||
if rev.is_empty() {
|
||
return Err(GitError::InvalidArgument("revision cannot be empty".into()));
|
||
}
|
||
if rev.len() > crate::config::MAX_REVISION_LENGTH {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"revision too long (max {} chars): {}",
|
||
crate::config::MAX_REVISION_LENGTH,
|
||
rev.len()
|
||
)));
|
||
}
|
||
if rev.chars().all(|c| c.is_ascii_hexdigit())
|
||
&& rev.len() >= crate::config::MIN_OID_HEX_LENGTH
|
||
&& rev.len() <= crate::config::MAX_OID_HEX_LENGTH
|
||
{
|
||
return Ok(());
|
||
}
|
||
if rev == "HEAD" {
|
||
return Ok(());
|
||
}
|
||
if let Some(rest) = rev.strip_prefix("ref:") {
|
||
return validate_ref_name(rest.trim());
|
||
}
|
||
|
||
if let Some(tilde_pos) = rev.rfind('~') {
|
||
let num_part = &rev[tilde_pos + 1..];
|
||
if !num_part.is_empty() && num_part.chars().all(|c| c.is_ascii_digit()) {
|
||
let depth: u32 = num_part
|
||
.parse()
|
||
.map_err(|_| GitError::InvalidArgument("invalid ~N syntax".into()))?;
|
||
if depth > crate::config::MAX_ANCESTRY_DEPTH {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"~N depth too large: {} (max {})",
|
||
depth, crate::config::MAX_ANCESTRY_DEPTH
|
||
)));
|
||
}
|
||
}
|
||
}
|
||
|
||
if let Some(caret_pos) = rev.rfind('^') {
|
||
let after_caret = &rev[caret_pos + 1..];
|
||
if !after_caret.starts_with('{')
|
||
&& !after_caret.is_empty()
|
||
&& let Some(first_char) = after_caret.chars().next()
|
||
&& first_char.is_ascii_digit()
|
||
{
|
||
let num_part: String = after_caret
|
||
.chars()
|
||
.take_while(|c| c.is_ascii_digit())
|
||
.collect();
|
||
if !num_part.is_empty() {
|
||
let depth: u32 = num_part
|
||
.parse()
|
||
.map_err(|_| GitError::InvalidArgument("invalid ^N syntax".into()))?;
|
||
if depth > crate::config::MAX_ANCESTRY_DEPTH {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"^N depth too large: {} (max {})",
|
||
depth, crate::config::MAX_ANCESTRY_DEPTH
|
||
)));
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
let mut base = rev;
|
||
|
||
base = base
|
||
.trim_end_matches("^{tree}")
|
||
.trim_end_matches("^{commit}")
|
||
.trim_end_matches("^{object}");
|
||
|
||
if let Some(tilde_pos) = base.rfind('~') {
|
||
let after_tilde = &base[tilde_pos + 1..];
|
||
if !after_tilde.is_empty() && after_tilde.chars().all(|c| c.is_ascii_digit()) {
|
||
base = &base[..tilde_pos];
|
||
}
|
||
} else if let Some(caret_pos) = base.rfind('^') {
|
||
let after_caret = &base[caret_pos + 1..];
|
||
if !after_caret.starts_with('{')
|
||
&& !after_caret.is_empty()
|
||
&& after_caret.chars().all(|c| c.is_ascii_digit())
|
||
{
|
||
base = &base[..caret_pos];
|
||
}
|
||
}
|
||
|
||
if base.is_empty() {
|
||
return Ok(());
|
||
}
|
||
validate_ref_name(base)?;
|
||
Ok(())
|
||
}
|
||
|
||
/// Validate a file path within a commit action.
|
||
///
|
||
/// Must be a relative path (no leading '/'), no '..' traversal,
|
||
/// no null bytes, no .git directory access, and reasonable length.
|
||
pub fn validate_file_path(path: &str) -> GitResult<()> {
|
||
if path.is_empty() {
|
||
return Err(GitError::InvalidArgument(
|
||
"file path cannot be empty".into(),
|
||
));
|
||
}
|
||
if path.starts_with('/') {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"file path must be relative, not absolute: {path}"
|
||
)));
|
||
}
|
||
if path.contains("..") {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"file path cannot contain '..': {path}"
|
||
)));
|
||
}
|
||
if path.contains('\0') {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"file path cannot contain null byte: {path}"
|
||
)));
|
||
}
|
||
if path.len() > crate::config::MAX_FILE_PATH_LENGTH {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"file path too long (max {} chars): {path}",
|
||
crate::config::MAX_FILE_PATH_LENGTH
|
||
)));
|
||
}
|
||
|
||
if path == ".git"
|
||
|| path.starts_with(".git/")
|
||
|| path.contains("/.git/")
|
||
|| path.ends_with("/.git")
|
||
{
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"cannot modify .git directory: {path}"
|
||
)));
|
||
}
|
||
|
||
#[cfg(target_os = "windows")]
|
||
{
|
||
const RESERVED_NAMES: &[&str] = &[
|
||
"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
|
||
"COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
|
||
];
|
||
|
||
for component in path.split('/') {
|
||
let name_part = component.split('.').next().unwrap_or(component);
|
||
let name_upper = name_part.to_uppercase();
|
||
|
||
if RESERVED_NAMES.contains(&name_upper.as_str()) {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"Windows reserved device name: {component}"
|
||
)));
|
||
}
|
||
}
|
||
}
|
||
|
||
Ok(())
|
||
}
|
||
|
||
/// Git config keys that must not be set remotely, because doing so could
/// allow arbitrary command execution or bypass security.
///
/// An entry containing `*` is a wildcard pattern (for example `remote.*.url`);
/// every other entry is a complete key.
const DANGEROUS_CONFIG_KEYS: &[&str] = &[
    // core.*
    "core.sshCommand",
    "core.gitProxy",
    // proxies
    "http.proxy",
    "https.proxy",
    // wildcard patterns
    "remote.*.url",
    "credential.*",
    // safe.* / hooks
    "safe.directory",
    "core.hooksPath",
    // receive.*
    "receive.fsckObjects",
    "receive.denyCurrentBranch",
    "receive.denyDeleteCurrent",
];
|
||
|
||
/// Check if a git config key is safe to set remotely.
|
||
pub fn validate_config_key(key: &str) -> GitResult<()> {
|
||
if key.is_empty() {
|
||
return Err(GitError::InvalidArgument(
|
||
"config key cannot be empty".into(),
|
||
));
|
||
}
|
||
for pattern in DANGEROUS_CONFIG_KEYS {
|
||
if pattern.contains('*') {
|
||
// e.g. "remote.*.url" — match any "remote.<something>.url"
|
||
if let Some((prefix, suffix)) = pattern.split_once('*')
|
||
&& key.starts_with(prefix)
|
||
&& key.ends_with(suffix)
|
||
{
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"config key '{key}' matches dangerous pattern '{pattern}'"
|
||
)));
|
||
}
|
||
} else if key == *pattern {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"config key '{key}' is not allowed to be set remotely"
|
||
)));
|
||
}
|
||
}
|
||
if !key
|
||
.chars()
|
||
.all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_')
|
||
{
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"config key contains invalid characters: {key}"
|
||
)));
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
/// URL scheme prefixes a git remote is allowed to start with.
const ALLOWED_REMOTE_SCHEMES: &[&str] = &[
    "http://",
    "https://",
    "ssh://",
    "git://",
    "git+ssh://",
];
|
||
|
||
/// Validate a remote URL for git operations.
|
||
///
|
||
/// Only allows standard transport protocols. Rejects `file://`, `ext::`,
|
||
/// and other schemes that could access local resources or execute commands.
|
||
pub fn validate_remote_url(url: &str) -> GitResult<()> {
|
||
if url.is_empty() {
|
||
return Err(GitError::InvalidArgument(
|
||
"remote URL cannot be empty".into(),
|
||
));
|
||
}
|
||
if url.len() > crate::config::MAX_REMOTE_URL_LENGTH {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"remote URL too long (max {} chars)",
|
||
crate::config::MAX_REMOTE_URL_LENGTH
|
||
)));
|
||
}
|
||
if url.contains('\0') || url.contains('\n') || url.contains('\r') {
|
||
return Err(GitError::InvalidArgument(
|
||
"remote URL contains invalid characters".into(),
|
||
));
|
||
}
|
||
if !ALLOWED_REMOTE_SCHEMES.iter().any(|s| url.starts_with(s)) {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"remote URL must start with one of: {}. Got: {url}",
|
||
ALLOWED_REMOTE_SCHEMES.join(", ")
|
||
)));
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
/// Validate a git refspec string (e.g. `+refs/heads/*:refs/heads/*`).
|
||
///
|
||
/// Refspecs must not contain null bytes, newlines, or shell metacharacters.
|
||
pub fn validate_refspec(refspec: &str) -> GitResult<()> {
|
||
if refspec.is_empty() {
|
||
return Err(GitError::InvalidArgument("refspec cannot be empty".into()));
|
||
}
|
||
if refspec.contains('\0') || refspec.contains('\n') || refspec.contains('\r') {
|
||
return Err(GitError::InvalidArgument(
|
||
"refspec contains invalid characters".into(),
|
||
));
|
||
}
|
||
if refspec.contains(['$', '`', '(', ')', '{', '}', '|', ';', '&', '<', '>']) {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"refspec contains shell metacharacter: {refspec}"
|
||
)));
|
||
}
|
||
if refspec.len() > crate::config::MAX_REFSPEC_LENGTH {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"refspec too long (max {} chars)",
|
||
crate::config::MAX_REFSPEC_LENGTH
|
||
)));
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
/// Sanitize git stderr output for logging so it does not leak sensitive data.
///
/// Two substitutions are applied:
/// 1. For every `https://`, `http://`, `git+ssh://` and `ssh://` URL, the
///    userinfo part (everything up to the last `@` inside the URL authority)
///    is replaced with `***:***`.
/// 2. Every occurrence of the current `HOME` directory is replaced with `~`.
///    An empty or `/` `HOME` is ignored, because replacing it would rewrite
///    every path separator in the message.
///
/// Email addresses and other absolute paths are NOT redacted.
pub fn sanitize_git_stderr(stderr: &str) -> String {
    const CREDENTIAL_SCHEMES: [&str; 4] = ["https://", "http://", "git+ssh://", "ssh://"];

    let mut sanitized = stderr.to_string();
    for scheme in CREDENTIAL_SCHEMES.iter().copied() {
        sanitized = redact_url_userinfo(&sanitized, scheme);
    }

    if let Some(homedir) = std::env::var_os("HOME").and_then(|v| v.into_string().ok()) {
        if !homedir.is_empty() && homedir != "/" {
            sanitized = sanitized.replace(&homedir, "~");
        }
    }
    sanitized
}

/// Replace the userinfo of every `scheme` URL in `text` with `***:***`.
///
/// The scan is a single forward pass, so an already-redacted URL is never
/// revisited. The `@` search is limited to the URL authority, which ends at
/// the first `/`, `?`, `#` or whitespace; an `@` further along in the message
/// (such as an email address) is left untouched.
fn redact_url_userinfo(text: &str, scheme: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut rest = text;

    while let Some(start) = rest.find(scheme) {
        let authority_start = start + scheme.len();
        out.push_str(&rest[..authority_start]);
        rest = &rest[authority_start..];

        let authority_end = rest
            .find(|c: char| c == '/' || c == '?' || c == '#' || c.is_whitespace())
            .unwrap_or(rest.len());
        // `rfind` so that a raw '@' inside the password is redacted as well.
        if let Some(at) = rest[..authority_end].rfind('@') {
            out.push_str("***:***@");
            rest = &rest[at + 1..];
        }
    }

    out.push_str(rest);
    out
}
|
||
|
||
/// Validate a storage-relative path (used in resolve_for_init and from_repository_header).
|
||
///
|
||
/// Must not contain path traversal, must be a simple relative path.
|
||
pub fn validate_relative_path(path: &str) -> GitResult<()> {
|
||
if path.is_empty() {
|
||
return Err(GitError::InvalidArgument(
|
||
"relative_path cannot be empty".into(),
|
||
));
|
||
}
|
||
if path.starts_with('/') {
|
||
return Err(GitError::InvalidArgument(
|
||
"relative_path must be relative, not absolute".into(),
|
||
));
|
||
}
|
||
if path.contains('\0') {
|
||
return Err(GitError::InvalidArgument(
|
||
"relative_path cannot contain null byte".into(),
|
||
));
|
||
}
|
||
if path.len() > crate::config::MAX_RELATIVE_PATH_LENGTH {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"relative_path too long (max {} chars)",
|
||
crate::config::MAX_RELATIVE_PATH_LENGTH
|
||
)));
|
||
}
|
||
if path.contains("..") {
|
||
return Err(GitError::InvalidArgument(format!(
|
||
"path traversal detected: relative_path contains '..': {path}"
|
||
)));
|
||
}
|
||
Ok(())
|
||
}
|