gitks/hooks/sanitize.rs

//! Copyright (c) 2022-2026 GitDataAi All rights reserved.

//! Hook content sanitization.
//!
//! Validates custom hook scripts to prevent dangerous commands.

use crate::error::{GitError, GitResult};

/// Substrings that are never allowed in custom hook scripts.
///
/// This is a blocklist: `validate_hook_content` lowercases both the script and
/// each entry and rejects the script if the entry occurs anywhere in it. The
/// match is therefore a case-insensitive substring test (so `wget -O /` also
/// matches `wget -o /`), not a parse of the shell syntax. Entries are tried in
/// the order listed, and the first hit is the one named in the error message.
///
/// NOTE(review): because these are raw substrings, short entries such as
/// `halt` also match inside longer words or comments — TODO confirm that this
/// breadth is intended.
const FORBIDDEN_PATTERNS: &[&str] = &[
    "rm -rf",
    "rm -r /",
    "chmod 777",
    "chmod 666",
    "mkfs",
    "dd if=",
    ":(){ :|:& };:", // fork bomb
    "> /dev/sda",
    "curl -o /",
    "wget -O /",
    "/etc/passwd",
    "/etc/shadow",
    "shutdown",
    "reboot",
    "init 0",
    "init 6",
    "poweroff",
    "halt",
    "eval ",     // eval can execute arbitrary strings
    "exec ",     // exec can replace process
    "$(",        // command substitution
    "`",         // backtick command substitution
    "${",        // variable expansion (can be used for obfuscation)
    "|bash",     // piping to bash
    "|sh",       // piping to sh
    "|dash",     // piping to dash
    "|zsh",      // piping to zsh
    "base64",    // base64 encoding/decoding (common for obfuscation)
    "python -c", // inline python execution
    "perl -e",   // inline perl execution
    "ruby -e",   // inline ruby execution
    "node -e",   // inline node execution
    "/dev/tcp",  // bash reverse shell
    "nc -e",     // netcat reverse shell
    "ncat",      // netcat alternative
    "socat",     // socket relay
];

/// Additional destructive `rm -rf` invocations, matched as plain substrings.
///
/// Despite the name, `validate_hook_content` tests these with `str::contains`
/// against the original (not lowercased) script, so an entry matches anywhere
/// in the content and the comparison is case-sensitive. Plain string matching
/// is used instead of regular expressions to keep the check simple.
///
/// NOTE(review): every entry contains `rm -rf`, which is already listed in
/// `FORBIDDEN_PATTERNS` and is checked first, so with the current lists this
/// check can never be the one that rejects a script.
const DANGEROUS_PREFIXES: &[&str] = &[
    "rm -rf /", // rm -rf with absolute path
    "rm -rf ~", // rm -rf with home directory
    "rm -rf .", // rm -rf with relative path (current dir)
    "rm -rf *", // rm -rf with wildcard
];

/// Pairs of substrings that are rejected when both occur in the same script,
/// as an indicator of data exfiltration or code execution.
///
/// `check_dangerous_pairs` lowercases the script and searches for each half of
/// a pair independently; the two halves do not have to be adjacent, on the
/// same line, or whole words. Pairs are tried in the order listed and the
/// first pair whose halves are both present is the one reported.
///
/// NOTE(review): short halves such as `sh`, `nc` and `-e` also occur inside
/// unrelated words and options (`push`, `branch`, `--exit-code`), so benign
/// scripts can be rejected — TODO confirm whether word-boundary matching was
/// intended.
const DANGEROUS_COMMAND_PAIRS: &[(&str, &str)] = &[
    ("curl", "bash"),
    ("curl", "sh"),
    ("wget", "bash"),
    ("wget", "sh"),
    ("nc", "-e"),
    ("ncat", "-e"),
    ("python", "-c"),
    ("perl", "-e"),
    ("ruby", "-e"),
    ("node", "-e"),
];

use crate::config::MAX_HOOK_SCRIPT_SIZE;

/// Validate the content of a custom hook script.
///
/// The checks run in the order below and the first failure is returned:
///
/// 1. the content must not be empty;
/// 2. its length in bytes must not exceed `MAX_HOOK_SCRIPT_SIZE`;
/// 3. it must not contain a NUL character;
/// 4. it must not contain any `FORBIDDEN_PATTERNS` entry (both sides are
///    lowercased, so this is a case-insensitive substring match);
/// 5. it must not contain any `DANGEROUS_PREFIXES` entry (case-sensitive
///    substring match on the original text);
/// 6. `check_obfuscation_attempts` must accept it;
/// 7. `check_dangerous_pairs` must accept it.
///
/// # Errors
///
/// Returns `GitError::InvalidArgument` with a message describing the first
/// check that failed.
pub fn validate_hook_content(content: &str) -> GitResult<()> {
    // Cheap structural checks come first.
    if content.is_empty() {
        return Err(GitError::InvalidArgument(
            "hook content cannot be empty".into(),
        ));
    }

    let byte_len = content.len();
    if byte_len > MAX_HOOK_SCRIPT_SIZE {
        return Err(GitError::InvalidArgument(format!(
            "hook content too large (max {} bytes): {} bytes",
            MAX_HOOK_SCRIPT_SIZE, byte_len
        )));
    }

    if content.chars().any(|ch| ch == '\0') {
        return Err(GitError::InvalidArgument(
            "hook content cannot contain null bytes".into(),
        ));
    }

    // Blocklist pass: compare lowercased script against lowercased entries.
    // The entry itself (not its lowercased form) is what the error reports.
    let lowered = content.to_lowercase();
    let forbidden_hit = FORBIDDEN_PATTERNS
        .iter()
        .find(|candidate| lowered.contains(&candidate.to_lowercase()));
    if let Some(pattern) = forbidden_hit {
        return Err(GitError::InvalidArgument(format!(
            "hook content contains forbidden pattern: '{pattern}'"
        )));
    }

    // Second list is matched against the untouched text, i.e. case-sensitively.
    let dangerous_hit = DANGEROUS_PREFIXES
        .iter()
        .find(|needle| content.contains(**needle));
    if let Some(prefix) = dangerous_hit {
        return Err(GitError::InvalidArgument(format!(
            "hook content contains dangerous command: '{prefix}'"
        )));
    }

    // Heuristic checks last; the obfuscation check takes precedence.
    check_obfuscation_attempts(content)?;
    check_dangerous_pairs(content)
}

/// Reject content in which both halves of any `DANGEROUS_COMMAND_PAIRS` entry
/// occur.
///
/// The content is lowercased and each half is looked up as an independent
/// substring, so the two halves may be anywhere in the script. The first
/// matching pair, in list order, is named in the error.
///
/// NOTE(review): substring matching means halves like `nc` or `-e` also hit
/// inside unrelated words and options — confirm this breadth is intended.
///
/// # Errors
///
/// Returns `GitError::InvalidArgument` naming the offending pair.
fn check_dangerous_pairs(content: &str) -> GitResult<()> {
    let haystack = content.to_lowercase();

    let offending = DANGEROUS_COMMAND_PAIRS
        .iter()
        .find(|(first, second)| haystack.contains(*first) && haystack.contains(*second));

    match offending {
        Some((cmd1, cmd2)) => Err(GitError::InvalidArgument(format!(
            "hook contains dangerous command combination: '{cmd1}' + '{cmd2}' (possible data exfiltration)"
        ))),
        None => Ok(()),
    }
}

/// Apply heuristics that flag content which looks deliberately obfuscated.
///
/// Three checks run in order:
///
/// 1. the share of shell metacharacters (``$ ` \ | ; & ( ) { } [ ]``) among
///    all characters, computed as a truncated integer percentage, must not
///    exceed 30;
/// 2. the literal sequence `\x` must not occur more than 5 times;
/// 3. the literal sequence `\u` must not occur more than 5 times.
///
/// Empty content passes all three checks.
///
/// # Errors
///
/// Returns `GitError::InvalidArgument` describing the first heuristic that
/// fired.
fn check_obfuscation_attempts(content: &str) -> GitResult<()> {
    // Characters that count towards the metacharacter density.
    const METACHARS: [char; 12] = [
        '$', '`', '\\', '|', ';', '&', '(', ')', '{', '}', '[', ']',
    ];
    // Escape markers and the message reported when one occurs too often.
    const ESCAPE_CHECKS: [(&str, &str); 2] = [
        (
            "\\x",
            "hook content contains hex encoding (potential obfuscation)",
        ),
        (
            "\\u",
            "hook content contains unicode escapes (potential obfuscation)",
        ),
    ];

    // Count characters (not bytes) and metacharacters in a single pass.
    let mut total = 0usize;
    let mut special = 0usize;
    for ch in content.chars() {
        total += 1;
        if METACHARS.contains(&ch) {
            special += 1;
        }
    }

    // Integer division truncates, so exactly 30% is still accepted.
    let too_dense = total > 0 && special * 100 / total > 30;
    if too_dense {
        return Err(GitError::InvalidArgument(
            "hook content appears obfuscated (too many special characters)".into(),
        ));
    }

    for (marker, message) in ESCAPE_CHECKS.iter() {
        if content.matches(*marker).count() > 5 {
            return Err(GitError::InvalidArgument((*message).into()));
        }
    }

    Ok(())
}

/// Check that `name` is one of the hook names this module accepts.
///
/// The comparison is an exact, case-sensitive match against a fixed
/// allowlist.
///
/// NOTE(review): the allowlist is a subset of the hooks Git itself defines
/// (for example `pre-push` and `post-update` are absent) — confirm whether
/// that restriction is deliberate.
///
/// # Errors
///
/// Returns `GitError::InvalidArgument` listing the accepted names when `name`
/// is not in the allowlist.
pub fn validate_hook_name(name: &str) -> GitResult<()> {
    const VALID_HOOK_NAMES: &[&str] = &[
        "pre-receive",
        "update",
        "post-receive",
        "pre-applypatch",
        "applypatch-msg",
        "post-applypatch",
        "pre-commit",
        "prepare-commit-msg",
        "commit-msg",
        "post-commit",
        "pre-auto-gc",
    ];

    let is_known = VALID_HOOK_NAMES.iter().any(|known| *known == name);
    if is_known {
        return Ok(());
    }

    let accepted = VALID_HOOK_NAMES.join(", ");
    Err(GitError::InvalidArgument(format!(
        "invalid hook name: '{name}'. Must be one of: {}",
        accepted
    )))
}