Files
gitks/diff/get_diff.rs
T
zhenyi d243dce027 refactor(server): replace custom remote clients with macro-based implementation
- Replaced manual remote client functions with remote_client! macro for archive, blame, branch, commit, and diff services
- Simplified remote client creation logic using declarative macro approach
- Maintained same functionality while reducing code duplication across services

security(bare): enhance path traversal protection with comprehensive validation

- Added early relative_path validation to prevent path traversal attacks
- Implemented unified path validation to avoid TOCTOU race conditions
- Enhanced canonicalization checks for both existing and non-existent paths
- Added detailed logging for path traversal detection attempts

feat(cache): migrate from CLruCache to Moka with TTL and invalidation support

- Replaced clru dependency with moka for improved caching capabilities
- Added 300-second time-to-live for cache entries
- Implemented repository-specific cache invalidation mechanism
- Enhanced cache operations with thread-safe async support

refactor(commit): improve security validation for commit operations

- Added ref name validation to prevent command injection in cherry_pick_commit
- Implemented revision validation for commit selectors
- Added comprehensive input validation for create_commit parameters
- Enhanced file path validation to prevent traversal
2026-06-08 09:43:57 +08:00

401 lines
15 KiB
Rust

use std::collections::HashMap;
use crate::bare::GitBare;
use crate::diff::get_diff_stats::{diff_stats_for_range, push_diff_options};
use crate::error::{GitError, GitResult};
use crate::paginate;
use crate::pb::diff_file::ChangeType;
use crate::pb::{DiffFile, GetDiffRequest, GetDiffResponse};
/// One file-level change, as parsed from a `git diff --raw -z` record.
struct RawDiffEntry {
    /// Status letter taken from the raw record (`A`, `D`, `M`, `R`, `C`, ...).
    status: char,
    /// Numeric score that follows the status letter (e.g. the `100` in `R100`);
    /// `0.0` when the record carries none.
    similarity: f64,

    /// Path on the base side; empty for added files.
    old_path: String,
    /// Path on the head side; empty for deleted files.
    new_path: String,

    /// File mode on the base side, parsed from its octal representation.
    old_mode: u32,
    /// File mode on the head side, parsed from its octal representation.
    new_mode: u32,

    /// Object id on the base side, exactly as printed by git.
    old_oid: String,
    /// Object id on the head side, exactly as printed by git.
    new_oid: String,
}

/// Result of the combined `--raw --numstat` call: the parsed raw entries plus
/// a map of path → (additions, deletions, binary).
type DiffRawOutput = (
    Vec<RawDiffEntry>,
    HashMap<String, (u32, u32, bool)>,
);
impl GitBare {
pub fn get_diff(&self, request: GetDiffRequest) -> GitResult<GetDiffResponse> {
let base = match request.base.and_then(|s| s.selector) {
Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex.clone(),
Some(crate::pb::object_selector::Selector::Revision(name)) => {
crate::sanitize::validate_revision(&name.revision)?;
name.revision.clone()
}
None => "HEAD".into(),
};
let head = match request.head.and_then(|s| s.selector) {
Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex.clone(),
Some(crate::pb::object_selector::Selector::Revision(name)) => {
crate::sanitize::validate_revision(&name.revision)?;
name.revision.clone()
}
None => "HEAD".into(),
};
tracing::debug!(
repo = %self.bare_dir.display(),
base = %base,
head = %head,
"computing diff"
);
let options = request.options.as_ref();
let want_patch = options.is_some_and(|o| o.include_patch);
// ── Call 1: --raw -z --numstat -z (all metadata + line counts) ──
let (raw_entries, numstat_map) = self.diff_raw_and_numstat(&base, &head, options)?;
let max_files = options.and_then(|o| (o.max_files > 0).then_some(o.max_files as usize));
let overflow = max_files.is_some_and(|max| raw_entries.len() > max);
let entries_to_build = max_files.map_or(raw_entries.as_slice(), |max| {
&raw_entries[..raw_entries.len().min(max)]
});
// ── Call 2 (optional): --patch for all files at once ──
let patch_map = if want_patch {
self.diff_patch_batch(&base, &head, options)?
} else {
HashMap::new()
};
// ── Merge results (zero additional subprocess calls) ──
let mut files = Vec::with_capacity(entries_to_build.len());
for entry in entries_to_build {
let path = if !entry.new_path.is_empty() {
&entry.new_path
} else {
&entry.old_path
};
let (additions, deletions, binary) = numstat_map
.get(path)
.map(|(a, d, b)| (*a, *d, *b))
.unwrap_or((0, 0, false));
let too_large = options.is_some_and(|o| {
o.max_bytes > 0
&& patch_map
.get(path)
.is_some_and(|p: &Vec<u8>| p.len() > o.max_bytes as usize)
});
let patch = patch_map
.get(path)
.map(|p| {
let max = options.map(|o| o.max_bytes as usize).unwrap_or(0);
if too_large && max > 0 {
p[..max].to_vec()
} else {
p.clone()
}
})
.unwrap_or_default();
files.push(DiffFile {
old_path: entry.old_path.clone(),
new_path: entry.new_path.clone(),
old_oid: if !entry.old_oid.is_empty()
&& entry.old_oid != "0000000000000000000000000000000000000000"
{
Some(self.oid_to_pb(&entry.old_oid))
} else {
None
},
new_oid: if !entry.new_oid.is_empty()
&& entry.new_oid != "0000000000000000000000000000000000000000"
{
Some(self.oid_to_pb(&entry.new_oid))
} else {
None
},
old_mode: entry.old_mode,
new_mode: entry.new_mode,
change_type: change_type(entry.status) as i32,
binary,
too_large,
additions,
deletions,
hunks: Vec::new(),
patch,
similarity: entry.similarity,
});
}
// ── Call 3: diff --shortstat (already efficient, single call) ──
let stats = diff_stats_for_range(self, &base, &head, options)?;
let (files, page_info) = paginate::paginate(&files, request.pagination.as_ref());
Ok(GetDiffResponse {
files,
stats: Some(stats),
page_info: Some(page_info),
overflow,
})
}
/// Single subprocess call that gets BOTH `--raw` and `--numstat` with `-z`.
/// Returns the parsed raw entries and a map of path → (additions, deletions, binary).
///
/// Combined output format with `-z` (NUL-separated records):
///   `:<src_mode> <dst_mode> <src_hash> <dst_hash> <status>\0<path>\0`
///   (for R/C: `...<status><score>\0<old_path>\0<new_path>\0`)
/// Then the numstat records:
///   `<add>\t<del>\t<path>\0`
///   (for R/C: `<add>\t<del>\t\0<old_path>\0<new_path>\0`)
///
/// Numstat entries for renames and copies are keyed by the new path. Binary
/// files report `-` for both counts; they are stored as `(0, 0, true)`.
///
/// # Errors
///
/// Returns `GitError::CommandFailed` with git's stderr when the command
/// exits unsuccessfully, or the spawn error if it could not be run.
fn diff_raw_and_numstat(
    &self,
    base: &str,
    head: &str,
    options: Option<&crate::pb::DiffOptions>,
) -> GitResult<DiffRawOutput> {
    /// Consumes the next NUL-separated record as a path; yields an empty
    /// string when the output ends early.
    fn next_path<'a>(records: &mut impl Iterator<Item = &'a [u8]>) -> String {
        records
            .next()
            .map(|raw| String::from_utf8_lossy(raw).into_owned())
            .unwrap_or_default()
    }

    let mut args = vec![
        "--git-dir".to_string(),
        self.bare_dir.to_string_lossy().into_owned(),
        "diff".into(),
        "--raw".into(),
        // `git diff --raw` abbreviates object ids by default; ask for full ids.
        "--no-abbrev".into(),
        "--numstat".into(),
        "-z".into(),
    ];
    push_diff_options(&mut args, options);
    args.push(base.to_string());
    args.push(head.to_string());
    if let Some(opts) = options.filter(|o| !o.pathspec.is_empty()) {
        args.push("--".into());
        args.extend(opts.pathspec.iter().cloned());
    }

    let result = duct::cmd("git", &args)
        .stdout_capture()
        .stderr_capture()
        .unchecked()
        .run()?;
    if !result.status.success() {
        return Err(GitError::CommandFailed {
            status_code: result.status.code(),
            stderr: String::from_utf8_lossy(&result.stderr).into_owned(),
        });
    }

    // Every record is NUL-terminated. The iterator is advanced manually
    // because one logical entry spans two or three records.
    let mut records = result.stdout.split(|b| *b == 0);
    let mut raw_entries = Vec::new();
    let mut numstat_map: HashMap<String, (u32, u32, bool)> = HashMap::new();

    while let Some(record) = records.next() {
        if record.is_empty() {
            continue;
        }
        let record_str = String::from_utf8_lossy(record);

        if let Some(meta_line) = record_str.strip_prefix(':') {
            // Raw meta record. The leading ':' is stripped first so that the
            // first field parses as an octal mode. The status is separated
            // by a tab in some output variants and by a space in others.
            let Some((meta, status_str)) = meta_line
                .rsplit_once('\t')
                .or_else(|| meta_line.rsplit_once(' '))
            else {
                continue;
            };

            let parse_mode = |field: Option<&str>| {
                field
                    .and_then(|s| u32::from_str_radix(s, 8).ok())
                    .unwrap_or(0)
            };
            let mut fields = meta.split_whitespace();
            let old_mode = parse_mode(fields.next());
            let new_mode = parse_mode(fields.next());
            let old_oid = fields.next().unwrap_or_default().to_string();
            let new_oid = fields.next().unwrap_or_default().to_string();

            // Status is a letter optionally followed by a score ("R100").
            let status = status_str.chars().next().unwrap_or('M');
            let similarity = status_str
                .get(1..)
                .and_then(|s| s.parse::<f64>().ok())
                .unwrap_or(0.0);

            // The path record(s) follow the meta record.
            let (old_path, new_path) = match status {
                'R' | 'C' => {
                    let old = next_path(&mut records);
                    let new = next_path(&mut records);
                    (old, new)
                }
                'A' => (String::new(), next_path(&mut records)),
                'D' => (next_path(&mut records), String::new()),
                _ => {
                    let path = next_path(&mut records);
                    (path.clone(), path)
                }
            };

            raw_entries.push(RawDiffEntry {
                status,
                old_path,
                new_path,
                old_mode,
                new_mode,
                old_oid,
                new_oid,
                similarity,
            });
        } else {
            // Numstat record: "<add>\t<del>\t<path>". Splitting into at most
            // three fields keeps paths that themselves contain a tab intact.
            let mut fields = record_str.splitn(3, '\t');
            let (Some(add), Some(del), Some(path)) =
                (fields.next(), fields.next(), fields.next())
            else {
                continue;
            };
            let binary = add == "-" || del == "-";
            let key = if path.is_empty() {
                // Rename/copy under -z: the path field is empty and the old
                // and new paths follow as two separate records.
                let _old_path = next_path(&mut records);
                next_path(&mut records)
            } else {
                path.to_string()
            };
            numstat_map.insert(
                key,
                (add.parse().unwrap_or(0u32), del.parse().unwrap_or(0u32), binary),
            );
        }
    }

    Ok((raw_entries, numstat_map))
}
/// Single subprocess call to get patches for ALL files at once.
/// Returns a map of path → patch bytes, where each value is the complete
/// per-file section starting at its `diff --git` header line.
///
/// The key is the text after the last ` b/` on the header line, i.e. the
/// head-side path. `core.quotePath` is disabled for this call so that
/// non-ASCII paths appear verbatim in the header instead of being C-quoted,
/// which would otherwise prevent them from being matched.
///
/// The number of context lines comes from `options.context_lines`, or 3 when
/// no options are given; `--binary` is added when `options.include_binary`
/// is set.
///
/// # Errors
///
/// Returns `GitError::CommandFailed` with git's stderr when the command
/// exits unsuccessfully, or the spawn error if it could not be run.
fn diff_patch_batch(
    &self,
    base: &str,
    head: &str,
    options: Option<&crate::pb::DiffOptions>,
) -> GitResult<HashMap<String, Vec<u8>>> {
    const HEADER: &[u8] = b"diff --git ";

    let context = options.map_or_else(|| "3".to_string(), |o| o.context_lines.to_string());
    let mut args = vec![
        "--git-dir".to_string(),
        self.bare_dir.to_string_lossy().into_owned(),
        "-c".into(),
        "core.quotePath=false".into(),
        "diff".into(),
        "--patch".into(),
        format!("--unified={context}"),
    ];
    if options.is_some_and(|o| o.include_binary) {
        args.push("--binary".into());
    }
    push_diff_options(&mut args, options);
    args.push(base.to_string());
    args.push(head.to_string());
    if let Some(opts) = options.filter(|o| !o.pathspec.is_empty()) {
        args.push("--".into());
        args.extend(opts.pathspec.iter().cloned());
    }

    let result = duct::cmd("git", &args)
        .stdout_capture()
        .stderr_capture()
        .unchecked()
        .run()?;
    if !result.status.success() {
        return Err(GitError::CommandFailed {
            status_code: result.status.code(),
            stderr: String::from_utf8_lossy(&result.stderr).into_owned(),
        });
    }

    let output = result.stdout.as_slice();

    // Record where each per-file section begins. A header only counts at the
    // start of a line: patch content is always prefixed (' ', '+', '-'), so
    // a changed file that merely contains the text "diff --git " must not
    // split its own patch.
    let mut section_starts = Vec::new();
    let mut offset = 0;
    for line in output.split_inclusive(|&b| b == b'\n') {
        if line.starts_with(HEADER) {
            section_starts.push(offset);
        }
        offset += line.len();
    }

    let mut map = HashMap::with_capacity(section_starts.len());
    for (idx, &start) in section_starts.iter().enumerate() {
        let end = section_starts
            .get(idx + 1)
            .copied()
            .unwrap_or(output.len());
        let section = &output[start..end];

        // Header line looks like "diff --git a/<old> b/<new>".
        let header_line = section
            .split(|&b| b == b'\n')
            .next()
            .unwrap_or(section);
        let header_line = String::from_utf8_lossy(header_line);
        if let Some(b_pos) = header_line.rfind(" b/") {
            map.insert(header_line[b_pos + 3..].to_string(), section.to_vec());
        }
    }
    Ok(map)
}
}
/// Maps a raw-diff status letter to the protobuf `ChangeType`.
///
/// Any letter without a dedicated variant — including `M` itself — is
/// reported as "modified".
fn change_type(status: char) -> ChangeType {
    let kind = match status {
        'U' => ChangeType::DiffFileChangeTypeUnmerged,
        'T' => ChangeType::DiffFileChangeTypeTypeChanged,
        'C' => ChangeType::DiffFileChangeTypeCopied,
        'R' => ChangeType::DiffFileChangeTypeRenamed,
        'D' => ChangeType::DiffFileChangeTypeDeleted,
        'A' => ChangeType::DiffFileChangeTypeAdded,
        _ => ChangeType::DiffFileChangeTypeModified,
    };
    kind
}