use std::collections::HashMap; use crate::bare::GitBare; use crate::diff::get_diff_stats::{diff_stats_for_range, push_diff_options}; use crate::error::{GitError, GitResult}; use crate::paginate; use crate::pb::diff_file::ChangeType; use crate::pb::{DiffFile, GetDiffRequest, GetDiffResponse}; /// Parsed entry from `git diff --raw -z` struct RawDiffEntry { status: char, old_path: String, new_path: String, old_mode: u32, new_mode: u32, old_oid: String, new_oid: String, similarity: f64, } /// Type alias for diff raw output: (entries, numstat_map) type DiffRawOutput = (Vec, HashMap); impl GitBare { pub fn get_diff(&self, request: GetDiffRequest) -> GitResult { let base = match request.base.and_then(|s| s.selector) { Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex.clone(), Some(crate::pb::object_selector::Selector::Revision(name)) => { crate::sanitize::validate_revision(&name.revision)?; name.revision.clone() } None => "HEAD".into(), }; let head = match request.head.and_then(|s| s.selector) { Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex.clone(), Some(crate::pb::object_selector::Selector::Revision(name)) => { crate::sanitize::validate_revision(&name.revision)?; name.revision.clone() } None => "HEAD".into(), }; tracing::debug!( repo = %self.bare_dir.display(), base = %base, head = %head, "computing diff" ); let options = request.options.as_ref(); let want_patch = options.is_some_and(|o| o.include_patch); let (raw_entries, numstat_map) = self.diff_raw_and_numstat(&base, &head, options)?; let max_files = options.and_then(|o| (o.max_files > 0).then_some(o.max_files as usize)); let overflow = max_files.is_some_and(|max| raw_entries.len() > max); let entries_to_build = max_files.map_or(raw_entries.as_slice(), |max| { &raw_entries[..raw_entries.len().min(max)] }); let patch_map = if want_patch { self.diff_patch_batch(&base, &head, options)? } else { HashMap::new() }; let mut files = Vec::with_capacity(entries_to_build.len()); for entry in entries_to_build { let path = if !entry.new_path.is_empty() { &entry.new_path } else { &entry.old_path }; let (additions, deletions, binary) = numstat_map .get(path) .map(|(a, d, b)| (*a, *d, *b)) .unwrap_or((0, 0, false)); let too_large = options.is_some_and(|o| { o.max_bytes > 0 && patch_map .get(path) .is_some_and(|p: &Vec| p.len() > o.max_bytes as usize) }); let patch = patch_map .get(path) .map(|p| { let max = options.map(|o| o.max_bytes as usize).unwrap_or(0); if too_large && max > 0 { p[..max].to_vec() } else { p.clone() } }) .unwrap_or_default(); files.push(DiffFile { old_path: entry.old_path.clone(), new_path: entry.new_path.clone(), old_oid: if !entry.old_oid.is_empty() && entry.old_oid != "0000000000000000000000000000000000000000" { Some(self.oid_to_pb(&entry.old_oid)) } else { None }, new_oid: if !entry.new_oid.is_empty() && entry.new_oid != "0000000000000000000000000000000000000000" { Some(self.oid_to_pb(&entry.new_oid)) } else { None }, old_mode: entry.old_mode, new_mode: entry.new_mode, change_type: change_type(entry.status) as i32, binary, too_large, additions, deletions, hunks: Vec::new(), patch, similarity: entry.similarity, }); } let stats = diff_stats_for_range(self, &base, &head, options)?; let (files, page_info) = paginate::paginate(&files, request.pagination.as_ref()); Ok(GetDiffResponse { files, stats: Some(stats), page_info: Some(page_info), overflow, }) } /// Single subprocess call that gets BOTH --raw and --numstat with -z. /// Returns parsed raw entries and a map of path → (additions, deletions, binary). /// /// Combined output format with -z (NUL-separated records): /// : \0\0 /// (for R/C: ...\0\0\0) /// Then numstat records: \t\t\0 fn diff_raw_and_numstat( &self, base: &str, head: &str, options: Option<&crate::pb::DiffOptions>, ) -> GitResult { let mut args = vec![ "--git-dir".to_string(), self.bare_dir.to_string_lossy().into_owned(), "diff".into(), "--raw".into(), "--numstat".into(), "-z".into(), ]; push_diff_options(&mut args, options); args.push(base.to_string()); args.push(head.to_string()); if let Some(options) = options && !options.pathspec.is_empty() { args.push("--".into()); args.extend(options.pathspec.iter().cloned()); } let result = duct::cmd("git", &args) .stdout_capture() .stderr_capture() .unchecked() .run()?; if !result.status.success() { return Err(GitError::CommandFailed { status_code: result.status.code(), stderr: String::from_utf8_lossy(&result.stderr).into_owned(), }); } // Split by NUL — each record is NUL-terminated let records: Vec<&[u8]> = result.stdout.split(|b| *b == 0).collect(); let mut raw_entries = Vec::new(); let mut numstat_map: HashMap = HashMap::new(); let mut i = 0; while i < records.len() { let record = records[i]; if record.is_empty() { i += 1; continue; } if record.starts_with(b":") { // Raw meta record: ": " // In older git: tab before status. In newer git: space before status. // The path(s) follow as separate NUL-terminated records. let record_str = String::from_utf8_lossy(record).into_owned(); // Try tab separator first (older git), then space (newer git) let (meta, status_str) = if let Some((m, s)) = record_str.rsplit_once('\t') { (m, s) } else if let Some((m, s)) = record_str.rsplit_once(' ') { (m, s) } else { i += 1; continue; }; let meta_parts: Vec<&str> = meta.split_whitespace().collect(); let old_mode = meta_parts .first() .and_then(|s| u32::from_str_radix(s, 8).ok()) .unwrap_or(0); let new_mode = meta_parts .get(1) .and_then(|s| u32::from_str_radix(s, 8).ok()) .unwrap_or(0); let old_oid = meta_parts.get(2).unwrap_or(&"").to_string(); let new_oid = meta_parts.get(3).unwrap_or(&"").to_string(); let status = status_str.chars().next().unwrap_or('M'); let similarity = status_str .get(1..) .and_then(|s| s.parse::().ok()) .unwrap_or(0.0); // Read path record(s) that follow the meta record let (old_path, new_path) = match status { 'R' | 'C' => { // Two path records: old_path\0new_path\0 let op = if i + 1 < records.len() { i += 1; String::from_utf8_lossy(records[i]).into_owned() } else { String::new() }; let np = if i + 1 < records.len() { i += 1; String::from_utf8_lossy(records[i]).into_owned() } else { String::new() }; (op, np) } 'A' => { let p = if i + 1 < records.len() { i += 1; String::from_utf8_lossy(records[i]).into_owned() } else { String::new() }; (String::new(), p) } 'D' => { let p = if i + 1 < records.len() { i += 1; String::from_utf8_lossy(records[i]).into_owned() } else { String::new() }; (p, String::new()) } _ => { let p = if i + 1 < records.len() { i += 1; String::from_utf8_lossy(records[i]).into_owned() } else { String::new() }; (p.clone(), p) } }; raw_entries.push(RawDiffEntry { status, old_path, new_path, old_mode, new_mode, old_oid, new_oid, similarity, }); } else { // Numstat record: "\t\t" let record_str = String::from_utf8_lossy(record); let parts: Vec<&str> = record_str.split('\t').collect(); if parts.len() >= 3 { let binary = parts[0] == "-" || parts[1] == "-"; let add = parts[0].parse().unwrap_or(0u32); let del = parts[1].parse().unwrap_or(0u32); let path = parts[2].to_string(); numstat_map.insert(path, (add, del, binary)); } } i += 1; } Ok((raw_entries, numstat_map)) } /// Single subprocess call to get patches for ALL files at once. /// Returns a map of path → patch bytes. fn diff_patch_batch( &self, base: &str, head: &str, options: Option<&crate::pb::DiffOptions>, ) -> GitResult>> { let context = options .map(|o| o.context_lines.to_string()) .unwrap_or_else(|| "3".into()); let mut args = vec![ "--git-dir".to_string(), self.bare_dir.to_string_lossy().into_owned(), "diff".into(), "--patch".into(), format!("--unified={context}"), ]; if options.is_some_and(|o| o.include_binary) { args.push("--binary".into()); } push_diff_options(&mut args, options); args.push(base.to_string()); args.push(head.to_string()); if let Some(options) = options && !options.pathspec.is_empty() { args.push("--".into()); args.extend(options.pathspec.iter().cloned()); } let result = duct::cmd("git", &args) .stdout_capture() .stderr_capture() .unchecked() .run()?; if !result.status.success() { return Err(GitError::CommandFailed { status_code: result.status.code(), stderr: String::from_utf8_lossy(&result.stderr).into_owned(), }); } // Split combined patch output by "diff --git" headers let mut map = HashMap::new(); let output = &result.stdout; let header = b"diff --git "; let mut chunks: Vec<&[u8]> = Vec::new(); let mut pos = 0; // Find all header positions let mut header_positions = Vec::new(); while let Some(idx) = output[pos..] .windows(header.len()) .position(|w| w == header) { header_positions.push(pos + idx); pos = pos + idx + header.len(); } for (i, &start) in header_positions.iter().enumerate() { let end = header_positions.get(i + 1).copied().unwrap_or(output.len()); chunks.push(&output[start..end]); } for chunk in chunks { // Extract file path from "diff --git a/path b/path\n" let first_line_end = chunk .iter() .position(|&b| b == b'\n') .unwrap_or(chunk.len()); let first_line = String::from_utf8_lossy(&chunk[..first_line_end]); if let Some(b_pos) = first_line.rfind(" b/") { let path = &first_line[b_pos + 3..]; map.insert(path.to_string(), chunk.to_vec()); } } Ok(map) } } fn change_type(status: char) -> ChangeType { match status { 'A' => ChangeType::DiffFileChangeTypeAdded, 'D' => ChangeType::DiffFileChangeTypeDeleted, 'R' => ChangeType::DiffFileChangeTypeRenamed, 'C' => ChangeType::DiffFileChangeTypeCopied, 'T' => ChangeType::DiffFileChangeTypeTypeChanged, 'U' => ChangeType::DiffFileChangeTypeUnmerged, _ => ChangeType::DiffFileChangeTypeModified, } }