feat(server): add tracing spans and caching to archive and blame services

- Add tracing spans with repo labels for archive and blame operations
- Implement caching for archive list entries when using OID selectors
- Implement caching for blame operations when using OID selectors
- Add detailed
This commit is contained in:
zhenyi
2026-06-04 15:33:16 +08:00
parent 729604f13b
commit cc202d6d1f
41 changed files with 2400 additions and 1067 deletions
+248 -187
View File
@@ -1,3 +1,5 @@
use std::collections::HashMap;
use crate::bare::GitBare;
use crate::diff::get_diff_stats::{diff_stats_for_range, push_diff_options};
use crate::error::{GitError, GitResult};
@@ -5,62 +7,106 @@ use crate::paginate;
use crate::pb::diff_file::ChangeType;
use crate::pb::{DiffFile, GetDiffRequest, GetDiffResponse};
#[derive(Debug, Clone)]
struct NameStatusEntry {
/// Parsed entry from `git diff --raw -z`
struct RawDiffEntry {
/// First character of the status token (e.g. 'A', 'D', 'R', 'C'); the parser
/// falls back to 'M' when the token is empty.
status: char,
/// Path read for the old side; left empty when `status` is 'A'.
old_path: String,
/// Path read for the new side; left empty when `status` is 'D'.
/// For statuses other than A/D/R/C both paths hold the same value.
new_path: String,
/// First mode field of the raw record, parsed as octal; 0 when missing or unparsable.
old_mode: u32,
/// Second mode field of the raw record, parsed as octal; 0 when missing or unparsable.
new_mode: u32,
/// Third field of the raw record, stored as printed (empty string when missing).
old_oid: String,
/// Fourth field of the raw record, stored as printed (empty string when missing).
new_oid: String,
/// Number that follows the status letter (e.g. the 90 in "R90"); 0.0 when
/// nothing parsable follows it.
similarity: f64,
}
/// Mode and object id of a single tree entry, as filled in by `tree_meta`
/// from one `git ls-tree -z -l` record.
#[derive(Debug, Clone, Default)]
struct TreeMeta {
/// Third whitespace-separated field of the record's metadata, stored as printed.
oid_hex: String,
/// First field of the record's metadata parsed as octal; 0 when it does not parse.
mode: u32,
}
impl GitBare {
pub fn get_diff(&self, request: GetDiffRequest) -> GitResult<GetDiffResponse> {
let base = match request.base.and_then(|s| s.selector) {
Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex,
Some(crate::pb::object_selector::Selector::Revision(name)) => name.revision,
Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex.clone(),
Some(crate::pb::object_selector::Selector::Revision(name)) => name.revision.clone(),
None => "HEAD".into(),
};
let head = match request.head.and_then(|s| s.selector) {
Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex,
Some(crate::pb::object_selector::Selector::Revision(name)) => name.revision,
Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex.clone(),
Some(crate::pb::object_selector::Selector::Revision(name)) => name.revision.clone(),
None => "HEAD".into(),
};
tracing::debug!(
repo = %self.bare_dir.display(),
base = %base,
head = %head,
"computing diff"
);
let options = request.options.as_ref();
let entries = self.diff_name_status(&base, &head, options)?;
let max_files = options.and_then(|o| (o.max_files > 0).then_some(o.max_files as usize));
let overflow = max_files.is_some_and(|max| entries.len() > max);
let entries_to_build =
max_files.map_or(entries.as_slice(), |max| &entries[..entries.len().min(max)]);
let want_patch = options.is_some_and(|o| o.include_patch);
// ── Call 1: --raw -z --numstat -z (all metadata + line counts) ──
let (raw_entries, numstat_map) = self.diff_raw_and_numstat(&base, &head, options)?;
let max_files = options.and_then(|o| (o.max_files > 0).then_some(o.max_files as usize));
let overflow = max_files.is_some_and(|max| raw_entries.len() > max);
let entries_to_build = max_files.map_or(raw_entries.as_slice(), |max| {
&raw_entries[..raw_entries.len().min(max)]
});
// ── Call 2 (optional): --patch for all files at once ──
let patch_map = if want_patch {
self.diff_patch_batch(&base, &head, options)?
} else {
HashMap::new()
};
// ── Merge results (zero additional subprocess calls) ──
let mut files = Vec::with_capacity(entries_to_build.len());
for entry in entries_to_build {
let old_meta = if !entry.old_path.is_empty() {
self.tree_meta(&base, &entry.old_path).ok().flatten()
let path = if !entry.new_path.is_empty() {
&entry.new_path
} else {
None
&entry.old_path
};
let new_meta = if !entry.new_path.is_empty() {
self.tree_meta(&head, &entry.new_path).ok().flatten()
} else {
None
};
let (additions, deletions, binary) = self.path_numstat(&base, &head, entry)?;
let (patch, too_large) = self.path_patch(&base, &head, entry, options)?;
let (additions, deletions, binary) = numstat_map
.get(path)
.map(|(a, d, b)| (*a, *d, *b))
.unwrap_or((0, 0, false));
let too_large = options.is_some_and(|o| {
o.max_bytes > 0
&& patch_map
.get(path)
.is_some_and(|p: &Vec<u8>| p.len() > o.max_bytes as usize)
});
let patch = patch_map
.get(path)
.map(|p| {
let max = options.map(|o| o.max_bytes as usize).unwrap_or(0);
if too_large && max > 0 {
p[..max].to_vec()
} else {
p.clone()
}
})
.unwrap_or_default();
files.push(DiffFile {
old_path: entry.old_path.clone(),
new_path: entry.new_path.clone(),
old_oid: old_meta.as_ref().map(|m| self.oid_to_pb(&m.oid_hex)),
new_oid: new_meta.as_ref().map(|m| self.oid_to_pb(&m.oid_hex)),
old_mode: old_meta.as_ref().map(|m| m.mode).unwrap_or(0),
new_mode: new_meta.as_ref().map(|m| m.mode).unwrap_or(0),
old_oid: if !entry.old_oid.is_empty()
&& entry.old_oid != "0000000000000000000000000000000000000000"
{
Some(self.oid_to_pb(&entry.old_oid))
} else {
None
},
new_oid: if !entry.new_oid.is_empty()
&& entry.new_oid != "0000000000000000000000000000000000000000"
{
Some(self.oid_to_pb(&entry.new_oid))
} else {
None
},
old_mode: entry.old_mode,
new_mode: entry.new_mode,
change_type: change_type(entry.status) as i32,
binary,
too_large,
@@ -72,6 +118,7 @@ impl GitBare {
});
}
// ── Call 3: diff --shortstat (already efficient, single call) ──
let stats = diff_stats_for_range(self, &base, &head, options)?;
let (files, page_info) = paginate::paginate(&files, request.pagination.as_ref());
@@ -83,17 +130,25 @@ impl GitBare {
})
}
fn diff_name_status(
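/// Single subprocess call that gets BOTH --raw and --numstat with -z.
/// Returns parsed raw entries and a map of path → (additions, deletions, binary).
///
/// Combined output format with -z (NUL-separated records):
/// :<src_mode> <dst_mode> <src_hash> <dst_hash> <status>\0<path>\0
/// (for R/C: ...\0<old_path>\0<new_path>\0)
/// Then numstat records: <add>\t<del>\t<path>\0
/// (for R/C: <add>\t<del>\t\0<old_path>\0<new_path>\0 — the path field after
/// the second tab is empty and both paths follow as separate records;
/// binary files report "-" for both counts)
///
/// NOTE(review): the numstat branch below takes the path from the third
/// tab-separated field, which is empty for R/C records — confirm that
/// rename/copy line counts end up keyed by the path `get_diff` looks up.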
fn diff_raw_and_numstat(
&self,
base: &str,
head: &str,
options: Option<&crate::pb::DiffOptions>,
) -> GitResult<Vec<NameStatusEntry>> {
) -> GitResult<(Vec<RawDiffEntry>, HashMap<String, (u32, u32, bool)>)> {
let mut args = vec![
"--git-dir".to_string(),
self.bare_dir.to_string_lossy().into_owned(),
"diff".into(),
"--name-status".into(),
"--raw".into(),
"--numstat".into(),
"-z".into(),
];
push_diff_options(&mut args, options);
@@ -118,168 +173,140 @@ impl GitBare {
});
}
let parts = result
.stdout
.split(|b| *b == 0)
.filter(|part| !part.is_empty())
.map(|part| String::from_utf8_lossy(part).into_owned())
.collect::<Vec<_>>();
// Split by NUL — each record is NUL-terminated
let records: Vec<&[u8]> = result.stdout.split(|b| *b == 0).collect();
let mut entries = Vec::new();
let mut idx = 0;
while idx < parts.len() {
let status_token = &parts[idx];
idx += 1;
let status = status_token.chars().next().unwrap_or('M');
let similarity = status_token
.get(1..)
.and_then(|s| s.parse::<f64>().ok())
.unwrap_or(0.0);
let mut raw_entries = Vec::new();
let mut numstat_map: HashMap<String, (u32, u32, bool)> = HashMap::new();
let mut i = 0;
if matches!(status, 'R' | 'C') {
if idx + 1 >= parts.len() {
break;
}
let old_path = parts[idx].clone();
let new_path = parts[idx + 1].clone();
idx += 2;
entries.push(NameStatusEntry {
while i < records.len() {
let record = records[i];
if record.is_empty() {
i += 1;
continue;
}
if record.starts_with(b":") {
// Raw meta record: ":<src_mode> <dst_mode> <src_hash> <dst_hash> <status_char>"
// In older git: tab before status. In newer git: space before status.
// The path(s) follow as separate NUL-terminated records.
let record_str = String::from_utf8_lossy(record).into_owned();
// Try tab separator first (older git), then space (newer git)
let (meta, status_str) = if let Some((m, s)) = record_str.rsplit_once('\t') {
(m, s)
} else if let Some((m, s)) = record_str.rsplit_once(' ') {
(m, s)
} else {
i += 1;
continue;
};
let meta_parts: Vec<&str> = meta.split_whitespace().collect();
let old_mode = meta_parts
.first()
.and_then(|s| u32::from_str_radix(s, 8).ok())
.unwrap_or(0);
let new_mode = meta_parts
.get(1)
.and_then(|s| u32::from_str_radix(s, 8).ok())
.unwrap_or(0);
let old_oid = meta_parts.get(2).unwrap_or(&"").to_string();
let new_oid = meta_parts.get(3).unwrap_or(&"").to_string();
let status = status_str.chars().next().unwrap_or('M');
let similarity = status_str
.get(1..)
.and_then(|s| s.parse::<f64>().ok())
.unwrap_or(0.0);
// Read path record(s) that follow the meta record
let (old_path, new_path) = match status {
'R' | 'C' => {
// Two path records: old_path\0new_path\0
let op = if i + 1 < records.len() {
i += 1;
String::from_utf8_lossy(records[i]).into_owned()
} else {
String::new()
};
let np = if i + 1 < records.len() {
i += 1;
String::from_utf8_lossy(records[i]).into_owned()
} else {
String::new()
};
(op, np)
}
'A' => {
let p = if i + 1 < records.len() {
i += 1;
String::from_utf8_lossy(records[i]).into_owned()
} else {
String::new()
};
(String::new(), p)
}
'D' => {
let p = if i + 1 < records.len() {
i += 1;
String::from_utf8_lossy(records[i]).into_owned()
} else {
String::new()
};
(p, String::new())
}
_ => {
let p = if i + 1 < records.len() {
i += 1;
String::from_utf8_lossy(records[i]).into_owned()
} else {
String::new()
};
(p.clone(), p)
}
};
raw_entries.push(RawDiffEntry {
status,
old_path,
new_path,
old_mode,
new_mode,
old_oid,
new_oid,
similarity,
});
} else {
if idx >= parts.len() {
break;
// Numstat record: "<add>\t<del>\t<path>"
let record_str = String::from_utf8_lossy(record);
let parts: Vec<&str> = record_str.split('\t').collect();
if parts.len() >= 3 {
let binary = parts[0] == "-" || parts[1] == "-";
let add = parts[0].parse().unwrap_or(0u32);
let del = parts[1].parse().unwrap_or(0u32);
let path = parts[2].to_string();
numstat_map.insert(path, (add, del, binary));
}
let path = parts[idx].clone();
idx += 1;
let (old_path, new_path) = match status {
'A' => (String::new(), path),
'D' => (path, String::new()),
_ => (path.clone(), path),
};
entries.push(NameStatusEntry {
status,
old_path,
new_path,
similarity,
});
}
i += 1;
}
Ok(entries)
Ok((raw_entries, numstat_map))
}
/// Looks up the tree entry for `path` at `revision` with `git ls-tree -z -l`.
///
/// Only the first non-empty NUL-terminated record of the output is examined.
/// Its metadata (everything before the first tab) is split on whitespace; the
/// first field is read as the octal mode and the third as the object id.
///
/// Returns `Ok(None)` when git exits unsuccessfully, prints nothing, or the
/// record does not have the expected `<meta>\t<path>` shape with at least
/// three metadata fields. An `Err` is produced only when the subprocess
/// itself cannot be run.
fn tree_meta(&self, revision: &str, path: &str) -> GitResult<Option<TreeMeta>> {
    let git_dir = self.bare_dir.to_string_lossy();
    let output = duct::cmd(
        "git",
        [
            "--git-dir",
            &*git_dir,
            "ls-tree",
            "-z",
            "-l",
            revision,
            "--",
            path,
        ],
    )
    .stdout_capture()
    .stderr_capture()
    // Non-zero exit is handled below instead of being turned into an error.
    .unchecked()
    .run()?;

    let usable = output.status.success() && !output.stdout.is_empty();
    if !usable {
        return Ok(None);
    }

    // Records are NUL-terminated; take the first one that carries any bytes.
    let Some(first_record) = output
        .stdout
        .split(|&byte| byte == 0)
        .find(|chunk| !chunk.is_empty())
    else {
        return Ok(None);
    };
    let line = String::from_utf8_lossy(first_record);

    // Metadata and path are separated by a single tab.
    let Some((header, _)) = line.split_once('\t') else {
        return Ok(None);
    };

    // Header layout: <mode> <type> <oid> [<size>]; mode and oid are the ones kept.
    let mut fields = header.split_whitespace();
    let (Some(mode_field), Some(_kind_field), Some(oid_field)) =
        (fields.next(), fields.next(), fields.next())
    else {
        return Ok(None);
    };

    Ok(Some(TreeMeta {
        mode: u32::from_str_radix(mode_field, 8).unwrap_or(0),
        oid_hex: oid_field.to_string(),
    }))
}
fn path_numstat(
/// Single subprocess call to get patches for ALL files at once.
/// Returns a map of path → patch bytes.
fn diff_patch_batch(
&self,
base: &str,
head: &str,
entry: &NameStatusEntry,
) -> GitResult<(u32, u32, bool)> {
let path = if entry.new_path.is_empty() {
&entry.old_path
} else {
&entry.new_path
};
let result = duct::cmd(
"git",
[
"--git-dir",
self.bare_dir.to_string_lossy().as_ref(),
"diff",
"--numstat",
base,
head,
"--",
path,
],
)
.stdout_capture()
.stderr_capture()
.unchecked()
.run()?;
if !result.status.success() {
return Err(GitError::CommandFailed {
status_code: result.status.code(),
stderr: String::from_utf8_lossy(&result.stderr).into_owned(),
});
}
let line = String::from_utf8_lossy(&result.stdout)
.lines()
.next()
.unwrap_or_default()
.to_string();
let mut parts = line.split('\t');
let add = parts.next().unwrap_or_default();
let del = parts.next().unwrap_or_default();
let binary = add == "-" || del == "-";
Ok((add.parse().unwrap_or(0), del.parse().unwrap_or(0), binary))
}
fn path_patch(
&self,
base: &str,
head: &str,
entry: &NameStatusEntry,
options: Option<&crate::pb::DiffOptions>,
) -> GitResult<(Vec<u8>, bool)> {
let Some(options) = options else {
return Ok((Vec::new(), false));
};
if !options.include_patch {
return Ok((Vec::new(), false));
}
let path = if entry.new_path.is_empty() {
&entry.old_path
} else {
&entry.new_path
};
let context = options.context_lines.to_string();
) -> GitResult<HashMap<String, Vec<u8>>> {
let context = options
.map(|o| o.context_lines.to_string())
.unwrap_or_else(|| "3".into());
let mut args = vec![
"--git-dir".to_string(),
self.bare_dir.to_string_lossy().into_owned(),
@@ -287,14 +314,18 @@ impl GitBare {
"--patch".into(),
format!("--unified={context}"),
];
if options.include_binary {
if options.is_some_and(|o| o.include_binary) {
args.push("--binary".into());
}
push_diff_options(&mut args, Some(options));
push_diff_options(&mut args, options);
args.push(base.to_string());
args.push(head.to_string());
args.push("--".into());
args.push(path.to_string());
if let Some(options) = options
&& !options.pathspec.is_empty()
{
args.push("--".into());
args.extend(options.pathspec.iter().cloned());
}
let result = duct::cmd("git", &args)
.stdout_capture()
@@ -308,12 +339,42 @@ impl GitBare {
});
}
let mut patch = result.stdout;
let too_large = options.max_bytes > 0 && patch.len() > options.max_bytes as usize;
if too_large {
patch.truncate(options.max_bytes as usize);
// Split combined patch output by "diff --git" headers
let mut map = HashMap::new();
let output = &result.stdout;
let header = b"diff --git ";
let mut chunks: Vec<&[u8]> = Vec::new();
let mut pos = 0;
// Find all header positions
let mut header_positions = Vec::new();
while let Some(idx) = output[pos..]
.windows(header.len())
.position(|w| w == header)
{
header_positions.push(pos + idx);
pos = pos + idx + header.len();
}
Ok((patch, too_large))
for (i, &start) in header_positions.iter().enumerate() {
let end = header_positions.get(i + 1).copied().unwrap_or(output.len());
chunks.push(&output[start..end]);
}
for chunk in chunks {
// Extract file path from "diff --git a/path b/path\n"
let first_line_end = chunk
.iter()
.position(|&b| b == b'\n')
.unwrap_or(chunk.len());
let first_line = String::from_utf8_lossy(&chunk[..first_line_end]);
if let Some(b_pos) = first_line.rfind(" b/") {
let path = &first_line[b_pos + 3..];
map.insert(path.to_string(), chunk.to_vec());
}
}
Ok(map)
}
}