feat(server): add tracing spans and caching to archive and blame services
- Add tracing spans with repo labels for archive and blame operations - Implement caching for archive list entries when using OID selectors - Implement caching for blame operations when using OID selectors - Add detailed
This commit is contained in:
+248
-187
@@ -1,3 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::bare::GitBare;
|
||||
use crate::diff::get_diff_stats::{diff_stats_for_range, push_diff_options};
|
||||
use crate::error::{GitError, GitResult};
|
||||
@@ -5,62 +7,106 @@ use crate::paginate;
|
||||
use crate::pb::diff_file::ChangeType;
|
||||
use crate::pb::{DiffFile, GetDiffRequest, GetDiffResponse};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct NameStatusEntry {
|
||||
/// Parsed entry from `git diff --raw -z`
|
||||
struct RawDiffEntry {
|
||||
status: char,
|
||||
old_path: String,
|
||||
new_path: String,
|
||||
old_mode: u32,
|
||||
new_mode: u32,
|
||||
old_oid: String,
|
||||
new_oid: String,
|
||||
similarity: f64,
|
||||
}
|
||||
|
||||
/// Mode and object id of a single tree entry, as parsed from one
/// `git ls-tree` record by `GitBare::tree_meta`.
#[derive(Debug, Clone, Default)]
|
||||
struct TreeMeta {
|
||||
/// Third whitespace-separated field of the `ls-tree` record, stored verbatim.
/// NOTE(review): presumably the hex object id — confirm against `git ls-tree` output.
oid_hex: String,
|
||||
/// First field of the `ls-tree` record parsed as octal; `0` if it does not parse.
mode: u32,
|
||||
}
|
||||
|
||||
impl GitBare {
|
||||
pub fn get_diff(&self, request: GetDiffRequest) -> GitResult<GetDiffResponse> {
|
||||
let base = match request.base.and_then(|s| s.selector) {
|
||||
Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex,
|
||||
Some(crate::pb::object_selector::Selector::Revision(name)) => name.revision,
|
||||
Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex.clone(),
|
||||
Some(crate::pb::object_selector::Selector::Revision(name)) => name.revision.clone(),
|
||||
None => "HEAD".into(),
|
||||
};
|
||||
let head = match request.head.and_then(|s| s.selector) {
|
||||
Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex,
|
||||
Some(crate::pb::object_selector::Selector::Revision(name)) => name.revision,
|
||||
Some(crate::pb::object_selector::Selector::Oid(oid)) => oid.hex.clone(),
|
||||
Some(crate::pb::object_selector::Selector::Revision(name)) => name.revision.clone(),
|
||||
None => "HEAD".into(),
|
||||
};
|
||||
tracing::debug!(
|
||||
repo = %self.bare_dir.display(),
|
||||
base = %base,
|
||||
head = %head,
|
||||
"computing diff"
|
||||
);
|
||||
|
||||
let options = request.options.as_ref();
|
||||
let entries = self.diff_name_status(&base, &head, options)?;
|
||||
let max_files = options.and_then(|o| (o.max_files > 0).then_some(o.max_files as usize));
|
||||
let overflow = max_files.is_some_and(|max| entries.len() > max);
|
||||
let entries_to_build =
|
||||
max_files.map_or(entries.as_slice(), |max| &entries[..entries.len().min(max)]);
|
||||
let want_patch = options.is_some_and(|o| o.include_patch);
|
||||
|
||||
// ── Call 1: --raw -z --numstat -z (all metadata + line counts) ──
|
||||
let (raw_entries, numstat_map) = self.diff_raw_and_numstat(&base, &head, options)?;
|
||||
|
||||
let max_files = options.and_then(|o| (o.max_files > 0).then_some(o.max_files as usize));
|
||||
let overflow = max_files.is_some_and(|max| raw_entries.len() > max);
|
||||
let entries_to_build = max_files.map_or(raw_entries.as_slice(), |max| {
|
||||
&raw_entries[..raw_entries.len().min(max)]
|
||||
});
|
||||
|
||||
// ── Call 2 (optional): --patch for all files at once ──
|
||||
let patch_map = if want_patch {
|
||||
self.diff_patch_batch(&base, &head, options)?
|
||||
} else {
|
||||
HashMap::new()
|
||||
};
|
||||
|
||||
// ── Merge results (zero additional subprocess calls) ──
|
||||
let mut files = Vec::with_capacity(entries_to_build.len());
|
||||
for entry in entries_to_build {
|
||||
let old_meta = if !entry.old_path.is_empty() {
|
||||
self.tree_meta(&base, &entry.old_path).ok().flatten()
|
||||
let path = if !entry.new_path.is_empty() {
|
||||
&entry.new_path
|
||||
} else {
|
||||
None
|
||||
&entry.old_path
|
||||
};
|
||||
let new_meta = if !entry.new_path.is_empty() {
|
||||
self.tree_meta(&head, &entry.new_path).ok().flatten()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let (additions, deletions, binary) = self.path_numstat(&base, &head, entry)?;
|
||||
let (patch, too_large) = self.path_patch(&base, &head, entry, options)?;
|
||||
let (additions, deletions, binary) = numstat_map
|
||||
.get(path)
|
||||
.map(|(a, d, b)| (*a, *d, *b))
|
||||
.unwrap_or((0, 0, false));
|
||||
|
||||
let too_large = options.is_some_and(|o| {
|
||||
o.max_bytes > 0
|
||||
&& patch_map
|
||||
.get(path)
|
||||
.is_some_and(|p: &Vec<u8>| p.len() > o.max_bytes as usize)
|
||||
});
|
||||
let patch = patch_map
|
||||
.get(path)
|
||||
.map(|p| {
|
||||
let max = options.map(|o| o.max_bytes as usize).unwrap_or(0);
|
||||
if too_large && max > 0 {
|
||||
p[..max].to_vec()
|
||||
} else {
|
||||
p.clone()
|
||||
}
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
files.push(DiffFile {
|
||||
old_path: entry.old_path.clone(),
|
||||
new_path: entry.new_path.clone(),
|
||||
old_oid: old_meta.as_ref().map(|m| self.oid_to_pb(&m.oid_hex)),
|
||||
new_oid: new_meta.as_ref().map(|m| self.oid_to_pb(&m.oid_hex)),
|
||||
old_mode: old_meta.as_ref().map(|m| m.mode).unwrap_or(0),
|
||||
new_mode: new_meta.as_ref().map(|m| m.mode).unwrap_or(0),
|
||||
old_oid: if !entry.old_oid.is_empty()
|
||||
&& entry.old_oid != "0000000000000000000000000000000000000000"
|
||||
{
|
||||
Some(self.oid_to_pb(&entry.old_oid))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
new_oid: if !entry.new_oid.is_empty()
|
||||
&& entry.new_oid != "0000000000000000000000000000000000000000"
|
||||
{
|
||||
Some(self.oid_to_pb(&entry.new_oid))
|
||||
} else {
|
||||
None
|
||||
},
|
||||
old_mode: entry.old_mode,
|
||||
new_mode: entry.new_mode,
|
||||
change_type: change_type(entry.status) as i32,
|
||||
binary,
|
||||
too_large,
|
||||
@@ -72,6 +118,7 @@ impl GitBare {
|
||||
});
|
||||
}
|
||||
|
||||
// ── Call 3: diff --shortstat (already efficient, single call) ──
|
||||
let stats = diff_stats_for_range(self, &base, &head, options)?;
|
||||
let (files, page_info) = paginate::paginate(&files, request.pagination.as_ref());
|
||||
|
||||
@@ -83,17 +130,25 @@ impl GitBare {
|
||||
})
|
||||
}
|
||||
|
||||
fn diff_name_status(
|
||||
/// Single subprocess call that gets BOTH --raw and --numstat with -z.
|
||||
/// Returns parsed raw entries and a map of path → (additions, deletions, binary).
|
||||
///
|
||||
/// Combined output format with -z (NUL-separated records):
|
||||
/// :<src_mode> <dst_mode> <src_hash> <dst_hash> <status>\0<path>\0
|
||||
/// (for R/C: ...\0<old_path>\0<new_path>\0)
|
||||
/// Then numstat records: <add>\t<del>\t<path>\0
|
||||
fn diff_raw_and_numstat(
|
||||
&self,
|
||||
base: &str,
|
||||
head: &str,
|
||||
options: Option<&crate::pb::DiffOptions>,
|
||||
) -> GitResult<Vec<NameStatusEntry>> {
|
||||
) -> GitResult<(Vec<RawDiffEntry>, HashMap<String, (u32, u32, bool)>)> {
|
||||
let mut args = vec![
|
||||
"--git-dir".to_string(),
|
||||
self.bare_dir.to_string_lossy().into_owned(),
|
||||
"diff".into(),
|
||||
"--name-status".into(),
|
||||
"--raw".into(),
|
||||
"--numstat".into(),
|
||||
"-z".into(),
|
||||
];
|
||||
push_diff_options(&mut args, options);
|
||||
@@ -118,168 +173,140 @@ impl GitBare {
|
||||
});
|
||||
}
|
||||
|
||||
let parts = result
|
||||
.stdout
|
||||
.split(|b| *b == 0)
|
||||
.filter(|part| !part.is_empty())
|
||||
.map(|part| String::from_utf8_lossy(part).into_owned())
|
||||
.collect::<Vec<_>>();
|
||||
// Split by NUL — each record is NUL-terminated
|
||||
let records: Vec<&[u8]> = result.stdout.split(|b| *b == 0).collect();
|
||||
|
||||
let mut entries = Vec::new();
|
||||
let mut idx = 0;
|
||||
while idx < parts.len() {
|
||||
let status_token = &parts[idx];
|
||||
idx += 1;
|
||||
let status = status_token.chars().next().unwrap_or('M');
|
||||
let similarity = status_token
|
||||
.get(1..)
|
||||
.and_then(|s| s.parse::<f64>().ok())
|
||||
.unwrap_or(0.0);
|
||||
let mut raw_entries = Vec::new();
|
||||
let mut numstat_map: HashMap<String, (u32, u32, bool)> = HashMap::new();
|
||||
let mut i = 0;
|
||||
|
||||
if matches!(status, 'R' | 'C') {
|
||||
if idx + 1 >= parts.len() {
|
||||
break;
|
||||
}
|
||||
let old_path = parts[idx].clone();
|
||||
let new_path = parts[idx + 1].clone();
|
||||
idx += 2;
|
||||
entries.push(NameStatusEntry {
|
||||
while i < records.len() {
|
||||
let record = records[i];
|
||||
if record.is_empty() {
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if record.starts_with(b":") {
|
||||
// Raw meta record: ":<src_mode> <dst_mode> <src_hash> <dst_hash> <status_char>"
|
||||
// In older git: tab before status. In newer git: space before status.
|
||||
// The path(s) follow as separate NUL-terminated records.
|
||||
let record_str = String::from_utf8_lossy(record).into_owned();
|
||||
|
||||
// Try tab separator first (older git), then space (newer git)
|
||||
let (meta, status_str) = if let Some((m, s)) = record_str.rsplit_once('\t') {
|
||||
(m, s)
|
||||
} else if let Some((m, s)) = record_str.rsplit_once(' ') {
|
||||
(m, s)
|
||||
} else {
|
||||
i += 1;
|
||||
continue;
|
||||
};
|
||||
|
||||
let meta_parts: Vec<&str> = meta.split_whitespace().collect();
|
||||
let old_mode = meta_parts
|
||||
.first()
|
||||
.and_then(|s| u32::from_str_radix(s, 8).ok())
|
||||
.unwrap_or(0);
|
||||
let new_mode = meta_parts
|
||||
.get(1)
|
||||
.and_then(|s| u32::from_str_radix(s, 8).ok())
|
||||
.unwrap_or(0);
|
||||
let old_oid = meta_parts.get(2).unwrap_or(&"").to_string();
|
||||
let new_oid = meta_parts.get(3).unwrap_or(&"").to_string();
|
||||
|
||||
let status = status_str.chars().next().unwrap_or('M');
|
||||
let similarity = status_str
|
||||
.get(1..)
|
||||
.and_then(|s| s.parse::<f64>().ok())
|
||||
.unwrap_or(0.0);
|
||||
|
||||
// Read path record(s) that follow the meta record
|
||||
let (old_path, new_path) = match status {
|
||||
'R' | 'C' => {
|
||||
// Two path records: old_path\0new_path\0
|
||||
let op = if i + 1 < records.len() {
|
||||
i += 1;
|
||||
String::from_utf8_lossy(records[i]).into_owned()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let np = if i + 1 < records.len() {
|
||||
i += 1;
|
||||
String::from_utf8_lossy(records[i]).into_owned()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
(op, np)
|
||||
}
|
||||
'A' => {
|
||||
let p = if i + 1 < records.len() {
|
||||
i += 1;
|
||||
String::from_utf8_lossy(records[i]).into_owned()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
(String::new(), p)
|
||||
}
|
||||
'D' => {
|
||||
let p = if i + 1 < records.len() {
|
||||
i += 1;
|
||||
String::from_utf8_lossy(records[i]).into_owned()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
(p, String::new())
|
||||
}
|
||||
_ => {
|
||||
let p = if i + 1 < records.len() {
|
||||
i += 1;
|
||||
String::from_utf8_lossy(records[i]).into_owned()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
(p.clone(), p)
|
||||
}
|
||||
};
|
||||
|
||||
raw_entries.push(RawDiffEntry {
|
||||
status,
|
||||
old_path,
|
||||
new_path,
|
||||
old_mode,
|
||||
new_mode,
|
||||
old_oid,
|
||||
new_oid,
|
||||
similarity,
|
||||
});
|
||||
} else {
|
||||
if idx >= parts.len() {
|
||||
break;
|
||||
// Numstat record: "<add>\t<del>\t<path>"
|
||||
let record_str = String::from_utf8_lossy(record);
|
||||
let parts: Vec<&str> = record_str.split('\t').collect();
|
||||
if parts.len() >= 3 {
|
||||
let binary = parts[0] == "-" || parts[1] == "-";
|
||||
let add = parts[0].parse().unwrap_or(0u32);
|
||||
let del = parts[1].parse().unwrap_or(0u32);
|
||||
let path = parts[2].to_string();
|
||||
numstat_map.insert(path, (add, del, binary));
|
||||
}
|
||||
let path = parts[idx].clone();
|
||||
idx += 1;
|
||||
let (old_path, new_path) = match status {
|
||||
'A' => (String::new(), path),
|
||||
'D' => (path, String::new()),
|
||||
_ => (path.clone(), path),
|
||||
};
|
||||
entries.push(NameStatusEntry {
|
||||
status,
|
||||
old_path,
|
||||
new_path,
|
||||
similarity,
|
||||
});
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
Ok(entries)
|
||||
Ok((raw_entries, numstat_map))
|
||||
}
|
||||
|
||||
/// Looks up the tree entry for `path` at `revision` by running
/// `git --git-dir <bare_dir> ls-tree -z -l <revision> -- <path>`.
///
/// Returns `Ok(Some(TreeMeta))` built from the first non-empty NUL-separated
/// record of stdout. Returns `Ok(None)` when git exits unsuccessfully, stdout
/// is empty, the record contains no tab, or fewer than three whitespace-separated
/// fields precede the tab. The only `Err` path is the `?` on `run()`.
fn tree_meta(&self, revision: &str, path: &str) -> GitResult<Option<TreeMeta>> {
|
||||
let result = duct::cmd(
|
||||
"git",
|
||||
[
|
||||
"--git-dir",
|
||||
self.bare_dir.to_string_lossy().as_ref(),
|
||||
"ls-tree",
|
||||
// NUL-terminate records so they can be split on byte 0 below.
"-z",
|
||||
// NOTE(review): long format adds an extra field that is never read here — confirm it is needed.
"-l",
|
||||
revision,
|
||||
// Everything after `--` is passed to git as a path argument.
"--",
|
||||
path,
|
||||
],
|
||||
)
|
||||
.stdout_capture()
|
||||
.stderr_capture()
|
||||
// The exit status is inspected manually below instead of being propagated as an error.
.unchecked()
|
||||
.run()?;
|
||||
// A failed command or empty output is reported as "no entry", not as an error.
if !result.status.success() || result.stdout.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
// Take the first non-empty NUL-separated record and decode it lossily as UTF-8.
let record = result
|
||||
.stdout
|
||||
.split(|b| *b == 0)
|
||||
.find(|part| !part.is_empty())
|
||||
.map(|part| String::from_utf8_lossy(part).into_owned());
|
||||
let Some(record) = record else {
|
||||
return Ok(None);
|
||||
};
|
||||
// Text before the first tab is the metadata; the remainder is unused.
let Some((meta, _path)) = record.split_once('\t') else {
|
||||
return Ok(None);
|
||||
};
|
||||
let parts = meta.split_whitespace().collect::<Vec<_>>();
|
||||
// At least three fields are required because index 2 is read below.
if parts.len() < 3 {
|
||||
return Ok(None);
|
||||
}
|
||||
Ok(Some(TreeMeta {
|
||||
// Field 0 is parsed as an octal number; falls back to 0 when parsing fails.
mode: u32::from_str_radix(parts[0], 8).unwrap_or(0),
|
||||
// Field 2 is stored verbatim as the entry's object id string.
oid_hex: parts[2].to_string(),
|
||||
}))
|
||||
}
|
||||
|
||||
fn path_numstat(
|
||||
/// Single subprocess call to get patches for ALL files at once.
|
||||
/// Returns a map of path → patch bytes.
|
||||
fn diff_patch_batch(
|
||||
&self,
|
||||
base: &str,
|
||||
head: &str,
|
||||
entry: &NameStatusEntry,
|
||||
) -> GitResult<(u32, u32, bool)> {
|
||||
let path = if entry.new_path.is_empty() {
|
||||
&entry.old_path
|
||||
} else {
|
||||
&entry.new_path
|
||||
};
|
||||
let result = duct::cmd(
|
||||
"git",
|
||||
[
|
||||
"--git-dir",
|
||||
self.bare_dir.to_string_lossy().as_ref(),
|
||||
"diff",
|
||||
"--numstat",
|
||||
base,
|
||||
head,
|
||||
"--",
|
||||
path,
|
||||
],
|
||||
)
|
||||
.stdout_capture()
|
||||
.stderr_capture()
|
||||
.unchecked()
|
||||
.run()?;
|
||||
if !result.status.success() {
|
||||
return Err(GitError::CommandFailed {
|
||||
status_code: result.status.code(),
|
||||
stderr: String::from_utf8_lossy(&result.stderr).into_owned(),
|
||||
});
|
||||
}
|
||||
let line = String::from_utf8_lossy(&result.stdout)
|
||||
.lines()
|
||||
.next()
|
||||
.unwrap_or_default()
|
||||
.to_string();
|
||||
let mut parts = line.split('\t');
|
||||
let add = parts.next().unwrap_or_default();
|
||||
let del = parts.next().unwrap_or_default();
|
||||
let binary = add == "-" || del == "-";
|
||||
Ok((add.parse().unwrap_or(0), del.parse().unwrap_or(0), binary))
|
||||
}
|
||||
|
||||
fn path_patch(
|
||||
&self,
|
||||
base: &str,
|
||||
head: &str,
|
||||
entry: &NameStatusEntry,
|
||||
options: Option<&crate::pb::DiffOptions>,
|
||||
) -> GitResult<(Vec<u8>, bool)> {
|
||||
let Some(options) = options else {
|
||||
return Ok((Vec::new(), false));
|
||||
};
|
||||
if !options.include_patch {
|
||||
return Ok((Vec::new(), false));
|
||||
}
|
||||
|
||||
let path = if entry.new_path.is_empty() {
|
||||
&entry.old_path
|
||||
} else {
|
||||
&entry.new_path
|
||||
};
|
||||
let context = options.context_lines.to_string();
|
||||
) -> GitResult<HashMap<String, Vec<u8>>> {
|
||||
let context = options
|
||||
.map(|o| o.context_lines.to_string())
|
||||
.unwrap_or_else(|| "3".into());
|
||||
let mut args = vec![
|
||||
"--git-dir".to_string(),
|
||||
self.bare_dir.to_string_lossy().into_owned(),
|
||||
@@ -287,14 +314,18 @@ impl GitBare {
|
||||
"--patch".into(),
|
||||
format!("--unified={context}"),
|
||||
];
|
||||
if options.include_binary {
|
||||
if options.is_some_and(|o| o.include_binary) {
|
||||
args.push("--binary".into());
|
||||
}
|
||||
push_diff_options(&mut args, Some(options));
|
||||
push_diff_options(&mut args, options);
|
||||
args.push(base.to_string());
|
||||
args.push(head.to_string());
|
||||
args.push("--".into());
|
||||
args.push(path.to_string());
|
||||
if let Some(options) = options
|
||||
&& !options.pathspec.is_empty()
|
||||
{
|
||||
args.push("--".into());
|
||||
args.extend(options.pathspec.iter().cloned());
|
||||
}
|
||||
|
||||
let result = duct::cmd("git", &args)
|
||||
.stdout_capture()
|
||||
@@ -308,12 +339,42 @@ impl GitBare {
|
||||
});
|
||||
}
|
||||
|
||||
let mut patch = result.stdout;
|
||||
let too_large = options.max_bytes > 0 && patch.len() > options.max_bytes as usize;
|
||||
if too_large {
|
||||
patch.truncate(options.max_bytes as usize);
|
||||
// Split combined patch output by "diff --git" headers
|
||||
let mut map = HashMap::new();
|
||||
let output = &result.stdout;
|
||||
let header = b"diff --git ";
|
||||
let mut chunks: Vec<&[u8]> = Vec::new();
|
||||
let mut pos = 0;
|
||||
|
||||
// Find all header positions
|
||||
let mut header_positions = Vec::new();
|
||||
while let Some(idx) = output[pos..]
|
||||
.windows(header.len())
|
||||
.position(|w| w == header)
|
||||
{
|
||||
header_positions.push(pos + idx);
|
||||
pos = pos + idx + header.len();
|
||||
}
|
||||
Ok((patch, too_large))
|
||||
|
||||
for (i, &start) in header_positions.iter().enumerate() {
|
||||
let end = header_positions.get(i + 1).copied().unwrap_or(output.len());
|
||||
chunks.push(&output[start..end]);
|
||||
}
|
||||
|
||||
for chunk in chunks {
|
||||
// Extract file path from "diff --git a/path b/path\n"
|
||||
let first_line_end = chunk
|
||||
.iter()
|
||||
.position(|&b| b == b'\n')
|
||||
.unwrap_or(chunk.len());
|
||||
let first_line = String::from_utf8_lossy(&chunk[..first_line_end]);
|
||||
if let Some(b_pos) = first_line.rfind(" b/") {
|
||||
let path = &first_line[b_pos + 3..];
|
||||
map.insert(path.to_string(), chunk.to_vec());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(map)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user