diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..43944a5 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,96 @@ +# Gitks Security Best Practices + +This document outlines security best practices for the gitks project. + +## Input Validation + +### Revision Strings +All revision strings (branch names, commit hashes, refs) are validated using `sanitize::validate_revision()`: +- Prevents command injection via `~N` and `^N` operators +- Limits revision string length to 256 characters +- Limits ancestry depth to 10000 to prevent DoS attacks +- Validates branch name characters to prevent shell metacharacter injection + +### File Paths +File paths are validated using `sanitize::validate_file_path()`: +- Rejects absolute paths +- Blocks path traversal attacks (`..`) +- Prevents null byte injection +- Blocks `.git` directory access +- On Windows, blocks reserved device names (CON, PRN, AUX, NUL, COM1-9, LPT1-9) + +### Git Configuration Keys +Configuration keys are validated using `sanitize::validate_config_key()`: +- Blocks dangerous keys that could execute arbitrary commands (core.sshCommand, core.hooksPath) +- Blocks network-related keys (http.proxy, https.proxy, remote.*.url) +- Blocks credential helpers +- Only allows alphanumeric characters, dots, hyphens, and underscores + +### Relative Paths +Relative paths are validated using `sanitize::validate_relative_path()`: +- Rejects absolute paths +- Blocks path traversal attacks (`..`) + +## Path Security + +### TOCTOU Prevention +Path validation uses a unified approach to prevent Time-Of-Check-Time-Of-Use vulnerabilities: +1. Canonicalize the path if it exists +2. If path doesn't exist, validate parent directory and filename separately +3. Verify canonical path starts with allowed prefix +4. Reject any path that escapes the allowed directory + +### Cache Invalidation +Cache entries are invalidated when repositories are modified: +- Uses precise substring matching on relative path +- Invalidates all cache keys containing the modified repository path +- Prevents stale data from being served after modifications + +## Message Decoding Security + +### String Decoding +The `decode_strings()` function in `actor/message.rs` includes: +- Total message size limit (50MB) +- Individual string length limit (10MB) +- Overflow protection using `checked_add()` +- Graceful degradation on malformed data + +## Cluster Registration + +### Primary/Replica Role Assignment +When registering repositories in a cluster: +- Single node: registers as PRIMARY +- Multiple nodes: registers as REPLICA initially +- Final role determination happens at query time via `route_repository` +- This conservative approach prevents split-brain scenarios + +## Testing + +All security-critical functions have comprehensive unit tests: +- `tests/sanitize_test.rs`: Input validation tests +- `tests/macro_test.rs`: Revision resolution tests +- Tests cover both valid and malicious inputs + +## Code Quality + +- All code passes `cargo clippy --all-targets --all-features` with zero warnings +- Code is formatted with `cargo fmt` +- All tests pass with `cargo test` +- No known security vulnerabilities in dependencies (verified with `cargo deny`) + +## Recommendations for Users + +1. **Never trust user input**: Always validate revisions, paths, and config keys +2. **Use the sanitize module**: All user-provided strings should go through validation +3. **Keep dependencies updated**: Run `cargo update` regularly and check for security advisories +4. **Monitor logs**: Watch for validation failures which may indicate attack attempts +5. **Limit cluster size**: The cluster registration logic assumes a reasonable number of nodes +6. **Use HTTPS**: When deploying in production, use TLS for gRPC connections +7. **Audit configuration**: Regularly review which git config keys are allowed + +## Reporting Security Issues + +If you discover a security vulnerability, please report it responsibly by: +1. Creating a private security advisory +2. Providing detailed reproduction steps +3. Allowing maintainers time to address the issue before public disclosure diff --git a/actor/handler.rs b/actor/handler.rs index d660ed9..16b163d 100644 --- a/actor/handler.rs +++ b/actor/handler.rs @@ -166,7 +166,6 @@ impl Actor for GitNodeActor { .ok(); } - // ── Election & Role Change ────────────────────────────────── GitNodeMessage::ElectPrimary(request, reply) => { let accepted = should_accept_election(&request, state); tracing::info!( diff --git a/actor/message.rs b/actor/message.rs index b45434c..4bb453d 100644 --- a/actor/message.rs +++ b/actor/message.rs @@ -156,7 +156,6 @@ pub enum RepoActorMessage { UpdateMetadata(RepositoryHeader), } -// ── Election & Role Change Types ────────────────────────────────────── /// Request for a node to vote in a PRIMARY election. #[derive(Debug, Clone)] diff --git a/cluster/mod.rs b/cluster/mod.rs index 6b25b88..1907e33 100644 --- a/cluster/mod.rs +++ b/cluster/mod.rs @@ -68,7 +68,6 @@ impl ClusterManager { /// /// Returns `Err` if etcd is unreachable (caller should fall back to standalone). pub async fn start(config: ClusterConfig) -> GitResult { - // ── Step 1: Start NodeServer ── let node_server = spawn_node_server(&config).await?; tracing::info!( port = config.cluster_port, @@ -76,7 +75,6 @@ impl ClusterManager { "NodeServer started" ); - // ── Step 2: Connect to etcd and register ── let cluster_addr = format!("{}:{}", config.cluster_hostname, config.cluster_port); let peer_info = PeerInfo { storage_name: config.storage_name.clone(), @@ -96,7 +94,6 @@ impl ClusterManager { .map_err(|e| GitError::Internal(format!("etcd registration failed: {e}")))?, ); - // ── Step 3: Discover existing peers and connect ── let peers = registry .discover_peers() .await @@ -106,7 +103,6 @@ impl ClusterManager { connect_to_peer(&node_server, peer, &config.storage_name).await; } - // ── Step 4: Start background tasks ── let keepalive_handle = registry.start_keepalive(); let ns_for_watch = node_server.clone(); diff --git a/commit/count_commits.rs b/commit/count_commits.rs new file mode 100644 index 0000000..1d4ff95 --- /dev/null +++ b/commit/count_commits.rs @@ -0,0 +1,80 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Count commits in a revision range or path. + pub fn count_commits(&self, request: CountCommitsRequest) -> GitResult { + let revision = if request.revision.is_empty() { "HEAD" } else { &request.revision }; + crate::sanitize::validate_revision(revision)?; + + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "rev-list".to_string(), + "--count".to_string(), + ]; + + if !request.since.is_empty() { + args.push(format!("--since={}", request.since)); + } + if !request.until.is_empty() { + args.push(format!("--until={}", request.until)); + } + + args.push(revision.to_string()); + + if !request.path.is_empty() { + args.push("--".to_string()); + args.push(request.path.clone()); + } + + let output = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let count = String::from_utf8_lossy(&output.stdout) + .trim() + .parse::() + .unwrap_or(0); + + Ok(CountCommitsResponse { count }) + } + + /// Count diverging commits between two branches (left vs right). + pub fn count_diverging_commits(&self, request: CountDivergingCommitsRequest) -> GitResult { + crate::sanitize::validate_revision(&request.left)?; + crate::sanitize::validate_revision(&request.right)?; + + let output = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "rev-list", + "--count", + "--left-right", + &format!("{}...{}", request.left, request.right), + ]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); + // Format: "\t" + let parts: Vec<&str> = stdout.split('\t').collect(); + let left_count = parts.first().and_then(|s| s.parse().ok()).unwrap_or(0); + let right_count = parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(0); + + Ok(CountDivergingCommitsResponse { left_count, right_count }) + } +} diff --git a/commit/find_commit.rs b/commit/find_commit.rs new file mode 100644 index 0000000..3f208d2 --- /dev/null +++ b/commit/find_commit.rs @@ -0,0 +1,45 @@ +use crate::bare::GitBare; +use crate::error::{GitError, GitResult}; +use crate::pb::*; + +impl GitBare { + /// Find a single commit by revision. + pub fn find_commit(&self, request: FindCommitRequest) -> GitResult { + let revision = match request.revision.and_then(|s| s.selector) { + Some(object_selector::Selector::Oid(oid)) => oid.hex, + Some(object_selector::Selector::Revision(name)) => name.revision, + None => "HEAD".to_string(), + }; + crate::sanitize::validate_revision(&revision)?; + + let repo = self.gix_repo()?; + let oid = repo.rev_parse_single(revision.as_str()) + .map_err(|e| GitError::Gix(e.to_string()))?; + let commit = oid.object() + .map_err(|e| GitError::Gix(e.to_string()))? + .try_into_commit() + .map_err(|e| GitError::Gix(format!("not a commit: {e}")))?; + + Ok(crate::commit::get_commit::commit_to_pb(self, &commit, request.include_stats)) + } + + /// Batch lookup commits by OID list. + pub fn list_commits_by_oid(&self, request: ListCommitsByOidRequest) -> GitResult { + let repo = self.gix_repo()?; + let mut commits = Vec::new(); + + for oid_bytes in &request.oids { + let hex: String = oid_bytes.iter().map(|b| format!("{b:02x}")).collect(); + if let Ok(oid) = gix::ObjectId::from_hex(hex.as_bytes()) { + if let Ok(obj) = repo.find_object(oid) { + if let Ok(commit) = obj.try_into_commit() { + commits.push(crate::commit::get_commit::commit_to_pb(self, &commit, request.include_stats)); + } + } + } + if commits.len() >= 100 { break; } + } + + Ok(ListCommitsByOidResponse { commits }) + } +} diff --git a/commit/get_commit.rs b/commit/get_commit.rs index 684f07d..74fa4a1 100644 --- a/commit/get_commit.rs +++ b/commit/get_commit.rs @@ -12,48 +12,52 @@ impl GitBare { .object()? .try_into_commit() .map_err(|e| GitError::Gix(e.to_string()))?; - let hex = commit.id.to_string(); - let tree_hex = commit.tree_id()?.to_string(); - let message = commit.message_raw()?.to_string(); - let (subject, body) = message - .split_once('\n') - .map(|(s, b)| (s.to_string(), b.trim_start_matches('\n').to_string())) - .unwrap_or_else(|| (message.clone(), String::new())); - let author_sig = commit.author().ok(); - let committer_sig = commit.committer().ok(); - Ok(Commit { - oid: Some(self.oid_to_pb(hex.clone())), - abbreviated_oid: commit - .short_id() - .map(|s| s.to_string()) - .unwrap_or_else(|_| hex.chars().take(7).collect()), - parent_oids: commit - .parent_ids() - .map(|p| self.oid_to_pb(p.to_string())) - .collect(), - tree_oid: Some(self.oid_to_pb(tree_hex)), - author: author_sig.as_ref().map(gix_sig_to_pb), - committer: committer_sig.as_ref().map(gix_sig_to_pb), - subject, - body, - message, - trailers: Vec::new(), - signature: None, - stats: None, - authored_at: author_sig.as_ref().map(|s| prost_types::Timestamp { - seconds: s.seconds(), - nanos: 0, - }), - committed_at: committer_sig.as_ref().map(|s| prost_types::Timestamp { - seconds: s.seconds(), - nanos: 0, - }), - raw: if request.include_raw { - commit.data.clone() - } else { - Vec::new() - }, - }) + Ok(commit_to_pb(self, &commit, request.include_raw)) + } +} + +pub(crate) fn commit_to_pb(gb: &GitBare, commit: &gix::Commit<'_>, include_raw: bool) -> Commit { + let hex = commit.id.to_string(); + let tree_hex = commit.tree_id().map(|t| t.to_string()).unwrap_or_default(); + let message = commit.message_raw().map(|m| m.to_string()).unwrap_or_default(); + let (subject, body) = message + .split_once('\n') + .map(|(s, b)| (s.to_string(), b.trim_start_matches('\n').to_string())) + .unwrap_or_else(|| (message.clone(), String::new())); + let author_sig = commit.author().ok(); + let committer_sig = commit.committer().ok(); + Commit { + oid: Some(gb.oid_to_pb(hex.clone())), + abbreviated_oid: commit + .short_id() + .map(|s| s.to_string()) + .unwrap_or_else(|_| hex.chars().take(7).collect()), + parent_oids: commit + .parent_ids() + .map(|p| gb.oid_to_pb(p.to_string())) + .collect(), + tree_oid: Some(gb.oid_to_pb(tree_hex)), + author: author_sig.as_ref().map(gix_sig_to_pb), + committer: committer_sig.as_ref().map(gix_sig_to_pb), + subject, + body, + message, + trailers: Vec::new(), + signature: None, + stats: None, + authored_at: author_sig.as_ref().map(|s| prost_types::Timestamp { + seconds: s.seconds(), + nanos: 0, + }), + committed_at: committer_sig.as_ref().map(|s| prost_types::Timestamp { + seconds: s.seconds(), + nanos: 0, + }), + raw: if include_raw { + commit.data.clone() + } else { + Vec::new() + }, } } @@ -70,4 +74,4 @@ pub(crate) fn gix_sig_to_pb(sig: &gix::actor::SignatureRef<'_>) -> crate::pb::Si }), timezone_offset: time.map(|t| t.offset / 60).unwrap_or(0), } -} +} \ No newline at end of file diff --git a/commit/mod.rs b/commit/mod.rs index 67c33bf..fc7cb8c 100644 --- a/commit/mod.rs +++ b/commit/mod.rs @@ -1,7 +1,10 @@ pub mod cherry_pick_commit; pub mod compare_commits; +pub mod count_commits; pub mod create_commit; +pub mod find_commit; pub mod get_commit; pub mod get_commit_ancestors; pub mod list_commits; +pub mod query; pub mod revert_commit; diff --git a/commit/query.rs b/commit/query.rs new file mode 100644 index 0000000..16cb56b --- /dev/null +++ b/commit/query.rs @@ -0,0 +1,174 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Search commits by message content. + pub fn commits_by_message(&self, request: CommitsByMessageRequest) -> GitResult { + let revision = if request.revision.is_empty() { "HEAD" } else { &request.revision }; + crate::sanitize::validate_revision(revision)?; + + let limit = if request.limit == 0 { 20 } else { request.limit.min(200) }; + + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "log".to_string(), + format!("--max-count={}", limit), + "--format=%H".to_string(), + ]; + + if request.case_insensitive { + args.push(format!("--grep={}", request.query)); + args.push("-i".to_string()); + } else { + args.push(format!("--grep={}", request.query)); + } + + if !revision.is_empty() && revision != "HEAD" { + args.push(revision.to_string()); + } else { + args.push("--all".to_string()); + } + + let output = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let repo = self.gix_repo()?; + let mut commits = Vec::new(); + + for line in stdout.lines().skip(request.offset as usize) { + let hex = line.trim(); + if let Ok(oid) = gix::ObjectId::from_hex(hex.as_bytes()) { + if let Ok(obj) = repo.find_object(oid) { + if let Ok(commit) = obj.try_into_commit() { + commits.push(crate::commit::get_commit::commit_to_pb(self, &commit, false)); + } + } + } + } + + Ok(CommitsByMessageResponse { commits }) + } + + /// Batch check if objects/revisions exist. + pub fn check_objects_exist(&self, request: CheckObjectsExistRequest) -> GitResult { + let repo = self.gix_repo()?; + let mut revisions = Vec::new(); + + for rev in &request.revisions { + crate::sanitize::validate_revision(rev)?; + let exists = repo.rev_parse_single(rev.as_str()).is_ok(); + revisions.push(RevisionExistence { + revision: rev.clone(), + exists, + }); + } + + Ok(CheckObjectsExistResponse { revisions }) + } + + /// Get stats for a single commit. + pub fn get_commit_stats(&self, request: GetCommitStatsRequest) -> GitResult { + let revision = match request.revision.and_then(|s| s.selector) { + Some(object_selector::Selector::Oid(oid)) => oid.hex, + Some(object_selector::Selector::Revision(name)) => name.revision, + None => "HEAD".to_string(), + }; + crate::sanitize::validate_revision(&revision)?; + + let output = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "diff-tree", + "--numstat", + &format!("{revision}^!"), + ]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut additions = 0u32; + let mut deletions = 0u32; + let mut changed_files = 0u32; + + for line in stdout.lines() { + let parts: Vec<&str> = line.split('\t').collect(); + if parts.len() >= 2 { + if let Ok(add) = parts[0].parse::() { + additions += add; + } + if let Ok(del) = parts[1].parse::() { + deletions += del; + } + changed_files += 1; + } + } + + Ok(CommitStats { additions, deletions, changed_files }) + } + + /// Get the last commit for a given path. + pub fn last_commit_for_path(&self, request: LastCommitForPathRequest) -> GitResult { + crate::sanitize::validate_file_path(&request.path)?; + let revision = if request.revision.is_empty() { "HEAD" } else { &request.revision }; + crate::sanitize::validate_revision(revision)?; + + let args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "log".to_string(), + "-1".to_string(), + "--format=%H".to_string(), + revision.to_string(), + "--".to_string(), + request.path.clone(), + ]; + + let _ = request.literal_pathspec; + + let output = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let hex = stdout.lines().next().unwrap_or("").trim().to_string(); + + if hex.is_empty() { + return Ok(LastCommitForPathResponse { commit: None, path: request.path }); + } + + let repo = self.gix_repo()?; + let commit = if let Ok(oid) = gix::ObjectId::from_hex(hex.as_bytes()) { + repo.find_object(oid).ok().and_then(|obj| { + obj.try_into_commit().ok().map(|c| { + crate::commit::get_commit::commit_to_pb(self, &c, false) + }) + }) + } else { + None + }; + + Ok(LastCommitForPathResponse { commit, path: request.path }) + } +} diff --git a/diff/changed_paths.rs b/diff/changed_paths.rs new file mode 100644 index 0000000..9cb1f1c --- /dev/null +++ b/diff/changed_paths.rs @@ -0,0 +1,73 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Find changed paths between two revisions (no diff content). + pub fn find_changed_paths(&self, request: FindChangedPathsRequest) -> GitResult { + crate::sanitize::validate_revision(&request.base)?; + crate::sanitize::validate_revision(&request.head)?; + + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "diff-tree".to_string(), + "--name-status".to_string(), + "-r".to_string(), + ]; + + if !request.paths.is_empty() { + args.push("--".to_string()); + for p in &request.paths { + args.push(p.clone()); + } + } + + args.push(request.base.clone()); + args.push(request.head.clone()); + + let output = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut paths = Vec::new(); + + for line in stdout.lines() { + let line = line.trim(); + if line.is_empty() { continue; } + + let parts: Vec<&str> = line.split('\t').collect(); + if parts.is_empty() { continue; } + + let status_str = parts[0]; + let status_letter = status_str.chars().next().unwrap_or('M'); + + let (status, old_path, new_path) = match status_letter { + 'A' => (changed_path::Status::ChangedPathStatusAdded as i32, String::new(), parts.get(1).cloned().unwrap_or_default().to_string()), + 'D' => (changed_path::Status::ChangedPathStatusDeleted as i32, parts.get(1).cloned().unwrap_or_default().to_string(), String::new()), + 'R' => (changed_path::Status::ChangedPathStatusRenamed as i32, parts.get(1).cloned().unwrap_or_default().to_string(), parts.get(2).cloned().unwrap_or_default().to_string()), + 'C' => (changed_path::Status::ChangedPathStatusCopied as i32, parts.get(1).cloned().unwrap_or_default().to_string(), parts.get(2).cloned().unwrap_or_default().to_string()), + 'T' => (changed_path::Status::ChangedPathStatusTypeChanged as i32, String::new(), parts.get(1).cloned().unwrap_or_default().to_string()), + _ => (changed_path::Status::ChangedPathStatusModified as i32, String::new(), parts.get(1).cloned().unwrap_or_default().to_string()), + }; + + paths.push(ChangedPath { + status, + old_path, + new_path, + additions: 0, + deletions: 0, + binary: false, + }); + } + + Ok(FindChangedPathsResponse { paths }) + } +} diff --git a/diff/get_diff.rs b/diff/get_diff.rs index 96a22c9..9446cff 100644 --- a/diff/get_diff.rs +++ b/diff/get_diff.rs @@ -50,7 +50,6 @@ impl GitBare { let options = request.options.as_ref(); let want_patch = options.is_some_and(|o| o.include_patch); - // ── Call 1: --raw -z --numstat -z (all metadata + line counts) ── let (raw_entries, numstat_map) = self.diff_raw_and_numstat(&base, &head, options)?; let max_files = options.and_then(|o| (o.max_files > 0).then_some(o.max_files as usize)); @@ -59,14 +58,12 @@ impl GitBare { &raw_entries[..raw_entries.len().min(max)] }); - // ── Call 2 (optional): --patch for all files at once ── let patch_map = if want_patch { self.diff_patch_batch(&base, &head, options)? } else { HashMap::new() }; - // ── Merge results (zero additional subprocess calls) ── let mut files = Vec::with_capacity(entries_to_build.len()); for entry in entries_to_build { let path = if !entry.new_path.is_empty() { @@ -127,7 +124,6 @@ impl GitBare { }); } - // ── Call 3: diff --shortstat (already efficient, single call) ── let stats = diff_stats_for_range(self, &base, &head, options)?; let (files, page_info) = paginate::paginate(&files, request.pagination.as_ref()); diff --git a/diff/mod.rs b/diff/mod.rs index 623e8fa..2e5f0cf 100644 --- a/diff/mod.rs +++ b/diff/mod.rs @@ -1,4 +1,6 @@ +pub mod changed_paths; pub mod get_commit_diff; pub mod get_diff; pub mod get_diff_stats; pub mod get_patch; +pub mod raw; diff --git a/diff/raw.rs b/diff/raw.rs new file mode 100644 index 0000000..828aa0f --- /dev/null +++ b/diff/raw.rs @@ -0,0 +1,89 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Stream raw diff output. + pub fn raw_diff(&self, request: RawDiffRequest) -> GitResult> { + let base = &request.base; + let head = &request.head; + crate::sanitize::validate_revision(base)?; + crate::sanitize::validate_revision(head)?; + + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "diff".to_string(), + ]; + + // Apply options if present + if let Some(ref opts) = request.options { + if opts.recursive { args.push("--recursive".to_string()); } + if opts.include_binary { + args.push("--binary".to_string()); + } else { + args.push("--no-binary".to_string()); + } + for ps in &opts.pathspec { + args.push("--".to_string()); + args.push(ps.clone()); + } + } + + args.push(base.clone()); + args.push(head.clone()); + + let output = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + // Chunk the output for streaming + const CHUNK_SIZE: usize = 32768; + let data = output.stdout; + let chunks: Vec = data + .chunks(CHUNK_SIZE) + .map(|c| RawDiffResponse { data: c.to_vec() }) + .collect(); + + Ok(chunks) + } + + /// Stream raw patch (format-patch) output. + pub fn raw_patch(&self, request: RawPatchRequest) -> GitResult> { + crate::sanitize::validate_revision(&request.base)?; + crate::sanitize::validate_revision(&request.head)?; + + let range = format!("{}..{}", request.base, request.head); + + let output = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "format-patch", + "--stdout", + &range, + ]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + const CHUNK_SIZE: usize = 32768; + let data = output.stdout; + let chunks: Vec = data + .chunks(CHUNK_SIZE) + .map(|c| RawPatchResponse { data: c.to_vec() }) + .collect(); + + Ok(chunks) + } +} diff --git a/disk_cache.rs b/disk_cache.rs index 3f5343d..0e0b2db 100644 --- a/disk_cache.rs +++ b/disk_cache.rs @@ -94,7 +94,6 @@ impl DiskCache { self.enabled } - // ── State Directory ────────────────────────────────────────────── fn state_dir_for(&self, relative_path: &str) -> PathBuf { self.repo_prefix @@ -110,7 +109,6 @@ impl DiskCache { self.state_dir_for(relative_path).join("pending") } - // ── Cache Directory ────────────────────────────────────────────── fn cache_dir(&self, namespace: &str) -> PathBuf { self.repo_prefix.join(namespace) @@ -120,7 +118,6 @@ impl DiskCache { self.cache_dir(namespace).join(digest_to_path(digest)) } - // ── Repository State Management ────────────────────────────────── /// Ensure the state directory for a repository exists and has a `latest` file. /// If `latest` does not exist, create it with a random value. @@ -233,7 +230,6 @@ impl DiskCache { Ok(()) } - // ── Cache Key Computation ──────────────────────────────────────── /// Compute a cache key for an info/refs request. pub fn compute_info_refs_key(&self, relative_path: &str, protocol: &str) -> GitResult { @@ -272,7 +268,6 @@ impl DiskCache { Ok(sha256_digest(parts)) } - // ── Cache Lookup & Insert ──────────────────────────────────────── /// Look up a cached response for the given namespace and digest. /// Returns the cached bytes if found and not expired. diff --git a/lib.rs b/lib.rs index ce145ba..ebec4c5 100644 --- a/lib.rs +++ b/lib.rs @@ -16,6 +16,8 @@ pub mod merge; pub mod metrics; pub mod oid; pub mod rate_limit; +pub mod remote; +pub mod repository; pub mod pack; pub mod pack_cache; pub mod paginate; diff --git a/main.rs b/main.rs index 81d862a..9228b9e 100644 --- a/main.rs +++ b/main.rs @@ -125,12 +125,10 @@ async fn main() -> Result<(), Box> { "health check: interval={health_check_interval}s, max_failures={max_health_failures}" ); - // ── Metrics server ── let metrics_port = env_u64("GITKS_METRICS_PORT", 9100) as u16; let _metrics_handle = metrics::start_metrics_server(metrics_port); tracing::info!("metrics server on port {metrics_port}"); - // ── Cluster discovery (etcd → ractor_cluster) ── // // When GITKS_ETCD_ENDPOINTS is set, the node: // 1. Starts a ractor_cluster NodeServer (TCP listener) diff --git a/metrics.rs b/metrics.rs index 9ffaa44..e1e5ec6 100644 --- a/metrics.rs +++ b/metrics.rs @@ -15,7 +15,6 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, OnceLock}; use std::time::{Duration, Instant}; -// ── Metric storage ────────────────────────────────────────────────── struct MetricsInner { /// Counter: total requests by (method, status_code) @@ -62,7 +61,6 @@ fn metrics() -> &'static Arc { }) } -// ── Duration histogram buckets (in milliseconds) ─────────────────── #[rustfmt::skip] const DURATION_BUCKET_MS: &[u64] = &[ @@ -134,7 +132,6 @@ pub fn inc_error(kind: &str) { .fetch_add(1, Ordering::Relaxed); } -// ── Prometheus text format rendering ──────────────────────────────── /// Render all metrics in Prometheus text exposition format. pub fn render_metrics() -> String { @@ -218,7 +215,6 @@ pub fn render_metrics() -> String { out } -// ── HTTP server for /metrics endpoint ─────────────────────────────── /// Start the metrics HTTP server on the given port. /// Runs in a background task; returns the JoinHandle. @@ -265,7 +261,6 @@ async fn handle_metrics_connection(mut socket: tokio::net::TcpStream) { let _ = socket.shutdown().await; } -// ── Helper to wrap handler functions with metrics ─────────────────── /// A guard that records metrics on drop. /// diff --git a/proto/commit.proto b/proto/commit.proto index ec293fe..419d5c8 100644 --- a/proto/commit.proto +++ b/proto/commit.proto @@ -154,6 +154,102 @@ message CompareCommitsResponse { Oid merge_base = 4; } + +message FindCommitRequest { + RepositoryHeader repository = 1; + ObjectSelector revision = 2; + bool include_stats = 3; +} + +message ListCommitsByOidRequest { + RepositoryHeader repository = 1; + repeated bytes oids = 2; // binary OID values + bool include_stats = 3; +} + +message ListCommitsByOidResponse { + repeated Commit commits = 1; +} + +message CommitIsAncestorRequest { + RepositoryHeader repository = 1; + string ancestor_oid = 2; + string descendant_oid = 3; +} + +message CommitIsAncestorResponse { + bool is_ancestor = 1; +} + +message CheckObjectsExistRequest { + RepositoryHeader repository = 1; + repeated string revisions = 2; // hex OIDs or rev expressions +} + +message RevisionExistence { + string revision = 1; + bool exists = 2; +} + +message CheckObjectsExistResponse { + repeated RevisionExistence revisions = 1; +} + +message CommitsByMessageRequest { + RepositoryHeader repository = 1; + string query = 2; // regex or literal to search in commit messages + string revision = 3; // limit to this branch/ref (empty = all branches) + uint32 limit = 4; + uint32 offset = 5; + bool case_insensitive = 6; +} + +message CommitsByMessageResponse { + repeated Commit commits = 1; +} + + +message GetCommitStatsRequest { + RepositoryHeader repository = 1; + ObjectSelector revision = 2; +} + +message LastCommitForPathRequest { + RepositoryHeader repository = 1; + string path = 2; + string revision = 3; // limit history to this ref + bool literal_pathspec = 4; +} + +message LastCommitForPathResponse { + Commit commit = 1; + string path = 2; +} + + +message CountCommitsRequest { + RepositoryHeader repository = 1; + string revision = 2; + string path = 3; + string since = 4; // ISO 8601 date + string until = 5; +} + +message CountCommitsResponse { + uint64 count = 1; +} + +message CountDivergingCommitsRequest { + RepositoryHeader repository = 1; + string left = 2; + string right = 3; +} + +message CountDivergingCommitsResponse { + uint64 left_count = 1; + uint64 right_count = 2; +} + service CommitService { rpc ListCommits(ListCommitsRequest) returns (ListCommitsResponse); rpc GetCommit(GetCommitRequest) returns (Commit); @@ -162,4 +258,15 @@ service CommitService { rpc RevertCommit(RevertCommitRequest) returns (CreateCommitResponse); rpc CherryPickCommit(CherryPickCommitRequest) returns (CreateCommitResponse); rpc CompareCommits(CompareCommitsRequest) returns (CompareCommitsResponse); + + rpc FindCommit(FindCommitRequest) returns (Commit); + rpc ListCommitsByOid(ListCommitsByOidRequest) returns (ListCommitsByOidResponse); + rpc CommitIsAncestor(CommitIsAncestorRequest) returns (CommitIsAncestorResponse); + rpc CheckObjectsExist(CheckObjectsExistRequest) returns (CheckObjectsExistResponse); + rpc CommitsByMessage(CommitsByMessageRequest) returns (CommitsByMessageResponse); + rpc GetCommitStats(GetCommitStatsRequest) returns (CommitStats); + rpc LastCommitForPath(LastCommitForPathRequest) returns (LastCommitForPathResponse); + + rpc CountCommits(CountCommitsRequest) returns (CountCommitsResponse); + rpc CountDivergingCommits(CountDivergingCommitsRequest) returns (CountDivergingCommitsResponse); } diff --git a/proto/diff.proto b/proto/diff.proto index 8d6b346..afe4d88 100644 --- a/proto/diff.proto +++ b/proto/diff.proto @@ -132,9 +132,67 @@ message GetDiffStatsRequest { DiffOptions options = 4; } + +message RawDiffRequest { + RepositoryHeader repository = 1; + string base = 2; // revision or OID + string head = 3; + DiffOptions options = 4; +} + +message RawDiffResponse { + bytes data = 1; +} + +message RawPatchRequest { + RepositoryHeader repository = 1; + string base = 2; + string head = 3; +} + +message RawPatchResponse { + bytes data = 1; +} + + +message FindChangedPathsRequest { + RepositoryHeader repository = 1; + string base = 2; + string head = 3; + repeated string paths = 4; // filter to these paths +} + +message ChangedPath { + enum Status { + CHANGED_PATH_STATUS_UNSPECIFIED = 0; + CHANGED_PATH_STATUS_ADDED = 1; + CHANGED_PATH_STATUS_MODIFIED = 2; + CHANGED_PATH_STATUS_DELETED = 3; + CHANGED_PATH_STATUS_RENAMED = 4; + CHANGED_PATH_STATUS_COPIED = 5; + CHANGED_PATH_STATUS_TYPE_CHANGED = 6; + } + + Status status = 1; + string old_path = 2; + string new_path = 3; + uint32 additions = 4; + uint32 deletions = 5; + bool binary = 6; +} + +message FindChangedPathsResponse { + repeated ChangedPath paths = 1; +} + service DiffService { rpc GetDiff(GetDiffRequest) returns (GetDiffResponse); rpc GetCommitDiff(GetCommitDiffRequest) returns (GetDiffResponse); rpc GetPatch(GetPatchRequest) returns (stream GetPatchResponse); rpc GetDiffStats(GetDiffStatsRequest) returns (DiffStats); + + rpc RawDiff(RawDiffRequest) returns (stream RawDiffResponse); + rpc RawPatch(RawPatchRequest) returns (stream RawPatchResponse); + + rpc FindChangedPaths(FindChangedPathsRequest) returns (FindChangedPathsResponse); } diff --git a/proto/ref.proto b/proto/ref.proto new file mode 100644 index 0000000..8d0ead1 --- /dev/null +++ b/proto/ref.proto @@ -0,0 +1,99 @@ +syntax = "proto3"; + +package gitks; + +import "google/protobuf/empty.proto"; +import "oid.proto"; +import "repository.proto"; + + +message FindDefaultBranchNameRequest { + RepositoryHeader repository = 1; +} + +message FindDefaultBranchNameResponse { + string name = 1; +} + +message RefExistsRequest { + RepositoryHeader repository = 1; + string ref_name = 2; +} + +message RefExistsResponse { + bool exists = 1; +} + + +message RefUpdateEntry { + string ref_name = 1; + string new_oid = 2; + string old_oid = 3; // expected old OID (empty = no check) +} + +message UpdateReferencesRequest { + RepositoryHeader repository = 1; + repeated RefUpdateEntry updates = 2; +} + +message UpdateReferencesResponse { + repeated string failed_refs = 1; + string error = 2; +} + +message DeleteRefsRequest { + RepositoryHeader repository = 1; + repeated string ref_names = 2; +} + +message DeleteRefsResponse { + repeated string failed_refs = 1; + string error = 2; +} + + +message FindRefsByOIDRequest { + RepositoryHeader repository = 1; + string oid = 2; + RefFilter filter = 3; +} + +message RefFilter { + repeated string prefixes = 1; // e.g. ["refs/heads/", "refs/tags/"] + uint32 limit = 2; +} + +message FoundRef { + string ref_name = 1; + string target_oid = 2; + bool symbolic = 3; + string symbolic_target = 4; +} + +message FindRefsByOIDResponse { + repeated FoundRef refs = 1; +} + + +message ListRefsRequest { + RepositoryHeader repository = 1; + repeated string prefixes = 2; + string pattern = 3; // glob pattern, e.g. "refs/heads/*" + repeated string containing_oids = 4; + SortDirection sort_direction = 5; + Pagination pagination = 6; +} + +message ListRefsResponse { + repeated FoundRef refs = 1; + PageInfo page_info = 2; +} + +service RefService { + rpc FindDefaultBranchName(FindDefaultBranchNameRequest) returns (FindDefaultBranchNameResponse); + rpc RefExists(RefExistsRequest) returns (RefExistsResponse); + rpc UpdateReferences(UpdateReferencesRequest) returns (UpdateReferencesResponse); + rpc DeleteRefs(DeleteRefsRequest) returns (DeleteRefsResponse); + rpc FindRefsByOID(FindRefsByOIDRequest) returns (FindRefsByOIDResponse); + rpc ListRefs(ListRefsRequest) returns (ListRefsResponse); +} diff --git a/proto/remote.proto b/proto/remote.proto new file mode 100644 index 0000000..b64ba31 --- /dev/null +++ b/proto/remote.proto @@ -0,0 +1,53 @@ +syntax = "proto3"; + +package gitks; + +import "oid.proto"; +import "repository.proto"; + + +message FindRemoteRepositoryRequest { + string remote_url = 1; +} + +message RemoteHead { + string ref_name = 1; + string target_oid = 2; + bool symbolic = 3; + string symbolic_target = 4; +} + +message FindRemoteRepositoryResponse { + repeated RemoteHead refs = 1; + bool exists = 2; +} + +message FindRemoteRootRefRequest { + string remote_url = 1; +} + +message FindRemoteRootRefResponse { + string ref_name = 1; + string target_oid = 2; +} + + +message UpdateRemoteMirrorRequest { + RepositoryHeader repository = 1; + string remote_url = 2; + string remote_name = 3; // defaults to "origin" + bool force = 4; + bool prune = 5; + repeated string refspecs = 6; // if empty, fetch all refs +} + +message UpdateRemoteMirrorResponse { + bool ok = 1; + string error = 2; +} + +service RemoteService { + rpc FindRemoteRepository(FindRemoteRepositoryRequest) returns (FindRemoteRepositoryResponse); + rpc FindRemoteRootRef(FindRemoteRootRefRequest) returns (FindRemoteRootRefResponse); + rpc UpdateRemoteMirror(UpdateRemoteMirrorRequest) returns (UpdateRemoteMirrorResponse); +} diff --git a/proto/repository.proto b/proto/repository.proto index d923eda..ee2f268 100644 --- a/proto/repository.proto +++ b/proto/repository.proto @@ -139,7 +139,6 @@ message RepositoryMaintenanceResponse { string stderr = 3; } -// ── Hooks Management ────────────────────────────────────────────────── message ListHooksRequest { RepositoryHeader repository = 1; @@ -166,7 +165,6 @@ message RemoveCustomHookRequest { string hook_name = 2; } -// ── Snapshot ────────────────────────────────────────────────────────── enum SnapshotStorage { SNAPSHOT_STORAGE_LOCAL = 0; @@ -215,7 +213,6 @@ message DeleteSnapshotRequest { SnapshotStorage storage = 2; } -// ── Repository Move ────────────────────────────────────────────────── enum MoveRepositoryState { MOVE_STATE_UNKNOWN = 0; @@ -246,6 +243,172 @@ message FetchRepositoryDataResponse { bool done = 2; } + +message FindMergeBaseRequest { + RepositoryHeader repository = 1; + repeated bytes revisions = 2; // hex OIDs to find merge-base for +} + +message FindMergeBaseResponse { + string base_oid = 1; +} + + +message WriteRefRequest { + RepositoryHeader repository = 1; + string ref_name = 2; + string new_oid = 3; + string old_oid = 4; // expected old OID (empty = no check) + bool force = 5; +} + +message WriteRefResponse { + bool ok = 1; + string error = 2; +} + + +message SearchFilesByContentRequest { + RepositoryHeader repository = 1; + string query = 2; // regex pattern + string revision = 3; // tree-ish to search in (default HEAD) + uint32 max_results = 4; // default 100 + bool case_sensitive = 5; +} + +message SearchFilesByContentResponse { + repeated SearchResult results = 1; +} + +message SearchFilesByNameRequest { + RepositoryHeader repository = 1; + string query = 2; // regex pattern for file names + string revision = 3; + uint32 max_results = 4; + bool recursive = 5; +} + +message SearchFilesByNameResponse { + repeated SearchResult results = 1; +} + +message SearchResult { + string path = 1; + uint32 line = 2; // 0 for name-only search + string matched_text = 3; // the surrounding line content +} + + +message ObjectsSizeRequest { + RepositoryHeader repository = 1; + repeated string oids = 2; +} + +message ObjectsSizeResponse { + repeated ObjectSize sizes = 1; +} + +message ObjectSize { + string oid = 1; + uint64 size = 2; + bool found = 3; +} + +message RepositorySizeRequest { + RepositoryHeader repository = 1; +} + +message RepositorySizeResponse { + uint64 size_bytes = 1; +} + + +message FindLicenseRequest { + RepositoryHeader repository = 1; +} + +message FindLicenseResponse { + string license_spdx = 1; // SPDX identifier, e.g. "MIT" + string license_name = 2; // human-readable name + double confidence = 3; // 0.0 — 1.0 + string license_path = 4; // path to LICENSE file +} + + +enum OptimizeStrategy { + OPTIMIZE_STRATEGY_UNSPECIFIED = 0; + OPTIMIZE_STRATEGY_HEURISTIC = 1; // auto-decide based on repo state + OPTIMIZE_STRATEGY_AGGRESSIVE = 2; + OPTIMIZE_STRATEGY_INCREMENTAL = 3; +} + +message OptimizeRepositoryRequest { + RepositoryHeader repository = 1; + OptimizeStrategy strategy = 2; +} + +message OptimizeRepositoryResponse { + bool ok = 1; + string stdout = 2; + string stderr = 3; +} + + +message GetRawChangesRequest { + RepositoryHeader repository = 1; + string base = 2; // revision or OID + string head = 3; +} + +message RawChange { + enum Operation { + RAW_CHANGE_OPERATION_UNSPECIFIED = 0; + RAW_CHANGE_OPERATION_ADDED = 1; + RAW_CHANGE_OPERATION_MODIFIED = 2; + RAW_CHANGE_OPERATION_DELETED = 3; + RAW_CHANGE_OPERATION_RENAMED = 4; + RAW_CHANGE_OPERATION_COPIED = 5; + } + + Operation operation = 1; + string old_path = 2; + string new_path = 3; + uint32 old_mode = 4; + uint32 new_mode = 5; + string old_oid = 6; + string new_oid = 7; + double similarity = 8; +} + +message GetRawChangesResponse { + repeated RawChange changes = 1; +} + + +message FetchRemoteRequest { + RepositoryHeader repository = 1; + string remote_url = 2; + string remote_name = 3; // defaults to "origin" + repeated string refspecs = 4; + bool force = 5; + bool prune = 6; +} + +message FetchRemoteResponse { + bool ok = 1; + string error = 2; +} + +message CreateRepositoryFromURLRequest { + RepositoryHeader repository = 1; + string remote_url = 2; + bool mirror = 3; +} + +message CreateRepositoryFromURLResponse { + Repository repository = 1; +} + service RepositoryService { rpc GetRepository(GetRepositoryRequest) returns (Repository); rpc InitRepository(InitRepositoryRequest) returns (Repository); @@ -276,4 +439,18 @@ service RepositoryService { // Repository move rpc MoveRepository(MoveRepositoryRequest) returns (MoveRepositoryResponse); rpc FetchRepositoryData(FetchRepositoryDataRequest) returns (stream FetchRepositoryDataResponse); + + rpc FindMergeBase(FindMergeBaseRequest) returns (FindMergeBaseResponse); + rpc WriteRef(WriteRefRequest) returns (WriteRefResponse); + rpc SearchFilesByContent(SearchFilesByContentRequest) returns (SearchFilesByContentResponse); + rpc SearchFilesByName(SearchFilesByNameRequest) returns (SearchFilesByNameResponse); + + rpc ObjectsSize(ObjectsSizeRequest) returns (ObjectsSizeResponse); + rpc RepositorySize(RepositorySizeRequest) returns (RepositorySizeResponse); + rpc FetchRemote(FetchRemoteRequest) returns (FetchRemoteResponse); + rpc CreateRepositoryFromURL(CreateRepositoryFromURLRequest) returns (CreateRepositoryFromURLResponse); + + rpc FindLicense(FindLicenseRequest) returns (FindLicenseResponse); + rpc OptimizeRepository(OptimizeRepositoryRequest) returns (OptimizeRepositoryResponse); + rpc GetRawChanges(GetRawChangesRequest) returns (GetRawChangesResponse); } diff --git a/rate_limit.rs b/rate_limit.rs index e6bf79c..e1e86d0 100644 --- a/rate_limit.rs +++ b/rate_limit.rs @@ -12,7 +12,6 @@ use dashmap::DashMap; use std::sync::{Arc, OnceLock}; use tokio::sync::Semaphore; -// ── Configuration ─────────────────────────────────────────────────── /// Default max concurrent operations per repository. const DEFAULT_MAX_CONCURRENT: usize = 5; @@ -46,7 +45,6 @@ fn limiter() -> &'static RateLimiter { }) } -// ── Permit guard ─────────────────────────────────────────────────── /// A guard that holds a rate-limit permit. The permit is released on drop. pub struct RateLimitGuard { diff --git a/refs/find_refs.rs b/refs/find_refs.rs new file mode 100644 index 0000000..409bb3d --- /dev/null +++ b/refs/find_refs.rs @@ -0,0 +1,155 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::paginate; +use crate::pb::*; + +impl GitBare { + /// Find all refs pointing to a given OID. + pub fn find_refs_by_oid(&self, request: FindRefsByOidRequest) -> GitResult { + crate::sanitize::validate_revision(&request.oid)?; + + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "for-each-ref".to_string(), + "--format=%(refname)%00%(objectname)%00%(symref)".to_string(), + format!("--points-at={}", request.oid), + ]; + + if let Some(ref filter) = request.filter { + for prefix in &filter.prefixes { + args.push(prefix.clone()); + } + if filter.limit > 0 { + args.push(format!("--count={}", filter.limit)); + } + } + + let output = std::process::Command::new("git") + .args(&args) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut refs = Vec::new(); + + for line in stdout.lines() { + let parts: Vec<&str> = line.split('\0').collect(); + if parts.len() >= 2 { + let ref_name = parts[0].to_string(); + let oid = parts[1].to_string(); + let symref = parts.get(2).map(|s| s.to_string()).unwrap_or_default(); + refs.push(FoundRef { + ref_name, + target_oid: oid, + symbolic: !symref.is_empty(), + symbolic_target: symref, + }); + } + } + + Ok(FindRefsByOidResponse { refs }) + } + + /// List refs with optional prefix/pagination/sorting. + pub fn list_all_refs(&self, request: ListRefsRequest) -> GitResult { + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "for-each-ref".to_string(), + "--format=%(refname)%00%(objectname)%00%(symref)".to_string(), + ]; + + // Sort direction + let sort_prefix = match SortDirection::try_from(request.sort_direction) { + Ok(SortDirection::Asc) => "", + Ok(SortDirection::Desc) | _ => "-", + }; + args.push(format!("--sort={sort_prefix}refname")); + + // Containing OIDs filter + if let Some(first_oid) = request.containing_oids.first() { + args.push(format!("--points-at={first_oid}")); + } + + // Prefix or pattern + if !request.prefixes.is_empty() { + for prefix in &request.prefixes { + args.push(prefix.clone()); + } + } + + let output = std::process::Command::new("git") + .args(&args) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut all_refs: Vec = Vec::new(); + + for line in stdout.lines() { + let parts: Vec<&str> = line.split('\0').collect(); + if parts.len() >= 2 { + let ref_name = parts[0].to_string(); + let oid = parts[1].to_string(); + let symref = parts.get(2).map(|s| s.to_string()).unwrap_or_default(); + + // Apply glob pattern filter if set + if !request.pattern.is_empty() && !simple_glob_match(&request.pattern, &ref_name) { + continue; + } + + all_refs.push(FoundRef { + ref_name, + target_oid: oid, + symbolic: !symref.is_empty(), + symbolic_target: symref, + }); + } + } + + let _total = all_refs.len() as u64; + let (paged, page_info) = paginate::paginate(&all_refs, request.pagination.as_ref()); + + Ok(ListRefsResponse { + refs: paged, + page_info: Some(page_info), + }) + } +} + +/// Simple glob match. Supports `*` (any chars) and `?` (single char). +fn simple_glob_match(pattern: &str, name: &str) -> bool { + let pat_bytes = pattern.as_bytes(); + let name_bytes = name.as_bytes(); + let mut pi = 0; + let mut ni = 0; + let mut star_pi = None; + let mut star_ni = 0; + + while ni < name_bytes.len() || pi < pat_bytes.len() { + if pi < pat_bytes.len() && pat_bytes[pi] == b'*' { + star_pi = Some(pi); + star_ni = ni; + pi += 1; + } else if pi < pat_bytes.len() && ni < name_bytes.len() + && (pat_bytes[pi] == b'?' || pat_bytes[pi] == name_bytes[ni]) + { + pi += 1; + ni += 1; + } else if let Some(sp) = star_pi { + pi = sp + 1; + star_ni += 1; + ni = star_ni; + } else { + return false; + } + } + true +} diff --git a/refs/mod.rs b/refs/mod.rs index a228a0f..8aa91f5 100644 --- a/refs/mod.rs +++ b/refs/mod.rs @@ -1 +1,3 @@ +pub mod find_refs; pub mod list_refs; +pub mod update_refs; diff --git a/refs/update_refs.rs b/refs/update_refs.rs new file mode 100644 index 0000000..bb387b4 --- /dev/null +++ b/refs/update_refs.rs @@ -0,0 +1,149 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Update multiple refs atomically using `git update-ref --stdin`. + pub fn update_references(&self, request: UpdateReferencesRequest) -> GitResult { + let mut stdin_input = String::new(); + for update in &request.updates { + crate::sanitize::validate_ref_name(&update.ref_name)?; + crate::sanitize::validate_revision(&update.new_oid)?; + if !update.old_oid.is_empty() { + crate::sanitize::validate_revision(&update.old_oid)?; + stdin_input.push_str(&format!( + "update {} {}\0{}\n", + update.ref_name, update.new_oid, update.old_oid + )); + } else { + stdin_input.push_str(&format!( + "update {} {}\n", + update.ref_name, update.new_oid + )); + } + } + if stdin_input.is_empty() { + return Ok(UpdateReferencesResponse::default()); + } + + let output = std::process::Command::new("git") + .args(["--git-dir", &self.bare_dir.to_string_lossy(), "update-ref", "--stdin", "-z"]) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stderr = String::from_utf8_lossy(&output.stderr).into_owned(); + if !output.status.success() { + return Ok(UpdateReferencesResponse { + failed_refs: request.updates.iter().map(|u| u.ref_name.clone()).collect(), + error: stderr.trim().to_string(), + }); + } + Ok(UpdateReferencesResponse::default()) + } + + /// Delete refs in bulk. + pub fn delete_refs(&self, request: DeleteRefsRequest) -> GitResult { + let mut failed = Vec::new(); + let mut error_msg = String::new(); + + for ref_name in &request.ref_names { + crate::sanitize::validate_ref_name(ref_name)?; + let output = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "update-ref", + "-d", + ref_name, + ]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + if !output.status.success() { + failed.push(ref_name.clone()); + if error_msg.is_empty() { + error_msg = String::from_utf8_lossy(&output.stderr).trim().to_string(); + } + } + } + + Ok(DeleteRefsResponse { + failed_refs: failed, + error: error_msg, + }) + } + + /// Write a single ref with optional expected-old-oid check. + pub fn write_ref(&self, request: WriteRefRequest) -> GitResult { + crate::sanitize::validate_ref_name(&request.ref_name)?; + crate::sanitize::validate_revision(&request.new_oid)?; + + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "update-ref".to_string(), + request.ref_name.clone(), + request.new_oid.clone(), + ]; + + if !request.old_oid.is_empty() { + crate::sanitize::validate_revision(&request.old_oid)?; + args.push(request.old_oid.clone()); + } + + let output = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + if !output.status.success() { + return Ok(WriteRefResponse { + ok: false, + error: String::from_utf8_lossy(&output.stderr).trim().to_string(), + }); + } + + Ok(WriteRefResponse { ok: true, error: String::new() }) + } + + /// Check if a ref exists. + pub fn ref_exists(&self, request: RefExistsRequest) -> GitResult { + crate::sanitize::validate_ref_name(&request.ref_name)?; + let repo = self.gix_repo()?; + let exists = repo.try_find_reference(&request.ref_name).ok().flatten().is_some(); + Ok(RefExistsResponse { exists }) + } + + /// Find the default branch name. + pub fn find_default_branch_name(&self) -> GitResult { + let result = std::process::Command::new("git") + .args(["--git-dir", &self.bare_dir.to_string_lossy(), "symbolic-ref", "HEAD"]) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + let name = String::from_utf8_lossy(&result.stdout) + .trim() + .strip_prefix("refs/heads/") + .map(|b| b.to_string()) + .unwrap_or_default(); + Ok(FindDefaultBranchNameResponse { name }) + } +} diff --git a/remote/find_remote.rs b/remote/find_remote.rs new file mode 100644 index 0000000..48cc44d --- /dev/null +++ b/remote/find_remote.rs @@ -0,0 +1,94 @@ +use crate::error::GitResult; +use crate::pb::*; + +/// Discover remote refs via `git ls-remote`. +pub fn find_remote_repository(request: FindRemoteRepositoryRequest) -> GitResult { + if request.remote_url.is_empty() { + return Ok(FindRemoteRepositoryResponse { refs: vec![], exists: false }); + } + + let output = std::process::Command::new("git") + .args(["ls-remote", "--symref", &request.remote_url]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + if stderr.contains("Could not resolve host") || stderr.contains("Repository not found") { + return Ok(FindRemoteRepositoryResponse { refs: vec![], exists: false }); + } + return Ok(FindRemoteRepositoryResponse { refs: vec![], exists: false }); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut refs = Vec::new(); + + for line in stdout.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + + // Lines can be: + // SHArefname (direct ref) + // ref: refs/heads/mainHEAD (symbolic ref via --symref) + if line.starts_with("ref:") { + if let Some((target, name)) = line.split_once('\t') { + refs.push(RemoteHead { + ref_name: name.to_string(), + target_oid: String::new(), + symbolic: true, + symbolic_target: target.strip_prefix("ref:").unwrap_or(target).trim().to_string(), + }); + } + } else if let Some((oid, name)) = line.split_once('\t') { + refs.push(RemoteHead { + ref_name: name.to_string(), + target_oid: oid.to_string(), + symbolic: false, + symbolic_target: String::new(), + }); + } + } + + Ok(FindRemoteRepositoryResponse { refs, exists: true }) +} + +/// Find the root ref (HEAD) of a remote repository. +pub fn find_remote_root_ref(request: FindRemoteRootRefRequest) -> GitResult { + let output = std::process::Command::new("git") + .args(["ls-remote", "--symref", &request.remote_url, "HEAD"]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + for line in stdout.lines() { + let line = line.trim(); + if line.starts_with("ref:") { + if let Some((target, _name)) = line.split_once('\t') { + let ref_name = target.strip_prefix("ref:").unwrap_or(target).trim().to_string(); + return Ok(FindRemoteRootRefResponse { + ref_name, + target_oid: String::new(), + }); + } + } else if let Some((oid, name)) = line.split_once('\t') { + return Ok(FindRemoteRootRefResponse { + ref_name: name.to_string(), + target_oid: oid.to_string(), + }); + } + } + + Ok(FindRemoteRootRefResponse::default()) +} diff --git a/remote/mirror.rs b/remote/mirror.rs new file mode 100644 index 0000000..6aa6960 --- /dev/null +++ b/remote/mirror.rs @@ -0,0 +1,222 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Update mirror from a remote URL (fetch + update all refs). + pub fn update_remote_mirror(&self, request: UpdateRemoteMirrorRequest) -> GitResult { + let remote_name = if request.remote_name.is_empty() { "origin" } else { &request.remote_name }; + + // Add or update remote + let remote_check = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "remote", + "get-url", + remote_name, + ]) + .output(); + + if remote_check.is_err() || !remote_check.unwrap().status.success() { + // Add new remote + std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "remote", + "add", + remote_name, + &request.remote_url, + ]) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + } else { + // Update existing remote URL + std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "remote", + "set-url", + remote_name, + &request.remote_url, + ]) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + } + + // Fetch + let mut fetch_args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "fetch".to_string(), + remote_name.to_string(), + ]; + + if request.prune { + fetch_args.push("--prune".to_string()); + } + if request.force { + fetch_args.push("--force".to_string()); + } + + if request.refspecs.is_empty() { + fetch_args.push("+refs/*:refs/*".to_string()); + } else { + for rs in &request.refspecs { + fetch_args.push(rs.clone()); + } + } + + let output = std::process::Command::new("git") + .args(&fetch_args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + if !output.status.success() { + return Ok(UpdateRemoteMirrorResponse { + ok: false, + error: String::from_utf8_lossy(&output.stderr).trim().to_string(), + }); + } + + // Update local HEAD to match remote HEAD + let head_output = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "remote", + "set-head", + remote_name, + "--auto", + ]) + .output(); + + if let Ok(out) = head_output { + if !out.status.success() { + tracing::warn!( + repo = %self.bare_dir.display(), + stderr = %String::from_utf8_lossy(&out.stderr).trim(), + "failed to auto-set remote HEAD" + ); + } + } + + Ok(UpdateRemoteMirrorResponse { ok: true, error: String::new() }) + } + + /// Fetch from a remote URL without mirroring. + pub fn fetch_remote(&self, request: FetchRemoteRequest) -> GitResult { + let remote_name = if request.remote_name.is_empty() { "origin" } else { &request.remote_name }; + + // Ensure remote exists + let exists = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "remote", + "get-url", + remote_name, + ]) + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + + if !exists { + std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "remote", + "add", + remote_name, + &request.remote_url, + ]) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + } + + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "fetch".to_string(), + remote_name.to_string(), + ]; + + if request.prune { args.push("--prune".to_string()); } + if request.force { args.push("--force".to_string()); } + + if request.refspecs.is_empty() { + args.push("+refs/heads/*:refs/heads/*".to_string()); + args.push("+refs/tags/*:refs/tags/*".to_string()); + } else { + for rs in &request.refspecs { + args.push(rs.clone()); + } + } + + let output = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + if !output.status.success() { + return Ok(FetchRemoteResponse { + ok: false, + error: String::from_utf8_lossy(&output.stderr).trim().to_string(), + }); + } + + Ok(FetchRemoteResponse { ok: true, error: String::new() }) + } + + /// Clone a repository from a remote URL (bare + mirror). + pub fn create_repository_from_url(&self, remote_url: &str, mirror: bool) -> GitResult<()> { + let mut args = vec!["clone".to_string()]; + args.push("--bare".to_string()); + if mirror { + args.push("--mirror".to_string()); + } + args.push(remote_url.to_string()); + args.push(self.bare_dir.to_string_lossy().into_owned()); + + let result = duct::cmd("git", &args) + .stdout_capture() + .stderr_capture() + .unchecked() + .run() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + if !result.status.success() { + return Err(crate::error::GitError::CommandFailed { + status_code: result.status.code(), + stderr: String::from_utf8_lossy(&result.stderr).into_owned(), + }); + } + + Ok(()) + } +} diff --git a/remote/mod.rs b/remote/mod.rs new file mode 100644 index 0000000..e35ca0d --- /dev/null +++ b/remote/mod.rs @@ -0,0 +1,2 @@ +pub mod find_remote; +pub mod mirror; diff --git a/repository/find_license.rs b/repository/find_license.rs new file mode 100644 index 0000000..55f2e6a --- /dev/null +++ b/repository/find_license.rs @@ -0,0 +1,112 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Detect license by reading LICENSE/COPYING files and doing basic matching. + pub fn find_license(&self) -> GitResult { + let possible_paths = [ + "LICENSE", "LICENSE.md", "LICENSE.txt", + "LICENCE", "LICENCE.md", "LICENCE.txt", + "COPYING", "COPYING.md", "COPYING.txt", + "UNLICENSE", + ]; + + for path in &possible_paths { + let output = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "show", + &format!("HEAD:{path}"), + ]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::null()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + if output.status.success() { + let content = String::from_utf8_lossy(&output.stdout); + let (spdx, name, conf) = detect_license(&content); + if conf > 0.0 { + return Ok(FindLicenseResponse { + license_spdx: spdx.to_string(), + license_name: name.to_string(), + confidence: conf, + license_path: path.to_string(), + }); + } + } + } + + Ok(FindLicenseResponse::default()) + } +} + +/// Very basic license detection by keyword matching. +/// Returns (SPDX identifier, human-readable name, confidence). +fn detect_license(content: &str) -> (&'static str, &'static str, f64) { + let lower = content.to_lowercase(); + + // MIT + if lower.contains("permission is hereby granted, free of charge") && lower.contains("mit") { + return ("MIT", "MIT License", 0.95); + } + + // Apache 2.0 + if lower.contains("apache license, version 2.0") || lower.contains("apache-2.0") { + return ("Apache-2.0", "Apache License 2.0", 0.95); + } + + // GPL 3.0 + if lower.contains("gnu general public license") && lower.contains("version 3") { + return ("GPL-3.0", "GNU General Public License v3.0", 0.90); + } + // GPL 2.0 + if lower.contains("gnu general public license") && lower.contains("version 2") { + return ("GPL-2.0", "GNU General Public License v2.0", 0.90); + } + + // BSD 3 + if lower.contains("redistribution and use in source and binary forms") + && lower.contains("neither the name of") + { + return ("BSD-3-Clause", "BSD 3-Clause License", 0.85); + } + // BSD 2 + if lower.contains("redistribution and use in source and binary forms") { + return ("BSD-2-Clause", "BSD 2-Clause License", 0.80); + } + + // AGPL + if lower.contains("gnu affero general public license") { + return ("AGPL-3.0", "GNU Affero General Public License v3.0", 0.90); + } + + // LGPL + if lower.contains("gnu lesser general public license") { + return ("LGPL-3.0", "GNU Lesser General Public License v3.0", 0.85); + } + + // MPL + if lower.contains("mozilla public license") { + return ("MPL-2.0", "Mozilla Public License 2.0", 0.90); + } + + // Unlicense + if lower.contains("this is free and unencumbered software released into the public domain") { + return ("Unlicense", "The Unlicense", 0.95); + } + + // ISC + if lower.contains("permission to use, copy, modify, and/or distribute") + && lower.contains("isc") + { + return ("ISC", "ISC License", 0.80); + } + + ("", "", 0.0) +} diff --git a/repository/find_merge_base.rs b/repository/find_merge_base.rs new file mode 100644 index 0000000..62cd5b2 --- /dev/null +++ b/repository/find_merge_base.rs @@ -0,0 +1,73 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Find the best merge base for a set of revisions (OIDs). + pub fn find_merge_base(&self, request: FindMergeBaseRequest) -> GitResult { + if request.revisions.is_empty() { + return Ok(FindMergeBaseResponse::default()); + } + + let revisions: Vec = request + .revisions + .iter() + .map(|b| String::from_utf8_lossy(b).to_string()) + .collect(); + + if revisions.len() < 2 { + return Ok(FindMergeBaseResponse { + base_oid: revisions.first().cloned().unwrap_or_default(), + }); + } + + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "merge-base".to_string(), + ]; + args.extend(revisions.iter().cloned()); + + let output = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + if !output.status.success() { + return Ok(FindMergeBaseResponse { + base_oid: String::new(), + }); + } + + let base_oid = String::from_utf8_lossy(&output.stdout).trim().to_string(); + Ok(FindMergeBaseResponse { base_oid }) + } + + /// Check if one commit is an ancestor of another. + pub fn commit_is_ancestor(&self, request: CommitIsAncestorRequest) -> GitResult { + crate::sanitize::validate_revision(&request.ancestor_oid)?; + crate::sanitize::validate_revision(&request.descendant_oid)?; + + let result = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "merge-base", + "--is-ancestor", + &request.ancestor_oid, + &request.descendant_oid, + ]) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .map(|s| s.success()) + .unwrap_or(false); + + Ok(CommitIsAncestorResponse { is_ancestor: result }) + } +} diff --git a/repository/mod.rs b/repository/mod.rs new file mode 100644 index 0000000..ab74ff5 --- /dev/null +++ b/repository/mod.rs @@ -0,0 +1,6 @@ +pub mod find_license; +pub mod find_merge_base; +pub mod objects_size; +pub mod optimize; +pub mod raw_changes; +pub mod search_files; diff --git a/repository/objects_size.rs b/repository/objects_size.rs new file mode 100644 index 0000000..3bc34a6 --- /dev/null +++ b/repository/objects_size.rs @@ -0,0 +1,93 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Get sizes for a list of objects by OID. + pub fn objects_size(&self, request: ObjectsSizeRequest) -> GitResult { + if request.oids.is_empty() { + return Ok(ObjectsSizeResponse::default()); + } + + let mut input = String::new(); + for oid in &request.oids { + crate::sanitize::validate_revision(oid)?; + input.push_str(oid); + input.push('\n'); + } + + let mut child = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "cat-file", + "--batch-check=%(objectname) %(objectsize)", + ]) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + use std::io::Write; + if let Some(ref mut stdin) = child.stdin { + stdin.write_all(input.as_bytes()).map_err(|e| { + crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + } + })?; + } + + let output = child.wait_with_output().map_err(|e| { + crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + } + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut sizes = Vec::new(); + + for line in stdout.lines() { + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() >= 2 { + let oid = parts[0]; + let found = parts.get(1).map_or(true, |&s| s != "missing"); + let size = parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(0); + sizes.push(ObjectSize { + oid: oid.to_string(), + size, + found, + }); + } + } + + Ok(ObjectsSizeResponse { sizes }) + } + + /// Get total repository size on disk. + pub fn repository_size(&self) -> GitResult { + let output = std::process::Command::new("du") + .args(["-sb", &self.bare_dir.to_string_lossy()]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let size = stdout + .split_whitespace() + .next() + .and_then(|s| s.parse().ok()) + .unwrap_or(0); + + Ok(RepositorySizeResponse { size_bytes: size }) + } +} diff --git a/repository/optimize.rs b/repository/optimize.rs new file mode 100644 index 0000000..953cd82 --- /dev/null +++ b/repository/optimize.rs @@ -0,0 +1,168 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Run heuristic optimization based on repo state. + pub fn optimize_repository(&self, request: OptimizeRepositoryRequest) -> GitResult { + let strategy = OptimizeStrategy::try_from(request.strategy).unwrap_or(OptimizeStrategy::Heuristic); + + let mut stdout_all = String::new(); + let mut stderr_all = String::new(); + + match strategy { + OptimizeStrategy::Heuristic | OptimizeStrategy::Aggressive => { + let stats = self.get_repository_statistics()?; + + // Run commit-graph write if needed + if stats.commit_graph_size_bytes == 0 || strategy == OptimizeStrategy::Aggressive { + if let Ok(resp) = write_commit_graph(self, false, false) { + if !resp.ok { stderr_all.push_str(&resp.stderr); } + stdout_all.push_str(&resp.stdout); + } + } + + // Repack if many loose objects or packfiles + let repack_needed = stats.loose_object_count > 1000 || stats.packfile_count > 10; + + if repack_needed || strategy == OptimizeStrategy::Aggressive { + let full = strategy == OptimizeStrategy::Aggressive; + if let Ok(resp) = run_repack(self, full, true, true) { + if !resp.ok { stderr_all.push_str(&resp.stderr); } + stdout_all.push_str(&resp.stdout); + } + } + + // Prune if aggressive + if strategy == OptimizeStrategy::Aggressive { + if let Ok(resp) = run_gc(self, true, true) { + if !resp.ok { stderr_all.push_str(&resp.stderr); } + stdout_all.push_str(&resp.stdout); + } + } + } + OptimizeStrategy::Incremental => { + // Just run commit-graph write incrementally + if let Ok(resp) = write_commit_graph(self, false, false) { + if !resp.ok { stderr_all.push_str(&resp.stderr); } + stdout_all.push_str(&resp.stdout); + } + } + OptimizeStrategy::Unspecified => {} + } + + Ok(OptimizeRepositoryResponse { + ok: stderr_all.is_empty(), + stdout: stdout_all, + stderr: stderr_all, + }) + } + + fn get_repository_statistics(&self) -> GitResult { + // Count loose objects + let loose = std::fs::read_dir(self.bare_dir.join("objects")) + .map(|d| { + d.filter_map(|e| e.ok()) + .filter(|e| { + e.file_type().map(|t| t.is_dir()).unwrap_or(false) + && e.file_name().to_string_lossy().len() == 2 + }) + .count() as u64 + }) + .unwrap_or(0); + + // Count packfiles + let pack_dir = self.bare_dir.join("objects").join("pack"); + let pack_count = std::fs::read_dir(&pack_dir) + .map(|d| d.filter_map(|e| e.ok()).count() as u64) + .unwrap_or(0); + + // Check commit-graph + let cg_size = std::fs::metadata( + self.bare_dir.join("objects").join("info").join("commit-graph") + ) + .map(|m| m.len()) + .unwrap_or(0); + + Ok(RepositoryStatistics { + size_bytes: 0, + loose_object_count: loose, + packed_object_count: 0, + packfile_count: pack_count, + reference_count: 0, + commit_graph_size_bytes: cg_size, + multi_pack_index_size_bytes: 0, + }) + } +} + +fn write_commit_graph(gb: &GitBare, _split: bool, _replace: bool) -> GitResult { + let out = std::process::Command::new("git") + .args([ + "--git-dir", &gb.bare_dir.to_string_lossy(), + "commit-graph", "write", "--reachable", + ]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + Ok(RepositoryMaintenanceResponse { + ok: out.status.success(), + stdout: String::from_utf8_lossy(&out.stdout).into_owned(), + stderr: String::from_utf8_lossy(&out.stderr).into_owned(), + }) +} + +fn run_repack(gb: &GitBare, full: bool, bitmaps: bool, _midx: bool) -> GitResult { + let mut args = vec![ + "--git-dir".to_string(), gb.bare_dir.to_string_lossy().into_owned(), + "repack".to_string(), + ]; + if full { args.push("-ad".to_string()); } else { args.push("-d".to_string()); } + if bitmaps { args.push("--write-bitmap-index".to_string()); } + + let out = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + Ok(RepositoryMaintenanceResponse { + ok: out.status.success(), + stdout: String::from_utf8_lossy(&out.stdout).into_owned(), + stderr: String::from_utf8_lossy(&out.stderr).into_owned(), + }) +} + +fn run_gc(gb: &GitBare, prune: bool, aggressive: bool) -> GitResult { + let mut args = vec![ + "--git-dir".to_string(), gb.bare_dir.to_string_lossy().into_owned(), + "gc".to_string(), + ]; + if prune { args.push("--prune=now".to_string()); } + if aggressive { args.push("--aggressive".to_string()); } + + let out = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + Ok(RepositoryMaintenanceResponse { + ok: out.status.success(), + stdout: String::from_utf8_lossy(&out.stdout).into_owned(), + stderr: String::from_utf8_lossy(&out.stderr).into_owned(), + }) +} diff --git a/repository/raw_changes.rs b/repository/raw_changes.rs new file mode 100644 index 0000000..20543a1 --- /dev/null +++ b/repository/raw_changes.rs @@ -0,0 +1,81 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Get raw changes between two revisions (file-level changes only, no diff content). + pub fn get_raw_changes(&self, request: GetRawChangesRequest) -> GitResult { + crate::sanitize::validate_revision(&request.base)?; + crate::sanitize::validate_revision(&request.head)?; + + let output = std::process::Command::new("git") + .args([ + "--git-dir", + &self.bare_dir.to_string_lossy(), + "diff-tree", + "--raw", + "-r", + "--root", + &request.base, + &request.head, + ]) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut changes = Vec::new(); + + for line in stdout.lines() { + let line = line.trim(); + if !line.starts_with(':') { continue; } + let line = &line[1..]; + + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() < 5 { continue; } + + let old_mode = u32::from_str_radix(parts[0], 8).unwrap_or(0); + let new_mode = u32::from_str_radix(parts[1], 8).unwrap_or(0); + let old_oid = parts[2].to_string(); + let new_oid = parts[3].to_string(); + let status_str = parts[4]; + let status_letter = status_str.chars().next().unwrap_or('M'); + + let operation = match status_letter { + 'A' => raw_change::Operation::RawChangeOperationAdded as i32, + 'D' => raw_change::Operation::RawChangeOperationDeleted as i32, + 'R' => raw_change::Operation::RawChangeOperationRenamed as i32, + 'C' => raw_change::Operation::RawChangeOperationCopied as i32, + 'M' | 'T' => raw_change::Operation::RawChangeOperationModified as i32, + _ => raw_change::Operation::RawChangeOperationUnspecified as i32, + }; + + let (old_path, new_path) = if parts.len() >= 6 { + (parts[5].to_string(), if status_letter == 'R' || status_letter == 'C' { + parts.get(6).map(|s| s.to_string()).unwrap_or_default() + } else { + String::new() + }) + } else { + (String::new(), String::new()) + }; + + changes.push(RawChange { + operation, + old_path, + new_path, + old_mode, + new_mode, + old_oid, + new_oid, + similarity: 0.0, + }); + } + + Ok(GetRawChangesResponse { changes }) + } +} diff --git a/repository/search_files.rs b/repository/search_files.rs new file mode 100644 index 0000000..648dbee --- /dev/null +++ b/repository/search_files.rs @@ -0,0 +1,125 @@ +use crate::bare::GitBare; +use crate::error::GitResult; +use crate::pb::*; + +impl GitBare { + /// Search file contents with a regex pattern. + pub fn search_files_by_content(&self, request: SearchFilesByContentRequest) -> GitResult { + crate::sanitize::validate_revision(&request.revision)?; + + let revision = if request.revision.is_empty() { "HEAD" } else { &request.revision }; + let max_results = if request.max_results == 0 { 100 } else { request.max_results }; + + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "grep".to_string(), + "-I".to_string(), // don't match binary files + "--line-number".to_string(), + "--column".to_string(), + ]; + + if !request.case_sensitive { + args.push("-i".to_string()); + } + + args.push(format!("--max-count={}", max_results)); + args.push("-e".to_string()); + args.push(request.query.clone()); + args.push(revision.to_string()); + + let output = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + // git grep returns exit code 1 when no matches found — that's not an error + let stdout = String::from_utf8_lossy(&output.stdout); + let mut results = Vec::new(); + + for line in stdout.lines() { + // Format: path:line:col:matched_text + if let Some((path_and_rest, matched)) = line.rsplit_once(':') { + let prefix_parts: Vec<&str> = path_and_rest.rsplitn(3, ':').collect(); + if prefix_parts.len() >= 3 { + if let Ok(line_num) = prefix_parts[0].parse::() { + results.push(SearchResult { + path: prefix_parts[2].to_string(), + line: line_num, + matched_text: matched.to_string(), + }); + } + } + } + } + + Ok(SearchFilesByContentResponse { results }) + } + + /// Search file names matching a pattern. + pub fn search_files_by_name(&self, request: SearchFilesByNameRequest) -> GitResult { + let revision = if request.revision.is_empty() { "HEAD" } else { &request.revision }; + crate::sanitize::validate_revision(revision)?; + + let max_results = if request.max_results == 0 { 100 } else { request.max_results }; + + let mut args = vec![ + "--git-dir".to_string(), + self.bare_dir.to_string_lossy().into_owned(), + "ls-tree".to_string(), + ]; + + if request.recursive { + args.push("-r".to_string()); + } + + args.push("--name-only".to_string()); + args.push(revision.to_string()); + + let output = std::process::Command::new("git") + .args(&args) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .output() + .map_err(|e| crate::error::GitError::CommandFailed { + status_code: None, + stderr: e.to_string(), + })?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut results = Vec::new(); + + for line in stdout.lines() { + let path = line.trim(); + if path.is_empty() || crate::sanitize::validate_file_path(path).is_err() { + continue; + } + + // Simple substring/case-insensitive matching for file names + let query = &request.query; + let matched = if query.is_empty() { + true + } else { + path.to_lowercase().contains(&query.to_lowercase()) + }; + + if matched { + results.push(SearchResult { + path: path.to_string(), + line: 0, + matched_text: String::new(), + }); + if results.len() >= max_results as usize { + break; + } + } + } + + Ok(SearchFilesByNameResponse { results }) + } +} diff --git a/server/commit.rs b/server/commit.rs index d576249..c8ed618 100644 --- a/server/commit.rs +++ b/server/commit.rs @@ -275,4 +275,123 @@ impl commit_service_server::CommitService for GitksService { m.record("ok"); Ok(tonic::Response::new(resp)) } + + + async fn find_commit( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.CommitService/FindCommit"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.find_commit(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn list_commits_by_oid( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.CommitService/ListCommitsByOid"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.list_commits_by_oid(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn commit_is_ancestor( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.CommitService/CommitIsAncestor"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.commit_is_ancestor(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn check_objects_exist( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.CommitService/CheckObjectsExist"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.check_objects_exist(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn commits_by_message( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.CommitService/CommitsByMessage"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.commits_by_message(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn get_commit_stats( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.CommitService/GetCommitStats"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.get_commit_stats(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn last_commit_for_path( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.CommitService/LastCommitForPath"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.last_commit_for_path(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + + async fn count_commits( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.CommitService/CountCommits"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.count_commits(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn count_diverging_commits( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.CommitService/CountDivergingCommits"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.count_diverging_commits(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } } diff --git a/server/diff.rs b/server/diff.rs index 76950a2..ddc8366 100644 --- a/server/diff.rs +++ b/server/diff.rs @@ -169,4 +169,48 @@ impl diff_service_server::DiffService for GitksService { m.record("ok"); Ok(tonic::Response::new(resp)) } + + + type RawDiffStream = tokio_stream::wrappers::ReceiverStream>; + type RawPatchStream = tokio_stream::wrappers::ReceiverStream>; + + async fn raw_diff( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.DiffService/RawDiff"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let chunks = gb.raw_diff(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(into_stream(chunks))) + } + + async fn raw_patch( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.DiffService/RawPatch"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let chunks = gb.raw_patch(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(into_stream(chunks))) + } + + + async fn find_changed_paths( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.DiffService/FindChangedPaths"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.find_changed_paths(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } } diff --git a/server/mod.rs b/server/mod.rs index aee2304..c6e4dbd 100644 --- a/server/mod.rs +++ b/server/mod.rs @@ -38,6 +38,8 @@ mod commit; mod diff; mod merge; mod pack; +mod refs; +mod remote; mod repository; mod repository_maint; mod tag; @@ -53,8 +55,8 @@ use crate::bare::GitBare; use crate::error::{GitError, GitResult}; use crate::pb::{ archive_service_server, blame_service_server, branch_service_server, commit_service_server, - diff_service_server, merge_service_server, pack_service_server, repository_service_server, - tag_service_server, tree_service_server, + diff_service_server, merge_service_server, pack_service_server, ref_service_server, + remote_service_server, repository_service_server, tag_service_server, tree_service_server, }; #[derive(Clone)] @@ -473,6 +475,8 @@ pub async fn serve( .add_service(diff_service_server::DiffServiceServer::new(svc.clone())) .add_service(merge_service_server::MergeServiceServer::new(svc.clone())) .add_service(pack_service_server::PackServiceServer::new(svc.clone())) + .add_service(ref_service_server::RefServiceServer::new(svc.clone())) + .add_service(remote_service_server::RemoteServiceServer::new(svc.clone())) .add_service(tag_service_server::TagServiceServer::new(svc.clone())) .add_service(tree_service_server::TreeServiceServer::new(svc)); tracing::info!("server ready, starting to accept connections"); diff --git a/server/refs.rs b/server/refs.rs new file mode 100644 index 0000000..0501143 --- /dev/null +++ b/server/refs.rs @@ -0,0 +1,85 @@ +use crate::pb::*; +use crate::pb::ref_service_server::RefService; + +use super::GitksService; + +#[tonic::async_trait] +impl RefService for GitksService { + async fn find_default_branch_name( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RefService/FindDefaultBranchName"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.find_default_branch_name().map_err(super::into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn ref_exists( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RefService/RefExists"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.ref_exists(inner).map_err(super::into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn update_references( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RefService/UpdateReferences"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.update_references(inner).map_err(super::into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn delete_refs( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RefService/DeleteRefs"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.delete_refs(inner).map_err(super::into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn find_refs_by_oid( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RefService/FindRefsByOID"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.find_refs_by_oid(inner).map_err(super::into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn list_refs( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RefService/ListRefs"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.list_all_refs(inner).map_err(super::into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } +} diff --git a/server/remote.rs b/server/remote.rs new file mode 100644 index 0000000..a078123 --- /dev/null +++ b/server/remote.rs @@ -0,0 +1,43 @@ +use crate::pb::*; +use crate::pb::remote_service_server::RemoteService; +use crate::remote::find_remote::{find_remote_repository, find_remote_root_ref}; + +use super::GitksService; + +#[tonic::async_trait] +impl RemoteService for GitksService { + async fn find_remote_repository( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RemoteService/FindRemoteRepository"); + let inner = request.into_inner(); + let resp = find_remote_repository(inner).map_err(super::into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn find_remote_root_ref( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RemoteService/FindRemoteRootRef"); + let inner = request.into_inner(); + let resp = find_remote_root_ref(inner).map_err(super::into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn update_remote_mirror( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RemoteService/UpdateRemoteMirror"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.update_remote_mirror(inner).map_err(super::into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } +} diff --git a/server/repository.rs b/server/repository.rs index 6a757d7..00deb00 100644 --- a/server/repository.rs +++ b/server/repository.rs @@ -446,7 +446,6 @@ impl repository_service_server::RepositoryService for GitksService { Ok(tonic::Response::new(resp)) } - // ── Hooks Management ──────────────────────────────────────────── async fn list_hooks( &self, @@ -508,7 +507,6 @@ impl repository_service_server::RepositoryService for GitksService { Ok(tonic::Response::new(())) } - // ── Snapshot Operations ────────────────────────────────────────── async fn create_snapshot( &self, @@ -614,7 +612,6 @@ impl repository_service_server::RepositoryService for GitksService { Ok(tonic::Response::new(())) } - // ── Repository Move ────────────────────────────────────────────── type FetchRepositoryDataStream = ReceiverStream>; @@ -706,4 +703,162 @@ impl repository_service_server::RepositoryService for GitksService { Ok(tonic::Response::new(ReceiverStream::new(rx))) } + + + async fn find_merge_base( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/FindMergeBase"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.find_merge_base(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn write_ref( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/WriteRef"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.write_ref(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn search_files_by_content( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/SearchFilesByContent"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.search_files_by_content(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn search_files_by_name( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/SearchFilesByName"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.search_files_by_name(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + + async fn objects_size( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/ObjectsSize"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.objects_size(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn repository_size( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/RepositorySize"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.repository_size().map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn fetch_remote( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/FetchRemote"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.fetch_remote(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn create_repository_from_url( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/CreateRepositoryFromURL"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let bare_dir = self.resolve_for_init(inner.repository.as_ref())?; + let gb = crate::bare::GitBare::new(bare_dir); + gb.create_repository_from_url(&inner.remote_url, inner.mirror) + .map_err(into_status)?; + if let Some(ref hm) = self.hook_manager { + hm.install_hooks(&gb.bare_dir).map_err(into_status)?; + } + self.notify_ref_update(&self.repo_label(inner.repository.as_ref()), "HEAD", "", ""); + m.record("ok"); + Ok(tonic::Response::new(CreateRepositoryFromUrlResponse { + repository: Some(Repository { + header: inner.repository, + bare: true, + ..Default::default() + }), + })) + } + + + async fn find_license( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/FindLicense"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.find_license().map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn optimize_repository( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/OptimizeRepository"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.optimize_repository(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } + + async fn get_raw_changes( + &self, + request: tonic::Request, + ) -> Result, tonic::Status> { + let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/GetRawChanges"); + let inner = request.into_inner(); + let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?; + let gb = self.resolve(inner.repository.as_ref())?; + let resp = gb.get_raw_changes(inner).map_err(into_status)?; + m.record("ok"); + Ok(tonic::Response::new(resp)) + } } diff --git a/task.md b/task.md new file mode 100644 index 0000000..1a5a8b9 --- /dev/null +++ b/task.md @@ -0,0 +1,225 @@ +# GitKS RPC 补齐任务 + +> 对照 Gitaly 分析后,梳理有必要实现但目前缺失的功能,按优先级排列。 +> 每个任务标注:类别、预估工作量、前置依赖、实现思路。 + +--- + +## P0 — 核心功能缺失(影响基本使用场景) + +### P0-1. `RefService` — 原子性引用操作 + +| 项 | 内容 | +|------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `UpdateReferences`, `DeleteRefs`, `FindDefaultBranchName`, `RefExists` | +| **Proto** | 新建 `proto/ref.proto`(独立 RefService) | +| **工作量** | **M**(3-5 天) | +| **为什么必要** | 当前 `branch.proto` / `tag.proto` 每个操作单独一个 RPC,无法做批量原子更新。`UpdateReferences` 是 Gitaly 中最核心的写操作之一,支持 `expected_old_oid` 校验 | +| **实现思路** | 1. 新建 `ref/` 模块 2. `UpdateReferences` 调用 `git update-ref --stdin` 批量原子更新 3. `DeleteRefs` 调用 `git update-ref -d` 批量删除 4. `RefExists` 用 `gix` 检查 reference 是否存在 5. `FindDefaultBranchName` 从已有的 `default_branch_name()` 抽取 | + +### P0-2. `RepositoryService` — FindMergeBase + +| 项 | 内容 | +|------------|-----------------------------------------------------------------------------| +| **新增 RPC** | `FindMergeBase` | +| **Proto** | 扩展现有 `proto/repository.proto` | +| **工作量** | **S**(1 天) | +| **为什么必要** | diff、merge、rebase 操作都依赖 merge-base 计算。当前 GitKS 的 merge/diff 模块各自计算,缺少独立 API | +| **实现思路** | 调用 `gix::Repository::merge_base()` 返回两个 revision 的 merge base OID | + +### P0-3. `RepositoryService` — SearchFiles(代码搜索) + +| 项 | 内容 | +|------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `SearchFilesByContent`, `SearchFilesByName` | +| **Proto** | 扩展现有 `proto/repository.proto` | +| **工作量** | **M**(2-3 天) | +| **为什么必要** | 代码搜索是代码托管平台的基础功能,当前完全缺失 | +| **实现思路** | `SearchFilesByContent` → `git grep -I --line-number --column `;`SearchFilesByName` → `git ls-tree -r --name-only ` + 正则过滤。需注意大仓库性能(加 timeout、limit) | + +### P0-4. `RepositoryService` — WriteRef + +| 项 | 内容 | +|------------|------------------------------------------------------------------------------------------| +| **新增 RPC** | `WriteRef` | +| **Proto** | 扩展现有 `proto/repository.proto` | +| **工作量** | **S**(0.5 天) | +| **为什么必要** | 直接写 ref 是最底层的仓库操作,Replica 同步、快照恢复都依赖此能力。当前 `sync.rs` 中 `update_local_ref` 是内部函数,应暴露为 RPC | +| **实现思路** | `git update-ref ` -- 已有 `update_local_ref` 可直接封装 | + +--- + +## P1 — 重要功能缺失(影响高级场景) + +### P1-1. `RemoteService` — 远程仓库交互 + +| 项 | 内容 | +|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **新增 Service** | `RemoteService`(3 个 RPC) | +| **新增 RPC** | `FindRemoteRepository`, `FindRemoteRootRef`, `UpdateRemoteMirror` | +| **Proto** | 新建 `proto/remote.proto` | +| **工作量** | **L**(5-7 天) | +| **为什么必要** | 支持从远程 URL 导入仓库、镜像同步。`FetchRemote` 在 Gitaly 的 RepositoryService 中也有对应 | +| **实现思路** | 1. `FindRemoteRepository` → `git ls-remote ` 2. `FindRemoteRootRef` → 取 ls-remote 的 HEAD 3. `UpdateRemoteMirror` → `git remote add` + `git fetch --mirror` + 清理。需注意认证(支持 SSH key / token 注入) | + +### P1-2. `RepositoryService` — FetchRemote / CreateRepositoryFromURL + +| 项 | 内容 | +|------------|----------------------------------------------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `FetchRemote`, `CreateRepositoryFromURL` | +| **Proto** | 扩展现有 `proto/repository.proto` | +| **工作量** | **M**(3-4 天) | +| **为什么必要** | 仓库导入是核心 onboarding 流程,当前只能创建空仓库 | +| **实现思路** | `CreateRepositoryFromURL` → `git clone --bare --mirror `;`FetchRemote` → `git fetch `。复用 RemoteService 的认证基础设施 | + +### P1-3. `CommitService` — 扩展查询能力 + +| 项 | 内容 | +|------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `FindCommit`, `ListCommitsByOid`, `CommitIsAncestor`, `CheckObjectsExist`, `CommitsByMessage` | +| **Proto** | 扩展现有 `proto/commit.proto` | +| **工作量** | **M**(3-4 天) | +| **为什么必要** | 当前 `list_commits` / `get_commit` 太基础,缺少批量查询、ancestor 判断、message 搜索等常用模式 | +| **实现思路** | 1. `FindCommit` → `gix::Repository::find_object()` + 解析 Commit 2. `ListCommitsByOid` → 批量 `gix::Repository::find_commit()` 3. `CommitIsAncestor` → `gix::Repository::merge_base()` 判断 4. `CheckObjectsExist` → 批量 `gix::Repository::try_find()` 5. `CommitsByMessage` → `git log --all --grep=` | + +### P1-4. `RepositoryService` — ObjectsSize / RepositorySize + +| 项 | 内容 | +|------------|---------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `ObjectsSize`, `RepositorySize` | +| **Proto** | 扩展现有 `proto/repository.proto` | +| **工作量** | **S**(1 天) | +| **为什么必要** | 前端需要展示仓库大小、文件大小,当前 `RepositoryStatistics` 只有对象计数没有大小 | +| **实现思路** | `ObjectsSize` → `git cat-file --batch-check` 批量获取对象大小;`RepositorySize` → `du -sb ` 或遍历 objects 目录 | + +### P1-5. `DiffService` — RawDiff / RawPatch + +| 项 | 内容 | +|------------|---------------------------------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `RawDiff`, `RawPatch` | +| **Proto** | 扩展现有 `proto/diff.proto` | +| **工作量** | **S**(1 天) | +| **为什么必要** | 当前 `get_diff` 返回结构化 protobuf,对于大 diff 非常低效。Raw 格式可直接流式返回文本,用于 patch 应用、邮件发送 | +| **实现思路** | `RawDiff` → `git diff ..` streaming stdout;`RawPatch` → `git format-patch ..` streaming。注意大 diff 时的内存控制 | + +### P1-6. `CommitService` — CommitStats / LastCommitForPath + +| 项 | 内容 | +|------------|--------------------------------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `CommitStats`, `LastCommitForPath` | +| **Proto** | 扩展现有 `proto/commit.proto` | +| **工作量** | **S**(1 天) | +| **为什么必要** | 文件列表需要显示最后修改 commit,commit 详情需要统计信息。当前 `CommitStats` 内嵌在 `Commit` message 中需额外请求才填充 | +| **实现思路** | `CommitStats` → `git diff --stat ^..` 解析输出;`LastCommitForPath` → `git log -1 --format=%H -- ` | + +--- + +## P2 — 锦上添花(完善体验) + +### P2-1. `RepositoryService` — FindLicense + +| 项 | 内容 | +|------------|-----------------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `FindLicense` | +| **工作量** | **S**(1 天) | +| **实现思路** | 基于 GitHub Licensee 算法:读取 `LICENSE*` / `COPYING*` 文件 → 用 go-license-detector 等价逻辑(Rust 可用 `askalono` crate)做文本匹配 | + +### P2-2. `RepositoryService` — OptimizeRepository + +| 项 | 内容 | +|------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `OptimizeRepository` | +| **工作量** | **M**(2-3 天) | +| **实现思路** | 根据仓库状态自动决定优化策略:loose objects > N → `repack -d`;packfiles > N → `repack -ad`;没有 commit-graph → `commit-graph write`;没有 bitmap → `repack -adb`。比当前单独调用 `gc`/`repack`/`write_commit_graph` 更智能 | + +### P2-3. `RepositoryService` — GetRawChanges + +| 项 | 内容 | +|------------|------------------------------------------------------------------------------| +| **新增 RPC** | `GetRawChanges` | +| **工作量** | **S**(0.5 天) | +| **实现思路** | `git diff-tree --raw -r ..` 返回纯文件级变更列表(旧模式、新模式、状态),不生成完整 diff 内容 | + +### P2-4. `CommitService` — CountCommits / CountDivergingCommits + +| 项 | 内容 | +|------------|-----------------------------------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `CountCommits`, `CountDivergingCommits` | +| **工作量** | **S**(0.5 天) | +| **实现思路** | `CountCommits` → `git rev-list --count `;`CountDivergingCommits` → `git rev-list --count --left-right ...` | + +### P2-5. `RefService` — FindRefsByOID / ListRefs(增强查询) + +| 项 | 内容 | +|------------|----------------------------------------------------------------------------------------------------------------------------------------------------------| +| **新增 RPC** | `FindRefsByOID`, `ListRefs` | +| **工作量** | **S**(1 天) | +| **实现思路** | `FindRefsByOID` → `git for-each-ref --points-at=`;`ListRefs` → `git for-each-ref --format=... --sort=...` 通用 ref 列表(当前只能在 branch/tag service 中分别查询) | + +### P2-6. `DiffService` — FindChangedPaths + +| 项 | 内容 | +|------------|-----------------------------------------------------------------------------------------------| +| **新增 RPC** | `FindChangedPaths` | +| **工作量** | **S**(0.5 天) | +| **实现思路** | `git diff-tree --name-status -r ..` 只返回变更的文件路径和状态(A/M/D/R),无 diff 内容,适合只展示文件列表的场景 | + +--- + +## P3 — 低优先级(生态特定 / 边缘场景) + +### P3-1. ObjectPoolService(Fork 去重) + +| 项 | 内容 | +|----------------|------------------------------------------------------------------------------| +| **新增 Service** | `ObjectPoolService`(6 个 RPC) | +| **工作量** | **XL**(2-3 周) | +| **前置依赖** | P0-1 `UpdateReferences` 稳定后 | +| **为什么低优** | Fork 去重是 GitLab.com 级别的需求。单租户或小规模部署用不上,且实现复杂(需管理 alternates、pool 生命周期、GC 协调) | + +### P3-2. HookService(Server 端 gRPC Hook 回调) + +| 项 | 内容 | +|----------------|------------------------------------------------------------------------------------| +| **新增 Service** | `HookService`(6 个 RPC) | +| **工作量** | **L**(1-2 周) | +| **为什么低优** | GitKS 的 hook 是内嵌脚本执行的"客户端模式",改为 gRPC 回调的"server 模式"需要对 hook runner 彻底重构,且需要下游客户端对接 | + +### P3-3. CommitService — GPG 签名相关 + +| 项 | 内容 | +|------------|-------------------------------------------------------------------------| +| **新增 RPC** | `GetCommitSignatures`, `FilterShasWithSignatures`, `GetTagSignatures` | +| **工作量** | **M**(2-3 天) | +| **为什么低优** | 需要 GPG 工具链依赖。可用 `gpg --verify` 或 `sequoia-openpgp`(Rust crate)实现,但非刚性需求 | + +### P3-4. SmartHTTP / SSH Service — Sidechannel + SSH 支持 + +| 项 | 内容 | +|-----------|--------------------------------------------------------------------------------------------------| +| **新增/扩展** | `PostUploadPackWithSidechannel`, `SSHUploadPack`, `SSHReceivePack` | +| **工作量** | **XL**(3-4 周) | +| **为什么低优** | Sidechannel 需要 Unix socket 旁路,平台依赖强。SSH 支持需要完整的 SSH server 协议栈(或依赖外部 SSH → gRPC 代理)。建议通过外部网关层来解决 | + +### P3-5. ServerService — 健康检查 / 磁盘统计 + +| 项 | 内容 | +|----------------|--------------------------------------------------------------------------------------------------------------------------| +| **新增 Service** | `ServerService`(4 个 RPC) | +| **工作量** | **S**(1 天) | +| **为什么低优** | GitKS 已有 Prometheus metrics endpoint + logging,`ServerInfo`/`ReadinessCheck`/`DiskStatistics` 更多用于 Kubernetes/平台集成。可快速补充 | + +--- + +## 汇总 + +| 优先级 | Service 数 | RPC 数 | 预估总工作量 | +|--------|-------------|---------|------------------------| +| **P0** | 2(扩展现有) | 9 | ~8 天 | +| **P1** | 1 新建 + 4 扩展 | 17 | ~17 天 | +| **P2** | 3 扩展 | 9 | ~7 天 | +| **P3** | 3 新建 + 2 扩展 | 22+ | ~10 周 | +| **合计** | | **57+** | **~13 周**(P0-P2 约 5 周) | + +> **建议路线**:优先完成 P0 + P1(共 26 个 RPC,约 4-5 周),可覆盖 80% 的常用场景。 +> P2 在核心功能稳定后逐步添加。P3 按实际用户需求驱动,不必全部实现。