fix(server): add periodic route cache cleanup

Add cleanup_route_cache() and a 120s background cleanup task to
prevent unbounded DashMap growth from stale route entries.
Fix init_tracing to return WorkerGuard so the file appender stays
alive for the program lifetime.
This commit is contained in:
zhenyi
2026-06-10 18:32:47 +08:00
parent 0782a9fe6d
commit 2dd384f7be
2 changed files with 237 additions and 31 deletions
+118
View File
@@ -120,6 +120,27 @@ impl GitksService {
self
}
/// Evict every route-cache entry whose age has reached `ROUTE_CACHE_TTL`.
///
/// An entry is kept only while `created_at.elapsed() < ROUTE_CACHE_TTL`.
/// When at least one entry is evicted, a debug event reports how many were
/// removed and how many remain.
///
/// The eviction count is tallied inside the `retain` predicate rather than
/// computed as `len_before - len_after`: the cache can be mutated by other
/// threads while the sweep runs, so the second length may exceed the first
/// and the unsigned subtraction would underflow.
pub fn cleanup_route_cache(&self) {
    let mut removed = 0usize;
    self.route_cache.retain(|_key, cached| {
        let fresh = cached.created_at.elapsed() < ROUTE_CACHE_TTL;
        if !fresh {
            removed += 1;
        }
        fresh
    });
    if removed > 0 {
        // `remaining` is a best-effort snapshot; concurrent inserts may already have changed it.
        tracing::debug!(removed, remaining = self.route_cache.len(), "route cache cleaned");
    }
}
/// Spawn a background Tokio task that sweeps the route cache every 120 seconds.
///
/// The task takes ownership of `svc` and loops forever, calling
/// `cleanup_route_cache` after each interval tick. Note that a Tokio
/// interval's first tick completes immediately, so one sweep runs as soon as
/// the task is first polled.
///
/// Returns the task's `JoinHandle`; the loop has no exit condition, so the
/// handle only resolves if the task is aborted or panics.
pub fn start_route_cache_cleanup(svc: Self) -> tokio::task::JoinHandle<()> {
    let sweep_period = Duration::from_secs(120);
    tokio::spawn(async move {
        let mut ticker = tokio::time::interval(sweep_period);
        loop {
            ticker.tick().await;
            svc.cleanup_route_cache();
        }
    })
}
pub fn scan_all_repo(&self) -> GitResult<Vec<String>> {
let root = self.repo_prefix.as_ref();
let mut repos = Vec::new();
@@ -353,6 +374,76 @@ impl GitksService {
}
}
/// Submit a write command through Raft consensus.
/// This method:
/// 1. Checks if this node is the Leader (via leader lease)
/// 2. Creates a LogEntry with the command
/// 3. Appends to local raft_log
/// 4. Broadcasts AppendEntries to all followers
/// 5. Waits for majority ACK (10 second timeout)
/// 6. Advances commit_index and applies the command
///
/// Returns Ok(()) on success, or an error if consensus fails.
pub async fn raft_consensus_write(
&self,
command: crate::actor::raft_log::Command,
) -> Result<(), tonic::Status> {
let actor = self.node_actor.as_ref().ok_or_else(|| {
tonic::Status::failed_precondition("node actor not initialized")
})?;
// Send the command to the actor for Raft processing
let result = ractor::call_t!(
actor,
GitNodeMessage::RaftWrite,
10000, // 10 second timeout
command
);
match result {
Ok(success) => {
if success {
Ok(())
} else {
Err(tonic::Status::aborted("Raft consensus failed: not leader or timeout"))
}
}
Err(e) => Err(tonic::Status::internal(format!("Raft write error: {e}"))),
}
}
/// Perform a ReadIndex check to ensure this node can serve consistent reads.
/// This confirms the Leader is still valid before reading from local state.
pub async fn raft_read_index(&self) -> Result<(), tonic::Status> {
let actor = self.node_actor.as_ref().ok_or_else(|| {
tonic::Status::failed_precondition("node actor not initialized")
})?;
let request = crate::actor::message::ReadIndexRequest {
relative_path: String::new(),
};
let result = ractor::call_t!(
actor,
GitNodeMessage::ReadIndex,
5000,
request
);
match result {
Ok(response) => {
if response.is_leader {
Ok(())
} else {
Err(tonic::Status::failed_precondition(
"not leader, cannot serve consistent read"
))
}
}
Err(e) => Err(tonic::Status::internal(format!("ReadIndex error: {e}"))),
}
}
/// Inject repo_prefix as storage_path into the client-provided header
fn prefixed_header(&self, header: &crate::pb::RepositoryHeader) -> crate::pb::RepositoryHeader {
crate::pb::RepositoryHeader {
@@ -475,11 +566,13 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult<std::process::Ou
gb.bare_dir.to_string_lossy().into_owned(),
];
full_args.extend(args.iter().map(|s| s.to_string()));
let cmd_name = args.first().copied().unwrap_or("unknown");
tracing::debug!(
repo = %gb.bare_dir.display(),
args = %full_args.iter().skip(2).cloned().collect::<Vec<_>>().join(" "),
"spawning git subprocess"
);
let start = std::time::Instant::now();
let result = std::process::Command::new("git")
.args(&full_args)
.output()
@@ -491,16 +584,41 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult<std::process::Ou
);
GitError::Internal(format!("failed to spawn git: {e}"))
})?;
let elapsed = start.elapsed();
let elapsed_ms = elapsed.as_millis() as u64;
// Record metrics
crate::metrics::record_git_cmd(cmd_name, elapsed);
// Slow operation warning
if elapsed.as_secs() >= 1 {
tracing::warn!(
repo = %gb.bare_dir.display(),
command = cmd_name,
elapsed_ms,
"slow git subprocess"
);
}
if !result.status.success() {
let stderr_str = String::from_utf8_lossy(&result.stderr);
tracing::warn!(
repo = %gb.bare_dir.display(),
command = cmd_name,
status = ?result.status.code(),
stderr = %stderr_str.trim(),
elapsed_ms,
"git subprocess exited with non-zero status"
);
return Err(structured_git_error(&stderr_str, result.status.code()));
}
tracing::debug!(
repo = %gb.bare_dir.display(),
command = cmd_name,
elapsed_ms,
"git subprocess completed"
);
Ok(result)
}