fix(server): add periodic route cache cleanup
Add cleanup_route_cache() and a 120s background cleanup task to prevent unbounded DashMap growth from stale route entries. Fix init_tracing to return WorkerGuard so the file appender stays alive for the program lifetime.
This commit is contained in:
+118
@@ -120,6 +120,27 @@ impl GitksService {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn cleanup_route_cache(&self) {
|
||||
let before = self.route_cache.len();
|
||||
self.route_cache.retain(|_key, cached| {
|
||||
cached.created_at.elapsed() < ROUTE_CACHE_TTL
|
||||
});
|
||||
let removed = before - self.route_cache.len();
|
||||
if removed > 0 {
|
||||
tracing::debug!(removed, remaining = self.route_cache.len(), "route cache cleaned");
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start_route_cache_cleanup(svc: Self) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
let mut interval = tokio::time::interval(Duration::from_secs(120));
|
||||
loop {
|
||||
interval.tick().await;
|
||||
svc.cleanup_route_cache();
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn scan_all_repo(&self) -> GitResult<Vec<String>> {
|
||||
let root = self.repo_prefix.as_ref();
|
||||
let mut repos = Vec::new();
|
||||
@@ -353,6 +374,76 @@ impl GitksService {
|
||||
}
|
||||
}
|
||||
|
||||
/// Submit a write command through Raft consensus.
|
||||
/// This method:
|
||||
/// 1. Checks if this node is the Leader (via leader lease)
|
||||
/// 2. Creates a LogEntry with the command
|
||||
/// 3. Appends to local raft_log
|
||||
/// 4. Broadcasts AppendEntries to all followers
|
||||
/// 5. Waits for majority ACK (10 second timeout)
|
||||
/// 6. Advances commit_index and applies the command
|
||||
///
|
||||
/// Returns Ok(()) on success, or an error if consensus fails.
|
||||
pub async fn raft_consensus_write(
|
||||
&self,
|
||||
command: crate::actor::raft_log::Command,
|
||||
) -> Result<(), tonic::Status> {
|
||||
let actor = self.node_actor.as_ref().ok_or_else(|| {
|
||||
tonic::Status::failed_precondition("node actor not initialized")
|
||||
})?;
|
||||
|
||||
// Send the command to the actor for Raft processing
|
||||
let result = ractor::call_t!(
|
||||
actor,
|
||||
GitNodeMessage::RaftWrite,
|
||||
10000, // 10 second timeout
|
||||
command
|
||||
);
|
||||
|
||||
match result {
|
||||
Ok(success) => {
|
||||
if success {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(tonic::Status::aborted("Raft consensus failed: not leader or timeout"))
|
||||
}
|
||||
}
|
||||
Err(e) => Err(tonic::Status::internal(format!("Raft write error: {e}"))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform a ReadIndex check to ensure this node can serve consistent reads.
|
||||
/// This confirms the Leader is still valid before reading from local state.
|
||||
pub async fn raft_read_index(&self) -> Result<(), tonic::Status> {
|
||||
let actor = self.node_actor.as_ref().ok_or_else(|| {
|
||||
tonic::Status::failed_precondition("node actor not initialized")
|
||||
})?;
|
||||
|
||||
let request = crate::actor::message::ReadIndexRequest {
|
||||
relative_path: String::new(),
|
||||
};
|
||||
|
||||
let result = ractor::call_t!(
|
||||
actor,
|
||||
GitNodeMessage::ReadIndex,
|
||||
5000,
|
||||
request
|
||||
);
|
||||
|
||||
match result {
|
||||
Ok(response) => {
|
||||
if response.is_leader {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(tonic::Status::failed_precondition(
|
||||
"not leader, cannot serve consistent read"
|
||||
))
|
||||
}
|
||||
}
|
||||
Err(e) => Err(tonic::Status::internal(format!("ReadIndex error: {e}"))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Inject repo_prefix as storage_path into the client-provided header
|
||||
fn prefixed_header(&self, header: &crate::pb::RepositoryHeader) -> crate::pb::RepositoryHeader {
|
||||
crate::pb::RepositoryHeader {
|
||||
@@ -475,11 +566,13 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult<std::process::Ou
|
||||
gb.bare_dir.to_string_lossy().into_owned(),
|
||||
];
|
||||
full_args.extend(args.iter().map(|s| s.to_string()));
|
||||
let cmd_name = args.first().copied().unwrap_or("unknown");
|
||||
tracing::debug!(
|
||||
repo = %gb.bare_dir.display(),
|
||||
args = %full_args.iter().skip(2).cloned().collect::<Vec<_>>().join(" "),
|
||||
"spawning git subprocess"
|
||||
);
|
||||
let start = std::time::Instant::now();
|
||||
let result = std::process::Command::new("git")
|
||||
.args(&full_args)
|
||||
.output()
|
||||
@@ -491,16 +584,41 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult<std::process::Ou
|
||||
);
|
||||
GitError::Internal(format!("failed to spawn git: {e}"))
|
||||
})?;
|
||||
let elapsed = start.elapsed();
|
||||
let elapsed_ms = elapsed.as_millis() as u64;
|
||||
|
||||
// Record metrics
|
||||
crate::metrics::record_git_cmd(cmd_name, elapsed);
|
||||
|
||||
// Slow operation warning
|
||||
if elapsed.as_secs() >= 1 {
|
||||
tracing::warn!(
|
||||
repo = %gb.bare_dir.display(),
|
||||
command = cmd_name,
|
||||
elapsed_ms,
|
||||
"slow git subprocess"
|
||||
);
|
||||
}
|
||||
|
||||
if !result.status.success() {
|
||||
let stderr_str = String::from_utf8_lossy(&result.stderr);
|
||||
tracing::warn!(
|
||||
repo = %gb.bare_dir.display(),
|
||||
command = cmd_name,
|
||||
status = ?result.status.code(),
|
||||
stderr = %stderr_str.trim(),
|
||||
elapsed_ms,
|
||||
"git subprocess exited with non-zero status"
|
||||
);
|
||||
return Err(structured_git_error(&stderr_str, result.status.code()));
|
||||
}
|
||||
|
||||
tracing::debug!(
|
||||
repo = %gb.bare_dir.display(),
|
||||
command = cmd_name,
|
||||
elapsed_ms,
|
||||
"git subprocess completed"
|
||||
);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user