refactor(actor): implement Raft consensus algorithm for cluster leader election

- Add voting mechanism with term tracking and vote persistence
- Implement election triggering logic with majority vote counting
- Add primary/replica role transition handling with state management
- Integrate health check failure detection for automatic elections
- Refactor actor messaging system for distributed coordination
- Update repository registration to query cluster for existing primary
- Add broadcast mechanism for role change notifications
- Implement proper term comparison and duplicate request filtering
- Upgrade dependency versions including tokio-util for async utilities
- Optimize code formatting and line wrapping for improved readability
- Remove redundant blank lines and improve code structure consistency
- Enhance error logging and trace information for debugging purposes
This commit is contained in:
zhenyi
2026-06-10 12:35:10 +08:00
parent ab32e8826e
commit 9a0c26e5f6
40 changed files with 1184 additions and 449 deletions
+81 -13
View File
@@ -45,9 +45,11 @@ mod repository_maint;
mod tag;
mod tree;
use dashmap::DashMap;
use gix::discover::is_git;
use ractor::{ActorCell, ActorRef};
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
use tokio_stream::wrappers::ReceiverStream;
use crate::actor::message::{GitNodeMessage, RouteDecision};
@@ -59,6 +61,16 @@ use crate::pb::{
remote_service_server, repository_service_server, tag_service_server, tree_service_server,
};
/// TTL for route cache entries.
///
/// A cached route is only served while the time elapsed since its
/// `created_at` instant is strictly below this duration.
const ROUTE_CACHE_TTL: Duration = Duration::from_secs(60); // 1 minute
/// A cached route entry with creation time.
///
/// Values of this type are stored in `GitksService::route_cache`, keyed by
/// the repository's relative path.
#[derive(Clone)]
pub struct CachedRoute {
/// The routing decision that is cloned and returned on a cache hit.
pub decision: RouteDecision,
/// Instant at which the entry was inserted; compared against
/// `ROUTE_CACHE_TTL` to decide whether the entry may still be served.
pub created_at: Instant,
}
#[derive(Clone)]
pub struct GitksService {
pub repo_prefix: PathBuf,
@@ -67,6 +79,7 @@ pub struct GitksService {
pub disk_cache: Option<crate::disk_cache::DiskCache>,
pub pack_cache: Option<crate::pack_cache::PackCache>,
pub hook_manager: Option<crate::hooks::HookManager>,
pub route_cache: DashMap<String, CachedRoute>,
}
impl GitksService {
@@ -78,6 +91,7 @@ impl GitksService {
disk_cache: None,
pack_cache: None,
hook_manager: None,
route_cache: DashMap::new(),
}
}
@@ -125,6 +139,22 @@ impl GitksService {
is_write: bool,
) -> Result<Option<RouteDecision>, tonic::Status> {
use crate::actor::message::{ROLE_PRIMARY, ROLE_REPLICA};
// Check route cache for read requests
if !is_write
&& let Some(cached) = self.route_cache.get(&header.relative_path)
&& !cached.decision.grpc_addr.is_empty()
&& cached.decision.found
&& cached.created_at.elapsed() < ROUTE_CACHE_TTL
{
tracing::debug!(
relative_path = %header.relative_path,
grpc_addr = %cached.decision.grpc_addr,
"route cache hit"
);
return Ok(Some(cached.decision.clone()));
}
let members = ractor::pg::get_members(&"gitks_nodes".to_string());
let local = self.node_actor.as_ref().map(|actor| actor.get_cell());
let mut primary: Option<RouteDecision> = None;
@@ -152,19 +182,31 @@ impl GitksService {
replica = Some(decision);
}
}
if let Some(p) = primary {
return Ok(Some(p));
}
if let Some(r) = replica {
let result = if let Some(p) = primary {
Some(p)
} else if let Some(r) = replica {
tracing::info!(
storage_name = %r.storage_name,
relative_path = %r.relative_path,
"read request routed to replica"
);
return Ok(Some(r));
Some(r)
} else {
let _ = ROLE_PRIMARY;
None
};
// Cache result for read requests
if let Some(ref decision) = result {
self.route_cache.insert(
header.relative_path.clone(),
CachedRoute {
decision: decision.clone(),
created_at: Instant::now(),
},
);
}
let _ = ROLE_PRIMARY;
Ok(None)
Ok(result)
}
fn repo_label(&self, header: Option<&crate::pb::RepositoryHeader>) -> String {
@@ -180,7 +222,10 @@ impl GitksService {
}
/// Get the relative path from a repository header, if any.
pub(crate) fn repo_relative_path<'a>(&self, header: Option<&'a crate::pb::RepositoryHeader>) -> Option<&'a str> {
pub(crate) fn repo_relative_path<'a>(
&self,
header: Option<&'a crate::pb::RepositoryHeader>,
) -> Option<&'a str> {
header.and_then(|h| {
if h.relative_path.is_empty() {
None
@@ -287,6 +332,9 @@ impl GitksService {
// Invalidate moka caches
crate::server::cache::invalidate_repo(relative_path);
// Invalidate route cache
self.route_cache.remove(relative_path);
// Invalidate disk cache
if let Some(ref pc) = self.pack_cache {
pc.invalidate_repo(relative_path);
@@ -421,7 +469,7 @@ pub(crate) fn into_stream<T: Send + 'static>(
ReceiverStream::new(rx)
}
pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> Result<std::process::Output, tonic::Status> {
pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult<std::process::Output> {
let mut full_args: Vec<String> = vec![
"--git-dir".into(),
gb.bare_dir.to_string_lossy().into_owned(),
@@ -441,21 +489,41 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> Result<std::process::Outpu
error = %e,
"failed to spawn git subprocess"
);
tonic::Status::internal(e.to_string())
GitError::Internal(format!("failed to spawn git: {e}"))
})?;
if !result.status.success() {
let stderr = String::from_utf8_lossy(&result.stderr);
let stderr_str = String::from_utf8_lossy(&result.stderr);
tracing::warn!(
repo = %gb.bare_dir.display(),
status = ?result.status.code(),
stderr = %stderr.trim(),
stderr = %stderr_str.trim(),
"git subprocess exited with non-zero status"
);
return Err(tonic::Status::internal(stderr.trim().to_string()));
return Err(structured_git_error(&stderr_str, result.status.code()));
}
Ok(result)
}
/// Classify the stderr output of a failed git subprocess as a `GitError`.
///
/// The trimmed stderr text is scanned for known substrings (case-sensitive),
/// and the first matching group wins, in this order:
///
/// 1. `"not a git repository"` / `"does not exist"` -> `GitError::RepoNotFound`
/// 2. `"Permission denied"` / `"denied"` -> `GitError::PermissionDenied`
/// 3. `"is locked"` / `"Could not acquire"` -> `GitError::Locked`
/// 4. `"not found"` / `"do not have"` -> `GitError::NotFound`
///
/// When nothing matches, the error falls back to `GitError::CommandFailed`,
/// which carries the exit `code` together with the trimmed stderr.
fn structured_git_error(stderr: &str, code: Option<i32>) -> GitError {
    /// Returns `true` when `haystack` contains at least one of `needles`.
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|&needle| haystack.contains(needle))
    }

    let message = stderr.trim();

    if contains_any(message, &["not a git repository", "does not exist"]) {
        return GitError::RepoNotFound;
    }
    if contains_any(message, &["Permission denied", "denied"]) {
        return GitError::PermissionDenied(message.to_string());
    }
    if contains_any(message, &["is locked", "Could not acquire"]) {
        return GitError::Locked(message.to_string());
    }
    if contains_any(message, &["not found", "do not have"]) {
        return GitError::NotFound(message.to_string());
    }

    // No recognised pattern: keep the raw details for the caller.
    GitError::CommandFailed {
        status_code: code,
        stderr: message.to_string(),
    }
}
pub async fn serve(
addr: std::net::SocketAddr,
svc: GitksService,