refactor(actor): implement Raft consensus algorithm for cluster leader election

- Add voting mechanism with term tracking and vote persistence
- Implement election triggering logic with majority vote counting
- Add primary/replica role transition handling with state management
- Integrate health check failure detection for automatic elections
- Refactor actor messaging system for distributed coordination
- Update repository registration to query cluster for existing primary
- Add broadcast mechanism for role change notifications
- Implement proper term comparison and duplicate request filtering
- Upgrade dependency versions including tokio-util for async utilities
- Optimize code formatting and line wrapping for improved readability
- Remove redundant blank lines and improve code structure consistency
- Enhance error logging and trace information for debugging purposes
This commit is contained in:
zhenyi
2026-06-10 12:35:10 +08:00
parent ab32e8826e
commit 9a0c26e5f6
40 changed files with 1184 additions and 449 deletions
-2
View File
@@ -276,7 +276,6 @@ impl commit_service_server::CommitService for GitksService {
Ok(tonic::Response::new(resp))
}
async fn find_commit(
&self,
request: tonic::Request<FindCommitRequest>,
@@ -368,7 +367,6 @@ impl commit_service_server::CommitService for GitksService {
Ok(tonic::Response::new(resp))
}
async fn count_commits(
&self,
request: tonic::Request<CountCommitsRequest>,
+4 -4
View File
@@ -170,9 +170,10 @@ impl diff_service_server::DiffService for GitksService {
Ok(tonic::Response::new(resp))
}
type RawDiffStream = tokio_stream::wrappers::ReceiverStream<Result<RawDiffResponse, tonic::Status>>;
type RawPatchStream = tokio_stream::wrappers::ReceiverStream<Result<RawPatchResponse, tonic::Status>>;
type RawDiffStream =
tokio_stream::wrappers::ReceiverStream<Result<RawDiffResponse, tonic::Status>>;
type RawPatchStream =
tokio_stream::wrappers::ReceiverStream<Result<RawPatchResponse, tonic::Status>>;
async fn raw_diff(
&self,
@@ -200,7 +201,6 @@ impl diff_service_server::DiffService for GitksService {
Ok(tonic::Response::new(into_stream(chunks)))
}
async fn find_changed_paths(
&self,
request: tonic::Request<FindChangedPathsRequest>,
+81 -13
View File
@@ -45,9 +45,11 @@ mod repository_maint;
mod tag;
mod tree;
use dashmap::DashMap;
use gix::discover::is_git;
use ractor::{ActorCell, ActorRef};
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
use tokio_stream::wrappers::ReceiverStream;
use crate::actor::message::{GitNodeMessage, RouteDecision};
@@ -59,6 +61,16 @@ use crate::pb::{
remote_service_server, repository_service_server, tag_service_server, tree_service_server,
};
/// TTL for route cache entries.
///
/// A cached route is only served while `created_at.elapsed()` is strictly
/// below this value; once an entry is older than that, the lookup ignores it
/// and the route is resolved again.
const ROUTE_CACHE_TTL: Duration = Duration::from_secs(60); // 1 minute
/// A cached route entry with creation time.
///
/// Values of this type are stored in `GitksService::route_cache`, keyed by the
/// repository's relative path.
#[derive(Clone)]
pub struct CachedRoute {
/// The routing decision that was resolved for the repository.
pub decision: RouteDecision,
/// Moment the entry was created; compared against `ROUTE_CACHE_TTL` to
/// decide whether the entry may still be served.
pub created_at: Instant,
}
#[derive(Clone)]
pub struct GitksService {
pub repo_prefix: PathBuf,
@@ -67,6 +79,7 @@ pub struct GitksService {
pub disk_cache: Option<crate::disk_cache::DiskCache>,
pub pack_cache: Option<crate::pack_cache::PackCache>,
pub hook_manager: Option<crate::hooks::HookManager>,
pub route_cache: DashMap<String, CachedRoute>,
}
impl GitksService {
@@ -78,6 +91,7 @@ impl GitksService {
disk_cache: None,
pack_cache: None,
hook_manager: None,
route_cache: DashMap::new(),
}
}
@@ -125,6 +139,22 @@ impl GitksService {
is_write: bool,
) -> Result<Option<RouteDecision>, tonic::Status> {
use crate::actor::message::{ROLE_PRIMARY, ROLE_REPLICA};
// Check route cache for read requests
if !is_write
&& let Some(cached) = self.route_cache.get(&header.relative_path)
&& !cached.decision.grpc_addr.is_empty()
&& cached.decision.found
&& cached.created_at.elapsed() < ROUTE_CACHE_TTL
{
tracing::debug!(
relative_path = %header.relative_path,
grpc_addr = %cached.decision.grpc_addr,
"route cache hit"
);
return Ok(Some(cached.decision.clone()));
}
let members = ractor::pg::get_members(&"gitks_nodes".to_string());
let local = self.node_actor.as_ref().map(|actor| actor.get_cell());
let mut primary: Option<RouteDecision> = None;
@@ -152,19 +182,31 @@ impl GitksService {
replica = Some(decision);
}
}
if let Some(p) = primary {
return Ok(Some(p));
}
if let Some(r) = replica {
let result = if let Some(p) = primary {
Some(p)
} else if let Some(r) = replica {
tracing::info!(
storage_name = %r.storage_name,
relative_path = %r.relative_path,
"read request routed to replica"
);
return Ok(Some(r));
Some(r)
} else {
let _ = ROLE_PRIMARY;
None
};
// Cache the resolved route (the cache is only consulted for read requests)
if let Some(ref decision) = result {
self.route_cache.insert(
header.relative_path.clone(),
CachedRoute {
decision: decision.clone(),
created_at: Instant::now(),
},
);
}
let _ = ROLE_PRIMARY;
Ok(None)
Ok(result)
}
fn repo_label(&self, header: Option<&crate::pb::RepositoryHeader>) -> String {
@@ -180,7 +222,10 @@ impl GitksService {
}
/// Get the relative path from a repository header, if any.
pub(crate) fn repo_relative_path<'a>(&self, header: Option<&'a crate::pb::RepositoryHeader>) -> Option<&'a str> {
pub(crate) fn repo_relative_path<'a>(
&self,
header: Option<&'a crate::pb::RepositoryHeader>,
) -> Option<&'a str> {
header.and_then(|h| {
if h.relative_path.is_empty() {
None
@@ -287,6 +332,9 @@ impl GitksService {
// Invalidate moka caches
crate::server::cache::invalidate_repo(relative_path);
// Invalidate route cache
self.route_cache.remove(relative_path);
// Invalidate disk cache
if let Some(ref pc) = self.pack_cache {
pc.invalidate_repo(relative_path);
@@ -421,7 +469,7 @@ pub(crate) fn into_stream<T: Send + 'static>(
ReceiverStream::new(rx)
}
pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> Result<std::process::Output, tonic::Status> {
pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> GitResult<std::process::Output> {
let mut full_args: Vec<String> = vec![
"--git-dir".into(),
gb.bare_dir.to_string_lossy().into_owned(),
@@ -441,21 +489,41 @@ pub(crate) fn git_cmd(gb: &GitBare, args: &[&str]) -> Result<std::process::Outpu
error = %e,
"failed to spawn git subprocess"
);
tonic::Status::internal(e.to_string())
GitError::Internal(format!("failed to spawn git: {e}"))
})?;
if !result.status.success() {
let stderr = String::from_utf8_lossy(&result.stderr);
let stderr_str = String::from_utf8_lossy(&result.stderr);
tracing::warn!(
repo = %gb.bare_dir.display(),
status = ?result.status.code(),
stderr = %stderr.trim(),
stderr = %stderr_str.trim(),
"git subprocess exited with non-zero status"
);
return Err(tonic::Status::internal(stderr.trim().to_string()));
return Err(structured_git_error(&stderr_str, result.status.code()));
}
Ok(result)
}
/// Classify the stderr output of a failed git subprocess as a `GitError`.
///
/// The trimmed stderr text is checked for known substrings, in order of
/// precedence; the first group that matches decides the variant:
///
/// 1. `"not a git repository"` / `"does not exist"` -> `GitError::RepoNotFound`
/// 2. `"Permission denied"` / `"denied"` -> `GitError::PermissionDenied`
/// 3. `"is locked"` / `"Could not acquire"` -> `GitError::Locked`
/// 4. `"not found"` / `"do not have"` -> `GitError::NotFound`
///
/// Anything else becomes `GitError::CommandFailed`, carrying the exit `code`
/// and the trimmed stderr. Matching is case-sensitive.
fn structured_git_error(stderr: &str, code: Option<i32>) -> GitError {
    let message = stderr.trim();
    // True when the trimmed stderr contains at least one of the given needles.
    let mentions = |needles: &[&str]| needles.iter().any(|&needle| message.contains(needle));

    if mentions(&["not a git repository", "does not exist"]) {
        return GitError::RepoNotFound;
    }
    if mentions(&["Permission denied", "denied"]) {
        return GitError::PermissionDenied(message.to_string());
    }
    if mentions(&["is locked", "Could not acquire"]) {
        return GitError::Locked(message.to_string());
    }
    if mentions(&["not found", "do not have"]) {
        return GitError::NotFound(message.to_string());
    }

    // No known pattern matched: report the raw failure.
    GitError::CommandFailed {
        status_code: code,
        stderr: message.to_string(),
    }
}
pub async fn serve(
addr: std::net::SocketAddr,
svc: GitksService,
+20 -5
View File
@@ -3,6 +3,7 @@ use tokio_stream::wrappers::ReceiverStream;
use crate::pb::pack_service_client::PackServiceClient;
use crate::pb::*;
use crate::pack::CancellableReceiverStream;
use super::{GitksService, into_status};
@@ -14,8 +15,8 @@ remote_client!(
#[tonic::async_trait]
impl pack_service_server::PackService for GitksService {
type UploadPackStream = ReceiverStream<Result<UploadPackResponse, tonic::Status>>;
type ReceivePackStream = ReceiverStream<Result<ReceivePackResponse, tonic::Status>>;
type UploadPackStream = CancellableReceiverStream<Result<UploadPackResponse, tonic::Status>>;
type ReceivePackStream = CancellableReceiverStream<Result<ReceivePackResponse, tonic::Status>>;
type PackObjectsStream = ReceiverStream<Result<PackfileChunk, tonic::Status>>;
async fn advertise_refs(
@@ -112,7 +113,12 @@ impl pack_service_server::PackService for GitksService {
.upload_pack(tokio_stream::wrappers::ReceiverStream::new(rx))
.await?;
let out = super::bridge_server_stream(resp.into_inner());
return Ok(tonic::Response::new(out));
// Create a dummy cancel token for the forwarded stream
let cancel_token = tokio_util::sync::CancellationToken::new();
let cancel_guard = cancel_token.drop_guard();
return Ok(tonic::Response::new(
crate::pack::CancellableReceiverStream::new(out, cancel_guard),
));
}
crate::metrics::record_rpc_error(&m, &err);
return Err(err);
@@ -182,7 +188,12 @@ impl pack_service_server::PackService for GitksService {
.receive_pack(tokio_stream::wrappers::ReceiverStream::new(rx))
.await?;
let out = super::bridge_server_stream(resp.into_inner());
return Ok(tonic::Response::new(out));
// Create a dummy cancel token for the forwarded stream
let cancel_token = tokio_util::sync::CancellationToken::new();
let cancel_guard = cancel_token.drop_guard();
return Ok(tonic::Response::new(
crate::pack::CancellableReceiverStream::new(out, cancel_guard),
));
}
crate::metrics::record_rpc_error(&m, &err);
return Err(err);
@@ -333,7 +344,11 @@ impl pack_service_server::PackService for GitksService {
inputs.push(msg?);
}
let _rate = self
.acquire_rate_limit(inputs.first().and_then(|r: &IndexPackRequest| r.repository.as_ref()))
.acquire_rate_limit(
inputs
.first()
.and_then(|r: &IndexPackRequest| r.repository.as_ref()),
)
.await?;
let repo = self.repo_label(inputs.first().and_then(|r| r.repository.as_ref()));
let span = tracing::info_span!("pack.index_pack", %repo);
+1 -1
View File
@@ -1,5 +1,5 @@
use crate::pb::*;
use crate::pb::ref_service_server::RefService;
use crate::pb::*;
use super::GitksService;
+1 -1
View File
@@ -1,5 +1,5 @@
use crate::pb::*;
use crate::pb::remote_service_server::RemoteService;
use crate::pb::*;
use crate::remote::find_remote::{find_remote_repository, find_remote_root_ref};
use super::GitksService;
+2 -7
View File
@@ -434,7 +434,6 @@ impl repository_service_server::RepositoryService for GitksService {
Ok(tonic::Response::new(resp))
}
async fn list_hooks(
&self,
request: tonic::Request<ListHooksRequest>,
@@ -495,7 +494,6 @@ impl repository_service_server::RepositoryService for GitksService {
Ok(tonic::Response::new(()))
}
async fn create_snapshot(
&self,
request: tonic::Request<CreateSnapshotRequest>,
@@ -600,7 +598,6 @@ impl repository_service_server::RepositoryService for GitksService {
Ok(tonic::Response::new(()))
}
type FetchRepositoryDataStream =
ReceiverStream<Result<FetchRepositoryDataResponse, tonic::Status>>;
@@ -698,7 +695,6 @@ impl repository_service_server::RepositoryService for GitksService {
Ok(tonic::Response::new(ReceiverStream::new(rx)))
}
async fn find_merge_base(
&self,
request: tonic::Request<FindMergeBaseRequest>,
@@ -751,7 +747,6 @@ impl repository_service_server::RepositoryService for GitksService {
Ok(tonic::Response::new(resp))
}
async fn objects_size(
&self,
request: tonic::Request<ObjectsSizeRequest>,
@@ -795,7 +790,8 @@ impl repository_service_server::RepositoryService for GitksService {
&self,
request: tonic::Request<CreateRepositoryFromUrlRequest>,
) -> Result<tonic::Response<CreateRepositoryFromUrlResponse>, tonic::Status> {
let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/CreateRepositoryFromURL");
let m =
crate::metrics::RequestMetrics::new("gitks.RepositoryService/CreateRepositoryFromURL");
let inner = request.into_inner();
let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?;
let bare_dir = self.resolve_for_init(inner.repository.as_ref())?;
@@ -816,7 +812,6 @@ impl repository_service_server::RepositoryService for GitksService {
}))
}
async fn find_license(
&self,
request: tonic::Request<FindLicenseRequest>,