refactor(actor): implement Raft consensus algorithm for cluster leader election

- Add voting mechanism with term tracking and vote persistence
- Implement election triggering logic with majority vote counting
- Add primary/replica role transition handling with state management
- Integrate health check failure detection for automatic elections
- Refactor actor messaging system for distributed coordination
- Update repository registration to query cluster for existing primary
- Add broadcast mechanism for role change notifications
- Implement proper term comparison and duplicate request filtering
- Upgrade dependency versions including tokio-util for async utilities
- Optimize code formatting and line wrapping for improved readability
- Remove redundant blank lines and improve code structure consistency
- Enhance error logging and trace information for debugging purposes
This commit is contained in:
zhenyi
2026-06-10 12:35:10 +08:00
parent ab32e8826e
commit 9a0c26e5f6
40 changed files with 1184 additions and 449 deletions
+51 -32
View File
@@ -9,10 +9,9 @@
//! // guard is dropped here → permit released
use dashmap::DashMap;
use std::sync::{Arc, OnceLock};
use std::sync::{Arc, OnceLock, RwLock};
use tokio::sync::Semaphore;
/// Default max concurrent operations per repository.
const DEFAULT_MAX_CONCURRENT: usize = 5;
@@ -20,8 +19,8 @@ const DEFAULT_MAX_CONCURRENT: usize = 5;
struct RateLimiter {
/// Per-repository semaphores. Key = repository relative_path.
semaphores: DashMap<String, Arc<Semaphore>>,
/// Max concurrent operations per repository.
max_concurrent: usize,
/// Max concurrent operations per repository (protected by RwLock for runtime updates).
max_concurrent: RwLock<usize>,
}
static RATE_LIMITER: OnceLock<RateLimiter> = OnceLock::new();
@@ -40,11 +39,18 @@ fn limiter() -> &'static RateLimiter {
RateLimiter {
semaphores: DashMap::new(),
max_concurrent: max,
max_concurrent: RwLock::new(max),
}
})
}
/// Get the current max_concurrent value.
fn get_max_concurrent() -> usize {
*limiter()
.max_concurrent
.read()
.unwrap_or_else(|e| e.into_inner())
}
/// A guard that holds a rate-limit permit. The permit is released on drop.
pub struct RateLimitGuard {
@@ -63,21 +69,21 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
if repo.is_empty() {
return None;
}
let l = limiter();
if l.max_concurrent == 0 {
let max_concurrent = get_max_concurrent();
if max_concurrent == 0 {
// Unlimited
return None;
}
let sem = l
let sem = limiter()
.semaphores
.entry(repo.to_string())
.or_insert_with(|| Arc::new(Semaphore::new(l.max_concurrent)))
.or_insert_with(|| Arc::new(Semaphore::new(max_concurrent)))
.value()
.clone();
// Release DashMap reference before awaiting
let _ = l;
let _ = repo;
match tokio::time::timeout(
std::time::Duration::from_secs(30),
@@ -87,7 +93,7 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
{
Ok(Ok(permit)) => {
tracing::debug!(
repo = %repo,
repo = %repo_relative_path.unwrap_or(""),
available = sem.available_permits(),
"rate limit permit acquired"
);
@@ -96,10 +102,10 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
Ok(Err(_closed)) => {
// Semaphore was closed — recreate it
tracing::warn!(
repo = %repo,
repo = %repo_relative_path.unwrap_or(""),
"rate limit semaphore closed, recreating"
);
let new_sem = Arc::new(Semaphore::new(limiter().max_concurrent));
let new_sem = Arc::new(Semaphore::new(get_max_concurrent()));
let permit = new_sem
.clone()
.acquire_owned()
@@ -107,13 +113,13 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
.expect("newly created semaphore should have permits");
limiter()
.semaphores
.insert(repo.to_string(), new_sem);
.insert(repo_relative_path.unwrap_or("").to_string(), new_sem);
Some(RateLimitGuard { _permit: permit })
}
Err(_elapsed) => {
tracing::warn!(
repo = %repo,
max_concurrent = limiter().max_concurrent,
repo = %repo_relative_path.unwrap_or(""),
max_concurrent = get_max_concurrent(),
"rate limit timeout waiting for permit"
);
None
@@ -122,7 +128,9 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
}
/// Acquire a rate-limit permit, returning a tonic error on timeout / overload.
pub async fn acquire_or_reject(repo_relative_path: Option<&str>) -> Result<Option<RateLimitGuard>, tonic::Status> {
pub async fn acquire_or_reject(
repo_relative_path: Option<&str>,
) -> Result<Option<RateLimitGuard>, tonic::Status> {
let repo = repo_relative_path.unwrap_or("");
if repo.is_empty() {
return Ok(None);
@@ -130,13 +138,13 @@ pub async fn acquire_or_reject(repo_relative_path: Option<&str>) -> Result<Optio
match acquire(Some(repo)).await {
Some(guard) => Ok(Some(guard)),
None => {
if limiter().max_concurrent == 0 {
if get_max_concurrent() == 0 {
return Ok(None);
}
// Timeout — reject with resource exhausted
Err(tonic::Status::resource_exhausted(format!(
"rate limit exceeded for repository '{repo}': max {max} concurrent operations",
max = limiter().max_concurrent
max = get_max_concurrent()
)))
}
}
@@ -149,22 +157,33 @@ pub fn remove_repository(repo_relative_path: &str) {
}
/// Update the max concurrent limit at runtime.
/// This properly updates the limit and recreates all existing semaphores.
pub fn set_max_concurrent(max: usize) {
let l = limiter();
// We can't modify the field directly through OnceLock, but we can
// update existing semaphores to add or remove permits as needed.
// For a simpler approach, just log and let new semaphores use the new value.
// Since max_concurrent is only read on insert, we use a separate atomic.
use std::sync::atomic::AtomicUsize;
static OVERRIDE: std::sync::atomic::AtomicUsize = AtomicUsize::new(0);
OVERRIDE.store(max, std::sync::atomic::Ordering::Relaxed);
// Recreate all existing semaphores
for entry in &l.semaphores {
let _old = l.semaphores.insert(
entry.key().clone(),
Arc::new(Semaphore::new(max)),
);
// Update the max_concurrent value
match l.max_concurrent.write() {
Ok(mut guard) => {
*guard = max;
}
Err(e) => {
// Poisoned lock - recover and update
let mut guard = e.into_inner();
*guard = max;
}
}
// Recreate all existing semaphores with the new limit
let keys: Vec<String> = l
.semaphores
.iter()
.map(|entry| entry.key().clone())
.collect();
for key in keys {
l.semaphores
.insert(key, Arc::new(Semaphore::new(max)));
}
tracing::info!(max_concurrent = max, "rate limit max_concurrent updated");
}