refactor(actor): implement Raft consensus algorithm for cluster leader election
- Add voting mechanism with term tracking and vote persistence - Implement election triggering logic with majority vote counting - Add primary/replica role transition handling with state management - Integrate health check failure detection for automatic elections - Refactor actor messaging system for distributed coordination - Update repository registration to query cluster for existing primary - Add broadcast mechanism for role change notifications - Implement proper term comparison and duplicate request filtering - Upgrade dependency versions including tokio-util for async utilities - Optimize code formatting and line wrapping for improved readability - Remove redundant blank lines and improve code structure consistency - Enhance error logging and trace information for debugging purposes
This commit is contained in:
+51
-32
@@ -9,10 +9,9 @@
|
||||
//! // guard is dropped here → permit released
|
||||
|
||||
use dashmap::DashMap;
|
||||
use std::sync::{Arc, OnceLock};
|
||||
use std::sync::{Arc, OnceLock, RwLock};
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
|
||||
/// Default max concurrent operations per repository.
|
||||
const DEFAULT_MAX_CONCURRENT: usize = 5;
|
||||
|
||||
@@ -20,8 +19,8 @@ const DEFAULT_MAX_CONCURRENT: usize = 5;
|
||||
struct RateLimiter {
|
||||
/// Per-repository semaphores. Key = repository relative_path.
|
||||
semaphores: DashMap<String, Arc<Semaphore>>,
|
||||
/// Max concurrent operations per repository.
|
||||
max_concurrent: usize,
|
||||
/// Max concurrent operations per repository (protected by RwLock for runtime updates).
|
||||
max_concurrent: RwLock<usize>,
|
||||
}
|
||||
|
||||
static RATE_LIMITER: OnceLock<RateLimiter> = OnceLock::new();
|
||||
@@ -40,11 +39,18 @@ fn limiter() -> &'static RateLimiter {
|
||||
|
||||
RateLimiter {
|
||||
semaphores: DashMap::new(),
|
||||
max_concurrent: max,
|
||||
max_concurrent: RwLock::new(max),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the current max_concurrent value.
|
||||
fn get_max_concurrent() -> usize {
|
||||
*limiter()
|
||||
.max_concurrent
|
||||
.read()
|
||||
.unwrap_or_else(|e| e.into_inner())
|
||||
}
|
||||
|
||||
/// A guard that holds a rate-limit permit. The permit is released on drop.
|
||||
pub struct RateLimitGuard {
|
||||
@@ -63,21 +69,21 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
|
||||
if repo.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let l = limiter();
|
||||
if l.max_concurrent == 0 {
|
||||
let max_concurrent = get_max_concurrent();
|
||||
if max_concurrent == 0 {
|
||||
// Unlimited
|
||||
return None;
|
||||
}
|
||||
|
||||
let sem = l
|
||||
let sem = limiter()
|
||||
.semaphores
|
||||
.entry(repo.to_string())
|
||||
.or_insert_with(|| Arc::new(Semaphore::new(l.max_concurrent)))
|
||||
.or_insert_with(|| Arc::new(Semaphore::new(max_concurrent)))
|
||||
.value()
|
||||
.clone();
|
||||
|
||||
// Release DashMap reference before awaiting
|
||||
let _ = l;
|
||||
let _ = repo;
|
||||
|
||||
match tokio::time::timeout(
|
||||
std::time::Duration::from_secs(30),
|
||||
@@ -87,7 +93,7 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
|
||||
{
|
||||
Ok(Ok(permit)) => {
|
||||
tracing::debug!(
|
||||
repo = %repo,
|
||||
repo = %repo_relative_path.unwrap_or(""),
|
||||
available = sem.available_permits(),
|
||||
"rate limit permit acquired"
|
||||
);
|
||||
@@ -96,10 +102,10 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
|
||||
Ok(Err(_closed)) => {
|
||||
// Semaphore was closed — recreate it
|
||||
tracing::warn!(
|
||||
repo = %repo,
|
||||
repo = %repo_relative_path.unwrap_or(""),
|
||||
"rate limit semaphore closed, recreating"
|
||||
);
|
||||
let new_sem = Arc::new(Semaphore::new(limiter().max_concurrent));
|
||||
let new_sem = Arc::new(Semaphore::new(get_max_concurrent()));
|
||||
let permit = new_sem
|
||||
.clone()
|
||||
.acquire_owned()
|
||||
@@ -107,13 +113,13 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
|
||||
.expect("newly created semaphore should have permits");
|
||||
limiter()
|
||||
.semaphores
|
||||
.insert(repo.to_string(), new_sem);
|
||||
.insert(repo_relative_path.unwrap_or("").to_string(), new_sem);
|
||||
Some(RateLimitGuard { _permit: permit })
|
||||
}
|
||||
Err(_elapsed) => {
|
||||
tracing::warn!(
|
||||
repo = %repo,
|
||||
max_concurrent = limiter().max_concurrent,
|
||||
repo = %repo_relative_path.unwrap_or(""),
|
||||
max_concurrent = get_max_concurrent(),
|
||||
"rate limit timeout waiting for permit"
|
||||
);
|
||||
None
|
||||
@@ -122,7 +128,9 @@ pub async fn acquire(repo_relative_path: Option<&str>) -> Option<RateLimitGuard>
|
||||
}
|
||||
|
||||
/// Acquire a rate-limit permit, returning a tonic error on timeout / overload.
|
||||
pub async fn acquire_or_reject(repo_relative_path: Option<&str>) -> Result<Option<RateLimitGuard>, tonic::Status> {
|
||||
pub async fn acquire_or_reject(
|
||||
repo_relative_path: Option<&str>,
|
||||
) -> Result<Option<RateLimitGuard>, tonic::Status> {
|
||||
let repo = repo_relative_path.unwrap_or("");
|
||||
if repo.is_empty() {
|
||||
return Ok(None);
|
||||
@@ -130,13 +138,13 @@ pub async fn acquire_or_reject(repo_relative_path: Option<&str>) -> Result<Optio
|
||||
match acquire(Some(repo)).await {
|
||||
Some(guard) => Ok(Some(guard)),
|
||||
None => {
|
||||
if limiter().max_concurrent == 0 {
|
||||
if get_max_concurrent() == 0 {
|
||||
return Ok(None);
|
||||
}
|
||||
// Timeout — reject with resource exhausted
|
||||
Err(tonic::Status::resource_exhausted(format!(
|
||||
"rate limit exceeded for repository '{repo}': max {max} concurrent operations",
|
||||
max = limiter().max_concurrent
|
||||
max = get_max_concurrent()
|
||||
)))
|
||||
}
|
||||
}
|
||||
@@ -149,22 +157,33 @@ pub fn remove_repository(repo_relative_path: &str) {
|
||||
}
|
||||
|
||||
/// Update the max concurrent limit at runtime.
|
||||
/// This properly updates the limit and recreates all existing semaphores.
|
||||
pub fn set_max_concurrent(max: usize) {
|
||||
let l = limiter();
|
||||
// We can't modify the field directly through OnceLock, but we can
|
||||
// update existing semaphores to add or remove permits as needed.
|
||||
// For a simpler approach, just log and let new semaphores use the new value.
|
||||
// Since max_concurrent is only read on insert, we use a separate atomic.
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
static OVERRIDE: std::sync::atomic::AtomicUsize = AtomicUsize::new(0);
|
||||
OVERRIDE.store(max, std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
// Recreate all existing semaphores
|
||||
for entry in &l.semaphores {
|
||||
let _old = l.semaphores.insert(
|
||||
entry.key().clone(),
|
||||
Arc::new(Semaphore::new(max)),
|
||||
);
|
||||
// Update the max_concurrent value
|
||||
match l.max_concurrent.write() {
|
||||
Ok(mut guard) => {
|
||||
*guard = max;
|
||||
}
|
||||
Err(e) => {
|
||||
// Poisoned lock - recover and update
|
||||
let mut guard = e.into_inner();
|
||||
*guard = max;
|
||||
}
|
||||
}
|
||||
|
||||
// Recreate all existing semaphores with the new limit
|
||||
let keys: Vec<String> = l
|
||||
.semaphores
|
||||
.iter()
|
||||
.map(|entry| entry.key().clone())
|
||||
.collect();
|
||||
|
||||
for key in keys {
|
||||
l.semaphores
|
||||
.insert(key, Arc::new(Semaphore::new(max)));
|
||||
}
|
||||
|
||||
tracing::info!(max_concurrent = max, "rate limit max_concurrent updated");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user