feat(cluster): implement distributed clustering with etcd coordination

- Integrate etcd-client for distributed coordination and leader election
- Add remote client macros with proper formatting for all services
- Implement RequestMetrics for tracking RPC performance and errors
- Add rate limiting mechanism across all service endpoints
- Create ElectionRequest and ElectionResult message types for leader election
- Add role management with primary/replica switching capabilities
- Implement health checker with automatic failover detection
- Add repository count metrics for cluster monitoring
- Update Cargo.toml with etcd-client and dashmap dependencies
- Modify RepoEntry to include read_only flag for replica handling
- Implement should_accept_election logic to prevent duplicate elections
- Add RoleChangedEvent handling for cluster role updates
This commit is contained in:
zhenyi
2026-06-08 14:31:29 +08:00
parent d243dce027
commit 8f472a0443
37 changed files with 4691 additions and 83 deletions
+106 -6
View File
@@ -142,6 +142,13 @@ pub enum GitNodeMessage {
#[rpc]
GetNodeHealth(RpcReplyPort<NodeHealth>),
/// Election: vote for a candidate to become PRIMARY.
#[rpc]
ElectPrimary(ElectionRequest, RpcReplyPort<ElectionResult>),
/// A role change has occurred in the cluster.
RoleChanged(RoleChangedEvent),
}
#[derive(ractor_cluster::RactorMessage)]
@@ -149,6 +156,105 @@ pub enum RepoActorMessage {
UpdateMetadata(RepositoryHeader),
}
// ── Election & Role Change Types ──────────────────────────────────────
/// Request for a node to vote in a PRIMARY election.
///
/// Carried as the payload of `GitNodeMessage::ElectPrimary`; the receiver
/// answers with an `ElectionResult`. All fields describe the candidate that
/// is asking to become PRIMARY.
#[derive(Debug, Clone)]
pub struct ElectionRequest {
    /// Storage name identifying the candidate node.
    pub candidate_storage_name: String,
    /// gRPC address of the candidate.
    /// NOTE(review): the address format (e.g. `host:port` vs. full URL) is not
    /// visible here — confirm against the caller.
    pub candidate_grpc_addr: String,
    /// Actor name of the candidate.
    pub candidate_actor_name: String,
    /// Election term this request belongs to; serialized as a decimal string.
    pub term: u64,
    /// Free-form reason the election was started.
    pub reason: String, // "primary_failed" etc.
}
/// Wire encoding for [`ElectionRequest`].
///
/// The request is serialized as five strings, in this fixed order:
/// `candidate_storage_name`, `candidate_grpc_addr`, `candidate_actor_name`,
/// `term` (decimal), `reason`.
impl BytesConvertable for ElectionRequest {
    /// Serializes the request via `encode_strings`, with `term` rendered as a
    /// decimal string.
    fn into_bytes(self) -> Vec<u8> {
        encode_strings(&[
            self.candidate_storage_name,
            self.candidate_grpc_addr,
            self.candidate_actor_name,
            self.term.to_string(),
            self.reason,
        ])
    }

    /// Rebuilds a request from bytes produced by [`Self::into_bytes`].
    ///
    /// Decoding is lenient: a missing string field becomes the empty string
    /// and a missing or unparsable `term` becomes `0`.
    ///
    /// NOTE(review): `decode_strings` appears to stop at the first zero-length
    /// entry, so an empty field would make every later field fall back to its
    /// default — confirm whether that is intended.
    fn from_bytes(bytes: Vec<u8>) -> Self {
        // Consume the decoded strings in wire order; moving them out of the
        // iterator avoids cloning each field from the owned vector.
        let mut fields = decode_strings(bytes).into_iter();

        let candidate_storage_name = fields.next().unwrap_or_default();
        let candidate_grpc_addr = fields.next().unwrap_or_default();
        let candidate_actor_name = fields.next().unwrap_or_default();
        let term = fields
            .next()
            .and_then(|raw| raw.parse::<u64>().ok())
            .unwrap_or(0);
        let reason = fields.next().unwrap_or_default();

        Self {
            candidate_storage_name,
            candidate_grpc_addr,
            candidate_actor_name,
            term,
            reason,
        }
    }
}
/// Result of an election vote.
///
/// Returned through the reply port of `GitNodeMessage::ElectPrimary` in
/// response to an `ElectionRequest`.
#[derive(Debug, Clone)]
pub struct ElectionResult {
    /// `true` when the voter accepted the candidate; serialized as `"1"`/`"0"`.
    pub accepted: bool,
    /// Term reported by the voter; serialized as a decimal string.
    pub current_term: u64,
    /// Storage name of the node that cast this vote.
    pub voter_storage_name: String,
    /// Role of the voting node.
    /// NOTE(review): presumably `"primary"` or `"replica"` like
    /// `RoleChangedEvent::new_role` — confirm.
    pub voter_role: String,
}
/// Wire encoding for [`ElectionResult`].
///
/// The result is serialized as four strings, in this fixed order:
/// `accepted` (`"1"` or `"0"`), `current_term` (decimal),
/// `voter_storage_name`, `voter_role`.
impl BytesConvertable for ElectionResult {
    /// Serializes the result via `encode_strings`.
    fn into_bytes(self) -> Vec<u8> {
        let accepted_flag = if self.accepted { "1" } else { "0" };
        encode_strings(&[
            accepted_flag.to_string(),
            self.current_term.to_string(),
            self.voter_storage_name,
            self.voter_role,
        ])
    }

    /// Rebuilds a result from bytes produced by [`Self::into_bytes`].
    ///
    /// Decoding is lenient: `accepted` is `true` only when the first field is
    /// exactly `"1"`, a missing or unparsable `current_term` becomes `0`, and
    /// missing string fields become empty strings.
    ///
    /// NOTE(review): `decode_strings` appears to stop at the first zero-length
    /// entry, so an empty `voter_storage_name` would also drop `voter_role` —
    /// confirm whether that is intended.
    fn from_bytes(bytes: Vec<u8>) -> Self {
        // Consume the decoded strings in wire order; moving them out of the
        // iterator avoids cloning each field from the owned vector.
        let mut fields = decode_strings(bytes).into_iter();

        let accepted = fields.next().is_some_and(|flag| flag == "1");
        let current_term = fields
            .next()
            .and_then(|raw| raw.parse::<u64>().ok())
            .unwrap_or(0);
        let voter_storage_name = fields.next().unwrap_or_default();
        let voter_role = fields.next().unwrap_or_default();

        Self {
            accepted,
            current_term,
            voter_storage_name,
            voter_role,
        }
    }
}
/// Event broadcast when a node's role changes.
///
/// Carried as the payload of `GitNodeMessage::RoleChanged`.
#[derive(Debug, Clone)]
pub struct RoleChangedEvent {
    /// Storage name of the node whose role changed.
    pub storage_name: String,
    /// gRPC address of that node.
    /// NOTE(review): the address format is not visible here — confirm.
    pub grpc_addr: String,
    /// The role the node now holds.
    pub new_role: String, // "primary" or "replica"
    /// Term associated with the change; serialized as a decimal string.
    pub term: u64,
    /// Repositories affected by the role change. On the wire these are every
    /// string after the four fixed fields.
    pub relative_paths: Vec<String>, // repos that changed role
}
/// Wire encoding for [`RoleChangedEvent`].
///
/// The event is serialized as four fixed strings — `storage_name`,
/// `grpc_addr`, `new_role`, `term` (decimal) — followed by one string per
/// entry of `relative_paths`.
impl BytesConvertable for RoleChangedEvent {
    /// Serializes the event via `encode_strings`: the four fixed fields
    /// first, then every relative path in order.
    fn into_bytes(self) -> Vec<u8> {
        let mut strings = vec![
            self.storage_name,
            self.grpc_addr,
            self.new_role,
            self.term.to_string(),
        ];
        strings.extend(self.relative_paths);
        encode_strings(&strings)
    }

    /// Rebuilds an event from bytes produced by [`Self::into_bytes`].
    ///
    /// Decoding is lenient: missing string fields become empty strings, a
    /// missing or unparsable `term` becomes `0`, and any strings after the
    /// fourth are taken as `relative_paths` (possibly none).
    ///
    /// NOTE(review): `decode_strings` appears to stop at the first zero-length
    /// entry, so an empty field or an empty relative path would truncate
    /// everything after it — confirm whether that is intended.
    fn from_bytes(bytes: Vec<u8>) -> Self {
        // Consume the decoded strings in wire order; moving them out of the
        // iterator avoids cloning the fixed fields and the whole path tail.
        let mut fields = decode_strings(bytes).into_iter();

        let storage_name = fields.next().unwrap_or_default();
        let grpc_addr = fields.next().unwrap_or_default();
        let new_role = fields.next().unwrap_or_default();
        let term = fields
            .next()
            .and_then(|raw| raw.parse::<u64>().ok())
            .unwrap_or(0);
        // Whatever remains after the four fixed fields is the path list.
        let relative_paths: Vec<String> = fields.collect();

        Self {
            storage_name,
            grpc_addr,
            new_role,
            term,
            relative_paths,
        }
    }
}
fn encode_strings(values: &[String]) -> Vec<u8> {
let mut buf = Vec::new();
for value in values {
@@ -159,16 +265,13 @@ fn encode_strings(values: &[String]) -> Vec<u8> {
buf
}
/// Maximum allowed length, in bytes, for a single string in the message.
/// `decode_strings` compares each length prefix against this limit and logs
/// a warning when it is exceeded.
const MAX_STRING_LEN: usize = 10 * 1024 * 1024; // 10MB
/// Maximum total message size, in bytes. `decode_strings` compares the whole
/// input buffer against this limit and logs a warning when it is exceeded.
const MAX_TOTAL_SIZE: usize = 50 * 1024 * 1024; // 50MB
fn decode_strings(bytes: Vec<u8>) -> Vec<String> {
let mut values = Vec::new();
let mut offset = 0;
// Check total message size
if bytes.len() > MAX_TOTAL_SIZE {
tracing::warn!(
total = bytes.len(),
@@ -182,7 +285,6 @@ fn decode_strings(bytes: Vec<u8>) -> Vec<String> {
let len_bytes: [u8; 8] = bytes[offset..offset + 8].try_into().unwrap_or([0u8; 8]);
let len_u64 = u64::from_be_bytes(len_bytes);
// Prevent DoS via extremely large length values
if len_u64 > MAX_STRING_LEN as u64 {
tracing::warn!(
offset,
@@ -196,7 +298,6 @@ fn decode_strings(bytes: Vec<u8>) -> Vec<String> {
let len = len_u64 as usize;
offset += 8;
// Prevent integer overflow in offset calculation
let end_offset = match offset.checked_add(len) {
Some(end) => end,
None => {
@@ -210,7 +311,6 @@ fn decode_strings(bytes: Vec<u8>) -> Vec<String> {
};
if len == 0 || end_offset > bytes.len() {
// Invalid length — stop decoding, return what we have so far
tracing::warn!(
offset,
claimed_len = len,