Files
gitks/actor/handler.rs
T
zhenyi a40da90ef9 refactor(build): reformat code and add tonic health dependency
- Reformatted build script with proper indentation and line breaks
- Added tonic-health dependency to Cargo.toml and updated lock file
- Improved error handling in disk cache with concurrent deletion checks
- Refactored conditional chains using && and let expressions
- Reformatted struct initialization and function parameter lists
- Added proper spacing and alignment in language stats processing
- Improved assertion formatting in test cases
- Reorganized import statements and code layout in multiple files
- Updated metrics functions with better parameter handling and formatting
2026-06-11 13:56:15 +08:00

1142 lines
40 KiB
Rust

use crate::actor::message::{
AppendEntriesRequest, AppendEntriesResponse, ElectionRequest, ElectionResult, GitNodeMessage,
NodeHealth, RAFT_MSG_VERSION, ROLE_PRIMARY, ROLE_REPLICA, ReadIndexResponse, RefUpdateEvent,
RoleChangedEvent, RouteDecision,
};
use crate::actor::raft_log::RaftLog;
use crate::pb::RepositoryHeader;
use crate::server::GitksService;
use async_trait::async_trait;
use ractor::pg;
use ractor::{Actor, ActorCell, ActorProcessingErr, ActorRef, SupervisionEvent};
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::{Duration, Instant};
/// Actor type for a single gitks storage node.
///
/// The actor value only carries immutable configuration; the mutable runtime
/// data is kept in the actor's `State` (`GitNodeState`).
#[derive(Clone)]
pub struct GitNodeActor {
    /// Crate version string, reported in node health replies.
    pub version: String,
    /// Service handle used to scan repositories and resolve repository paths.
    pub service: GitksService,
}
impl GitNodeActor {
pub fn init(service: GitksService) -> Self {
GitNodeActor {
version: env!("CARGO_PKG_VERSION").to_string(),
service,
}
}
}
/// Per-repository routing record tracked by a node.
#[derive(Debug, Clone)]
pub struct RepoEntry {
    /// Role this node plays for the repository (primary or replica role string).
    pub role: String,
    /// Object id stored by the most recently applied ref update; empty until one is applied.
    pub last_commit: String,
    /// Whether the repository is flagged read-only on this node.
    pub read_only: bool,
}
/// Start-up arguments handed to the node actor when it is spawned.
pub struct GitNodeArgs {
    /// Logical storage name of this node; also used to derive the actor name.
    pub storage_name: String,
    /// gRPC address advertised for this node.
    pub grpc_addr: String,
    /// Directory for Raft log persistence (the log lives in its `raft` subdirectory).
    pub data_dir: PathBuf,
}
/// Leader lease duration (10 seconds).
///
/// Renewing the lease sets its deadline to the current instant plus this duration.
const LEADER_LEASE_DURATION: Duration = Duration::from_secs(10);
/// Mutable runtime state owned by a running `GitNodeActor`.
pub struct GitNodeState {
    /// Storage name of this node, as supplied in the start-up arguments.
    storage_name: String,
    /// Actor name derived from the storage name (`git_node_<storage_name>`).
    actor_name: String,
    /// gRPC address advertised by this node.
    grpc_addr: String,
    /// Locally registered repositories, keyed by relative path.
    repos: HashMap<String, RepoEntry>,
    /// Latest election / Raft term this node has adopted.
    current_term: u64,
    /// Failure counter; reset to zero whenever this node is promoted.
    health_failures: u32,
    /// Whether this node currently considers itself the primary.
    is_primary: bool,
    /// gRPC address of the most recently known primary.
    last_known_primary_grpc: String,
    /// Candidate this node granted its vote to, if any.
    voted_for: Option<String>,
    // ── Raft consensus state ─────────────────────────────────
    /// Persistent Raft log.
    pub raft_log: RaftLog,
    /// Leader-only: lease deadline. If expired, Leader stops accepting writes.
    pub leader_lease_deadline: Option<Instant>,
    /// Leader-only: next index to send to each follower.
    pub next_index: HashMap<String, u64>,
    /// Leader-only: highest known replicated index for each follower.
    pub match_index: HashMap<String, u64>,
    /// The known leader's storage_name (for followers).
    pub leader_id: Option<String>,
    /// The known leader's gRPC address (for followers).
    pub leader_grpc_addr: Option<String>,
}
#[async_trait]
impl Actor for GitNodeActor {
type Msg = GitNodeMessage;
type State = GitNodeState;
type Arguments = GitNodeArgs;
async fn pre_start(
&self,
myself: ActorRef<Self::Msg>,
args: Self::Arguments,
) -> Result<Self::State, ActorProcessingErr> {
let actor_name = format!("git_node_{}", args.storage_name);
pg::join("gitks_nodes".to_string(), vec![myself.get_cell()]);
pg::join_scoped(
args.storage_name.clone(),
"node".to_string(),
vec![myself.get_cell()],
);
tracing::info!(storage_name = %args.storage_name, actor_name = %actor_name, grpc_addr = %args.grpc_addr, "GitNodeActor started");
start_health_checker(myself.clone(), 1, 10);
// Initialize Raft log with disk persistence
let raft_data_dir = args.data_dir.join("raft");
let raft_log = RaftLog::new(&raft_data_dir)
.map_err(|e| ActorProcessingErr::from(format!("failed to init raft log: {e}")))?;
tracing::info!(
storage_name = %args.storage_name,
entries = raft_log.len(),
last_index = raft_log.last_index(),
"raft log initialized"
);
Ok(GitNodeState {
storage_name: args.storage_name,
actor_name,
grpc_addr: args.grpc_addr.clone(),
repos: HashMap::new(),
current_term: 0,
health_failures: 0,
is_primary: true, // Will be refined at registration
last_known_primary_grpc: args.grpc_addr.clone(),
voted_for: None,
raft_log,
leader_lease_deadline: None,
next_index: HashMap::new(),
match_index: HashMap::new(),
leader_id: None,
leader_grpc_addr: None,
})
}
/// Processes one mailbox message against the node state.
///
/// Covers repository registration/routing queries, health replies, primary
/// election (voting and candidacy), role-change notifications and the Raft
/// RPCs. Replies are sent best-effort: a dropped reply port is ignored.
///
/// # Errors
/// Only `ScanAndRegister` can fail, when the repository scan returns an error.
async fn handle(
&self,
myself: ActorRef<Self::Msg>,
message: Self::Msg,
state: &mut Self::State,
) -> Result<(), ActorProcessingErr> {
match message {
// Scan the local storage, publish the repository count metric and register
// every repository under its path relative to the repository prefix.
GitNodeMessage::ScanAndRegister => {
let repos = self.service.scan_all_repo()?;
tracing::info!(storage_name = %state.storage_name, found = repos.len(), "scanning local repositories");
crate::metrics::set_repository_count(repos.len() as u64);
for repo_path in repos {
let relative_path = repo_path
.strip_prefix(self.service.repo_prefix.to_string_lossy().as_ref())
.unwrap_or(&repo_path)
.trim_start_matches('/')
.to_string();
register_repo(&myself, state, relative_path).await;
}
}
GitNodeMessage::RegisterRepository(header) => {
register_repo(&myself, state, header.relative_path).await;
}
// Drops the local route only; process-group membership is left untouched.
GitNodeMessage::RemoveRepository(header) => {
state.repos.remove(&header.relative_path);
tracing::info!(storage_name = %state.storage_name, relative_path = %header.relative_path, "repository route removed");
}
// Only repositories this node holds as a replica are synced from the primary.
GitNodeMessage::RefUpdated(event) => {
if let Some(entry) = state.repos.get(&event.relative_path)
&& entry.role == ROLE_REPLICA
{
let local_path = self.service.repo_prefix.join(&event.relative_path);
crate::actor::sync::sync_from_primary(event, local_path).await;
}
}
// "Found" means this node holds the repository with the primary role.
GitNodeMessage::FindPrimary(header, reply) => {
let entry = state.repos.get(&header.relative_path);
let is_primary = entry.is_some_and(|e| e.role == ROLE_PRIMARY);
reply
.send(build_decision(
state,
&header,
is_primary,
entry.map(|e| e.role.as_str()),
))
.ok();
}
// Any locally registered repository counts, regardless of its role.
GitNodeMessage::FindReplica(header, reply) => {
let entry = state.repos.get(&header.relative_path);
let has = entry.is_some();
reply
.send(build_decision(
state,
&header,
has,
entry.map(|e| e.role.as_str()),
))
.ok();
}
// Paths are returned as a single newline-separated string.
GitNodeMessage::ListRepositoryPaths(reply) => {
let paths: Vec<String> = state.repos.keys().cloned().collect();
reply.send(paths.join("\n")).ok();
}
GitNodeMessage::RepositoryExists(header, reply) => {
reply
.send(state.repos.contains_key(&header.relative_path))
.ok();
}
// `healthy` is reported as `true` unconditionally: answering at all is the signal.
GitNodeMessage::GetNodeHealth(reply) => {
reply
.send(NodeHealth {
storage_name: state.storage_name.clone(),
repo_count: state.repos.len() as u64,
healthy: true,
version: self.version.clone(),
})
.ok();
}
// Voter side of an election: decide, record the vote (term, candidate and
// candidate address) when granted, then reply with the outcome.
GitNodeMessage::ElectPrimary(request, reply) => {
let accepted = should_accept_election(&request, state);
tracing::info!(
candidate = %request.candidate_storage_name,
term = request.term,
current_term = state.current_term,
accepted = accepted,
voted_for = ?state.voted_for,
"election vote"
);
if accepted {
state.current_term = request.term;
state.voted_for = Some(request.candidate_storage_name.clone());
state.last_known_primary_grpc = request.candidate_grpc_addr.clone();
}
reply
.send(ElectionResult {
accepted,
current_term: state.current_term,
voter_storage_name: state.storage_name.clone(),
voter_role: if state.is_primary {
ROLE_PRIMARY
} else {
ROLE_REPLICA
}
.to_string(),
})
.ok();
}
GitNodeMessage::RoleChanged(event) => {
// Empty storage_name = self-promotion from health checker
let is_self =
event.storage_name.is_empty() || event.storage_name == state.storage_name;
if is_self && event.new_role == ROLE_PRIMARY {
tracing::info!(
storage_name = %state.storage_name,
term = event.term,
"promoted to PRIMARY"
);
state.is_primary = true;
state.current_term = event.term;
state.health_failures = 0;
state.voted_for = None;
for entry in state.repos.values_mut() {
entry.role = ROLE_PRIMARY.to_string();
entry.read_only = false;
}
} else if is_self && event.new_role == ROLE_REPLICA {
tracing::info!(
storage_name = %state.storage_name,
term = event.term,
"demoted to REPLICA"
);
state.is_primary = false;
state.current_term = event.term;
state.voted_for = None;
// Demotion changes the role only; `read_only` is left as it was.
for entry in state.repos.values_mut() {
entry.role = ROLE_REPLICA.to_string();
}
} else {
// Another node's role changed — update routing info
tracing::info!(
storage_name = %event.storage_name,
new_role = %event.new_role,
"remote node role changed"
);
state.last_known_primary_grpc = if event.new_role == ROLE_PRIMARY {
event.grpc_addr.clone()
} else {
state.last_known_primary_grpc.clone()
};
}
}
// Candidate side of an election: ask every other group member for its vote
// (sequentially, passing `1000` as the call timeout) for term `current_term + 1`.
GitNodeMessage::TriggerElection => {
let members = ractor::pg::get_members(&"gitks_nodes".to_string());
let total = members.len();
let my_cell = myself.get_cell();
let new_term = state.current_term.wrapping_add(1);
let mut accepted_count = 0u64;
for member in &members {
if *member == my_cell {
// We vote for ourselves
accepted_count += 1;
continue;
}
let actor_ref: ActorRef<GitNodeMessage> = member.clone().into();
let request = ElectionRequest {
candidate_storage_name: state.storage_name.clone(),
candidate_grpc_addr: state.grpc_addr.clone(),
candidate_actor_name: state.actor_name.clone(),
term: new_term,
reason: "health_check_failure".to_string(),
last_log_index: state.raft_log.last_index(),
last_log_term: state.raft_log.last_term(),
};
match ractor::call_t!(actor_ref, GitNodeMessage::ElectPrimary, 1000, request) {
Ok(result) if result.accepted => {
accepted_count += 1;
}
Ok(_) => {}
Err(_) => {
tracing::warn!(
member = ?member.get_id(),
"no response from member during election"
);
}
}
}
// The `.max(1)` makes the required vote count at least 2, so a node that is
// the only group member cannot win with just its own vote.
let majority = (total / 2).max(1) + 1;
if accepted_count >= majority as u64 {
tracing::info!(
term = new_term,
accepted = accepted_count,
total = total,
"won election, promoting to PRIMARY"
);
state.is_primary = true;
state.current_term = new_term;
state.health_failures = 0;
state.voted_for = None;
for entry in state.repos.values_mut() {
entry.role = ROLE_PRIMARY.to_string();
entry.read_only = false;
}
let role_event = RoleChangedEvent {
storage_name: state.storage_name.clone(),
grpc_addr: state.grpc_addr.clone(),
new_role: ROLE_PRIMARY.to_string(),
term: new_term,
relative_paths: state.repos.keys().cloned().collect(),
};
broadcast_role_changed(&myself, role_event);
} else {
// A lost election leaves `current_term` untouched.
tracing::warn!(
term = new_term,
accepted = accepted_count,
total = total,
"election lost, staying as REPLICA"
);
}
}
// ── Raft consensus messages ─────────────────────────
GitNodeMessage::AppendEntries(request, reply) => {
let response = handle_append_entries(&myself, state, &request);
let _ = reply.send(response);
}
GitNodeMessage::ReadIndex(_request, reply) => {
let response = handle_read_index(state);
let _ = reply.send(response);
}
GitNodeMessage::RaftWrite(command, reply) => {
let success = handle_raft_write(&myself, state, command).await;
let _ = reply.send(success);
}
}
Ok(())
}
async fn handle_supervisor_evt(
&self,
_myself: ActorRef<Self::Msg>,
evt: SupervisionEvent,
_state: &mut Self::State,
) -> Result<(), ActorProcessingErr> {
match evt {
SupervisionEvent::ActorStarted(who) => {
tracing::debug!(actor = ?who.get_id(), "child started")
}
SupervisionEvent::ActorTerminated(who, _, reason) => {
tracing::warn!(actor = ?who.get_id(), reason = ?reason, "child terminated")
}
SupervisionEvent::ActorFailed(who, panic_msg) => {
tracing::error!(actor = ?who.get_id(), msg = %panic_msg, "child panicked")
}
SupervisionEvent::ProcessGroupChanged(group) => {
tracing::info!(group = ?group, "PG membership changed")
}
_ => {}
}
Ok(())
}
/// Logs that the node actor has stopped; performs no other cleanup.
async fn post_stop(
    &self,
    _myself: ActorRef<Self::Msg>,
    state: &mut Self::State,
) -> Result<(), ActorProcessingErr> {
    let stopped_node = state.storage_name.as_str();
    tracing::info!(storage_name = %stopped_node, "GitNodeActor stopped");
    Ok(())
}
}
/// Decides whether this node grants its vote to the candidate in `request`.
///
/// The vote is refused when the request term is behind the local term, when a
/// different candidate already holds this node's vote for the same term, or
/// when the candidate's log is less up-to-date than the local one (older last
/// term, or same last term with a lower last index). Every refusal is logged.
fn should_accept_election(request: &ElectionRequest, state: &GitNodeState) -> bool {
    let candidate = &request.candidate_storage_name;

    // Stale term: guards against old or duplicated election messages.
    if state.current_term > request.term {
        tracing::warn!(
            request_term = request.term,
            current_term = state.current_term,
            "rejecting election: term too old"
        );
        return false;
    }

    // Same term: the vote may only go to the candidate we already voted for.
    if state.current_term == request.term {
        if let Some(previous_vote) = state.voted_for.as_ref() {
            if previous_vote != candidate {
                tracing::warn!(
                    request_term = request.term,
                    current_term = state.current_term,
                    already_voted = %previous_vote,
                    candidate = %candidate,
                    "rejecting election: already voted this term"
                );
                return false;
            }
        }
    }

    // Log freshness: a candidate holding stale data must not win.
    let local_last_index = state.raft_log.last_index();
    let local_last_term = state.raft_log.last_term();
    if local_last_term > request.last_log_term {
        tracing::warn!(
            candidate_term = request.last_log_term,
            my_term = local_last_term,
            candidate = %candidate,
            "rejecting election: candidate log term is older"
        );
        return false;
    }
    let same_last_term = local_last_term == request.last_log_term;
    if same_last_term && local_last_index > request.last_log_index {
        tracing::warn!(
            candidate_index = request.last_log_index,
            my_index = local_last_index,
            candidate = %candidate,
            "rejecting election: candidate log is shorter"
        );
        return false;
    }

    true
}
/// Assembles the routing reply for a lookup of `header.relative_path`.
///
/// The node's identity (storage name, actor name, gRPC address) is only
/// filled in when `found` is true; otherwise those fields are empty strings.
/// `role` is copied through, with `None` becoming an empty string.
fn build_decision(
    state: &GitNodeState,
    header: &crate::pb::RepositoryHeader,
    found: bool,
    role: Option<&str>,
) -> RouteDecision {
    /// Returns a copy of `value` when `found`, else an empty string.
    fn reveal_if(found: bool, value: &str) -> String {
        if found {
            value.to_owned()
        } else {
            String::new()
        }
    }

    let storage_name = reveal_if(found, &state.storage_name);
    let actor_name = reveal_if(found, &state.actor_name);
    let grpc_addr = reveal_if(found, &state.grpc_addr);

    RouteDecision {
        found,
        storage_name,
        relative_path: header.relative_path.clone(),
        actor_name,
        grpc_addr,
        role: role.map_or_else(String::new, str::to_owned),
    }
}
/// Registers `relative_path` on this node unless it is already known.
///
/// The role is chosen by asking the other cluster members whether one of them
/// already serves the repository as primary: if so this node becomes a
/// replica, otherwise (or when it has no peers) it becomes the primary. The
/// actor also joins the scoped process group named after the repository's
/// category.
async fn register_repo(
    myself: &ActorRef<GitNodeMessage>,
    state: &mut GitNodeState,
    relative_path: String,
) {
    if state.repos.contains_key(&relative_path) {
        return;
    }

    let cluster = ractor::pg::get_members(&"gitks_nodes".to_string());
    let own_cell = myself.get_cell();
    let has_peers = cluster.iter().any(|cell| cell != &own_cell);

    // Only bother querying the cluster when there is somebody else to ask.
    let primary_elsewhere = if has_peers {
        let probe = RepositoryHeader {
            storage_name: String::new(),
            relative_path: relative_path.clone(),
            storage_path: String::new(),
        };
        find_primary_in_cluster(&cluster, &own_cell, &probe).await
    } else {
        false
    };

    let role = if primary_elsewhere {
        ROLE_REPLICA
    } else {
        ROLE_PRIMARY
    }
    .to_string();
    if !primary_elsewhere {
        state.is_primary = true;
    }

    let category = extract_category(&relative_path);
    pg::join_scoped(
        state.storage_name.clone(),
        category.to_string(),
        vec![myself.get_cell()],
    );

    let record = RepoEntry {
        role: role.clone(),
        last_commit: String::new(),
        read_only: false,
    };
    state.repos.insert(relative_path.clone(), record);

    tracing::info!(
        storage_name = %state.storage_name,
        category = %category,
        relative_path = %relative_path,
        actor_name = %state.actor_name,
        role = %role,
        "repository route registered"
    );
}
/// Asks every member other than `my_cell` whether it serves `header`'s
/// repository as primary.
///
/// Members are queried one after another (passing `500` as the call timeout)
/// and the search stops at the first positive answer. Members that fail to
/// answer are treated as "not primary".
pub async fn find_primary_in_cluster(
    members: &[ActorCell],
    my_cell: &ActorCell,
    header: &RepositoryHeader,
) -> bool {
    for peer in members.iter().filter(|cell| *cell != my_cell) {
        let peer_ref: ActorRef<GitNodeMessage> = peer.clone().into();
        let answer = ractor::call_t!(peer_ref, GitNodeMessage::FindPrimary, 500, header.clone());
        match answer {
            Ok(decision) if decision.found && decision.role == ROLE_PRIMARY => return true,
            _ => {}
        }
    }
    false
}
/// Returns the category of a repository path: its first non-empty
/// `/`-separated segment.
///
/// Falls back to `"root"` when the path has no non-empty segment (an empty
/// string or only slashes). Leading slashes are skipped, so `"/group/repo"`
/// and `"group/repo"` map to the same category.
fn extract_category(relative_path: &str) -> &str {
    // `split` always yields at least one (possibly empty) item, so the
    // fallback only works if empty segments are skipped explicitly.
    relative_path
        .split('/')
        .find(|segment| !segment.is_empty())
        .unwrap_or("root")
}
/// Starts a detached background task that periodically probes the other
/// cluster members.
///
/// Every `interval_secs` seconds (clamped to at least one second, because a
/// zero-length tokio interval panics) the task asks the other members of the
/// `gitks_nodes` group for their health. When no member answers healthy for
/// `max_failures` consecutive rounds, a `TriggerElection` message is sent to
/// `myself` and the counter starts over. A round with no other members at all
/// resets the counter.
///
/// The task ends once `myself` can no longer receive the election message,
/// so it does not keep probing after the actor's mailbox is gone.
fn start_health_checker(myself: ActorRef<GitNodeMessage>, interval_secs: u64, max_failures: u32) {
    tokio::spawn(async move {
        let period = std::time::Duration::from_secs(interval_secs.max(1));
        let mut interval = tokio::time::interval(period);
        interval.tick().await; // First tick immediate
        let mut consecutive_failures: u32 = 0;
        loop {
            interval.tick().await;
            let members = ractor::pg::get_members(&"gitks_nodes".to_string());
            let my_cell = myself.get_cell();
            let other_cells: Vec<ractor::ActorCell> =
                members.into_iter().filter(|m| m != &my_cell).collect();
            if other_cells.is_empty() {
                // No other nodes → nothing to probe; start counting afresh.
                consecutive_failures = 0;
                continue;
            }

            // One healthy answer is enough; stop probing at the first one.
            let mut any_reachable = false;
            for cell in &other_cells {
                let actor_ref: ActorRef<GitNodeMessage> = cell.clone().into();
                match ractor::call_t!(actor_ref, GitNodeMessage::GetNodeHealth, 2000) {
                    Ok(health) if health.healthy => {
                        any_reachable = true;
                        break;
                    }
                    _ => continue,
                }
            }

            if any_reachable {
                consecutive_failures = 0;
                continue;
            }

            consecutive_failures += 1;
            tracing::warn!(
                consecutive_failures = consecutive_failures,
                max_failures = max_failures,
                "no other cluster nodes reachable"
            );
            if consecutive_failures >= max_failures {
                tracing::error!(
                    "no other nodes reachable for {max_failures} checks, triggering election"
                );
                if myself.cast(GitNodeMessage::TriggerElection).is_err() {
                    // The actor can no longer receive messages: stop instead
                    // of probing forever on its behalf.
                    tracing::warn!("node actor is gone, stopping health checker");
                    break;
                }
                consecutive_failures = 0;
            }
        }
    });
}
/// Spawns the node actor under the name `git_node_<storage_name>` and queues
/// an initial `ScanAndRegister` message for it.
///
/// Returns the actor reference together with the join handle of the actor task.
///
/// # Errors
/// Propagates the spawn error when the actor cannot be started.
pub async fn start_node_actor(
    service: GitksService,
    storage_name: String,
    grpc_addr: String,
    data_dir: PathBuf,
) -> Result<(ActorRef<GitNodeMessage>, tokio::task::JoinHandle<()>), ractor::SpawnErr> {
    let registered_name = format!("git_node_{storage_name}");
    let start_args = GitNodeArgs {
        storage_name,
        grpc_addr,
        data_dir,
    };
    let (node, join_handle) =
        Actor::spawn(Some(registered_name), GitNodeActor::init(service), start_args).await?;

    // Best-effort: a failure to enqueue the initial scan is ignored.
    let _ = node.cast(GitNodeMessage::ScanAndRegister);
    Ok((node, join_handle))
}
/// Lists the actors registered in the `"node"` group of the `storage_name` scope.
pub fn get_cluster_nodes(storage_name: &str) -> Vec<ractor::ActorCell> {
    let scope = storage_name.to_owned();
    let group = String::from("node");
    pg::get_scoped_members(&scope, &group)
}
/// Lists the actors registered in the `category` group of the `storage_name` scope.
pub fn get_category_members(storage_name: &str, category: &str) -> Vec<ractor::ActorCell> {
    let scope = storage_name.to_owned();
    let group = category.to_owned();
    pg::get_scoped_members(&scope, &group)
}
/// Returns the process-group name used to route `header`'s repository: the
/// category extracted from its relative path.
pub fn route_group_for(header: &crate::pb::RepositoryHeader) -> String {
    let category = extract_category(&header.relative_path);
    String::from(category)
}
/// Returns the names of all process groups reported by `pg::which_groups`.
pub fn list_all_groups() -> Vec<String> {
    let groups = pg::which_groups();
    groups
}
/// Sends a `RefUpdated` notification carrying `event` to every member of the
/// `gitks_nodes` group.
///
/// Delivery is fire-and-forget: send failures are ignored. `_node_actor` is
/// not used.
pub fn broadcast_ref_update(_node_actor: &ActorRef<GitNodeMessage>, event: RefUpdateEvent) {
    for cell in ractor::pg::get_members(&"gitks_nodes".to_string()) {
        let peer: ActorRef<GitNodeMessage> = cell.into();
        let _ = peer.cast(GitNodeMessage::RefUpdated(event.clone()));
    }
}
/// Sends a `RoleChanged` notification carrying `event` to every member of the
/// `gitks_nodes` group.
///
/// Delivery is fire-and-forget: send failures are ignored. `_actor` is not used.
pub fn broadcast_role_changed(_actor: &ActorRef<GitNodeMessage>, event: RoleChangedEvent) {
    for cell in ractor::pg::get_members(&"gitks_nodes".to_string()) {
        let peer: ActorRef<GitNodeMessage> = cell.into();
        let _ = peer.cast(GitNodeMessage::RoleChanged(event.clone()));
    }
}
// ── Raft consensus helpers ───────────────────────────────────
/// Handle AppendEntries RPC from Leader (Follower side).
///
/// Processing order:
/// 1. Reject requests whose term is behind the local term.
/// 2. Adopt the sender as leader; adopt its term and clear the recorded vote
///    only when the term actually advances; step down if this node was primary.
/// 3. Check that the local log matches at `prev_log_index` / `prev_log_term`,
///    reporting conflict hints otherwise.
/// 4. Truncate conflicting entries and append the ones not yet present.
/// 5. Advance the commit index up to `leader_commit`, capped at the local
///    last index.
///
/// Every rejection carries the local term and last log index. `_myself` is
/// not used.
fn handle_append_entries(
    _myself: &ActorRef<GitNodeMessage>,
    state: &mut GitNodeState,
    request: &AppendEntriesRequest,
) -> AppendEntriesResponse {
    // Shared shape of every failure reply; only the conflict hints vary.
    let reject = |state: &GitNodeState, conflict_index, conflict_term| AppendEntriesResponse {
        version: RAFT_MSG_VERSION,
        term: state.current_term,
        success: false,
        match_index: state.raft_log.last_index(),
        conflict_index,
        conflict_term,
    };

    // Step 1: Reply false if term < currentTerm
    if request.term < state.current_term {
        tracing::debug!(
            request_term = request.term,
            local_term = state.current_term,
            "AppendEntries rejected: stale term"
        );
        return reject(&*state, 0, 0);
    }

    // Step 2: Accept the sender as leader. The vote is per-term state, so it
    // is cleared only when the term moves forward; clearing it on an
    // equal-term request would allow this node to vote twice in one term.
    if request.term > state.current_term {
        state.current_term = request.term;
        state.voted_for = None;
    }
    state.leader_id = Some(request.leader_id.clone());
    state.leader_grpc_addr = Some(request.leader_grpc_addr.clone());
    // If we were primary but received a valid AppendEntries, step down.
    if state.is_primary {
        tracing::info!(
            term = request.term,
            leader = %request.leader_id,
            "stepping down from PRIMARY (received AppendEntries from new leader)"
        );
        state.is_primary = false;
        for entry in state.repos.values_mut() {
            entry.role = ROLE_REPLICA.to_string();
        }
    }

    // Step 3: Log consistency check at prev_log_index.
    if request.prev_log_index > 0 {
        let prev_term = state.raft_log.term_at(request.prev_log_index);
        // A term of 0 is treated as "no entry at that index".
        if prev_term == 0 {
            tracing::debug!(
                prev_log_index = request.prev_log_index,
                last_index = state.raft_log.last_index(),
                "AppendEntries rejected: missing prev_log_index"
            );
            let first_missing = state.raft_log.last_index() + 1;
            return reject(&*state, first_missing, 0);
        }
        if prev_term != request.prev_log_term {
            let conflict_term = prev_term;
            let conflict_index = find_first_index_of_term(state, request.prev_log_index);
            tracing::debug!(
                prev_log_index = request.prev_log_index,
                expected_term = request.prev_log_term,
                actual_term = conflict_term,
                "AppendEntries rejected: term conflict"
            );
            return reject(&*state, conflict_index, conflict_term);
        }
    }

    // Step 4: Drop conflicting suffixes, then append entries we do not have.
    for entry in &request.entries {
        let existing_term = state.raft_log.term_at(entry.index);
        if existing_term != 0 && existing_term != entry.term {
            tracing::debug!(
                index = entry.index,
                existing_term,
                new_term = entry.term,
                "truncating conflicting log entries"
            );
            if let Err(e) = state.raft_log.truncate_from(entry.index) {
                tracing::error!(error = %e, "failed to truncate raft log");
                return reject(&*state, 0, 0);
            }
        }
        // NOTE(review): an entry for which `to_entry()` yields `None` is
        // skipped silently and the request still succeeds — confirm intended.
        if state.raft_log.term_at(entry.index) == 0
            && let Some(raft_entry) = entry.to_entry()
            && let Err(e) = state.raft_log.append_reserved(raft_entry)
        {
            tracing::error!(error = %e, "failed to append raft entry");
            return reject(&*state, 0, 0);
        }
    }

    // Step 5: Update commit_index
    if request.leader_commit > state.raft_log.commit_index() {
        let new_commit = request.leader_commit.min(state.raft_log.last_index());
        state.raft_log.advance_commit_index(new_commit);
    }

    let match_index = state.raft_log.last_index();
    tracing::debug!(
        leader = %request.leader_id,
        term = request.term,
        entries_received = request.entries.len(),
        match_index,
        "AppendEntries accepted"
    );
    AppendEntriesResponse {
        version: RAFT_MSG_VERSION,
        term: state.current_term,
        success: true,
        match_index,
        conflict_index: 0,
        conflict_term: 0,
    }
}
/// Returns the lowest index of the contiguous run of entries that share the
/// term found at `index`.
///
/// When the log reports term 0 at `index`, `index` itself is returned. When
/// every entry from 1 up to `index` has the same term, the result is 1.
fn find_first_index_of_term(state: &GitNodeState, index: u64) -> u64 {
    let log = &state.raft_log;
    let run_term = log.term_at(index);
    if run_term == 0 {
        return index;
    }
    // Scan downwards for the nearest entry with a different term; the run
    // starts right after it.
    (1..=index)
        .rev()
        .find(|&candidate| log.term_at(candidate) != run_term)
        .map_or(1, |boundary| boundary + 1)
}
/// Answers a ReadIndex request with the local commit index and term.
///
/// `is_leader` is true only when this node is primary and holds a lease whose
/// deadline lies in the future.
fn handle_read_index(state: &GitNodeState) -> ReadIndexResponse {
    let holds_live_lease = state.is_primary
        && match state.leader_lease_deadline {
            Some(deadline) => deadline > Instant::now(),
            None => false,
        };
    ReadIndexResponse {
        commit_index: state.raft_log.commit_index(),
        leader_term: state.current_term,
        is_leader: holds_live_lease,
    }
}
/// Broadcast AppendEntries to all followers and collect responses.
/// Returns the number of successful responses (including self).
pub async fn broadcast_append_entries(
myself: &ActorRef<GitNodeMessage>,
state: &mut GitNodeState,
entries: Vec<crate::actor::raft_log::LogEntry>,
) -> u64 {
let members = ractor::pg::get_members(&"gitks_nodes".to_string());
let my_cell = myself.get_cell();
let mut success_count = 1u64; // Count self
let serialized_entries: Vec<crate::actor::message::SerializedRaftEntry> = entries
.iter()
.map(crate::actor::message::SerializedRaftEntry::from_entry)
.collect();
for member in &members {
if *member == my_cell {
continue;
}
let actor_ref: ActorRef<GitNodeMessage> = member.clone().into();
let follower_id = format!("{:?}", member.get_id());
let prev_log_index = state.match_index.get(&follower_id).copied().unwrap_or(0);
let prev_log_term = state.raft_log.term_at(prev_log_index);
let request = AppendEntriesRequest {
version: RAFT_MSG_VERSION,
term: state.current_term,
leader_id: state.storage_name.clone(),
leader_grpc_addr: state.grpc_addr.clone(),
prev_log_index,
prev_log_term,
entries: serialized_entries.clone(),
leader_commit: state.raft_log.commit_index(),
};
match ractor::call_t!(actor_ref, GitNodeMessage::AppendEntries, 5000, request) {
Ok(response) if response.success => {
success_count += 1;
state
.match_index
.insert(follower_id.clone(), response.match_index);
state
.next_index
.insert(follower_id, response.match_index + 1);
}
Ok(response) => {
// Follower rejected — update next_index for retry
tracing::debug!(
follower = %follower_id,
term = response.term,
conflict_index = response.conflict_index,
"AppendEntries rejected by follower"
);
// Decrement next_index (optimization: use conflict info)
let next = state.next_index.get(&follower_id).copied().unwrap_or(1);
if response.conflict_index > 0 && response.conflict_index < next {
state
.next_index
.insert(follower_id, response.conflict_index);
} else if next > 1 {
state.next_index.insert(follower_id, next - 1);
}
}
Err(e) => {
tracing::warn!(follower = %follower_id, error = %e, "AppendEntries RPC failed");
}
}
}
success_count
}
/// Reports whether this node is primary and its leader lease has not expired.
///
/// A node without any recorded lease deadline is treated as having no valid lease.
pub fn is_leader_lease_valid(state: &GitNodeState) -> bool {
    if !state.is_primary {
        return false;
    }
    match state.leader_lease_deadline {
        Some(deadline) => deadline > Instant::now(),
        None => false,
    }
}
/// Extends the leader lease so that it expires `LEADER_LEASE_DURATION` from now.
pub fn renew_leader_lease(state: &mut GitNodeState) {
    let new_deadline = Instant::now() + LEADER_LEASE_DURATION;
    state.leader_lease_deadline = Some(new_deadline);
}
/// Handle a Raft write command (Leader side).
/// This is the core of the Raft consensus write path:
/// 1. Check leader lease
/// 2. Create and append log entry
/// 3. Broadcast to followers and wait for majority
/// 4. Advance commit index and apply
async fn handle_raft_write(
myself: &ActorRef<GitNodeMessage>,
state: &mut GitNodeState,
command: crate::actor::raft_log::Command,
) -> bool {
// Step 1: Check if we are the Leader with a valid lease
if !state.is_primary {
tracing::warn!("Raft write rejected: not primary");
return false;
}
if !is_leader_lease_valid(state) {
tracing::warn!("Raft write rejected: leader lease expired");
return false;
}
// Step 2: Create log entry and append to local log
let term = state.current_term;
let entry = crate::actor::raft_log::LogEntry::new(term, state.raft_log.next_index(), command);
let entry_index = entry.index;
if let Err(e) = state.raft_log.append_reserved(entry.clone()) {
tracing::error!(error = %e, "failed to append raft entry locally");
return false;
}
// Step 3: Broadcast AppendEntries to all followers
let members = ractor::pg::get_members(&"gitks_nodes".to_string());
let my_cell = myself.get_cell();
let total_nodes = members.len() as u64;
let majority = (total_nodes / 2) + 1;
let serialized_entry = crate::actor::message::SerializedRaftEntry::from_entry(&entry);
let mut success_count = 1u64; // Count self
for member in &members {
if *member == my_cell {
continue;
}
let actor_ref: ActorRef<GitNodeMessage> = member.clone().into();
let follower_id = format!("{:?}", member.get_id());
let prev_log_index = state.match_index.get(&follower_id).copied().unwrap_or(0);
let prev_log_term = state.raft_log.term_at(prev_log_index);
let request = AppendEntriesRequest {
version: RAFT_MSG_VERSION,
term: state.current_term,
leader_id: state.storage_name.clone(),
leader_grpc_addr: state.grpc_addr.clone(),
prev_log_index,
prev_log_term,
entries: vec![serialized_entry.clone()],
leader_commit: state.raft_log.commit_index(),
};
match ractor::call_t!(actor_ref, GitNodeMessage::AppendEntries, 5000, request) {
Ok(response) if response.success => {
success_count += 1;
state
.match_index
.insert(follower_id.clone(), response.match_index);
state
.next_index
.insert(follower_id, response.match_index + 1);
}
Ok(response) => {
tracing::debug!(
follower = %follower_id,
term = response.term,
"AppendEntries rejected by follower during write"
);
}
Err(e) => {
tracing::warn!(follower = %follower_id, error = %e, "AppendEntries RPC failed during write");
}
}
}
// Step 4: Check if we achieved majority
if success_count >= majority {
// Advance commit index
state.raft_log.advance_commit_index(entry_index);
// Renew leader lease
renew_leader_lease(state);
tracing::info!(
index = entry_index,
term,
success_count,
majority,
"Raft write committed"
);
// Step 5: Apply the command to the state machine
apply_raft_command(state, &entry.command);
true
} else {
tracing::warn!(
index = entry_index,
success_count,
majority,
"Raft write failed: no majority"
);
false
}
}
/// Applies a committed Raft command to the node's repository table, then
/// moves `last_applied` up to the current commit index.
///
/// * `RefUpdate` records the new object id on an already known repository.
/// * `RegisterRepo` adds the repository as a replica unless it is already present.
/// * `RemoveRepo` drops the repository.
/// * `SetPrimary` marks the listed, locally known repositories as primary and
///   writable when the named storage is this node, otherwise as read-only replica.
fn apply_raft_command(state: &mut GitNodeState, command: &crate::actor::raft_log::Command) {
    use crate::actor::raft_log::Command;

    match command {
        Command::RefUpdate {
            relative_path,
            ref_name,
            new_oid,
            ..
        } => {
            tracing::info!(
                relative_path = %relative_path,
                ref_name = %ref_name,
                "applying RefUpdate from Raft log"
            );
            // Only the actor's bookkeeping is touched here; per the original
            // note, the git ref itself is updated by the primary beforehand.
            if let Some(tracked) = state.repos.get_mut(relative_path) {
                tracked.last_commit = new_oid.clone();
            }
        }
        Command::RegisterRepo {
            relative_path,
            storage_name,
        } => {
            tracing::info!(
                relative_path = %relative_path,
                storage_name = %storage_name,
                "applying RegisterRepo from Raft log"
            );
            // An existing entry is kept untouched.
            if !state.repos.contains_key(relative_path) {
                let fresh = RepoEntry {
                    role: ROLE_REPLICA.to_string(),
                    last_commit: String::new(),
                    read_only: false,
                };
                state.repos.insert(relative_path.clone(), fresh);
            }
        }
        Command::RemoveRepo { relative_path } => {
            tracing::info!(
                relative_path = %relative_path,
                "applying RemoveRepo from Raft log"
            );
            state.repos.remove(relative_path);
        }
        Command::SetPrimary {
            storage_name,
            relative_paths,
        } => {
            tracing::info!(
                storage_name = %storage_name,
                paths = relative_paths.len(),
                "applying SetPrimary from Raft log"
            );
            // Whether the new primary is this node is the same for every path.
            let promoted_here = storage_name == &state.storage_name;
            for path in relative_paths {
                let Some(tracked) = state.repos.get_mut(path) else {
                    continue;
                };
                if promoted_here {
                    tracked.role = ROLE_PRIMARY.to_string();
                    tracked.read_only = false;
                } else {
                    tracked.role = ROLE_REPLICA.to_string();
                    tracked.read_only = true;
                }
            }
        }
    }

    // Advance last_applied
    let committed = state.raft_log.commit_index();
    state.raft_log.advance_last_applied(committed);
}