refactor(actor): implement replica sync and ref update notification system

- Add is_write parameter to remote clients for read/write routing distinction
- Introduce RepoEntry struct with role tracking (primary/replica) for repositories
- Replace HashSet with HashMap for repository storage with role metadata
- Add ROLE_PRIMARY and ROLE_REPLICA constants for node role identification
- Implement FindPrimary and FindReplica RPC methods for role-based routing
- Add RefUpdateEvent message type for propagating reference updates
- Create sync module with BundleApplicator for handling replica synchronization
- Implement notify_ref_update calls after branch/tag/commit operations
- Add broadcast_ref_update function to propagate events across cluster nodes
- Modify route_repository to prioritize primary for writes and replicas for reads
- Update actor message handling to support role-based repository discovery
- Implement sync_from_primary function using pack protocol for incremental updates
This commit is contained in:
zhenyi
2026-06-08 01:54:08 +08:00
parent 5c99b27421
commit 8c95eb230d
16 changed files with 518 additions and 105 deletions
+76 -25
View File
@@ -1,9 +1,8 @@
use std::collections::HashSet;
use std::collections::HashMap;
use async_trait::async_trait;
use ractor::pg;
use ractor::{Actor, ActorProcessingErr, ActorRef, SupervisionEvent};
use crate::actor::message::{GitNodeMessage, NodeHealth, RouteDecision};
use crate::pb::RepositoryHeader;
use crate::actor::message::{GitNodeMessage, NodeHealth, RefUpdateEvent, RouteDecision, ROLE_PRIMARY, ROLE_REPLICA};
use crate::server::GitksService;
#[derive(Clone)]
@@ -21,6 +20,11 @@ impl GitNodeActor {
}
}
/// Per-repository bookkeeping kept by a node actor.
///
/// Stored as the value of the node's repository map, keyed by the
/// repository's relative path.
#[derive(Debug, Clone)]
pub struct RepoEntry {
    /// Role this node plays for the repository; compared against
    /// `ROLE_PRIMARY` / `ROLE_REPLICA`.
    pub role: String,
    /// Last known commit id. Registration initialises it to an empty string.
    pub last_commit: String,
}
pub struct GitNodeArgs {
pub storage_name: String,
pub grpc_addr: String,
@@ -30,7 +34,7 @@ pub struct GitNodeState {
storage_name: String,
actor_name: String,
grpc_addr: String,
registered_repos: HashSet<String>,
repos: HashMap<String, RepoEntry>,
}
#[async_trait]
@@ -52,7 +56,7 @@ impl Actor for GitNodeActor {
storage_name: args.storage_name,
actor_name,
grpc_addr: args.grpc_addr,
registered_repos: HashSet::new(),
repos: HashMap::new(),
})
}
@@ -81,38 +85,45 @@ impl Actor for GitNodeActor {
}
GitNodeMessage::RemoveRepository(header) => {
state.registered_repos.remove(&header.relative_path);
tracing::info!(
storage_name = %state.storage_name,
relative_path = %header.relative_path,
"repository route removed"
);
state.repos.remove(&header.relative_path);
tracing::info!(storage_name = %state.storage_name, relative_path = %header.relative_path, "repository route removed");
}
GitNodeMessage::RouteRepository(header, reply) => {
let found = state.registered_repos.contains(&header.relative_path);
reply.send(RouteDecision {
found,
storage_name: state.storage_name.clone(),
relative_path: header.relative_path,
actor_name: if found { state.actor_name.clone() } else { String::new() },
grpc_addr: if found { state.grpc_addr.clone() } else { String::new() },
}).ok();
GitNodeMessage::RefUpdated(event) => {
if let Some(entry) = state.repos.get(&event.relative_path) {
if entry.role == ROLE_REPLICA {
let local_path = self.service.repo_prefix.join(&event.relative_path);
crate::actor::sync::sync_from_primary(event, local_path).await;
}
}
}
GitNodeMessage::FindPrimary(header, reply) => {
let entry = state.repos.get(&header.relative_path);
let is_primary = entry.is_some_and(|e| e.role == ROLE_PRIMARY);
reply.send(build_decision(state, &header, is_primary, entry.map(|e| e.role.as_str()))).ok();
}
GitNodeMessage::FindReplica(header, reply) => {
let entry = state.repos.get(&header.relative_path);
let has = entry.is_some();
reply.send(build_decision(state, &header, has, entry.map(|e| e.role.as_str()))).ok();
}
GitNodeMessage::ListRepositoryPaths(reply) => {
let paths: Vec<String> = state.registered_repos.iter().cloned().collect();
let paths: Vec<String> = state.repos.keys().cloned().collect();
reply.send(paths.join("\n")).ok();
}
GitNodeMessage::RepositoryExists(header, reply) => {
reply.send(state.registered_repos.contains(&header.relative_path)).ok();
reply.send(state.repos.contains_key(&header.relative_path)).ok();
}
GitNodeMessage::GetNodeHealth(reply) => {
reply.send(NodeHealth {
storage_name: state.storage_name.clone(),
repo_count: state.registered_repos.len() as u64,
repo_count: state.repos.len() as u64,
healthy: true,
version: self.version.clone(),
}).ok();
@@ -151,19 +162,48 @@ impl Actor for GitNodeActor {
}
}
/// Assembles the reply for a routing lookup on this node.
///
/// The node's storage name, actor name and gRPC address are only filled in
/// when `found` is true; otherwise they are empty strings. The repository's
/// relative path is always echoed back, and `role` is copied through (empty
/// when `None`) regardless of `found`.
fn build_decision(state: &GitNodeState, header: &crate::pb::RepositoryHeader, found: bool, role: Option<&str>) -> RouteDecision {
    // Decide once whether the node identity is disclosed, instead of per field.
    let (storage_name, actor_name, grpc_addr) = if found {
        (
            state.storage_name.clone(),
            state.actor_name.clone(),
            state.grpc_addr.clone(),
        )
    } else {
        (String::new(), String::new(), String::new())
    };

    RouteDecision {
        found,
        storage_name,
        relative_path: header.relative_path.clone(),
        actor_name,
        grpc_addr,
        role: role.map(str::to_owned).unwrap_or_default(),
    }
}
/// Registers `relative_path` on this node, unless it is already tracked.
///
/// Picks a role via `is_path_registered_elsewhere`, joins the process group
/// scoped by the node's storage name and named after the path's category,
/// records a `RepoEntry` with an empty `last_commit`, and logs the result.
fn register_repo(myself: &ActorRef<GitNodeMessage>, state: &mut GitNodeState, relative_path: String) {
// Already tracked: leave the existing entry (and its role) untouched.
if state.repos.contains_key(&relative_path) {
return;
}
// Replica when some other node already has the path, primary otherwise.
let role = if is_path_registered_elsewhere(&state.storage_name, &relative_path) {
ROLE_REPLICA.to_string()
} else {
ROLE_PRIMARY.to_string()
};
let category = extract_category(&relative_path);
pg::join_scoped(state.storage_name.clone(), category.to_string(), vec![myself.get_cell()]);
// NOTE(review): `registered_repos` looks like the pre-refactor set; this
// insert sits next to the `repos` insert below — confirm it is still wanted.
state.registered_repos.insert(relative_path.clone());
state.repos.insert(relative_path.clone(), RepoEntry {
role: role.clone(),
last_commit: String::new(),
});
tracing::info!(
storage_name = %state.storage_name,
category = %category,
relative_path = %relative_path,
actor_name = %state.actor_name,
role = %role,
"repository route registered"
);
}
/// Placeholder for the "is this path already registered on another node" check.
///
/// Both arguments are currently ignored and the result is always `false`, so
/// `register_repo` assigns `ROLE_PRIMARY` to every repository it registers.
/// NOTE(review): the name suggests a cluster-wide lookup is intended here — confirm.
fn is_path_registered_elsewhere(_storage_name: &str, _relative_path: &str) -> bool {
false
}
/// Returns the category of a repository path: its first non-empty
/// `/`-separated segment, or `"root"` when there is none.
///
/// `str::split` always yields at least one (possibly empty) item, so taking
/// the first item blindly would return `""` for an empty or leading-slash path
/// and the `"root"` fallback would never apply. Empty segments are skipped so
/// the fallback is reachable and the category is never an empty string.
fn extract_category(relative_path: &str) -> &str {
    relative_path
        .split('/')
        .find(|segment| !segment.is_empty())
        .unwrap_or("root")
}
@@ -191,10 +231,21 @@ pub fn get_category_members(storage_name: &str, category: &str) -> Vec<ractor::A
pg::get_scoped_members(&storage_name.to_string(), &category.to_string())
}
/// Returns the routing group for a repository: the category extracted from
/// the header's `relative_path`, as an owned `String`.
pub fn route_group_for(header: &RepositoryHeader) -> String {
pub fn route_group_for(header: &crate::pb::RepositoryHeader) -> String {
extract_category(&header.relative_path).to_string()
}
/// Returns the group names reported by `pg::which_groups()`.
pub fn list_all_groups() -> Vec<String> {
pg::which_groups()
}
pub fn broadcast_ref_update(
_node_actor: &ActorRef<GitNodeMessage>,
event: RefUpdateEvent,
) {
let members = ractor::pg::get_members(&"gitks_nodes".to_string());
for member in members {
let actor_ref: ActorRef<GitNodeMessage> = member.into();
actor_ref.cast(GitNodeMessage::RefUpdated(event.clone())).ok();
}
}
+47 -1
View File
@@ -13,6 +13,9 @@ impl BytesConvertable for RepositoryHeader {
}
}
/// Role string carried in repository entries and route decisions for the primary copy.
pub const ROLE_PRIMARY: &str = "primary";
/// Role string carried in repository entries and route decisions for a replica copy.
pub const ROLE_REPLICA: &str = "replica";
#[derive(Debug, Clone)]
pub struct RouteDecision {
pub found: bool,
@@ -20,6 +23,7 @@ pub struct RouteDecision {
pub relative_path: String,
pub actor_name: String,
pub grpc_addr: String,
pub role: String,
}
impl BytesConvertable for RouteDecision {
@@ -30,6 +34,7 @@ impl BytesConvertable for RouteDecision {
self.relative_path,
self.actor_name,
self.grpc_addr,
self.role,
])
}
@@ -41,6 +46,7 @@ impl BytesConvertable for RouteDecision {
relative_path: values.get(2).cloned().unwrap_or_default(),
actor_name: values.get(3).cloned().unwrap_or_default(),
grpc_addr: values.get(4).cloned().unwrap_or_default(),
role: values.get(5).cloned().unwrap_or_default(),
}
}
}
@@ -74,6 +80,41 @@ impl BytesConvertable for NodeHealth {
}
}
/// Notification that a reference changed in a repository.
///
/// All fields are plain strings; the `BytesConvertable` impl encodes them in
/// declaration order.
#[derive(Debug, Clone)]
pub struct RefUpdateEvent {
/// Relative path of the repository the update belongs to.
pub relative_path: String,
/// Name of the reference that changed.
pub ref_name: String,
/// Object id before the update (presumably hex — TODO confirm).
pub old_oid: String,
/// Object id after the update.
pub new_oid: String,
/// gRPC address of the node to fetch the update from.
pub primary_grpc_addr: String,
/// Storage name of the originating node (presumably the primary — TODO confirm).
pub primary_storage_name: String,
}
impl BytesConvertable for RefUpdateEvent {
    /// Encodes the six fields, in declaration order, via `encode_strings`.
    fn into_bytes(self) -> Vec<u8> {
        let Self {
            relative_path,
            ref_name,
            old_oid,
            new_oid,
            primary_grpc_addr,
            primary_storage_name,
        } = self;
        let fields = [
            relative_path,
            ref_name,
            old_oid,
            new_oid,
            primary_grpc_addr,
            primary_storage_name,
        ];
        encode_strings(&fields)
    }

    /// Decodes an event from `decode_strings` output; any missing position
    /// becomes an empty string.
    fn from_bytes(bytes: Vec<u8>) -> Self {
        let values = decode_strings(bytes);
        // Positional lookup with an empty-string default for absent slots.
        let field = |index: usize| values.get(index).cloned().unwrap_or_default();
        Self {
            relative_path: field(0),
            ref_name: field(1),
            old_oid: field(2),
            new_oid: field(3),
            primary_grpc_addr: field(4),
            primary_storage_name: field(5),
        }
    }
}
#[derive(RactorClusterMessage)]
pub enum GitNodeMessage {
ScanAndRegister,
@@ -82,8 +123,13 @@ pub enum GitNodeMessage {
RemoveRepository(RepositoryHeader),
RefUpdated(RefUpdateEvent),
#[rpc]
RouteRepository(RepositoryHeader, RpcReplyPort<RouteDecision>),
FindPrimary(RepositoryHeader, RpcReplyPort<RouteDecision>),
#[rpc]
FindReplica(RepositoryHeader, RpcReplyPort<RouteDecision>),
#[rpc]
ListRepositoryPaths(RpcReplyPort<String>),
+3 -2
View File
@@ -1,7 +1,8 @@
pub mod message;
pub mod handler;
pub mod server;
pub mod sync;
pub use handler::{GitNodeActor, GitNodeArgs, start_node_actor, get_cluster_nodes, get_category_members, route_group_for, list_all_groups};
pub use handler::{GitNodeActor, GitNodeArgs, RepoEntry, start_node_actor, get_cluster_nodes, get_category_members, route_group_for, list_all_groups, broadcast_ref_update};
pub use server::init_actor_cluster;
pub use message::{GitNodeMessage, NodeHealth, RepoActorMessage, RouteDecision};
pub use message::{GitNodeMessage, NodeHealth, RefUpdateEvent, RepoActorMessage, RouteDecision, ROLE_PRIMARY, ROLE_REPLICA};
+220
View File
@@ -0,0 +1,220 @@
use std::path::PathBuf;
use crate::actor::message::RefUpdateEvent;
use crate::pb::Oid;
/// Applies git bundle data to a local repository by shelling out to `git`.
pub struct BundleApplicator {
    /// Path passed to git as `--git-dir`.
    pub repo_path: PathBuf,
}

impl BundleApplicator {
    /// Creates an applicator for the repository at `repo_path`.
    pub fn new(repo_path: PathBuf) -> Self {
        Self { repo_path }
    }

    /// Pipes `data` into `git bundle unbundle -` for this repository.
    ///
    /// # Errors
    ///
    /// Returns a message when git cannot be spawned or waited on, git's own
    /// stderr when it exits unsuccessfully, or the write error when the data
    /// could not be fully written even though git reported success.
    pub fn apply_bundle(&self, data: &[u8]) -> Result<(), String> {
        use std::io::Write;

        let mut child = std::process::Command::new("git")
            .arg("--git-dir")
            // Pass the path as an OS string so non-UTF-8 paths are not mangled.
            .arg(&self.repo_path)
            .args(["bundle", "unbundle", "-"])
            .stdin(std::process::Stdio::piped())
            // Stdout is never inspected; discarding it keeps git from
            // blocking on a pipe nobody reads.
            .stdout(std::process::Stdio::null())
            .stderr(std::process::Stdio::piped())
            .spawn()
            .map_err(|e| format!("spawn git bundle unbundle: {e}"))?;

        // Take ownership of stdin so it is closed (EOF for git) as soon as the
        // write finishes, and remember the outcome instead of returning early:
        // the child must still be waited on, and its stderr usually explains a
        // broken pipe better than the write error does.
        let write_result = child
            .stdin
            .take()
            .map_or(Ok(()), |mut pipe| pipe.write_all(data));

        let output = child
            .wait_with_output()
            .map_err(|e| format!("wait bundle: {e}"))?;
        if !output.status.success() {
            return Err(String::from_utf8_lossy(&output.stderr).into_owned());
        }
        write_result.map_err(|e| format!("write bundle: {e}"))
    }
}
/// Lists the object ids that the repository's refs point at, for use as the
/// "haves" of a pack request.
///
/// Runs `git for-each-ref --format=%(objectname)` against `repo_path`. Blank
/// lines and the zero object id are skipped, and each distinct id is reported
/// once (first occurrence wins) even when several refs share a target.
///
/// # Errors
///
/// Returns a message when git cannot be run, or git's stderr when it exits
/// unsuccessfully.
pub fn collect_local_haves(repo_path: &PathBuf) -> Result<Vec<Oid>, String> {
    let result = std::process::Command::new("git")
        .arg("--git-dir")
        // OS-string argument: a lossy UTF-8 conversion could point git at a
        // different directory for non-UTF-8 paths.
        .arg(repo_path)
        .args(["for-each-ref", "--format=%(objectname)"])
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .output()
        .map_err(|e| format!("git for-each-ref: {e}"))?;

    if !result.status.success() {
        return Err(String::from_utf8_lossy(&result.stderr).into_owned());
    }

    let stdout = String::from_utf8_lossy(&result.stdout);
    // Branches and tags frequently share a target; send each id only once.
    let mut seen = std::collections::HashSet::new();
    let haves: Vec<Oid> = stdout
        .lines()
        .map(str::trim)
        .filter(|hex| !hex.is_empty() && *hex != crate::oid::ZERO_OID)
        .filter(|hex| seen.insert(*hex))
        .map(|hex| {
            let hex = hex.to_string();
            Oid {
                // An id that fails to decode keeps its hex form with an empty
                // byte value, as before.
                value: crate::oid::hex_to_bytes(&hex).unwrap_or_default(),
                hex,
                // NOTE(review): the format is fixed to SHA-1 regardless of the
                // id length — confirm for repositories using another format.
                format: crate::pb::ObjectFormat::Sha1 as i32,
            }
        })
        .collect();

    tracing::debug!(
        repo = %repo_path.display(),
        haves_count = haves.len(),
        "collected local haves from refs"
    );

    Ok(haves)
}
pub async fn sync_from_primary(event: RefUpdateEvent, local_repo_path: PathBuf) {
tracing::info!(
relative_path = %event.relative_path,
ref_name = %event.ref_name,
primary = %event.primary_grpc_addr,
"replica sync starting"
);
let grpc_addr = event.primary_grpc_addr.clone();
let relative_path = event.relative_path.clone();
let repo_for_haves = local_repo_path.clone();
match tokio::task::spawn_blocking(move || {
sync_via_pack_service(&grpc_addr, &relative_path, &repo_for_haves)
}).await {
Ok(Ok(pack_data)) if !pack_data.is_empty() => {
let pack_len = pack_data.len();
let repo = local_repo_path.clone();
match tokio::task::spawn_blocking(move || {
apply_pack_data(&repo, &pack_data)
}).await {
Ok(Ok(())) => {
update_local_ref(&local_repo_path, &event.ref_name, &event.new_oid);
tracing::info!(
relative_path = %event.relative_path,
bytes = pack_len,
"replica sync done"
);
}
Ok(Err(e)) => tracing::error!(relative_path = %event.relative_path, error = %e, "pack apply failed"),
Err(e) => tracing::error!(relative_path = %event.relative_path, error = %e, "apply task failed"),
}
}
Ok(Ok(_)) => tracing::warn!(relative_path = %event.relative_path, "empty pack data from primary"),
Ok(Err(e)) => tracing::error!(relative_path = %event.relative_path, error = %e, "pack fetch failed"),
Err(e) => tracing::error!(relative_path = %event.relative_path, error = %e, "sync task failed"),
}
}
/// Fetches pack data for `relative_path` from the pack service at `grpc_addr`.
///
/// Local ref targets are collected as "haves", the remote's advertised ref
/// targets become the "wants", and the streamed response chunks are
/// concatenated. Returns an empty vector when the remote advertises no refs.
///
/// Drives the async client with `Handle::current().block_on`, so it has to be
/// called from a thread where a Tokio handle is available and blocking is
/// allowed (the visible caller uses `spawn_blocking`).
fn sync_via_pack_service(
    grpc_addr: &str,
    relative_path: &str,
    local_repo_path: &PathBuf,
) -> Result<Vec<u8>, String> {
    use crate::pb::pack_service_client::PackServiceClient;
    use crate::pb::{AdvertiseRefsRequest, PackObjectsOptions, PackObjectsRequest, RepositoryHeader};
    use tokio_stream::StreamExt;

    let haves = collect_local_haves(local_repo_path)?;

    tokio::runtime::Handle::current().block_on(async {
        let endpoint = crate::server::remote_endpoint(grpc_addr)
            .await
            .map_err(|e| e.to_string())?;
        let mut client = PackServiceClient::connect(endpoint)
            .await
            .map_err(|e| format!("connect to primary: {e}"))?;

        // Only the relative path is filled in; the storage fields stay empty.
        let header = RepositoryHeader {
            storage_name: String::new(),
            relative_path: relative_path.to_string(),
            storage_path: String::new(),
        };

        let advertise = AdvertiseRefsRequest {
            repository: Some(header.clone()),
            protocol: None,
            service: "upload-pack".to_string(),
        };
        let refs = client
            .advertise_refs(advertise)
            .await
            .map_err(|e| format!("AdvertiseRefs: {e}"))?
            .into_inner()
            .references;
        if refs.is_empty() {
            return Ok(Vec::new());
        }

        // Every advertised target is wanted; refs without a target are skipped.
        let wants: Vec<Oid> = refs
            .iter()
            .filter_map(|r| r.target_oid.clone())
            .collect();

        let want_count = wants.len();
        let have_count = haves.len();
        tracing::info!(
            relative_path = %relative_path,
            want_count,
            have_count,
            "requesting incremental pack from primary"
        );

        let req = PackObjectsRequest {
            repository: Some(header),
            options: Some(PackObjectsOptions {
                wants,
                haves,
                shallow_revisions: Vec::new(),
                deepen: 0,
                thin_pack: false,
                include_tag: true,
                use_bitmaps: true,
                delta_base_offset: true,
                pathspec: Vec::new(),
            }),
        };

        let mut stream = client
            .pack_objects(req)
            .await
            .map_err(|e| format!("PackObjects: {e}"))?
            .into_inner();

        // Concatenate the streamed chunks; the first stream error aborts.
        let mut pack_data = Vec::new();
        while let Some(chunk) = stream.next().await {
            let msg = chunk.map_err(|e| format!("pack stream: {e}"))?;
            pack_data.extend_from_slice(&msg.data);
        }

        tracing::info!(
            relative_path = %relative_path,
            pack_bytes = pack_data.len(),
            "received pack data from primary"
        );

        Ok(pack_data)
    })
}
/// Stores data received from the primary into the repository at `repo_path`.
///
/// Data that starts with the packfile signature `PACK` is a raw pack, which
/// `git bundle unbundle` rejects because it expects a bundle header; such data
/// is fed to `git index-pack --stdin` instead. Anything else is still handed
/// to `BundleApplicator`, so bundle-formatted input keeps working.
///
/// # Errors
///
/// Returns a message when git cannot be spawned or waited on, git's stderr
/// when it exits unsuccessfully, or the write error when the data could not
/// be fully written even though git reported success.
fn apply_pack_data(repo_path: &PathBuf, pack_data: &[u8]) -> Result<(), String> {
    use std::io::Write;

    if !pack_data.starts_with(b"PACK") {
        return BundleApplicator::new(repo_path.clone()).apply_bundle(pack_data);
    }

    let mut child = std::process::Command::new("git")
        .arg("--git-dir")
        .arg(repo_path)
        // `--fix-thin` is harmless for complete packs and completes thin ones.
        .args(["index-pack", "--stdin", "--fix-thin"])
        .stdin(std::process::Stdio::piped())
        // index-pack prints the pack name on stdout; it is not needed here.
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::piped())
        .spawn()
        .map_err(|e| format!("spawn git index-pack: {e}"))?;

    // Close stdin right after writing so git sees EOF, and always wait for
    // the child so it is reaped and its stderr can be reported.
    let write_result = child
        .stdin
        .take()
        .map_or(Ok(()), |mut pipe| pipe.write_all(pack_data));

    let output = child
        .wait_with_output()
        .map_err(|e| format!("wait index-pack: {e}"))?;
    if !output.status.success() {
        return Err(String::from_utf8_lossy(&output.stderr).into_owned());
    }
    write_result.map_err(|e| format!("write pack: {e}"))
}
/// Points `ref_name` at `new_oid` in the repository at `repo_path` by running
/// `git update-ref`.
///
/// Does nothing when either `ref_name` or `new_oid` is empty. The outcome is
/// only logged; no error is returned to the caller.
fn update_local_ref(repo_path: &PathBuf, ref_name: &str, new_oid: &str) {
    if ref_name.is_empty() || new_oid.is_empty() {
        return;
    }

    let outcome = std::process::Command::new("git")
        .arg("--git-dir")
        // OS-string argument: a lossy UTF-8 conversion could point git at a
        // different directory for non-UTF-8 paths.
        .arg(repo_path)
        .args(["update-ref", ref_name, new_oid])
        .output();

    match outcome {
        Ok(o) if o.status.success() => tracing::info!(ref_name = %ref_name, new_oid = %new_oid, "ref updated"),
        Ok(o) => tracing::error!(ref_name = %ref_name, error = %String::from_utf8_lossy(&o.stderr), "update-ref failed"),
        Err(e) => tracing::error!(ref_name = %ref_name, error = %e, "update-ref spawn failed"),
    }
}