refactor(server): replace custom remote clients with macro-based implementation

- Replaced manual remote client functions with remote_client! macro for archive, blame, branch, commit, and diff services
- Simplified remote client creation logic using declarative macro approach
- Maintained same functionality while reducing code duplication across services

security(bare): enhance path traversal protection with comprehensive validation

- Added early relative_path validation to prevent path traversal attacks
- Implemented unified path validation to avoid TOCTOU race conditions
- Enhanced canonicalization checks for both existing and non-existent paths
- Added detailed logging for path traversal detection attempts

feat(cache): migrate from CLruCache to Moka with TTL and invalidation support

- Replaced clru dependency with moka for improved caching capabilities
- Added 300-second time-to-live for cache entries
- Implemented repository-specific cache invalidation mechanism
- Enhanced cache operations with thread-safe async support

refactor(commit): improve security validation for commit operations

- Added ref name validation to prevent command injection in cherry_pick_commit
- Implemented revision validation for commit selectors
- Added comprehensive input validation for create_commit parameters
- Enhanced file path validation to prevent traversal
This commit is contained in:
zhenyi
2026-06-08 09:43:57 +08:00
parent 8c95eb230d
commit d243dce027
60 changed files with 1746 additions and 561 deletions
+109 -44
View File
@@ -1,9 +1,11 @@
use std::collections::HashMap;
use crate::actor::message::{
GitNodeMessage, NodeHealth, ROLE_PRIMARY, ROLE_REPLICA, RefUpdateEvent, RouteDecision,
};
use crate::server::GitksService;
use async_trait::async_trait;
use ractor::pg;
use ractor::{Actor, ActorProcessingErr, ActorRef, SupervisionEvent};
use crate::actor::message::{GitNodeMessage, NodeHealth, RefUpdateEvent, RouteDecision, ROLE_PRIMARY, ROLE_REPLICA};
use crate::server::GitksService;
use std::collections::HashMap;
#[derive(Clone)]
pub struct GitNodeActor {
@@ -50,7 +52,11 @@ impl Actor for GitNodeActor {
) -> Result<Self::State, ActorProcessingErr> {
let actor_name = format!("git_node_{}", args.storage_name);
pg::join("gitks_nodes".to_string(), vec![myself.get_cell()]);
pg::join_scoped(args.storage_name.clone(), "node".to_string(), vec![myself.get_cell()]);
pg::join_scoped(
args.storage_name.clone(),
"node".to_string(),
vec![myself.get_cell()],
);
tracing::info!(storage_name = %args.storage_name, actor_name = %actor_name, grpc_addr = %args.grpc_addr, "GitNodeActor started");
Ok(GitNodeState {
storage_name: args.storage_name,
@@ -90,43 +96,60 @@ impl Actor for GitNodeActor {
}
GitNodeMessage::RefUpdated(event) => {
if let Some(entry) = state.repos.get(&event.relative_path) {
if entry.role == ROLE_REPLICA {
let local_path = self.service.repo_prefix.join(&event.relative_path);
crate::actor::sync::sync_from_primary(event, local_path).await;
}
if let Some(entry) = state.repos.get(&event.relative_path)
&& entry.role == ROLE_REPLICA
{
let local_path = self.service.repo_prefix.join(&event.relative_path);
crate::actor::sync::sync_from_primary(event, local_path).await;
}
}
GitNodeMessage::FindPrimary(header, reply) => {
let entry = state.repos.get(&header.relative_path);
let is_primary = entry.is_some_and(|e| e.role == ROLE_PRIMARY);
reply.send(build_decision(state, &header, is_primary, entry.map(|e| e.role.as_str()))).ok();
reply
.send(build_decision(
state,
&header,
is_primary,
entry.map(|e| e.role.as_str()),
))
.ok();
}
GitNodeMessage::FindReplica(header, reply) => {
let entry = state.repos.get(&header.relative_path);
let has = entry.is_some();
reply.send(build_decision(state, &header, has, entry.map(|e| e.role.as_str()))).ok();
reply
.send(build_decision(
state,
&header,
has,
entry.map(|e| e.role.as_str()),
))
.ok();
}
GitNodeMessage::ListRepositoryPaths(reply) => {
let paths: Vec<String> = state.repos.keys().cloned().collect();
reply.send(paths.join("\n")).ok();
}
GitNodeMessage::RepositoryExists(header, reply) => {
reply.send(state.repos.contains_key(&header.relative_path)).ok();
reply
.send(state.repos.contains_key(&header.relative_path))
.ok();
}
GitNodeMessage::GetNodeHealth(reply) => {
reply.send(NodeHealth {
storage_name: state.storage_name.clone(),
repo_count: state.repos.len() as u64,
healthy: true,
version: self.version.clone(),
}).ok();
reply
.send(NodeHealth {
storage_name: state.storage_name.clone(),
repo_count: state.repos.len() as u64,
healthy: true,
version: self.version.clone(),
})
.ok();
}
}
Ok(())
@@ -139,14 +162,18 @@ impl Actor for GitNodeActor {
_state: &mut Self::State,
) -> Result<(), ActorProcessingErr> {
match evt {
SupervisionEvent::ActorStarted(who) => tracing::debug!(actor = ?who.get_id(), "child started"),
SupervisionEvent::ActorStarted(who) => {
tracing::debug!(actor = ?who.get_id(), "child started")
}
SupervisionEvent::ActorTerminated(who, _, reason) => {
tracing::warn!(actor = ?who.get_id(), reason = ?reason, "child terminated")
}
SupervisionEvent::ActorFailed(who, panic_msg) => {
tracing::error!(actor = ?who.get_id(), msg = %panic_msg, "child panicked")
}
SupervisionEvent::ProcessGroupChanged(group) => tracing::info!(group = ?group, "PG membership changed"),
SupervisionEvent::ProcessGroupChanged(group) => {
tracing::info!(group = ?group, "PG membership changed")
}
_ => {}
}
Ok(())
@@ -162,48 +189,83 @@ impl Actor for GitNodeActor {
}
}
fn build_decision(state: &GitNodeState, header: &crate::pb::RepositoryHeader, found: bool, role: Option<&str>) -> RouteDecision {
fn build_decision(
state: &GitNodeState,
header: &crate::pb::RepositoryHeader,
found: bool,
role: Option<&str>,
) -> RouteDecision {
RouteDecision {
found,
storage_name: if found { state.storage_name.clone() } else { String::new() },
storage_name: if found {
state.storage_name.clone()
} else {
String::new()
},
relative_path: header.relative_path.clone(),
actor_name: if found { state.actor_name.clone() } else { String::new() },
grpc_addr: if found { state.grpc_addr.clone() } else { String::new() },
actor_name: if found {
state.actor_name.clone()
} else {
String::new()
},
grpc_addr: if found {
state.grpc_addr.clone()
} else {
String::new()
},
role: role.unwrap_or("").to_string(),
}
}
fn register_repo(myself: &ActorRef<GitNodeMessage>, state: &mut GitNodeState, relative_path: String) {
fn register_repo(
myself: &ActorRef<GitNodeMessage>,
state: &mut GitNodeState,
relative_path: String,
) {
if state.repos.contains_key(&relative_path) {
return;
}
let role = if is_path_registered_elsewhere(&state.storage_name, &relative_path) {
// Determine role based on cluster state
// For simplicity and correctness, we use a conservative approach:
// If there are other nodes in the cluster, register as replica initially.
// The route_repository logic will determine the actual primary at query time.
let members = ractor::pg::get_members(&"gitks_nodes".to_string());
let my_cell = myself.get_cell();
let other_nodes_exist = members.iter().any(|m| m != &my_cell);
let role = if other_nodes_exist {
// Conservative: assume another node might be primary
// The actual primary will be determined by route_repository query
ROLE_REPLICA.to_string()
} else {
// We're the only node, so we're primary
ROLE_PRIMARY.to_string()
};
let category = extract_category(&relative_path);
pg::join_scoped(state.storage_name.clone(), category.to_string(), vec![myself.get_cell()]);
state.repos.insert(relative_path.clone(), RepoEntry {
role: role.clone(),
last_commit: String::new(),
});
pg::join_scoped(
state.storage_name.clone(),
category.to_string(),
vec![myself.get_cell()],
);
state.repos.insert(
relative_path.clone(),
RepoEntry {
role: role.clone(),
last_commit: String::new(),
},
);
tracing::info!(
storage_name = %state.storage_name,
category = %category,
relative_path = %relative_path,
actor_name = %state.actor_name,
role = %role,
"repository route registered"
"repository route registered (role will be refined at query time)"
);
}
// NOTE(review): `register_repo` in this diff also carries a process-group
// membership check for choosing the role; this stub looks superseded by it —
// confirm whether it is still called before relying on it.
/// Placeholder for checking whether a repository path is registered elsewhere.
///
/// Both arguments are ignored and the result is always `false`.
fn is_path_registered_elsewhere(_storage_name: &str, _relative_path: &str) -> bool {
false
}
/// Returns the category of a repository path: its first `/`-separated segment.
///
/// A path without any `/` is its own category. When the first segment is empty
/// (an empty path, or one that starts with `/`), `"root"` is returned instead,
/// so the result is never an empty name.
fn extract_category(relative_path: &str) -> &str {
    relative_path
        .split('/')
        .next()
        // `split` always yields at least one item, so the fallback has to be
        // triggered by an empty segment rather than by `None`.
        .filter(|segment| !segment.is_empty())
        .unwrap_or("root")
}
@@ -217,8 +279,12 @@ pub async fn start_node_actor(
let (actor_ref, handle) = Actor::spawn(
Some(format!("git_node_{storage_name}")),
actor,
GitNodeArgs { storage_name, grpc_addr },
).await?;
GitNodeArgs {
storage_name,
grpc_addr,
},
)
.await?;
actor_ref.cast(GitNodeMessage::ScanAndRegister).ok();
Ok((actor_ref, handle))
}
@@ -239,13 +305,12 @@ pub fn list_all_groups() -> Vec<String> {
pg::which_groups()
}
pub fn broadcast_ref_update(
_node_actor: &ActorRef<GitNodeMessage>,
event: RefUpdateEvent,
) {
pub fn broadcast_ref_update(_node_actor: &ActorRef<GitNodeMessage>, event: RefUpdateEvent) {
let members = ractor::pg::get_members(&"gitks_nodes".to_string());
for member in members {
let actor_ref: ActorRef<GitNodeMessage> = member.into();
actor_ref.cast(GitNodeMessage::RefUpdated(event.clone())).ok();
actor_ref
.cast(GitNodeMessage::RefUpdated(event.clone()))
.ok();
}
}
+62 -7
View File
@@ -1,7 +1,7 @@
use crate::pb::RepositoryHeader;
use ractor::RpcReplyPort;
use ractor_cluster::BytesConvertable;
use ractor_cluster::RactorClusterMessage;
use crate::pb::RepositoryHeader;
impl BytesConvertable for RepositoryHeader {
fn into_bytes(self) -> Vec<u8> {
@@ -73,7 +73,10 @@ impl BytesConvertable for NodeHealth {
let values = decode_strings(bytes);
Self {
storage_name: values.first().cloned().unwrap_or_default(),
repo_count: values.get(1).and_then(|v| v.parse().ok()).unwrap_or_default(),
repo_count: values
.get(1)
.and_then(|v| v.parse().ok())
.unwrap_or_default(),
healthy: values.get(2).is_some_and(|v| v == "1"),
version: values.get(3).cloned().unwrap_or_default(),
}
@@ -156,17 +159,69 @@ fn encode_strings(values: &[String]) -> Vec<u8> {
buf
}
// Maximum allowed length for a single string in the message
const MAX_STRING_LEN: usize = 10 * 1024 * 1024; // 10MB
// Maximum total message size
const MAX_TOTAL_SIZE: usize = 50 * 1024 * 1024; // 50MB
fn decode_strings(bytes: Vec<u8>) -> Vec<String> {
let mut values = Vec::new();
let mut offset = 0;
// Check total message size
if bytes.len() > MAX_TOTAL_SIZE {
tracing::warn!(
total = bytes.len(),
max = MAX_TOTAL_SIZE,
"message exceeds maximum size, truncating"
);
return values;
}
while offset + 8 <= bytes.len() {
let len = u64::from_be_bytes(bytes[offset..offset + 8].try_into().unwrap()) as usize;
offset += 8;
if offset + len > bytes.len() {
let len_bytes: [u8; 8] = bytes[offset..offset + 8].try_into().unwrap_or([0u8; 8]);
let len_u64 = u64::from_be_bytes(len_bytes);
// Prevent DoS via extremely large length values
if len_u64 > MAX_STRING_LEN as u64 {
tracing::warn!(
offset,
claimed_len = len_u64,
max = MAX_STRING_LEN,
"string length exceeds maximum, stopping decode"
);
break;
}
values.push(String::from_utf8_lossy(&bytes[offset..offset + len]).into_owned());
offset += len;
let len = len_u64 as usize;
offset += 8;
// Prevent integer overflow in offset calculation
let end_offset = match offset.checked_add(len) {
Some(end) => end,
None => {
tracing::warn!(
offset,
len,
"integer overflow in offset calculation, stopping decode"
);
break;
}
};
if len == 0 || end_offset > bytes.len() {
// Invalid length — stop decoding, return what we have so far
tracing::warn!(
offset,
claimed_len = len,
total = bytes.len(),
"malformed bytes in decode_strings, stopping early"
);
break;
}
values.push(String::from_utf8_lossy(&bytes[offset..end_offset]).into_owned());
offset = end_offset;
}
values
}
+9 -3
View File
@@ -1,8 +1,14 @@
pub mod message;
pub mod handler;
pub mod message;
pub mod server;
pub mod sync;
pub use handler::{GitNodeActor, GitNodeArgs, RepoEntry, start_node_actor, get_cluster_nodes, get_category_members, route_group_for, list_all_groups, broadcast_ref_update};
pub use handler::{
GitNodeActor, GitNodeArgs, RepoEntry, broadcast_ref_update, get_category_members,
get_cluster_nodes, list_all_groups, route_group_for, start_node_actor,
};
pub use message::{
GitNodeMessage, NodeHealth, ROLE_PRIMARY, ROLE_REPLICA, RefUpdateEvent, RepoActorMessage,
RouteDecision,
};
pub use server::init_actor_cluster;
pub use message::{GitNodeMessage, NodeHealth, RefUpdateEvent, RepoActorMessage, RouteDecision, ROLE_PRIMARY, ROLE_REPLICA};
+1 -1
View File
@@ -1,7 +1,7 @@
use ractor::ActorRef;
use crate::actor::handler::start_node_actor;
use crate::actor::message::GitNodeMessage;
use crate::server::GitksService;
use ractor::ActorRef;
pub async fn init_actor_cluster(
service: GitksService,
+66 -31
View File
@@ -1,6 +1,6 @@
use std::path::PathBuf;
use crate::actor::message::RefUpdateEvent;
use crate::pb::Oid;
use std::path::{Path, PathBuf};
pub struct BundleApplicator {
pub repo_path: PathBuf,
@@ -13,7 +13,13 @@ impl BundleApplicator {
pub fn apply_bundle(&self, data: &[u8]) -> Result<(), String> {
let mut child = std::process::Command::new("git")
.args(["--git-dir", &self.repo_path.to_string_lossy(), "bundle", "unbundle", "-"])
.args([
"--git-dir",
&self.repo_path.to_string_lossy(),
"bundle",
"unbundle",
"-",
])
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
@@ -21,9 +27,13 @@ impl BundleApplicator {
.map_err(|e| format!("spawn git bundle unbundle: {e}"))?;
use std::io::Write;
if let Some(ref mut stdin) = child.stdin {
stdin.write_all(data).map_err(|e| format!("write bundle: {e}"))?;
stdin
.write_all(data)
.map_err(|e| format!("write bundle: {e}"))?;
}
let output = child.wait_with_output().map_err(|e| format!("wait bundle: {e}"))?;
let output = child
.wait_with_output()
.map_err(|e| format!("wait bundle: {e}"))?;
if !output.status.success() {
return Err(String::from_utf8_lossy(&output.stderr).into_owned());
}
@@ -31,7 +41,7 @@ impl BundleApplicator {
}
}
pub fn collect_local_haves(repo_path: &PathBuf) -> Result<Vec<Oid>, String> {
pub fn collect_local_haves(repo_path: &Path) -> Result<Vec<Oid>, String> {
let result = std::process::Command::new("git")
.args([
"--git-dir",
@@ -84,13 +94,13 @@ pub async fn sync_from_primary(event: RefUpdateEvent, local_repo_path: PathBuf)
match tokio::task::spawn_blocking(move || {
sync_via_pack_service(&grpc_addr, &relative_path, &repo_for_haves)
}).await {
})
.await
{
Ok(Ok(pack_data)) if !pack_data.is_empty() => {
let pack_len = pack_data.len();
let repo = local_repo_path.clone();
match tokio::task::spawn_blocking(move || {
apply_pack_data(&repo, &pack_data)
}).await {
match tokio::task::spawn_blocking(move || apply_pack_data(&repo, &pack_data)).await {
Ok(Ok(())) => {
update_local_ref(&local_repo_path, &event.ref_name, &event.new_oid);
tracing::info!(
@@ -99,27 +109,39 @@ pub async fn sync_from_primary(event: RefUpdateEvent, local_repo_path: PathBuf)
"replica sync done"
);
}
Ok(Err(e)) => tracing::error!(relative_path = %event.relative_path, error = %e, "pack apply failed"),
Err(e) => tracing::error!(relative_path = %event.relative_path, error = %e, "apply task failed"),
Ok(Err(e)) => {
tracing::error!(relative_path = %event.relative_path, error = %e, "pack apply failed")
}
Err(e) => {
tracing::error!(relative_path = %event.relative_path, error = %e, "apply task failed")
}
}
}
Ok(Ok(_)) => tracing::warn!(relative_path = %event.relative_path, "empty pack data from primary"),
Ok(Err(e)) => tracing::error!(relative_path = %event.relative_path, error = %e, "pack fetch failed"),
Err(e) => tracing::error!(relative_path = %event.relative_path, error = %e, "sync task failed"),
Ok(Ok(_)) => {
tracing::warn!(relative_path = %event.relative_path, "empty pack data from primary")
}
Ok(Err(e)) => {
tracing::error!(relative_path = %event.relative_path, error = %e, "pack fetch failed")
}
Err(e) => {
tracing::error!(relative_path = %event.relative_path, error = %e, "sync task failed")
}
}
}
fn sync_via_pack_service(
grpc_addr: &str,
relative_path: &str,
local_repo_path: &PathBuf,
local_repo_path: &Path,
) -> Result<Vec<u8>, String> {
let haves = collect_local_haves(local_repo_path)?;
let rt = tokio::runtime::Handle::current();
rt.block_on(async {
use crate::pb::pack_service_client::PackServiceClient;
use crate::pb::{AdvertiseRefsRequest, PackObjectsOptions, PackObjectsRequest, RepositoryHeader};
use crate::pb::{
AdvertiseRefsRequest, PackObjectsOptions, PackObjectsRequest, RepositoryHeader,
};
use tokio_stream::StreamExt;
let endpoint = crate::server::remote_endpoint(grpc_addr)
@@ -136,20 +158,21 @@ fn sync_via_pack_service(
storage_path: String::new(),
};
let refs_resp = client.advertise_refs(AdvertiseRefsRequest {
repository: Some(header.clone()),
protocol: None,
service: "upload-pack".to_string(),
}).await.map_err(|e| format!("AdvertiseRefs: {e}"))?;
let refs_resp = client
.advertise_refs(AdvertiseRefsRequest {
repository: Some(header.clone()),
protocol: None,
service: "upload-pack".to_string(),
})
.await
.map_err(|e| format!("AdvertiseRefs: {e}"))?;
let refs = refs_resp.into_inner().references;
if refs.is_empty() {
return Ok(Vec::new());
}
let wants: Vec<Oid> = refs.iter()
.filter_map(|r| r.target_oid.clone())
.collect();
let wants: Vec<Oid> = refs.iter().filter_map(|r| r.target_oid.clone()).collect();
let want_count = wants.len();
let have_count = haves.len();
@@ -178,7 +201,9 @@ fn sync_via_pack_service(
options: Some(options),
};
let resp = client.pack_objects(req).await
let resp = client
.pack_objects(req)
.await
.map_err(|e| format!("PackObjects: {e}"))?;
let mut stream = resp.into_inner();
@@ -200,21 +225,31 @@ fn sync_via_pack_service(
})
}
fn apply_pack_data(repo_path: &PathBuf, pack_data: &[u8]) -> Result<(), String> {
let applicator = BundleApplicator::new(repo_path.clone());
fn apply_pack_data(repo_path: &Path, pack_data: &[u8]) -> Result<(), String> {
let applicator = BundleApplicator::new(repo_path.to_path_buf());
applicator.apply_bundle(pack_data)
}
fn update_local_ref(repo_path: &PathBuf, ref_name: &str, new_oid: &str) {
fn update_local_ref(repo_path: &Path, ref_name: &str, new_oid: &str) {
if ref_name.is_empty() || new_oid.is_empty() {
return;
}
match std::process::Command::new("git")
.args(["--git-dir", &repo_path.to_string_lossy(), "update-ref", ref_name, new_oid])
.args([
"--git-dir",
&repo_path.to_string_lossy(),
"update-ref",
ref_name,
new_oid,
])
.output()
{
Ok(o) if o.status.success() => tracing::info!(ref_name = %ref_name, new_oid = %new_oid, "ref updated"),
Ok(o) => tracing::error!(ref_name = %ref_name, error = %String::from_utf8_lossy(&o.stderr), "update-ref failed"),
Ok(o) if o.status.success() => {
tracing::info!(ref_name = %ref_name, new_oid = %new_oid, "ref updated")
}
Ok(o) => {
tracing::error!(ref_name = %ref_name, error = %String::from_utf8_lossy(&o.stderr), "update-ref failed")
}
Err(e) => tracing::error!(ref_name = %ref_name, error = %e, "update-ref spawn failed"),
}
}