refactor(cache): redesign cache system with structured keys and improved performance

- Add repo_path parameter to cached_response and cached_vec_response functions
- Implement structured cache key format with namespace, repo_path, and request proto
- Replace global cache with Moka in-memory cache using weight-based eviction
- Set 256MB memory cap with a 10-minute TTL (time-to-live) and 2-minute TTI (time-to-idle) policy
- Add metrics collection for cache operations and evictions
- Implement efficient repo-scoped invalidation using key structure
- Add detailed documentation comments explaining cache architecture
- Remove outdated dependencies and update dependency versions
- Add error handling for encoding failures in cache operations
- Optimize Vec responses with length-delimited encoding and pre-allocation
This commit is contained in:
zhenyi
2026-06-12 12:53:23 +08:00
parent a40da90ef9
commit 934858bebf
82 changed files with 1273 additions and 4969 deletions
+8 -269
View File
@@ -1,31 +1,12 @@
/// Generate a `remote_<service>_client` helper function for the given service.
///
/// Macro arguments: the generated function's name, the gRPC client type it
/// returns, and a string literal naming the service for the log message.
///
/// NOTE(review): this text comes from a diff rendering, so two versions of the
/// generated function appear interleaved below. One version resolves a
/// repository route, logs the forwarding target and connects a gRPC client; the
/// other ("single-machine mode: no cluster forwarding") takes underscore-prefixed
/// arguments and returns `Ok(None)`. Confirm which lines are live before editing.
macro_rules! remote_client {
($fn_name:ident, $client:ty, $svc_label:literal) => {
async fn $fn_name(
// Parameter list of the forwarding version.
svc: &super::GitksService,
header: Option<&crate::pb::RepositoryHeader>,
is_write: bool,
// Parameter list of the single-machine version (all arguments unused).
_svc: &super::GitksService,
_header: Option<&crate::pb::RepositoryHeader>,
_is_write: bool,
) -> Result<Option<$client>, tonic::Status> {
// Without a repository header there is nothing to route.
let header = match header {
Some(h) => h,
None => return Ok(None),
};
// No route decision means no remote client is returned.
let Some(route) = svc.route_repository(header, is_write).await? else {
return Ok(None);
};
tracing::info!(
storage_name = %route.storage_name,
relative_path = %route.relative_path,
actor_name = %route.actor_name,
grpc_addr = %route.grpc_addr,
concat!("forwarding ", $svc_label, " rpc")
);
// Connection failures are reported to the caller as `unavailable`.
let endpoint = super::remote_endpoint(&route.grpc_addr).await?;
let client = <$client>::connect(endpoint)
.await
.map_err(|e| tonic::Status::unavailable(e.to_string()))?;
Ok(Some(client))
// Tail expression of the single-machine version: never forward.
Ok(None)
}
};
}
@@ -45,14 +26,10 @@ mod repository_maint;
mod tag;
mod tree;
use dashmap::DashMap;
use gix::discover::is_git;
use ractor::{ActorCell, ActorRef};
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
use tokio_stream::wrappers::ReceiverStream;
use crate::actor::message::{GitNodeMessage, RouteDecision};
use crate::bare::GitBare;
use crate::error::{GitError, GitResult};
use crate::pb::{
@@ -61,45 +38,26 @@ use crate::pb::{
remote_service_server, repository_service_server, tag_service_server, tree_service_server,
};
/// Maximum age of a route cache entry; entries whose `created_at` is at least
/// this old are ignored on lookup and removed by `cleanup_route_cache`.
const ROUTE_CACHE_TTL: Duration = Duration::from_secs(60); // 1 minute
/// A routing decision stored in the route cache together with its creation time.
#[derive(Clone)]
pub struct CachedRoute {
/// The routing decision that was cached.
pub decision: RouteDecision,
/// When this entry was created; its elapsed time is compared against
/// `ROUTE_CACHE_TTL` to decide whether the entry is still usable.
pub created_at: Instant,
}
/// Shared state behind the gRPC service implementations in this module.
///
/// NOTE(review): `route_cache` is held by value, so `#[derive(Clone)]`
/// presumably copies the map on every clone instead of sharing it. Confirm this
/// is intended, in particular for the clone handed to
/// `start_route_cache_cleanup`.
#[derive(Clone)]
pub struct GitksService {
/// Root directory under which repositories are scanned.
pub repo_prefix: PathBuf,
/// Handle to the node actor, if one is attached (see `with_actor`).
pub node_actor: Option<ActorRef<GitNodeMessage>>,
/// Address reported as `primary_grpc_addr` in ref-update events.
pub grpc_addr: String,
/// Optional disk cache (see `with_disk_cache`).
pub disk_cache: Option<crate::disk_cache::DiskCache>,
/// Optional pack cache; invalidated per repository on ref updates.
pub pack_cache: Option<crate::pack_cache::PackCache>,
/// Optional hook manager.
pub hook_manager: Option<crate::hooks::HookManager>,
/// Route decisions keyed by the repository's `relative_path`.
pub route_cache: DashMap<String, CachedRoute>,
}
impl GitksService {
/// Build a service rooted at `repo_prefix` with every optional component unset.
///
/// The node actor, disk cache, pack cache and hook manager start as `None`;
/// `grpc_addr` starts as an empty string and the route cache starts empty.
pub fn new(repo_prefix: PathBuf) -> Self {
    let grpc_addr = String::new();
    let route_cache = DashMap::new();
    Self {
        repo_prefix,
        grpc_addr,
        route_cache,
        node_actor: None,
        disk_cache: None,
        pack_cache: None,
        hook_manager: None,
    }
}
pub fn with_actor(mut self, node_actor: ActorRef<GitNodeMessage>) -> Self {
self.node_actor = Some(node_actor);
self
}
pub fn with_disk_cache(mut self, dc: crate::disk_cache::DiskCache) -> Self {
self.disk_cache = Some(dc);
self
@@ -120,30 +78,6 @@ impl GitksService {
self
}
/// Remove every route cache entry whose age has reached `ROUTE_CACHE_TTL`.
///
/// Emits a debug log with the number of removed and remaining entries when at
/// least one entry was evicted.
pub fn cleanup_route_cache(&self) {
    let before = self.route_cache.len();
    self.route_cache
        .retain(|_key, cached| cached.created_at.elapsed() < ROUTE_CACHE_TTL);
    let remaining = self.route_cache.len();
    // The map takes `&self` for inserts, so entries can be added between the
    // two `len()` calls and `remaining` may exceed `before`. Saturate rather
    // than underflow (which would panic in debug builds and wrap in release).
    let removed = before.saturating_sub(remaining);
    if removed > 0 {
        tracing::debug!(removed, remaining, "route cache cleaned");
    }
}
/// Spawn a background task that runs `cleanup_route_cache` on `svc` every
/// 120 seconds, forever.
///
/// The task owns `svc`. The returned handle can be used to abort or await it.
pub fn start_route_cache_cleanup(svc: Self) -> tokio::task::JoinHandle<()> {
    let period = Duration::from_secs(120);
    let sweeper = async move {
        let mut ticker = tokio::time::interval(period);
        loop {
            ticker.tick().await;
            svc.cleanup_route_cache();
        }
    };
    tokio::spawn(sweeper)
}
pub fn scan_all_repo(&self) -> GitResult<Vec<String>> {
let root = self.repo_prefix.as_ref();
let mut repos = Vec::new();
@@ -157,82 +91,6 @@ impl GitksService {
.filter_map(|path| path.to_str().map(str::to_owned))
.collect())
}
/// Find a remote node that should serve the repository named by `header`.
///
/// Read requests (`is_write == false`) are answered from the route cache when a
/// fresh, usable entry exists for `header.relative_path`. Otherwise every member
/// of the `gitks_nodes` process group except the local node actor is queried:
///
/// * a write returns the first member that reports itself as primary;
/// * a read prefers a primary and falls back to the first replica found.
///
/// Only read results reach the cache insert at the end: a write either returns
/// early with a primary or ends with no decision at all.
///
/// Returns `Ok(None)` when no member reports a usable route.
pub async fn route_repository(
    &self,
    header: &crate::pb::RepositoryHeader,
    is_write: bool,
) -> Result<Option<RouteDecision>, tonic::Status> {
    use crate::actor::message::ROLE_REPLICA;

    // Serve reads from the route cache while the entry is fresh and usable.
    if !is_write
        && let Some(cached) = self.route_cache.get(&header.relative_path)
        && !cached.decision.grpc_addr.is_empty()
        && cached.decision.found
        && cached.created_at.elapsed() < ROUTE_CACHE_TTL
    {
        tracing::debug!(
            relative_path = %header.relative_path,
            grpc_addr = %cached.decision.grpc_addr,
            "route cache hit"
        );
        return Ok(Some(cached.decision.clone()));
    }

    let members = ractor::pg::get_members(&"gitks_nodes".to_string());
    let local = self.node_actor.as_ref().map(|actor| actor.get_cell());
    let mut primary: Option<RouteDecision> = None;
    let mut replica: Option<RouteDecision> = None;

    for member in members {
        // Never route a request back to this node.
        if local.as_ref().is_some_and(|actor| actor == &member) {
            continue;
        }

        if let Some(decision) = query_find_primary(member.clone(), header.clone()).await?
            && decision.found
            && !decision.grpc_addr.is_empty()
        {
            primary = Some(decision);
            if is_write {
                return Ok(primary);
            }
        }

        // A replica is only a fallback for reads. Once a primary is known the
        // replica can never be selected, so skip the extra actor round-trip.
        if !is_write
            && primary.is_none()
            && replica.is_none()
            && let Some(decision) = query_find_replica(member, header.clone()).await?
            && decision.found
            && !decision.grpc_addr.is_empty()
            && decision.role == ROLE_REPLICA
        {
            replica = Some(decision);
        }
    }

    let result = match (primary, replica) {
        (Some(p), _) => Some(p),
        (None, Some(r)) => {
            tracing::info!(
                storage_name = %r.storage_name,
                relative_path = %r.relative_path,
                "read request routed to replica"
            );
            Some(r)
        }
        (None, None) => None,
    };

    // Remember the decision for later reads of the same repository.
    if let Some(ref decision) = result {
        self.route_cache.insert(
            header.relative_path.clone(),
            CachedRoute {
                decision: decision.clone(),
                created_at: Instant::now(),
            },
        );
    }
    Ok(result)
}
fn repo_label(&self, header: Option<&crate::pb::RepositoryHeader>) -> String {
header
.and_then(|h| {
@@ -349,101 +207,17 @@ impl GitksService {
/// React to a ref update in the repository at `relative_path` by invalidating
/// cached state for that repository.
///
/// NOTE(review): this text comes from a diff rendering, so two versions appear
/// interleaved: one takes `ref_name`/`old_oid`/`new_oid` and also broadcasts a
/// `RefUpdateEvent` through the node actor, the other takes the same arguments
/// underscore-prefixed (unused). Confirm which lines are live before editing.
pub fn notify_ref_update(
&self,
relative_path: &str,
ref_name: &str,
old_oid: &str,
new_oid: &str,
_ref_name: &str,
_old_oid: &str,
_new_oid: &str,
) {
// Invalidate moka caches
crate::server::cache::invalidate_repo(relative_path);
// Invalidate route cache
self.route_cache.remove(relative_path);
// Invalidate the pack cache for this repository, if one is configured
if let Some(ref pc) = self.pack_cache {
pc.invalidate_repo(relative_path);
}
// Broadcast the update through the node actor, if one is attached.
if let Some(ref actor) = self.node_actor {
let event = crate::actor::message::RefUpdateEvent {
relative_path: relative_path.to_string(),
ref_name: ref_name.to_string(),
old_oid: old_oid.to_string(),
new_oid: new_oid.to_string(),
primary_grpc_addr: self.grpc_addr.clone(),
// The storage name is left empty in this event.
primary_storage_name: String::new(),
};
crate::actor::handler::broadcast_ref_update(actor, event);
}
}
/// Submit a write command to the node actor for Raft processing.
///
/// The command is sent as `GitNodeMessage::RaftWrite` with a 10 second call
/// timeout. The leader check, log append, `AppendEntries` broadcast, majority
/// acknowledgement and commit are expected to happen inside the actor; none of
/// that is visible from this method.
///
/// # Errors
///
/// * `failed_precondition` when no node actor is attached;
/// * `aborted` when the actor replies `false` (not leader or timeout);
/// * `internal` when the actor call itself fails.
pub async fn raft_consensus_write(
    &self,
    command: crate::actor::raft_log::Command,
) -> Result<(), tonic::Status> {
    let Some(actor) = self.node_actor.as_ref() else {
        return Err(tonic::Status::failed_precondition(
            "node actor not initialized",
        ));
    };

    // Third argument is the call timeout in milliseconds (10 seconds).
    match ractor::call_t!(actor, GitNodeMessage::RaftWrite, 10000, command) {
        Ok(true) => Ok(()),
        Ok(false) => Err(tonic::Status::aborted(
            "Raft consensus failed: not leader or timeout",
        )),
        Err(e) => Err(tonic::Status::internal(format!("Raft write error: {e}"))),
    }
}
/// Perform a ReadIndex check to ensure this node can serve consistent reads.
/// This confirms the Leader is still valid before reading from local state.
pub async fn raft_read_index(&self) -> Result<(), tonic::Status> {
let actor = self
.node_actor
.as_ref()
.ok_or_else(|| tonic::Status::failed_precondition("node actor not initialized"))?;
let request = crate::actor::message::ReadIndexRequest {
relative_path: String::new(),
};
let result = ractor::call_t!(actor, GitNodeMessage::ReadIndex, 5000, request);
match result {
Ok(response) => {
if response.is_leader {
Ok(())
} else {
Err(tonic::Status::failed_precondition(
"not leader, cannot serve consistent read",
))
}
}
Err(e) => Err(tonic::Status::internal(format!("ReadIndex error: {e}"))),
}
}
/// Inject repo_prefix as storage_path into the client-provided header
@@ -456,13 +230,6 @@ impl GitksService {
}
}
/// Turn a textual address into a tonic transport endpoint.
///
/// # Errors
///
/// * `invalid_argument` when `addr` does not parse as a URI;
/// * `internal` when the endpoint cannot be built from the parsed URI.
pub async fn remote_endpoint(addr: &str) -> Result<tonic::transport::Endpoint, tonic::Status> {
    let uri = match addr.parse::<tonic::codegen::http::Uri>() {
        Ok(parsed) => parsed,
        Err(e) => {
            return Err(tonic::Status::invalid_argument(format!(
                "invalid URI: {e}"
            )));
        }
    };
    match tonic::transport::Endpoint::new(uri) {
        Ok(endpoint) => Ok(endpoint),
        Err(e) => Err(tonic::Status::internal(e.to_string())),
    }
}
pub(super) fn bridge_server_stream<T: Send + 'static>(
mut remote: tonic::Streaming<T>,
) -> tokio_stream::wrappers::ReceiverStream<Result<T, tonic::Status>> {
@@ -478,34 +245,6 @@ pub(super) fn bridge_server_stream<T: Send + 'static>(
tokio_stream::wrappers::ReceiverStream::new(rx)
}
async fn query_find_primary(
member: ActorCell,
header: crate::pb::RepositoryHeader,
) -> Result<Option<RouteDecision>, tonic::Status> {
let actor_ref: ActorRef<GitNodeMessage> = member.into();
match ractor::call_t!(actor_ref, GitNodeMessage::FindPrimary, 500, header) {
Ok(decision) => Ok(Some(decision)),
Err(err) => {
tracing::warn!(error = %err, "find primary query failed");
Ok(None)
}
}
}
async fn query_find_replica(
member: ActorCell,
header: crate::pb::RepositoryHeader,
) -> Result<Option<RouteDecision>, tonic::Status> {
let actor_ref: ActorRef<GitNodeMessage> = member.into();
match ractor::call_t!(actor_ref, GitNodeMessage::FindReplica, 500, header) {
Ok(decision) => Ok(Some(decision)),
Err(err) => {
tracing::warn!(error = %err, "find replica query failed");
Ok(None)
}
}
}
fn scan_bare_repos_recursively(dir: &Path, repos: &mut Vec<PathBuf>) -> GitResult<()> {
for entry in std::fs::read_dir(dir)? {
let entry = entry?;