refactor(cache): redesign cache system with structured keys and improved performance

- Add repo_path parameter to cached_response and cached_vec_response functions
- Implement structured cache key format with namespace, repo_path, and request proto
- Replace global cache with Moka in-memory cache using weight-based eviction
- Set 256MB memory cap with 10-minute TTL and 2-minute TTI policy
- Add metrics collection for cache operations and evictions
- Implement efficient repo-scoped invalidation using key structure
- Add detailed documentation comments explaining cache architecture
- Remove outdated dependencies and update dependency versions
- Add error handling for encoding failures in cache operations
- Optimize Vec responses with length-delimited encoding and pre-allocation
This commit is contained in:
zhenyi
2026-06-12 12:53:23 +08:00
parent a40da90ef9
commit 934858bebf
82 changed files with 1273 additions and 4969 deletions
+1 -1
View File
@@ -77,7 +77,7 @@ impl archive_service_server::ArchiveService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.treeish) {
cache::cached_response("archive.list_archive_entries", &inner, || {
cache::cached_response("archive.list_archive_entries", &repo, &inner, || {
gb.list_archive_entries(inner.clone()).map_err(into_status)
})?
} else {
+2 -2
View File
@@ -43,7 +43,7 @@ impl blame_service_server::BlameService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.revision) {
cache::cached_response("blame.blame", &inner, || {
cache::cached_response("blame.blame", &repo, &inner, || {
gb.blame(inner.clone()).map_err(into_status)
})?
} else {
@@ -85,7 +85,7 @@ impl blame_service_server::BlameService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.revision) {
cache::cached_response("blame.blame", &inner, || {
cache::cached_response("blame.blame", &repo, &inner, || {
gb.blame(inner.clone()).map_err(into_status)
})?
} else {
+267 -86
View File
@@ -1,40 +1,165 @@
//! In-memory response cache layer for GitKS.
//!
//! Two-tier architecture:
//! 1. **Moka in-memory cache** (this module) — sub-microsecond lookups for hot data
//! 2. **Disk cache** (disk_cache.rs) — persistent cache for pack-objects / info-refs
//!
//! # Cache Key Format
//!
//! Keys are structured to enable efficient repo-scoped invalidation:
//!
//! ```text
//! [namespace_len: u8][namespace: &[u8]][repo_path_len: u16 LE][repo_path: &[u8]][request_proto: &[u8]]
//! ```
//!
//! This allows `invalidate_repo` to extract and match the repo_path without
//! protobuf decoding or substring scanning.
//!
//! # Eviction Policy
//!
//! - **Weight-based**: total memory capped at 256 MB (weighed by key+value capacity)
//! - **TTI** (time-to-idle): 2 minutes — frequently accessed entries stay hot
//! - **TTL** (time-to-live): 10 minutes — hard upper bound for safety
//! - Evictions are tracked via metrics for observability
use std::sync::OnceLock;
use std::time::Duration;
use moka::sync::Cache;
use prost::Message;
use crate::pb::{ObjectSelector, object_selector};
/// Maximum total cache weight (key + value allocated bytes): 256 MB.
const CACHE_MAX_WEIGHT: u64 = 256 * 1024 * 1024;
const GLOBAL_CACHE_MAX: u64 = 65_536;
const CACHE_TTL: Duration = Duration::from_secs(300);
/// Hard time-to-live: entries older than this are unconditionally evicted.
const CACHE_MAX_TTL: Duration = Duration::from_secs(600); // 10 min
static GLOBAL_CACHE: OnceLock<Cache<Vec<u8>, Vec<u8>>> = OnceLock::new();
/// Time-to-idle: entries not accessed within this window are evicted.
/// Frequently accessed entries survive up to TTL, cold entries expire quickly.
const CACHE_TTI: Duration = Duration::from_secs(120); // 2 min
fn cache() -> &'static Cache<Vec<u8>, Vec<u8>> {
GLOBAL_CACHE.get_or_init(|| {
Cache::builder()
.max_capacity(GLOBAL_CACHE_MAX)
.time_to_live(CACHE_TTL)
.build()
/// Estimated per-entry overhead (Moka internal Arc + metadata).
/// Added to the weigher result to prevent underestimation.
const ENTRY_OVERHEAD: u32 = 128;
/// Container for the process-wide in-memory cache.
///
/// A single instance is created lazily and kept in the `CACHE` static.
struct CacheState {
// Moka cache whose keys and values are both raw byte vectors.
store: Cache<Vec<u8>, Vec<u8>>,
}
static CACHE: OnceLock<CacheState> = OnceLock::new();
/// Return the process-wide cache state, building it on first use.
///
/// The Moka cache is configured with:
/// - a weigher charging each entry its key and value allocation plus
///   `ENTRY_OVERHEAD`,
/// - a total weight cap of `CACHE_MAX_WEIGHT`,
/// - `CACHE_MAX_TTL` as time-to-live and `CACHE_TTI` as time-to-idle,
/// - a listener that reports every removal to the metrics module, labelled
///   with the key's namespace and the removal cause.
fn state() -> &'static CacheState {
    CACHE.get_or_init(|| {
        let store = Cache::builder()
            .weigher(|key: &Vec<u8>, value: &Vec<u8>| -> u32 {
                // capacity() reflects the real allocation, spare room included.
                // The weigher has to return u32, so add in usize with saturation
                // and clamp: a plain `as u32` cast would wrap for very large
                // buffers and make them look nearly free to size-based eviction.
                let allocated = key.capacity().saturating_add(value.capacity());
                u32::try_from(allocated)
                    .unwrap_or(u32::MAX)
                    .saturating_add(ENTRY_OVERHEAD)
            })
            .max_capacity(CACHE_MAX_WEIGHT)
            .time_to_live(CACHE_MAX_TTL)
            .time_to_idle(CACHE_TTI)
            .eviction_listener(|key: std::sync::Arc<Vec<u8>>, _value: Vec<u8>, cause| {
                let cause_str = match cause {
                    moka::notification::RemovalCause::Expired => "expired",
                    moka::notification::RemovalCause::Explicit => "explicit",
                    moka::notification::RemovalCause::Replaced => "replaced",
                    moka::notification::RemovalCause::Size => "size",
                };
                // Extract namespace for per-namespace metrics
                let namespace = decode_namespace(&key);
                crate::metrics::record_cache_eviction(namespace, cause_str);
            })
            .build();
        tracing::info!(
            max_weight_mb = CACHE_MAX_WEIGHT / (1024 * 1024),
            ttl_secs = CACHE_MAX_TTL.as_secs(),
            tti_secs = CACHE_TTI.as_secs(),
            "Moka in-memory cache initialized"
        );
        CacheState { store }
    })
}
fn cache_key<Req>(namespace: &str, request: &Req) -> Vec<u8>
where
Req: Message,
{
let mut key = Vec::with_capacity(namespace.len() + 1 + request.encoded_len());
key.extend_from_slice(namespace.as_bytes());
key.push(0);
request
.encode(&mut key)
.expect("encoding a prost message into Vec cannot fail");
key
#[inline]
fn cache() -> &'static Cache<Vec<u8>, Vec<u8>> {
&state().store
}
// Key encoding
/// Build the structured byte key for one cache entry.
///
/// Layout: `namespace_len(u8) + namespace + repo_path_len(u16 LE) + repo_path + request_proto`
///
/// Returns `None` (after logging a warning) when the namespace does not fit in
/// a `u8` length prefix or the repo path does not fit in a `u16` length prefix.
fn encode_key(namespace: &str, repo_path: &str, request_bytes: &[u8]) -> Option<Vec<u8>> {
    // Both length prefixes are fixed-width; a failed conversion means "too long".
    let (Ok(ns_len), Ok(rp_len)) = (
        u8::try_from(namespace.len()),
        u16::try_from(repo_path.len()),
    ) else {
        tracing::warn!(
            namespace_len = namespace.len(),
            repo_path_len = repo_path.len(),
            "cache key too long, bypassing cache"
        );
        return None;
    };

    // Allocate the exact final size up front so the key never reallocates.
    let mut key =
        Vec::with_capacity(1 + namespace.len() + 2 + repo_path.len() + request_bytes.len());
    key.push(ns_len);
    key.extend_from_slice(namespace.as_bytes());
    key.extend_from_slice(&rp_len.to_le_bytes());
    key.extend_from_slice(repo_path.as_bytes());
    key.extend_from_slice(request_bytes);
    Some(key)
}
/// Read the namespace label stored at the front of a cache key.
///
/// The first byte is the namespace length and the namespace bytes follow it.
/// If the key is shorter than that length claims, the bytes that are present
/// are used. Returns `"unknown"` for an empty key or for non-UTF-8 bytes.
fn decode_namespace(key: &[u8]) -> &str {
    let Some((&ns_len, rest)) = key.split_first() else {
        return "unknown";
    };
    // Clamp to what the key actually holds so slicing cannot go out of bounds.
    let available = rest.len().min(usize::from(ns_len));
    std::str::from_utf8(&rest[..available]).unwrap_or("unknown")
}
/// Borrow the repo-path segment of a structured cache key.
///
/// Walks the key layout in order: one namespace-length byte, the namespace,
/// a two-byte little-endian repo-path length, then the repo path itself.
/// Returns `None` whenever the key is too short for any of those parts.
fn extract_repo_path_bytes(key: &[u8]) -> Option<&[u8]> {
    let (&ns_len, rest) = key.split_first()?;
    // Skip over the namespace bytes.
    let after_ns = rest.get(usize::from(ns_len)..)?;
    // The next two bytes hold the repo-path length.
    let header = after_ns.get(..2)?;
    let rp_len = usize::from(u16::from_le_bytes([header[0], header[1]]));
    after_ns.get(2..)?.get(..rp_len)
}
/// Report whether the repo path embedded in `key` equals `target_repo`.
///
/// Keys from which no repo path can be extracted never match.
fn key_matches_repo(key: &[u8], target_repo: &[u8]) -> bool {
    matches!(extract_repo_path_bytes(key), Some(found) if found == target_repo)
}
// Single-message cache
/// Cache a single protobuf response.
///
/// On cache hit, decodes and returns the cached response.
/// On cache miss, calls `build`, caches the result, and returns it.
///
/// `repo_path` should be the repository's relative path (used for scoped invalidation).
pub(crate) fn cached_response<Req, Res, E, F>(
namespace: &'static str,
repo_path: &str,
request: &Req,
build: F,
) -> Result<Res, E>
@@ -43,14 +168,21 @@ where
Res: Message + Default,
F: FnOnce() -> Result<Res, E>,
{
let key = cache_key(namespace, request);
let req_bytes = encode_request(request);
let Some(key) = encode_key(namespace, repo_path, &req_bytes) else {
return build();
};
if let Some(bytes) = cache().get(&key)
&& let Ok(response) = Res::decode(bytes.as_slice())
{
let elapsed = std::time::Duration::ZERO; // Moka get is memory-only, effectively instant
crate::metrics::record_cache_op("moka", "hit", elapsed);
tracing::debug!(
namespace = %namespace,
repo = %repo_path,
key_len = key.len(),
value_len = bytes.len(),
"cache hit"
);
return Ok(response);
@@ -58,20 +190,41 @@ where
tracing::debug!(
namespace = %namespace,
repo = %repo_path,
key_len = key.len(),
"cache miss, building response"
);
let start = std::time::Instant::now();
let response = build()?;
let build_elapsed = start.elapsed();
let mut bytes = Vec::with_capacity(response.encoded_len());
response
.encode(&mut bytes)
.expect("encoding a prost message into Vec cannot fail");
cache().insert(key, bytes);
if let Err(err) = response.encode(&mut bytes) {
tracing::warn!(
namespace = %namespace,
repo = %repo_path,
error = %err,
"failed to encode cache response"
);
} else {
cache().insert(key, bytes);
}
crate::metrics::record_cache_op("moka", "miss", build_elapsed);
Ok(response)
}
// Vec-message cache
/// Cache a `Vec<Item>` protobuf response using length-delimited encoding.
///
/// Items are stored back to back with length-delimited framing. Decoding is
/// all-or-nothing: if any single item fails to decode, the entire entry is
/// discarded and rebuilt.
pub(crate) fn cached_vec_response<Req, Item, E, F>(
namespace: &'static str,
repo_path: &str,
request: &Req,
build: F,
) -> Result<Vec<Item>, E>
@@ -80,90 +233,125 @@ where
Item: Message + Default,
F: FnOnce() -> Result<Vec<Item>, E>,
{
let key = cache_key(namespace, request);
let req_bytes = encode_request(request);
let Some(key) = encode_key(namespace, repo_path, &req_bytes) else {
return build();
};
// Try cache hit
if let Some(bytes) = cache().get(&key) {
let mut remaining = bytes.as_slice();
let mut items = Vec::new();
let mut remaining = bytes.as_slice();
let mut valid = true;
while !remaining.is_empty() {
match Item::decode_length_delimited(&mut remaining) {
Ok(item) => items.push(item),
Err(_) => {
valid = false;
break;
// Decode the first item up front; a failure here only marks the entry invalid when undecoded bytes remain
if let Ok(first) = Item::decode_length_delimited(&mut remaining) {
items.push(first);
while !remaining.is_empty() {
match Item::decode_length_delimited(&mut remaining) {
Ok(item) => items.push(item),
Err(_) => {
valid = false;
break;
}
}
}
} else if !remaining.is_empty() {
valid = false;
}
if valid {
crate::metrics::record_cache_op("moka", "hit", std::time::Duration::ZERO);
tracing::debug!(
namespace = %namespace,
key_len = key.len(),
repo = %repo_path,
item_count = items.len(),
"vec cache hit"
);
return Ok(items);
}
tracing::warn!(
namespace = %namespace,
repo = %repo_path,
"vec cache decode failed, rebuilding"
);
// Invalidate the corrupt entry
cache().invalidate(&key);
}
tracing::debug!(
namespace = %namespace,
key_len = key.len(),
repo = %repo_path,
"vec cache miss, building response"
);
let start = std::time::Instant::now();
let response = build()?;
let mut bytes = Vec::new();
let build_elapsed = start.elapsed();
// Encode all items into a single buffer with length-delimited framing
let total_est: usize = response
.iter()
.map(|item| item.encoded_len() + 10) // 10 = prost length-delimited overhead
.sum();
let mut bytes = Vec::with_capacity(total_est);
let mut encode_ok = true;
for item in &response {
item.encode_length_delimited(&mut bytes)
.expect("encoding a prost message into Vec cannot fail");
if let Err(err) = item.encode_length_delimited(&mut bytes) {
tracing::warn!(
namespace = %namespace,
repo = %repo_path,
error = %err,
"failed to encode vec cache item"
);
encode_ok = false;
break;
}
}
cache().insert(key, bytes);
if encode_ok {
cache().insert(key, bytes);
}
crate::metrics::record_cache_op("moka", "miss", build_elapsed);
Ok(response)
}
/// Invalidate all cache entries related to a specific repository.
/// Called when refs are updated (create branch, create commit, etc.)
/// so that stale data is not served.
// Request encoding helpers
/// Serialize a protobuf request into a freshly allocated byte vector.
///
/// The buffer is sized from `encoded_len()` before encoding. An encode error
/// is logged as a warning and whatever was written so far is still returned.
#[inline]
fn encode_request<Req: Message>(request: &Req) -> Vec<u8> {
    let mut encoded = Vec::with_capacity(request.encoded_len());
    match request.encode(&mut encoded) {
        Ok(()) => {}
        Err(err) => {
            tracing::warn!(error = %err, "failed to encode cache request");
        }
    }
    encoded
}
// Repository-scoped invalidation
/// Invalidate all cache entries for a specific repository.
///
/// Uses the structured key format to extract and match repository paths
/// without protobuf decoding or substring scanning. Every cached entry is
/// visited once, and each visit costs one byte-slice comparison of the repo path.
///
/// Called by `notify_ref_update` after any mutator RPC (create commit,
/// create branch, etc.) to prevent serving stale data.
pub(crate) fn invalidate_repo(relative_path: &str) {
let c = cache();
let target = relative_path.as_bytes();
let mut keys_to_remove: Vec<std::sync::Arc<Vec<u8>>> = Vec::with_capacity(64);
// Encode the relative_path to match how it appears in cache keys
let target_path_bytes = relative_path.as_bytes();
// Remove all keys that reference this repository
// Cache keys are: namespace\0 + prost-encoded request
let keys_to_remove: Vec<std::sync::Arc<Vec<u8>>> = c
.iter()
.filter_map(|(key, _)| {
// Find the null byte separator
if let Some(null_pos) = key.iter().position(|&b| b == 0) {
let encoded_request = &key[null_pos + 1..];
// Check if this encoded request contains the repository path
// We use a sliding window to find the path bytes in the encoded protobuf
// This is conservative but correct: we may invalidate slightly more than
// necessary, but we won't miss any entries for this repository.
//
// The encoded protobuf format embeds string fields as length-prefixed data,
// so the relative_path bytes should appear verbatim somewhere in the message.
if contains_subslice(encoded_request, target_path_bytes) {
return Some(key);
}
} else {
// Malformed key without separator, remove it to be safe
tracing::warn!("found cache key without null separator, removing");
return Some(key);
}
None
})
.collect();
for (key, _value) in c.iter() {
if key_matches_repo(&key, target) {
keys_to_remove.push(key);
}
}
let removed = keys_to_remove.len();
for key in keys_to_remove {
for key in &keys_to_remove {
c.invalidate(key.as_ref());
}
@@ -176,20 +364,12 @@ pub(crate) fn invalidate_repo(relative_path: &str) {
}
}
/// Check if a byte slice contains a subslice
fn contains_subslice(haystack: &[u8], needle: &[u8]) -> bool {
if needle.is_empty() {
return true;
}
if needle.len() > haystack.len() {
return false;
}
// Selector helpers
haystack
.windows(needle.len())
.any(|window| window == needle)
}
use crate::pb::{ObjectSelector, object_selector};
/// Returns true if the selector is an OID-based reference.
/// OID-based selectors are cacheable because they are immutable.
pub(crate) fn selector_is_oid(selector: &Option<ObjectSelector>) -> bool {
matches!(
selector.as_ref().and_then(|s| s.selector.as_ref()),
@@ -197,6 +377,7 @@ pub(crate) fn selector_is_oid(selector: &Option<ObjectSelector>) -> bool {
)
}
/// Returns true if both selectors are OID-based.
pub(crate) fn selectors_are_oid(
left: &Option<ObjectSelector>,
right: &Option<ObjectSelector>,
+4 -4
View File
@@ -39,7 +39,7 @@ impl commit_service_server::CommitService for GitksService {
}
};
let resp = if !inner.all && cache::selector_is_oid(&inner.revision) {
cache::cached_response("commit.list_commits", &inner, || {
cache::cached_response("commit.list_commits", &repo, &inner, || {
gb.list_commits(inner.clone()).map_err(into_status)
})?
} else {
@@ -78,7 +78,7 @@ impl commit_service_server::CommitService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.revision) {
cache::cached_response("commit.get_commit", &inner, || {
cache::cached_response("commit.get_commit", &repo, &inner, || {
gb.get_commit(inner.clone()).map_err(into_status)
})?
} else {
@@ -116,7 +116,7 @@ impl commit_service_server::CommitService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.revision) {
cache::cached_response("commit.get_commit_ancestors", &inner, || {
cache::cached_response("commit.get_commit_ancestors", &repo, &inner, || {
gb.get_commit_ancestors(inner.clone()).map_err(into_status)
})?
} else {
@@ -265,7 +265,7 @@ impl commit_service_server::CommitService for GitksService {
}
};
let resp = if cache::selectors_are_oid(&inner.base, &inner.head) {
cache::cached_response("commit.compare_commits", &inner, || {
cache::cached_response("commit.compare_commits", &repo, &inner, || {
gb.compare_commits(inner.clone()).map_err(into_status)
})?
} else {
+4 -4
View File
@@ -42,7 +42,7 @@ impl diff_service_server::DiffService for GitksService {
}
};
let resp = if cache::selectors_are_oid(&inner.base, &inner.head) {
cache::cached_response("diff.get_diff", &inner, || {
cache::cached_response("diff.get_diff", &repo, &inner, || {
gb.get_diff(inner.clone()).map_err(into_status)
})?
} else {
@@ -81,7 +81,7 @@ impl diff_service_server::DiffService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.commit) {
cache::cached_response("diff.get_commit_diff", &inner, || {
cache::cached_response("diff.get_commit_diff", &repo, &inner, || {
gb.get_commit_diff(inner.clone()).map_err(into_status)
})?
} else {
@@ -122,7 +122,7 @@ impl diff_service_server::DiffService for GitksService {
}
};
let items = if cache::selectors_are_oid(&inner.base, &inner.head) {
cache::cached_vec_response("diff.get_patch", &inner, || {
cache::cached_vec_response("diff.get_patch", &repo, &inner, || {
gb.get_patch(inner.clone()).map_err(into_status)
})?
} else {
@@ -160,7 +160,7 @@ impl diff_service_server::DiffService for GitksService {
}
};
let resp = if cache::selectors_are_oid(&inner.base, &inner.head) {
cache::cached_response("diff.get_diff_stats", &inner, || {
cache::cached_response("diff.get_diff_stats", &repo, &inner, || {
gb.get_diff_stats(inner.clone()).map_err(into_status)
})?
} else {
+8 -269
View File
@@ -1,31 +1,12 @@
/// Generate a `remote_<service>_client` helper function that resolves a repository
/// route and returns a connected gRPC client for the given service.
/// Single-machine mode: no cluster forwarding.
macro_rules! remote_client {
($fn_name:ident, $client:ty, $svc_label:literal) => {
async fn $fn_name(
svc: &super::GitksService,
header: Option<&crate::pb::RepositoryHeader>,
is_write: bool,
_svc: &super::GitksService,
_header: Option<&crate::pb::RepositoryHeader>,
_is_write: bool,
) -> Result<Option<$client>, tonic::Status> {
let header = match header {
Some(h) => h,
None => return Ok(None),
};
let Some(route) = svc.route_repository(header, is_write).await? else {
return Ok(None);
};
tracing::info!(
storage_name = %route.storage_name,
relative_path = %route.relative_path,
actor_name = %route.actor_name,
grpc_addr = %route.grpc_addr,
concat!("forwarding ", $svc_label, " rpc")
);
let endpoint = super::remote_endpoint(&route.grpc_addr).await?;
let client = <$client>::connect(endpoint)
.await
.map_err(|e| tonic::Status::unavailable(e.to_string()))?;
Ok(Some(client))
Ok(None)
}
};
}
@@ -45,14 +26,10 @@ mod repository_maint;
mod tag;
mod tree;
use dashmap::DashMap;
use gix::discover::is_git;
use ractor::{ActorCell, ActorRef};
use std::path::{Path, PathBuf};
use std::time::{Duration, Instant};
use tokio_stream::wrappers::ReceiverStream;
use crate::actor::message::{GitNodeMessage, RouteDecision};
use crate::bare::GitBare;
use crate::error::{GitError, GitResult};
use crate::pb::{
@@ -61,45 +38,26 @@ use crate::pb::{
remote_service_server, repository_service_server, tag_service_server, tree_service_server,
};
/// TTL for route cache entries.
const ROUTE_CACHE_TTL: Duration = Duration::from_secs(60); // 1 minute
/// A cached route entry with creation time.
#[derive(Clone)]
pub struct CachedRoute {
pub decision: RouteDecision,
pub created_at: Instant,
}
#[derive(Clone)]
pub struct GitksService {
pub repo_prefix: PathBuf,
pub node_actor: Option<ActorRef<GitNodeMessage>>,
pub grpc_addr: String,
pub disk_cache: Option<crate::disk_cache::DiskCache>,
pub pack_cache: Option<crate::pack_cache::PackCache>,
pub hook_manager: Option<crate::hooks::HookManager>,
pub route_cache: DashMap<String, CachedRoute>,
}
impl GitksService {
pub fn new(repo_prefix: PathBuf) -> Self {
Self {
repo_prefix,
node_actor: None,
grpc_addr: String::new(),
disk_cache: None,
pack_cache: None,
hook_manager: None,
route_cache: DashMap::new(),
}
}
pub fn with_actor(mut self, node_actor: ActorRef<GitNodeMessage>) -> Self {
self.node_actor = Some(node_actor);
self
}
pub fn with_disk_cache(mut self, dc: crate::disk_cache::DiskCache) -> Self {
self.disk_cache = Some(dc);
self
@@ -120,30 +78,6 @@ impl GitksService {
self
}
pub fn cleanup_route_cache(&self) {
let before = self.route_cache.len();
self.route_cache
.retain(|_key, cached| cached.created_at.elapsed() < ROUTE_CACHE_TTL);
let removed = before - self.route_cache.len();
if removed > 0 {
tracing::debug!(
removed,
remaining = self.route_cache.len(),
"route cache cleaned"
);
}
}
pub fn start_route_cache_cleanup(svc: Self) -> tokio::task::JoinHandle<()> {
tokio::spawn(async move {
let mut interval = tokio::time::interval(Duration::from_secs(120));
loop {
interval.tick().await;
svc.cleanup_route_cache();
}
})
}
pub fn scan_all_repo(&self) -> GitResult<Vec<String>> {
let root = self.repo_prefix.as_ref();
let mut repos = Vec::new();
@@ -157,82 +91,6 @@ impl GitksService {
.filter_map(|path| path.to_str().map(str::to_owned))
.collect())
}
pub async fn route_repository(
&self,
header: &crate::pb::RepositoryHeader,
is_write: bool,
) -> Result<Option<RouteDecision>, tonic::Status> {
use crate::actor::message::{ROLE_PRIMARY, ROLE_REPLICA};
// Check route cache for read requests
if !is_write
&& let Some(cached) = self.route_cache.get(&header.relative_path)
&& !cached.decision.grpc_addr.is_empty()
&& cached.decision.found
&& cached.created_at.elapsed() < ROUTE_CACHE_TTL
{
tracing::debug!(
relative_path = %header.relative_path,
grpc_addr = %cached.decision.grpc_addr,
"route cache hit"
);
return Ok(Some(cached.decision.clone()));
}
let members = ractor::pg::get_members(&"gitks_nodes".to_string());
let local = self.node_actor.as_ref().map(|actor| actor.get_cell());
let mut primary: Option<RouteDecision> = None;
let mut replica: Option<RouteDecision> = None;
for member in members {
if local.as_ref().is_some_and(|actor| actor == &member) {
continue;
}
if let Some(decision) = query_find_primary(member.clone(), header.clone()).await?
&& decision.found
&& !decision.grpc_addr.is_empty()
{
primary = Some(decision);
if is_write {
return Ok(primary);
}
}
if !is_write
&& replica.is_none()
&& let Some(decision) = query_find_replica(member.clone(), header.clone()).await?
&& decision.found
&& !decision.grpc_addr.is_empty()
&& decision.role == ROLE_REPLICA
{
replica = Some(decision);
}
}
let result = if let Some(p) = primary {
Some(p)
} else if let Some(r) = replica {
tracing::info!(
storage_name = %r.storage_name,
relative_path = %r.relative_path,
"read request routed to replica"
);
Some(r)
} else {
let _ = ROLE_PRIMARY;
None
};
// Cache result for read requests
if let Some(ref decision) = result {
self.route_cache.insert(
header.relative_path.clone(),
CachedRoute {
decision: decision.clone(),
created_at: Instant::now(),
},
);
}
Ok(result)
}
fn repo_label(&self, header: Option<&crate::pb::RepositoryHeader>) -> String {
header
.and_then(|h| {
@@ -349,101 +207,17 @@ impl GitksService {
pub fn notify_ref_update(
&self,
relative_path: &str,
ref_name: &str,
old_oid: &str,
new_oid: &str,
_ref_name: &str,
_old_oid: &str,
_new_oid: &str,
) {
// Invalidate moka caches
crate::server::cache::invalidate_repo(relative_path);
// Invalidate route cache
self.route_cache.remove(relative_path);
// Invalidate disk cache
if let Some(ref pc) = self.pack_cache {
pc.invalidate_repo(relative_path);
}
if let Some(ref actor) = self.node_actor {
let event = crate::actor::message::RefUpdateEvent {
relative_path: relative_path.to_string(),
ref_name: ref_name.to_string(),
old_oid: old_oid.to_string(),
new_oid: new_oid.to_string(),
primary_grpc_addr: self.grpc_addr.clone(),
primary_storage_name: String::new(),
};
crate::actor::handler::broadcast_ref_update(actor, event);
}
}
/// Submit a write command through Raft consensus.
/// This method:
/// 1. Checks if this node is the Leader (via leader lease)
/// 2. Creates a LogEntry with the command
/// 3. Appends to local raft_log
/// 4. Broadcasts AppendEntries to all followers
/// 5. Waits for majority ACK (10 second timeout)
/// 6. Advances commit_index and applies the command
///
/// Returns Ok(()) on success, or an error if consensus fails.
pub async fn raft_consensus_write(
&self,
command: crate::actor::raft_log::Command,
) -> Result<(), tonic::Status> {
let actor = self
.node_actor
.as_ref()
.ok_or_else(|| tonic::Status::failed_precondition("node actor not initialized"))?;
// Send the command to the actor for Raft processing
let result = ractor::call_t!(
actor,
GitNodeMessage::RaftWrite,
10000, // 10 second timeout
command
);
match result {
Ok(success) => {
if success {
Ok(())
} else {
Err(tonic::Status::aborted(
"Raft consensus failed: not leader or timeout",
))
}
}
Err(e) => Err(tonic::Status::internal(format!("Raft write error: {e}"))),
}
}
/// Perform a ReadIndex check to ensure this node can serve consistent reads.
/// This confirms the Leader is still valid before reading from local state.
pub async fn raft_read_index(&self) -> Result<(), tonic::Status> {
let actor = self
.node_actor
.as_ref()
.ok_or_else(|| tonic::Status::failed_precondition("node actor not initialized"))?;
let request = crate::actor::message::ReadIndexRequest {
relative_path: String::new(),
};
let result = ractor::call_t!(actor, GitNodeMessage::ReadIndex, 5000, request);
match result {
Ok(response) => {
if response.is_leader {
Ok(())
} else {
Err(tonic::Status::failed_precondition(
"not leader, cannot serve consistent read",
))
}
}
Err(e) => Err(tonic::Status::internal(format!("ReadIndex error: {e}"))),
}
}
/// Inject repo_prefix as storage_path into the client-provided header
@@ -456,13 +230,6 @@ impl GitksService {
}
}
pub async fn remote_endpoint(addr: &str) -> Result<tonic::transport::Endpoint, tonic::Status> {
let uri: tonic::codegen::http::Uri = addr
.parse()
.map_err(|e| tonic::Status::invalid_argument(format!("invalid URI: {e}")))?;
tonic::transport::Endpoint::new(uri).map_err(|e| tonic::Status::internal(e.to_string()))
}
pub(super) fn bridge_server_stream<T: Send + 'static>(
mut remote: tonic::Streaming<T>,
) -> tokio_stream::wrappers::ReceiverStream<Result<T, tonic::Status>> {
@@ -478,34 +245,6 @@ pub(super) fn bridge_server_stream<T: Send + 'static>(
tokio_stream::wrappers::ReceiverStream::new(rx)
}
async fn query_find_primary(
member: ActorCell,
header: crate::pb::RepositoryHeader,
) -> Result<Option<RouteDecision>, tonic::Status> {
let actor_ref: ActorRef<GitNodeMessage> = member.into();
match ractor::call_t!(actor_ref, GitNodeMessage::FindPrimary, 500, header) {
Ok(decision) => Ok(Some(decision)),
Err(err) => {
tracing::warn!(error = %err, "find primary query failed");
Ok(None)
}
}
}
async fn query_find_replica(
member: ActorCell,
header: crate::pb::RepositoryHeader,
) -> Result<Option<RouteDecision>, tonic::Status> {
let actor_ref: ActorRef<GitNodeMessage> = member.into();
match ractor::call_t!(actor_ref, GitNodeMessage::FindReplica, 500, header) {
Ok(decision) => Ok(Some(decision)),
Err(err) => {
tracing::warn!(error = %err, "find replica query failed");
Ok(None)
}
}
}
fn scan_bare_repos_recursively(dir: &Path, repos: &mut Vec<PathBuf>) -> GitResult<()> {
for entry in std::fs::read_dir(dir)? {
let entry = entry?;
+36 -35
View File
@@ -13,6 +13,8 @@ remote_client!(
"pack"
);
const MAX_INDEX_PACK_BUFFER_BYTES: usize = 512 * 1024 * 1024;
#[tonic::async_trait]
impl pack_service_server::PackService for GitksService {
type UploadPackStream = CancellableReceiverStream<Result<UploadPackResponse, tonic::Status>>;
@@ -276,43 +278,33 @@ impl pack_service_server::PackService for GitksService {
m.record("ok");
let (tx, rx) = tokio::sync::mpsc::channel(16);
tokio::spawn(async move {
let result = tokio::task::spawn_blocking(move || {
use std::io::Read;
let mut file = file;
let mut buf = vec![0u8; 65536];
let mut chunks = Vec::new();
loop {
match file.read(&mut buf) {
Ok(0) => break,
Ok(n) => chunks.push(Ok(PackfileChunk {
data: buf[..n].to_vec(),
})),
Err(e) => {
chunks.push(Err(tonic::Status::internal(format!(
use tokio::io::AsyncReadExt;
let mut file = tokio::fs::File::from_std(file);
let mut buf = vec![0u8; 65536];
loop {
match file.read(&mut buf).await {
Ok(0) => break,
Ok(n) => {
if tx
.send(Ok(PackfileChunk {
data: buf[..n].to_vec(),
}))
.await
.is_err()
{
break;
}
}
Err(e) => {
let _ = tx
.send(Err(tonic::Status::internal(format!(
"cache read error: {e}"
))));
break;
}
))))
.await;
break;
}
}
chunks
})
.await;
match result {
Ok(chunks) => {
for chunk in chunks {
if tx.send(chunk).await.is_err() {
break;
}
}
}
Err(e) => {
let _ = tx
.send(Err(tonic::Status::internal(format!(
"cache read task failed: {e}"
))))
.await;
}
}
});
return Ok(tonic::Response::new(ReceiverStream::new(rx)));
@@ -340,8 +332,17 @@ impl pack_service_server::PackService for GitksService {
let m = crate::metrics::RequestMetrics::new("gitks.PackService/IndexPack");
let mut stream = request.into_inner();
let mut inputs = Vec::new();
let mut total_bytes = 0usize;
while let Some(msg) = stream.next().await {
inputs.push(msg?);
let msg = msg?;
total_bytes = total_bytes.saturating_add(msg.data.len());
if total_bytes > MAX_INDEX_PACK_BUFFER_BYTES {
return Err(tonic::Status::resource_exhausted(format!(
"index-pack input too large (max {} bytes)",
MAX_INDEX_PACK_BUFFER_BYTES
)));
}
inputs.push(msg);
}
let _rate = self
.acquire_rate_limit(
+3 -1
View File
@@ -567,9 +567,11 @@ impl repository_service_server::RepositoryService for GitksService {
.list_snapshots(&repo)
.map_err(tonic::Status::internal)?;
let limit = (inner.limit > 0).then_some(inner.limit as usize);
let resp = ListSnapshotsResponse {
snapshots: snapshots
.into_iter()
.take(limit.unwrap_or(usize::MAX))
.map(|s| crate::pb::SnapshotInfo {
snapshot_id: s.snapshot_id,
relative_path: s.relative_path,
@@ -678,7 +680,7 @@ impl repository_service_server::RepositoryService for GitksService {
return;
}
for offset in (0..total).step_by(CHUNK_SIZE) {
let end = (offset + CHUNK_SIZE).min(total);
let end = offset.saturating_add(CHUNK_SIZE).min(total);
let chunk_data = bundle_data[offset..end].to_vec();
let is_done = end >= total;
if tx
+7 -7
View File
@@ -42,7 +42,7 @@ impl tree_service_server::TreeService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.revision) {
cache::cached_response("tree.list_tree", &inner, || {
cache::cached_response("tree.list_tree", &repo, &inner, || {
gb.list_tree(inner.clone()).map_err(into_status)
})?
} else {
@@ -81,7 +81,7 @@ impl tree_service_server::TreeService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.revision) {
cache::cached_response("tree.get_tree", &inner, || {
cache::cached_response("tree.get_tree", &repo, &inner, || {
gb.get_tree(inner.clone()).map_err(into_status)
})?
} else {
@@ -120,7 +120,7 @@ impl tree_service_server::TreeService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.revision) {
cache::cached_response("tree.get_blob", &inner, || {
cache::cached_response("tree.get_blob", &repo, &inner, || {
gb.get_blob(inner.clone()).map_err(into_status)
})?
} else {
@@ -160,11 +160,11 @@ impl tree_service_server::TreeService for GitksService {
}
};
let items = if inner.oid.is_some() {
cache::cached_vec_response("tree.get_raw_blob", &inner, || {
cache::cached_vec_response("tree.get_raw_blob", &repo, &inner, || {
gb.get_raw_blob(inner.clone()).map_err(into_status)
})?
} else if cache::selector_is_oid(&inner.revision) {
cache::cached_vec_response("tree.get_raw_blob", &inner, || {
cache::cached_vec_response("tree.get_raw_blob", &repo, &inner, || {
gb.get_raw_blob(inner.clone()).map_err(into_status)
})?
} else {
@@ -202,7 +202,7 @@ impl tree_service_server::TreeService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.revision) {
cache::cached_response("tree.get_file_metadata", &inner, || {
cache::cached_response("tree.get_file_metadata", &repo, &inner, || {
gb.get_file_metadata(inner.clone()).map_err(into_status)
})?
} else {
@@ -240,7 +240,7 @@ impl tree_service_server::TreeService for GitksService {
}
};
let resp = if cache::selector_is_oid(&inner.revision) {
cache::cached_response("tree.find_files", &inner, || {
cache::cached_response("tree.find_files", &repo, &inner, || {
gb.find_files(inner.clone()).map_err(into_status)
})?
} else {