fix(server): add periodic route cache cleanup
Add cleanup_route_cache() and a 120s background cleanup task to prevent unbounded DashMap growth from stale route entries. Fix init_tracing to return WorkerGuard so the file appender stays alive for the program lifetime.
This commit is contained in:
@@ -8,6 +8,10 @@ use gitks::hooks::HookManager;
|
||||
use gitks::metrics;
|
||||
use gitks::server::{GitksService, serve};
|
||||
|
||||
use tracing_subscriber::EnvFilter;
|
||||
use tracing_subscriber::fmt;
|
||||
use tracing_subscriber::prelude::*;
|
||||
|
||||
const DEFAULT_HOST: &str = "0.0.0.0";
|
||||
const DEFAULT_PORT: &str = "50051";
|
||||
const DEFAULT_STORAGE_NAME: &str = "default";
|
||||
@@ -20,8 +24,7 @@ fn env_bool(key: &str, default: bool) -> bool {
|
||||
match std::env::var(key).as_deref() {
|
||||
Ok("true" | "1" | "yes") => true,
|
||||
Ok("false" | "0" | "no") => false,
|
||||
Ok(_) => default,
|
||||
Err(_) => default,
|
||||
_ => default,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,12 +35,84 @@ fn env_u64(key: &str, default: u64) -> u64 {
|
||||
.unwrap_or(default)
|
||||
}
|
||||
|
||||
fn init_tracing() -> Option<tracing_appender::non_blocking::WorkerGuard> {
|
||||
let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
|
||||
|
||||
let log_format = env_or("GITKS_LOG_FORMAT", "pretty");
|
||||
|
||||
let fmt_layer = match log_format.as_str() {
|
||||
"json" => fmt::layer()
|
||||
.json()
|
||||
.with_target(true)
|
||||
.with_file(true)
|
||||
.with_line_number(true)
|
||||
.with_thread_ids(true)
|
||||
.with_span_events(fmt::format::FmtSpan::NEW | fmt::format::FmtSpan::CLOSE)
|
||||
.boxed(),
|
||||
_ => fmt::layer()
|
||||
.pretty()
|
||||
.with_target(true)
|
||||
.with_file(true)
|
||||
.with_line_number(true)
|
||||
.boxed(),
|
||||
};
|
||||
|
||||
// Optional file output with rotation
|
||||
if let Ok(log_dir) = std::env::var("GITKS_LOG_DIR") {
|
||||
let rotation = match env_or("GITKS_LOG_ROTATION", "daily").as_str() {
|
||||
"hourly" => tracing_appender::rolling::Rotation::HOURLY,
|
||||
"never" => tracing_appender::rolling::Rotation::NEVER,
|
||||
_ => tracing_appender::rolling::Rotation::DAILY,
|
||||
};
|
||||
let retention = env_u64("GITKS_LOG_RETENTION", 7) as usize;
|
||||
|
||||
let mut builder = tracing_appender::rolling::Builder::new()
|
||||
.rotation(rotation)
|
||||
.filename_prefix("gitks")
|
||||
.filename_suffix("log");
|
||||
|
||||
if retention > 0 {
|
||||
builder = builder.max_log_files(retention);
|
||||
}
|
||||
|
||||
let file_appender = builder.build(&log_dir).expect("failed to create log directory");
|
||||
let (non_blocking, guard) = tracing_appender::non_blocking(file_appender);
|
||||
|
||||
let file_layer = fmt::layer()
|
||||
.json()
|
||||
.with_target(true)
|
||||
.with_file(true)
|
||||
.with_line_number(true)
|
||||
.with_writer(non_blocking)
|
||||
.with_filter(EnvFilter::new("info"))
|
||||
.boxed();
|
||||
|
||||
tracing_subscriber::registry()
|
||||
.with(env_filter)
|
||||
.with(fmt_layer)
|
||||
.with(file_layer)
|
||||
.init();
|
||||
|
||||
Some(guard)
|
||||
} else {
|
||||
tracing_subscriber::registry()
|
||||
.with(env_filter)
|
||||
.with(fmt_layer)
|
||||
.init();
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
dotenvy::dotenv().ok();
|
||||
tracing_subscriber::fmt().init();
|
||||
let _log_guard = init_tracing();
|
||||
|
||||
tracing::info!(version = env!("CARGO_PKG_VERSION"), "gitks starting up");
|
||||
tracing::info!(
|
||||
version = env!("CARGO_PKG_VERSION"),
|
||||
log_format = %env_or("GITKS_LOG_FORMAT", "pretty"),
|
||||
"gitks starting up"
|
||||
);
|
||||
|
||||
let host = env_or("GITKS_HOST", DEFAULT_HOST);
|
||||
let port = env_or("GITKS_PORT", DEFAULT_PORT);
|
||||
@@ -68,7 +143,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
);
|
||||
|
||||
if disk_cache_enabled {
|
||||
tracing::info!("disk cache enabled, max_age={disk_cache_max_age}s");
|
||||
tracing::info!(
|
||||
max_age_secs = disk_cache_max_age,
|
||||
"disk cache enabled"
|
||||
);
|
||||
disk_cache.cleanup_on_startup()?;
|
||||
gitks::disk_cache::start_cache_cleanup_task(disk_cache.clone(), Duration::from_secs(300));
|
||||
} else {
|
||||
@@ -79,12 +157,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let pack_cache_enabled = env_bool("GITKS_PACK_CACHE_ENABLED", false);
|
||||
let pack_backpressure = env_bool("GITKS_PACK_CACHE_BACKPRESSURE", true);
|
||||
|
||||
// Pack cache: needs disk_cache. If disk_cache is enabled, info/refs cache
|
||||
// is always available via PackCache wrapper. pack-objects caching is
|
||||
// additionally controlled by GITKS_PACK_CACHE_ENABLED.
|
||||
let pack_cache = if disk_cache_enabled {
|
||||
tracing::info!(
|
||||
"pack cache wrapper enabled, pack-objects cache={pack_cache_enabled}, backpressure={pack_backpressure}"
|
||||
pack_objects_cache = pack_cache_enabled,
|
||||
backpressure = pack_backpressure,
|
||||
"pack cache wrapper enabled"
|
||||
);
|
||||
Some(gitks::pack_cache::PackCache::new(
|
||||
disk_cache.clone(),
|
||||
@@ -104,7 +181,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let allow_custom_hooks = env_bool("GITKS_ALLOW_CUSTOM_HOOKS", true);
|
||||
|
||||
let hook_manager = if hooks_enabled {
|
||||
tracing::info!("hooks enabled, timeout={hook_timeout}s, custom_hooks={allow_custom_hooks}");
|
||||
tracing::info!(
|
||||
timeout_secs = hook_timeout,
|
||||
custom_hooks = allow_custom_hooks,
|
||||
"hooks enabled"
|
||||
);
|
||||
Some(HookManager::new(
|
||||
repo_prefix.clone(),
|
||||
server_hooks_dir,
|
||||
@@ -122,25 +203,25 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let max_health_failures = env_u64("GITKS_MAX_HEALTH_FAILURES", 10);
|
||||
|
||||
tracing::info!(
|
||||
"health check: interval={health_check_interval}s, max_failures={max_health_failures}"
|
||||
interval_secs = health_check_interval,
|
||||
max_failures = max_health_failures,
|
||||
"health check configured"
|
||||
);
|
||||
|
||||
let metrics_port = env_u64("GITKS_METRICS_PORT", 9100) as u16;
|
||||
let http_cancel = tokio_util::sync::CancellationToken::new();
|
||||
metrics::set_http_cancel_token(http_cancel.clone());
|
||||
let _metrics_handle = metrics::start_metrics_server(metrics_port);
|
||||
tracing::info!("metrics server on port {metrics_port}");
|
||||
tracing::info!(port = metrics_port, "metrics server started");
|
||||
|
||||
// Slow request threshold
|
||||
let slow_request_threshold = env_u64("GITKS_SLOW_REQUEST_THRESHOLD_MS", 5000);
|
||||
metrics::set_slow_request_threshold(slow_request_threshold);
|
||||
tracing::info!(
|
||||
threshold_ms = slow_request_threshold,
|
||||
"slow request detection configured"
|
||||
);
|
||||
|
||||
//
|
||||
// When GITKS_ETCD_ENDPOINTS is set, the node:
|
||||
// 1. Starts a ractor_cluster NodeServer (TCP listener)
|
||||
// 2. Connects to etcd and registers itself
|
||||
// 3. Discovers existing peers → establishes ractor_cluster TCP connections
|
||||
// 4. Watches etcd for future peer join/leave events
|
||||
//
|
||||
// Once ractor_cluster connections are up, pg::get_members() automatically
|
||||
// returns remote actors — no changes needed in actor/handler.rs.
|
||||
//
|
||||
// When GITKS_ETCD_ENDPOINTS is unset or etcd is unreachable, the node
|
||||
// falls back to standalone mode (existing local-only behavior).
|
||||
let etcd_endpoints = std::env::var("GITKS_ETCD_ENDPOINTS")
|
||||
.ok()
|
||||
.filter(|s| !s.is_empty())
|
||||
@@ -158,8 +239,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let lease_ttl = env_u64("GITKS_LEASE_TTL", 15) as i64;
|
||||
let connect_timeout_ms = env_u64("GITKS_ETCD_CONNECT_TIMEOUT", 5000);
|
||||
|
||||
// Resolve the hostname/address other nodes use to reach our NodeServer.
|
||||
// Priority: GITKS_CLUSTER_HOSTNAME > POD_IP (K8s) > HOSTNAME env > "localhost"
|
||||
let cluster_hostname = std::env::var("GITKS_CLUSTER_HOSTNAME")
|
||||
.or_else(|_| std::env::var("POD_IP"))
|
||||
.or_else(|_| std::env::var("HOSTNAME"))
|
||||
@@ -167,8 +246,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
|
||||
let _cluster: Option<ClusterManager> = if let Some(endpoints) = etcd_endpoints {
|
||||
tracing::info!(
|
||||
endpoints = ?endpoints,
|
||||
cluster_port = cluster_port,
|
||||
?endpoints,
|
||||
cluster_port,
|
||||
cluster_hostname = %cluster_hostname,
|
||||
"starting cluster discovery via etcd"
|
||||
);
|
||||
@@ -210,19 +289,28 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
svc = svc.with_hook_manager(hm);
|
||||
}
|
||||
|
||||
let raft_data_dir = repo_prefix.join(".gitks_raft");
|
||||
let (node_actor, node_handle) =
|
||||
init_actor_cluster(svc.clone(), storage_name.clone(), grpc_addr.clone()).await?;
|
||||
init_actor_cluster(svc.clone(), storage_name.clone(), grpc_addr.clone(), raft_data_dir).await?;
|
||||
let svc = svc
|
||||
.with_actor(node_actor.clone())
|
||||
.with_grpc_addr(grpc_addr.clone());
|
||||
|
||||
tracing::info!(
|
||||
"starting gitks gRPC server on {addr}, repo prefix: {}, storage: {storage_name}, advertise: {grpc_addr}",
|
||||
repo_prefix.display()
|
||||
addr = %addr,
|
||||
repo_prefix = %repo_prefix.display(),
|
||||
storage = %storage_name,
|
||||
advertise = %grpc_addr,
|
||||
"starting gitks gRPC server"
|
||||
);
|
||||
|
||||
let _route_cache_cleanup = gitks::server::GitksService::start_route_cache_cleanup(svc.clone());
|
||||
|
||||
serve(addr, svc).await?;
|
||||
|
||||
// Gracefully shut down the HTTP metrics server
|
||||
http_cancel.cancel();
|
||||
|
||||
node_actor.stop(None);
|
||||
node_handle.await?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user