0dbac480ae
- Add OpenTelemetry SDK, OTLP exporter, Prometheus integration - Implement connection tracking with active/total/disconnection metrics - Add health endpoint with uptime and connection counts - Integrate tracing spans for socket events and engine messages - Add metrics collection for event handling duration - Update health endpoint to include live runtime state - Add graceful telemetry shutdown in main function - Implement engine session active metrics tracking - Add namespace-specific attributes to connection metrics - Introduce message edit history retrieval endpoint - Add scheduled message CRUD operations and dispatcher - Update Socket.IO event registration with observability - Refactor component update to remove dead code allowance - Add comprehensive environment variables documentation - Implement detailed development guidelines in AGENTS.md
171 lines
5.1 KiB
Rust
171 lines
5.1 KiB
Rust
//! Enhanced health check endpoint with upstream dependency checks.
//!
//! Returns JSON with server status, version, uptime, connection counts,
//! and optional health checks for PostgreSQL, Redis, NATS, and gRPC.
|
|
|
|
use std::sync::Arc;
|
|
use std::sync::OnceLock;
|
|
use std::sync::atomic::{AtomicU64, Ordering};
|
|
use std::time::Instant;
|
|
|
|
use actix_web::HttpResponse;
|
|
use serde::Serialize;
|
|
|
|
/// Instant at which the server started.
///
/// Empty until `record_start_time` stores a value; written at most once.
static START_TIME: OnceLock<Instant> = OnceLock::new();

/// Process-wide count of live socket connections.
///
/// Empty until `init_counters` installs the counter. The connect / disconnect
/// hooks in this module are no-ops while it is still empty.
static CONNECTIONS_ACTIVE: OnceLock<AtomicU64> = OnceLock::new();
|
|
|
|
/// Initializes the start time (call once during startup).
|
|
pub fn record_start_time() {
|
|
START_TIME.set(Instant::now()).ok();
|
|
}
|
|
|
|
/// Initialize shared health counters (call once during startup).
|
|
pub fn init_counters() {
|
|
CONNECTIONS_ACTIVE.set(AtomicU64::new(0)).ok();
|
|
}
|
|
|
|
/// Signal that a new socket connection was established.
|
|
pub fn connection_connected() {
|
|
if let Some(c) = CONNECTIONS_ACTIVE.get() {
|
|
c.fetch_add(1, Ordering::Relaxed);
|
|
}
|
|
}
|
|
|
|
/// Signal that a socket connection was closed.
|
|
pub fn connection_disconnected() {
|
|
if let Some(c) = CONNECTIONS_ACTIVE.get() {
|
|
c.fetch_sub(1, Ordering::Relaxed);
|
|
}
|
|
}
|
|
|
|
/// Return the current number of active socket connections.
|
|
pub fn connections_active_count() -> u64 {
|
|
CONNECTIONS_ACTIVE
|
|
.get()
|
|
.map(|c| c.load(Ordering::Relaxed))
|
|
.unwrap_or(0)
|
|
}
|
|
|
|
/// Returns the server uptime in seconds.
|
|
pub fn uptime_secs() -> u64 {
|
|
START_TIME
|
|
.get()
|
|
.map(|t| t.elapsed().as_secs())
|
|
.unwrap_or(0)
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct HealthResponse {
|
|
pub status: String,
|
|
pub version: String,
|
|
pub timestamp: String,
|
|
pub uptime_secs: u64,
|
|
pub connections_active: u64,
|
|
pub sessions_count: u64,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub checks: Option<HealthChecks>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct HealthChecks {
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub postgres: Option<CheckResult>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub redis: Option<CheckResult>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub nats: Option<CheckResult>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub grpc: Option<CheckResult>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct CheckResult {
|
|
pub status: String,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub latency_ms: Option<u64>,
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
pub error: Option<String>,
|
|
}
|
|
|
|
/// Optional external check functions.
|
|
/// Each returns `Some(CheckResult)` if the service is configured, `None` otherwise.
|
|
#[derive(Default)]
|
|
pub struct HealthCheckFns {
|
|
pub check_postgres: Option<Arc<dyn Fn() -> CheckResult + Send + Sync>>,
|
|
pub check_redis: Option<Arc<dyn Fn() -> CheckResult + Send + Sync>>,
|
|
pub check_nats: Option<Arc<dyn Fn() -> CheckResult + Send + Sync>>,
|
|
pub check_grpc: Option<Arc<dyn Fn() -> CheckResult + Send + Sync>>,
|
|
}
|
|
|
|
impl HealthCheckFns {
|
|
pub fn with_postgres(mut self, f: impl Fn() -> CheckResult + Send + Sync + 'static) -> Self {
|
|
self.check_postgres = Some(Arc::new(f));
|
|
self
|
|
}
|
|
|
|
pub fn with_redis(mut self, f: impl Fn() -> CheckResult + Send + Sync + 'static) -> Self {
|
|
self.check_redis = Some(Arc::new(f));
|
|
self
|
|
}
|
|
|
|
pub fn with_nats(mut self, f: impl Fn() -> CheckResult + Send + Sync + 'static) -> Self {
|
|
self.check_nats = Some(Arc::new(f));
|
|
self
|
|
}
|
|
|
|
pub fn with_grpc(mut self, f: impl Fn() -> CheckResult + Send + Sync + 'static) -> Self {
|
|
self.check_grpc = Some(Arc::new(f));
|
|
self
|
|
}
|
|
}
|
|
|
|
/// GET /health handler with dependency checks.
|
|
pub async fn health_check(checks: actix_web::web::Data<Arc<HealthCheckFns>>) -> HttpResponse {
|
|
let checks = checks.get_ref();
|
|
|
|
let health_checks = if checks.check_postgres.is_some()
|
|
|| checks.check_redis.is_some()
|
|
|| checks.check_nats.is_some()
|
|
|| checks.check_grpc.is_some()
|
|
{
|
|
Some(HealthChecks {
|
|
postgres: checks.check_postgres.as_ref().map(|f| f()),
|
|
redis: checks.check_redis.as_ref().map(|f| f()),
|
|
nats: checks.check_nats.as_ref().map(|f| f()),
|
|
grpc: checks.check_grpc.as_ref().map(|f| f()),
|
|
})
|
|
} else {
|
|
None
|
|
};
|
|
|
|
let overall_status = if let Some(ref hc) = health_checks {
|
|
let all_up = [&hc.postgres, &hc.redis, &hc.nats, &hc.grpc]
|
|
.iter()
|
|
.filter_map(|c| c.as_ref())
|
|
.all(|c| c.status == "up");
|
|
if all_up {
|
|
"healthy"
|
|
} else {
|
|
"degraded"
|
|
}
|
|
} else {
|
|
"healthy"
|
|
};
|
|
|
|
let response = HealthResponse {
|
|
status: overall_status.to_string(),
|
|
version: env!("CARGO_PKG_VERSION").to_string(),
|
|
timestamp: chrono::Utc::now().to_rfc3339(),
|
|
uptime_secs: uptime_secs(),
|
|
connections_active: 0,
|
|
sessions_count: 0,
|
|
checks: health_checks,
|
|
};
|
|
|
|
HttpResponse::Ok().json(response)
|
|
}
|