feat(telemetry): integrate OpenTelemetry observability stack with health metrics

- Add OpenTelemetry SDK, OTLP exporter, Prometheus integration
- Implement connection tracking with active/total/disconnection metrics
- Add health endpoint with uptime and connection counts
- Integrate tracing spans for socket events and engine messages
- Add metrics collection for event handling duration
- Update health endpoint to include live runtime state
- Add graceful telemetry shutdown in main function
- Implement engine session active metrics tracking
- Add namespace-specific attributes to connection metrics
- Introduce message edit history retrieval endpoint
- Add scheduled message CRUD operations and dispatcher
- Update Socket.IO event registration with observability
- Refactor component update to remove dead code allowance
- Add comprehensive environment variables documentation
- Implement detailed development guidelines in AGENTS.md
This commit is contained in:
zhenyi
2026-06-11 13:53:29 +08:00
parent 40241e5db3
commit 0dbac480ae
22 changed files with 3116 additions and 64 deletions
+12 -6
View File
@@ -1,26 +1,32 @@
//! Health check endpoint for the imks server.
//!
//! Returns JSON with server status, version, and upstream connectivity.
//! Returns JSON with server status, version, uptime, and connection counts
//! sourced from live runtime state (session store + atomic counter).
use actix_web::HttpResponse;
use actix_web::{HttpResponse, web};
use serde::Serialize;
use crate::engine::session::SessionStore;
use crate::telemetry;
/// JSON body returned by the `GET /health` handler in this module.
///
/// Every field is filled in by `health_check` at request time.
#[derive(Serialize)]
struct HealthResponse {
/// Health indicator; the handler always sets this to the literal "healthy".
status: String,
/// Crate version, taken from `CARGO_PKG_VERSION` at compile time.
version: String,
/// Time the response was built, as an RFC 3339 string in UTC.
timestamp: String,
/// Value reported by `telemetry::health::uptime_secs()`.
uptime_secs: u64,
/// Value reported by `telemetry::health::connections_active_count()`.
connections_active: u64,
/// Number of entries in the shared `SessionStore` (`store.len()`).
sessions_count: usize,
}
// NOTE(review): this span looks like a rendered diff hunk whose `+`/`-`
// markers were stripped, so pre-change and post-change lines are interleaved.
// The zero-argument signature and the `uptime_secs: 0` / `sessions_count: 0`
// initializers are presumably the removed lines — confirm against the repository.
/// GET /health — returns server health status.
pub async fn health_check() -> HttpResponse {
/// GET /health — returns server health status with live connection metrics.
///
/// Builds a `HealthResponse` and returns it as the JSON body of an
/// `HttpResponse::Ok()` response. `status` is always "healthy"; nothing
/// visible here can produce a different status or an error response.
///
/// `store` is the `SessionStore` handed in through `web::Data`; only its
/// `len()` is read, to populate `sessions_count`.
pub async fn health_check(store: web::Data<SessionStore>) -> HttpResponse {
HttpResponse::Ok().json(HealthResponse {
status: "healthy".into(),
// Version is baked in at compile time from the Cargo package metadata.
version: env!("CARGO_PKG_VERSION").into(),
timestamp: chrono::Utc::now().to_rfc3339(),
// Hard-coded placeholders; presumably the pre-change values (see note above).
uptime_secs: 0,
sessions_count: 0,
// Values read from the telemetry helpers and the session store on each request.
uptime_secs: telemetry::health::uptime_secs(),
connections_active: telemetry::health::connections_active_count(),
sessions_count: store.len(),
})
}