feat(telemetry): integrate OpenTelemetry observability stack with health metrics

- Add OpenTelemetry SDK, OTLP exporter, Prometheus integration
- Implement connection tracking with active/total/disconnection metrics
- Add health endpoint with uptime and connection counts
- Integrate tracing spans for socket events and engine messages
- Add metrics collection for event handling duration
- Update health endpoint to include live runtime state
- Add graceful telemetry shutdown in main function
- Implement metrics tracking for active engine sessions
- Add namespace-specific attributes to connection metrics
- Introduce message edit history retrieval endpoint
- Add scheduled message CRUD operations and dispatcher
- Update Socket.IO event registration with observability
- Refactor component update to remove the `#[allow(dead_code)]` attribute
- Add comprehensive environment variables documentation
- Implement detailed development guidelines in AGENTS.md
This commit is contained in:
zhenyi
2026-06-11 13:53:29 +08:00
parent 40241e5db3
commit 0dbac480ae
22 changed files with 3116 additions and 64 deletions
+10 -1
View File
@@ -115,17 +115,26 @@ impl EngineServer {
));
let heartbeat_handle = heartbeat.start();
tracing::info!("Engine.IO HTTP server listening on {}", addr);
tracing::info!(
endpoint = %addr,
"Engine.IO HTTP server listening, /health and /metrics available"
);
let result = HttpServer::new(move || {
App::new()
.app_data(web::Data::new(store.clone()))
.app_data(web::Data::new(config.clone()))
.app_data(web::Data::new(on_message.clone()))
// Health check with connection metrics
.route(
"/health",
web::get().to(crate::engine::health::health_check),
)
// Prometheus metrics endpoint
.route(
"/metrics",
web::get().to(crate::telemetry::metrics::metrics_handler),
)
.route("/engine.io/", web::get().to(engine_get))
.route(
"/engine.io/",