//! Prometheus metrics: global meter provider, registry, and the /metrics actix-web handler. use std::sync::OnceLock; use opentelemetry::KeyValue; use opentelemetry::global; use opentelemetry::metrics::{Counter, Histogram, Meter, UpDownCounter}; use opentelemetry_sdk::Resource; use opentelemetry_sdk::metrics::SdkMeterProvider; use prometheus::{Encoder, Registry, TextEncoder}; use crate::ImksResult; /// Shared Prometheus registry, lazily initialized. static PROMETHEUS_REGISTRY: OnceLock = OnceLock::new(); /// Global metrics instruments, initialized once at startup. static METRICS: OnceLock = OnceLock::new(); /// All application metrics instruments. #[derive(Debug, Clone)] pub struct MetricsInstruments { pub connections_active: UpDownCounter, pub connections_total: Counter, pub disconnections_total: Counter, pub messages_received_total: Counter, pub messages_sent_total: Counter, pub event_handling_duration: Histogram, pub db_query_duration: Histogram, pub engine_sessions_active: UpDownCounter, pub namespaces_active: UpDownCounter, pub gprc_calls_total: Counter, pub gprc_call_errors_total: Counter, pub adapter_broadcasts_total: Counter, } /// Initialize the Prometheus meter provider and create all metric instruments. 
///
/// Creates a dedicated Prometheus [`Registry`], attaches it to an OpenTelemetry
/// Prometheus exporter, installs the resulting [`SdkMeterProvider`] as the global
/// meter provider, and publishes the instruments for later retrieval through
/// [`get`] and [`try_get`].
///
/// `_config` is currently unused.
///
/// # Errors
///
/// Returns [`crate::ImksError::Internal`] when the Prometheus exporter cannot be
/// built, or when the registry / instruments were already initialized by an
/// earlier call. Global state is only modified after the exporter has been built
/// successfully, so a failed build leaves nothing half-initialized.
pub fn init_metrics(
    _config: &super::config::TelemetryConfig,
    resource: &Resource,
) -> ImksResult<(SdkMeterProvider, MetricsInstruments)> {
    let registry = Registry::new();

    // Fallible step first: nothing global has been touched if this fails.
    let exporter = opentelemetry_prometheus::exporter()
        .with_registry(registry.clone())
        .build()
        .map_err(|e| {
            crate::ImksError::Internal(format!("failed to build Prometheus exporter: {e}"))
        })?;

    let provider = SdkMeterProvider::builder()
        .with_resource(resource.clone())
        .with_reader(exporter)
        .build();

    // Publish the registry before swapping the global provider, so a repeated
    // call is rejected without disturbing the provider installed by the first.
    PROMETHEUS_REGISTRY.set(registry).map_err(|_| {
        crate::ImksError::Internal("Prometheus registry already initialized".to_string())
    })?;

    global::set_meter_provider(provider.clone());

    let scope = opentelemetry::InstrumentationScope::builder("imks")
        .with_version(env!("CARGO_PKG_VERSION"))
        .build();
    let meter = global::meter_with_scope(scope);

    let instruments = MetricsInstruments::new(&meter);
    METRICS.set(instruments.clone()).map_err(|_| {
        crate::ImksError::Internal("Metrics instruments already initialized".to_string())
    })?;

    Ok((provider, instruments))
}

/// Obtain the globally initialized metrics.
///
/// # Panics
///
/// Panics if `init_metrics` has not completed successfully yet.
pub fn get() -> MetricsInstruments {
    match METRICS.get() {
        Some(instruments) => instruments.clone(),
        None => panic!("Metrics not initialized — call init_metrics first"),
    }
}

/// Obtain the globally initialized metrics, returning `None` if not initialized.
/// Prefer this in library code that may run before metrics are set up (e.g., tests).
pub fn try_get() -> Option { METRICS.get().cloned() } impl MetricsInstruments { fn new(meter: &Meter) -> Self { Self { connections_active: meter .i64_up_down_counter("imks_connections_active") .with_description("Number of active Socket.IO connections") .build(), connections_total: meter .u64_counter("imks_connections_total") .with_description("Total number of socket connections since start") .build(), disconnections_total: meter .u64_counter("imks_disconnections_total") .with_description("Total number of socket disconnections since start") .build(), messages_received_total: meter .u64_counter("imks_messages_received_total") .with_description("Total number of messages received from clients") .build(), messages_sent_total: meter .u64_counter("imks_messages_sent_total") .with_description("Total number of messages sent to clients") .build(), event_handling_duration: meter .f64_histogram("imks_event_handling_duration_seconds") .with_description("Socket.IO event handling latency in seconds") .build(), db_query_duration: meter .f64_histogram("imks_db_query_duration_seconds") .with_description("Database query duration in seconds") .build(), engine_sessions_active: meter .i64_up_down_counter("imks_engine_sessions_active") .with_description("Number of active Engine.IO sessions") .build(), namespaces_active: meter .i64_up_down_counter("imks_namespaces_active") .with_description("Number of active Socket.IO namespaces") .build(), gprc_calls_total: meter .u64_counter("imks_gprc_calls_total") .with_description("Total number of gRPC calls to appks") .build(), gprc_call_errors_total: meter .u64_counter("imks_gprc_call_errors_total") .with_description("Total number of failed gRPC calls to appks") .build(), adapter_broadcasts_total: meter .u64_counter("imks_adapter_broadcasts_total") .with_description("Total number of cross-node adapter broadcasts") .build(), } } /// Helper: create KV attributes for an event. 
pub fn event_attrs(event: &str) -> [KeyValue; 1] { [KeyValue::new("event", event.to_string())] } /// Helper: create KV attributes for a namespace. pub fn namespace_attrs(ns: &str) -> [KeyValue; 1] { [KeyValue::new("namespace", ns.to_string())] } } /// Actix-web handler for `GET /metrics`. /// /// Encodes the Prometheus text format from the shared registry. pub async fn metrics_handler() -> actix_web::HttpResponse { let registry = PROMETHEUS_REGISTRY .get() .expect("Prometheus registry not initialized"); let metric_families = registry.gather(); let encoder = TextEncoder::new(); let mut buffer = Vec::new(); if encoder.encode(&metric_families, &mut buffer).is_err() { return actix_web::HttpResponse::InternalServerError().body("failed to encode metrics"); } actix_web::HttpResponse::Ok() .content_type("text/plain; version=0.0.4") .body(buffer) }