Files
zhenyi b797e360c0 feat(registry): add service discovery and health check capabilities
- Integrate tonic-health for gRPC service health monitoring
- Add etcd-based service registration with automatic keep-alive
- Implement dynamic configuration loading from etcd with fallback
- Remove external dependencies from docker-compose for simplified deployment
- Refactor service registration logic with improved lease management
- Add health service to gRPC server with serving status reporting
2026-06-11 22:50:40 +08:00

153 lines
4.8 KiB
Rust

mod discovery;
mod register;
mod types;
pub use types::{GitksPeerInfo, ServiceInstance};
use std::sync::Arc;
use std::sync::atomic::AtomicI64;
use dashmap::DashMap;
use etcd_client::Client;
use tokio::sync::Mutex;
use uuid::Uuid;
use crate::config::AppConfig;
use crate::error::{AppError, AppResult};
use crate::pb::{EmailClient, RepoClient};
/// Cloneable handle to the etcd-backed service registry.
///
/// Cloning copies the `Arc` pointing at the shared [`EtcdRegistryInner`] and the
/// optional directly-configured email client.
#[derive(Clone)]
pub struct EtcdRegistry {
/// Shared registry state (etcd client, config, discovered nodes).
pub(crate) inner: Arc<EtcdRegistryInner>,
/// Email client built in `connect` from the configured email RPC address, if any.
/// `get_email_client` prefers it over the entries in `mail_nodes`.
email_client: Option<EmailClient>,
}
/// State shared by every clone of `EtcdRegistry`.
pub(crate) struct EtcdRegistryInner {
/// etcd client guarded by an async mutex; `get_config` locks it for each lookup.
pub client: Mutex<Client>,
/// Copy of the application config that was passed to `connect`.
pub config: AppConfig,
/// Key prefix read from the config; `get_config` prepends it to `config/<key>`.
pub key_prefix: String,
/// Repo clients keyed by node UUID. Starts empty in `connect`.
/// NOTE(review): presumably filled by the discovery module — confirm.
pub git_nodes: DashMap<Uuid, RepoClient>,
/// Email clients keyed by node UUID. Starts empty in `connect`.
/// NOTE(review): presumably filled by the discovery module — confirm.
pub mail_nodes: DashMap<Uuid, EmailClient>,
/// Initialised to 0 in `connect`.
/// NOTE(review): presumably the etcd lease id managed by the register module — confirm.
pub lease_id: AtomicI64,
}
impl EtcdRegistry {
/// Connects to etcd and builds a registry handle.
///
/// Endpoints, the connect timeout (in seconds), optional credentials, the key
/// prefix and the optional direct email RPC address are all read from `config`.
///
/// When both a username and a password are configured they are attached to the
/// connect options, so the client authenticates while connecting and carries the
/// resulting token on its later requests. Calling `authenticate` on a separate
/// auth client only returns a token; it does not attach it to the client.
///
/// # Errors
///
/// Propagates any error returned by the `config` accessors and returns
/// `AppError::Config` when the etcd connection (including authentication) fails.
pub async fn connect(config: &AppConfig) -> AppResult<Self> {
    let endpoints = config.etcd_endpoints()?;
    let timeout = config.etcd_connect_timeout()?;
    let mut opts = etcd_client::ConnectOptions::new()
        .with_connect_timeout(std::time::Duration::from_secs(timeout));

    // Credentials must be part of the connect options for the token to be used
    // by the key-value, lease and watch clients derived from this connection.
    let with_auth = match (config.etcd_username()?, config.etcd_password()?) {
        (Some(user), Some(pass)) => {
            opts = opts.with_user(user, pass);
            true
        }
        (None, None) => false,
        _ => {
            tracing::warn!(
                "only one of etcd username/password is set, connecting without authentication"
            );
            false
        }
    };

    let client = Client::connect(endpoints, Some(opts))
        .await
        .map_err(|e| AppError::Config(format!("etcd connect failed: {e}")))?;
    if with_auth {
        tracing::info!("etcd authenticated");
    }

    let key_prefix = config.etcd_key_prefix()?;

    // A directly configured address takes precedence; without it (or when the
    // client cannot be built) callers fall back to discovered mail nodes.
    let email_client = match config.email_rpc_addr()? {
        Some(addr) if !addr.is_empty() => match EmailClient::lazy_connect(&addr) {
            Ok(client) => {
                tracing::info!(addr = %addr, "email client connected via APP_EMAIL_RPC_ADDR");
                Some(client)
            }
            Err(e) => {
                tracing::error!(addr = %addr, error = %e, "email client connect via APP_EMAIL_RPC_ADDR failed");
                None
            }
        },
        _ => {
            tracing::info!(
                "APP_EMAIL_RPC_ADDR not set, will fall back to etcd discovery for email"
            );
            None
        }
    };

    Ok(Self {
        inner: Arc::new(EtcdRegistryInner {
            client: Mutex::new(client),
            config: config.clone(),
            key_prefix,
            git_nodes: DashMap::new(),
            mail_nodes: DashMap::new(),
            lease_id: AtomicI64::new(0),
        }),
        email_client,
    })
}
/// Returns a clone of the repo client stored under `node_id`, or `None` when
/// `git_nodes` has no entry for that id.
pub fn get_git_client(&self, node_id: &Uuid) -> Option<RepoClient> {
    let entry = self.inner.git_nodes.get(node_id)?;
    Some(entry.value().clone())
}
/// Collects the UUID keys currently present in `git_nodes`, in map iteration order.
pub fn git_node_ids(&self) -> Vec<Uuid> {
    let mut ids = Vec::new();
    for entry in self.inner.git_nodes.iter() {
        ids.push(*entry.key());
    }
    ids
}
pub fn get_email_client(&self) -> Option<EmailClient> {
if let Some(ref client) = self.email_client {
return Some(client.clone());
}
self.inner
.mail_nodes
.iter()
.next()
.map(|e| e.value().clone())
}
/// Reports whether `git_nodes` currently holds at least one entry.
pub fn has_git_nodes(&self) -> bool {
    let nodes = &self.inner.git_nodes;
    !nodes.is_empty()
}
/// Returns the known gitks node UUIDs in ascending order, so that selection
/// based on the list is deterministic.
pub fn git_node_ids_sorted(&self) -> Vec<Uuid> {
    let mut sorted_ids = self.git_node_ids();
    // Equal UUIDs are indistinguishable, so an unstable sort yields the same result.
    sorted_ids.sort_unstable();
    sorted_ids
}
/// Read config from etcd. Priority: etcd > env > default.
/// This is async but can be called from sync context via block_on.
pub async fn get_config(&self, key: &str, default: &str) -> String {
let etcd_key = format!("{}config/{}", self.inner.key_prefix, key);
let mut client = self.inner.client.lock().await;
if let Ok(resp) = client.get(etcd_key.as_str(), None).await {
if let Some(kv) = resp.kvs().first() {
if let Ok(v) = kv.value_str() {
if !v.is_empty() {
tracing::info!(key, value = v, "config from etcd");
return v.to_string();
}
}
}
}
drop(client);
// Fall back to env
if let Ok(v) = std::env::var(key) {
if !v.is_empty() {
return v;
}
}
default.to_string()
}
}
/// Maps a gitks `storage_name` to a stable UUID.
///
/// The UUID is a version-5 (name-based) UUID in the DNS namespace, so every
/// appks instance derives the same id from the same `storage_name`.
pub fn storage_name_to_uuid(storage_name: &str) -> Uuid {
    let name_bytes = storage_name.as_bytes();
    Uuid::new_v5(&Uuid::NAMESPACE_DNS, name_bytes)
}