feat(config): integrate etcd for service discovery and config management
- Add etcd-client dependency for distributed configuration storage
- Implement EtcdConfig with priority: etcd > environment variables > defaults
- Add ServiceRegistry for service registration with lease keep-alive
- Integrate etcd-based service discovery for appks gRPC connections
- Add service watcher for real-time service instance updates
- Migrate Redis configuration from single URL to cluster node list
- Update Dockerfile with default IMKS_HOST and IMKS_PORT environment variables
- Add etcd bootstrap configuration through environment variables
- Implement Redis cluster URL building with optional authentication
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
use etcd_client::{Client, PutOptions, GetOptions, WatchOptions};
|
||||
use std::sync::Arc;
|
||||
|
||||
use etcd_client::{Client, EventType, GetOptions, PutOptions, WatchOptions};
|
||||
use tokio::sync::Mutex;
|
||||
use tokio_stream::StreamExt;
|
||||
|
||||
@@ -14,7 +15,10 @@ impl EtcdConfig {
|
||||
let client = Client::connect(endpoints, None)
|
||||
.await
|
||||
.map_err(|e| format!("etcd connect: {e}"))?;
|
||||
Ok(Self { client: Arc::new(Mutex::new(client)), prefix: prefix.to_string() })
|
||||
Ok(Self {
|
||||
client: Arc::new(Mutex::new(client)),
|
||||
prefix: prefix.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Get config value: etcd first, then env var, then default.
|
||||
@@ -41,7 +45,9 @@ impl EtcdConfig {
|
||||
}
|
||||
|
||||
pub async fn get_parsed<T: std::str::FromStr>(&self, key: &str, default: T) -> T
|
||||
where T::Err: std::fmt::Display, T: std::fmt::Display
|
||||
where
|
||||
T::Err: std::fmt::Display,
|
||||
T: std::fmt::Display,
|
||||
{
|
||||
let s = self.get(key, &default.to_string()).await;
|
||||
s.parse().unwrap_or(default)
|
||||
@@ -71,22 +77,44 @@ impl EtcdConfig {
|
||||
}
|
||||
}
|
||||
}
|
||||
tracing::info!(service = service_name, count = addrs.len(), "discovered instances");
|
||||
tracing::info!(
|
||||
service = service_name,
|
||||
count = addrs.len(),
|
||||
"discovered instances"
|
||||
);
|
||||
Ok(addrs)
|
||||
}
|
||||
|
||||
/// Watch a service for live updates.
|
||||
pub fn start_service_watcher(&self, service_name: &str) {
|
||||
/// Watch a service prefix for live join/leave events.
|
||||
///
|
||||
/// Calls `on_up(addr)` when a new instance appears and `on_down(addr)`
|
||||
/// when one disappears. The watcher runs in a background task and
|
||||
/// automatically reconnects on failure.
|
||||
pub fn start_service_watcher(
|
||||
&self,
|
||||
service_name: &str,
|
||||
on_up: impl Fn(String) + Send + Sync + 'static,
|
||||
on_down: impl Fn(String) + Send + Sync + 'static,
|
||||
) {
|
||||
let client = self.client.clone();
|
||||
let prefix = self.prefix.clone();
|
||||
let svc = service_name.to_string();
|
||||
let watch_prefix = format!("{}services/{}/", prefix, svc);
|
||||
|
||||
let on_up = Arc::new(on_up);
|
||||
let on_down = Arc::new(on_down);
|
||||
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
let mut stream = {
|
||||
let mut c = client.lock().await;
|
||||
match c.watch(watch_prefix.as_str(), Some(WatchOptions::new().with_prefix())).await {
|
||||
match c
|
||||
.watch(
|
||||
watch_prefix.as_str(),
|
||||
Some(WatchOptions::new().with_prefix()),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::warn!(service = %svc, error = %e, "watch failed, retry in 3s");
|
||||
@@ -96,23 +124,36 @@ impl EtcdConfig {
|
||||
}
|
||||
};
|
||||
while let Some(resp) = stream.next().await {
|
||||
if let Ok(resp) = resp {
|
||||
for event in resp.events() {
|
||||
if let Some(kv) = event.kv() {
|
||||
let addr = kv.value_str().unwrap_or_default();
|
||||
let key = kv.key_str().unwrap_or_default();
|
||||
match event.event_type() {
|
||||
etcd_client::EventType::Put => {
|
||||
tracing::info!(service = %svc, key, addr, "service up");
|
||||
}
|
||||
etcd_client::EventType::Delete => {
|
||||
tracing::info!(service = %svc, key, "service down");
|
||||
}
|
||||
}
|
||||
let Ok(resp) = resp else { break };
|
||||
for event in resp.events() {
|
||||
let Some(kv) = event.kv() else { continue };
|
||||
let raw = kv.value_str().unwrap_or_default();
|
||||
let key = kv.key_str().unwrap_or_default();
|
||||
|
||||
// Parse JSON to extract the actual address
|
||||
let addr = serde_json::from_str::<serde_json::Value>(raw)
|
||||
.ok()
|
||||
.and_then(|v| {
|
||||
v.get("addr")
|
||||
.and_then(|a| a.as_str())
|
||||
.map(|s| s.to_string())
|
||||
})
|
||||
.unwrap_or_else(|| raw.to_string());
|
||||
|
||||
match event.event_type() {
|
||||
EventType::Put => {
|
||||
tracing::info!(service = %svc, key, addr, "service up");
|
||||
on_up(addr);
|
||||
}
|
||||
EventType::Delete => {
|
||||
tracing::info!(service = %svc, key, "service down");
|
||||
on_down(addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
tracing::warn!(service = %svc, "watch stream ended, restarting in 3s");
|
||||
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -126,7 +167,10 @@ pub struct ServiceRegistry {
|
||||
|
||||
impl ServiceRegistry {
|
||||
pub fn new(client: Arc<Mutex<Client>>, prefix: &str) -> Self {
|
||||
Self { client, prefix: prefix.to_string() }
|
||||
Self {
|
||||
client,
|
||||
prefix: prefix.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn register(&self, service_name: &str, addr: &str) -> Result<(), String> {
|
||||
@@ -143,43 +187,44 @@ impl ServiceRegistry {
|
||||
|
||||
let lease = {
|
||||
let mut client = self.client.lock().await;
|
||||
client.lease_grant(15, None).await.map_err(|e| format!("lease: {e}"))?
|
||||
client
|
||||
.lease_grant(60, None)
|
||||
.await
|
||||
.map_err(|e| format!("lease: {e}"))?
|
||||
};
|
||||
|
||||
{
|
||||
let mut client = self.client.lock().await;
|
||||
let opts = PutOptions::new().with_lease(lease.id());
|
||||
client.put(key.clone(), value, Some(opts)).await.map_err(|e| format!("put: {e}"))?;
|
||||
client
|
||||
.put(key.clone(), value, Some(opts))
|
||||
.await
|
||||
.map_err(|e| format!("put: {e}"))?;
|
||||
}
|
||||
|
||||
tracing::info!(service = service_name, instance = %instance_id, addr = %addr, "registered in etcd");
|
||||
|
||||
let c = self.client.clone();
|
||||
let lease_id = lease.id();
|
||||
tokio::spawn(async move {
|
||||
let (mut keeper, mut stream) = {
|
||||
let mut client = c.lock().await;
|
||||
match client.lease_keep_alive(lease_id).await {
|
||||
Ok(pair) => pair,
|
||||
Err(e) => {
|
||||
tracing::error!(lease_id, error = %e, "failed to start lease keepalive");
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let mut interval = tokio::time::interval(std::time::Duration::from_secs(10));
|
||||
loop {
|
||||
let result = {
|
||||
let mut client = c.lock().await;
|
||||
client.lease_keep_alive(lease.id()).await
|
||||
};
|
||||
match result {
|
||||
Ok((_keeper, mut stream)) => {
|
||||
while stream.next().await.is_some() {}
|
||||
}
|
||||
Err(e) => tracing::warn!(lease_id = lease.id(), error = %e, "keepalive failed"),
|
||||
}
|
||||
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
||||
let new_lease = {
|
||||
let mut client = c.lock().await;
|
||||
client.lease_grant(15, None).await
|
||||
};
|
||||
if let Ok(lr) = new_lease {
|
||||
let instance = serde_json::json!({"addr": &addr, "port": 0, "version": env!("CARGO_PKG_VERSION")});
|
||||
if let Ok(v) = serde_json::to_string(&instance) {
|
||||
let mut client = c.lock().await;
|
||||
let opts = PutOptions::new().with_lease(lr.id());
|
||||
let _ = client.put(key.clone(), v, Some(opts)).await;
|
||||
}
|
||||
interval.tick().await;
|
||||
if let Err(e) = keeper.keep_alive().await {
|
||||
tracing::warn!(lease_id, error = %e, "lease keepalive failed");
|
||||
}
|
||||
let _ = stream.message().await;
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user