feat(cluster): implement distributed clustering with etcd coordination

- Integrate etcd-client for distributed coordination and leader election
- Add remote client macros with proper formatting for all services
- Implement RequestMetrics for tracking RPC performance and errors
- Add rate limiting mechanism across all service endpoints
- Create ElectionRequest and ElectionResult message types for leader election
- Add role management with primary/replica switching capabilities
- Implement health checker with automatic failover detection
- Add repository count metrics for cluster monitoring
- Update Cargo.toml with etcd-client and dashmap dependencies
- Modify RepoEntry to include read_only flag for replica handling
- Implement should_accept_election logic to prevent duplicate elections
- Add RoleChangedEvent handling for cluster role updates
This commit is contained in:
zhenyi
2026-06-08 14:31:29 +08:00
parent d243dce027
commit 8f472a0443
37 changed files with 4691 additions and 83 deletions
+287 -2
View File
@@ -2,8 +2,13 @@ use crate::pb::repository_service_client::RepositoryServiceClient;
use crate::pb::*;
use super::{GitksService, git_cmd, into_status, repository_maint};
use tokio_stream::wrappers::ReceiverStream;
remote_client!(remote_repository_client, RepositoryServiceClient<tonic::transport::Channel>, "repository");
remote_client!(
remote_repository_client,
RepositoryServiceClient<tonic::transport::Channel>,
"repository"
);
fn default_branch_name(gb: &crate::bare::GitBare) -> String {
git_cmd(gb, &["symbolic-ref", "HEAD"])
@@ -23,7 +28,9 @@ impl repository_service_server::RepositoryService for GitksService {
&self,
request: tonic::Request<GetRepositoryRequest>,
) -> Result<tonic::Response<Repository>, tonic::Status> {
let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/GetRepository");
let inner = request.into_inner();
let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?;
let repo = self.repo_label(inner.repository.as_ref());
let span = tracing::info_span!("repo.get_repository", %repo);
let _enter = span.enter();
@@ -33,14 +40,20 @@ impl repository_service_server::RepositoryService for GitksService {
if let Some(mut client) =
remote_repository_client(self, inner.repository.as_ref(), false).await?
{
m.record("ok");
return client.get_repository(inner).await;
}
crate::metrics::record_rpc_error(&m, &err);
return Err(err);
}
Err(err) => {
crate::metrics::record_rpc_error(&m, &err);
return Err(err);
}
Err(err) => return Err(err),
};
let bare = gb.bare_dir.join("HEAD").exists();
let object_format = gb.object_format();
m.record("ok");
Ok(tonic::Response::new(Repository {
header: inner.repository,
bare,
@@ -54,15 +67,21 @@ impl repository_service_server::RepositoryService for GitksService {
&self,
request: tonic::Request<InitRepositoryRequest>,
) -> Result<tonic::Response<Repository>, tonic::Status> {
let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/InitRepository");
let inner = request.into_inner();
let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?;
let repo = self.repo_label(inner.repository.as_ref());
let span = tracing::info_span!("repo.init_repository", %repo);
let _enter = span.enter();
let bare_dir = self.resolve_for_init(inner.repository.as_ref())?;
let gb = crate::bare::GitBare::new(bare_dir);
gb.init_repository(inner.bare).map_err(into_status)?;
if let Some(ref hm) = self.hook_manager {
hm.install_hooks(&gb.bare_dir).map_err(into_status)?;
}
tracing::info!(%repo, bare = inner.bare, "repository initialized");
self.notify_ref_update(&repo, "HEAD", "", "");
m.record("ok");
Ok(tonic::Response::new(Repository {
header: inner.repository,
bare: inner.bare,
@@ -74,7 +93,9 @@ impl repository_service_server::RepositoryService for GitksService {
&self,
request: tonic::Request<DeleteRepositoryRequest>,
) -> Result<tonic::Response<()>, tonic::Status> {
let m = crate::metrics::RequestMetrics::new("gitks.RepositoryService/DeleteRepository");
let inner = request.into_inner();
let _rate = self.acquire_rate_limit(inner.repository.as_ref()).await?;
let repo = self.repo_label(inner.repository.as_ref());
let span = tracing::info_span!("repo.delete_repository", %repo);
let _enter = span.enter();
@@ -83,12 +104,15 @@ impl repository_service_server::RepositoryService for GitksService {
&& let Some(mut client) =
remote_repository_client(self, inner.repository.as_ref(), true).await?
{
m.record("ok");
return client.delete_repository(inner).await;
}
tracing::warn!(%repo, path = %bare_dir.display(), "deleting repository");
std::fs::remove_dir_all(&bare_dir).map_err(|e| tonic::Status::internal(e.to_string()))?;
tracing::info!(%repo, "repository deleted");
self.notify_ref_update(&repo, "", "", "");
crate::rate_limit::remove_repository(&repo);
m.record("ok");
Ok(tonic::Response::new(()))
}
@@ -421,4 +445,265 @@ impl repository_service_server::RepositoryService for GitksService {
tracing::info!(%repo, ok = resp.ok, "commit-graph write done");
Ok(tonic::Response::new(resp))
}
// ── Hooks Management ────────────────────────────────────────────
/// Lists the hooks reported by the hook manager for one repository.
///
/// The repository is resolved first, so a resolution failure is returned even when no
/// hook manager is configured. Without a hook manager the response is an empty list.
/// Hook-manager failures are surfaced as `internal` statuses.
async fn list_hooks(
    &self,
    request: tonic::Request<ListHooksRequest>,
) -> Result<tonic::Response<ListHooksResponse>, tonic::Status> {
    let req = request.into_inner();
    let gb = self.resolve(req.repository.as_ref())?;

    let found = match self.hook_manager.as_ref() {
        Some(manager) => manager
            .list_hooks(&gb.bare_dir)
            .map_err(|e| tonic::Status::internal(e.to_string()))?,
        None => Vec::new(),
    };

    // Translate the hook manager's records into their protobuf counterparts.
    let mut hooks = Vec::new();
    for entry in found {
        hooks.push(crate::pb::HookInfo {
            hook_type: entry.hook_type,
            level: entry.level.to_string(),
            path: entry.path,
        });
    }

    Ok(tonic::Response::new(ListHooksResponse { hooks }))
}
/// Installs or replaces a custom hook for a repository.
///
/// Returns `failed_precondition` when the service has no hook manager, and `internal`
/// when the hook manager rejects the request. The repository is resolved before the
/// hook manager is checked, so resolution errors take precedence.
async fn set_custom_hook(
    &self,
    request: tonic::Request<SetCustomHookRequest>,
) -> Result<tonic::Response<()>, tonic::Status> {
    let req = request.into_inner();
    let gb = self.resolve(req.repository.as_ref())?;

    let Some(manager) = self.hook_manager.as_ref() else {
        return Err(tonic::Status::failed_precondition("hooks not enabled"));
    };

    // NOTE(review): `hook_name` comes straight from the request; presumably the hook
    // manager validates it before touching the filesystem — confirm.
    manager
        .set_custom_hook(&gb.bare_dir, &req.hook_name, &req.content)
        .map_err(|e| tonic::Status::internal(e.to_string()))?;

    tracing::info!(repo = %gb.bare_dir.display(), hook = %req.hook_name, "custom hook set");
    Ok(tonic::Response::new(()))
}
/// Removes a custom hook from a repository.
///
/// Returns `failed_precondition` when the service has no hook manager, and `internal`
/// when the hook manager reports a failure. The repository is resolved before the
/// hook manager is checked, so resolution errors take precedence.
async fn remove_custom_hook(
    &self,
    request: tonic::Request<RemoveCustomHookRequest>,
) -> Result<tonic::Response<()>, tonic::Status> {
    let req = request.into_inner();
    let gb = self.resolve(req.repository.as_ref())?;

    let Some(manager) = self.hook_manager.as_ref() else {
        return Err(tonic::Status::failed_precondition("hooks not enabled"));
    };

    manager
        .remove_custom_hook(&gb.bare_dir, &req.hook_name)
        .map_err(|e| tonic::Status::internal(e.to_string()))?;

    tracing::info!(repo = %gb.bare_dir.display(), hook = %req.hook_name, "custom hook removed");
    Ok(tonic::Response::new(()))
}
// ── Snapshot Operations ──────────────────────────────────────────
/// Creates a snapshot of a repository and stores it in the local snapshot directory
/// (`<repo_prefix>/+gitks-snapshots`).
///
/// The response carries the new snapshot ID, the repository's HEAD object ID, and the
/// stored size in bytes. The size is obtained by reading the snapshot back from storage;
/// this is best-effort, so a failed read-back is logged and reported as `0` rather than
/// failing an otherwise successful snapshot.
///
/// Snapshot creation and HEAD lookup failures are returned as `internal` statuses.
async fn create_snapshot(
    &self,
    request: tonic::Request<CreateSnapshotRequest>,
) -> Result<tonic::Response<CreateSnapshotResponse>, tonic::Status> {
    use crate::snapshot::storage::SnapshotStorageBackend;

    let inner = request.into_inner();
    let gb = self.resolve(inner.repository.as_ref())?;
    let repo = self.repo_label(inner.repository.as_ref());
    let span = tracing::info_span!("repo.create_snapshot", %repo);
    let _enter = span.enter();

    let storage = crate::snapshot::storage::LocalSnapshotStorage::new(
        self.repo_prefix.join("+gitks-snapshots"),
    );
    let snapshot_id = crate::snapshot::ops::create_and_store_snapshot(&gb, &repo, &storage)
        .map_err(|e| tonic::Status::internal(e.to_string()))?;
    let head_oid = crate::snapshot::ops::get_head_oid_internal(&gb)
        .map_err(|e| tonic::Status::internal(e.to_string()))?;

    // The snapshot itself already succeeded; do not fail the RPC over the size lookup,
    // but leave a trace so a reported size of 0 can be told apart from an empty snapshot.
    let actual_size = match storage.read_snapshot(&snapshot_id) {
        Ok(data) => data.len() as u64,
        Err(e) => {
            tracing::warn!(
                %repo,
                snapshot_id = %snapshot_id,
                error = %e,
                "could not read snapshot back to determine its size; reporting 0"
            );
            0
        }
    };

    tracing::info!(%repo, snapshot_id = %snapshot_id, size_bytes = actual_size, "snapshot created");
    Ok(tonic::Response::new(CreateSnapshotResponse {
        snapshot_id,
        size_bytes: actual_size,
        head_oid,
    }))
}
/// Restores a stored snapshot into the target repository location.
///
/// The snapshot is looked up by ID in the local snapshot directory
/// (`<repo_prefix>/+gitks-snapshots`). Restore failures are returned as `internal`
/// statuses. On success a ref-update notification for `HEAD` is emitted for the target.
async fn restore_snapshot(
    &self,
    request: tonic::Request<RestoreSnapshotRequest>,
) -> Result<tonic::Response<()>, tonic::Status> {
    let req = request.into_inner();
    let target_repo = self.repo_label(req.target_repository.as_ref());
    let span = tracing::info_span!("repo.restore_snapshot", %target_repo);
    let _enter = span.enter();

    let snapshot_root = self.repo_prefix.join("+gitks-snapshots");
    let storage = crate::snapshot::storage::LocalSnapshotStorage::new(snapshot_root);
    let destination = self.resolve_for_init(req.target_repository.as_ref())?;

    // NOTE(review): unlike `move_repository`, no hooks are installed after the restore —
    // confirm whether `restore_from_storage` takes care of that.
    if let Err(e) =
        crate::snapshot::ops::restore_from_storage(&destination, &req.snapshot_id, &storage)
    {
        return Err(tonic::Status::internal(e.to_string()));
    }

    tracing::info!(%target_repo, snapshot_id = %req.snapshot_id, "snapshot restored");
    self.notify_ref_update(&target_repo, "HEAD", "", "");
    Ok(tonic::Response::new(()))
}
/// Lists the snapshots stored for a repository.
///
/// Snapshots are looked up by repository label in the local snapshot directory
/// (`<repo_prefix>/+gitks-snapshots`). Storage errors are returned as `internal` statuses.
async fn list_snapshots(
    &self,
    request: tonic::Request<ListSnapshotsRequest>,
) -> Result<tonic::Response<ListSnapshotsResponse>, tonic::Status> {
    use crate::snapshot::storage::SnapshotStorageBackend;

    let req = request.into_inner();
    let repo = self.repo_label(req.repository.as_ref());
    let snapshot_root = self.repo_prefix.join("+gitks-snapshots");
    let storage = crate::snapshot::storage::LocalSnapshotStorage::new(snapshot_root);

    let stored = storage
        .list_snapshots(&repo)
        .map_err(|e| tonic::Status::internal(e))?;

    // Translate storage records into their protobuf counterparts.
    let mut snapshots = Vec::new();
    for record in stored {
        snapshots.push(crate::pb::SnapshotInfo {
            snapshot_id: record.snapshot_id,
            relative_path: record.relative_path,
            size_bytes: record.size_bytes,
            created_at: record.created_at,
            head_oid: record.head_oid,
        });
    }

    Ok(tonic::Response::new(ListSnapshotsResponse { snapshots }))
}
/// Deletes a stored snapshot by its ID.
///
/// The snapshot is removed from the local snapshot directory
/// (`<repo_prefix>/+gitks-snapshots`). Storage errors are returned as `internal` statuses.
async fn delete_snapshot(
    &self,
    request: tonic::Request<DeleteSnapshotRequest>,
) -> Result<tonic::Response<()>, tonic::Status> {
    use crate::snapshot::storage::SnapshotStorageBackend;

    let req = request.into_inner();
    let snapshot_root = self.repo_prefix.join("+gitks-snapshots");
    let storage = crate::snapshot::storage::LocalSnapshotStorage::new(snapshot_root);

    // NOTE(review): deletion is keyed by snapshot ID only; no repository is consulted
    // here — confirm that IDs are unguessable or that authorization happens upstream.
    if let Err(e) = storage.delete_snapshot(&req.snapshot_id) {
        return Err(tonic::Status::internal(e));
    }

    tracing::info!(snapshot_id = %req.snapshot_id, "snapshot deleted");
    Ok(tonic::Response::new(()))
}
// ── Repository Move ──────────────────────────────────────────────
/// Response stream type for `fetch_repository_data`: a `ReceiverStream` whose items are
/// either a `FetchRepositoryDataResponse` chunk or a `tonic::Status` error.
type FetchRepositoryDataStream =
ReceiverStream<Result<FetchRepositoryDataResponse, tonic::Status>>;
async fn move_repository(
&self,
request: tonic::Request<MoveRepositoryRequest>,
) -> Result<tonic::Response<MoveRepositoryResponse>, tonic::Status> {
let inner = request.into_inner();
let source_repo = self.repo_label(inner.source_repository.as_ref());
let span = tracing::info_span!("repo.move_repository", %source_repo);
let _enter = span.enter();
let gb = self.resolve(inner.source_repository.as_ref())?;
let bundle_data = crate::snapshot::ops::create_snapshot(&gb)
.map_err(|e| tonic::Status::internal(e.to_string()))?;
let target_path = self.resolve_for_init(inner.target_repository.as_ref())?;
let target_gb = crate::bare::GitBare::new(target_path.clone());
target_gb
.init_repository(true)
.map_err(|e| tonic::Status::internal(e.to_string()))?;
crate::snapshot::ops::restore_snapshot(&target_path, &bundle_data)
.map_err(|e| tonic::Status::internal(e.to_string()))?;
if let Some(ref hm) = self.hook_manager {
hm.install_hooks(&target_path)
.map_err(|e| tonic::Status::internal(e.to_string()))?;
}
let source_path = gb.bare_dir.clone();
std::fs::remove_dir_all(&source_path)
.map_err(|e| tonic::Status::internal(e.to_string()))?;
self.notify_ref_update(&source_repo, "HEAD", "", "");
tracing::info!(source = %source_repo, "repository moved successfully");
Ok(tonic::Response::new(MoveRepositoryResponse {
state: MoveRepositoryState::MoveStateCompleted as i32,
error_message: String::new(),
}))
}
/// Streams a snapshot of a repository to the caller in fixed-size chunks.
///
/// The snapshot is built up front; a spawned task then pushes it through a bounded
/// channel (capacity 16) in chunks of at most 64 KiB. The final message has `done` set.
/// An empty snapshot produces exactly one message with empty `data` and `done = true`.
/// The task stops early if the receiving side of the channel has gone away.
async fn fetch_repository_data(
    &self,
    request: tonic::Request<FetchRepositoryDataRequest>,
) -> Result<tonic::Response<Self::FetchRepositoryDataStream>, tonic::Status> {
    let req = request.into_inner();
    let repo = self.repo_label(req.repository.as_ref());
    let span = tracing::info_span!("repo.fetch_repository_data", %repo);
    let _enter = span.enter();

    let gb = self.resolve(req.repository.as_ref())?;
    let bundle_data = crate::snapshot::ops::create_snapshot(&gb)
        .map_err(|e| tonic::Status::internal(e.to_string()))?;

    let (tx, rx) = tokio::sync::mpsc::channel(16);
    tokio::spawn(async move {
        const CHUNK_SIZE: usize = 65536;

        if bundle_data.is_empty() {
            // Still send one terminal message so the client sees an explicit end marker.
            let terminal = FetchRepositoryDataResponse {
                data: vec![],
                done: true,
            };
            let _ = tx.send(Ok(terminal)).await;
            return;
        }

        let mut pieces = bundle_data.chunks(CHUNK_SIZE).peekable();
        while let Some(piece) = pieces.next() {
            let message = FetchRepositoryDataResponse {
                data: piece.to_vec(),
                // The last chunk is the one with nothing queued behind it.
                done: pieces.peek().is_none(),
            };
            if tx.send(Ok(message)).await.is_err() {
                // Receiver dropped: nobody is listening any more.
                break;
            }
        }
    });

    Ok(tonic::Response::new(ReceiverStream::new(rx)))
}
}