feat(repository): add language statistics analysis feature

- Remove data directory from gitignore to include language data
- Add build script to parse linguist languages.yml and generate static mappings
- Include serde and serde_yml dependencies for YAML parsing
- Add lang_stats module with language detection and statistics calculation
- Generate protobuf definitions for language statistics API endpoints
- Implement GetLanguageStats RPC endpoint in repository server
- Add comprehensive test suite for language statistics functionality
- Include extension and filename based language detection logic
- Implement binary file classification and group resolution features
This commit is contained in:
zhenyi
2026-06-10 13:06:59 +08:00
parent 9a0c26e5f6
commit 939931acad
10 changed files with 10202 additions and 1 deletions
+17
View File
@@ -850,4 +850,21 @@ impl repository_service_server::RepositoryService for GitksService {
m.record("ok");
Ok(tonic::Response::new(resp))
}
/// Serves the `gitks.RepositoryService/GetLanguageStats` RPC.
///
/// Steps, in order: acquire the rate limit for the requested repository,
/// open a tracing span labelled with the repository, resolve the repository
/// to its backend handle, and delegate the computation to that handle's
/// `get_language_stats`.
///
/// # Errors
///
/// Returns the `tonic::Status` produced by `acquire_rate_limit` or `resolve`
/// unchanged, or the backend error converted through `into_status`. The
/// `"ok"` metric outcome is recorded on the success path only.
async fn get_language_stats(
    &self,
    request: tonic::Request<GetLanguageStatsRequest>,
) -> Result<tonic::Response<GetLanguageStatsResponse>, tonic::Status> {
    let metrics = crate::metrics::RequestMetrics::new("gitks.RepositoryService/GetLanguageStats");
    let req = request.into_inner();

    // The binding is kept alive until the function returns.
    // NOTE(review): presumably releases the rate-limit slot on drop — confirm.
    let _permit = self.acquire_rate_limit(req.repository.as_ref()).await?;

    // The span is entered only after the `.await` above, so the guard is
    // never held across a suspension point.
    let repo = self.repo_label(req.repository.as_ref());
    let rpc_span = tracing::info_span!("repo.get_language_stats", %repo);
    let _span_guard = rpc_span.enter();

    // `req` is moved into the backend call, so it is not usable afterwards.
    let backend = self.resolve(req.repository.as_ref())?;
    let stats = backend.get_language_stats(req).map_err(into_status)?;

    tracing::info!(%repo, languages = stats.languages.len(), "language stats done");
    metrics.record("ok");
    Ok(tonic::Response::new(stats))
}
}