feat(repository): add language statistics analysis feature
- Remove data directory from gitignore to include language data
- Add build script to parse linguist languages.yml and generate static mappings
- Include serde and serde_yml dependencies for YAML parsing
- Add lang_stats module with language detection and statistics calculation
- Generate protobuf definitions for language statistics API endpoints
- Implement GetLanguageStats RPC endpoint in repository server
- Add comprehensive test suite for language statistics functionality
- Include extension and filename based language detection logic
- Implement binary file classification and group resolution features
This commit is contained in:
@@ -0,0 +1,150 @@
|
||||
mod common;
|
||||
|
||||
use gitks::pb::GetLanguageStatsRequest;
|
||||
use gitks::repository::lang_stats::{EXTENSION_MAP, FILENAME_MAP};
|
||||
|
||||
/// Checks that the extension table resolves two well-known extensions.
///
/// Each probe goes through `binary_search_by` on the first tuple field, so a
/// pass means the key is reachable by binary search over `EXTENSION_MAP`.
#[test]
fn test_extension_map_lookup() {
    // `.md` must be present and map to ("Markdown", "prose").
    let md_pos = match EXTENSION_MAP.binary_search_by(|&(ext, _, _)| ext.cmp(".md")) {
        Ok(pos) => pos,
        miss => panic!(".md should be in EXTENSION_MAP, got {:?}", miss),
    };
    let md_entry = &EXTENSION_MAP[md_pos];
    assert_eq!(md_entry.1, "Markdown");
    assert_eq!(md_entry.2, "prose");

    // `.rs` must be present and map to "Rust".
    let rs_pos = match EXTENSION_MAP.binary_search_by(|&(ext, _, _)| ext.cmp(".rs")) {
        Ok(pos) => pos,
        Err(_) => panic!(".rs should be in EXTENSION_MAP"),
    };
    let rs_entry = &EXTENSION_MAP[rs_pos];
    assert_eq!(rs_entry.1, "Rust");
}
|
||||
|
||||
/// Checks that the filename table has an entry for `Makefile`, found via
/// binary search on the first tuple field.
#[test]
fn test_filename_map_lookup() {
    let makefile_found = FILENAME_MAP
        .binary_search_by(|&(name, _, _)| name.cmp("Makefile"))
        .is_ok();
    assert!(makefile_found, "Makefile should be in FILENAME_MAP");
}
|
||||
|
||||
/// End-to-end check of `get_language_stats` with an empty path filter.
///
/// Verifies that the totals are non-zero, that Markdown and Rust are both
/// reported, that the percentages add up to 100 (within 0.01), and that the
/// entries are ordered by descending byte count.
#[test]
fn test_language_stats_basic() {
    // `_dir` is kept bound for the duration of the test.
    let (_dir, gb) = common::setup_bare_repo();

    // No repository/revision override, empty path, `max_file_size` of 0.
    let request = GetLanguageStatsRequest {
        repository: None,
        revision: None,
        path: String::new(),
        max_file_size: 0,
    };
    let resp = gb.get_language_stats(request).expect("get_language_stats");

    // The response must account for at least one file and one byte.
    assert!(resp.total_files > 0, "expected some files");
    assert!(resp.total_bytes > 0, "expected some bytes");

    // Markdown entry (README.md) with non-zero counters.
    let md = match resp.languages.iter().find(|entry| entry.language == "Markdown") {
        Some(found) => found,
        None => panic!("should detect Markdown language"),
    };
    assert!(md.file_count > 0);
    assert!(md.bytes > 0);
    assert!(md.lines > 0);

    // Rust entry (src/lib/mod.rs) with at least one file.
    let rust = match resp.languages.iter().find(|entry| entry.language == "Rust") {
        Some(found) => found,
        None => panic!("should detect Rust language"),
    };
    assert!(rust.file_count > 0);

    // The per-language shares must add up to 100 within a 0.01 tolerance.
    let total_pct: f64 = resp.languages.iter().map(|entry| entry.percentage).sum();
    let drift = (total_pct - 100.0).abs();
    assert!(drift < 0.01, "percentages should sum to 100, got {total_pct}");

    // Every entry must be at least as large (in bytes) as its successor.
    let successors = resp.languages.iter().skip(1);
    for (earlier, later) in resp.languages.iter().zip(successors) {
        assert!(
            earlier.bytes >= later.bytes,
            "languages should be sorted by bytes descending"
        );
    }
}
|
||||
|
||||
/// Checks the `lang_type` reported for Markdown ("prose") and Rust
/// ("programming").
///
/// Both languages are required to be present: a missing entry fails the test
/// instead of silently skipping its type assertion.
#[test]
fn test_language_stats_lang_type() {
    let (_dir, gb) = common::setup_bare_repo();

    let resp = gb
        .get_language_stats(GetLanguageStatsRequest {
            repository: None,
            revision: None,
            path: String::new(),
            max_file_size: 0,
        })
        .expect("get_language_stats");

    // Markdown should be "prose" type
    let md = resp
        .languages
        .iter()
        .find(|l| l.language == "Markdown")
        .expect("should detect Markdown language");
    assert_eq!(md.lang_type, "prose", "Markdown should be prose type");

    // Rust should be "programming" type
    let rust = resp
        .languages
        .iter()
        .find(|l| l.language == "Rust")
        .expect("should detect Rust language");
    assert_eq!(
        rust.lang_type, "programming",
        "Rust should be programming type"
    );
}
|
||||
|
||||
/// Checks that the `path` field of the request narrows the statistics.
///
/// With `path` set to "src", Rust must be reported and Markdown must not.
#[test]
fn test_language_stats_with_path() {
    let (_dir, gb) = common::setup_bare_repo();

    // Same request as the unfiltered tests, except for the "src" path.
    let scoped_request = GetLanguageStatsRequest {
        repository: None,
        revision: None,
        path: "src".to_string(),
        max_file_size: 0,
    };
    let resp = gb
        .get_language_stats(scoped_request)
        .expect("get_language_stats");

    // Rust sources live under src/, so Rust must be listed.
    let has_rust = resp.languages.iter().any(|entry| entry.language == "Rust");
    assert!(has_rust, "should find Rust in src/ directory");

    // README.md sits at the repository root, so no entry may be Markdown.
    let markdown_absent = resp
        .languages
        .iter()
        .all(|entry| entry.language != "Markdown");
    assert!(markdown_absent, "should not find Markdown in src/ directory");
}
|
||||
|
||||
/// Checks the line count reported for Markdown in the fixture repository.
///
/// The Markdown entry is required to be present: a missing entry fails the
/// test instead of silently skipping the line-count assertion.
#[test]
fn test_language_stats_line_count_excludes_blank_lines() {
    let (_dir, gb) = common::setup_bare_repo();

    let resp = gb
        .get_language_stats(GetLanguageStatsRequest {
            repository: None,
            revision: None,
            path: String::new(),
            max_file_size: 0,
        })
        .expect("get_language_stats");

    // README.md is expected to be "# Test\n\nUpdated.\n": 3 lines, of which
    // only "# Test" and "Updated." are non-blank.
    // NOTE(review): the fixture contents are not visible from this file —
    // confirm against `common::setup_bare_repo`.
    let md = resp
        .languages
        .iter()
        .find(|l| l.language == "Markdown")
        .expect("should detect Markdown language");

    // NOTE(review): this is only a lower bound, so it does not by itself prove
    // that blank lines are excluded. Tighten to an exact count once the set of
    // Markdown files in the fixture is confirmed.
    assert!(md.lines >= 2, "should count at least 2 code lines for README.md");
}
|
||||
Reference in New Issue
Block a user