Issue #38: warn_reset_hwm() silently swallowed non-permission I/O errors from /proc/self/clear_refs (e.g. missing /proc, read-only procfs, kernel incompatibility). This left users unaware that VmHWM reset failed and memory peak data could be contaminated across suites. Changes: - runner.rs: all errors now produce a warning with specific failure reason; PermissionDenied retains 'try running as root' hint; AtomicBool warn-once prevents duplicate output across 7 suite runs - main.rs: preflight check now uses warn_reset_hwm() instead of the vague can_reset_vm_hwm(), sharing the same warn-once mechanism - metrics.rs: remove dead can_reset_vm_hwm() (no callers remaining) - tests: add hwm_warned_flag_prevents_reentry and warn_reset_hwm_does_not_panic
276 lines
7.7 KiB
Rust
276 lines
7.7 KiB
Rust
use std::fs::{self, File};
|
||
use std::os::unix::fs::MetadataExt;
|
||
use std::os::unix::io::AsRawFd;
|
||
use std::path::Path;
|
||
|
||
/// Resident-memory figures for the current process, in kB as reported by the
/// kernel in `/proc/self/status`.
///
/// A field is `0` when the corresponding line could not be read or parsed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RssMetrics {
    /// Value of the `VmRSS:` line.
    pub vm_rss_kb: u64,
    /// Value of the `VmHWM:` line.
    pub vm_hwm_kb: u64,
}
|
||
|
||
/// Page-fault counters for the current process, as obtained from `getrusage`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PageFaultMetrics {
    /// Minor page-fault count reported by `getrusage`.
    pub minor_faults: u64,
    /// Major page-fault count reported by `getrusage`.
    pub major_faults: u64,
}
|
||
|
||
/// Stateless namespace for the measurement helpers defined in its `impl`
/// block (memory and page-fault readings, cache clearing, inode checks).
/// It has no fields; all helpers are associated functions.
pub struct MetricsCollector;
|
||
|
||
impl MetricsCollector {
|
||
/// Read VmRSS and VmHWM from /proc/self/status
|
||
pub fn read_rss() -> RssMetrics {
|
||
let status = fs::read_to_string("/proc/self/status").unwrap_or_default();
|
||
let mut vm_rss_kb: u64 = 0;
|
||
let mut vm_hwm_kb: u64 = 0;
|
||
for line in status.lines() {
|
||
if line.starts_with("VmRSS:") {
|
||
vm_rss_kb = parse_kb_value(line);
|
||
} else if line.starts_with("VmHWM:") {
|
||
vm_hwm_kb = parse_kb_value(line);
|
||
}
|
||
}
|
||
RssMetrics {
|
||
vm_rss_kb,
|
||
vm_hwm_kb,
|
||
}
|
||
}
|
||
|
||
/// Read page fault counts from getrusage
|
||
pub fn read_page_faults() -> PageFaultMetrics {
|
||
let usage =
|
||
nix::sys::resource::getrusage(nix::sys::resource::UsageWho::RUSAGE_SELF).unwrap();
|
||
PageFaultMetrics {
|
||
// getrusage() returns c_long (i64 on 64-bit Linux) — explicit as u64 conversion
|
||
minor_faults: usage.minor_page_faults() as u64,
|
||
major_faults: usage.major_page_faults() as u64,
|
||
}
|
||
}
|
||
|
||
/// Clear page cache (requires root: sync + drop_caches)
|
||
/// Falls back to doing nothing if no permission
|
||
pub fn clear_page_cache() -> std::io::Result<()> {
|
||
let _ = std::process::Command::new("sync").status();
|
||
fs::write("/proc/sys/vm/drop_caches", "1")
|
||
}
|
||
|
||
/// Clear file cache using posix_fadvise(DONTNEED) — no root required
|
||
pub fn clear_file_cache(path: &Path) -> std::io::Result<()> {
|
||
let file = File::open(path)?;
|
||
let len = file.metadata()?.len();
|
||
let ret = unsafe {
|
||
libc::posix_fadvise(file.as_raw_fd(), 0, len as i64, libc::POSIX_FADV_DONTNEED)
|
||
};
|
||
// posix_fadvise returns error code directly (not errno), 0 = success
|
||
if ret != 0 {
|
||
return Err(std::io::Error::from_raw_os_error(ret));
|
||
}
|
||
Ok(())
|
||
}
|
||
|
||
/// Reset VmHWM by writing to /proc/self/clear_refs (requires root)
|
||
pub fn reset_vm_hwm() -> std::io::Result<()> {
|
||
fs::write("/proc/self/clear_refs", "5").map_err(|e| {
|
||
if e.kind() == std::io::ErrorKind::PermissionDenied {
|
||
std::io::Error::new(
|
||
std::io::ErrorKind::PermissionDenied,
|
||
"VmHWM reset requires root (can't write /proc/self/clear_refs)",
|
||
)
|
||
} else {
|
||
e
|
||
}
|
||
})
|
||
}
|
||
|
||
/// Get file inode number
|
||
pub fn get_inode(path: &Path) -> std::io::Result<u64> {
|
||
let meta = fs::metadata(path)?;
|
||
Ok(meta.ino())
|
||
}
|
||
|
||
/// Check if file was rotated (inode changed)
|
||
pub fn detect_rotation(original_inode: u64, path: &Path) -> bool {
|
||
Self::get_inode(path)
|
||
.map(|ino| ino != original_inode)
|
||
.unwrap_or(true)
|
||
}
|
||
}
|
||
|
||
/// Extracts the numeric field from a status line such as `"VmRSS: 12345 kB"`.
///
/// The value is the second whitespace-separated token. Returns `0` when that
/// token is missing or is not a valid `u64`.
fn parse_kb_value(line: &str) -> u64 {
    let mut fields = line.split_whitespace();
    // First token is the label ("VmRSS:"); only the one after it matters.
    let _label = fields.next();
    match fields.next().map(str::parse::<u64>) {
        Some(Ok(kb)) => kb,
        _ => 0,
    }
}
|
||
|
||
/// Arithmetic mean of `data`, or `0.0` for an empty slice.
///
/// The sum is accumulated in `u128`, so large samples cannot overflow: even
/// `usize::MAX` values of `u64::MAX` fit. The conversion to `f64` happens once
/// on the total.
pub fn mean(data: &[u64]) -> f64 {
    if data.is_empty() {
        return 0.0;
    }
    let total: u128 = data.iter().map(|&v| u128::from(v)).sum();
    total as f64 / data.len() as f64
}
|
||
|
||
/// Returns the value at fraction `p` (0.0–1.0) of `data`.
///
/// Works on a sorted copy, leaving the input untouched, and picks the element
/// at index `round(p * (len - 1))`. An index past the end is clamped to the
/// last element. Returns `0` for an empty slice.
pub fn percentile(data: &[u64], p: f64) -> u64 {
    let last = match data.len().checked_sub(1) {
        Some(last) => last,
        None => return 0,
    };
    let mut ordered = data.to_vec();
    ordered.sort_unstable();
    let rank = (p * last as f64).round() as usize;
    ordered[rank.min(last)]
}
|
||
|
||
pub fn stdev(data: &[u64]) -> f64 {
|
||
if data.len() < 2 {
|
||
return 0.0;
|
||
}
|
||
let m = mean(data);
|
||
let variance: f64 = data
|
||
.iter()
|
||
.map(|&v| {
|
||
let d = v as f64 - m;
|
||
d * d
|
||
})
|
||
.sum::<f64>()
|
||
/ (data.len() - 1) as f64;
|
||
variance.sqrt()
|
||
}
|
||
|
||
pub fn p50(data: &[u64]) -> u64 {
|
||
percentile(data, 0.50)
|
||
}
|
||
|
||
pub fn p95(data: &[u64]) -> u64 {
|
||
percentile(data, 0.95)
|
||
}
|
||
|
||
pub fn p99(data: &[u64]) -> u64 {
|
||
percentile(data, 0.99)
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    // --- process metrics -------------------------------------------------

    /// A live process must report non-zero current and peak resident memory.
    #[test]
    fn test_rss_returns_values() {
        let RssMetrics {
            vm_rss_kb,
            vm_hwm_kb,
        } = MetricsCollector::read_rss();
        assert!(
            vm_rss_kb > 0,
            "VmRSS should be non-zero for a running process"
        );
        assert!(
            vm_hwm_kb > 0,
            "VmHWM should be non-zero for a running process"
        );
    }

    /// Any running process has taken at least one minor fault.
    #[test]
    fn test_page_faults_returns_values() {
        let minor = MetricsCollector::read_page_faults().minor_faults;
        assert!(minor > 0, "Should have some minor page faults");
    }

    // --- statistics ------------------------------------------------------

    #[test]
    fn test_mean() {
        let result = mean(&[100, 200, 300, 400, 500]);
        assert!(
            (result - 300.0).abs() < f64::EPSILON,
            "mean should be 300.0, got {result}"
        );
    }

    #[test]
    fn test_mean_empty() {
        let nothing: [u64; 0] = [];
        assert_eq!(mean(&nothing), 0.0);
    }

    #[test]
    fn test_percentile_p50() {
        let samples = [100, 200, 300, 400, 500];
        assert_eq!(percentile(&samples, 0.50), 300);
    }

    #[test]
    fn test_percentile_p99() {
        // 10, 20, ..., 100
        let samples: Vec<u64> = (1..=10).map(|step| step * 10).collect();
        let p99_result = percentile(&samples, 0.99);
        assert!(p99_result >= 90, "P99 should be near max, got {p99_result}");
    }

    #[test]
    fn test_percentile_empty() {
        let nothing: [u64; 0] = [];
        assert_eq!(percentile(&nothing, 0.5), 0);
    }

    #[test]
    fn test_stdev() {
        let s = stdev(&[100, 200, 300, 400, 500]);
        assert!(s > 100.0, "stdev should be significant, got {s}");
        assert!(s < 200.0, "stdev should be < 200, got {s}");
    }

    /// Fewer than two samples always yields a deviation of zero.
    #[test]
    fn test_stdev_single() {
        let degenerate: [&[u64]; 2] = [&[42], &[]];
        for samples in degenerate {
            assert_eq!(stdev(samples), 0.0);
        }
    }

    #[test]
    fn test_convenience_percentiles() {
        let samples = [10, 20, 30, 40, 50];
        assert_eq!(p50(&samples), 30);
        assert_eq!(p95(&samples), 50);
        assert_eq!(p99(&samples), 50);
    }

    // --- /proc line parsing ----------------------------------------------

    #[test]
    fn test_parse_kb_value() {
        let cases: [(&str, u64); 3] = [
            ("VmRSS: 12345 kB", 12345),
            ("VmHWM:\t2048 kB", 2048),
            ("VmRSS: 0 kB", 0),
        ];
        for (line, expected) in cases {
            assert_eq!(parse_kb_value(line), expected);
        }
    }

    #[test]
    fn test_parse_kb_value_malformed() {
        for line in ["VmRSS: NaN kB", "garbage"] {
            assert_eq!(parse_kb_value(line), 0);
        }
    }

    // --- inode / rotation ------------------------------------------------

    #[test]
    fn test_inode_for_existing_file() {
        let scratch = tempfile::NamedTempFile::new().unwrap();
        let inode = MetricsCollector::get_inode(scratch.path()).unwrap();
        assert!(inode > 0, "inode should be non-zero");
    }

    #[test]
    fn test_detect_rotation_no_rotation() {
        let scratch = tempfile::NamedTempFile::new().unwrap();
        let current = MetricsCollector::get_inode(scratch.path()).unwrap();
        let rotated = MetricsCollector::detect_rotation(current, scratch.path());
        assert!(!rotated);
    }

    #[test]
    fn test_detect_rotation_file_removed() {
        let missing = Path::new("/no/such/file");
        let result = MetricsCollector::detect_rotation(99999_u64, missing);
        assert!(result, "missing file should indicate rotation");
    }

    // --- cache clearing --------------------------------------------------

    #[test]
    fn test_clear_file_cache() {
        let scratch = tempfile::NamedTempFile::new().unwrap();
        let result = MetricsCollector::clear_file_cache(scratch.path());
        assert!(
            result.is_ok(),
            "clear_file_cache should succeed on temp file: {result:?}"
        );
    }
}
|