Reuse the existing count_existing_lines() (reader.lines().count()) instead of a manual read_until loop, eliminating duplicate line-counting logic in data_gen.rs. Closes #40
219 lines
6.4 KiB
Rust
219 lines
6.4 KiB
Rust
use std::fs;
|
||
use std::io::{BufRead, BufReader, BufWriter, Write};
|
||
use std::path::{Path, PathBuf};
|
||
|
||
/// Summary statistics describing a test data file on disk.
///
/// Derives `Debug`, `Clone` and `PartialEq` so values can be logged, copied and
/// compared in assertions (`Eq` is not available because of the `f64` field).
#[derive(Debug, Clone, PartialEq)]
pub struct TestFileInfo {
    /// Location of the file that was measured.
    pub path: PathBuf,
    /// Total size of the file in bytes.
    pub size_bytes: u64,
    /// Number of lines found in the file.
    pub line_count: u64,
    /// Mean number of bytes per line (`size_bytes / line_count`), `0.0` for a file with no lines.
    pub avg_line_length: f64,
}
|
||
|
||
/// Check if test file exists and return its info, or generate it
|
||
pub fn ensure_test_file(path: &Path) -> std::io::Result<TestFileInfo> {
|
||
if path.exists() {
|
||
return get_file_info(path);
|
||
}
|
||
generate_test_file(path)
|
||
}
|
||
|
||
/// Get info about an existing test file
|
||
fn get_file_info(path: &Path) -> std::io::Result<TestFileInfo> {
|
||
let metadata = fs::metadata(path)?;
|
||
let size_bytes = metadata.len();
|
||
let line_count = count_existing_lines(path)?;
|
||
|
||
let avg_line_length = if line_count > 0 {
|
||
size_bytes as f64 / line_count as f64
|
||
} else {
|
||
0.0
|
||
};
|
||
|
||
Ok(TestFileInfo {
|
||
path: path.to_path_buf(),
|
||
size_bytes,
|
||
line_count,
|
||
avg_line_length,
|
||
})
|
||
}
|
||
|
||
/// Generate a large test file (~5GB / ~74M lines) if it doesn't exist
|
||
fn generate_test_file(path: &Path) -> std::io::Result<TestFileInfo> {
|
||
if let Some(parent) = path.parent() {
|
||
fs::create_dir_all(parent)?;
|
||
}
|
||
|
||
let mut file = BufWriter::with_capacity(64 * 1024, fs::File::create(path)?);
|
||
let target_lines: u64 = 74_000_000;
|
||
for i in 0..target_lines {
|
||
writeln!(
|
||
file,
|
||
"2024-01-15 10:30:{:02} INFO [thread-{}] Application processing request id={} user_id={}",
|
||
i % 60,
|
||
i % 16,
|
||
i,
|
||
i * 7
|
||
)?;
|
||
}
|
||
file.flush()?;
|
||
drop(file);
|
||
|
||
get_file_info(path)
|
||
}
|
||
|
||
/// Create `growable.log` inside `dir`, filled with 150,000 synthetic log lines (~10MB).
///
/// The directory is created when missing. Returns the path of the written file,
/// which is intended as the starting point for growth/rotation tests.
///
/// # Errors
/// Propagates any I/O error from creating the directory or writing the file.
pub fn generate_growable_file(dir: &Path) -> std::io::Result<PathBuf> {
    const LINE_COUNT: u64 = 150_000;

    fs::create_dir_all(dir)?;
    let target = dir.join("growable.log");

    let sink = fs::File::create(&target)?;
    let mut out = BufWriter::with_capacity(64 * 1024, sink);
    (0..LINE_COUNT).try_for_each(|n| {
        writeln!(
            out,
            "2024-01-15 10:30:{:02} INFO [thread-{}] Appending test line {}",
            n % 60,
            n % 16,
            n
        )
    })?;
    // Flush explicitly so a failed final write is reported rather than dropped.
    out.flush()?;

    Ok(target)
}
|
||
|
||
/// Append `count` lines to the file
|
||
pub fn append_lines(path: &Path, count: usize) -> std::io::Result<()> {
|
||
let mut file = BufWriter::with_capacity(
|
||
64 * 1024,
|
||
fs::OpenOptions::new().append(true).open(path)?,
|
||
);
|
||
let existing_lines = count_existing_lines(path).unwrap_or(0);
|
||
for i in 0..count {
|
||
writeln!(
|
||
file,
|
||
"2024-01-15 10:30:00 INFO Appended line {}",
|
||
existing_lines + i as u64
|
||
)?;
|
||
}
|
||
file.flush()?;
|
||
Ok(())
|
||
}
|
||
|
||
/// Set the length of the file at `path` to exactly `size` bytes.
///
/// The file is opened for writing without being created, so it must already exist.
///
/// # Errors
/// Returns an error if the file cannot be opened or its length cannot be changed.
pub fn truncate_file(path: &Path, size: u64) -> std::io::Result<()> {
    fs::OpenOptions::new()
        .write(true)
        .open(path)
        .and_then(|handle| handle.set_len(size))
}
|
||
|
||
/// Rotate a log file: rename `path` to `<path>.1`, then create a new empty file at `path`.
///
/// The rotated name is built by appending `.1` to the complete file name, so
/// `app.log` becomes `app.log.1` and a file with any other extension keeps it
/// (`data.txt` becomes `data.txt.1`).
///
/// Returns the path of the rotated (renamed) file.
///
/// # Errors
/// Returns an error if the rename fails (for example when `path` does not exist)
/// or if the replacement file cannot be created.
pub fn rotate_file(path: &Path) -> std::io::Result<PathBuf> {
    // `Path::with_extension("log.1")` would *replace* the current extension, which
    // only yields the intended name for `*.log` inputs; appending works for any name.
    let mut rotated_name = path.as_os_str().to_os_string();
    rotated_name.push(".1");
    let rotated = PathBuf::from(rotated_name);

    fs::rename(path, &rotated)?;
    fs::File::create(path)?;
    Ok(rotated)
}
|
||
|
||
/// Count the lines in the file at `path`.
///
/// Lines are delimited by `\n`; a final line without a trailing newline still
/// counts. Content is read as raw bytes, so non-UTF-8 data is counted as well.
///
/// # Errors
/// Returns an error if the file cannot be opened or if any read fails. Read
/// errors are propagated rather than being tallied as lines, which also ensures
/// the loop terminates when the underlying reader keeps failing.
fn count_existing_lines(path: &Path) -> std::io::Result<u64> {
    let mut reader = BufReader::new(fs::File::open(path)?);
    // One buffer reused for every line avoids a fresh allocation per line.
    let mut line = Vec::new();
    let mut total = 0u64;

    while reader.read_until(b'\n', &mut line)? != 0 {
        total += 1;
        line.clear();
    }

    Ok(total)
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// The growable fixture should land close to its advertised ~10MB size.
    #[test]
    fn test_generate_growable_file_creates_approximately_correct_size() {
        let dir = tempfile::tempdir().unwrap();
        let path = generate_growable_file(dir.path()).unwrap();

        assert!(path.exists());
        let metadata = fs::metadata(&path).unwrap();
        let size_mb = metadata.len() as f64 / (1024.0 * 1024.0);
        // 150K lines of roughly 60 bytes each is a little under 10MB; allow 5MB–15MB.
        assert!(
            (5.0..=15.0).contains(&size_mb),
            "Expected ~10MB, got {size_mb:.1}MB"
        );
    }

    /// Appending adds exactly `count` lines and continues the line numbering.
    #[test]
    fn test_append_lines_increases_line_count() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test.log");
        {
            let mut f = fs::File::create(&path).unwrap();
            for i in 0..10u64 {
                writeln!(f, "line {i}").unwrap();
            }
        }

        let before = count_existing_lines(&path).unwrap();
        append_lines(&path, 5).unwrap();
        let after = count_existing_lines(&path).unwrap();
        assert_eq!(after, before + 5);

        // Ten lines existed, so the five appended lines are numbered 10 through 14.
        let contents = fs::read_to_string(&path).unwrap();
        assert_eq!(
            contents.lines().last(),
            Some("2024-01-15 10:30:00 INFO Appended line 14")
        );
    }

    /// Truncating to a smaller size shrinks the file to exactly that size.
    #[test]
    fn test_truncate_file_reduces_size() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("trunc.log");
        {
            let mut f = fs::File::create(&path).unwrap();
            write!(f, "{}", "A".repeat(1024)).unwrap();
        }

        let before = fs::metadata(&path).unwrap().len();
        assert_eq!(before, 1024);

        truncate_file(&path, 512).unwrap();
        let after = fs::metadata(&path).unwrap().len();
        assert_eq!(after, 512);
    }

    /// Rotation moves the old content aside and leaves an empty file in its place.
    #[test]
    fn test_rotate_file_renames_and_creates_empty() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("rotate.log");
        {
            let mut f = fs::File::create(&path).unwrap();
            write!(f, "original content").unwrap();
        }

        let rotated = rotate_file(&path).unwrap();

        // Rotated file sits next to the original and has the old content
        assert_eq!(rotated, dir.path().join("rotate.log.1"));
        assert!(rotated.exists());
        assert_eq!(fs::read_to_string(&rotated).unwrap(), "original content");

        // New file is empty
        assert!(path.exists());
        assert_eq!(fs::metadata(&path).unwrap().len(), 0);
    }

    /// `ensure_test_file` must report on a file that already exists without regenerating it.
    ///
    /// The generate-when-missing path is intentionally not exercised here:
    /// `generate_test_file` writes 74M lines, which is far too slow for a unit test.
    #[test]
    fn test_ensure_test_file_returns_info_for_existing_file() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("fresh.log");
        {
            let mut f = fs::File::create(&path).unwrap();
            for i in 0..100u64 {
                writeln!(f, "2024-01-15 10:30:00 INFO line {i}").unwrap();
            }
        }
        let expected_size = fs::metadata(&path).unwrap().len();

        let info = ensure_test_file(&path).unwrap();
        assert_eq!(info.path, path);
        assert_eq!(info.line_count, 100);
        assert!(info.size_bytes > 0);
        // The small file must have been measured, not replaced by a generated one.
        assert_eq!(info.size_bytes, expected_size);
        assert!(info.avg_line_length > 0.0);
        assert!((info.avg_line_length - expected_size as f64 / 100.0).abs() < 1e-9);
    }
}
|