feat(core): add disk index cache infrastructure
Add xxhash-rust and bincode workspace dependencies for fast hashing and serialization. Implement cache_util for cache directory/path resolution with versioning, and IndexCache for saving/loading line indices to disk with file-hash validation. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
@@ -123,6 +123,10 @@ pub enum CoreError {
|
||||
#[error("mmap error: {0}")]
|
||||
Mmap(String),
|
||||
|
||||
// ─── Cache 变体:缓存操作错误(缓存目录创建失败、缓存读写失败等)────────────
|
||||
#[error("cache error: {0}")]
|
||||
Cache(String),
|
||||
|
||||
// ─── FileNotFound 变体:文件未找到 ──────────────────────────────────────
|
||||
#[error("file not found: {path:?}")]
|
||||
// {path:?} 使用 Debug 格式化输出路径,会保留引号,如 "file not found: "/path/to/file""
|
||||
|
||||
103
crates/core/src/io/cache_util.rs
Normal file
103
crates/core/src/io/cache_util.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use directories::ProjectDirs;
|
||||
|
||||
/// On-disk cache format version.
///
/// Written as the first byte of every cache file by `IndexCache::save`;
/// `IndexCache::load` rejects any file whose first byte differs from it.
pub const CACHE_VERSION: u8 = 1;
|
||||
|
||||
pub fn cache_dir() -> Option<PathBuf> {
|
||||
let proj_dirs = ProjectDirs::from("", "", "log-viewer")?;
|
||||
let cache_dir = proj_dirs.cache_dir().join("indexes");
|
||||
if cache_dir.exists() {
|
||||
Some(cache_dir)
|
||||
} else {
|
||||
std::fs::create_dir_all(&cache_dir).ok().map(|_| cache_dir)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn cache_path(file_path: &Path) -> Option<PathBuf> {
|
||||
let canonical = std::fs::canonicalize(file_path).ok()?;
|
||||
let hash = xxhash_rust::xxh3::xxh3_64(canonical.to_str()?.as_bytes());
|
||||
let dir = cache_dir()?;
|
||||
Some(dir.join(format!("{:016x}.index", hash)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    /// `cache_dir` yields an existing directory whose path names both the
    /// application and the `indexes` subdirectory.
    #[test]
    fn test_cache_dir_exists_or_creates() {
        let path = cache_dir().expect("cache_dir should return Some");
        assert!(path.exists(), "cache directory should exist on disk");
        assert!(
            path.to_string_lossy().contains("log-viewer"),
            "cache dir should contain 'log-viewer': {}",
            path.display()
        );
        assert!(
            path.to_string_lossy().contains("indexes"),
            "cache dir should contain 'indexes': {}",
            path.display()
        );
    }

    /// Different spellings of the same file resolve to one cache path.
    #[test]
    fn test_cache_path_canonicalized() {
        let file = NamedTempFile::new().unwrap();
        let raw_path = file.path();
        let canonical_path = raw_path.canonicalize().unwrap();

        // A spelling that is guaranteed to be non-canonical: `<dir>/./<name>`.
        let parent = raw_path.parent().expect("temp file should have a parent directory");
        let name = raw_path.file_name().expect("temp file should have a file name");
        let dotted_path = parent.join(".").join(name);

        let from_canonical =
            cache_path(&canonical_path).expect("cache_path with canonicalized path");
        let from_raw = cache_path(raw_path).expect("cache_path with raw temp path");
        let from_dotted = cache_path(&dotted_path).expect("cache_path with dotted path");

        assert_eq!(
            from_canonical, from_raw,
            "same file via different paths must produce same cache path"
        );
        assert_eq!(
            from_canonical, from_dotted,
            "same file via different paths must produce same cache path"
        );
        assert!(
            from_canonical.to_string_lossy().ends_with(".index"),
            "cache path should end with .index: {}",
            from_canonical.display()
        );
    }

    /// Distinct files get distinct cache paths; repeated calls are stable.
    #[test]
    fn test_cache_path_consistent_hashes() {
        let file1 = NamedTempFile::new().unwrap();
        let file2 = NamedTempFile::new().unwrap();

        let path1 = cache_path(file1.path()).expect("cache_path for file1");
        let path2 = cache_path(file2.path()).expect("cache_path for file2");

        assert_ne!(
            path1, path2,
            "different files must produce different cache paths"
        );

        let path1_again = cache_path(file1.path()).expect("cache_path for file1 again");
        assert_eq!(
            path1, path1_again,
            "same file must produce consistent cache path across calls"
        );
    }

    /// `cache_dir` must not panic; when it does return a path, that path must
    /// be a usable directory.
    #[test]
    fn test_cache_dir_graceful() {
        // The previous assertion (`is_some() || is_none()`) was always true;
        // check something that can actually fail instead.
        if let Some(dir) = cache_dir() {
            assert!(
                dir.is_dir(),
                "cache_dir returned a path that is not a directory: {}",
                dir.display()
            );
        }
    }

    /// A source path that does not exist has no cache path.
    #[test]
    fn test_cache_path_nonexistent_file() {
        let result = cache_path(Path::new("/nonexistent/path/to/file.log"));
        assert!(
            result.is_none(),
            "cache_path should return None for nonexistent file"
        );
    }
}
|
||||
245
crates/core/src/io/index_cache.rs
Normal file
245
crates/core/src/io/index_cache.rs
Normal file
@@ -0,0 +1,245 @@
|
||||
use std::io::{Read as _, Write as _};
|
||||
use std::path::Path;
|
||||
|
||||
use crate::io::cache_util::{cache_path, CACHE_VERSION};
|
||||
use crate::io::line_index::LineIndex;
|
||||
|
||||
/// Stateless entry point for persisting `LineIndex` values to disk and
/// reading them back; all functionality is exposed as associated functions.
pub struct IndexCache;
|
||||
|
||||
impl IndexCache {
|
||||
/// Save a `LineIndex` to disk using atomic write (write to .tmp, then rename).
|
||||
pub fn save(file_path: &Path, index: &LineIndex) -> std::io::Result<()> {
|
||||
let dest = cache_path(file_path).ok_or_else(|| {
|
||||
std::io::Error::new(std::io::ErrorKind::NotFound, "cannot determine cache path")
|
||||
})?;
|
||||
|
||||
let file_hash = compute_file_hash(file_path)?;
|
||||
let index_bytes = bincode::serialize(index)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
||||
|
||||
let mut buf = Vec::with_capacity(1 + 8 + index_bytes.len());
|
||||
buf.push(CACHE_VERSION);
|
||||
buf.extend_from_slice(&file_hash.to_le_bytes());
|
||||
buf.extend_from_slice(&index_bytes);
|
||||
|
||||
let tmp_path = dest.with_extension("index.tmp");
|
||||
{
|
||||
let mut f = std::fs::File::create(&tmp_path)?;
|
||||
f.write_all(&buf)?;
|
||||
f.sync_all()?;
|
||||
}
|
||||
std::fs::rename(&tmp_path, &dest)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load a cached `LineIndex` from disk.
|
||||
/// Returns `None` if: cache missing, version mismatch, file modified, or corruption.
|
||||
pub fn load(file_path: &Path) -> Option<LineIndex> {
|
||||
let path = cache_path(file_path)?;
|
||||
let data = std::fs::read(&path).ok()?;
|
||||
|
||||
if data.len() < 9 {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Validate version byte
|
||||
if data[0] != CACHE_VERSION {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Validate file hash
|
||||
let stored_hash = u64::from_le_bytes(data[1..9].try_into().ok()?);
|
||||
let current_hash = compute_file_hash(file_path).ok()?;
|
||||
if stored_hash != current_hash {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Deserialize index
|
||||
bincode::deserialize(&data[9..]).ok()
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute a fast fingerprint of the file: xxhash of (head 4KB + tail 4KB + file size).
|
||||
/// Returns 0 for empty files.
|
||||
fn compute_file_hash(file_path: &Path) -> std::io::Result<u64> {
|
||||
let file = std::fs::File::open(file_path)?;
|
||||
let file_size = file.metadata()?.len();
|
||||
|
||||
if file_size == 0 {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let mut f = std::io::BufReader::new(file);
|
||||
|
||||
let head_size = 4096.min(file_size as usize);
|
||||
let mut head = vec![0u8; head_size];
|
||||
f.read_exact(&mut head)?;
|
||||
|
||||
let tail_size = 4096.min(file_size as usize);
|
||||
let mut tail = vec![0u8; tail_size];
|
||||
|
||||
if file_size as usize > head_size + tail_size {
|
||||
// Need to seek to tail region
|
||||
let mut file = std::fs::File::open(file_path)?;
|
||||
std::io::Seek::seek(&mut file, std::io::SeekFrom::End(-(tail_size as i64)))?;
|
||||
let mut tf = std::io::BufReader::new(file);
|
||||
tf.read_exact(&mut tail)?;
|
||||
} else {
|
||||
// File is small enough that head already covers everything;
|
||||
// tail overlaps with head — just take the last tail_size bytes
|
||||
let start = head.len().saturating_sub(tail_size);
|
||||
tail = head[start..].to_vec();
|
||||
tail.resize(tail_size, 0);
|
||||
}
|
||||
|
||||
let mut hasher_state = xxhash_rust::xxh3::Xxh3::new();
|
||||
hasher_state.update(&head);
|
||||
hasher_state.update(&tail);
|
||||
hasher_state.update(&file_size.to_le_bytes());
|
||||
|
||||
Ok(hasher_state.digest())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write as _;
    use tempfile::NamedTempFile;

    /// Deletes the cache entry belonging to a source file when dropped.
    ///
    /// These tests write into the user's real cache directory; without this
    /// guard every run would leave behind one entry per temporary source file.
    /// The guard also runs during unwinding, so failed assertions clean up too.
    struct CacheCleanup(Option<std::path::PathBuf>);

    impl CacheCleanup {
        /// Resolves the cache path now, while the source file still exists.
        fn for_source(source: &Path) -> Self {
            Self(cache_path(source))
        }
    }

    impl Drop for CacheCleanup {
        fn drop(&mut self) {
            if let Some(path) = self.0.take() {
                // Best effort: the entry may never have been written.
                let _ = std::fs::remove_file(path);
            }
        }
    }

    /// Creates a temporary file containing `lines` lines of the form `line N`.
    fn make_test_file(lines: usize) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        for i in 0..lines {
            writeln!(file, "line {}", i).unwrap();
        }
        file.flush().unwrap();
        file
    }

    /// Builds the index for a file's current contents.
    fn index_of(file: &NamedTempFile) -> LineIndex {
        let data = std::fs::read(file.path()).unwrap();
        LineIndex::from_bytes(&data)
    }

    #[test]
    fn test_cache_roundtrip() {
        let file = make_test_file(300);
        let _cleanup = CacheCleanup::for_source(file.path());
        let index = index_of(&file);

        IndexCache::save(file.path(), &index).expect("save should succeed");
        let loaded = IndexCache::load(file.path()).expect("load should return Some");

        assert_eq!(loaded.line_count(), index.line_count());
        assert_eq!(loaded.sampled_offsets(), index.sampled_offsets());
        assert_eq!(loaded.has_trailing_newline(), index.has_trailing_newline());
    }

    #[test]
    fn test_cache_invalidation_file_modified() {
        let file = make_test_file(300);
        let _cleanup = CacheCleanup::for_source(file.path());
        let index = index_of(&file);

        IndexCache::save(file.path(), &index).expect("save should succeed");

        // Append to file, changing its content
        {
            let mut f = std::fs::OpenOptions::new()
                .append(true)
                .open(file.path())
                .unwrap();
            writeln!(f, "extra line").unwrap();
        }

        let loaded = IndexCache::load(file.path());
        assert!(
            loaded.is_none(),
            "cache should be invalidated after file modification"
        );
    }

    #[test]
    fn test_cache_corruption() {
        let file = make_test_file(300);
        let _cleanup = CacheCleanup::for_source(file.path());
        let index = index_of(&file);

        IndexCache::save(file.path(), &index).expect("save should succeed");

        // Corrupt the cache file by dropping its last 10 bytes.
        let cache_file = cache_path(file.path()).expect("cache path");
        let mut cache_data = std::fs::read(&cache_file).unwrap();
        let new_len = cache_data.len().saturating_sub(10);
        cache_data.truncate(new_len);
        std::fs::write(&cache_file, &cache_data).unwrap();

        let loaded = IndexCache::load(file.path());
        assert!(loaded.is_none(), "corrupt cache should return None");
    }

    #[test]
    fn test_cache_version_mismatch() {
        let file = make_test_file(300);
        let _cleanup = CacheCleanup::for_source(file.path());
        let index = index_of(&file);

        IndexCache::save(file.path(), &index).expect("save should succeed");

        // Modify first byte (version)
        let cache_file = cache_path(file.path()).expect("cache path");
        let mut cache_data = std::fs::read(&cache_file).unwrap();
        cache_data[0] = cache_data[0].wrapping_add(1);
        std::fs::write(&cache_file, &cache_data).unwrap();

        let loaded = IndexCache::load(file.path());
        assert!(loaded.is_none(), "version mismatch should return None");
    }

    #[test]
    fn test_cache_empty_file() {
        let file = NamedTempFile::new().unwrap();
        let _cleanup = CacheCleanup::for_source(file.path());
        let index = LineIndex::from_bytes(b"");

        IndexCache::save(file.path(), &index).expect("save should succeed");
        let loaded = IndexCache::load(file.path()).expect("load should return Some");

        assert_eq!(loaded.line_count(), 0);
    }

    #[test]
    fn test_cache_nonexistent_source() {
        let loaded = IndexCache::load(Path::new("/nonexistent/file.log"));
        assert!(loaded.is_none(), "nonexistent file should return None");
    }

    #[test]
    fn test_compute_file_hash_empty() {
        let file = NamedTempFile::new().unwrap();
        let hash = compute_file_hash(file.path()).unwrap();
        assert_eq!(hash, 0, "empty file should hash to 0");
    }

    #[test]
    fn test_compute_file_hash_deterministic() {
        let mut file = NamedTempFile::new().unwrap();
        write!(file, "hello world").unwrap();
        file.flush().unwrap();

        let h1 = compute_file_hash(file.path()).unwrap();
        let h2 = compute_file_hash(file.path()).unwrap();
        assert_eq!(h1, h2, "same file must produce same hash");
    }

    #[test]
    fn test_compute_file_hash_changes_on_content_change() {
        let mut file = NamedTempFile::new().unwrap();
        write!(file, "version 1").unwrap();
        file.flush().unwrap();
        let h1 = compute_file_hash(file.path()).unwrap();

        // Overwrite with different content
        let mut file2 = std::fs::File::create(file.path()).unwrap();
        write!(file2, "version 2 with more data").unwrap();
        file2.flush().unwrap();
        let h2 = compute_file_hash(file.path()).unwrap();

        assert_ne!(h1, h2, "hash should change when content changes");
    }
}
|
||||
Reference in New Issue
Block a user