fix: eliminate TOCTOU race in IndexCache::save (closes #5)

spawn_indexer builds LineIndex from mmap snapshot but IndexCache::save()
re-opened the file to compute the hash. If the file changed between those
two steps, the cached index would be stored under the wrong hash.

- Add IndexCache::save_with_hash() that computes hash from in-memory data
- Add compute_data_hash() public function (same algorithm as compute_file_hash)
- Update spawn_indexer, FileReader::save_cache, and test callers
This commit is contained in:
dailz
2026-06-04 13:30:16 +08:00
parent bef0b44e91
commit 1bb6b2e9f3
5 changed files with 66 additions and 5 deletions

View File

@@ -83,9 +83,8 @@ impl FileReader {
}
}
/// Save the line index cache to disk.
pub fn save_cache(&self) -> std::io::Result<()> {
IndexCache::save(&self.path, &self.line_index)?;
IndexCache::save_with_hash(&self.path, &self.line_index, self.data())?;
Ok(())
}

View File

@@ -8,6 +8,42 @@ pub struct IndexCache;
impl IndexCache {
/// Save a `LineIndex` to disk using atomic write (write to .tmp, then rename).
///
/// The file hash is derived from `data` (the same byte slice used to build the index),
/// avoiding TOCTOU issues from re-reading the file from disk.
pub fn save_with_hash(
file_path: &Path,
index: &LineIndex,
data: &[u8],
) -> std::io::Result<()> {
let dest = cache_path(file_path).ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::NotFound, "cannot determine cache path")
})?;
let file_hash = compute_data_hash(data);
let index_bytes = bincode::serialize(index)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
let mut buf = Vec::with_capacity(1 + 8 + index_bytes.len());
buf.push(CACHE_VERSION);
buf.extend_from_slice(&file_hash.to_le_bytes());
buf.extend_from_slice(&index_bytes);
let tmp_path = dest.with_extension("index.tmp");
{
let mut f = std::fs::File::create(&tmp_path)?;
f.write_all(&buf)?;
f.sync_all()?;
}
std::fs::rename(&tmp_path, &dest)?;
Ok(())
}
/// Save a `LineIndex` to disk, computing the hash by re-reading the file.
///
/// Prefer [`save_with_hash`] when the source data is already in memory
/// to avoid a TOCTOU race between indexing and hash computation.
pub fn save(file_path: &Path, index: &LineIndex) -> std::io::Result<()> {
let dest = cache_path(file_path).ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::NotFound, "cannot determine cache path")
@@ -60,6 +96,32 @@ impl IndexCache {
}
}
/// Compute a fingerprint from in-memory data, using the same algorithm as `compute_file_hash`.
/// Returns 0 for empty data.
pub fn compute_data_hash(data: &[u8]) -> u64 {
let len = data.len();
if len == 0 {
return 0;
}
let head_size = 4096.min(len);
let tail_size = 4096.min(len);
let mut hasher_state = xxhash_rust::xxh3::Xxh3::new();
hasher_state.update(&data[..head_size]);
if len > head_size + tail_size {
hasher_state.update(&data[len - tail_size..]);
} else {
// For small files the tail overlaps with the head; hash from the real tail start.
let tail_start = len.saturating_sub(tail_size);
hasher_state.update(&data[tail_start..]);
}
hasher_state.update(&(len as u64).to_le_bytes());
hasher_state.digest()
}
/// Compute a fast fingerprint of the file: xxhash of (head 4KB + tail 4KB + file size).
/// Returns 0 for empty files.
fn compute_file_hash(file_path: &Path) -> std::io::Result<u64> {

View File

@@ -314,7 +314,7 @@ pub fn spawn_indexer(
let line_index = LineIndex::from_bytes(data);
let _ = IndexCache::save(&path, &line_index);
let _ = IndexCache::save_with_hash(&path, &line_index, data);
let reader = FileReader::from_parts(path, mmap, line_index);

View File

@@ -1039,7 +1039,7 @@ mod tests {
fn load_file_ready(app: &mut App, path: &std::path::Path) {
let data = std::fs::read(path).unwrap();
let index = log_viewer_core::io::line_index::LineIndex::from_bytes(&data);
let _ = log_viewer_core::io::index_cache::IndexCache::save(path, &index);
let _ = log_viewer_core::io::index_cache::IndexCache::save_with_hash(path, &index, &data);
app.load_file(path.to_str().unwrap()).unwrap();
}

View File

@@ -469,7 +469,7 @@ mod tests {
let result = std::panic::catch_unwind(|| {
let data = std::fs::read(&path).unwrap();
let index = log_viewer_core::io::line_index::LineIndex::from_bytes(&data);
let _ = log_viewer_core::io::index_cache::IndexCache::save(&path, &index);
let _ = log_viewer_core::io::index_cache::IndexCache::save_with_hash(&path, &index, &data);
let mut app = App::new();
app.load_file(path.to_str().unwrap()).unwrap();