diff --git a/crates/core/src/io/file_reader.rs b/crates/core/src/io/file_reader.rs index 2267bb8..00fca93 100644 --- a/crates/core/src/io/file_reader.rs +++ b/crates/core/src/io/file_reader.rs @@ -83,9 +83,8 @@ impl FileReader { } } - /// Save the line index cache to disk. pub fn save_cache(&self) -> std::io::Result<()> { - IndexCache::save(&self.path, &self.line_index)?; + IndexCache::save_with_hash(&self.path, &self.line_index, self.data())?; Ok(()) } diff --git a/crates/core/src/io/index_cache.rs b/crates/core/src/io/index_cache.rs index fdad8e8..b5e1455 100644 --- a/crates/core/src/io/index_cache.rs +++ b/crates/core/src/io/index_cache.rs @@ -8,6 +8,42 @@ pub struct IndexCache; impl IndexCache { /// Save a `LineIndex` to disk using atomic write (write to .tmp, then rename). + /// + /// The file hash is derived from `data` (the same byte slice used to build the index), + /// avoiding TOCTOU issues from re-reading the file from disk. + pub fn save_with_hash( + file_path: &Path, + index: &LineIndex, + data: &[u8], + ) -> std::io::Result<()> { + let dest = cache_path(file_path).ok_or_else(|| { + std::io::Error::new(std::io::ErrorKind::NotFound, "cannot determine cache path") + })?; + + let file_hash = compute_data_hash(data); + let index_bytes = bincode::serialize(index) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?; + + let mut buf = Vec::with_capacity(1 + 8 + index_bytes.len()); + buf.push(CACHE_VERSION); + buf.extend_from_slice(&file_hash.to_le_bytes()); + buf.extend_from_slice(&index_bytes); + + let tmp_path = dest.with_extension("index.tmp"); + { + let mut f = std::fs::File::create(&tmp_path)?; + f.write_all(&buf)?; + f.sync_all()?; + } + std::fs::rename(&tmp_path, &dest)?; + + Ok(()) + } + + /// Save a `LineIndex` to disk, computing the hash by re-reading the file. + /// + /// Prefer [`save_with_hash`] when the source data is already in memory + /// to avoid a TOCTOU race between indexing and hash computation. pub fn save(file_path: &Path, index: &LineIndex) -> std::io::Result<()> { let dest = cache_path(file_path).ok_or_else(|| { std::io::Error::new(std::io::ErrorKind::NotFound, "cannot determine cache path") @@ -60,6 +96,32 @@ impl IndexCache { } } +/// Compute a fingerprint from in-memory data, using the same algorithm as `compute_file_hash`. +/// Returns 0 for empty data. +pub fn compute_data_hash(data: &[u8]) -> u64 { + let len = data.len(); + if len == 0 { + return 0; + } + + let head_size = 4096.min(len); + let tail_size = 4096.min(len); + + let mut hasher_state = xxhash_rust::xxh3::Xxh3::new(); + hasher_state.update(&data[..head_size]); + + if len > head_size + tail_size { + hasher_state.update(&data[len - tail_size..]); + } else { + // For small files the tail overlaps with the head; hash from the real tail start. + let tail_start = len.saturating_sub(tail_size); + hasher_state.update(&data[tail_start..]); + } + hasher_state.update(&(len as u64).to_le_bytes()); + + hasher_state.digest() +} + /// Compute a fast fingerprint of the file: xxhash of (head 4KB + tail 4KB + file size). /// Returns 0 for empty files. fn compute_file_hash(file_path: &Path) -> std::io::Result { diff --git a/crates/core/src/io/progressive_reader.rs b/crates/core/src/io/progressive_reader.rs index bbb9dcf..49adc02 100644 --- a/crates/core/src/io/progressive_reader.rs +++ b/crates/core/src/io/progressive_reader.rs @@ -314,7 +314,7 @@ pub fn spawn_indexer( let line_index = LineIndex::from_bytes(data); - let _ = IndexCache::save(&path, &line_index); + let _ = IndexCache::save_with_hash(&path, &line_index, data); let reader = FileReader::from_parts(path, mmap, line_index); diff --git a/crates/tui/src/app.rs b/crates/tui/src/app.rs index e953556..b96a605 100644 --- a/crates/tui/src/app.rs +++ b/crates/tui/src/app.rs @@ -1039,7 +1039,7 @@ mod tests { fn load_file_ready(app: &mut App, path: &std::path::Path) { let data = std::fs::read(path).unwrap(); let index = log_viewer_core::io::line_index::LineIndex::from_bytes(&data); - let _ = log_viewer_core::io::index_cache::IndexCache::save(path, &index); + let _ = log_viewer_core::io::index_cache::IndexCache::save_with_hash(path, &index, &data); app.load_file(path.to_str().unwrap()).unwrap(); } diff --git a/crates/tui/src/ui.rs b/crates/tui/src/ui.rs index ce1d0a2..6c57748 100644 --- a/crates/tui/src/ui.rs +++ b/crates/tui/src/ui.rs @@ -469,7 +469,7 @@ mod tests { let result = std::panic::catch_unwind(|| { let data = std::fs::read(&path).unwrap(); let index = log_viewer_core::io::line_index::LineIndex::from_bytes(&data); - let _ = log_viewer_core::io::index_cache::IndexCache::save(&path, &index); + let _ = log_viewer_core::io::index_cache::IndexCache::save_with_hash(&path, &index, &data); let mut app = App::new(); app.load_file(path.to_str().unwrap()).unwrap();