fix: eliminate TOCTOU race in IndexCache::save (closes #5)
spawn_indexer builds LineIndex from mmap snapshot but IndexCache::save() re-opened the file to compute the hash. If the file changed between those two steps, the cached index would be stored under the wrong hash. - Add IndexCache::save_with_hash() that computes hash from in-memory data - Add compute_data_hash() public function (same algorithm as compute_file_hash) - Update spawn_indexer, FileReader::save_cache, and test callers
This commit is contained in:
@@ -83,9 +83,8 @@ impl FileReader {
|
||||
}
|
||||
}
|
||||
|
||||
/// Save the line index cache to disk.
|
||||
pub fn save_cache(&self) -> std::io::Result<()> {
|
||||
IndexCache::save(&self.path, &self.line_index)?;
|
||||
IndexCache::save_with_hash(&self.path, &self.line_index, self.data())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,42 @@ pub struct IndexCache;
|
||||
|
||||
impl IndexCache {
|
||||
/// Save a `LineIndex` to disk using atomic write (write to .tmp, then rename).
|
||||
///
|
||||
/// The file hash is derived from `data` (the same byte slice used to build the index),
|
||||
/// avoiding TOCTOU issues from re-reading the file from disk.
|
||||
pub fn save_with_hash(
|
||||
file_path: &Path,
|
||||
index: &LineIndex,
|
||||
data: &[u8],
|
||||
) -> std::io::Result<()> {
|
||||
let dest = cache_path(file_path).ok_or_else(|| {
|
||||
std::io::Error::new(std::io::ErrorKind::NotFound, "cannot determine cache path")
|
||||
})?;
|
||||
|
||||
let file_hash = compute_data_hash(data);
|
||||
let index_bytes = bincode::serialize(index)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
||||
|
||||
let mut buf = Vec::with_capacity(1 + 8 + index_bytes.len());
|
||||
buf.push(CACHE_VERSION);
|
||||
buf.extend_from_slice(&file_hash.to_le_bytes());
|
||||
buf.extend_from_slice(&index_bytes);
|
||||
|
||||
let tmp_path = dest.with_extension("index.tmp");
|
||||
{
|
||||
let mut f = std::fs::File::create(&tmp_path)?;
|
||||
f.write_all(&buf)?;
|
||||
f.sync_all()?;
|
||||
}
|
||||
std::fs::rename(&tmp_path, &dest)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Save a `LineIndex` to disk, computing the hash by re-reading the file.
|
||||
///
|
||||
/// Prefer [`save_with_hash`] when the source data is already in memory
|
||||
/// to avoid a TOCTOU race between indexing and hash computation.
|
||||
pub fn save(file_path: &Path, index: &LineIndex) -> std::io::Result<()> {
|
||||
let dest = cache_path(file_path).ok_or_else(|| {
|
||||
std::io::Error::new(std::io::ErrorKind::NotFound, "cannot determine cache path")
|
||||
@@ -60,6 +96,32 @@ impl IndexCache {
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute a fingerprint from in-memory data, using the same algorithm as `compute_file_hash`.
|
||||
/// Returns 0 for empty data.
|
||||
pub fn compute_data_hash(data: &[u8]) -> u64 {
|
||||
let len = data.len();
|
||||
if len == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let head_size = 4096.min(len);
|
||||
let tail_size = 4096.min(len);
|
||||
|
||||
let mut hasher_state = xxhash_rust::xxh3::Xxh3::new();
|
||||
hasher_state.update(&data[..head_size]);
|
||||
|
||||
if len > head_size + tail_size {
|
||||
hasher_state.update(&data[len - tail_size..]);
|
||||
} else {
|
||||
// For small files the tail overlaps with the head; hash from the real tail start.
|
||||
let tail_start = len.saturating_sub(tail_size);
|
||||
hasher_state.update(&data[tail_start..]);
|
||||
}
|
||||
hasher_state.update(&(len as u64).to_le_bytes());
|
||||
|
||||
hasher_state.digest()
|
||||
}
|
||||
|
||||
/// Compute a fast fingerprint of the file: xxhash of (head 4KB + tail 4KB + file size).
|
||||
/// Returns 0 for empty files.
|
||||
fn compute_file_hash(file_path: &Path) -> std::io::Result<u64> {
|
||||
|
||||
@@ -314,7 +314,7 @@ pub fn spawn_indexer(
|
||||
|
||||
let line_index = LineIndex::from_bytes(data);
|
||||
|
||||
let _ = IndexCache::save(&path, &line_index);
|
||||
let _ = IndexCache::save_with_hash(&path, &line_index, data);
|
||||
|
||||
let reader = FileReader::from_parts(path, mmap, line_index);
|
||||
|
||||
|
||||
@@ -1039,7 +1039,7 @@ mod tests {
|
||||
fn load_file_ready(app: &mut App, path: &std::path::Path) {
|
||||
let data = std::fs::read(path).unwrap();
|
||||
let index = log_viewer_core::io::line_index::LineIndex::from_bytes(&data);
|
||||
let _ = log_viewer_core::io::index_cache::IndexCache::save(path, &index);
|
||||
let _ = log_viewer_core::io::index_cache::IndexCache::save_with_hash(path, &index, &data);
|
||||
app.load_file(path.to_str().unwrap()).unwrap();
|
||||
}
|
||||
|
||||
|
||||
@@ -469,7 +469,7 @@ mod tests {
|
||||
let result = std::panic::catch_unwind(|| {
|
||||
let data = std::fs::read(&path).unwrap();
|
||||
let index = log_viewer_core::io::line_index::LineIndex::from_bytes(&data);
|
||||
let _ = log_viewer_core::io::index_cache::IndexCache::save(&path, &index);
|
||||
let _ = log_viewer_core::io::index_cache::IndexCache::save_with_hash(&path, &index, &data);
|
||||
|
||||
let mut app = App::new();
|
||||
app.load_file(path.to_str().unwrap()).unwrap();
|
||||
|
||||
Reference in New Issue
Block a user