From 24fe97a457533c4b3cd20160332dec7bdad5113f Mon Sep 17 00:00:00 2001 From: dailz Date: Wed, 3 Jun 2026 15:08:22 +0800 Subject: [PATCH] fix(io): eliminate TOCTOU race in FileReader open/reload Replace LineIndex::from_reader(BufReader) with LineIndex::from_bytes(&mmap) in both open() and reload(), ensuring the line index is always built from the same mmap snapshot rather than a separate read through the file descriptor. This closes the race window where an external file modification between mmap() and from_reader() could cause line_index offsets to disagree with the mmap data, leading to get_line() returning wrong content or panicking. Closes #2 --- crates/core/src/io/file_reader.rs | 154 +++++++++++++++++++++++++++--- 1 file changed, 143 insertions(+), 11 deletions(-) diff --git a/crates/core/src/io/file_reader.rs b/crates/core/src/io/file_reader.rs index fa1725b..2267bb8 100644 --- a/crates/core/src/io/file_reader.rs +++ b/crates/core/src/io/file_reader.rs @@ -41,12 +41,11 @@ impl FileReader { Some(unsafe { memmap2::Mmap::map(&file) }.map_err(|e| CoreError::Mmap(e.to_string()))?) }; - let line_index = { - let mut reader = std::io::BufReader::new(&file); - LineIndex::from_reader(&mut reader).map_err(|e| CoreError::Io { - source: e, - context: "building line index".into(), - })? + // 直接从 mmap 快照构建行索引,确保索引与数据来自同一内存映射, + // 消除 mmap + BufReader 双读之间的 TOCTOU 竞态窗口。 + let line_index = match &mmap { + Some(m) => LineIndex::from_bytes(m.as_ref()), + None => LineIndex::from_bytes(&[]), }; Ok(Self { @@ -107,11 +106,10 @@ impl FileReader { ); } - let mut reader = std::io::BufReader::new(&file); - self.line_index = LineIndex::from_reader(&mut reader).map_err(|e| CoreError::Io { - source: e, - context: "rebuilding line index on reload".into(), - })?; + self.line_index = match &self.mmap { + Some(m) => LineIndex::from_bytes(m.as_ref()), + None => LineIndex::from_bytes(&[]), + }; Ok(()) } @@ -385,4 +383,138 @@ mod tests { let idx = reader.line_index(); assert_eq!(idx.line_count(), 2); } + + // ─── TOCTOU 修复验证 ───────────────────────────────────────────────── + // open() 和 reload() 现在从 mmap 快照构建索引(from_bytes), + // 而非从独立 BufReader 读取(from_reader)。以下测试验证: + // 1. from_bytes 与 from_reader 产出完全一致的索引 + // 2. 修改文件后 reload 仍返回正确内容(无残留旧状态) + // 3. 极端情况下(空文件、单行、跨块)open+reload 一致性 + + #[test] + fn test_open_from_bytes_matches_from_reader() { + let cases: Vec<&[u8]> = vec![ + b"", + b"hello", + b"hello\n", + b"aaa\nbbb\nccc", + b"\n", + b"a\n\nb", + ]; + + let mut large_cases = Vec::new(); + for &n in &[256usize, 300, 512] { + let mut buf = Vec::new(); + for i in 0..n { + buf.extend_from_slice(format!("line{}\n", i).as_bytes()); + } + large_cases.push(buf); + } + + let all_contents: Vec<&[u8]> = cases + .iter() + .copied() + .chain(large_cases.iter().map(Vec::as_slice)) + .collect(); + + for content in all_contents { + let f = create_temp_file(content); + let reader = FileReader::open(f.path()).unwrap(); + + let from_bytes_idx = LineIndex::from_bytes(content); + let mut buf_reader = std::io::BufReader::new(content); + let from_reader_idx = LineIndex::from_reader(&mut buf_reader).unwrap(); + + assert_eq!( + reader.line_index().total_lines(), + from_bytes_idx.total_lines(), + "open() line_count should match from_bytes for {:?}B file", + content.len() + ); + assert_eq!( + from_bytes_idx.total_lines(), + from_reader_idx.total_lines(), + "from_bytes should match from_reader total_lines for {:?}B file", + content.len() + ); + assert_eq!( + from_bytes_idx.sampled_offsets(), + from_reader_idx.sampled_offsets(), + "from_bytes should match from_reader sampled_offsets for {:?}B file", + content.len() + ); + assert_eq!( + from_bytes_idx.has_trailing_newline(), + from_reader_idx.has_trailing_newline(), + "from_bytes should match from_reader has_trailing_newline for {:?}B file", + content.len() + ); + } + } + + #[test] + fn test_reload_after_external_modify_returns_correct_content() { + let f = create_temp_file(b"aaa\nbbb\n"); + let mut reader = FileReader::open(f.path()).unwrap(); + assert_eq!(reader.get_line(0), Some("aaa")); + assert_eq!(reader.get_line(1), Some("bbb")); + assert_eq!(reader.line_count(), 2); + + // 外部修改:覆盖写入新内容 + { + use std::io::Write; + std::fs::write(f.path(), b"xxx\nyyy\nzzz\n").unwrap(); + } + + reader.reload().unwrap(); + assert_eq!(reader.line_count(), 3); + assert_eq!(reader.get_line(0), Some("xxx")); + assert_eq!(reader.get_line(1), Some("yyy")); + assert_eq!(reader.get_line(2), Some("zzz")); + } + + #[test] + fn test_reload_after_truncate_then_rewrite_no_stale_data() { + let f = create_temp_file(b"line0\nline1\nline2\nline3\n"); + let mut reader = FileReader::open(f.path()).unwrap(); + assert_eq!(reader.line_count(), 4); + + // 截断后写入更短内容 + { + use std::io::Write; + let mut file = std::fs::OpenOptions::new() + .write(true) + .truncate(true) + .open(f.path()) + .unwrap(); + file.write_all(b"new\n").unwrap(); + } + + reader.reload().unwrap(); + assert_eq!(reader.line_count(), 1); + assert_eq!(reader.get_line(0), Some("new")); + assert_eq!(reader.get_line(1), None, "old line should not be accessible"); + } + + #[test] + fn test_open_reload_idempotent_cross_block() { + let mut content = Vec::new(); + for i in 0..600u32 { + content.extend_from_slice(format!("line{}\n", i).as_bytes()); + } + let f = create_temp_file(&content); + + let reader = FileReader::open(f.path()).unwrap(); + let mut reloaded = FileReader::open(f.path()).unwrap(); + reloaded.reload().unwrap(); + + assert_eq!(reader.line_count(), reloaded.line_count()); + for i in 0..600 { + assert_eq!( + reader.get_line(i), + reloaded.get_line(i), + "line {i} mismatch between open and reload" + ); + } + } }