fix(io): eliminate TOCTOU race in FileReader open/reload

Replace LineIndex::from_reader(BufReader) with LineIndex::from_bytes(&mmap)
in both open() and reload(), ensuring the line index is always built from
the same mmap snapshot rather than a separate read through the file descriptor.

This closes the race window where an external file modification between
mmap() and from_reader() could cause line_index offsets to disagree with
the mmap data, leading to get_line() returning wrong content or panicking.

Closes #2
This commit is contained in:
dailz
2026-06-03 15:08:22 +08:00
parent b6e655bff6
commit 24fe97a457

View File

@@ -41,12 +41,11 @@ impl FileReader {
Some(unsafe { memmap2::Mmap::map(&file) }.map_err(|e| CoreError::Mmap(e.to_string()))?)
};
let line_index = {
let mut reader = std::io::BufReader::new(&file);
LineIndex::from_reader(&mut reader).map_err(|e| CoreError::Io {
source: e,
context: "building line index".into(),
})?
// 直接从 mmap 快照构建行索引,确保索引与数据来自同一内存映射,
// 消除 mmap + BufReader 双读之间的 TOCTOU 竞态窗口。
let line_index = match &mmap {
Some(m) => LineIndex::from_bytes(m.as_ref()),
None => LineIndex::from_bytes(&[]),
};
Ok(Self {
@@ -107,11 +106,10 @@ impl FileReader {
);
}
let mut reader = std::io::BufReader::new(&file);
self.line_index = LineIndex::from_reader(&mut reader).map_err(|e| CoreError::Io {
source: e,
context: "rebuilding line index on reload".into(),
})?;
self.line_index = match &self.mmap {
Some(m) => LineIndex::from_bytes(m.as_ref()),
None => LineIndex::from_bytes(&[]),
};
Ok(())
}
@@ -385,4 +383,138 @@ mod tests {
let idx = reader.line_index();
assert_eq!(idx.line_count(), 2);
}
// ─── TOCTOU 修复验证 ─────────────────────────────────────────────────
// open() 和 reload() 现在从 mmap 快照构建索引from_bytes
// 而非从独立 BufReader 读取from_reader。以下测试验证
// 1. from_bytes 与 from_reader 产出完全一致的索引
// 2. 修改文件后 reload 仍返回正确内容(无残留旧状态)
// 3. 极端情况下空文件、单行、跨块open+reload 一致性
#[test]
fn test_open_from_bytes_matches_from_reader() {
let cases: Vec<&[u8]> = vec![
b"",
b"hello",
b"hello\n",
b"aaa\nbbb\nccc",
b"\n",
b"a\n\nb",
];
let mut large_cases = Vec::new();
for &n in &[256usize, 300, 512] {
let mut buf = Vec::new();
for i in 0..n {
buf.extend_from_slice(format!("line{}\n", i).as_bytes());
}
large_cases.push(buf);
}
let all_contents: Vec<&[u8]> = cases
.iter()
.copied()
.chain(large_cases.iter().map(Vec::as_slice))
.collect();
for content in all_contents {
let f = create_temp_file(content);
let reader = FileReader::open(f.path()).unwrap();
let from_bytes_idx = LineIndex::from_bytes(content);
let mut buf_reader = std::io::BufReader::new(content);
let from_reader_idx = LineIndex::from_reader(&mut buf_reader).unwrap();
assert_eq!(
reader.line_index().total_lines(),
from_bytes_idx.total_lines(),
"open() line_count should match from_bytes for {:?}B file",
content.len()
);
assert_eq!(
from_bytes_idx.total_lines(),
from_reader_idx.total_lines(),
"from_bytes should match from_reader total_lines for {:?}B file",
content.len()
);
assert_eq!(
from_bytes_idx.sampled_offsets(),
from_reader_idx.sampled_offsets(),
"from_bytes should match from_reader sampled_offsets for {:?}B file",
content.len()
);
assert_eq!(
from_bytes_idx.has_trailing_newline(),
from_reader_idx.has_trailing_newline(),
"from_bytes should match from_reader has_trailing_newline for {:?}B file",
content.len()
);
}
}
#[test]
fn test_reload_after_external_modify_returns_correct_content() {
let f = create_temp_file(b"aaa\nbbb\n");
let mut reader = FileReader::open(f.path()).unwrap();
assert_eq!(reader.get_line(0), Some("aaa"));
assert_eq!(reader.get_line(1), Some("bbb"));
assert_eq!(reader.line_count(), 2);
// 外部修改:覆盖写入新内容
{
use std::io::Write;
std::fs::write(f.path(), b"xxx\nyyy\nzzz\n").unwrap();
}
reader.reload().unwrap();
assert_eq!(reader.line_count(), 3);
assert_eq!(reader.get_line(0), Some("xxx"));
assert_eq!(reader.get_line(1), Some("yyy"));
assert_eq!(reader.get_line(2), Some("zzz"));
}
#[test]
fn test_reload_after_truncate_then_rewrite_no_stale_data() {
let f = create_temp_file(b"line0\nline1\nline2\nline3\n");
let mut reader = FileReader::open(f.path()).unwrap();
assert_eq!(reader.line_count(), 4);
// 截断后写入更短内容
{
use std::io::Write;
let mut file = std::fs::OpenOptions::new()
.write(true)
.truncate(true)
.open(f.path())
.unwrap();
file.write_all(b"new\n").unwrap();
}
reader.reload().unwrap();
assert_eq!(reader.line_count(), 1);
assert_eq!(reader.get_line(0), Some("new"));
assert_eq!(reader.get_line(1), None, "old line should not be accessible");
}
#[test]
fn test_open_reload_idempotent_cross_block() {
let mut content = Vec::new();
for i in 0..600u32 {
content.extend_from_slice(format!("line{}\n", i).as_bytes());
}
let f = create_temp_file(&content);
let reader = FileReader::open(f.path()).unwrap();
let mut reloaded = FileReader::open(f.path()).unwrap();
reloaded.reload().unwrap();
assert_eq!(reader.line_count(), reloaded.line_count());
for i in 0..600 {
assert_eq!(
reader.get_line(i),
reloaded.get_line(i),
"line {i} mismatch between open and reload"
);
}
}
}