fix(io): eliminate TOCTOU race in FileReader open/reload
Replace LineIndex::from_reader(BufReader) with LineIndex::from_bytes(&mmap) in both open() and reload(), ensuring the line index is always built from the same mmap snapshot rather than a separate read through the file descriptor. This closes the race window where an external file modification between mmap() and from_reader() could cause line_index offsets to disagree with the mmap data, leading to get_line() returning wrong content or panicking. Closes #2
This commit is contained in:
@@ -41,12 +41,11 @@ impl FileReader {
|
||||
Some(unsafe { memmap2::Mmap::map(&file) }.map_err(|e| CoreError::Mmap(e.to_string()))?)
|
||||
};
|
||||
|
||||
let line_index = {
|
||||
let mut reader = std::io::BufReader::new(&file);
|
||||
LineIndex::from_reader(&mut reader).map_err(|e| CoreError::Io {
|
||||
source: e,
|
||||
context: "building line index".into(),
|
||||
})?
|
||||
// 直接从 mmap 快照构建行索引,确保索引与数据来自同一内存映射,
|
||||
// 消除 mmap + BufReader 双读之间的 TOCTOU 竞态窗口。
|
||||
let line_index = match &mmap {
|
||||
Some(m) => LineIndex::from_bytes(m.as_ref()),
|
||||
None => LineIndex::from_bytes(&[]),
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
@@ -107,11 +106,10 @@ impl FileReader {
|
||||
);
|
||||
}
|
||||
|
||||
let mut reader = std::io::BufReader::new(&file);
|
||||
self.line_index = LineIndex::from_reader(&mut reader).map_err(|e| CoreError::Io {
|
||||
source: e,
|
||||
context: "rebuilding line index on reload".into(),
|
||||
})?;
|
||||
self.line_index = match &self.mmap {
|
||||
Some(m) => LineIndex::from_bytes(m.as_ref()),
|
||||
None => LineIndex::from_bytes(&[]),
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -385,4 +383,138 @@ mod tests {
|
||||
let idx = reader.line_index();
|
||||
assert_eq!(idx.line_count(), 2);
|
||||
}
|
||||
|
||||
// ─── TOCTOU 修复验证 ─────────────────────────────────────────────────
|
||||
// open() 和 reload() 现在从 mmap 快照构建索引(from_bytes),
|
||||
// 而非从独立 BufReader 读取(from_reader)。以下测试验证:
|
||||
// 1. from_bytes 与 from_reader 产出完全一致的索引
|
||||
// 2. 修改文件后 reload 仍返回正确内容(无残留旧状态)
|
||||
// 3. 极端情况下(空文件、单行、跨块)open+reload 一致性
|
||||
|
||||
#[test]
|
||||
fn test_open_from_bytes_matches_from_reader() {
|
||||
let cases: Vec<&[u8]> = vec![
|
||||
b"",
|
||||
b"hello",
|
||||
b"hello\n",
|
||||
b"aaa\nbbb\nccc",
|
||||
b"\n",
|
||||
b"a\n\nb",
|
||||
];
|
||||
|
||||
let mut large_cases = Vec::new();
|
||||
for &n in &[256usize, 300, 512] {
|
||||
let mut buf = Vec::new();
|
||||
for i in 0..n {
|
||||
buf.extend_from_slice(format!("line{}\n", i).as_bytes());
|
||||
}
|
||||
large_cases.push(buf);
|
||||
}
|
||||
|
||||
let all_contents: Vec<&[u8]> = cases
|
||||
.iter()
|
||||
.copied()
|
||||
.chain(large_cases.iter().map(Vec::as_slice))
|
||||
.collect();
|
||||
|
||||
for content in all_contents {
|
||||
let f = create_temp_file(content);
|
||||
let reader = FileReader::open(f.path()).unwrap();
|
||||
|
||||
let from_bytes_idx = LineIndex::from_bytes(content);
|
||||
let mut buf_reader = std::io::BufReader::new(content);
|
||||
let from_reader_idx = LineIndex::from_reader(&mut buf_reader).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
reader.line_index().total_lines(),
|
||||
from_bytes_idx.total_lines(),
|
||||
"open() line_count should match from_bytes for {:?}B file",
|
||||
content.len()
|
||||
);
|
||||
assert_eq!(
|
||||
from_bytes_idx.total_lines(),
|
||||
from_reader_idx.total_lines(),
|
||||
"from_bytes should match from_reader total_lines for {:?}B file",
|
||||
content.len()
|
||||
);
|
||||
assert_eq!(
|
||||
from_bytes_idx.sampled_offsets(),
|
||||
from_reader_idx.sampled_offsets(),
|
||||
"from_bytes should match from_reader sampled_offsets for {:?}B file",
|
||||
content.len()
|
||||
);
|
||||
assert_eq!(
|
||||
from_bytes_idx.has_trailing_newline(),
|
||||
from_reader_idx.has_trailing_newline(),
|
||||
"from_bytes should match from_reader has_trailing_newline for {:?}B file",
|
||||
content.len()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_reload_after_external_modify_returns_correct_content() {
|
||||
let f = create_temp_file(b"aaa\nbbb\n");
|
||||
let mut reader = FileReader::open(f.path()).unwrap();
|
||||
assert_eq!(reader.get_line(0), Some("aaa"));
|
||||
assert_eq!(reader.get_line(1), Some("bbb"));
|
||||
assert_eq!(reader.line_count(), 2);
|
||||
|
||||
// 外部修改:覆盖写入新内容
|
||||
{
|
||||
use std::io::Write;
|
||||
std::fs::write(f.path(), b"xxx\nyyy\nzzz\n").unwrap();
|
||||
}
|
||||
|
||||
reader.reload().unwrap();
|
||||
assert_eq!(reader.line_count(), 3);
|
||||
assert_eq!(reader.get_line(0), Some("xxx"));
|
||||
assert_eq!(reader.get_line(1), Some("yyy"));
|
||||
assert_eq!(reader.get_line(2), Some("zzz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_reload_after_truncate_then_rewrite_no_stale_data() {
|
||||
let f = create_temp_file(b"line0\nline1\nline2\nline3\n");
|
||||
let mut reader = FileReader::open(f.path()).unwrap();
|
||||
assert_eq!(reader.line_count(), 4);
|
||||
|
||||
// 截断后写入更短内容
|
||||
{
|
||||
use std::io::Write;
|
||||
let mut file = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(f.path())
|
||||
.unwrap();
|
||||
file.write_all(b"new\n").unwrap();
|
||||
}
|
||||
|
||||
reader.reload().unwrap();
|
||||
assert_eq!(reader.line_count(), 1);
|
||||
assert_eq!(reader.get_line(0), Some("new"));
|
||||
assert_eq!(reader.get_line(1), None, "old line should not be accessible");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_open_reload_idempotent_cross_block() {
|
||||
let mut content = Vec::new();
|
||||
for i in 0..600u32 {
|
||||
content.extend_from_slice(format!("line{}\n", i).as_bytes());
|
||||
}
|
||||
let f = create_temp_file(&content);
|
||||
|
||||
let reader = FileReader::open(f.path()).unwrap();
|
||||
let mut reloaded = FileReader::open(f.path()).unwrap();
|
||||
reloaded.reload().unwrap();
|
||||
|
||||
assert_eq!(reader.line_count(), reloaded.line_count());
|
||||
for i in 0..600 {
|
||||
assert_eq!(
|
||||
reader.get_line(i),
|
||||
reloaded.get_line(i),
|
||||
"line {i} mismatch between open and reload"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user