fix(io): eliminate TOCTOU race in FileReader open/reload
Replace LineIndex::from_reader(BufReader) with LineIndex::from_bytes(&mmap) in both open() and reload(), ensuring the line index is always built from the same mmap snapshot rather than a separate read through the file descriptor. This closes the race window where an external file modification between mmap() and from_reader() could cause line_index offsets to disagree with the mmap data, leading to get_line() returning wrong content or panicking. Closes #2
This commit is contained in:
@@ -41,12 +41,11 @@ impl FileReader {
|
|||||||
Some(unsafe { memmap2::Mmap::map(&file) }.map_err(|e| CoreError::Mmap(e.to_string()))?)
|
Some(unsafe { memmap2::Mmap::map(&file) }.map_err(|e| CoreError::Mmap(e.to_string()))?)
|
||||||
};
|
};
|
||||||
|
|
||||||
let line_index = {
|
// 直接从 mmap 快照构建行索引,确保索引与数据来自同一内存映射,
|
||||||
let mut reader = std::io::BufReader::new(&file);
|
// 消除 mmap + BufReader 双读之间的 TOCTOU 竞态窗口。
|
||||||
LineIndex::from_reader(&mut reader).map_err(|e| CoreError::Io {
|
let line_index = match &mmap {
|
||||||
source: e,
|
Some(m) => LineIndex::from_bytes(m.as_ref()),
|
||||||
context: "building line index".into(),
|
None => LineIndex::from_bytes(&[]),
|
||||||
})?
|
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
@@ -107,11 +106,10 @@ impl FileReader {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut reader = std::io::BufReader::new(&file);
|
self.line_index = match &self.mmap {
|
||||||
self.line_index = LineIndex::from_reader(&mut reader).map_err(|e| CoreError::Io {
|
Some(m) => LineIndex::from_bytes(m.as_ref()),
|
||||||
source: e,
|
None => LineIndex::from_bytes(&[]),
|
||||||
context: "rebuilding line index on reload".into(),
|
};
|
||||||
})?;
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -385,4 +383,138 @@ mod tests {
|
|||||||
let idx = reader.line_index();
|
let idx = reader.line_index();
|
||||||
assert_eq!(idx.line_count(), 2);
|
assert_eq!(idx.line_count(), 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ─── TOCTOU 修复验证 ─────────────────────────────────────────────────
|
||||||
|
// open() 和 reload() 现在从 mmap 快照构建索引(from_bytes),
|
||||||
|
// 而非从独立 BufReader 读取(from_reader)。以下测试验证:
|
||||||
|
// 1. from_bytes 与 from_reader 产出完全一致的索引
|
||||||
|
// 2. 修改文件后 reload 仍返回正确内容(无残留旧状态)
|
||||||
|
// 3. 极端情况下(空文件、单行、跨块)open+reload 一致性
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_open_from_bytes_matches_from_reader() {
|
||||||
|
let cases: Vec<&[u8]> = vec![
|
||||||
|
b"",
|
||||||
|
b"hello",
|
||||||
|
b"hello\n",
|
||||||
|
b"aaa\nbbb\nccc",
|
||||||
|
b"\n",
|
||||||
|
b"a\n\nb",
|
||||||
|
];
|
||||||
|
|
||||||
|
let mut large_cases = Vec::new();
|
||||||
|
for &n in &[256usize, 300, 512] {
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
for i in 0..n {
|
||||||
|
buf.extend_from_slice(format!("line{}\n", i).as_bytes());
|
||||||
|
}
|
||||||
|
large_cases.push(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
let all_contents: Vec<&[u8]> = cases
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.chain(large_cases.iter().map(Vec::as_slice))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
for content in all_contents {
|
||||||
|
let f = create_temp_file(content);
|
||||||
|
let reader = FileReader::open(f.path()).unwrap();
|
||||||
|
|
||||||
|
let from_bytes_idx = LineIndex::from_bytes(content);
|
||||||
|
let mut buf_reader = std::io::BufReader::new(content);
|
||||||
|
let from_reader_idx = LineIndex::from_reader(&mut buf_reader).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
reader.line_index().total_lines(),
|
||||||
|
from_bytes_idx.total_lines(),
|
||||||
|
"open() line_count should match from_bytes for {:?}B file",
|
||||||
|
content.len()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
from_bytes_idx.total_lines(),
|
||||||
|
from_reader_idx.total_lines(),
|
||||||
|
"from_bytes should match from_reader total_lines for {:?}B file",
|
||||||
|
content.len()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
from_bytes_idx.sampled_offsets(),
|
||||||
|
from_reader_idx.sampled_offsets(),
|
||||||
|
"from_bytes should match from_reader sampled_offsets for {:?}B file",
|
||||||
|
content.len()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
from_bytes_idx.has_trailing_newline(),
|
||||||
|
from_reader_idx.has_trailing_newline(),
|
||||||
|
"from_bytes should match from_reader has_trailing_newline for {:?}B file",
|
||||||
|
content.len()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_reload_after_external_modify_returns_correct_content() {
|
||||||
|
let f = create_temp_file(b"aaa\nbbb\n");
|
||||||
|
let mut reader = FileReader::open(f.path()).unwrap();
|
||||||
|
assert_eq!(reader.get_line(0), Some("aaa"));
|
||||||
|
assert_eq!(reader.get_line(1), Some("bbb"));
|
||||||
|
assert_eq!(reader.line_count(), 2);
|
||||||
|
|
||||||
|
// 外部修改:覆盖写入新内容
|
||||||
|
{
|
||||||
|
use std::io::Write;
|
||||||
|
std::fs::write(f.path(), b"xxx\nyyy\nzzz\n").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.reload().unwrap();
|
||||||
|
assert_eq!(reader.line_count(), 3);
|
||||||
|
assert_eq!(reader.get_line(0), Some("xxx"));
|
||||||
|
assert_eq!(reader.get_line(1), Some("yyy"));
|
||||||
|
assert_eq!(reader.get_line(2), Some("zzz"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_reload_after_truncate_then_rewrite_no_stale_data() {
|
||||||
|
let f = create_temp_file(b"line0\nline1\nline2\nline3\n");
|
||||||
|
let mut reader = FileReader::open(f.path()).unwrap();
|
||||||
|
assert_eq!(reader.line_count(), 4);
|
||||||
|
|
||||||
|
// 截断后写入更短内容
|
||||||
|
{
|
||||||
|
use std::io::Write;
|
||||||
|
let mut file = std::fs::OpenOptions::new()
|
||||||
|
.write(true)
|
||||||
|
.truncate(true)
|
||||||
|
.open(f.path())
|
||||||
|
.unwrap();
|
||||||
|
file.write_all(b"new\n").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.reload().unwrap();
|
||||||
|
assert_eq!(reader.line_count(), 1);
|
||||||
|
assert_eq!(reader.get_line(0), Some("new"));
|
||||||
|
assert_eq!(reader.get_line(1), None, "old line should not be accessible");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_open_reload_idempotent_cross_block() {
|
||||||
|
let mut content = Vec::new();
|
||||||
|
for i in 0..600u32 {
|
||||||
|
content.extend_from_slice(format!("line{}\n", i).as_bytes());
|
||||||
|
}
|
||||||
|
let f = create_temp_file(&content);
|
||||||
|
|
||||||
|
let reader = FileReader::open(f.path()).unwrap();
|
||||||
|
let mut reloaded = FileReader::open(f.path()).unwrap();
|
||||||
|
reloaded.reload().unwrap();
|
||||||
|
|
||||||
|
assert_eq!(reader.line_count(), reloaded.line_count());
|
||||||
|
for i in 0..600 {
|
||||||
|
assert_eq!(
|
||||||
|
reader.get_line(i),
|
||||||
|
reloaded.get_line(i),
|
||||||
|
"line {i} mismatch between open and reload"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user