feat(core): implement ProgressiveFileReader with background indexer

Add ProgressiveFileReader state machine (Sampling/Ready/Error) with scanned_newlines cache for O(1) line lookups. Implement spawn_indexer() for background mmap-based indexing with cancellation and progress reporting. Add VisualHeightIndex with prefix-sum + binary search for O(log N) visual height queries. Register all new io submodules and extend FileReader with reload(), save_cache(), and accessor methods.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
dailz
2026-04-14 09:07:09 +08:00
parent 210eecfa66
commit f9f451e0d6
3 changed files with 1449 additions and 0 deletions

View File

@@ -5,6 +5,7 @@
// ──────────────────────────────────────────────────────────────────────────────
use crate::error::{CoreError, Result};
use crate::io::index_cache::IndexCache;
use crate::io::line_index::LineIndex;
use std::path::{Path, PathBuf};
@@ -66,6 +67,68 @@ impl FileReader {
pub fn path(&self) -> &Path {
&self.path
}
pub fn from_parts(path: PathBuf, mmap: Option<memmap2::Mmap>, line_index: LineIndex) -> Self {
Self {
path,
mmap,
line_index,
}
}
/// Save the line index cache to disk.
pub fn save_cache(&self) -> std::io::Result<()> {
IndexCache::save(&self.path, &self.line_index)?;
Ok(())
}
/// Access the underlying line index.
pub fn line_index(&self) -> &LineIndex {
&self.line_index
}
pub fn reload(&mut self) -> Result<()> {
let file = std::fs::File::open(&self.path)?;
let file_size = file.metadata()?.len();
self.mmap = None;
if file_size > 0 {
self.mmap = Some(
unsafe { memmap2::Mmap::map(&file) }.map_err(|e| CoreError::Mmap(e.to_string()))?,
);
}
let mut reader = std::io::BufReader::new(&file);
self.line_index = LineIndex::from_reader(&mut reader).map_err(|e| CoreError::Io {
source: e,
context: "rebuilding line index on reload".into(),
})?;
Ok(())
}
pub fn update_for_append(&mut self) -> Result<u64> {
let file = std::fs::File::open(&self.path)?;
let new_size = file.metadata()?.len();
let old_size = self.mmap.as_ref().map_or(0u64, |m| m.len() as u64);
if new_size <= old_size {
return Ok(0);
}
let old_lines = self.line_index.line_count() as u64;
self.mmap = None;
let mmap =
unsafe { memmap2::Mmap::map(&file) }.map_err(|e| CoreError::Mmap(e.to_string()))?;
self.line_index
.extend_from_bytes(&mmap[old_size as usize..], old_size);
self.mmap = Some(mmap);
Ok(self.line_index.line_count() as u64 - old_lines)
}
}
const _: () = {
@@ -192,4 +255,117 @@ mod tests {
assert!(reader.data().is_empty());
assert_eq!(reader.data(), b"");
}
#[test]
fn test_file_reader_reload() {
let f = create_temp_file(b"aaa\nbbb\n");
let mut reader = FileReader::open(f.path()).unwrap();
assert_eq!(reader.line_count(), 2);
assert_eq!(reader.get_line(0), Some("aaa"));
assert_eq!(reader.get_line(1), Some("bbb"));
reader.reload().unwrap();
assert_eq!(reader.line_count(), 2);
assert_eq!(reader.get_line(0), Some("aaa"));
assert_eq!(reader.get_line(1), Some("bbb"));
}
#[test]
fn test_file_reader_reload_after_truncate() {
let f = create_temp_file(b"aaa\nbbb\nccc\n");
let mut reader = FileReader::open(f.path()).unwrap();
assert_eq!(reader.line_count(), 3);
{
use std::io::Write;
let file = std::fs::OpenOptions::new()
.write(true)
.truncate(true)
.open(f.path())
.unwrap();
drop(file);
}
reader.reload().unwrap();
assert_eq!(reader.line_count(), 0);
}
#[test]
fn test_file_reader_update_for_append() {
let f = create_temp_file(b"aaa\nbbb\n");
let mut reader = FileReader::open(f.path()).unwrap();
assert_eq!(reader.line_count(), 2);
{
use std::io::Write;
let mut file = std::fs::OpenOptions::new()
.append(true)
.open(f.path())
.unwrap();
file.write_all(b"ccc\nddd\n").unwrap();
}
let new_lines = reader.update_for_append().unwrap();
assert_eq!(new_lines, 2);
assert_eq!(reader.line_count(), 4);
assert_eq!(reader.get_line(0), Some("aaa"));
assert_eq!(reader.get_line(1), Some("bbb"));
assert_eq!(reader.get_line(2), Some("ccc"));
assert_eq!(reader.get_line(3), Some("ddd"));
}
#[test]
fn test_file_reader_update_for_append_no_change() {
let f = create_temp_file(b"aaa\n");
let mut reader = FileReader::open(f.path()).unwrap();
assert_eq!(reader.line_count(), 1);
let new_lines = reader.update_for_append().unwrap();
assert_eq!(new_lines, 0);
assert_eq!(reader.line_count(), 1);
}
#[test]
fn test_file_reader_update_for_append_shrunk_file() {
let f = create_temp_file(b"aaa\nbbb\nccc\n");
let mut reader = FileReader::open(f.path()).unwrap();
assert_eq!(reader.line_count(), 3);
{
use std::io::Write;
let mut file = std::fs::OpenOptions::new()
.write(true)
.truncate(true)
.open(f.path())
.unwrap();
file.write_all(b"x\n").unwrap();
}
let new_lines = reader.update_for_append().unwrap();
assert_eq!(new_lines, 0);
}
#[test]
fn test_file_reader_save_cache() {
let f = create_temp_file(b"aaa\nbbb\n");
// Clear any existing cache
if let Some(cp) = crate::io::cache_util::cache_path(f.path()) {
let _ = std::fs::remove_file(cp);
}
let reader = FileReader::open(f.path()).unwrap();
assert!(reader.save_cache().is_ok());
let cached = crate::io::index_cache::IndexCache::load(f.path());
assert!(cached.is_some(), "cache should exist after save_cache");
assert_eq!(cached.unwrap().line_count(), 2);
}
#[test]
fn test_file_reader_line_index_accessor() {
let f = create_temp_file(b"x\ny\n");
let reader = FileReader::open(f.path()).unwrap();
let idx = reader.line_index();
assert_eq!(idx.line_count(), 2);
}
}

View File

@@ -13,3 +13,9 @@ pub mod file_reader;
// 同理,声明 line_index 子模块,定义在 line_index.rs 文件中。
// 这个模块负责维护文件中每一行的起始位置索引,用于快速定位某一行内容。
pub mod line_index;
pub mod cache_util;
pub mod index_cache;
pub mod line_sampler;
pub mod progressive_reader;
pub mod wrap;

File diff suppressed because it is too large Load Diff