feat(core): implement ProgressiveFileReader with background indexer
Add ProgressiveFileReader state machine (Sampling/Ready/Error) with scanned_newlines cache for O(1) line lookups. Implement spawn_indexer() for background mmap-based indexing with cancellation and progress reporting. Add VisualHeightIndex with prefix-sum + binary search for O(log N) visual height queries. Register all new io submodules and extend FileReader with reload(), save_cache(), and accessor methods. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
@@ -5,6 +5,7 @@
|
|||||||
// ──────────────────────────────────────────────────────────────────────────────
|
// ──────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
use crate::error::{CoreError, Result};
|
use crate::error::{CoreError, Result};
|
||||||
|
use crate::io::index_cache::IndexCache;
|
||||||
use crate::io::line_index::LineIndex;
|
use crate::io::line_index::LineIndex;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
@@ -66,6 +67,68 @@ impl FileReader {
|
|||||||
pub fn path(&self) -> &Path {
|
pub fn path(&self) -> &Path {
|
||||||
&self.path
|
&self.path
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn from_parts(path: PathBuf, mmap: Option<memmap2::Mmap>, line_index: LineIndex) -> Self {
|
||||||
|
Self {
|
||||||
|
path,
|
||||||
|
mmap,
|
||||||
|
line_index,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Save the line index cache to disk.
|
||||||
|
pub fn save_cache(&self) -> std::io::Result<()> {
|
||||||
|
IndexCache::save(&self.path, &self.line_index)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Access the underlying line index.
|
||||||
|
pub fn line_index(&self) -> &LineIndex {
|
||||||
|
&self.line_index
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reload(&mut self) -> Result<()> {
|
||||||
|
let file = std::fs::File::open(&self.path)?;
|
||||||
|
let file_size = file.metadata()?.len();
|
||||||
|
|
||||||
|
self.mmap = None;
|
||||||
|
|
||||||
|
if file_size > 0 {
|
||||||
|
self.mmap = Some(
|
||||||
|
unsafe { memmap2::Mmap::map(&file) }.map_err(|e| CoreError::Mmap(e.to_string()))?,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut reader = std::io::BufReader::new(&file);
|
||||||
|
self.line_index = LineIndex::from_reader(&mut reader).map_err(|e| CoreError::Io {
|
||||||
|
source: e,
|
||||||
|
context: "rebuilding line index on reload".into(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn update_for_append(&mut self) -> Result<u64> {
|
||||||
|
let file = std::fs::File::open(&self.path)?;
|
||||||
|
let new_size = file.metadata()?.len();
|
||||||
|
let old_size = self.mmap.as_ref().map_or(0u64, |m| m.len() as u64);
|
||||||
|
|
||||||
|
if new_size <= old_size {
|
||||||
|
return Ok(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
let old_lines = self.line_index.line_count() as u64;
|
||||||
|
|
||||||
|
self.mmap = None;
|
||||||
|
let mmap =
|
||||||
|
unsafe { memmap2::Mmap::map(&file) }.map_err(|e| CoreError::Mmap(e.to_string()))?;
|
||||||
|
|
||||||
|
self.line_index
|
||||||
|
.extend_from_bytes(&mmap[old_size as usize..], old_size);
|
||||||
|
self.mmap = Some(mmap);
|
||||||
|
|
||||||
|
Ok(self.line_index.line_count() as u64 - old_lines)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const _: () = {
|
const _: () = {
|
||||||
@@ -192,4 +255,117 @@ mod tests {
|
|||||||
assert!(reader.data().is_empty());
|
assert!(reader.data().is_empty());
|
||||||
assert_eq!(reader.data(), b"");
|
assert_eq!(reader.data(), b"");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_file_reader_reload() {
|
||||||
|
let f = create_temp_file(b"aaa\nbbb\n");
|
||||||
|
let mut reader = FileReader::open(f.path()).unwrap();
|
||||||
|
assert_eq!(reader.line_count(), 2);
|
||||||
|
assert_eq!(reader.get_line(0), Some("aaa"));
|
||||||
|
assert_eq!(reader.get_line(1), Some("bbb"));
|
||||||
|
|
||||||
|
reader.reload().unwrap();
|
||||||
|
assert_eq!(reader.line_count(), 2);
|
||||||
|
assert_eq!(reader.get_line(0), Some("aaa"));
|
||||||
|
assert_eq!(reader.get_line(1), Some("bbb"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_file_reader_reload_after_truncate() {
|
||||||
|
let f = create_temp_file(b"aaa\nbbb\nccc\n");
|
||||||
|
let mut reader = FileReader::open(f.path()).unwrap();
|
||||||
|
assert_eq!(reader.line_count(), 3);
|
||||||
|
|
||||||
|
{
|
||||||
|
use std::io::Write;
|
||||||
|
let file = std::fs::OpenOptions::new()
|
||||||
|
.write(true)
|
||||||
|
.truncate(true)
|
||||||
|
.open(f.path())
|
||||||
|
.unwrap();
|
||||||
|
drop(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.reload().unwrap();
|
||||||
|
assert_eq!(reader.line_count(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_file_reader_update_for_append() {
|
||||||
|
let f = create_temp_file(b"aaa\nbbb\n");
|
||||||
|
let mut reader = FileReader::open(f.path()).unwrap();
|
||||||
|
assert_eq!(reader.line_count(), 2);
|
||||||
|
|
||||||
|
{
|
||||||
|
use std::io::Write;
|
||||||
|
let mut file = std::fs::OpenOptions::new()
|
||||||
|
.append(true)
|
||||||
|
.open(f.path())
|
||||||
|
.unwrap();
|
||||||
|
file.write_all(b"ccc\nddd\n").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let new_lines = reader.update_for_append().unwrap();
|
||||||
|
assert_eq!(new_lines, 2);
|
||||||
|
assert_eq!(reader.line_count(), 4);
|
||||||
|
assert_eq!(reader.get_line(0), Some("aaa"));
|
||||||
|
assert_eq!(reader.get_line(1), Some("bbb"));
|
||||||
|
assert_eq!(reader.get_line(2), Some("ccc"));
|
||||||
|
assert_eq!(reader.get_line(3), Some("ddd"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_file_reader_update_for_append_no_change() {
|
||||||
|
let f = create_temp_file(b"aaa\n");
|
||||||
|
let mut reader = FileReader::open(f.path()).unwrap();
|
||||||
|
assert_eq!(reader.line_count(), 1);
|
||||||
|
|
||||||
|
let new_lines = reader.update_for_append().unwrap();
|
||||||
|
assert_eq!(new_lines, 0);
|
||||||
|
assert_eq!(reader.line_count(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_file_reader_update_for_append_shrunk_file() {
|
||||||
|
let f = create_temp_file(b"aaa\nbbb\nccc\n");
|
||||||
|
let mut reader = FileReader::open(f.path()).unwrap();
|
||||||
|
assert_eq!(reader.line_count(), 3);
|
||||||
|
|
||||||
|
{
|
||||||
|
use std::io::Write;
|
||||||
|
let mut file = std::fs::OpenOptions::new()
|
||||||
|
.write(true)
|
||||||
|
.truncate(true)
|
||||||
|
.open(f.path())
|
||||||
|
.unwrap();
|
||||||
|
file.write_all(b"x\n").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
let new_lines = reader.update_for_append().unwrap();
|
||||||
|
assert_eq!(new_lines, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_file_reader_save_cache() {
|
||||||
|
let f = create_temp_file(b"aaa\nbbb\n");
|
||||||
|
// Clear any existing cache
|
||||||
|
if let Some(cp) = crate::io::cache_util::cache_path(f.path()) {
|
||||||
|
let _ = std::fs::remove_file(cp);
|
||||||
|
}
|
||||||
|
|
||||||
|
let reader = FileReader::open(f.path()).unwrap();
|
||||||
|
assert!(reader.save_cache().is_ok());
|
||||||
|
|
||||||
|
let cached = crate::io::index_cache::IndexCache::load(f.path());
|
||||||
|
assert!(cached.is_some(), "cache should exist after save_cache");
|
||||||
|
assert_eq!(cached.unwrap().line_count(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_file_reader_line_index_accessor() {
|
||||||
|
let f = create_temp_file(b"x\ny\n");
|
||||||
|
let reader = FileReader::open(f.path()).unwrap();
|
||||||
|
let idx = reader.line_index();
|
||||||
|
assert_eq!(idx.line_count(), 2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,3 +13,9 @@ pub mod file_reader;
|
|||||||
// 同理,声明 line_index 子模块,定义在 line_index.rs 文件中。
|
// 同理,声明 line_index 子模块,定义在 line_index.rs 文件中。
|
||||||
// 这个模块负责维护文件中每一行的起始位置索引,用于快速定位某一行内容。
|
// 这个模块负责维护文件中每一行的起始位置索引,用于快速定位某一行内容。
|
||||||
pub mod line_index;
|
pub mod line_index;
|
||||||
|
|
||||||
|
pub mod cache_util;
|
||||||
|
pub mod index_cache;
|
||||||
|
pub mod line_sampler;
|
||||||
|
pub mod progressive_reader;
|
||||||
|
pub mod wrap;
|
||||||
|
|||||||
1267
crates/core/src/io/progressive_reader.rs
Normal file
1267
crates/core/src/io/progressive_reader.rs
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user