Files
logViewer/crates/core/src/io/progressive_reader.rs
dailz d4679a7543 fix(io): update visual height of last line on append without trailing newline (closes #11)
When a file does not end with a newline, appending content extends the
last logical line's text and thus its visual height. The incremental
extend path in handle_file_appended only computed heights for newly
created logical lines, missing the old last line whose content changed.

Add VisualHeightIndex::replace_last_line_height() — an O(1) method that
rewrites the final prefix sum entry and total. Called before
extend_from_heights so the correct line is targeted.

Changes:
- progressive_reader.rs: add replace_last_line_height, pub with_params,
  7 VHI unit tests
- app.rs: save old_reader_line_count before update, recompute last old
  line height in extend path, 2 integration regression tests
2026-06-07 09:46:24 +08:00

1513 lines
51 KiB
Rust

use std::cell::RefCell;
use std::fmt;
use std::io::BufRead;
use std::path::{Path, PathBuf};
use crate::error::{CoreError, Result};
use crate::io::file_reader::{AppendStatus, FileReader};
use crate::io::index_cache::IndexCache;
use crate::io::line_index::LineIndex;
use crate::io::line_sampler::sample_line_count;
use crate::io::wrap::{format_json_line, wrap_line_chars, MAX_WRAP_INPUT_LEN};
// ─── IndexerMessage ──────────────────────────────────────────────────────────
/// Messages sent by the background indexer thread over its result channel.
///
/// Every variant carries the `generation` it was produced for, so a consumer
/// can tell which indexing run a message belongs to.
pub enum IndexerMessage {
/// Periodic progress report emitted while the file is being scanned.
Progress {
/// Generation of the indexing run that produced this message.
generation: u64,
/// Bytes read so far divided by the file length captured when the scan
/// started, multiplied by 100.
percent: f64,
/// Number of newline bytes counted so far.
lines_scanned: u64,
},
/// Final message of a successful run.
Complete {
/// Generation of the indexing run that produced this message.
generation: u64,
/// Fully indexed reader built from the scanned line index.
reader: FileReader,
/// Visual height index for the file; `None` when the indexer was started
/// with a terminal width of 0.
visual_height_index: Option<VisualHeightIndex>,
},
/// The run failed; `message` is the stringified underlying error.
Error {
/// Generation of the indexing run that produced this message.
generation: u64,
/// Human-readable description of the failure.
message: String,
},
}
impl fmt::Debug for IndexerMessage {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IndexerMessage::Progress {
generation,
percent,
lines_scanned,
} => f
.debug_struct("IndexerMessage::Progress")
.field("generation", generation)
.field("percent", percent)
.field("lines_scanned", lines_scanned)
.finish(),
IndexerMessage::Complete {
generation,
reader: _,
visual_height_index,
} => f
.debug_struct("IndexerMessage::Complete")
.field("generation", generation)
.field("reader", &"<FileReader>")
.field("visual_height_index", visual_height_index)
.finish(),
IndexerMessage::Error {
generation,
message,
} => f
.debug_struct("IndexerMessage::Error")
.field("generation", generation)
.field("message", message)
.finish(),
}
}
}
impl IndexerMessage {
pub fn generation(&self) -> u64 {
match self {
IndexerMessage::Progress { generation, .. } => *generation,
IndexerMessage::Complete { generation, .. } => *generation,
IndexerMessage::Error { generation, .. } => *generation,
}
}
}
// ─── VisualHeightIndex ───────────────────────────────────────────────────────
/// Prefix-sum index over the visual (wrapped) height of every logical line.
///
/// It maps between visual rows and logical lines, and remembers the
/// `json_format` / `terminal_width` parameters it was tagged with so callers
/// can check whether it still applies.
#[derive(Debug)]
pub struct VisualHeightIndex {
    /// prefix_sums[i] = sum of visual_heights[0..i], prefix_sums[0] = 0.
    /// Length = line_count + 1.
    prefix_sums: Vec<u64>,
    /// Sum of all visual heights (equal to the last prefix sum).
    total_visual_rows: u64,
    /// JSON-format flag this index was tagged with (see `with_params`).
    json_format: bool,
    /// Terminal width this index was tagged with (see `with_params`).
    terminal_width: usize,
}

impl VisualHeightIndex {
    /// Builds an index from per-line visual heights.
    ///
    /// The result is tagged with `json_format = false` and
    /// `terminal_width = 0` until `with_params` is applied.
    pub fn build(visual_heights: &[usize]) -> Self {
        let mut prefix_sums = vec![0u64];
        prefix_sums.reserve(visual_heights.len());
        let mut running = 0u64;
        prefix_sums.extend(visual_heights.iter().map(|&height| {
            running += height as u64;
            running
        }));
        Self {
            prefix_sums,
            total_visual_rows: running,
            json_format: false,
            terminal_width: 0,
        }
    }

    /// Consumes the index and returns it tagged with the given parameters.
    pub fn with_params(self, json_format: bool, terminal_width: usize) -> Self {
        Self {
            json_format,
            terminal_width,
            ..self
        }
    }

    /// Total number of visual rows across all lines.
    pub fn total_visual_rows(&self) -> u64 {
        self.total_visual_rows
    }

    /// O(log N) binary search: which logical line contains `visual_row`?
    ///
    /// Rows at or beyond the total resolve to the last logical line (or 0 for
    /// an empty index).
    pub fn visual_row_to_logical_row(&self, visual_row: u64) -> usize {
        if visual_row < self.total_visual_rows {
            // prefix_sums[0] == 0 always satisfies the predicate, so the
            // partition point is at least 1 and the subtraction cannot wrap.
            let first_start_after = self
                .prefix_sums
                .partition_point(|&line_start| line_start <= visual_row);
            first_start_after - 1
        } else {
            self.prefix_sums.len().saturating_sub(2)
        }
    }

    /// Returns (logical_line, sub_row_offset_within_that_line).
    pub fn visual_row_to_logical_row_with_offset(&self, visual_row: u64) -> (usize, usize) {
        let line = self.visual_row_to_logical_row(visual_row);
        let first_row = self.cursor_to_first_visual_row(line);
        (line, (visual_row - first_row) as usize)
    }

    /// O(1) prefix-sum lookup: first visual row of `logical_line`.
    ///
    /// Indices past the end of the table yield 0.
    pub fn cursor_to_first_visual_row(&self, logical_line: usize) -> u64 {
        match self.prefix_sums.get(logical_line) {
            Some(&first_row) => first_row,
            None => 0,
        }
    }

    /// O(1): visual height of a single logical line (0 when out of range).
    pub fn visual_height_of_line(&self, logical_line: usize) -> usize {
        let first_row = self.cursor_to_first_visual_row(logical_line);
        let next_first_row = match self.prefix_sums.get(logical_line + 1) {
            Some(&row) => row,
            None => first_row,
        };
        (next_first_row - first_row) as usize
    }

    /// Number of logical lines covered by the index.
    pub fn line_count(&self) -> usize {
        self.prefix_sums.len().saturating_sub(1)
    }

    /// Whether the index was tagged with exactly these parameters.
    pub fn is_valid_for(&self, json_format: bool, terminal_width: usize) -> bool {
        (self.json_format, self.terminal_width) == (json_format, terminal_width)
    }

    /// Appends the heights of newly added logical lines to the index.
    pub fn extend_from_heights(&mut self, new_heights: &[usize]) {
        let mut running = self.prefix_sums.last().copied().unwrap_or(0);
        self.prefix_sums.reserve(new_heights.len());
        for &height in new_heights {
            let rows = height as u64;
            running += rows;
            self.prefix_sums.push(running);
            self.total_visual_rows += rows;
        }
    }

    /// Replace the visual height of the last logical line. O(1).
    ///
    /// Must be called **before** `extend_from_heights` so that the last line
    /// index still refers to the pre-extension line. Does nothing when the
    /// index holds no lines.
    pub fn replace_last_line_height(&mut self, new_height: usize) {
        let [.., line_start, line_end] = self.prefix_sums.as_mut_slice() else {
            return;
        };
        let previous = *line_end - *line_start;
        let requested = new_height as u64;
        match requested.cmp(&previous) {
            std::cmp::Ordering::Equal => {}
            std::cmp::Ordering::Greater => {
                let grow = requested - previous;
                *line_end += grow;
                self.total_visual_rows += grow;
            }
            std::cmp::Ordering::Less => {
                let shrink = previous - requested;
                *line_end -= shrink;
                self.total_visual_rows -= shrink;
            }
        }
    }
}
// ─── VisualHeightRebuildResult ────────────────────────────────────────────────
/// Result delivered by the background visual-height rebuild thread.
pub struct VisualHeightRebuildResult {
/// Generation the rebuild was started with; the receiver compares it with
/// its own counter before accepting the index.
pub generation: u64,
/// Freshly built index, tagged with the rebuild's JSON-format flag and
/// terminal width.
pub index: VisualHeightIndex,
}
// ─── Visual height computation helpers ────────────────────────────────────────
/// Number of visual rows one logical line occupies at `terminal_width`.
///
/// Lines longer than `MAX_WRAP_INPUT_LEN` bytes are not wrapped and count as a
/// single row. When `json_format` is set, the line is passed through
/// `format_json_line` first and the formatted text is measured instead.
pub fn compute_line_visual_height(
    line_text: &str,
    terminal_width: usize,
    json_format: bool,
) -> usize {
    // Oversized input: skip wrapping entirely.
    if line_text.len() > MAX_WRAP_INPUT_LEN {
        return 1;
    }
    if !json_format {
        return compute_text_visual_height(line_text, terminal_width);
    }
    let pretty = format_json_line(line_text);
    compute_text_visual_height(&pretty, terminal_width)
}
/// Sums the wrapped row count of every `'\n'`-separated segment of `text`.
/// The result is never less than 1.
fn compute_text_visual_height(text: &str, width: usize) -> usize {
    let rows: usize = text
        .split('\n')
        .map(|segment| wrap_line_chars(segment, width).len())
        .sum();
    rows.max(1)
}
/// Computes the visual height of every line exposed by `reader`, in order.
///
/// A line the reader cannot return is measured as the empty string.
fn compute_visual_heights(
    reader: &FileReader,
    terminal_width: usize,
    json_format: bool,
) -> Vec<usize> {
    (0..reader.line_count())
        .map(|line_no| {
            let text = reader.get_line(line_no).unwrap_or("");
            compute_line_visual_height(text, terminal_width, json_format)
        })
        .collect()
}
// ─── ReaderState ─────────────────────────────────────────────────────────────
/// Lifecycle state of a `ProgressiveFileReader`.
pub enum ReaderState {
/// No complete line index is available; lines are located by scanning the
/// memory map for newlines on demand.
Sampling {
/// Memory map of the file; `None` when the file was empty at open time.
mmap: Option<memmap2::Mmap>,
/// Estimated total line count taken from the line-count sample.
estimated_lines: u64,
/// Average line length taken from the line-count sample.
avg_line_length: f64,
/// Byte positions of newlines found during initial scan.
/// These are mutable via interior mutability (RefCell).
scanned_newlines: RefCell<Vec<usize>>,
/// Byte position up to which we've scanned.
scanned_up_to: RefCell<usize>,
},
/// A fully indexed reader is available.
Ready {
/// Reader backed by a complete line index.
reader: FileReader,
/// Optional visual height index; `None` until one has been supplied.
visual_height_index: Option<VisualHeightIndex>,
},
/// Carries an error message string.
/// NOTE(review): nothing in this part of the file constructs this variant —
/// confirm where it is produced.
Error(String),
}
/// Spawns a background thread that builds a full line index for `path`.
///
/// The thread streams the file through a 64 KiB buffered reader, counting
/// newlines and recording the byte offset of every 256th line start. After
/// roughly every 1,000,000 bytes it polls `cancel_rx` and, when the file was
/// non-empty at start, sends an `IndexerMessage::Progress`.
///
/// On success it memory-maps the file, saves the index through `IndexCache`
/// (best effort), optionally computes a `VisualHeightIndex` (only when
/// `terminal_width > 0`) and sends `IndexerMessage::Complete`. I/O failures
/// are reported as `IndexerMessage::Error`. When a cancel signal is observed
/// the thread exits without sending anything further.
///
/// Returns the receiving end of a bounded (capacity 10) message channel.
pub fn spawn_indexer(
    path: PathBuf,
    generation: u64,
    terminal_width: usize,
    json_format: bool,
    cancel_rx: crossbeam_channel::Receiver<()>,
) -> crossbeam_channel::Receiver<IndexerMessage> {
    /// Best-effort delivery of an `Error` message; a closed channel is ignored.
    fn report_failure(
        tx: &crossbeam_channel::Sender<IndexerMessage>,
        generation: u64,
        cause: &dyn std::fmt::Display,
    ) {
        let _ = tx.send(IndexerMessage::Error {
            generation,
            message: cause.to_string(),
        });
    }

    /// One line-start offset is recorded every this many lines.
    const SAMPLE_STRIDE: usize = 256;
    /// Bytes processed between cancel checks / progress reports.
    const CHECK_INTERVAL_BYTES: usize = 1_000_000;

    let (tx, rx) = crossbeam_channel::bounded(10);
    std::thread::spawn(move || {
        // Open the file and capture its length up front; the length is only
        // used for progress percentages and the empty-file decision below.
        let opened = std::fs::File::open(&path).and_then(|f| {
            let len = f.metadata()?.len();
            Ok((f, len))
        });
        let (file, target_len) = match opened {
            Ok(pair) => pair,
            Err(e) => {
                report_failure(&tx, generation, &e);
                return;
            }
        };

        let mut source = std::io::BufReader::with_capacity(64 * 1024, file);
        // Line 0 always starts at offset 0.
        let mut sampled_offsets: Vec<u64> = vec![0];
        let mut newline_count: usize = 0;
        let mut bytes_read: u64 = 0;
        let mut last_byte: Option<u8> = None;
        let mut bytes_since_check: usize = 0;

        loop {
            let chunk = match source.fill_buf() {
                Ok(c) => c,
                Err(e) => {
                    report_failure(&tx, generation, &e);
                    return;
                }
            };
            // An empty buffer means EOF.
            let Some(&tail) = chunk.last() else {
                break;
            };
            last_byte = Some(tail);

            for pos in memchr::memchr_iter(b'\n', chunk) {
                newline_count += 1;
                // The k-th newline ends line k-1, so the byte after it is the
                // start of line k; sample it when k is a multiple of the stride.
                if newline_count.is_multiple_of(SAMPLE_STRIDE) {
                    sampled_offsets.push(bytes_read + pos as u64 + 1);
                }
            }

            let consumed = chunk.len();
            source.consume(consumed);
            bytes_read += consumed as u64;
            bytes_since_check += consumed;
            if bytes_since_check < CHECK_INTERVAL_BYTES {
                continue;
            }
            bytes_since_check = 0;
            if cancel_rx.try_recv().is_ok() {
                return;
            }
            if target_len > 0 {
                let percent = (bytes_read as f64 / target_len as f64) * 100.0;
                let _ = tx.send(IndexerMessage::Progress {
                    generation,
                    percent,
                    lines_scanned: newline_count as u64,
                });
            }
        }

        if cancel_rx.try_recv().is_ok() {
            return;
        }

        let line_index = if bytes_read == 0 {
            LineIndex {
                sampled_offsets: vec![],
                total_lines: 0,
                has_trailing_newline: false,
            }
        } else {
            let has_trailing_newline = last_byte == Some(b'\n') && newline_count > 0;
            let total_lines = if has_trailing_newline {
                newline_count as u64
            } else {
                (1 + newline_count) as u64
            };
            // A trailing newline on a stride boundary recorded the start of a
            // line that does not exist; discard that sample.
            if has_trailing_newline && newline_count.is_multiple_of(SAMPLE_STRIDE) {
                sampled_offsets.pop();
            }
            LineIndex {
                sampled_offsets,
                total_lines,
                has_trailing_newline,
            }
        };

        let mmap = if target_len == 0 {
            None
        } else {
            let mapped = std::fs::File::open(&path).and_then(|f| {
                let m = unsafe { memmap2::Mmap::map(&f) }?;
                Ok((f, m))
            });
            match mapped {
                // Keep the mapping only if the file is still at least as long
                // as the mapped region; otherwise continue without a map.
                Ok((mmap_file, m)) => match mmap_file.metadata() {
                    Ok(meta) if meta.len() >= m.len() as u64 => Some(m),
                    _ => None,
                },
                Err(e) => {
                    report_failure(&tx, generation, &e);
                    return;
                }
            }
        };

        // Cache persistence is best effort: a failed save is ignored.
        if let Some(data) = mmap.as_deref() {
            let _ = IndexCache::save_with_hash(&path, &line_index, data);
        }

        let reader = FileReader::from_parts(path, mmap, line_index);
        let visual_height_index = if terminal_width > 0 {
            let visual_heights = compute_visual_heights(&reader, terminal_width, json_format);
            Some(VisualHeightIndex::build(&visual_heights).with_params(json_format, terminal_width))
        } else {
            None
        };
        let _ = tx.send(IndexerMessage::Complete {
            generation,
            reader,
            visual_height_index,
        });
    });
    rx
}
pub fn spawn_visual_height_rebuild(
path: PathBuf,
generation: u64,
terminal_width: usize,
json_format: bool,
cancel_rx: crossbeam_channel::Receiver<()>,
) -> crossbeam_channel::Receiver<VisualHeightRebuildResult> {
let (tx, rx) = crossbeam_channel::bounded(1);
std::thread::spawn(move || {
let line_index = match IndexCache::load(&path) {
Some(idx) => idx,
None => return,
};
let file = match std::fs::File::open(&path) {
Ok(f) => f,
Err(_) => return,
};
let mut reader = std::io::BufReader::with_capacity(64 * 1024, file);
let mut visual_heights = Vec::with_capacity(line_index.line_count());
let mut line_buf = Vec::new();
loop {
if cancel_rx.try_recv().is_ok() {
return;
}
line_buf.clear();
match std::io::BufRead::read_until(&mut reader, b'\n', &mut line_buf) {
Ok(0) => break,
Ok(_) => {
let line_text = std::str::from_utf8(&line_buf)
.ok()
.map(|s| s.trim_end_matches(['\r', '\n']))
.unwrap_or("");
visual_heights.push(compute_line_visual_height(
line_text,
terminal_width,
json_format,
));
}
Err(_) => return,
}
}
if visual_heights.len() != line_index.line_count() {
return;
}
let index =
VisualHeightIndex::build(&visual_heights).with_params(json_format, terminal_width);
let _ = tx.send(VisualHeightRebuildResult { generation, index });
});
rx
}
// ─── ProgressiveFileReader ───────────────────────────────────────────────────
/// Maximum bytes to scan during initial open for the Sampling state.
const INITIAL_SCAN_BYTES: usize = 64 * 1024;
/// Maximum number of additional lines to scan beyond what's already cached
/// in a single `get_line()` call. Prevents O(N) blocking when the user
/// jumps far ahead (e.g. `G` to end-of-file) while the reader is still in
/// the `Sampling` state, i.e. before a full line index is available.
const SCAN_AHEAD_LIMIT: usize = 10_000;
/// File reader that starts out serving lines from a partially scanned memory
/// map (`Sampling`) and can later be switched to a fully indexed `FileReader`
/// (`Ready`).
pub struct ProgressiveFileReader {
/// Path the reader was opened with.
path: PathBuf,
/// Current lifecycle state; see `ReaderState`.
pub state: ReaderState,
/// Cancel handle for the background indexer; a `()` is sent on drop.
cancel_tx: Option<crossbeam_channel::Sender<()>>,
/// Channel polled by `poll_indexer` for indexer messages.
indexer_rx: Option<crossbeam_channel::Receiver<IndexerMessage>>,
/// Generation used to filter indexer messages; messages with a different
/// generation are discarded.
generation: u64,
/// Cancel handle for the visual-height rebuild currently in flight, if any.
vh_rebuild_cancel_tx: Option<crossbeam_channel::Sender<()>>,
/// Result channel of the visual-height rebuild currently in flight, if any.
vh_rebuild_rx: Option<crossbeam_channel::Receiver<VisualHeightRebuildResult>>,
/// Counter incremented for every rebuild started; only a result carrying
/// the current value is accepted.
vh_generation: u64,
}
impl ProgressiveFileReader {
/// Open a file for progressive reading.
///
/// - If a cached `LineIndex` exists, the reader starts in `Ready` state
///   (without a visual height index).
/// - Otherwise the file is memory-mapped, a line-count sample is taken, the
///   first ~64KB are scanned for newline positions, and the reader starts in
///   `Sampling` state.
/// - Empty files are never mapped (`mmap: None`).
///
/// # Errors
///
/// Fails if the file cannot be opened or inspected, if mapping fails
/// (`CoreError::Mmap`), or if line-count sampling fails (`CoreError::Io`).
pub fn open(path: &Path) -> Result<Self> {
    // The cache lookup happens first, before the file itself is touched.
    let cached_index = IndexCache::load(path);

    let file = std::fs::File::open(path)?;
    let mmap = match file.metadata()?.len() {
        0 => None,
        _ => Some(
            unsafe { memmap2::Mmap::map(&file) }
                .map_err(|e| CoreError::Mmap(e.to_string()))?,
        ),
    };

    let state = match cached_index {
        // Cache hit: a complete index is available immediately.
        Some(line_index) => ReaderState::Ready {
            reader: FileReader::from_parts(path.to_path_buf(), mmap, line_index),
            visual_height_index: None,
        },
        // Cache miss: sample, then scan only the head of the file.
        None => {
            let sample = sample_line_count(path).map_err(|source| CoreError::Io {
                source,
                context: "sampling line count".into(),
            })?;
            let bytes = mmap.as_deref().unwrap_or(&[]);
            let head_len = INITIAL_SCAN_BYTES.min(bytes.len());
            let head_newlines: Vec<usize> =
                memchr::memchr_iter(b'\n', &bytes[..head_len]).collect();
            ReaderState::Sampling {
                mmap,
                estimated_lines: sample.estimated_lines,
                avg_line_length: sample.avg_line_length,
                scanned_newlines: RefCell::new(head_newlines),
                scanned_up_to: RefCell::new(head_len),
            }
        }
    };

    Ok(ProgressiveFileReader {
        path: path.to_path_buf(),
        state,
        cancel_tx: None,
        indexer_rx: None,
        generation: 1,
        vh_rebuild_cancel_tx: None,
        vh_rebuild_rx: None,
        vh_generation: 0,
    })
}
/// Opens `path` like `open` and attaches the background-indexer endpoints:
/// the cancel sender, the message receiver and the generation used to filter
/// incoming messages.
///
/// # Errors
///
/// Propagates any error returned by `open`.
pub fn with_channels(
    path: &Path,
    cancel_tx: crossbeam_channel::Sender<()>,
    indexer_rx: crossbeam_channel::Receiver<IndexerMessage>,
    generation: u64,
) -> Result<Self> {
    Self::open(path).map(|mut opened| {
        opened.cancel_tx = Some(cancel_tx);
        opened.indexer_rx = Some(indexer_rx);
        opened.generation = generation;
        opened
    })
}
/// Get a line by index.
///
/// - In `Sampling` state: uses the cached newline positions to extract lines
///   from the mmap. If the requested line is beyond the scanned region, the
///   scan is extended incrementally, by at most `SCAN_AHEAD_LIMIT` newly found
///   lines per call.
/// - In `Ready` state: delegates to `FileReader::get_line`.
/// - In `Error` state: returns `None`.
///
/// Returns `None` when the line does not exist, when its bytes are not valid
/// UTF-8, or (Sampling only) when the end of the line has not been located
/// yet because the per-call scan budget ran out. Trailing `'\r'` / `'\n'`
/// characters are stripped from the returned text.
pub fn get_line(&self, idx: usize) -> Option<String> {
    match &self.state {
        ReaderState::Sampling {
            mmap,
            scanned_newlines,
            scanned_up_to,
            ..
        } => {
            let mmap_data = mmap.as_deref()?;
            if mmap_data.is_empty() {
                return None;
            }
            let mut newlines = scanned_newlines.borrow_mut();
            let mut up_to = scanned_up_to.borrow_mut();
            let scan_limit = newlines.len() + SCAN_AHEAD_LIMIT;
            // Extend scan if needed, but stop at scan_limit to avoid O(N) blocking
            while newlines.len() <= idx
                && newlines.len() < scan_limit
                && *up_to < mmap_data.len()
            {
                match memchr::memchr(b'\n', &mmap_data[*up_to..]) {
                    Some(rel_pos) => {
                        newlines.push(*up_to + rel_pos);
                        *up_to += rel_pos + 1;
                    }
                    None => {
                        *up_to = mmap_data.len();
                        break;
                    }
                }
            }

            // Line `idx` starts right after newline `idx - 1`; if that newline
            // is unknown the line is out of reach.
            let start = match idx {
                0 => 0,
                _ => *newlines.get(idx - 1)? + 1,
            };
            let end = match newlines.get(idx) {
                Some(&newline_pos) => newline_pos,
                // No terminating newline known. Only when the whole file has
                // been scanned is the remainder the final, unterminated line.
                None if *up_to >= mmap_data.len() => mmap_data.len(),
                // The scan budget ran out before this line's end was found.
                // Returning everything up to EOF here would hand back many
                // lines glued together, so report "not available yet".
                None => return None,
            };
            if start >= mmap_data.len() {
                return None;
            }
            std::str::from_utf8(&mmap_data[start..end])
                .map(|s| s.trim_end_matches(['\r', '\n']).to_owned())
                .ok()
        }
        ReaderState::Ready { reader, .. } => reader.get_line(idx).map(|s| s.to_owned()),
        ReaderState::Error(_) => None,
    }
}
/// Switches the reader to `Ready`, replacing whatever state it was in with the
/// supplied fully-indexed `FileReader` and optional visual height index.
pub fn set_ready(
    &mut self,
    reader: FileReader,
    visual_height_index: Option<VisualHeightIndex>,
) {
    let ready = ReaderState::Ready {
        reader,
        visual_height_index,
    };
    self.state = ready;
}
/// Line count of the file: exact in `Ready` state, the sampled estimate in
/// `Sampling` state, and 0 in `Error` state.
pub fn line_count(&self) -> usize {
    match &self.state {
        ReaderState::Ready { reader, .. } => reader.line_count(),
        ReaderState::Sampling {
            estimated_lines, ..
        } => *estimated_lines as usize,
        ReaderState::Error(_) => 0,
    }
}
/// Number of lines known so far.
///
/// In `Sampling` state this is the number of newline positions found by the
/// scan so far, so a final line without a trailing newline is not counted.
/// In `Ready` state it is the exact line count; in `Error` state it is 0.
pub fn sampled_line_count(&self) -> usize {
    match &self.state {
        ReaderState::Sampling {
            scanned_newlines, ..
        } => scanned_newlines.borrow().len(),
        ReaderState::Ready { reader, .. } => reader.line_count(),
        ReaderState::Error(_) => 0,
    }
}
/// Whether the reader is in Sampling state.
pub fn is_sampling(&self) -> bool {
matches!(self.state, ReaderState::Sampling { .. })
}
/// Whether a full prefix sum index is available (only in Ready state).
pub fn has_prefix_sum(&self) -> bool {
matches!(self.state, ReaderState::Ready { .. })
}
/// Path this reader was opened with.
pub fn path(&self) -> &Path {
    self.path.as_path()
}
/// The generation counter (tracks file-open count).
pub fn generation(&self) -> u64 {
self.generation
}
/// Non-blocking poll for indexer messages.
///
/// Discards messages whose generation doesn't match `self.generation`.
/// Returns the first matching message, or `None` if no message is available.
pub fn poll_indexer(&mut self) -> Option<IndexerMessage> {
let rx = self.indexer_rx.as_ref()?;
loop {
match rx.try_recv() {
Ok(msg) => {
if msg.generation() == self.generation {
return Some(msg);
}
// Stale message, discard and keep polling
}
Err(crossbeam_channel::TryRecvError::Empty) => return None,
Err(crossbeam_channel::TryRecvError::Disconnected) => return None,
}
}
}
/// Borrows the inner `FileReader`; `None` unless the state is `Ready`.
pub fn reader(&self) -> Option<&FileReader> {
    if let ReaderState::Ready { reader, .. } = &self.state {
        Some(reader)
    } else {
        None
    }
}
/// Forwards to `FileReader::update_for_append` when the reader is `Ready`.
///
/// In any other state nothing is touched and `AppendStatus::Unchanged` is
/// reported.
pub fn update_for_append(&mut self) -> Result<AppendStatus> {
    let ReaderState::Ready { reader, .. } = &mut self.state else {
        return Ok(AppendStatus::Unchanged);
    };
    reader.update_for_append()
}
/// Discards the visual height index held in the `Ready` state, if any.
/// Does nothing in other states.
pub fn invalidate_visual_height_index(&mut self) {
    let ReaderState::Ready {
        visual_height_index,
        ..
    } = &mut self.state
    else {
        return;
    };
    drop(visual_height_index.take());
}
/// Starts a background rebuild of the visual height index for the given
/// terminal width and JSON-format flag.
///
/// A rebuild already in flight is signalled to cancel first. The rebuild
/// generation is bumped so that only the result of this newest rebuild is
/// accepted by `poll_visual_height_rebuild`.
pub fn start_visual_height_rebuild(&mut self, terminal_width: usize, json_format: bool) {
    if let Some(previous_cancel) = self.vh_rebuild_cancel_tx.take() {
        let _ = previous_cancel.send(());
    }
    self.vh_generation += 1;
    let (cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
    let result_rx = spawn_visual_height_rebuild(
        self.path.clone(),
        self.vh_generation,
        terminal_width,
        json_format,
        cancel_rx,
    );
    self.vh_rebuild_rx = Some(result_rx);
    self.vh_rebuild_cancel_tx = Some(cancel_tx);
}
pub fn poll_visual_height_rebuild(&mut self) -> Option<VisualHeightIndex> {
let rx = self.vh_rebuild_rx.as_ref()?;
match rx.try_recv() {
Ok(result) if result.generation == self.vh_generation => {
self.vh_rebuild_cancel_tx = None;
self.vh_rebuild_rx = None;
Some(result.index)
}
Ok(_) => None,
Err(crossbeam_channel::TryRecvError::Empty) => None,
Err(crossbeam_channel::TryRecvError::Disconnected) => {
self.vh_rebuild_cancel_tx = None;
self.vh_rebuild_rx = None;
None
}
}
}
/// Forwards to `FileReader::reload` when the reader is `Ready`; a successful
/// no-op in any other state.
pub fn reload(&mut self) -> Result<()> {
    if let ReaderState::Ready { reader, .. } = &mut self.state {
        reader.reload()
    } else {
        Ok(())
    }
}
/// Forwards to `FileReader::save_cache` when the reader is `Ready`; a
/// successful no-op in any other state.
pub fn save_cache(&self) -> std::io::Result<()> {
    if let ReaderState::Ready { reader, .. } = &self.state {
        reader.save_cache()
    } else {
        Ok(())
    }
}
}
impl Drop for ProgressiveFileReader {
fn drop(&mut self) {
if let Some(tx) = &self.cancel_tx {
let _ = tx.send(());
}
if let Some(tx) = self.vh_rebuild_cancel_tx.take() {
let _ = tx.send(());
}
}
}
// ─── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use crate::io::line_index::LineIndex;
use std::io::Write;
use tempfile::NamedTempFile;
fn create_temp_file(content: &[u8]) -> NamedTempFile {
let mut f = NamedTempFile::new().unwrap();
f.write_all(content).unwrap();
f.flush().unwrap();
f
}
#[test]
fn test_progressive_cache_hit() {
let f = create_temp_file(b"line1\nline2\nline3\n");
// Build and save a cache
let data = std::fs::read(f.path()).unwrap();
let index = LineIndex::from_bytes(&data);
IndexCache::save(f.path(), &index).unwrap();
let reader = ProgressiveFileReader::open(f.path()).unwrap();
assert!(!reader.is_sampling(), "cache hit should return Ready state");
assert_eq!(reader.line_count(), 3);
}
#[test]
fn test_progressive_cache_miss() {
let f = create_temp_file(b"alpha\nbeta\ngamma\n");
// No cache saved — should return Sampling
let reader = ProgressiveFileReader::open(f.path()).unwrap();
assert!(
reader.is_sampling(),
"cache miss should return Sampling state"
);
}
#[test]
fn test_progressive_transition_sampling_to_ready() {
let f = create_temp_file(b"aaa\nbbb\nccc\n");
let mut pfr = ProgressiveFileReader::open(f.path()).unwrap();
assert!(pfr.is_sampling());
// Build a FileReader for the transition
let fr = FileReader::open(f.path()).unwrap();
pfr.set_ready(fr, None);
assert!(!pfr.is_sampling());
assert!(pfr.has_prefix_sum());
assert_eq!(pfr.line_count(), 3);
}
#[test]
fn test_progressive_empty_file() {
let f = create_temp_file(b"");
let reader = ProgressiveFileReader::open(f.path()).unwrap();
// Empty file should not panic, either Sampling (mmap=None) or Ready
assert_eq!(reader.line_count(), 0);
assert_eq!(reader.get_line(0), None);
}
#[test]
fn test_progressive_get_line_sampling() {
let content = b"first line\nsecond line\nthird line\nfourth line\n";
let f = create_temp_file(content);
let reader = ProgressiveFileReader::open(f.path()).unwrap();
assert!(reader.is_sampling());
// Get first few lines (within initial 64KB scan)
assert_eq!(reader.get_line(0), Some("first line".to_owned()));
assert_eq!(reader.get_line(1), Some("second line".to_owned()));
assert_eq!(reader.get_line(2), Some("third line".to_owned()));
assert_eq!(reader.get_line(3), Some("fourth line".to_owned()));
assert_eq!(reader.get_line(4), None);
}
#[test]
fn test_progressive_get_line_ready() {
let f = create_temp_file(b"aaa\nbbb\nccc\n");
// Save cache so we get Ready state
let data = std::fs::read(f.path()).unwrap();
let index = LineIndex::from_bytes(&data);
IndexCache::save(f.path(), &index).unwrap();
let reader = ProgressiveFileReader::open(f.path()).unwrap();
assert!(!reader.is_sampling());
assert_eq!(reader.get_line(0), Some("aaa".to_owned()));
assert_eq!(reader.get_line(1), Some("bbb".to_owned()));
assert_eq!(reader.get_line(2), Some("ccc".to_owned()));
assert_eq!(reader.get_line(3), None);
}
#[test]
fn test_progressive_from_parts() {
let f = create_temp_file(b"hello\nworld\n");
let data = std::fs::read(f.path()).unwrap();
let index = LineIndex::from_bytes(&data);
let file = std::fs::File::open(f.path()).unwrap();
let mmap = Some(unsafe { memmap2::Mmap::map(&file).unwrap() });
let reader = FileReader::from_parts(f.path().to_path_buf(), mmap, index);
assert_eq!(reader.line_count(), 2);
assert_eq!(reader.get_line(0), Some("hello"));
assert_eq!(reader.get_line(1), Some("world"));
}
#[test]
fn test_progressive_drop_sends_cancel() {
let (tx, rx) = crossbeam_channel::bounded(1);
let f = create_temp_file(b"test\n");
let mut reader = ProgressiveFileReader::open(f.path()).unwrap();
reader.cancel_tx = Some(tx);
reader.indexer_rx = None;
drop(reader);
// The cancel signal should have been sent
assert!(rx.try_recv().is_ok());
}
#[test]
fn test_progressive_sampled_line_count() {
let content = b"alpha\nbeta\ngamma\ndelta\nepsilon\n";
let f = create_temp_file(content);
let reader = ProgressiveFileReader::open(f.path()).unwrap();
assert!(reader.is_sampling());
// sampled_line_count should return the number of newlines found in initial scan
let count = reader.sampled_line_count();
assert_eq!(count, 5, "should find all 5 newlines in small file");
}
#[test]
fn test_progressive_poll_indexer_discards_stale() {
let (tx, rx) = crossbeam_channel::bounded(10);
let f = create_temp_file(b"data\n");
let mut reader = ProgressiveFileReader::open(f.path()).unwrap();
reader.indexer_rx = Some(rx);
reader.generation = 2; // Our generation is 2
// Send a stale message (generation 1)
tx.send(IndexerMessage::Progress {
generation: 1,
percent: 50.0,
lines_scanned: 100,
})
.unwrap();
// Send a current message (generation 2)
tx.send(IndexerMessage::Progress {
generation: 2,
percent: 75.0,
lines_scanned: 200,
})
.unwrap();
// Should get the current message, discarding the stale one
let msg = reader.poll_indexer().unwrap();
assert_eq!(msg.generation(), 2);
// No more current messages
assert!(reader.poll_indexer().is_none());
}
#[test]
fn test_progressive_get_line_sampling_no_trailing_newline() {
let content = b"aaa\nbbb\nccc"; // No trailing newline
let f = create_temp_file(content);
let reader = ProgressiveFileReader::open(f.path()).unwrap();
assert!(reader.is_sampling());
assert_eq!(reader.get_line(0), Some("aaa".to_owned()));
assert_eq!(reader.get_line(1), Some("bbb".to_owned()));
assert_eq!(reader.get_line(2), Some("ccc".to_owned()));
assert_eq!(reader.get_line(3), None);
}
#[test]
fn test_progressive_get_line_beyond_initial_scan() {
// Create content larger than 64KB to test incremental scan
let mut content = Vec::new();
for i in 0..5000 {
writeln!(content, "line number {:05}", i).unwrap();
}
// Make it > 64KB
assert!(
content.len() > INITIAL_SCAN_BYTES,
"test data should exceed initial scan size"
);
let f = create_temp_file(&content);
let reader = ProgressiveFileReader::open(f.path()).unwrap();
assert!(reader.is_sampling());
// Initial sampled lines should be less than 5000
let initial = reader.sampled_line_count();
assert!(initial < 5000, "initial scan should not find all lines");
// Request line beyond initial scan — should extend incrementally
let line_4000 = reader.get_line(4000);
assert!(
line_4000.is_some(),
"should find line 4000 after scan extension"
);
assert_eq!(line_4000.unwrap(), "line number 04000");
// Line 0 should still work
assert_eq!(reader.get_line(0), Some("line number 00000".to_owned()));
}
#[test]
fn test_progressive_path() {
let f = create_temp_file(b"data\n");
let reader = ProgressiveFileReader::open(f.path()).unwrap();
assert_eq!(reader.path(), f.path());
}
#[test]
fn test_progressive_reader_accessor() {
let f = create_temp_file(b"x\ny\nz\n");
let data = std::fs::read(f.path()).unwrap();
let index = LineIndex::from_bytes(&data);
IndexCache::save(f.path(), &index).unwrap();
let reader = ProgressiveFileReader::open(f.path()).unwrap();
let fr = reader.reader().unwrap();
assert_eq!(fr.line_count(), 3);
}
#[test]
fn test_spawn_indexer_complete() {
let f = create_temp_file(b"line1\nline2\nline3\n");
let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
let rx = spawn_indexer(f.path().to_path_buf(), 1, 80, false, cancel_rx);
let msg = rx.recv_timeout(std::time::Duration::from_secs(5)).unwrap();
match msg {
IndexerMessage::Complete {
generation,
reader,
visual_height_index,
} => {
assert_eq!(generation, 1);
assert_eq!(reader.line_count(), 3);
assert_eq!(reader.get_line(0), Some("line1"));
assert_eq!(reader.get_line(1), Some("line2"));
assert_eq!(reader.get_line(2), Some("line3"));
assert!(visual_height_index.is_some());
let idx = visual_height_index.unwrap();
assert_eq!(idx.total_visual_rows(), 3);
assert_eq!(idx.visual_row_to_logical_row(0), 0);
assert_eq!(idx.visual_row_to_logical_row(1), 1);
assert_eq!(idx.visual_row_to_logical_row(2), 2);
}
other => panic!("expected Complete, got {:?}", other),
}
}
#[test]
fn test_spawn_indexer_cancel() {
let mut content = Vec::new();
for i in 0..500_000 {
writeln!(content, "line number {:08}", i).unwrap();
}
let f = create_temp_file(&content);
let (cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
let rx = spawn_indexer(f.path().to_path_buf(), 1, 80, false, cancel_rx);
cancel_tx.send(()).unwrap();
let result = rx.recv_timeout(std::time::Duration::from_secs(5));
match result {
Err(crossbeam_channel::RecvTimeoutError::Timeout)
| Err(crossbeam_channel::RecvTimeoutError::Disconnected) => {}
Ok(IndexerMessage::Complete { .. }) => {
// Thread finished before cancel was processed — acceptable
}
other => panic!("unexpected message: {:?}", other),
}
}
#[test]
fn test_spawn_indexer_progress() {
let mut content = Vec::new();
for i in 0..600_000 {
writeln!(content, "line number {:08}", i).unwrap();
}
let f = create_temp_file(&content);
let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
let rx = spawn_indexer(f.path().to_path_buf(), 1, 80, false, cancel_rx);
let mut got_progress = false;
let mut got_complete = false;
loop {
match rx.recv_timeout(std::time::Duration::from_secs(30)) {
Ok(IndexerMessage::Progress { generation, .. }) => {
assert_eq!(generation, 1);
got_progress = true;
}
Ok(IndexerMessage::Complete {
generation, reader, ..
}) => {
assert_eq!(generation, 1);
assert_eq!(reader.line_count(), 600_000);
got_complete = true;
break;
}
Ok(IndexerMessage::Error { message, .. }) => {
panic!("unexpected error: {}", message);
}
Err(e) => panic!("recv error: {:?}", e),
}
}
assert!(got_complete, "should receive Complete");
// Progress may or may not be sent depending on timing, so we don't assert got_progress
let _ = got_progress;
}
#[test]
fn test_spawn_indexer_cache_saved() {
let f = create_temp_file(b"cached line1\ncached line2\n");
let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
let rx = spawn_indexer(f.path().to_path_buf(), 1, 80, false, cancel_rx);
let msg = rx.recv_timeout(std::time::Duration::from_secs(5)).unwrap();
match &msg {
IndexerMessage::Complete { .. } => {}
other => panic!("expected Complete, got {:?}", other),
}
let cached = IndexCache::load(f.path());
assert!(
cached.is_some(),
"cache should be saved after indexer completes"
);
let cached_index = cached.unwrap();
assert_eq!(cached_index.line_count(), 2);
}
#[test]
fn test_spawn_indexer_error() {
let bad_path = PathBuf::from("/nonexistent/file/that/does/not/exist.log");
let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
let rx = spawn_indexer(bad_path.clone(), 1, 80, false, cancel_rx);
let msg = rx.recv_timeout(std::time::Duration::from_secs(5)).unwrap();
match msg {
IndexerMessage::Error {
generation,
message,
} => {
assert_eq!(generation, 1);
assert!(!message.is_empty());
}
other => panic!("expected Error, got {:?}", other),
}
}
/// An empty file indexes successfully and reports zero lines.
#[test]
fn test_spawn_indexer_empty_file() {
    let temp = create_temp_file(b"");
    let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
    let rx = spawn_indexer(temp.path().to_path_buf(), 1, 80, false, cancel_rx);

    let (generation, reader) = match rx.recv_timeout(std::time::Duration::from_secs(5)).unwrap()
    {
        IndexerMessage::Complete {
            generation, reader, ..
        } => (generation, reader),
        other => panic!("expected Complete, got {:?}", other),
    };
    assert_eq!(generation, 1);
    assert_eq!(reader.line_count(), 0);
}
/// Visual rows resolve to the logical line that contains them; rows at or past the
/// total (14, 999) resolve to the last line.
#[test]
fn test_visual_height_index_binary_search() {
    // heights: [3, 1, 4, 1, 5]
    // prefix:  [0, 3, 4, 8, 9, 14]
    let line_heights = [3, 1, 4, 1, 5];
    let vhi = VisualHeightIndex::build(&line_heights);
    assert_eq!(vhi.total_visual_rows(), 14);
    assert_eq!(vhi.line_count(), 5);

    // (visual row, expected logical line)
    let cases = [
        (0, 0),
        (1, 0),
        (2, 0),
        (3, 1),
        (4, 2),
        (7, 2),
        (8, 3),
        (9, 4),
        (13, 4),
        (14, 4),
        (999, 4),
    ];
    for (visual_row, logical_row) in cases {
        assert_eq!(
            vhi.visual_row_to_logical_row(visual_row),
            logical_row,
            "visual row {}",
            visual_row
        );
    }
}
/// Each visual row maps to a `(logical line, offset within that line)` pair.
#[test]
fn test_visual_height_index_with_offset() {
    let line_heights = [3, 1, 4];
    let vhi = VisualHeightIndex::build(&line_heights);

    // (visual row, expected (logical line, offset))
    let cases = [
        (0, (0, 0)),
        (1, (0, 1)),
        (2, (0, 2)),
        (3, (1, 0)),
        (4, (2, 0)),
        (5, (2, 1)),
        (7, (2, 3)),
    ];
    for (visual_row, expected) in cases {
        assert_eq!(
            vhi.visual_row_to_logical_row_with_offset(visual_row),
            expected,
            "visual row {}",
            visual_row
        );
    }
}
/// A logical line maps to its first visual row; an out-of-range line (99) maps to 0.
#[test]
fn test_visual_height_index_cursor_to_first() {
    let line_heights = [2, 3, 1];
    let vhi = VisualHeightIndex::build(&line_heights);

    // (logical line, expected first visual row)
    for (line, first_row) in [(0, 0), (1, 2), (2, 5), (99, 0)] {
        assert_eq!(
            vhi.cursor_to_first_visual_row(line),
            first_row,
            "logical line {}",
            line
        );
    }
}
/// An index built from no heights has zero rows and zero lines, and both lookups return 0.
#[test]
fn test_visual_height_index_empty() {
    let no_heights: [usize; 0] = [];
    let vhi = VisualHeightIndex::build(&no_heights);

    // Size queries.
    assert_eq!(vhi.total_visual_rows(), 0);
    assert_eq!(vhi.line_count(), 0);

    // Lookups on the empty index.
    assert_eq!(vhi.visual_row_to_logical_row(0), 0);
    assert_eq!(vhi.cursor_to_first_visual_row(0), 0);
}
/// A line longer than `MAX_WRAP_INPUT_LEN` is recorded with a visual height of 1,
/// the same as the short line before it.
#[test]
fn test_max_wrap_input_len_guard() {
    let oversized = "x".repeat(MAX_WRAP_INPUT_LEN + 1);
    let temp = create_temp_file(format!("short\n{}\n", oversized).as_bytes());
    let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
    let rx = spawn_indexer(temp.path().to_path_buf(), 1, 80, false, cancel_rx);

    // Skip any progress reports and take the index from the terminal message.
    let vhi = loop {
        match rx.recv_timeout(std::time::Duration::from_secs(10)).unwrap() {
            IndexerMessage::Progress { .. } => {}
            IndexerMessage::Complete {
                visual_height_index,
                ..
            } => break visual_height_index.expect("should have visual height index"),
            other => panic!("expected Complete, got {:?}", other),
        }
    };

    assert_eq!(vhi.visual_height_of_line(0), 1);
    assert_eq!(vhi.visual_height_of_line(1), 1);
}
/// An index tagged with `with_params(true, 80)` is valid only for exactly those parameters.
#[test]
fn test_visual_height_index_is_valid_for() {
    let line_heights = [1, 2, 3];
    let vhi = VisualHeightIndex::build(&line_heights).with_params(true, 80);

    // (first param, second param, expected validity)
    for (flag, width, expected) in [(true, 80, true), (false, 80, false), (true, 40, false)] {
        assert_eq!(
            vhi.is_valid_for(flag, width),
            expected,
            "is_valid_for({}, {})",
            flag,
            width
        );
    }
}
/// Per-line heights round-trip through the index; a line past the end (4) reports 0.
#[test]
fn test_visual_height_index_visual_height_of_line() {
    let line_heights = [3, 1, 5, 2];
    let vhi = VisualHeightIndex::build(&line_heights);

    // (logical line, expected visual height)
    for (line, height) in [(0, 3), (1, 1), (2, 5), (3, 2), (4, 0)] {
        assert_eq!(
            vhi.visual_height_of_line(line),
            height,
            "logical line {}",
            line
        );
    }
}
/// Extending an index appends new lines after the existing ones and keeps lookups
/// consistent across the old/new boundary.
#[test]
fn test_visual_height_index_extend_from_heights() {
    let initial = [2, 3];
    let mut vhi = VisualHeightIndex::build(&initial);
    assert_eq!(vhi.total_visual_rows(), 5);
    assert_eq!(vhi.line_count(), 2);

    vhi.extend_from_heights(&[4, 1]);
    assert_eq!(vhi.line_count(), 4);
    assert_eq!(vhi.total_visual_rows(), 10);

    // (logical line, expected first visual row)
    for (line, first_row) in [(0, 0), (1, 2), (2, 5), (3, 9)] {
        assert_eq!(
            vhi.cursor_to_first_visual_row(line),
            first_row,
            "logical line {}",
            line
        );
    }

    // (visual row, expected logical line)
    for (visual_row, line) in [(4, 1), (6, 2)] {
        assert_eq!(
            vhi.visual_row_to_logical_row(visual_row),
            line,
            "visual row {}",
            visual_row
        );
    }
}
/// Extending an initially empty index yields the sum of the appended heights.
#[test]
fn test_visual_height_index_extend_empty() {
    let no_heights: [usize; 0] = [];
    let mut vhi = VisualHeightIndex::build(&no_heights);
    assert_eq!(vhi.total_visual_rows(), 0);

    // 1 + 2 + 3 = 6 visual rows after the extension.
    vhi.extend_from_heights(&[1, 2, 3]);
    assert_eq!(vhi.total_visual_rows(), 6);
}
/// Growing the last line's height updates that line and the total, and leaves the
/// earlier line and both start rows untouched.
#[test]
fn test_replace_last_line_height_increase() {
    let initial = [2, 3];
    let mut vhi = VisualHeightIndex::build(&initial);
    assert_eq!(vhi.total_visual_rows(), 5);
    assert_eq!(vhi.visual_height_of_line(1), 3);

    vhi.replace_last_line_height(7);

    // (logical line, expected visual height)
    for (line, height) in [(0, 2), (1, 7)] {
        assert_eq!(vhi.visual_height_of_line(line), height, "logical line {}", line);
    }
    assert_eq!(vhi.total_visual_rows(), 9);

    // (logical line, expected first visual row)
    for (line, first_row) in [(0, 0), (1, 2)] {
        assert_eq!(
            vhi.cursor_to_first_visual_row(line),
            first_row,
            "logical line {}",
            line
        );
    }
}
/// Shrinking the last line's height lowers that line and the total accordingly.
#[test]
fn test_replace_last_line_height_decrease() {
    let initial = [2, 5];
    let mut vhi = VisualHeightIndex::build(&initial);

    vhi.replace_last_line_height(1);

    // (logical line, expected visual height)
    for (line, height) in [(0, 2), (1, 1)] {
        assert_eq!(vhi.visual_height_of_line(line), height, "logical line {}", line);
    }
    assert_eq!(vhi.total_visual_rows(), 3);
}
/// Replacing the last line's height with its current value leaves the total unchanged.
#[test]
fn test_replace_last_line_height_same_is_noop() {
    let initial = [2, 3];
    let mut vhi = VisualHeightIndex::build(&initial);
    let rows_before = vhi.total_visual_rows();

    // 3 is already the height of the last line.
    vhi.replace_last_line_height(3);

    let rows_after = vhi.total_visual_rows();
    assert_eq!(rows_after, rows_before);
}
/// Replacing the last height and then extending keeps all heights and start rows
/// consistent, with the new line starting after the replaced one.
#[test]
fn test_replace_last_line_height_then_extend() {
    let initial = [2, 1];
    let mut vhi = VisualHeightIndex::build(&initial);
    assert_eq!(vhi.total_visual_rows(), 3);

    // Replace first, then extend.
    vhi.replace_last_line_height(4);
    vhi.extend_from_heights(&[3]);
    assert_eq!(vhi.line_count(), 3);

    // (logical line, expected visual height)
    for (line, height) in [(0, 2), (1, 4), (2, 3)] {
        assert_eq!(vhi.visual_height_of_line(line), height, "logical line {}", line);
    }
    assert_eq!(vhi.total_visual_rows(), 9);

    // (logical line, expected first visual row)
    for (line, first_row) in [(0, 0), (1, 2), (2, 6)] {
        assert_eq!(
            vhi.cursor_to_first_visual_row(line),
            first_row,
            "logical line {}",
            line
        );
    }
}
/// With a single line, replacing its height changes both that line and the total.
#[test]
fn test_replace_last_line_height_single_line() {
    let only_line = [5];
    let mut vhi = VisualHeightIndex::build(&only_line);

    vhi.replace_last_line_height(2);

    let line_height = vhi.visual_height_of_line(0);
    let total_rows = vhi.total_visual_rows();
    assert_eq!(line_height, 2);
    assert_eq!(total_rows, 2);
}
/// Replacing the last height on an index with no lines leaves the total at 0.
#[test]
fn test_replace_last_line_height_empty_index() {
    let no_heights: [usize; 0] = [];
    let mut vhi = VisualHeightIndex::build(&no_heights);

    // There is no last line to rewrite.
    vhi.replace_last_line_height(5);

    let total_rows = vhi.total_visual_rows();
    assert_eq!(total_rows, 0);
}
/// Truncating the file right after the indexer starts must not produce a bogus result.
///
/// The race between the scan and the truncation is inherently timing-dependent, so every
/// terminal outcome is tolerated: `Complete` (with at most the original 100 000 lines),
/// `Error`, a disconnected channel, or a timeout. Any `Progress` reports that arrive before
/// the terminal outcome are skipped instead of being treated as a failure.
#[test]
fn test_spawn_indexer_file_truncated_during_scan() {
    let mut content = Vec::new();
    for i in 0..100_000 {
        writeln!(content, "line number {:08}", i).unwrap();
    }
    let f = create_temp_file(&content);
    let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
    let rx = spawn_indexer(f.path().to_path_buf(), 1, 80, false, cancel_rx);

    // Opening with `truncate(true)` empties the file; the handle is released immediately.
    drop(
        std::fs::OpenOptions::new()
            .write(true)
            .truncate(true)
            .open(f.path())
            .unwrap(),
    );

    loop {
        match rx.recv_timeout(std::time::Duration::from_secs(10)) {
            // Progress may or may not be sent depending on timing; keep waiting.
            Ok(IndexerMessage::Progress { .. }) => continue,
            Ok(IndexerMessage::Complete { reader, .. }) => {
                assert!(reader.line_count() <= 100_000);
                break;
            }
            Ok(IndexerMessage::Error { .. }) => break,
            Err(crossbeam_channel::RecvTimeoutError::Disconnected) => break,
            Err(crossbeam_channel::RecvTimeoutError::Timeout) => break,
        }
    }
}
/// A visual-height rebuild must deliver no message when the cached line index (three
/// lines) no longer matches the file on disk (rewritten to a single line).
#[test]
fn test_spawn_visual_height_rebuild_line_count_mismatch_discards() {
    let temp = create_temp_file(b"line0\nline1\nline2\n");

    // Cache an index that describes the original three-line content.
    let original_bytes = std::fs::read(temp.path()).unwrap();
    let stale_index = LineIndex::from_bytes(&original_bytes);
    IndexCache::save(temp.path(), &stale_index).unwrap();

    // Rewrite the file so its line count disagrees with the cached index.
    {
        use std::io::Write;
        let mut rewritten = std::fs::OpenOptions::new()
            .write(true)
            .truncate(true)
            .open(temp.path())
            .unwrap();
        rewritten.write_all(b"only_one_line\n").unwrap();
    }

    let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
    let rx = spawn_visual_height_rebuild(temp.path().to_path_buf(), 1, 80, false, cancel_rx);

    // Both a disconnect and a timeout count as "nothing was delivered".
    let outcome = rx.recv_timeout(std::time::Duration::from_secs(5));
    if outcome.is_ok() {
        panic!("should have been discarded due to line count mismatch");
    }
}
}