// logViewer/crates/bench/src/line_index.rs

// ─── line_index.rs ───────────────────────────────────────────────────────────
// Vendored from crates/core/src/io/line_index.rs
// Sparse line index: sample every 256 lines to reduce memory usage.
// ──────────────────────────────────────────────────────────────────────────────

/// Number of lines per index block: the index stores the start offset of
/// every `BLOCK_SIZE`-th line (lines 0, 256, 512, ...) instead of every line.
const BLOCK_SIZE: usize = 256;

/// Sparse line index over a byte stream.
///
/// Only the start offset of every `BLOCK_SIZE`-th line is stored; the lines in
/// between are located by scanning forward from the nearest sampled offset.
pub struct LineIndex {
    /// Byte offsets of block starts: entry `k` is the offset at which line
    /// `k * BLOCK_SIZE` begins. Empty when the indexed input was empty.
    pub(crate) sampled_offsets: Vec<u64>,
    /// Number of lines in the indexed input. A final line that is not
    /// terminated by `\n` is counted; empty input has zero lines.
    pub(crate) total_lines: u64,
    /// Whether the last byte of the indexed input was `\n`.
    // Set by `from_reader` but not read anywhere in the code visible here,
    // which is presumably why the dead-code lint is silenced for this field.
    #[allow(dead_code)]
    pub(crate) has_trailing_newline: bool,
}

impl LineIndex {
    /// Build a sparse line index from a streaming reader.
    ///
    /// The input is scanned chunk by chunk through `fill_buf()`/`consume()`, so
    /// the contents are never held in memory as a whole. Besides the reader's
    /// own buffer, the only state that grows with the input is the sampled
    /// offset table (one `u64` per `BLOCK_SIZE` lines).
    ///
    /// Line 0 starts at offset 0 and line `n` starts right after the `n`-th
    /// `\n`. A trailing `\n` terminates the last line instead of opening a new
    /// empty one. Empty input produces an index with zero lines and no offsets.
    ///
    /// # Errors
    ///
    /// Returns any I/O error reported by `reader`, except
    /// `ErrorKind::Interrupted`, which is retried.
    pub fn from_reader(reader: &mut impl std::io::BufRead) -> std::io::Result<Self> {
        let mut sampled_offsets: Vec<u64> = vec![0]; // line 0 starts at offset 0
        let mut newline_count: usize = 0;
        let mut chunk_offset: u64 = 0; // absolute offset of the current chunk
        let mut last_byte: Option<u8> = None;

        loop {
            let buf = match reader.fill_buf() {
                Ok(buf) => buf,
                // An interrupted read carries no data and can simply be retried.
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
            if buf.is_empty() {
                break; // end of input
            }

            // Remember the final byte of every chunk; after the loop this is
            // the final byte of the whole input.
            last_byte = buf.last().copied();

            for pos in memchr::memchr_iter(b'\n', buf) {
                newline_count += 1;
                // The line that starts right after the n-th newline is line n,
                // so a block boundary is hit whenever n is a multiple of
                // BLOCK_SIZE.
                if newline_count.is_multiple_of(BLOCK_SIZE) {
                    sampled_offsets.push(chunk_offset + pos as u64 + 1);
                }
            }

            let consumed = buf.len();
            chunk_offset += consumed as u64;
            reader.consume(consumed);
        }

        // Empty input: no lines and no sampled offsets at all.
        if chunk_offset == 0 {
            return Ok(LineIndex {
                sampled_offsets: Vec::new(),
                total_lines: 0,
                has_trailing_newline: false,
            });
        }

        // A final `\n` implies at least one newline was counted.
        let has_trailing_newline = last_byte == Some(b'\n');

        let total_lines = if has_trailing_newline {
            newline_count as u64
        } else {
            // The unterminated tail after the last newline is a line too.
            newline_count as u64 + 1
        };

        // If the input ends with `\n` exactly on a block boundary, the loop
        // sampled the start of a line that does not exist; drop that entry.
        if has_trailing_newline && newline_count.is_multiple_of(BLOCK_SIZE) {
            sampled_offsets.pop();
        }

        Ok(LineIndex {
            sampled_offsets,
            total_lines,
            has_trailing_newline,
        })
    }

    /// Return total line count.
    pub fn line_count(&self) -> usize {
        self.total_lines as usize
    }

    /// Retrieve the content of line `idx` from the given data slice.
    ///
    /// `data` is expected to be the same bytes the index was built from. The
    /// sparse index gives the start of the enclosing block; from there at most
    /// `BLOCK_SIZE - 1` newlines are skipped to reach the target line.
    ///
    /// The returned text excludes the terminating `\n` and any trailing `\r`.
    ///
    /// Returns `None` when `idx` is out of range, when `data` is empty or too
    /// short for the index (instead of panicking), or when the line is not
    /// valid UTF-8.
    pub fn get_line<'a>(&self, data: &'a [u8], idx: usize) -> Option<&'a str> {
        // Compare in u64 so the line count is never truncated on 32-bit targets.
        if idx as u64 >= self.total_lines || data.is_empty() {
            return None;
        }
        let block = idx / BLOCK_SIZE;
        let lines_to_skip = idx % BLOCK_SIZE;

        let block_start = *self.sampled_offsets.get(block)?;
        // Reject offsets past the end of `data`; this also makes the cast to
        // usize below lossless.
        if block_start > data.len() as u64 {
            return None;
        }
        let mut rest = &data[block_start as usize..];

        // Walk forward from the block start to the beginning of line `idx`.
        for _ in 0..lines_to_skip {
            let rel = memchr::memchr(b'\n', rest)?;
            rest = &rest[rel + 1..];
        }

        // The line runs up to the next newline, or to the end of the data.
        let line_bytes = match memchr::memchr(b'\n', rest) {
            Some(rel) => &rest[..rel],
            None => rest,
        };
        std::str::from_utf8(line_bytes)
            .map(|s| s.trim_end_matches(['\r', '\n']))
            .ok()
    }
}