Files
logViewer/crates/bench/src/line_index.rs
dailz dad5f5a635 fix(bench): eliminate SIGBUS handler static mut UB with Once + raw atomics (closes #33)
Replace `static mut OLD_SIGBUS_HANDLER` with AtomicU8 + AtomicPtr to
remove data race UB when concurrent benchmarks call open() from multiple
threads.

Key changes:
- Use `Once::call_once` to guarantee single handler installation
- Publish old handler to atomics BEFORE installing new handler (closes
  the handler-active-but-state-unpublished race window)
- Read atomics with Acquire in signal handler (async-signal-safe)
- Align si_addr to page boundary before mmap(MAP_FIXED)
- Add concurrent test: 8 threads open all 5 variants simultaneously
2026-06-05 13:22:02 +08:00

111 lines
3.9 KiB
Rust

// ─── line_index.rs ───────────────────────────────────────────────────────────
// Vendored from crates/core/src/io/line_index.rs
// Sparse line index: sample every 256 lines to reduce memory usage.
// ──────────────────────────────────────────────────────────────────────────────
/// Sampling interval of the sparse index: the byte offset of every
/// `BLOCK_SIZE`-th line is recorded, and a lookup scans forward over at most
/// `BLOCK_SIZE - 1` newlines from the nearest recorded offset.
const BLOCK_SIZE: usize = 256;
/// Sparse index over the lines of a byte stream.
///
/// Instead of one offset per line, only the start offset of every
/// `BLOCK_SIZE`-th line is stored; the remaining lines are located by scanning
/// forward from the nearest stored offset.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineIndex {
    /// Byte offsets of the first byte of lines `0`, `BLOCK_SIZE`,
    /// `2 * BLOCK_SIZE`, ... in that order. Empty when the indexed input was empty.
    pub(crate) sampled_offsets: Vec<u64>,
    /// Number of lines in the indexed input (0 for empty input).
    pub(crate) total_lines: u64,
    /// Whether the last byte of the indexed input was `\n`.
    // Set by `from_reader` but not read by any code in this file, hence the allow.
    #[allow(dead_code)]
    pub(crate) has_trailing_newline: bool,
}
impl LineIndex {
    /// Builds a sparse line index by streaming through `reader`.
    ///
    /// The input is drained chunk by chunk with `fill_buf()`/`consume()`, so it
    /// is never held in memory as a whole: memory use is limited to the
    /// reader's own buffer plus one `u64` per `BLOCK_SIZE` lines.
    ///
    /// The offset of the first byte of lines `0`, `BLOCK_SIZE`,
    /// `2 * BLOCK_SIZE`, ... is recorded. A final `\n` terminates the last line
    /// rather than starting a new, empty one. Empty input yields an index with
    /// zero lines and no offsets.
    ///
    /// # Errors
    ///
    /// Returns the first I/O error reported by `reader`. Reads that fail with
    /// `ErrorKind::Interrupted` are retried instead of being reported.
    pub fn from_reader(reader: &mut impl std::io::BufRead) -> std::io::Result<Self> {
        const BLOCK: u64 = BLOCK_SIZE as u64;

        let mut sampled_offsets: Vec<u64> = vec![0]; // line 0 starts at offset 0
        // Counted as u64 (the type of `total_lines`) so the count cannot wrap
        // on targets where usize is 32 bits wide.
        let mut newline_count: u64 = 0;
        // Absolute offset of the first byte of the current chunk.
        let mut chunk_offset: u64 = 0;
        let mut last_byte: Option<u8> = None;

        loop {
            let buf = match reader.fill_buf() {
                Ok(buf) => buf,
                // An interrupted read carries no data and is safe to retry.
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            };
            if buf.is_empty() {
                break; // end of input
            }
            last_byte = buf.last().copied();

            for pos in memchr::memchr_iter(b'\n', buf) {
                // The k-th newline ends line k-1, so line k starts right after it.
                newline_count += 1;
                if newline_count.is_multiple_of(BLOCK) {
                    sampled_offsets.push(chunk_offset + pos as u64 + 1);
                }
            }

            let consumed = buf.len();
            chunk_offset += consumed as u64;
            reader.consume(consumed);
        }

        // Empty input: no lines, nothing to sample.
        if chunk_offset == 0 {
            return Ok(LineIndex {
                sampled_offsets: Vec::new(),
                total_lines: 0,
                has_trailing_newline: false,
            });
        }

        let has_trailing_newline = last_byte == Some(b'\n');
        let total_lines = if has_trailing_newline {
            newline_count
        } else {
            // The unterminated tail after the last newline is a line of its own.
            newline_count + 1
        };

        // With a trailing newline there is no line after the final `\n`. If that
        // newline landed on a block boundary, the offset pushed for it points
        // past the last real line and must be dropped.
        if has_trailing_newline && newline_count.is_multiple_of(BLOCK) {
            sampled_offsets.pop();
        }

        Ok(LineIndex {
            sampled_offsets,
            total_lines,
            has_trailing_newline,
        })
    }

    /// Returns the total number of lines in the indexed input.
    pub fn line_count(&self) -> usize {
        // Saturate rather than wrap if the count does not fit in usize
        // (only possible where usize is narrower than 64 bits).
        usize::try_from(self.total_lines).unwrap_or(usize::MAX)
    }

    /// Returns line `idx` (0-based) of `data`, without its line terminator.
    ///
    /// `data` is expected to hold the same bytes the index was built from. The
    /// lookup jumps to the sampled offset of the block containing `idx` and
    /// scans forward over at most `BLOCK_SIZE - 1` newlines. Trailing `\r`
    /// characters are stripped from the returned line.
    ///
    /// Returns `None` when:
    /// - `idx` is not smaller than the indexed line count, or `data` is empty;
    /// - `data` is too short for the index (e.g. it was truncated after the
    ///   index was built), so the requested line cannot be located;
    /// - the line is not valid UTF-8.
    pub fn get_line<'a>(&self, data: &'a [u8], idx: usize) -> Option<&'a str> {
        if idx as u64 >= self.total_lines || data.is_empty() {
            return None;
        }

        let block = idx / BLOCK_SIZE;
        let lines_to_skip = idx % BLOCK_SIZE;

        // Checked lookups: an index that does not match `data` yields `None`
        // instead of an out-of-bounds panic.
        let block_start = usize::try_from(*self.sampled_offsets.get(block)?).ok()?;
        let mut rest = data.get(block_start..)?;

        for _ in 0..lines_to_skip {
            let newline = memchr::memchr(b'\n', rest)?;
            rest = &rest[newline + 1..];
        }

        let line = match memchr::memchr(b'\n', rest) {
            Some(newline) => &rest[..newline],
            None => rest, // last line without a terminating newline
        };

        // `line` cannot contain `\n`, so only a CR (from CRLF endings) is left to trim.
        std::str::from_utf8(line)
            .ok()
            .map(|s| s.trim_end_matches('\r'))
    }
}