Compare commits
26 Commits
fix/m23-si
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
10323ce814 | ||
|
|
c1a931551b | ||
|
|
dfc016c348 | ||
|
|
19a3b877f9 | ||
|
|
5cb56dafd8 | ||
|
|
e99861c76d | ||
|
|
a43ef673b0 | ||
|
|
70f930eef7 | ||
|
|
463c53148b | ||
|
|
e9f75ce3b1 | ||
|
|
ef1889767a | ||
|
|
eedab3ac96 | ||
|
|
8e9600dda2 | ||
|
|
2cebbd94c4 | ||
|
|
0d88e933e6 | ||
|
|
420b853cb9 | ||
|
|
7852e92ecc | ||
|
|
d37ed6df68 | ||
|
|
b58d66f2aa | ||
|
|
d4679a7543 | ||
|
|
8844e58cb4 | ||
|
|
6a2f8ecb66 | ||
|
|
f6081b9fe9 | ||
|
|
97a2c6a925 | ||
|
|
e6e0e2cc90 | ||
|
|
ffaf462bae |
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -2333,6 +2333,7 @@ dependencies = [
|
||||
"tempfile",
|
||||
"thiserror 2.0.18",
|
||||
"toml",
|
||||
"unicode-width",
|
||||
"xxhash-rust",
|
||||
]
|
||||
|
||||
@@ -2357,6 +2358,7 @@ dependencies = [
|
||||
"ratatui",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -27,3 +27,4 @@ textwrap = "0.16"
|
||||
tempfile = "3"
|
||||
xxhash-rust = { version = "0.8", features = ["xxh3"] }
|
||||
bincode = "1"
|
||||
unicode-width = "0.2"
|
||||
|
||||
@@ -83,11 +83,11 @@ pub fn generate_growable_file(dir: &Path) -> std::io::Result<PathBuf> {
|
||||
|
||||
/// Append `count` lines to the file
|
||||
pub fn append_lines(path: &Path, count: usize) -> std::io::Result<()> {
|
||||
let existing_lines = count_existing_lines(path)?;
|
||||
let mut file = BufWriter::with_capacity(
|
||||
64 * 1024,
|
||||
fs::OpenOptions::new().append(true).open(path)?,
|
||||
);
|
||||
let existing_lines = count_existing_lines(path).unwrap_or(0);
|
||||
for i in 0..count {
|
||||
writeln!(
|
||||
file,
|
||||
@@ -117,7 +117,12 @@ pub fn rotate_file(path: &Path) -> std::io::Result<PathBuf> {
|
||||
fn count_existing_lines(path: &Path) -> std::io::Result<u64> {
|
||||
let file = fs::File::open(path)?;
|
||||
let reader = BufReader::new(file);
|
||||
Ok(reader.lines().count() as u64)
|
||||
let mut count = 0u64;
|
||||
for line in reader.lines() {
|
||||
line?;
|
||||
count += 1;
|
||||
}
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -35,7 +35,7 @@ const HANDLER_NONE: u8 = 0;
|
||||
const HANDLER_DEFAULT: u8 = 1;
|
||||
const HANDLER_IGNORE: u8 = 2;
|
||||
const HANDLER_PLAIN: u8 = 3; // extern "C" fn(c_int)
|
||||
#[expect(clippy::unseparated_literal_suffix, reason = "clarity: this is the SA_SIGACTION variant")]
|
||||
#[allow(clippy::unseparated_literal_suffix, reason = "clarity: this is the SA_SIGACTION variant")]
|
||||
const HANDLER_SIGACTION: u8 = 4; // extern "C" fn(c_int, *mut siginfo_t, *mut c_void)
|
||||
|
||||
/// Old SIGBUS handler type — raw atomic, async-signal-safe to read.
|
||||
|
||||
@@ -143,8 +143,7 @@ pub fn format_report(results: &[BenchmarkResult]) -> String {
|
||||
report.push_str("| Test | Variant | RSS | Peak RSS | Page Faults |\n");
|
||||
report.push_str("|------|---------|-----|----------|-------------|\n");
|
||||
|
||||
let mut mem_rows: Vec<&BenchmarkResult> =
|
||||
category_results.iter().copied().collect();
|
||||
let mut mem_rows: Vec<&BenchmarkResult> = category_results.to_vec();
|
||||
mem_rows.sort_by(|a, b| {
|
||||
(&a.test_name, &a.backend, &a.variant)
|
||||
.cmp(&(&b.test_name, &b.backend, &b.variant))
|
||||
@@ -163,7 +162,10 @@ pub fn format_report(results: &[BenchmarkResult]) -> String {
|
||||
report.push('\n');
|
||||
}
|
||||
|
||||
let mut extras: Vec<(String, String, Vec<(String, f64)>)> = category_results
|
||||
type ExtraEntry = (String, f64);
|
||||
type ExtraGroup = (String, String, Vec<ExtraEntry>);
|
||||
|
||||
let mut extras: Vec<ExtraGroup> = category_results
|
||||
.iter()
|
||||
.filter(|r| !r.extra.is_empty())
|
||||
.map(|r| {
|
||||
|
||||
@@ -101,11 +101,15 @@ fn bench_scroll_rss<B: FileReaderBackend>(
|
||||
let sample_interval = 100_000;
|
||||
let max_lines = if config.quick_mode { 100_000 } else { total };
|
||||
|
||||
let upper = max_lines.min(total);
|
||||
let mut rss_samples = Vec::new();
|
||||
let mut hwm_samples = Vec::new();
|
||||
let mut lines_read = 0usize;
|
||||
|
||||
for i in (0..max_lines).step_by(sample_interval) {
|
||||
let _ = reader.get_line(i);
|
||||
for i in (0..upper).step_by(sample_interval) {
|
||||
if reader.get_line(i).is_some() {
|
||||
lines_read += 1;
|
||||
}
|
||||
let rss = MetricsCollector::read_rss();
|
||||
rss_samples.push(rss.vm_rss_kb);
|
||||
hwm_samples.push(rss.vm_hwm_kb);
|
||||
@@ -124,7 +128,7 @@ fn bench_scroll_rss<B: FileReaderBackend>(
|
||||
"max_hwm_kb".into(),
|
||||
hwm_samples.iter().copied().fold(0u64, u64::max) as f64,
|
||||
);
|
||||
extra.insert("lines_read".into(), max_lines.min(total) as f64);
|
||||
extra.insert("lines_read".into(), lines_read as f64);
|
||||
|
||||
reader.close();
|
||||
|
||||
|
||||
@@ -24,13 +24,14 @@ fn bench_truncate_safety_mmap(
|
||||
dir: &std::path::Path,
|
||||
) -> Vec<BenchmarkResult> {
|
||||
let sub_dir = dir.join("trunc_mmap");
|
||||
let path = data_gen::generate_growable_file(&sub_dir).expect("Failed to create file");
|
||||
let iterations: usize = if _config.quick_mode { 3 } else { 10 };
|
||||
|
||||
let mut latencies = Vec::with_capacity(iterations);
|
||||
let mut sigbus_detected = 0usize;
|
||||
|
||||
for _ in 0..iterations {
|
||||
let path = data_gen::generate_growable_file(&sub_dir).expect("Failed to create file");
|
||||
|
||||
mmap_reader::reset_sigbus_flag();
|
||||
|
||||
let reader = MmapReaderPlain::open(&path).expect("Failed to open file");
|
||||
@@ -48,12 +49,6 @@ fn bench_truncate_safety_mmap(
|
||||
sigbus_detected += 1;
|
||||
}
|
||||
reader.close();
|
||||
|
||||
let mut f = std::fs::File::create(&path).expect("Failed to recreate file");
|
||||
use std::io::Write;
|
||||
for i in 0..1000u64 {
|
||||
writeln!(f, "restored line {i}").unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
let rss = MetricsCollector::read_rss();
|
||||
@@ -82,13 +77,14 @@ fn bench_truncate_safety_pread(
|
||||
dir: &std::path::Path,
|
||||
) -> Vec<BenchmarkResult> {
|
||||
let sub_dir = dir.join("trunc_pread");
|
||||
let path = data_gen::generate_growable_file(&sub_dir).expect("Failed to create file");
|
||||
let iterations: usize = if _config.quick_mode { 3 } else { 10 };
|
||||
|
||||
let mut latencies = Vec::with_capacity(iterations);
|
||||
let mut error_count = 0usize;
|
||||
|
||||
for _ in 0..iterations {
|
||||
let path = data_gen::generate_growable_file(&sub_dir).expect("Failed to create file");
|
||||
|
||||
let reader = PreadReaderPlain::open(&path).expect("Failed to open file");
|
||||
let original_size = reader.file_size();
|
||||
|
||||
@@ -104,12 +100,6 @@ fn bench_truncate_safety_pread(
|
||||
error_count += 1;
|
||||
}
|
||||
reader.close();
|
||||
|
||||
let mut f = std::fs::File::create(&path).expect("Failed to recreate file");
|
||||
use std::io::Write;
|
||||
for i in 0..1000u64 {
|
||||
writeln!(f, "restored line {i}").unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
let rss = MetricsCollector::read_rss();
|
||||
|
||||
@@ -17,7 +17,8 @@ memmap2.workspace = true
|
||||
directories.workspace = true
|
||||
xxhash-rust.workspace = true
|
||||
bincode.workspace = true
|
||||
unicode-width.workspace = true
|
||||
tempfile.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
insta.workspace = true
|
||||
tempfile.workspace = true
|
||||
|
||||
@@ -6,8 +6,44 @@ use crate::io::line_index::LineIndex;
|
||||
|
||||
pub struct IndexCache;
|
||||
|
||||
/// Write `buf` to `dest` atomically using a unique temporary file in the same directory.
|
||||
///
|
||||
/// Each call creates its own temp file via `tempfile::Builder`, eliminating collisions
|
||||
/// when multiple threads (or processes) save to the same cache path concurrently.
|
||||
/// The temp file is created in `dest.parent()` so the final `rename` stays on the
|
||||
/// same filesystem and remains atomic.
|
||||
fn write_cache_atomically(dest: &Path, buf: &[u8]) -> std::io::Result<()> {
|
||||
let dir = dest.parent().ok_or_else(|| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"cache destination has no parent directory",
|
||||
)
|
||||
})?;
|
||||
|
||||
let mut tmp = tempfile::Builder::new()
|
||||
.prefix("index-cache-")
|
||||
.suffix(".tmp")
|
||||
.tempfile_in(dir)?;
|
||||
|
||||
tmp.as_file_mut().write_all(buf)?;
|
||||
tmp.as_file_mut().sync_all()?;
|
||||
|
||||
tmp.persist(dest).map(|_| ()).map_err(|e| e.error)
|
||||
}
|
||||
|
||||
fn encode_cache(file_hash: u64, index: &LineIndex) -> std::io::Result<Vec<u8>> {
|
||||
let index_bytes = bincode::serialize(index)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
||||
|
||||
let mut buf = Vec::with_capacity(1 + 8 + index_bytes.len());
|
||||
buf.push(CACHE_VERSION);
|
||||
buf.extend_from_slice(&file_hash.to_le_bytes());
|
||||
buf.extend_from_slice(&index_bytes);
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
impl IndexCache {
|
||||
/// Save a `LineIndex` to disk using atomic write (write to .tmp, then rename).
|
||||
/// Save a `LineIndex` to disk using atomic write (unique temp file, then rename).
|
||||
///
|
||||
/// The file hash is derived from `data` (the same byte slice used to build the index),
|
||||
/// avoiding TOCTOU issues from re-reading the file from disk.
|
||||
@@ -21,23 +57,8 @@ impl IndexCache {
|
||||
})?;
|
||||
|
||||
let file_hash = compute_data_hash(data);
|
||||
let index_bytes = bincode::serialize(index)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
||||
|
||||
let mut buf = Vec::with_capacity(1 + 8 + index_bytes.len());
|
||||
buf.push(CACHE_VERSION);
|
||||
buf.extend_from_slice(&file_hash.to_le_bytes());
|
||||
buf.extend_from_slice(&index_bytes);
|
||||
|
||||
let tmp_path = dest.with_extension("index.tmp");
|
||||
{
|
||||
let mut f = std::fs::File::create(&tmp_path)?;
|
||||
f.write_all(&buf)?;
|
||||
f.sync_all()?;
|
||||
}
|
||||
std::fs::rename(&tmp_path, &dest)?;
|
||||
|
||||
Ok(())
|
||||
let buf = encode_cache(file_hash, index)?;
|
||||
write_cache_atomically(&dest, &buf)
|
||||
}
|
||||
|
||||
/// Save a `LineIndex` to disk, computing the hash by re-reading the file.
|
||||
@@ -50,23 +71,8 @@ impl IndexCache {
|
||||
})?;
|
||||
|
||||
let file_hash = compute_file_hash(file_path)?;
|
||||
let index_bytes = bincode::serialize(index)
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
|
||||
|
||||
let mut buf = Vec::with_capacity(1 + 8 + index_bytes.len());
|
||||
buf.push(CACHE_VERSION);
|
||||
buf.extend_from_slice(&file_hash.to_le_bytes());
|
||||
buf.extend_from_slice(&index_bytes);
|
||||
|
||||
let tmp_path = dest.with_extension("index.tmp");
|
||||
{
|
||||
let mut f = std::fs::File::create(&tmp_path)?;
|
||||
f.write_all(&buf)?;
|
||||
f.sync_all()?;
|
||||
}
|
||||
std::fs::rename(&tmp_path, &dest)?;
|
||||
|
||||
Ok(())
|
||||
let buf = encode_cache(file_hash, index)?;
|
||||
write_cache_atomically(&dest, &buf)
|
||||
}
|
||||
|
||||
/// Load a cached `LineIndex` from disk.
|
||||
@@ -305,4 +311,77 @@ mod tests {
|
||||
|
||||
assert_ne!(h1, h2, "hash should change when content changes");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_concurrent_save_with_hash_no_corruption() {
|
||||
use std::sync::{Arc, Barrier};
|
||||
|
||||
let file = make_test_file(300);
|
||||
let data = std::fs::read(file.path()).unwrap();
|
||||
|
||||
let num_threads = 8;
|
||||
let iterations = 50;
|
||||
let barrier = Arc::new(Barrier::new(num_threads));
|
||||
let path = file.path().to_path_buf();
|
||||
|
||||
let handles: Vec<_> = (0..num_threads)
|
||||
.map(|_| {
|
||||
let barrier = Arc::clone(&barrier);
|
||||
let path = path.clone();
|
||||
let data = data.clone();
|
||||
std::thread::spawn(move || {
|
||||
let index = LineIndex::from_bytes(&data);
|
||||
barrier.wait();
|
||||
for _ in 0..iterations {
|
||||
IndexCache::save_with_hash(&path, &index, &data)
|
||||
.expect("concurrent save_with_hash should succeed");
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
for h in handles {
|
||||
h.join().expect("thread should not panic");
|
||||
}
|
||||
|
||||
let loaded = IndexCache::load(file.path()).expect("final cache should load successfully");
|
||||
let expected = LineIndex::from_bytes(&data);
|
||||
assert_eq!(loaded.line_count(), expected.line_count());
|
||||
assert_eq!(
|
||||
loaded.sampled_offsets(),
|
||||
expected.sampled_offsets(),
|
||||
"concurrent writes must not produce interleaved data"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_concurrent_save_same_dest_all_succeed() {
|
||||
use std::sync::{Arc, Barrier};
|
||||
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let dest = dir.path().join("test.index");
|
||||
|
||||
let payloads: Vec<Vec<u8>> = (0..8).map(|i| vec![i; 64 * 1024]).collect();
|
||||
let barrier = Arc::new(Barrier::new(8));
|
||||
|
||||
let handles: Vec<_> = payloads
|
||||
.into_iter()
|
||||
.map(|payload| {
|
||||
let barrier = Arc::clone(&barrier);
|
||||
let dest = dest.clone();
|
||||
std::thread::spawn(move || {
|
||||
barrier.wait();
|
||||
write_cache_atomically(&dest, &payload)
|
||||
.expect("concurrent atomic write should succeed");
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
for h in handles {
|
||||
h.join().expect("thread should not panic");
|
||||
}
|
||||
|
||||
let final_data = std::fs::read(&dest).expect("dest file should exist");
|
||||
assert_eq!(final_data.len(), 64 * 1024, "final file must be exactly one payload");
|
||||
}
|
||||
}
|
||||
@@ -18,4 +18,5 @@ pub mod cache_util;
|
||||
pub mod index_cache;
|
||||
pub mod line_sampler;
|
||||
pub mod progressive_reader;
|
||||
pub mod read_cache;
|
||||
pub mod wrap;
|
||||
|
||||
@@ -10,6 +10,24 @@ use crate::io::line_index::LineIndex;
|
||||
use crate::io::line_sampler::sample_line_count;
|
||||
use crate::io::wrap::{format_json_line, wrap_line_chars, MAX_WRAP_INPUT_LEN};
|
||||
|
||||
// ─── Cancel-aware channel helpers ────────────────────────────────────────────
|
||||
|
||||
/// Send a message on `tx`, but abort if `cancel_rx` fires first.
|
||||
///
|
||||
/// Uses `crossbeam_channel::select!` so the thread sleeps efficiently instead
|
||||
/// of busy-looping. Used for terminal messages (Complete / Error) that must
|
||||
/// not be silently dropped while the receiver is still alive.
|
||||
fn send_cancelable<T>(
|
||||
tx: &crossbeam_channel::Sender<T>,
|
||||
msg: T,
|
||||
cancel_rx: &crossbeam_channel::Receiver<()>,
|
||||
) {
|
||||
crossbeam_channel::select! {
|
||||
send(tx, msg) -> _ => {}
|
||||
recv(cancel_rx) -> _ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// ─── IndexerMessage ──────────────────────────────────────────────────────────
|
||||
|
||||
pub enum IndexerMessage {
|
||||
@@ -102,7 +120,7 @@ impl VisualHeightIndex {
|
||||
}
|
||||
}
|
||||
|
||||
fn with_params(mut self, json_format: bool, terminal_width: usize) -> Self {
|
||||
pub fn with_params(mut self, json_format: bool, terminal_width: usize) -> Self {
|
||||
self.json_format = json_format;
|
||||
self.terminal_width = terminal_width;
|
||||
self
|
||||
@@ -159,6 +177,31 @@ impl VisualHeightIndex {
|
||||
self.total_visual_rows += h as u64;
|
||||
}
|
||||
}
|
||||
|
||||
/// Replace the visual height of the last logical line. O(1).
|
||||
///
|
||||
/// Must be called **before** `extend_from_heights` so that the last line
|
||||
/// index still refers to the pre-extension line.
|
||||
pub fn replace_last_line_height(&mut self, new_height: usize) {
|
||||
let n = self.prefix_sums.len();
|
||||
if n < 2 {
|
||||
return;
|
||||
}
|
||||
let last_line = n - 2;
|
||||
let old_height = self.prefix_sums[last_line + 1] - self.prefix_sums[last_line];
|
||||
let new_height = new_height as u64;
|
||||
if new_height == old_height {
|
||||
return;
|
||||
}
|
||||
let delta = new_height.abs_diff(old_height);
|
||||
if new_height > old_height {
|
||||
self.prefix_sums[last_line + 1] += delta;
|
||||
self.total_visual_rows += delta;
|
||||
} else {
|
||||
self.prefix_sums[last_line + 1] -= delta;
|
||||
self.total_visual_rows -= delta;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─── VisualHeightRebuildResult ────────────────────────────────────────────────
|
||||
@@ -180,6 +223,9 @@ pub fn compute_line_visual_height(
|
||||
}
|
||||
if json_format {
|
||||
let formatted = format_json_line(line_text);
|
||||
if formatted.len() > MAX_WRAP_INPUT_LEN {
|
||||
return 1;
|
||||
}
|
||||
compute_text_visual_height(&formatted, terminal_width)
|
||||
} else {
|
||||
compute_text_visual_height(line_text, terminal_width)
|
||||
@@ -245,20 +291,20 @@ pub fn spawn_indexer(
|
||||
let file = match std::fs::File::open(&path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
let _ = tx.send(IndexerMessage::Error {
|
||||
send_cancelable(&tx, IndexerMessage::Error {
|
||||
generation,
|
||||
message: e.to_string(),
|
||||
});
|
||||
}, &cancel_rx);
|
||||
return;
|
||||
}
|
||||
};
|
||||
let target_len = match file.metadata() {
|
||||
Ok(m) => m.len(),
|
||||
Err(e) => {
|
||||
let _ = tx.send(IndexerMessage::Error {
|
||||
send_cancelable(&tx, IndexerMessage::Error {
|
||||
generation,
|
||||
message: e.to_string(),
|
||||
});
|
||||
}, &cancel_rx);
|
||||
return;
|
||||
}
|
||||
};
|
||||
@@ -275,10 +321,10 @@ pub fn spawn_indexer(
|
||||
let buf = match buf_reader.fill_buf() {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
let _ = tx.send(IndexerMessage::Error {
|
||||
send_cancelable(&tx, IndexerMessage::Error {
|
||||
generation,
|
||||
message: e.to_string(),
|
||||
});
|
||||
}, &cancel_rx);
|
||||
return;
|
||||
}
|
||||
};
|
||||
@@ -310,7 +356,7 @@ pub fn spawn_indexer(
|
||||
}
|
||||
if target_len > 0 {
|
||||
let percent = (chunk_offset as f64 / target_len as f64) * 100.0;
|
||||
let _ = tx.send(IndexerMessage::Progress {
|
||||
let _ = tx.try_send(IndexerMessage::Progress {
|
||||
generation,
|
||||
percent,
|
||||
lines_scanned: newline_count as u64,
|
||||
@@ -361,18 +407,18 @@ pub fn spawn_indexer(
|
||||
Ok(_) | Err(_) => None,
|
||||
},
|
||||
Err(e) => {
|
||||
let _ = tx.send(IndexerMessage::Error {
|
||||
send_cancelable(&tx, IndexerMessage::Error {
|
||||
generation,
|
||||
message: e.to_string(),
|
||||
});
|
||||
}, &cancel_rx);
|
||||
return;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
let _ = tx.send(IndexerMessage::Error {
|
||||
send_cancelable(&tx, IndexerMessage::Error {
|
||||
generation,
|
||||
message: e.to_string(),
|
||||
});
|
||||
}, &cancel_rx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -391,11 +437,11 @@ pub fn spawn_indexer(
|
||||
None
|
||||
};
|
||||
|
||||
let _ = tx.send(IndexerMessage::Complete {
|
||||
send_cancelable(&tx, IndexerMessage::Complete {
|
||||
generation,
|
||||
reader,
|
||||
visual_height_index,
|
||||
});
|
||||
}, &cancel_rx);
|
||||
});
|
||||
|
||||
rx
|
||||
@@ -455,7 +501,7 @@ pub fn spawn_visual_height_rebuild(
|
||||
let index =
|
||||
VisualHeightIndex::build(&visual_heights).with_params(json_format, terminal_width);
|
||||
|
||||
let _ = tx.send(VisualHeightRebuildResult { generation, index });
|
||||
send_cancelable(&tx, VisualHeightRebuildResult { generation, index }, &cancel_rx);
|
||||
});
|
||||
|
||||
rx
|
||||
@@ -752,7 +798,7 @@ impl ProgressiveFileReader {
|
||||
|
||||
pub fn start_visual_height_rebuild(&mut self, terminal_width: usize, json_format: bool) {
|
||||
if let Some(tx) = self.vh_rebuild_cancel_tx.take() {
|
||||
let _ = tx.send(());
|
||||
let _ = tx.try_send(());
|
||||
}
|
||||
|
||||
let (cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
|
||||
@@ -807,10 +853,10 @@ impl ProgressiveFileReader {
|
||||
impl Drop for ProgressiveFileReader {
|
||||
fn drop(&mut self) {
|
||||
if let Some(tx) = &self.cancel_tx {
|
||||
let _ = tx.send(());
|
||||
let _ = tx.try_send(());
|
||||
}
|
||||
if let Some(tx) = self.vh_rebuild_cancel_tx.take() {
|
||||
let _ = tx.send(());
|
||||
let _ = tx.try_send(());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1339,6 +1385,85 @@ mod tests {
|
||||
assert_eq!(idx.total_visual_rows(), 6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replace_last_line_height_increase() {
|
||||
let heights = [2, 3];
|
||||
let mut idx = VisualHeightIndex::build(&heights);
|
||||
assert_eq!(idx.total_visual_rows(), 5);
|
||||
assert_eq!(idx.visual_height_of_line(1), 3);
|
||||
|
||||
idx.replace_last_line_height(7);
|
||||
|
||||
assert_eq!(idx.visual_height_of_line(0), 2);
|
||||
assert_eq!(idx.visual_height_of_line(1), 7);
|
||||
assert_eq!(idx.total_visual_rows(), 9);
|
||||
assert_eq!(idx.cursor_to_first_visual_row(0), 0);
|
||||
assert_eq!(idx.cursor_to_first_visual_row(1), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replace_last_line_height_decrease() {
|
||||
let heights = [2, 5];
|
||||
let mut idx = VisualHeightIndex::build(&heights);
|
||||
|
||||
idx.replace_last_line_height(1);
|
||||
|
||||
assert_eq!(idx.visual_height_of_line(0), 2);
|
||||
assert_eq!(idx.visual_height_of_line(1), 1);
|
||||
assert_eq!(idx.total_visual_rows(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replace_last_line_height_same_is_noop() {
|
||||
let heights = [2, 3];
|
||||
let mut idx = VisualHeightIndex::build(&heights);
|
||||
let total_before = idx.total_visual_rows();
|
||||
|
||||
idx.replace_last_line_height(3);
|
||||
|
||||
assert_eq!(idx.total_visual_rows(), total_before);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replace_last_line_height_then_extend() {
|
||||
let heights = [2, 1];
|
||||
let mut idx = VisualHeightIndex::build(&heights);
|
||||
assert_eq!(idx.total_visual_rows(), 3);
|
||||
|
||||
idx.replace_last_line_height(4);
|
||||
idx.extend_from_heights(&[3]);
|
||||
|
||||
assert_eq!(idx.line_count(), 3);
|
||||
assert_eq!(idx.visual_height_of_line(0), 2);
|
||||
assert_eq!(idx.visual_height_of_line(1), 4);
|
||||
assert_eq!(idx.visual_height_of_line(2), 3);
|
||||
assert_eq!(idx.total_visual_rows(), 9);
|
||||
assert_eq!(idx.cursor_to_first_visual_row(0), 0);
|
||||
assert_eq!(idx.cursor_to_first_visual_row(1), 2);
|
||||
assert_eq!(idx.cursor_to_first_visual_row(2), 6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replace_last_line_height_single_line() {
|
||||
let heights = [5];
|
||||
let mut idx = VisualHeightIndex::build(&heights);
|
||||
|
||||
idx.replace_last_line_height(2);
|
||||
|
||||
assert_eq!(idx.visual_height_of_line(0), 2);
|
||||
assert_eq!(idx.total_visual_rows(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_replace_last_line_height_empty_index() {
|
||||
let heights: [usize; 0] = [];
|
||||
let mut idx = VisualHeightIndex::build(&heights);
|
||||
|
||||
idx.replace_last_line_height(5);
|
||||
|
||||
assert_eq!(idx.total_visual_rows(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_spawn_indexer_file_truncated_during_scan() {
|
||||
let mut content = Vec::new();
|
||||
@@ -1405,4 +1530,103 @@ mod tests {
|
||||
Ok(_) => panic!("should have been discarded due to line count mismatch"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_send_cancelable_delivers_on_empty_channel() {
|
||||
let (tx, rx) = crossbeam_channel::bounded(2);
|
||||
let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
|
||||
|
||||
send_cancelable(&tx, 42, &cancel_rx);
|
||||
|
||||
assert_eq!(rx.try_recv(), Ok(42));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_send_cancelable_aborts_on_cancel() {
|
||||
let (tx, rx) = crossbeam_channel::bounded(1);
|
||||
let (cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
|
||||
|
||||
tx.send("filler").unwrap();
|
||||
|
||||
let handle = std::thread::spawn(move || {
|
||||
send_cancelable(&tx, "important", &cancel_rx);
|
||||
});
|
||||
|
||||
cancel_tx.send(()).unwrap();
|
||||
handle.join().unwrap();
|
||||
|
||||
assert_eq!(rx.try_recv(), Ok("filler"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_send_cancelable_drains_when_room_available() {
|
||||
let (tx, rx) = crossbeam_channel::bounded(1);
|
||||
let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
|
||||
|
||||
tx.send("first").unwrap();
|
||||
|
||||
let rx_clone = rx.clone();
|
||||
let handle = std::thread::spawn(move || {
|
||||
std::thread::sleep(std::time::Duration::from_millis(50));
|
||||
let _ = rx_clone.try_recv();
|
||||
});
|
||||
|
||||
send_cancelable(&tx, "second", &cancel_rx);
|
||||
handle.join().unwrap();
|
||||
|
||||
assert_eq!(rx.try_recv(), Ok("second"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_progress_try_send_does_not_block_full_channel() {
|
||||
let mut content = Vec::new();
|
||||
for i in 0..50_000 {
|
||||
writeln!(content, "line number {:08}", i).unwrap();
|
||||
}
|
||||
let f = create_temp_file(&content);
|
||||
let (_cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
|
||||
|
||||
let rx = spawn_indexer(f.path().to_path_buf(), 1, 80, false, cancel_rx);
|
||||
|
||||
let mut got_complete = false;
|
||||
let timeout = std::time::Duration::from_secs(15);
|
||||
let start = std::time::Instant::now();
|
||||
while start.elapsed() < timeout {
|
||||
match rx.recv_timeout(std::time::Duration::from_secs(1)) {
|
||||
Ok(IndexerMessage::Progress { .. }) => {}
|
||||
Ok(IndexerMessage::Complete { .. }) => {
|
||||
got_complete = true;
|
||||
break;
|
||||
}
|
||||
Ok(IndexerMessage::Error { message, .. }) => {
|
||||
panic!("unexpected error: {}", message);
|
||||
}
|
||||
Err(e) => panic!("recv error: {:?}", e),
|
||||
}
|
||||
}
|
||||
assert!(got_complete, "indexer should complete even when Progress fills channel");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_indexer_cancel_with_full_channel() {
|
||||
let mut content = Vec::new();
|
||||
for i in 0..500_000 {
|
||||
writeln!(content, "line number {:08}", i).unwrap();
|
||||
}
|
||||
let f = create_temp_file(&content);
|
||||
let (cancel_tx, cancel_rx) = crossbeam_channel::bounded(1);
|
||||
|
||||
let rx = spawn_indexer(f.path().to_path_buf(), 1, 80, false, cancel_rx);
|
||||
|
||||
cancel_tx.send(()).unwrap();
|
||||
|
||||
let result = rx.recv_timeout(std::time::Duration::from_secs(5));
|
||||
match result {
|
||||
Err(crossbeam_channel::RecvTimeoutError::Timeout)
|
||||
| Err(crossbeam_channel::RecvTimeoutError::Disconnected) => {}
|
||||
Ok(IndexerMessage::Complete { .. }) => {}
|
||||
Ok(IndexerMessage::Error { .. }) => {}
|
||||
Ok(IndexerMessage::Progress { .. }) => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,9 +56,15 @@ impl LruReadCache {
|
||||
/// on a hit, or fills a cache slot on a miss. Cross-block reads go through
|
||||
/// the spill buffer and are not cached.
|
||||
pub fn get(&mut self, file: &File, offset: u64, len: usize) -> io::Result<&[u8]> {
|
||||
if len == 0 {
|
||||
return Ok(&[]);
|
||||
}
|
||||
|
||||
let aligned_key = offset & !(BLOCK_ALIGN as u64 - 1);
|
||||
let request_end = offset.saturating_add(len as u64);
|
||||
let block_end = aligned_key + BLOCK_ALIGN as u64;
|
||||
let request_end = offset.checked_add(len as u64).ok_or_else(|| {
|
||||
io::Error::new(io::ErrorKind::InvalidInput, "read range overflows u64")
|
||||
})?;
|
||||
let block_end = aligned_key.saturating_add(BLOCK_ALIGN as u64);
|
||||
|
||||
if request_end > block_end {
|
||||
self.spill_buf.resize(len, 0);
|
||||
@@ -74,7 +80,8 @@ impl LruReadCache {
|
||||
}
|
||||
|
||||
let hit_idx = self.slots.iter().position(|slot| {
|
||||
slot.block_offset == aligned_key && request_end <= slot.block_offset + slot.len as u64
|
||||
let slot_end = slot.block_offset.saturating_add(slot.len as u64);
|
||||
slot.len > 0 && slot.block_offset == aligned_key && request_end <= slot_end
|
||||
});
|
||||
|
||||
if let Some(idx) = hit_idx {
|
||||
@@ -96,8 +103,8 @@ impl LruReadCache {
|
||||
let slot = &mut self.slots[evict_idx];
|
||||
let bytes_read = file.read_at(&mut slot.buf, aligned_key)?;
|
||||
|
||||
// Note: get(file, 0, 0) on an empty file now returns Err (old code returned Ok(&[])).
|
||||
// No callers pass len == 0, so this is a safe semantic change.
|
||||
// Non-empty reads that return 0 are EOF. Zero-length reads are handled above
|
||||
// as a successful no-op.
|
||||
if bytes_read == 0 {
|
||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "read 0 bytes"));
|
||||
}
|
||||
@@ -107,7 +114,8 @@ impl LruReadCache {
|
||||
slot.last_access = self.tick;
|
||||
self.tick += 1;
|
||||
|
||||
if request_end > aligned_key + bytes_read as u64 {
|
||||
let bytes_end = aligned_key.saturating_add(bytes_read as u64);
|
||||
if request_end > bytes_end {
|
||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "short read"));
|
||||
}
|
||||
|
||||
@@ -118,7 +126,9 @@ impl LruReadCache {
|
||||
/// Invalidate all cache slots and the spill buffer.
|
||||
pub fn clear(&mut self) {
|
||||
for slot in &mut self.slots {
|
||||
slot.block_offset = 0;
|
||||
slot.len = 0;
|
||||
slot.last_access = 0;
|
||||
}
|
||||
self.spill_len = 0;
|
||||
}
|
||||
@@ -314,9 +324,11 @@ mod tests {
|
||||
|
||||
cache.clear();
|
||||
|
||||
// All slots should have len == 0.
|
||||
// All slots should be fully reset.
|
||||
for slot in &cache.slots {
|
||||
assert_eq!(slot.block_offset, 0);
|
||||
assert_eq!(slot.len, 0);
|
||||
assert_eq!(slot.last_access, 0);
|
||||
}
|
||||
assert_eq!(cache.spill_len, 0);
|
||||
|
||||
@@ -432,4 +444,52 @@ mod tests {
|
||||
assert_eq!(&line2[..4090], &[b'B'; 4090]);
|
||||
assert_eq!(line2[4090], b'\n');
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn zero_len_read_is_noop_on_fresh_cache() {
|
||||
let f = make_file(b"");
|
||||
let file = File::open(f.path()).unwrap();
|
||||
let mut cache = ReadCache::new();
|
||||
|
||||
let result = cache.get(&file, 0, 0).unwrap();
|
||||
assert!(result.is_empty());
|
||||
assert_eq!(cache.tick, 0);
|
||||
assert!(cache.slots.iter().all(|s| s.len == 0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn zero_len_read_is_noop_on_populated_cache() {
|
||||
let f = make_file(b"abc");
|
||||
let file = File::open(f.path()).unwrap();
|
||||
let mut cache = ReadCache::new();
|
||||
|
||||
cache.get(&file, 0, 1).unwrap();
|
||||
let tick_before = cache.tick;
|
||||
|
||||
let result = cache.get(&file, 0, 0).unwrap();
|
||||
assert!(result.is_empty());
|
||||
assert_eq!(cache.tick, tick_before);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn zero_len_read_at_max_offset_is_ok() {
|
||||
let f = make_file(b"");
|
||||
let file = File::open(f.path()).unwrap();
|
||||
let mut cache = ReadCache::new();
|
||||
|
||||
let result = cache.get(&file, u64::MAX, 0).unwrap();
|
||||
assert!(result.is_empty());
|
||||
assert_eq!(cache.tick, 0);
|
||||
assert!(cache.slots.iter().all(|s| s.len == 0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nonzero_read_range_overflow_returns_invalid_input() {
|
||||
let f = make_file(b"abc");
|
||||
let file = File::open(f.path()).unwrap();
|
||||
let mut cache = ReadCache::new();
|
||||
|
||||
let err = cache.get(&file, u64::MAX, 1).unwrap_err();
|
||||
assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,19 @@
|
||||
/// Maximum input length for wrap/format operations (10 MB).
|
||||
/// Lines exceeding this are returned as-is to avoid pathological cases.
|
||||
/// Callers should check against this constant before invoking `wrap_line_chars`
|
||||
/// to avoid pathological cases on oversized lines.
|
||||
pub const MAX_WRAP_INPUT_LEN: usize = 10 * 1024 * 1024;
|
||||
|
||||
/// Split a line into chunks of exactly `width` characters (display columns).
|
||||
/// Column spacing for tab stop alignment.
|
||||
const TAB_WIDTH: usize = 4;
|
||||
|
||||
/// Split a line into chunks of exactly `width` display columns.
|
||||
/// For a log viewer, we want character-level wrapping, not word-level.
|
||||
/// Uses `unicode-width` for correct CJK/emoji/zero-width handling.
|
||||
/// Tab characters expand to the next tab-stop boundary and split across
|
||||
/// rows when the expansion exceeds the remaining width.
|
||||
pub fn wrap_line_chars(line: &str, width: usize) -> Vec<String> {
|
||||
use unicode_width::UnicodeWidthChar;
|
||||
|
||||
if width == 0 {
|
||||
return vec![String::new()];
|
||||
}
|
||||
@@ -15,21 +24,40 @@ pub fn wrap_line_chars(line: &str, width: usize) -> Vec<String> {
|
||||
let mut row = String::new();
|
||||
let mut col = 0;
|
||||
for ch in line.chars() {
|
||||
let w = if ch == '\t' { 4 } else { 1 };
|
||||
if col + w > width && !row.is_empty() {
|
||||
result.push(std::mem::take(&mut row));
|
||||
col = 0;
|
||||
}
|
||||
if ch == '\t' {
|
||||
row.push_str(" ");
|
||||
col += 4;
|
||||
let tab_stop = TAB_WIDTH - (col % TAB_WIDTH);
|
||||
let mut remaining = tab_stop;
|
||||
while remaining > 0 {
|
||||
let avail = width.saturating_sub(col);
|
||||
let fill = remaining.min(avail);
|
||||
for _ in 0..fill {
|
||||
row.push(' ');
|
||||
}
|
||||
col += fill;
|
||||
remaining -= fill;
|
||||
if col >= width {
|
||||
result.push(std::mem::take(&mut row));
|
||||
col = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let w = if ch.is_control() {
|
||||
// Control characters (except tab): width 0, still pushed to preserve content.
|
||||
// Visible rendering is the caller's responsibility.
|
||||
0
|
||||
} else {
|
||||
ch.width().unwrap_or(0)
|
||||
};
|
||||
if col + w > width && !row.is_empty() {
|
||||
result.push(std::mem::take(&mut row));
|
||||
col = 0;
|
||||
}
|
||||
row.push(ch);
|
||||
col += w;
|
||||
}
|
||||
if col >= width {
|
||||
result.push(std::mem::take(&mut row));
|
||||
col = 0;
|
||||
if col >= width {
|
||||
result.push(std::mem::take(&mut row));
|
||||
col = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if !row.is_empty() {
|
||||
@@ -96,7 +124,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_wrap_with_tab() {
|
||||
let result = wrap_line_chars("a\tb", 4);
|
||||
assert_eq!(result, vec!["a", " ", "b"]);
|
||||
assert_eq!(result, vec!["a ", "b"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -132,4 +160,75 @@ mod tests {
|
||||
fn test_max_wrap_input_len_constant() {
|
||||
assert_eq!(MAX_WRAP_INPUT_LEN, 10 * 1024 * 1024);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_wrap_cjk_chars() {
|
||||
let result = wrap_line_chars("你好", 3);
|
||||
assert_eq!(result, vec!["你", "好"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_wrap_cjk_ascii_mixed() {
|
||||
let result = wrap_line_chars("a你好", 4);
|
||||
assert_eq!(result, vec!["a你", "好"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_wrap_zero_width_char() {
|
||||
let result = wrap_line_chars("a\u{200B}b", 2);
|
||||
assert_eq!(result, vec!["a\u{200B}b"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_wrap_emoji() {
|
||||
let result = wrap_line_chars("😀a", 3);
|
||||
assert_eq!(result, vec!["😀a"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_wrap_emoji_exact_wrap() {
|
||||
let result = wrap_line_chars("😀a", 2);
|
||||
assert_eq!(result, vec!["😀", "a"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_wrap_combining_mark() {
|
||||
// Scalar-width wrapping: combining mark (width 0) stays with next base char,
|
||||
// not the preceding one, because the base char already triggered a flush.
|
||||
let result = wrap_line_chars("a\u{0301}b", 1);
|
||||
assert_eq!(result, vec!["a", "\u{0301}b"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_wrap_cjk_width_one() {
|
||||
let result = wrap_line_chars("你好", 1);
|
||||
assert_eq!(result, vec!["你", "好"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tab_narrow_width() {
|
||||
let result = wrap_line_chars("\t", 2);
|
||||
assert_eq!(result, vec![" ", " "]);
|
||||
let result = wrap_line_chars("\t", 1);
|
||||
assert_eq!(result, vec![" ", " ", " ", " "]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tab_stop_alignment() {
|
||||
assert_eq!(wrap_line_chars("a\tb", 8), vec!["a b"]);
|
||||
assert_eq!(wrap_line_chars("ab\t", 4), vec!["ab "]);
|
||||
assert_eq!(wrap_line_chars("abc\tb", 8), vec!["abc b"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tab_at_line_boundary() {
|
||||
let result = wrap_line_chars("a\tb", 4);
|
||||
assert_eq!(result, vec!["a ", "b"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tab_regression_ab_tab() {
|
||||
let result = wrap_line_chars("ab\t", 4);
|
||||
assert_eq!(result, vec!["ab "]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,14 @@
|
||||
// 类似于 Python 的 dict、JavaScript 的 Object/Map、Java 的 HashMap。
|
||||
// 它存储键值对(key-value pairs),可以通过键快速查找对应的值。
|
||||
// 这里用 HashMap<String, Value> 来存储 JSON 中除 timestamp/level 之外的其他字段。
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
// serde::de 中的 Visitor / MapAccess 允许我们自定义 JSON 对象的反序列化过程。
|
||||
// 默认的 serde_json::from_str::<HashMap<_, _>>() 遇到重复键时会采用"后者覆盖前者"(last-wins),
|
||||
// 前面的值被静默丢弃。这里我们通过自定义 Visitor 在反序列化过程中逐个观察 key-value 对,
|
||||
// 在保持 last-wins 行为的同时,将重复 key 的所有值记录到 DuplicateKey 中。
|
||||
use serde::de::{MapAccess, Visitor};
|
||||
use serde::Deserializer;
|
||||
|
||||
// serde_json::Value — 来自 serde_json 库(Rust 中最流行的 JSON 处理库)。
|
||||
// Value 是一个枚举类型,可以表示任意 JSON 值:
|
||||
@@ -32,7 +39,20 @@ use serde_json::Value;
|
||||
// ─── 引入项目内部类型 ──────────────────────────────────────────────────────
|
||||
// crate 表示"当前项目(crate)"。
|
||||
// types 模块中定义了 LogEntry(一条日志记录)和 LogLevel(日志级别,如 INFO/ERROR)。
|
||||
use crate::types::{LogEntry, LogLevel};
|
||||
use crate::types::{DuplicateKey, LogEntry, LogLevel};
|
||||
|
||||
// ─── strip_bom 辅助函数 ──────────────────────────────────────────────────
|
||||
// 剥离行首的 UTF-8 BOM(Byte Order Mark, U+FEFF)。
|
||||
//
|
||||
// Windows 环境和某些导出工具生成的文件会在行首插入 BOM,
|
||||
// 而 serde_json 不接受 BOM 前缀的 JSON 文本(会报 "expected value" 错误)。
|
||||
// 只剥离一个前导 BOM,不处理多个 BOM 或行内 BOM(那些是畸形输入)。
|
||||
//
|
||||
// 参数: line: &str — 输入字符串切片。
|
||||
// 返回: &str — 去掉 BOM 后的字符串切片(借用原始字符串,零分配)。
|
||||
fn strip_bom(line: &str) -> &str {
|
||||
line.strip_prefix('\u{FEFF}').unwrap_or(line)
|
||||
}
|
||||
|
||||
// ─── detect_json_log 函数 ──────────────────────────────────────────────────
|
||||
// 检测一行文本是否是一个 JSON 对象。
|
||||
@@ -63,7 +83,89 @@ pub fn detect_json_log(line: &str) -> bool {
|
||||
// 则匹配成功。_ 是通配符,表示"不关心对象里面的具体内容"。
|
||||
//
|
||||
// 如果匹配到 Ok(Value::Object(_)) 返回 true,否则返回 false。
|
||||
matches!(serde_json::from_str::<Value>(line), Ok(Value::Object(_)))
|
||||
matches!(serde_json::from_str::<Value>(strip_bom(line)), Ok(Value::Object(_)))
|
||||
}
|
||||
|
||||
// ─── DuplicateKeyVisitor ──────────────────────────────────────────────────
|
||||
// 自定义 serde Visitor,在反序列化 JSON 对象时检测重复 key。
|
||||
//
|
||||
// 工作原理:
|
||||
// serde 的 MapAccess trait 允许我们逐个遍历 JSON 对象的 key-value 对。
|
||||
// 每读到一个 (key, value),我们:
|
||||
// 1. 检查这个 key 是否已经见过(通过 HashSet)
|
||||
// 2. 如果是重复 key,记录到 Vec<DuplicateKey> 中(包含所有出现过的值)
|
||||
// 3. 将 key-value 插入 Map(last-wins,与 serde_json 默认行为一致)
|
||||
//
|
||||
// 这样既保持了兼容性(last-wins),又不丢失信息(所有值都记录在 DuplicateKey 中)。
|
||||
struct DuplicateKeyVisitor;
|
||||
|
||||
impl<'de> Visitor<'de> for DuplicateKeyVisitor {
|
||||
// 返回类型:(serde_json::Map, 重复 key 列表)
|
||||
type Value = (serde_json::Map<String, Value>, Vec<DuplicateKey>);
|
||||
|
||||
fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
f.write_str("a JSON object")
|
||||
}
|
||||
|
||||
fn visit_map<A>(self, mut access: A) -> Result<Self::Value, A::Error>
|
||||
where
|
||||
A: MapAccess<'de>,
|
||||
{
|
||||
let mut map = serde_json::Map::new();
|
||||
let mut seen = HashSet::new();
|
||||
let mut duplicates: Vec<DuplicateKey> = Vec::new();
|
||||
|
||||
while let Some((key, value)) = access.next_entry::<String, Value>()? {
|
||||
if !seen.insert(key.clone()) {
|
||||
// 重复 key:将之前 map 中的值和当前值都记录下来
|
||||
if let Some(existing) = duplicates.iter_mut().find(|d| d.key == key) {
|
||||
// 同一个 key 第三次及以上出现:追加当前值
|
||||
existing.values.push(value.clone());
|
||||
} else {
|
||||
// 同一个 key 第二次出现:记录第一次的值 + 当前值
|
||||
let prev_value = map.get(&key).cloned().unwrap_or(Value::Null);
|
||||
duplicates.push(DuplicateKey {
|
||||
key: key.clone(),
|
||||
values: vec![prev_value, value.clone()],
|
||||
});
|
||||
}
|
||||
}
|
||||
// last-wins:后出现的值覆盖前面的值,与 serde_json 默认行为一致
|
||||
map.insert(key, value);
|
||||
}
|
||||
|
||||
Ok((map, duplicates))
|
||||
}
|
||||
}
|
||||
|
||||
/// 使用自定义 Visitor 解析 JSON 对象,同时检测重复 key。
|
||||
///
|
||||
/// 返回 (serde_json::Map, Vec<DuplicateKey>):
|
||||
/// - Map 中存储所有 key-value(重复 key 取 last-wins)
|
||||
/// - Vec 中记录所有重复 key 及其全部值
|
||||
fn parse_json_object_with_duplicates(
|
||||
json: &str,
|
||||
) -> Option<(serde_json::Map<String, Value>, Vec<DuplicateKey>)> {
|
||||
let mut deserializer = serde_json::Deserializer::from_str(json);
|
||||
Some(deserializer.deserialize_map(DuplicateKeyVisitor).ok()?)
|
||||
}
|
||||
|
||||
// ─── take_string_field_from_map 辅助函数 ──────────────────────────────────
|
||||
// 从 serde_json::Map 中安全提取字符串字段。
|
||||
// 功能与原 take_string_field 相同,但操作 serde_json::Map 而非 HashMap。
|
||||
fn take_string_field_from_map(
|
||||
obj: &mut serde_json::Map<String, Value>,
|
||||
keys: &[&str],
|
||||
) -> Option<String> {
|
||||
for key in keys {
|
||||
if obj.get(*key).is_some_and(Value::is_string) {
|
||||
let Some(Value::String(v)) = obj.remove(*key) else {
|
||||
unreachable!("value was checked as string");
|
||||
};
|
||||
return Some(v);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
// ─── parse_line 函数 ──────────────────────────────────────────────────────
|
||||
@@ -73,107 +175,34 @@ pub fn detect_json_log(line: &str) -> bool {
|
||||
// 返回: Option<LogEntry> — 解析成功返回 Some(LogEntry),失败或不合法返回 None。
|
||||
// Option 是 Rust 的可选类型:Some(值) 表示有值,None 表示没有值。
|
||||
pub fn parse_line(line: &str) -> Option<LogEntry> {
|
||||
// ─── 跳过空行 ──────────────────────────────────────────────────────────
|
||||
// line.trim() 去除首尾空白字符(空格、制表符、换行符等)。
|
||||
// .is_empty() 检查是否为空字符串。
|
||||
// 如果去除空白后是空的,说明是空行,不需要解析,直接返回 None。
|
||||
let line = strip_bom(line);
|
||||
|
||||
if line.trim().is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// ─── 解析 JSON 为 HashMap ──────────────────────────────────────────────
|
||||
// serde_json::from_str(line) 尝试将字符串解析为 JSON。
|
||||
// 由于我们声明了 HashMap<String, Value> 类型,Rust 会自动将 JSON 对象
|
||||
// 转换为 HashMap,其中每个键是 String,每个值是 serde_json::Value。
|
||||
//
|
||||
// .ok() 将 Result 转换为 Option:
|
||||
// Ok(值) → Some(值)
|
||||
// Err(_) → None
|
||||
//
|
||||
// 末尾的 ? 是"问号操作符"(try operator),在这里的作用是:
|
||||
// 如果 .ok() 返回 None(即 JSON 解析失败),则整个函数直接返回 None。
|
||||
// 如果返回 Some(hashmap),则将 hashmap 取出并绑定到 fields 变量。
|
||||
//
|
||||
// let mut 表示这是一个"可变变量"(mutable variable),
|
||||
// 后续代码会修改这个 HashMap(从中删除已识别的字段)。
|
||||
let mut fields: HashMap<String, Value> = serde_json::from_str(line).ok()?;
|
||||
// ─── 使用自定义 Visitor 解析 JSON ──────────────────────────────────
|
||||
// 通过 DuplicateKeyVisitor 反序列化,在保持 last-wins 的同时检测重复 key。
|
||||
// 返回的 (serde_json::Map, Vec<DuplicateKey>) 中:
|
||||
// - Map 包含所有 key-value(重复 key 取最后一个值)
|
||||
// - Vec 记录了所有重复 key 及其出现过的全部值
|
||||
let (mut obj, duplicate_keys) = parse_json_object_with_duplicates(line)?;
|
||||
|
||||
// ─── 保存原始行内容 ──────────────────────────────────────────────────
|
||||
// line.to_string() 将 &str(字符串切片引用)转换为 String(拥有所有权的字符串)。
|
||||
// 保存原始行是为了在 UI 中显示未经修改的原始日志内容。
|
||||
let raw_line = line.to_string();
|
||||
|
||||
// ─── 提取时间戳字段 ──────────────────────────────────────────────────
|
||||
// 这段代码尝试从 JSON 中提取时间戳,逻辑如下:
|
||||
//
|
||||
// 1. ["timestamp", "time", "ts", "@timestamp"] — 候选键名数组。
|
||||
// 不同日志系统使用不同的时间戳字段名,这里列出常见的几种。
|
||||
//
|
||||
// 2. .iter() — 创建数组的迭代器,可以逐个遍历元素。
|
||||
//
|
||||
// 3. .find_map(|key| fields.remove(*key)) — 对每个候选键名:
|
||||
// - fields.remove(*key): 尝试从 HashMap 中移除该键并返回对应的值。
|
||||
// 如果键不存在,remove 返回 None。
|
||||
// *key 是解引用(deref),将 &str(引用)转换为 str,因为 remove 接受 &str 类型。
|
||||
// - find_map: 遍历所有候选键,返回第一个 Some(值) 的结果。
|
||||
// 即找到第一个存在的键就停止。
|
||||
//
|
||||
// 4. .and_then(|v| v.as_str().map(String::from)) — 如果找到了时间戳值:
|
||||
// - v.as_str(): 尝试将 serde_json::Value 转换为 &str(字符串切片)。
|
||||
// 如果 Value 不是字符串类型(比如是数字),返回 None。
|
||||
// - .map(String::from): 如果是字符串,将其转换为 String(拥有所有权的字符串)。
|
||||
// - and_then: 类似于 map,但用于"扁平化"嵌套的 Option。
|
||||
// 如果 as_str() 返回 None,整个链返回 None。
|
||||
let timestamp = ["timestamp", "time", "ts", "@timestamp"]
|
||||
.iter()
|
||||
.find_map(|key| fields.remove(*key))
|
||||
.and_then(|v| v.as_str().map(String::from));
|
||||
|
||||
// ─── 提取日志级别字段 ──────────────────────────────────────────────
|
||||
// 与时间戳提取类似,但多了一步:将字符串解析为 LogLevel 枚举。
|
||||
let level = ["level", "lvl", "severity"]
|
||||
.iter()
|
||||
.find_map(|key| fields.remove(*key))
|
||||
.and_then(|v| v.as_str().map(String::from))
|
||||
// .map(|s| s.parse::<LogLevel>(...)) — 尝试将字符串解析为 LogLevel 枚举。
|
||||
// parse::<LogLevel> 中的 ::<LogLevel> 是泛型参数(turbofish 语法),
|
||||
// 指定我们要将字符串解析为 LogLevel 类型。
|
||||
//
|
||||
// .unwrap_or_else(|e| match e {}) — 错误处理:
|
||||
// - 如果解析成功,直接返回 LogLevel 值。
|
||||
// - 如果解析失败(字符串不匹配任何已知的日志级别),执行闭包。
|
||||
// - |e| match e {}: 这个闭包接收解析错误 e,用 match e {} 进行"穷尽匹配"。
|
||||
// 由于 LogLevel 的 parse 错误类型是一个空枚举(没有任何变体),
|
||||
// match e {} 意味着"这个分支永远不会执行"(unreachable)。
|
||||
// 但实际上,如果 parse 失败,unwrap_or_else 不会执行这个闭包——
|
||||
// 等等,这里有个细微之处:
|
||||
// unwrap_or_else 只在 Err 时执行闭包,但 match e {} 对空枚举是合法的
|
||||
// (因为空枚举没有任何可能的值,所以 match 是穷尽的)。
|
||||
// 不过这里的实际效果是:如果 parse 失败,整个 .map() 返回 None
|
||||
// (因为 unwrap_or_else 返回的类型是 LogLevel,而空 match 不会有返回值)。
|
||||
//
|
||||
// 实际上更准确的解释:parse() 的错误类型是 Infallible(不可失败的),
|
||||
// 即解析总是成功。所以 unwrap_or_else 永远不会被执行。
|
||||
// 但即使如此,unwrap_or_else 的闭包也需要类型正确,match e {} 满足这一点。
|
||||
let timestamp = take_string_field_from_map(&mut obj, &["timestamp", "time", "ts", "@timestamp"]);
|
||||
let level = take_string_field_from_map(&mut obj, &["level", "lvl", "severity"])
|
||||
.map(|s| s.parse::<LogLevel>().unwrap_or_else(|e| match e {}));
|
||||
|
||||
// ─── 构建 LogEntry 并返回 ──────────────────────────────────────────
|
||||
// 此时 fields HashMap 中还剩下未被提取的字段(如 message、自定义字段等)。
|
||||
// timestamp 和 level 已经从 fields 中移除了(通过 remove)。
|
||||
//
|
||||
// Some(LogEntry { ... }) — 使用结构体字面量创建 LogEntry 实例,
|
||||
// 并用 Some() 包裹表示"有值"。
|
||||
// serde_json::Map → HashMap:剩余字段转为 HashMap 存入 fields
|
||||
let fields: HashMap<String, Value> = obj.into_iter().collect();
|
||||
|
||||
Some(LogEntry {
|
||||
// line_number 设为 0,由调用者(如 parse_line_with_number)设置正确的值。
|
||||
line_number: 0,
|
||||
// 原始行内容。
|
||||
raw_line,
|
||||
// 时间戳(可能为 None,如果 JSON 中没有时间戳字段)。
|
||||
timestamp,
|
||||
// 日志级别(可能为 None,如果 JSON 中没有级别字段)。
|
||||
level,
|
||||
// 剩余的 JSON 字段(已移除 timestamp 和 level)。
|
||||
fields,
|
||||
duplicate_keys,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -317,6 +346,13 @@ mod tests {
|
||||
assert_eq!(parse_line(warn_line).unwrap().level, Some(LogLevel::Warn));
|
||||
}
|
||||
|
||||
#[test]
|
||||
// Regression: level field with surrounding whitespace should still be recognized.
|
||||
fn test_level_whitespace_in_json() {
|
||||
let line = r#"{"level":" WARN ","message":"test"}"#;
|
||||
assert_eq!(parse_line(line).unwrap().level, Some(LogLevel::Warn));
|
||||
}
|
||||
|
||||
#[test]
|
||||
// 测试:所有候选时间戳键名(timestamp, time, ts, @timestamp)都能被识别。
|
||||
fn test_timestamp_key_names() {
|
||||
@@ -357,4 +393,174 @@ mod tests {
|
||||
assert_eq!(entry.level, Some(LogLevel::Info), "failed for key: {key}");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
// 测试:数字类型的 level 值不应被提取,应保留在 fields 中。
|
||||
fn test_numeric_level_preserved_in_fields() {
|
||||
let line = r#"{"level":30,"message":"hello"}"#;
|
||||
let entry = parse_line(line).unwrap();
|
||||
// level 不是字符串,应返回 None。
|
||||
assert!(entry.level.is_none());
|
||||
// 数字 level 应保留在 fields 中,不被静默丢弃。
|
||||
assert_eq!(entry.fields.get("level"), Some(&Value::Number(30.into())));
|
||||
// message 仍正常存在。
|
||||
assert_eq!(
|
||||
entry.fields.get("message"),
|
||||
Some(&Value::String("hello".to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// 测试:数字类型的 timestamp 值不应被提取,应保留在 fields 中。
|
||||
fn test_numeric_timestamp_preserved_in_fields() {
|
||||
let line = r#"{"timestamp":1718000000,"message":"hello"}"#;
|
||||
let entry = parse_line(line).unwrap();
|
||||
// timestamp 不是字符串,应返回 None。
|
||||
assert!(entry.timestamp.is_none());
|
||||
// 数字 timestamp 应保留在 fields 中。
|
||||
assert_eq!(
|
||||
entry.fields.get("timestamp"),
|
||||
Some(&Value::Number(1718000000.into()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// 测试:当第一个候选键是数字时,应回退到下一个字符串类型的候选键。
|
||||
fn test_fallback_to_string_key() {
|
||||
let line = r#"{"level":30,"lvl":"INFO","message":"hello"}"#;
|
||||
let entry = parse_line(line).unwrap();
|
||||
// "level" 是数字,应跳过;"lvl" 是字符串,应成功提取。
|
||||
assert_eq!(entry.level, Some(LogLevel::Info));
|
||||
// 数字 "level" 保留在 fields 中。
|
||||
assert_eq!(entry.fields.get("level"), Some(&Value::Number(30.into())));
|
||||
// "lvl" 已被成功提取并从 fields 中移除。
|
||||
assert!(entry.fields.get("lvl").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
// 测试:timestamp 的 fallback 行为 — 数字 timestamp 被保留,字符串 time 被提取。
|
||||
fn test_timestamp_fallback_preserves_numeric() {
|
||||
let line = r#"{"timestamp":1718000000,"time":"2024-01-01T00:00:00Z"}"#;
|
||||
let entry = parse_line(line).unwrap();
|
||||
// "timestamp" 是数字,跳过;"time" 是字符串,成功提取。
|
||||
assert_eq!(entry.timestamp, Some("2024-01-01T00:00:00Z".to_string()));
|
||||
// 数字 "timestamp" 保留在 fields 中。
|
||||
assert_eq!(
|
||||
entry.fields.get("timestamp"),
|
||||
Some(&Value::Number(1718000000.into()))
|
||||
);
|
||||
// "time" 已被提取并从 fields 中移除。
|
||||
assert!(entry.fields.get("time").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bom_prefixed_json() {
|
||||
let line = "\u{FEFF}{\"level\":\"INFO\",\"message\":\"hello\"}";
|
||||
let entry = parse_line(line).unwrap();
|
||||
assert_eq!(entry.level, Some(LogLevel::Info));
|
||||
assert_eq!(
|
||||
entry.fields.get("message"),
|
||||
Some(&Value::String("hello".into()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bom_prefixed_detect() {
|
||||
assert!(detect_json_log("\u{FEFF}{\"level\":\"INFO\"}"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bom_only_whitespace() {
|
||||
assert!(parse_line("\u{FEFF} ").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_bom_stripped_from_raw_line() {
|
||||
let line = "\u{FEFF}{\"level\":\"INFO\",\"message\":\"hello\"}";
|
||||
let entry = parse_line(line).unwrap();
|
||||
assert_eq!(entry.raw_line, "{\"level\":\"INFO\",\"message\":\"hello\"}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_internal_bom_not_stripped() {
|
||||
let line = "{\"message\":\"\u{FEFF}hello\"}";
|
||||
let entry = parse_line(line).unwrap();
|
||||
assert_eq!(
|
||||
entry.fields.get("message"),
|
||||
Some(&Value::String("\u{FEFF}hello".into()))
|
||||
);
|
||||
}
|
||||
|
||||
// ─── 重复 key 检测测试 ──────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn test_no_duplicate_keys_normal_json() {
|
||||
let line = r#"{"level":"INFO","message":"hello"}"#;
|
||||
let entry = parse_line(line).unwrap();
|
||||
assert!(entry.duplicate_keys.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_duplicate_message_key_detected() {
|
||||
let line = r#"{"level":"INFO","message":"first","message":"second"}"#;
|
||||
let entry = parse_line(line).unwrap();
|
||||
// last-wins: fields 中保留第二个值
|
||||
assert_eq!(
|
||||
entry.fields.get("message"),
|
||||
Some(&Value::String("second".into()))
|
||||
);
|
||||
// 重复 key 记录中包含所有值
|
||||
assert_eq!(entry.duplicate_keys.len(), 1);
|
||||
assert_eq!(entry.duplicate_keys[0].key, "message");
|
||||
assert_eq!(entry.duplicate_keys[0].values.len(), 2);
|
||||
assert_eq!(entry.duplicate_keys[0].values[0], Value::String("first".into()));
|
||||
assert_eq!(entry.duplicate_keys[0].values[1], Value::String("second".into()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_duplicate_key_last_wins() {
|
||||
let line = r#"{"msg":"a","msg":"b","msg":"c"}"#;
|
||||
let entry = parse_line(line).unwrap();
|
||||
// last-wins: 最终值是 "c"
|
||||
assert_eq!(entry.fields.get("msg"), Some(&Value::String("c".into())));
|
||||
// 三个值都被记录
|
||||
assert_eq!(entry.duplicate_keys.len(), 1);
|
||||
assert_eq!(entry.duplicate_keys[0].values.len(), 3);
|
||||
assert_eq!(entry.duplicate_keys[0].values[0], Value::String("a".into()));
|
||||
assert_eq!(entry.duplicate_keys[0].values[1], Value::String("b".into()));
|
||||
assert_eq!(entry.duplicate_keys[0].values[2], Value::String("c".into()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_different_duplicate_keys() {
|
||||
let line = r#"{"a":"1","b":"2","a":"3","b":"4"}"#;
|
||||
let entry = parse_line(line).unwrap();
|
||||
assert_eq!(entry.duplicate_keys.len(), 2);
|
||||
let dup_a = entry.duplicate_keys.iter().find(|d| d.key == "a").unwrap();
|
||||
let dup_b = entry.duplicate_keys.iter().find(|d| d.key == "b").unwrap();
|
||||
assert_eq!(dup_a.values.len(), 2);
|
||||
assert_eq!(dup_b.values.len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_duplicate_level_key_detected() {
|
||||
let line = r#"{"level":"INFO","level":"ERROR","message":"hello"}"#;
|
||||
let entry = parse_line(line).unwrap();
|
||||
// last-wins: level 被提取为 ERROR
|
||||
assert_eq!(entry.level, Some(LogLevel::Error));
|
||||
// 重复 key 被记录
|
||||
assert_eq!(entry.duplicate_keys.len(), 1);
|
||||
assert_eq!(entry.duplicate_keys[0].key, "level");
|
||||
assert_eq!(entry.duplicate_keys[0].values.len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_duplicate_timestamp_key_detected() {
|
||||
let line = r#"{"timestamp":"2024-01-01","timestamp":"2024-06-01"}"#;
|
||||
let entry = parse_line(line).unwrap();
|
||||
// last-wins: timestamp 提取为后者
|
||||
assert_eq!(entry.timestamp, Some("2024-06-01".to_string()));
|
||||
assert_eq!(entry.duplicate_keys.len(), 1);
|
||||
assert_eq!(entry.duplicate_keys[0].key, "timestamp");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,13 +100,21 @@ fn detect_level_from_text(line: &str) -> Option<LogLevel> {
|
||||
)
|
||||
}
|
||||
|
||||
// ─── is_ident_char ─────────────────────────────────────────────────────────
|
||||
/// Whether a byte looks like an ASCII identifier continuation character
|
||||
/// (letter / digit / underscore). Log-level keywords must NOT be adjacent to
|
||||
/// such characters to count as a valid word boundary.
|
||||
fn is_ident_char(b: u8) -> bool {
|
||||
b.is_ascii_alphanumeric() || b == b'_'
|
||||
}
|
||||
|
||||
// ─── is_word_boundary ───────────────────────────────────────────────────────
|
||||
/// Check that the match at `start..start+len` is surrounded by non-alphabetic
|
||||
/// Check that the match at `start..start+len` is surrounded by non-identifier
|
||||
/// characters (or the string edge).
|
||||
fn is_word_boundary(text: &str, start: usize, len: usize) -> bool {
|
||||
let before_ok = start == 0 || !text.as_bytes()[start - 1].is_ascii_alphabetic();
|
||||
let before_ok = start == 0 || !is_ident_char(text.as_bytes()[start - 1]);
|
||||
let after_idx = start + len;
|
||||
let after_ok = after_idx >= text.len() || !text.as_bytes()[after_idx].is_ascii_alphabetic();
|
||||
let after_ok = after_idx >= text.len() || !is_ident_char(text.as_bytes()[after_idx]);
|
||||
before_ok && after_ok
|
||||
}
|
||||
|
||||
@@ -209,4 +217,36 @@ mod tests {
|
||||
let line = format!("{prefix} ERROR something");
|
||||
assert_eq!(detect_level(&line), Some(LogLevel::Error));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boundary_rejects_trailing_digits() {
|
||||
assert_eq!(detect_level("ERROR123"), None);
|
||||
assert_eq!(detect_level("WARN2: bad"), None);
|
||||
assert_eq!(detect_level("ERR2"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boundary_rejects_underscore() {
|
||||
assert_eq!(detect_level("INFO_foo"), None);
|
||||
assert_eq!(detect_level("DBG_value=5"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boundary_rejects_leading_digits_and_underscore() {
|
||||
assert_eq!(detect_level("123ERROR: fail"), None);
|
||||
assert_eq!(detect_level("foo_ERROR: fail"), None);
|
||||
assert_eq!(detect_level("1WRN"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boundary_accepts_valid_suffixes() {
|
||||
assert_eq!(detect_level("ERROR: fail"), Some(LogLevel::Error));
|
||||
assert_eq!(detect_level("[ERROR] fail"), Some(LogLevel::Error));
|
||||
assert_eq!(detect_level("ERROR fail"), Some(LogLevel::Error));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_boundary_camel_case_regression() {
|
||||
assert_eq!(detect_level("errorLevel"), None);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,13 +71,17 @@ impl FromStr for LogLevel {
|
||||
// 接收一个字符串切片 &str,返回 Result<LogLevel, Infallible>。
|
||||
// 由于 Err 类型是 Infallible,实际上返回值总是 Ok(LogLevel)。
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
// `s.to_uppercase()` — 将字符串转换为大写,实现不区分大小写的匹配。
|
||||
// `s.trim()` — 去除字符串前后的 Unicode 空白字符。
|
||||
// 例如 " WARN " → "WARN","\tINFO\n" → "INFO"。
|
||||
let trimmed = s.trim();
|
||||
|
||||
// `trimmed.to_uppercase()` — 将 trimmed 后的字符串转换为大写,实现不区分大小写的匹配。
|
||||
// 例如 "info"、"Info"、"INFO" 都会被转换为 "INFO"。
|
||||
// 返回一个新的 String(堆分配)。
|
||||
//
|
||||
// `.as_str()` — 将 String 转换回 &str(字符串切片引用)。
|
||||
// 因为 match 需要匹配 &str 而不是 String。
|
||||
match s.to_uppercase().as_str() {
|
||||
match trimmed.to_uppercase().as_str() {
|
||||
// `|` 在 match 分支中表示"或"(multiple patterns)。
|
||||
// "ERROR" | "ERR" | "SEVERE" | "FATAL" 都匹配到 LogLevel::Error。
|
||||
"ERROR" | "ERR" | "SEVERE" | "FATAL" => Ok(LogLevel::Error),
|
||||
@@ -86,9 +90,9 @@ impl FromStr for LogLevel {
|
||||
"DEBUG" | "DBG" => Ok(LogLevel::Debug),
|
||||
"TRACE" | "TRC" => Ok(LogLevel::Trace),
|
||||
// `_` 是通配符,匹配所有未被上面分支捕获的值。
|
||||
// 对于未知级别,包装为 Unknown 并保存原始字符串。
|
||||
// s.to_string() 将 &str 转换为 String(注意这里用原始的 s,不是大写后的)。
|
||||
_ => Ok(LogLevel::Unknown(s.to_string())),
|
||||
// 对于未知级别,包装为 Unknown 并保存 trimmed 后的字符串。
|
||||
// s.to_string() 将 &str 转换为 String(注意这里用 trimmed,不是原始 s)。
|
||||
_ => Ok(LogLevel::Unknown(trimmed.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -119,6 +123,18 @@ impl fmt::Display for LogLevel {
|
||||
}
|
||||
|
||||
// ─── LogEntry 结构体 ────────────────────────────────────────────────────────
|
||||
/// 记录 JSON 日志中出现的重复 key 信息
|
||||
///
|
||||
/// 当 JSON 对象中同一个 key 出现多次时,serde_json 默认 last-wins(后值覆盖前值),
|
||||
/// 前面的值会静默丢失。此结构记录所有重复出现的 key 及其全部值,供 UI 展示警告。
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct DuplicateKey {
|
||||
/// 重复的 key 名称
|
||||
pub key: String,
|
||||
/// 该 key 出现的所有值(按出现顺序排列,最后一个值是 fields 中的最终值)
|
||||
pub values: Vec<Value>,
|
||||
}
|
||||
|
||||
/// 一行解析后的日志
|
||||
///
|
||||
/// 表示日志文件中经过解析器处理后的一行内容。
|
||||
@@ -144,6 +160,9 @@ pub struct LogEntry {
|
||||
/// HashMap<String, Value> 是一个字典,键是字段名,值是 JSON 值。
|
||||
/// 例如 {"message": "hello", "request_id": "abc123"}。
|
||||
pub fields: HashMap<String, Value>,
|
||||
|
||||
/// JSON 中重复出现的 key 记录(正常日志为空 Vec)
|
||||
pub duplicate_keys: Vec<DuplicateKey>,
|
||||
}
|
||||
|
||||
// ─── SearchResult 结构体 ────────────────────────────────────────────────────
|
||||
@@ -288,6 +307,34 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_str_whitespace_trimmed() {
|
||||
assert_eq!(" WARN ".parse::<LogLevel>(), Ok(LogLevel::Warn));
|
||||
assert_eq!("\tINFO".parse::<LogLevel>(), Ok(LogLevel::Info));
|
||||
assert_eq!("ERROR\n".parse::<LogLevel>(), Ok(LogLevel::Error));
|
||||
assert_eq!(" debug ".parse::<LogLevel>(), Ok(LogLevel::Debug));
|
||||
assert_eq!("\tTRACE\t".parse::<LogLevel>(), Ok(LogLevel::Trace));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_str_whitespace_unknown_trimmed() {
|
||||
// Unknown stores the trimmed value, not the original.
|
||||
assert_eq!(
|
||||
" CUSTOM ".parse::<LogLevel>(),
|
||||
Ok(LogLevel::Unknown("CUSTOM".into()))
|
||||
);
|
||||
// Pure whitespace becomes Unknown("").
|
||||
assert_eq!(
|
||||
" ".parse::<LogLevel>(),
|
||||
Ok(LogLevel::Unknown("".into()))
|
||||
);
|
||||
// Internal whitespace is NOT collapsed.
|
||||
assert_eq!(
|
||||
"W ARN".parse::<LogLevel>(),
|
||||
Ok(LogLevel::Unknown("W ARN".into()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// 测试:LogLevel 的 Display 输出格式是否正确。
|
||||
fn test_display_output() {
|
||||
@@ -319,6 +366,7 @@ mod tests {
|
||||
timestamp: Some("2024-01-01T00:00:00".to_string()),
|
||||
level: Some(LogLevel::Info),
|
||||
fields,
|
||||
duplicate_keys: vec![],
|
||||
};
|
||||
|
||||
// 逐字段验证。
|
||||
|
||||
@@ -13,6 +13,7 @@ pub enum FileEvent {
|
||||
Truncated { new_size: u64 },
|
||||
Rotated { new_inode: u64 },
|
||||
Removed,
|
||||
WatcherError { message: String },
|
||||
}
|
||||
|
||||
// ─── get_inode ──────────────────────────────────────────────────────────────
|
||||
@@ -34,6 +35,43 @@ struct WatchState {
|
||||
last_inode: u64,
|
||||
}
|
||||
|
||||
fn process_event(event: Event, watch_path: &Path, state: &mut WatchState) -> Option<FileEvent> {
|
||||
if !event.paths.iter().any(|p| p == watch_path) {
|
||||
return None;
|
||||
}
|
||||
|
||||
match event.kind {
|
||||
EventKind::Modify(_) | EventKind::Create(_) | EventKind::Any => {}
|
||||
EventKind::Remove(_) => return Some(FileEvent::Removed),
|
||||
_ => return None,
|
||||
}
|
||||
|
||||
let current_inode = get_inode(watch_path).unwrap_or(0);
|
||||
let current_size = std::fs::metadata(watch_path).map(|m| m.len()).unwrap_or(0);
|
||||
|
||||
if current_inode != 0 && state.last_inode != 0 && current_inode != state.last_inode {
|
||||
state.last_inode = current_inode;
|
||||
state.last_size = current_size;
|
||||
Some(FileEvent::Rotated {
|
||||
new_inode: current_inode,
|
||||
})
|
||||
} else if current_size > state.last_size {
|
||||
state.last_size = current_size;
|
||||
state.last_inode = current_inode;
|
||||
Some(FileEvent::Appended {
|
||||
new_size: current_size,
|
||||
})
|
||||
} else if current_size < state.last_size {
|
||||
state.last_size = current_size;
|
||||
state.last_inode = current_inode;
|
||||
Some(FileEvent::Truncated {
|
||||
new_size: current_size,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// ─── FileWatcher ────────────────────────────────────────────────────────────
|
||||
pub struct FileWatcher {
|
||||
rx: Receiver<FileEvent>,
|
||||
@@ -56,24 +94,13 @@ impl FileWatcher {
|
||||
notify::recommended_watcher(move |res: std::result::Result<Event, notify::Error>| {
|
||||
let event = match res {
|
||||
Ok(e) => e,
|
||||
Err(_) => return,
|
||||
};
|
||||
|
||||
match event.kind {
|
||||
EventKind::Modify(_) | EventKind::Create(_) | EventKind::Any => {}
|
||||
EventKind::Remove(_) => {
|
||||
let _ = tx.try_send(FileEvent::Removed);
|
||||
Err(error) => {
|
||||
let _ = tx.try_send(FileEvent::WatcherError {
|
||||
message: error.to_string(),
|
||||
});
|
||||
return;
|
||||
}
|
||||
_ => return,
|
||||
}
|
||||
|
||||
if !event.paths.iter().any(|p| p == &watch_path) {
|
||||
return;
|
||||
}
|
||||
|
||||
let current_inode = get_inode(&watch_path).unwrap_or(0);
|
||||
let current_size = std::fs::metadata(&watch_path).map(|m| m.len()).unwrap_or(0);
|
||||
};
|
||||
|
||||
let mut st = state.lock().unwrap_or_else(|poison| {
|
||||
// Recover from poisoned mutex — state only tracks last_size
|
||||
@@ -81,25 +108,8 @@ impl FileWatcher {
|
||||
// cause a duplicate event, which is harmless.
|
||||
poison.into_inner()
|
||||
});
|
||||
|
||||
if current_inode != 0 && st.last_inode != 0 && current_inode != st.last_inode {
|
||||
let _ = tx.try_send(FileEvent::Rotated {
|
||||
new_inode: current_inode,
|
||||
});
|
||||
st.last_inode = current_inode;
|
||||
st.last_size = current_size;
|
||||
} else if current_size > st.last_size {
|
||||
let _ = tx.try_send(FileEvent::Appended {
|
||||
new_size: current_size,
|
||||
});
|
||||
st.last_size = current_size;
|
||||
st.last_inode = current_inode;
|
||||
} else if current_size < st.last_size {
|
||||
let _ = tx.try_send(FileEvent::Truncated {
|
||||
new_size: current_size,
|
||||
});
|
||||
st.last_size = current_size;
|
||||
st.last_inode = current_inode;
|
||||
if let Some(fe) = process_event(event, &watch_path, &mut st) {
|
||||
let _ = tx.try_send(fe);
|
||||
}
|
||||
})?;
|
||||
|
||||
@@ -144,6 +154,56 @@ mod tests {
|
||||
events
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_remove_wrong_path_ignored() {
|
||||
let dir = tempfile::tempdir().expect("create temp dir");
|
||||
let watched = dir.path().join("watched.log");
|
||||
let other = dir.path().join("other.log");
|
||||
std::fs::write(&watched, b"hello\n").expect("write watched");
|
||||
std::fs::write(&other, b"other\n").expect("write other");
|
||||
|
||||
let mut state = WatchState {
|
||||
last_size: 6,
|
||||
last_inode: get_inode(&watched).unwrap_or(0),
|
||||
};
|
||||
|
||||
let event = Event {
|
||||
kind: EventKind::Remove(notify::event::RemoveKind::File),
|
||||
paths: vec![other.clone()],
|
||||
attrs: Default::default(),
|
||||
};
|
||||
|
||||
let result = process_event(event, &watched, &mut state);
|
||||
assert_eq!(
|
||||
result, None,
|
||||
"Remove for non-watched path should be ignored"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_remove_correct_path_emits_removed() {
|
||||
let dir = tempfile::tempdir().expect("create temp dir");
|
||||
let watched = dir.path().join("watched.log");
|
||||
std::fs::write(&watched, b"hello\n").expect("write watched");
|
||||
|
||||
let mut state = WatchState {
|
||||
last_size: 6,
|
||||
last_inode: get_inode(&watched).unwrap_or(0),
|
||||
};
|
||||
|
||||
let event = Event {
|
||||
kind: EventKind::Remove(notify::event::RemoveKind::File),
|
||||
paths: vec![watched.clone()],
|
||||
attrs: Default::default(),
|
||||
};
|
||||
|
||||
let result = process_event(event, &watched, &mut state);
|
||||
assert!(
|
||||
matches!(result, Some(FileEvent::Removed)),
|
||||
"Remove for watched path should emit Removed"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_watcher_append() {
|
||||
let dir = tempfile::tempdir().expect("create temp dir");
|
||||
@@ -232,5 +292,19 @@ mod tests {
|
||||
|
||||
let d = FileEvent::Rotated { new_inode: 42 };
|
||||
assert_ne!(a, d);
|
||||
|
||||
let e1 = FileEvent::WatcherError {
|
||||
message: "io error".into(),
|
||||
};
|
||||
let e2 = FileEvent::WatcherError {
|
||||
message: "io error".into(),
|
||||
};
|
||||
assert_eq!(e1, e2);
|
||||
assert_ne!(
|
||||
e1,
|
||||
FileEvent::WatcherError {
|
||||
message: "other".into()
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ anyhow.workspace = true
|
||||
log-viewer-core.workspace = true
|
||||
serde_json.workspace = true
|
||||
crossbeam-channel.workspace = true
|
||||
unicode-width.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -88,9 +88,21 @@ pub fn render(frame: &mut ratatui::Frame, app: &mut App) {
|
||||
}
|
||||
|
||||
// ── Status bar ─────────────────────────────────────────────────
|
||||
let status_text = if app.mode == AppMode::Settings {
|
||||
" j/k:navigate ←/→:change 1-8:jump Enter:save Esc:cancel"
|
||||
} else if app.is_error() {
|
||||
if app.mode == AppMode::Settings {
|
||||
if let Some(ref err) = app.settings_error {
|
||||
frame.render_widget(
|
||||
Paragraph::new(err.as_str()).style(Style::default().fg(Color::Red)),
|
||||
outer[2],
|
||||
);
|
||||
} else {
|
||||
frame.render_widget(
|
||||
Paragraph::new(" j/k:navigate ←/→:change 1-8:jump Enter:save Esc:cancel"),
|
||||
outer[2],
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
let status_text = if app.is_error() {
|
||||
" Press q to quit"
|
||||
} else if app.is_loading() {
|
||||
let pct = app.loading_progress().map_or(0, |p| p as usize);
|
||||
@@ -130,8 +142,8 @@ pub fn render_settings(frame: &mut ratatui::Frame, app: &mut App, area: ratatui:
|
||||
|
||||
let popup_w = ((area.width as u32 * 4 / 5).max(40)).min(area.width as u32) as u16;
|
||||
let popup_h = ((area.height as u32 * 4 / 5).max(14)).min(area.height as u32) as u16;
|
||||
let popup_x = area.width.saturating_sub(popup_w) / 2;
|
||||
let popup_y = area.height.saturating_sub(popup_h) / 2;
|
||||
let popup_x = area.x.saturating_add(area.width.saturating_sub(popup_w) / 2);
|
||||
let popup_y = area.y.saturating_add(area.height.saturating_sub(popup_h) / 2);
|
||||
let popup = ratatui::layout::Rect::new(popup_x, popup_y, popup_w, popup_h);
|
||||
|
||||
let block = Block::new().borders(Borders::ALL).title(" Color Settings ");
|
||||
@@ -493,4 +505,74 @@ mod tests {
|
||||
let _ = std::fs::remove_file(&path);
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
// ── Issue #31: Settings popup area offset tests ────────────────
|
||||
|
||||
/// Helper: enter settings mode and render to buffer.
|
||||
fn render_settings_to_buffer(app: &mut App, width: u16, height: u16) -> ratatui::buffer::Buffer {
|
||||
app.mode = crate::app::AppMode::Settings;
|
||||
app.settings_draft = app.color_config.clone();
|
||||
render_to_buffer(app, width, height)
|
||||
}
|
||||
|
||||
/// Find the top-left corner of the popup border by scanning for '┌'.
|
||||
fn find_popup_top_left(buf: &ratatui::buffer::Buffer, width: u16, height: u16) -> Option<(u16, u16)> {
|
||||
for row in 0..height {
|
||||
for col in 0..width {
|
||||
if buf.cell((col, row)).unwrap().symbol() == "┌" {
|
||||
return Some((col, row));
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_settings_popup_includes_area_offset() {
|
||||
// In an 80x24 frame with Layout [Length(1), Min(1), Length(1)]:
|
||||
// outer[0] = title bar -> y=0
|
||||
// outer[1] = content -> y=1, height=22
|
||||
// outer[2] = status bar -> y=23
|
||||
// The popup is centered within outer[1], so its y must be >= outer[1].y (which is 1).
|
||||
let mut app = App::new();
|
||||
let buf = render_settings_to_buffer(&mut app, 80, 24);
|
||||
|
||||
let (_px, py) = find_popup_top_left(&buf, 80, 24)
|
||||
.expect("popup border '┌' should be rendered");
|
||||
|
||||
// outer[1].y == 1; the popup is centered inside a 22-row area,
|
||||
// so popup_y must be at least 1 (not 0).
|
||||
assert!(
|
||||
py >= 1,
|
||||
"popup top row should account for area.y offset, got y={py} (expected >= 1)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_settings_popup_horizontal_centering_uses_area_x() {
|
||||
// outer[1].x is 0 for this layout, so this mainly verifies the popup
|
||||
// is centered and not shifted left. A non-zero area.x layout would
|
||||
// need a different layout to trigger, but the formula is the same.
|
||||
let mut app = App::new();
|
||||
let buf = render_settings_to_buffer(&mut app, 80, 24);
|
||||
|
||||
let (px, _py) = find_popup_top_left(&buf, 80, 24)
|
||||
.expect("popup border '┌' should be rendered");
|
||||
|
||||
// popup_w = 80*4/5 = 64, centered: (80-64)/2 = 8
|
||||
assert_eq!(
|
||||
px, 8,
|
||||
"popup should start at x=8 (centered 64-wide popup in 80-col area)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_settings_popup_small_frame_no_panic() {
|
||||
// Frame smaller than the min popup size (40x14) should not panic.
|
||||
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
|
||||
let mut app = App::new();
|
||||
let _buf = render_settings_to_buffer(&mut app, 30, 10);
|
||||
}));
|
||||
assert!(result.is_ok(), "rendering settings in a small frame should not panic");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user