From 210eecfa66ce006673b3fc8dcfa106095004f632 Mon Sep 17 00:00:00 2001
From: dailz <dailz@example.com>
Date: Tue, 14 Apr 2026 09:06:52 +0800
Subject: [PATCH] feat(core): extract wrap utilities and extend LineIndex for
 progressive loading

Move wrap_line_chars and format_json_line from app.rs to core/io/wrap.rs alongside the MAX_WRAP_INPUT_LEN input-size limit. Add serde derives, pub(crate) getters, and extend_from_bytes() to LineIndex for incremental index building.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
---
 crates/core/src/io/line_index.rs | 254 +++++++++++++++++++++++++++++++
 crates/core/src/io/wrap.rs       | 135 ++++++++++++++++
 2 files changed, 389 insertions(+)
 create mode 100644 crates/core/src/io/wrap.rs

diff --git a/crates/core/src/io/line_index.rs b/crates/core/src/io/line_index.rs
index d6aa701..0ba2fcd 100644
--- a/crates/core/src/io/line_index.rs
+++ b/crates/core/src/io/line_index.rs
@@ -16,6 +16,7 @@
 const BLOCK_SIZE: usize = 256;
 
 // ─── LineIndex 结构体定义 ────────────────────────────────────────────────────
+#[derive(serde::Serialize, serde::Deserialize)]
 pub struct LineIndex {
     // 采样偏移量：每 BLOCK_SIZE 行记录一个起始字节偏移。
     // sampled_offsets[i] 存储第 (i * BLOCK_SIZE) 行的字节起始位置。
@@ -139,11 +140,91 @@ impl LineIndex {
         })
     }
 
+    // ─── extend_from_bytes: incrementally index appended data ────────────
+    /// Updates the index to cover bytes appended to already-indexed content.
+    ///
+    /// `new_data` holds the appended bytes and `start_offset` is the absolute
+    /// byte position of `new_data[0]` in the file; it only feeds the sampled
+    /// offsets. Empty input leaves the index untouched.
+    /// NOTE(review): `start_offset` is not validated against the previously
+    /// indexed length — callers are trusted to pass the true append position.
+    pub fn extend_from_bytes(&mut self, new_data: &[u8], start_offset: u64) {
+        let Some(&final_byte) = new_data.last() else {
+            return;
+        };
+        let ends_with_newline = final_byte == b'\n';
+
+        // The appended bytes open a fresh line when nothing was indexed yet or
+        // the indexed data ended with `\n`; otherwise they continue the old
+        // unterminated last line.
+        let prior_lines = self.total_lines;
+        let opens_fresh_line = prior_lines == 0 || self.has_trailing_newline;
+
+        // A fresh line sitting on a block boundary needs its own sample
+        // (analogous to the offset 0 that `from_bytes` records for line 0).
+        if opens_fresh_line && (prior_lines as usize).is_multiple_of(BLOCK_SIZE) {
+            self.sampled_offsets.push(start_offset);
+        }
+
+        // Index of the line that begins right after the first `\n` in `new_data`.
+        let first_following = if opens_fresh_line {
+            (prior_lines + 1) as usize
+        } else {
+            prior_lines as usize
+        };
+
+        // Walk the newlines once: count them and sample every line start that
+        // falls on a block boundary.
+        let mut newline_count: usize = 0;
+        for (nth, pos) in memchr::memchr_iter(b'\n', new_data).enumerate() {
+            if (first_following + nth).is_multiple_of(BLOCK_SIZE) {
+                self.sampled_offsets.push(start_offset + pos as u64 + 1);
+            }
+            newline_count = nth + 1;
+        }
+
+        // Lines contributed by `new_data`:
+        // - fresh line: one per `\n`, plus one for an unterminated tail;
+        // - continued line: the first `\n` only closes the old, already-counted
+        //   line, so a chunk ending in `\n` adds one fewer than its `\n` count.
+        let added = match (opens_fresh_line, ends_with_newline) {
+            (true, true) | (false, false) => newline_count,
+            (true, false) => newline_count + 1,
+            (false, true) => newline_count.saturating_sub(1),
+        };
+
+        self.total_lines += added as u64;
+        self.has_trailing_newline = ends_with_newline;
+
+        // When the data ends with `\n` exactly on a block boundary, the sample
+        // pushed for that boundary refers to a line that does not exist yet, so
+        // drop it (intended to mirror `from_bytes`).
+        if ends_with_newline
+            && self.total_lines > 0
+            && (self.total_lines as usize).is_multiple_of(BLOCK_SIZE)
+        {
+            self.sampled_offsets.pop();
+        }
+    }
+
     // ─── line_count：返回总行数 ───────────────────────────────────────────
     pub fn line_count(&self) -> usize {
         self.total_lines as usize
     }
 
+    // ─── Getters: crate-internal read-only access to the index fields ────
+    pub(crate) fn sampled_offsets(&self) -> &[u64] {
+        &self.sampled_offsets
+    }
+
+    pub(crate) fn total_lines(&self) -> u64 {
+        self.total_lines
+    }
+
+    pub(crate) fn has_trailing_newline(&self) -> bool {
+        self.has_trailing_newline
+    }
+
     // ─── get_line：根据行号获取行内容 ─────────────────────────────────────
     // 通过稀疏索引定位到所在块的起始位置，然后向前扫描少量换行符来定位目标行。
     pub fn get_line<'a>(&self, data: &'a [u8], idx: usize) -> Option<&'a str> {
@@ -490,4 +571,177 @@ mod tests {
         assert_eq!(idx.get_line(&data, 256), Some("line256"));
         assert_eq!(idx.get_line(&data, 299), Some("line299"));
     }
+
+    #[test]
+    fn test_lineindex_getters() {
+        // 300 lines span two 256-line blocks, so two offsets are sampled.
+        let index = LineIndex::from_bytes(&make_lines(300));
+        assert_eq!(index.sampled_offsets().len(), 2);
+        assert_eq!(index.sampled_offsets()[0], 0);
+        assert_eq!(index.total_lines(), 300);
+        assert!(index.has_trailing_newline());
+    }
+
+    // ─── extend_from_bytes tests ──────────────────────────────────────────
+
+    #[test]
+    fn test_extend_from_bytes_basic() {
+        let mut idx = LineIndex::from_bytes(b"aaa\nbbb\n");
+        assert_eq!(idx.line_count(), 2);
+        idx.extend_from_bytes(b"ccc\nddd\n", 8); // append two complete lines
+        assert_eq!(idx.line_count(), 4);
+        assert!(idx.has_trailing_newline());
+    }
+
+    #[test]
+    fn test_extend_from_bytes_trailing_to_trailing() {
+        let mut idx = LineIndex::from_bytes(b"hello\n");
+        assert_eq!(idx.line_count(), 1);
+        idx.extend_from_bytes(b"world\n", 6); // old and new both end with `\n`
+        assert_eq!(idx.line_count(), 2);
+        assert!(idx.has_trailing_newline());
+    }
+
+    #[test]
+    fn test_extend_from_bytes_trailing_to_no_trailing() {
+        let mut idx = LineIndex::from_bytes(b"hello\n");
+        assert_eq!(idx.line_count(), 1);
+        idx.extend_from_bytes(b"world", 6); // the new tail is left unterminated
+        assert_eq!(idx.line_count(), 2);
+        assert!(!idx.has_trailing_newline());
+    }
+
+    #[test]
+    fn test_extend_from_bytes_no_trailing_to_trailing() {
+        let mut idx = LineIndex::from_bytes(b"hello");
+        assert_eq!(idx.line_count(), 1);
+        idx.extend_from_bytes(b"\nworld\n", 5); // the first `\n` closes "hello"
+        assert_eq!(idx.line_count(), 2);
+        assert!(idx.has_trailing_newline());
+    }
+
+    #[test]
+    fn test_extend_from_bytes_no_trailing_merge() {
+        let mut idx = LineIndex::from_bytes(b"hel");
+        assert_eq!(idx.line_count(), 1);
+        idx.extend_from_bytes(b"lo", 3); // no `\n`: merges into the same line
+        assert_eq!(idx.line_count(), 1);
+        assert!(!idx.has_trailing_newline());
+    }
+
+    #[test]
+    fn test_extend_from_bytes_empty() {
+        let mut index = LineIndex::from_bytes(b"hello\n");
+        // Snapshot the observable state; an empty append must not change it.
+        let snapshot = (index.line_count(), index.sampled_offsets().to_vec());
+        index.extend_from_bytes(b"", 6);
+        assert_eq!(index.line_count(), snapshot.0);
+        assert_eq!(index.sampled_offsets(), snapshot.1.as_slice());
+    }
+
+    #[test]
+    fn test_extend_from_bytes_from_empty() {
+        let mut idx = LineIndex::from_bytes(b""); // start from an empty index
+        assert_eq!(idx.line_count(), 0);
+        idx.extend_from_bytes(b"aaa\nbbb\n", 0);
+        assert_eq!(idx.line_count(), 2);
+        assert!(idx.has_trailing_newline());
+    }
+
+    #[test]
+    fn test_extend_from_bytes_matches_from_bytes() {
+        // (indexed, appended) pairs: extending must equal indexing the whole input.
+        let cases: [(&[u8], &[u8]); 7] = [
+            (b"hello\n", b"world\n"),
+            (b"hello", b"\nworld\n"),
+            (b"hello", b"world"),
+            (b"", b"aaa\nbbb\n"),
+            (b"aaa\n", b""),
+            (b"a\nb\nc", b"\nd\ne\n"),
+            (b"a\nb\nc\n", b"d\ne\nf"),
+        ];
+        for (i, &(old, new)) in cases.iter().enumerate() {
+            let combined = [old, new].concat();
+            let full_idx = LineIndex::from_bytes(&combined);
+
+            let mut ext_idx = LineIndex::from_bytes(old);
+            ext_idx.extend_from_bytes(new, old.len() as u64);
+
+            assert_eq!(
+                ext_idx.total_lines, full_idx.total_lines,
+                "case {}: total_lines mismatch (old={:?}, new={:?})",
+                i, old, new
+            );
+            assert_eq!(
+                ext_idx.has_trailing_newline, full_idx.has_trailing_newline,
+                "case {}: has_trailing_newline mismatch",
+                i
+            );
+            assert_eq!(
+                ext_idx.sampled_offsets, full_idx.sampled_offsets,
+                "case {}: sampled_offsets mismatch",
+                i
+            );
+        }
+    }
+
+    #[test]
+    fn test_extend_from_bytes_256_block_boundary() {
+        // Each half is exactly one full block, so the join sits on a boundary.
+        let (old, new) = (make_lines(256), make_lines(256));
+        let combined = [&old[..], &new[..]].concat();
+
+        let full_idx = LineIndex::from_bytes(&combined);
+        let mut ext_idx = LineIndex::from_bytes(&old);
+        ext_idx.extend_from_bytes(&new, old.len() as u64);
+
+        assert_eq!(ext_idx.total_lines, full_idx.total_lines);
+        assert_eq!(ext_idx.sampled_offsets, full_idx.sampled_offsets);
+        assert_eq!(ext_idx.has_trailing_newline, full_idx.has_trailing_newline);
+
+        let checks = [(0, "line0"), (255, "line255"), (256, "line0"), (511, "line255")];
+        for (line_no, text) in checks {
+            assert_eq!(ext_idx.get_line(&combined, line_no), Some(text));
+        }
+    }
+
+    #[test]
+    fn test_extend_from_bytes_300_plus_300() {
+        // The join at line 300 falls inside a block rather than on a boundary.
+        let (old, new) = (make_lines(300), make_lines(300));
+        let combined = [&old[..], &new[..]].concat();
+
+        let full_idx = LineIndex::from_bytes(&combined);
+        let mut ext_idx = LineIndex::from_bytes(&old);
+        ext_idx.extend_from_bytes(&new, old.len() as u64);
+
+        assert_eq!(ext_idx.total_lines, full_idx.total_lines);
+        assert_eq!(ext_idx.sampled_offsets, full_idx.sampled_offsets);
+
+        for (line_no, text) in [(299, "line299"), (300, "line0"), (599, "line299")] {
+            assert_eq!(ext_idx.get_line(&combined, line_no), Some(text));
+        }
+    }
+
+    #[test]
+    fn test_lineindex_serde_roundtrip() {
+        let data = make_lines(300);
+        let original = LineIndex::from_bytes(&data);
+
+        // Encode with bincode and decode straight back.
+        let encoded = bincode::serialize(&original).expect("serialize");
+        let restored: LineIndex = bincode::deserialize(&encoded).expect("deserialize");
+
+        // The state exposed through the getters must survive the round trip.
+        assert_eq!(restored.total_lines(), original.total_lines());
+        let (got, want) = (restored.has_trailing_newline(), original.has_trailing_newline());
+        assert_eq!(got, want);
+        assert_eq!(restored.sampled_offsets(), original.sampled_offsets());
+        assert_eq!(restored.line_count(), original.line_count());
+
+        // The restored index must still resolve lines on both sides of a block edge.
+        for (line_no, text) in [(0, "line0"), (255, "line255"), (299, "line299")] {
+            assert_eq!(restored.get_line(&data, line_no), Some(text));
+        }
+    }
 }
diff --git a/crates/core/src/io/wrap.rs b/crates/core/src/io/wrap.rs
new file mode 100644
index 0000000..a33b09b
--- /dev/null
+++ b/crates/core/src/io/wrap.rs
@@ -0,0 +1,135 @@
+/// Maximum input length, in bytes, for wrap/format operations (10 MB).
+/// Lines exceeding this are returned as-is to avoid pathological cases.
+pub const MAX_WRAP_INPUT_LEN: usize = 10 * 1024 * 1024;
+
+/// Splits `line` into rows of at most `width` columns (character-level wrap,
+/// as wanted by a log viewer). Every `char` is one column; a tab becomes four
+/// spaces and is never split, so its row can exceed `width` when `width < 4`.
+/// Returns one empty row for an empty `line` or zero `width`, and `line` as a
+/// single untouched row when it is longer than [`MAX_WRAP_INPUT_LEN`] bytes.
+pub fn wrap_line_chars(line: &str, width: usize) -> Vec<String> {
+    if width == 0 || line.is_empty() {
+        return vec![String::new()];
+    }
+    // Honour the documented size guard: oversized lines pass through as-is.
+    if line.len() > MAX_WRAP_INPUT_LEN {
+        return vec![line.to_string()];
+    }
+    let mut result = Vec::new();
+    let mut row = String::new();
+    let mut col = 0;
+    for ch in line.chars() {
+        let w = if ch == '\t' { 4 } else { 1 };
+        // Start a new row first if this character would overflow the current one.
+        if col + w > width && !row.is_empty() {
+            result.push(std::mem::take(&mut row));
+            col = 0;
+        }
+        match ch {
+            '\t' => row.push_str("    "),
+            _ => row.push(ch),
+        }
+        col += w;
+        if col >= width {
+            result.push(std::mem::take(&mut row));
+            col = 0;
+        }
+    }
+    if !row.is_empty() {
+        result.push(row);
+    }
+    result
+}
+
+/// Pretty-prints `line` if it parses as a JSON object, else returns it unchanged.
+/// Blank lines yield `""`; lines over `MAX_WRAP_INPUT_LEN` bytes are not parsed.
+pub fn format_json_line(line: &str) -> String {
+    let body = line.trim_start();
+    if body.is_empty() {
+        return String::new();
+    }
+    // Cheap rejects: skip parsing unless `{`-prefixed and within the size guard.
+    if !body.starts_with('{') || line.len() > MAX_WRAP_INPUT_LEN {
+        return line.to_string();
+    }
+    serde_json::from_str::<serde_json::Value>(line)
+        .ok()
+        .filter(serde_json::Value::is_object)
+        .and_then(|value| serde_json::to_string_pretty(&value).ok())
+        .unwrap_or_else(|| line.to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_wrap_empty_line() {
+        // An empty line still yields exactly one (empty) row.
+        assert_eq!(wrap_line_chars("", 80), vec![""]);
+    }
+
+    #[test]
+    fn test_wrap_zero_width() {
+        // Zero width cannot hold any character: a single empty row comes back.
+        assert_eq!(wrap_line_chars("hello", 0), vec![""]);
+    }
+
+    #[test]
+    fn test_wrap_short_line() {
+        // Shorter than the width: returned as one row.
+        assert_eq!(wrap_line_chars("hello", 80), vec!["hello"]);
+    }
+
+    #[test]
+    fn test_wrap_exact_width() {
+        // Exactly `width` characters must not produce a trailing empty row.
+        assert_eq!(wrap_line_chars("abc", 3), vec!["abc"]);
+    }
+
+    #[test]
+    fn test_wrap_multi_row() {
+        // Twice the width splits into two full rows.
+        assert_eq!(wrap_line_chars("abcdef", 3), vec!["abc", "def"]);
+    }
+
+    #[test]
+    fn test_wrap_with_tab() {
+        // The tab expands to four spaces and is moved to a row of its own.
+        assert_eq!(wrap_line_chars("a\tb", 4), vec!["a", "    ", "b"]);
+    }
+
+    #[test]
+    fn test_format_json_empty() {
+        let blanks = ["", "   "]; // empty and whitespace-only inputs
+        assert!(blanks.iter().all(|s| format_json_line(s).is_empty()));
+    }
+
+    #[test]
+    fn test_format_json_non_json() {
+        assert_eq!(format_json_line("hello world"), "hello world"); // no leading `{`
+    }
+
+    #[test]
+    fn test_format_json_valid_object() {
+        let pretty = format_json_line(r#"{"key":"value"}"#);
+        assert!(
+            pretty.contains('\n'),
+            "pretty-printed JSON should have newlines"
+        );
+        // Both the key and its value must survive the reformatting.
+        assert!(pretty.contains("key"));
+        assert!(pretty.contains("value"));
+    }
+
+    #[test]
+    fn test_format_json_array_unchanged() {
+        // Arrays are valid JSON but not objects, so they pass through untouched.
+        assert_eq!(format_json_line("[1,2,3]"), "[1,2,3]");
+    }
+
+    #[test]
+    fn test_max_wrap_input_len_constant() {
+        assert_eq!(MAX_WRAP_INPUT_LEN, 10 * 1024 * 1024); // the documented 10 MB limit
+    }
+}