docs(core): add Chinese comments to io module

Add detailed Chinese comments explaining Rust syntax and concepts for readers unfamiliar with the language. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-04-11 16:17:06 +08:00
parent 1c9367f1f9
commit e3f11d2165
3 changed files with 320 additions and 0 deletions
--- a/crates/core/src/io/file_reader.rs
+++ b/crates/core/src/io/file_reader.rs
@@ -1,26 +1,104 @@
+// ─── file_reader.rs ─────────────────────────────────────────────────────────
+// 这个文件定义了一个"文件读取器"（FileReader）结构体，用于将日志文件一次性
+// 读入内存，并支持按行号快速访问文件中的任意一行内容。
+// ──────────────────────────────────────────────────────────────────────────────
+
+// `use` 语句把其他模块中定义的名称引入当前作用域，类似于 Python 的 `from ... import ...` 或 C++ 的 `using` 声明。
+// 这里从 crate（当前项目）的 error 模块中引入了 CoreError 和 Result 两个类型。
+//   - CoreError: 项目自定义的错误类型，用于表示各种可能的错误情况。
+//   - Result: error 模块导出的 Result 类型（从用法看应是以 CoreError 为错误类型的标准库 Result 别名，具体定义请以 error 模块为准）。
 use crate::error::{CoreError, Result};
+
+// 从同目录下的 line_index 模块引入 LineIndex 类型。
+// LineIndex 用于记录文件中每一行的起始字节位置，从而支持快速定位某一行。
 use crate::io::line_index::LineIndex;
+
+// 从 Rust 标准库的 std::path 模块引入 Path 和 PathBuf 两个类型。
+//   - Path: 不拥有所有权的路径切片类型，通常以 &Path 的形式借用（Path 之于 PathBuf，类似于 str 之于 String）。
+//   - PathBuf: 拥有所有权的文件路径类型（类似于 String），可以修改和存储。
 use std::path::{Path, PathBuf};

+// ─── FileReader 结构体定义 ────────────────────────────────────────────────────
+// `pub struct` 定义一个公开的结构体（struct），类似于其他语言中的 class。
+// 结构体是数据的容器，可以包含多个字段（field）。
+// `pub` 表示这个结构体可以被其他模块访问，如果不加 pub 则只能在当前模块内使用。
 pub struct FileReader {
+    // `path: PathBuf` — 文件路径，存储被打开的文件的完整路径。
+    // PathBuf 是一个堆分配的、可增长的路径类型（类似于 String）。
    path: PathBuf,
+
+    // `data: Vec<u8>` — 文件的原始字节数据。
+    // Vec<u8> 是一个动态数组（向量），存储的是 u8（无符号 8 位整数，即一个字节）。
+    // 整个文件的内容被读取后以字节形式存储在这里。
+    // Vec 是 Rust 中最常用的集合类型，类似于 C++ 的 std::vector 或 Python 的 list。
    data: Vec<u8>,
+
+    // `line_index: LineIndex` — 行索引，记录每行在 data 中的起始位置。
+    // 通过这个索引可以快速找到第 N 行在 data 字节数组中的位置。
    line_index: LineIndex,
 }

+// ─── FileReader 的实现块 ─────────────────────────────────────────────────────
+// `impl FileReader` 块用来给 FileReader 结构体添加方法（method）。
+// 类似于其他语言中给 class 添加方法。Rust 中方法分为两种：
+//   - 关联函数（类似静态方法）：没有 &self 参数，用 `FileReader::open(...)` 调用。
+//   - 实例方法：第一个参数是 &self（不可变引用）或 &mut self（可变引用），
+//     用 `reader.line_count()` 这样的方式调用。
 impl FileReader {
+    // ─── open 方法（关联函数，即"构造函数"）──────────────────────────────────
+    // `pub fn open(path: &Path) -> Result<Self>` 的含义：
+    //   - pub fn: 公开的函数（function）。
+    //   - open: 函数名。
+    //   - path: &Path: 参数名为 path，类型是 &Path（Path 的不可变引用）。
+    //     & 表示借用（borrow），即我们不获取所有权，只是借用一下这个路径来读取。
+    //   - -> Result<Self>: 返回值类型。Result 是项目自定义的结果类型，
+    //     Self 指代当前类型（即 FileReader 本身）。
+    //     Result<Self> 实际上意味着"要么成功返回一个 FileReader，要么返回一个错误"。
    pub fn open(path: &Path) -> Result<Self> {
+        // `std::fs::read(path)?` — 读取文件的全部内容到内存中。
+        // std::fs 是 Rust 标准库中的文件系统模块。
+        // read() 函数接受一个文件路径，将整个文件内容读取为 Vec<u8>（字节数组）。
+        // 末尾的 `?` 是 Rust 的错误传播操作符：如果 read() 返回错误，
+        // 这个错误会先通过 From 转换成本函数的错误类型，再从当前函数中返回（即提前退出函数）。
+        // 这比手写 match/if 来处理错误要简洁得多。
        let data = std::fs::read(path)?;

+        // ─── UTF-8 编码检查 ──────────────────────────────────────────────────
+        // `std::str::from_utf8(&data)` 尝试将字节数组解释为 UTF-8 编码的字符串。
+        // 如果字节数组不是有效的 UTF-8 编码，会返回 Err。
+        // `.is_err()` 检查结果是否为错误（即不是有效的 UTF-8）。
        if std::str::from_utf8(&data).is_err() {
+            // 如果不是有效的 UTF-8，返回一个自定义的编码错误。
+            // `Err(...)` 创建一个包含错误值的 Result::Err 变体。
+            // `CoreError::Encoding { ... }` 是 CoreError 枚举的一个变体（variant），
+            // 这里使用了结构体风格的枚举变体，包含两个字段：
            return Err(CoreError::Encoding {
+                // `line: 0` — 错误发生在第 0 行（此处只表示"文件开头附近"，
+                // 因为这里还没有逐行解析，所以总是 0）。
                line: 0,
+                // `bytes: data.iter().take(64).copied().collect()` — 这是一段链式调用：
+                //   1. data.iter() — 创建一个迭代器，遍历 data 中的每个 &u8（字节的引用）。
+                //   2. .take(64) — 只取前 64 个元素（如果文件不满 64 字节则取全部）。
+                //   3. .copied() — 将 &u8（引用）转换为 u8（值），即复制一份。
+                //   4. .collect() — 将迭代器收集为一个新的集合（这里推断为 Vec<u8>）。
+                // 这行代码的作用是：取文件前 64 字节内容，放在错误信息中，方便调试。
                bytes: data.iter().take(64).copied().collect(),
            });
        }

+        // ─── 构建行索引 ──────────────────────────────────────────────────────
+        // 调用 LineIndex::from_bytes() 静态方法，传入文件字节数据的引用。
+        // 这个方法会扫描整个字节数组，找到所有换行符的位置，
+        // 构建一个索引，记录每一行的起始字节偏移量。
+        // &data 中的 & 表示传递引用（不转移所有权）。
        let line_index = LineIndex::from_bytes(&data);

+        // ─── 构造并返回 FileReader 实例 ─────────────────────────────────────
+        // Ok(...) 创建一个成功的 Result 值。
+        // FileReader { ... } 使用结构体字面量语法创建实例：
+        //   - 字段名和变量名相同时，可以用简写语法（如 path, data, line_index），
+        //     而不需要写成 path: path, data: data, ...
+        // path.to_path_buf() 将 &Path（引用）转换为 PathBuf（拥有所有权的路径类型）。
        Ok(FileReader {
            path: path.to_path_buf(),
            data,
@@ -28,51 +106,105 @@ impl FileReader {
        })
    }

+    // ─── data 方法 ──────────────────────────────────────────────────────────
+    // `&self` 表示这是一个实例方法，通过不可变引用访问自身。
+    // 返回类型 `&[u8]` 是一个"字节切片引用"（即对字节数组的只读视图）。
+    // &[u8] 类似于其他语言中"只读字节数组"的概念，它不拥有数据，只是指向数据。
    pub fn data(&self) -> &[u8] {
+        // `&self.data` 获取 self.data（即 Vec<u8>）的引用。
+        // Rust 会自动将 &Vec<u8> 转换为 &[u8]（这叫做 Deref 强制转换）。
        &self.data
    }

+    // ─── line_count 方法 ────────────────────────────────────────────────────
+    // 返回文件的总行数。usize 是 Rust 中表示大小/索引的无符号整数类型
+    // （类似 C 的 size_t），在 64 位系统上占 8 字节。
    pub fn line_count(&self) -> usize {
+        // 委托给 line_index 的 line_count() 方法。
+        // 即实际上是由 LineIndex 来计算行数的。
        self.line_index.line_count()
    }

+    // ─── get_line 方法 ──────────────────────────────────────────────────────
+    // 根据行号获取某一行内容。
+    // `idx: usize` — 行号索引，从 0 开始（第 0 行、第 1 行……）。
+    // `-> Option<&str>` — 返回类型：
+    //   - Option 是 Rust 的可选类型，要么是 Some(值) 要么是 None。
+    //   - &str 是字符串切片的引用（只读字符串视图）。
+    //   - 如果行号有效，返回 Some("该行内容")；如果行号越界，返回 None。
    pub fn get_line(&self, idx: usize) -> Option<&str> {
+        // 委托给 line_index 的 get_line() 方法。
+        // 传入文件的字节数据和行号索引。
        self.line_index.get_line(&self.data, idx)
    }

+    // ─── file_size 方法 ─────────────────────────────────────────────────────
+    // 返回文件大小（以字节为单位）。
+    // u64 是无符号 64 位整数类型。
    pub fn file_size(&self) -> u64 {
+        // self.data.len() 获取 Vec<u8> 的长度（字节数），返回 usize 类型。
+        // `as u64` 是类型转换（cast），将 usize 转换为 u64。
        self.data.len() as u64
    }

+    // ─── path 方法 ──────────────────────────────────────────────────────────
+    // 返回文件路径的引用。
+    // &Path 是不可变的路径引用（不拥有所有权）。
    pub fn path(&self) -> &Path {
+        // &self.path 获取 PathBuf 的引用，Rust 会自动转换为 &Path。
        &self.path
    }
 }

+// ─── 单元测试 ────────────────────────────────────────────────────────────────
+// `#[cfg(test)]` 是一个条件编译属性（attribute），表示以下代码只在运行测试时编译。
+// 普通的 `cargo build` 不会编译这部分代码，只有 `cargo test` 才会。
+// 这是 Rust 中编写单元测试的标准方式——测试代码和业务代码放在同一个文件中。
 #[cfg(test)]
 mod tests {
+    // `use super::*;` — 将父模块中当前模块可见的所有名称引入当前作用域，
+    // 其中也包括父模块用 `use` 引入的 CoreError、PathBuf 等非公开名称。super 指代父模块，* 是通配符。
    use super::*;
+    // 引入标准库中的临时目录函数。
    use std::env::temp_dir;

+    // 辅助函数：生成临时目录下的文件路径。
+    // `-> PathBuf` 表示返回一个 PathBuf（拥有所有权的路径类型）。
    fn temp_path(name: &str) -> PathBuf {
+        // temp_dir() 返回系统临时目录（如 /tmp）。
+        // .join(name) 将文件名拼接到临时目录路径后面，形成完整路径。
        temp_dir().join(name)
    }

+    // `#[test]` 属性标记这是一个测试函数。cargo test 会自动发现并运行它。
    #[test]
+    // 测试：空文件应该有 0 行。
    fn test_empty_file() {
+        // 生成临时文件路径。
        let path = temp_path("file_reader_test_empty");
+        // 将空字节数组 b"" 写入文件。b"" 是字节字符串字面量语法。
+        // .unwrap() 的含义是"我确信这不会失败；如果失败了就直接 panic（崩溃）"。
+        // 在测试代码中常用 .unwrap() 来简化错误处理。
        std::fs::write(&path, b"").unwrap();
+        // 打开文件创建 FileReader。unwrap() 断言操作成功。
        let reader = FileReader::open(&path).unwrap();
+        // assert_eq! 宏断言两个值相等。这里断言行数为 0。
        assert_eq!(reader.line_count(), 0);
+        // 清理：删除临时文件。let _ = 表示忽略返回值（不关心删除是否成功）。
        let _ = std::fs::remove_file(&path);
    }

    #[test]
+    // 测试：多行文件（行与行之间以换行符分隔，最后一行末尾没有换行符）。
    fn test_multi_line_file() {
        let path = temp_path("file_reader_test_multi");
+        // b"hello\nworld\nfoo" — 三行内容：hello、world、foo。
+        // 注意最后一行 foo 后面没有换行符。
        std::fs::write(&path, b"hello\nworld\nfoo").unwrap();
        let reader = FileReader::open(&path).unwrap();
+        // 应该识别为 3 行。
        assert_eq!(reader.line_count(), 3);
+        // 逐行验证内容。Some("hello") 表示第 0 行是 "hello"。
        assert_eq!(reader.get_line(0), Some("hello"));
        assert_eq!(reader.get_line(1), Some("world"));
        assert_eq!(reader.get_line(2), Some("foo"));
@@ -80,25 +212,40 @@ mod tests {
    }

    #[test]
+    // 测试：打开不存在的文件应该返回 IO 错误。
    fn test_nonexistent_file() {
        let path = temp_path("file_reader_test_nonexistent_xyzzy");
+        // 先删除文件确保它不存在。
        let _ = std::fs::remove_file(&path);
+        // 尝试打开不存在的文件。
        let result = FileReader::open(&path);
+        // 使用 match 进行模式匹配（类似于 switch-case，但更强大）。
        match result {
+            // 期望得到 Io 类型的错误（文件不存在的 IO 错误）。
+            // `{ .. }` 表示忽略 CoreError::Io 中的字段细节。
            Err(CoreError::Io { .. }) => {}
+            // 如果是其他类型的错误，说明出问题了，panic 并打印实际收到的错误类型。
            Err(other) => panic!("expected Io variant, got {other:?}"),
+            // 如果竟然成功了，也 panic。
+            // （上一个分支里 {other:?} 中的 :? 是 Debug 格式化，用于打印详细调试信息。）
            Ok(_) => panic!("expected error, got success"),
        }
    }

    #[test]
+    // 测试：非 UTF-8 编码的文件应该返回 Encoding 错误。
    fn test_non_utf8_file() {
        let path = temp_path("file_reader_test_nonutf8");
+        // [0xFF, 0xFE] 是无效的 UTF-8 字节序列（这是 UTF-16 LE 的 BOM 头）。
+        // &[] 创建一个数组的引用（在这里是 &[u8; 2] 类型）。
        std::fs::write(&path, &[0xFF, 0xFE]).unwrap();
        let result = FileReader::open(&path);
        match result {
+            // 期望得到 Encoding 错误，并验证其中的字段。
            Err(CoreError::Encoding { line, bytes }) => {
+                // 错误行号应该是 0（文件开头）。
                assert_eq!(line, 0);
+                // bytes 应该包含我们写入的那两个无效字节。
                assert_eq!(bytes, vec![0xFF, 0xFE]);
            }
            Err(other) => panic!("expected Encoding variant, got {other:?}"),
@@ -108,30 +255,39 @@ mod tests {
    }

    #[test]
+    // 测试：file_size() 返回的文件大小是否正确。
    fn test_file_size() {
        let path = temp_path("file_reader_test_size");
+        // b"hello world" — 11 个字节。
        let content = b"hello world";
        std::fs::write(&path, content).unwrap();
        let reader = FileReader::open(&path).unwrap();
+        // 文件大小应该等于内容的字节长度。
        assert_eq!(reader.file_size(), content.len() as u64);
        let _ = std::fs::remove_file(&path);
    }

    #[test]
+    // 测试：path() 返回的路径是否与传入的路径一致。
    fn test_path() {
        let path = temp_path("file_reader_test_path");
        std::fs::write(&path, b"data").unwrap();
        let reader = FileReader::open(&path).unwrap();
+        // 验证存储的路径与原始路径相同。
        assert_eq!(reader.path(), path);
        let _ = std::fs::remove_file(&path);
    }

    #[test]
+    // 测试：只有一行且没有末尾换行符的情况。
    fn test_single_line_no_newline() {
        let path = temp_path("file_reader_test_single");
+        // b"hello" — 只有一行，没有换行符。
        std::fs::write(&path, b"hello").unwrap();
        let reader = FileReader::open(&path).unwrap();
+        // 应该识别为 1 行。
        assert_eq!(reader.line_count(), 1);
+        // 第 0 行的内容应该是 "hello"。
        assert_eq!(reader.get_line(0), Some("hello"));
        let _ = std::fs::remove_file(&path);
    }
--- a/crates/core/src/io/line_index.rs
+++ b/crates/core/src/io/line_index.rs
@@ -1,84 +1,219 @@
+// ─── line_index.rs ───────────────────────────────────────────────────────────
+// 这个文件定义了 LineIndex 结构体，用于为一段字节数据建立"行索引"。
+//
+// "行索引"的核心思想：记录每一行在字节数组中的起始位置（偏移量）。
+// 例如，对于内容 "aaa\nbbb\nccc"，字节布局如下：
+//
+//   位置: 0 1 2 3 4 5 6 7 8 9 10
+//   内容: a a a \n b b b \n c  c  c
+//         ↑第0行   ↑第1行   ↑第2行
+//
+// line_starts 数组会存储 [0, 4, 8]，即每行的起始位置。
+// 这样要获取第 N 行，只需要读取从 line_starts[N] 到下一行起始位置之前的字节（去掉行尾换行符；最后一行则读到数据末尾）。
+// ──────────────────────────────────────────────────────────────────────────────
+
+// ─── LineIndex 结构体定义 ────────────────────────────────────────────────────
+// `pub struct` 定义一个公开的结构体。pub 表示外部模块可以访问。
 pub struct LineIndex {
+    // `line_starts: Vec<usize>` — 一个动态数组，存储每一行的起始字节偏移量。
+    // Vec<usize> 即"存放 usize 值的向量"。
+    // usize 是 Rust 中用于表示大小和索引的无符号整数类型（类似 C 的 size_t）。
+    // 例如对于 "hello\nworld"，line_starts = [0, 6]：
+    //   - 第 0 行从字节偏移 0 开始
+    //   - 第 1 行从字节偏移 6 开始（'w' 的位置）
    line_starts: Vec<usize>,
+
+    // `#[allow(dead_code)]` — 一个属性（attribute），告诉编译器"不要对下面这个字段
+    // 发出'未使用'的警告"。has_trailing_newline 字段目前没有被使用，
+    // 但保留它是为了将来可能的功能需要（比如判断文件是否以换行符结尾）。
+    // #[...] 是 Rust 中为接下来的项添加元信息（属性）的语法。
    #[allow(dead_code)]
+    // `has_trailing_newline: bool` — 布尔值，表示文件最后一个字节是否是换行符 \n。
+    // bool 类型只有两个值：true 和 false。
    has_trailing_newline: bool,
 }

+// ─── LineIndex 的实现块 ─────────────────────────────────────────────────────
+// `impl LineIndex` 块用来为 LineIndex 结构体添加方法。
 impl LineIndex {
+    // ─── from_bytes 方法（关联函数 / 静态方法）─────────────────────────────
+    // 根据一段字节数据构建行索引。
+    // 参数 data: &[u8] — 一个字节切片的引用（只读的字节数组视图）。
+    // &[u8] 不拥有数据，只是借用（borrow）了数据的只读视图。
+    // -> Self 返回一个 LineIndex 实例。Self 是当前类型（LineIndex）的别名。
    pub fn from_bytes(data: &[u8]) -> Self {
+        // ─── 处理空数据的特殊情况 ────────────────────────────────────────
+        // `if data.is_empty()` — 检查字节数组是否为空。
+        // 如果文件为空，没有行可索引，直接返回一个空的 LineIndex。
        if data.is_empty() {
            return LineIndex {
+                // `vec![]` 宏创建一个空的 Vec（动态数组）。
+                // vec! 是 Rust 中创建 Vec 的便捷宏，类似 Python 的 []。
                line_starts: vec![],
                has_trailing_newline: false,
            };
        }

+        // ─── 构建行起始位置数组 ──────────────────────────────────────────
+        // `vec![0usize]` 创建一个包含一个元素 0 的 Vec<usize>。
+        // 第一行永远从字节偏移量 0 开始，所以初始化时先放入 0。
+        // 0usize 中的 usize 是类型后缀，明确指定 0 的类型是 usize。
        let mut line_starts = vec![0usize];
+        // `mut`（mutable）关键字表示这个变量可以被修改。
+        // 在 Rust 中，变量默认是不可变的（immutable），必须加 mut 才能修改。
+
+        // ─── 遍历所有换行符位置 ──────────────────────────────────────────
+        // `memchr::memchr_iter(b'\n', data)` — 在 data 字节数组中查找所有
+        // 换行符 \n 的位置，返回一个迭代器。
+        // memchr 是一个高性能的字节搜索库，比逐字节查找快得多（使用 SIMD 指令）。
+        // b'\n' 是一个字节字面量，值为 10（换行符的 ASCII 码）。
+        // b 前缀表示"这是一个字节（u8），而不是字符（char）"。
        for pos in memchr::memchr_iter(b'\n', data) {
+            // 换行符的下一个位置就是新行的起始位置。
+            // pos + 1 跳过换行符本身，指向下一行的第一个字节。
+            // .push() 方法向 Vec 末尾添加一个元素。
            line_starts.push(pos + 1);
        }

+        // ─── 处理末尾换行符的特殊情况 ────────────────────────────────────
+        // `data.last()` 返回字节数组的最后一个元素的 Option<&u8>（可能为空）。
+        // `.is_some_and(|&b| b == b'\n')` — 如果最后一个元素存在，
+        // 且它的值等于换行符 \n，则返回 true。
+        //   - is_some_and: "如果 Option 是 Some，则对其值执行判断函数"
+        //   - |&b| b == b'\n': 这是一个闭包（匿名函数），参数 &b 是对元素的引用，
+        //     判断它是否等于换行符。
        let trailing = data.last().is_some_and(|&b| b == b'\n');
+
+        // 如果文件以换行符结尾，需要特殊处理：
+        // 因为最后一个 \n 后面没有实际的行内容，所以我们不应该为它记录一个行起始位置。
        if trailing {
+            // .pop() 移除 Vec 的最后一个元素。
+            // 这里移除的是最后一个 \n 后面的位置（因为那里没有实际的行内容）。
            line_starts.pop();
        }

+        // ─── 返回构建好的 LineIndex ──────────────────────────────────────
+        // 使用结构体字面量语法创建实例。
+        // 因为字段名和变量名相同，所以可以简写。
        LineIndex {
            line_starts,
            has_trailing_newline: trailing,
        }
    }

+    // ─── line_count 方法 ───────────────────────────────────────────────────
+    // 返回总行数。
+    // &self 表示这是一个实例方法，通过不可变引用访问自身。
+    // -> usize 返回一个表示行数的无符号整数。
    pub fn line_count(&self) -> usize {
+        // line_starts 数组的长度就等于行数（每行有一个起始位置）。
+        // .len() 返回 Vec 的元素个数。
        self.line_starts.len()
    }

+    // ─── get_line 方法 ─────────────────────────────────────────────────────
+    // 根据行号索引获取某一行内容。
+    // 这是一个稍微复杂的方法，涉及 Rust 的"生命周期"（lifetime）概念。
+    //
+    // 参数说明：
+    //   - &'a [u8]: 带有生命周期标注的字节切片引用。
+    //     'a 是一个生命周期参数，表示"返回的字符串引用的生命周期与 data 相同"。
+    //     这确保了返回的 &str 不会在 data 被销毁后仍然存在。
+    //   - idx: usize: 要获取的行号（从 0 开始）。
+    //
+    // 返回值 Option<&'a str>:
+    //   - 如果行号有效，返回 Some("行内容字符串")。
+    //   - 如果行号越界，返回 None。
    pub fn get_line<'a>(&self, data: &'a [u8], idx: usize) -> Option<&'a str> {
+        // ─── 边界检查 ────────────────────────────────────────────────────
+        // 如果请求的行号超出了 line_starts 的范围，返回 None。
+        // .len() 返回数组长度（即行数）。
        if idx >= self.line_starts.len() {
            return None;
        }

+        // ─── 计算行的字节范围 ─────────────────────────────────────────────
+        // start: 当前行的起始字节偏移量。
        let start = self.line_starts[idx];
+
+        // end: 当前行的结束字节偏移量（不包含）。
+        // 使用 if-else 表达式来计算：
+        //   - 如果不是最后一行，结束位置是下一行的起始位置减 1（跳过换行符）。
+        //   - 如果是最后一行，结束位置是整个数据的末尾。
        let end = if idx + 1 < self.line_starts.len() {
+            // .saturating_sub(1) 是饱和减法：如果结果会下溢（小于 0），则返回 0。
+            // line_starts 中除首元素 0 以外都是 pos + 1（至少为 1），所以这里实际不会下溢，只是防御性写法。
+            // 减 1 是为了把结束位置放在换行符上，使切片不包含当前行末尾的 \n。
            self.line_starts[idx + 1].saturating_sub(1)
        } else {
+            // 最后一行没有"下一行"，所以取到数据末尾 data.len()；若数据以 \n 结尾，切片中的 \n 会在下面被去掉。
            data.len()
        };

+        // ─── 提取行内容 ──────────────────────────────────────────────────
+        // &data[start..end] 使用切片语法获取从 start 到 end（不包含 end）的字节子数组。
+        // Rust 的切片语法 [a..b] 表示"从索引 a 到 b-1"，即左闭右开区间 [a, b)。
        let slice = &data[start..end];
+
+        // ─── 将字节转换为字符串并清理末尾空白 ──────────────────────────────
+        // 这是一段链式调用：
+        //
+        // 1. std::str::from_utf8(slice) — 尝试将字节切片转换为 &str（UTF-8 字符串切片）。
+        //    返回 Result<&str, Utf8Error>，即"成功得到字符串"或"编码错误"。
+        //
+        // 2. .map(|s| s.trim_end_matches(['\r', '\n'])) — 如果成功，对字符串执行 map 操作。
+        //    trim_end_matches 从字符串末尾移除所有匹配的字符。
+        //    这里移除 '\r'（回车）和 '\n'（换行），处理 CRLF (\r\n) 和 LF (\n) 两种换行风格。
+        //
+        // 3. .ok() — 将 Result 转换为 Option：
+        //    Ok(值) → Some(值)，Err(_) → None。
+        //    这样如果 UTF-8 转换失败，整个方法会返回 None 而不是 panic。
        std::str::from_utf8(slice)
            .map(|s| s.trim_end_matches(['\r', '\n']))
            .ok()
    }
 }

+// ─── 单元测试 ────────────────────────────────────────────────────────────────
+// `#[cfg(test)]` 表示以下代码只在测试时编译。
 #[cfg(test)]
 mod tests {
+    // 引入父模块中当前模块可见的所有名称（这里主要是 LineIndex）。
    use super::*;

    #[test]
+    // 测试：空数据应该有 0 行。
    fn test_empty_data() {
+        // b"" 是一个空的字节字符串字面量（类型为 &[u8; 0]，传参时会自动转换为 &[u8]）。
        let idx = LineIndex::from_bytes(b"");
        assert_eq!(idx.line_count(), 0);
    }

    #[test]
+    // 测试：单行内容，末尾没有换行符。
    fn test_single_line_no_newline() {
+        // b"hello" 是一个字节字符串字面量，类型为 &[u8; 5]。
        let data = b"hello";
        let idx = LineIndex::from_bytes(data);
+        // 只有 1 行。
        assert_eq!(idx.line_count(), 1);
+        // 第 0 行内容是 "hello"。
        assert_eq!(idx.get_line(data, 0), Some("hello"));
    }

    #[test]
+    // 测试：单行内容，末尾有换行符。
    fn test_single_line_with_newline() {
        let data = b"hello\n";
        let idx = LineIndex::from_bytes(data);
+        // 即使末尾有 \n，仍然只算 1 行（因为 \n 后面没有内容）。
        assert_eq!(idx.line_count(), 1);
+        // 返回内容时不包含末尾的 \n。
        assert_eq!(idx.get_line(data, 0), Some("hello"));
    }

    #[test]
+    // 测试：多行内容。
    fn test_multi_line() {
        let data = b"aaa\nbbb\nccc";
        let idx = LineIndex::from_bytes(data);
@@ -89,52 +224,68 @@ mod tests {
    }

    #[test]
+    // 测试：Windows 风格的 CRLF 换行符（\r\n）。
    fn test_crlf_endings() {
+        // b"hello\r\nworld\r\n" — 每行末尾是 \r\n（回车+换行）。
        let data = b"hello\r\nworld\r\n";
        let idx = LineIndex::from_bytes(data);
+        // 2 行（末尾 \n 后没有内容，所以不算第 3 行）。
        assert_eq!(idx.line_count(), 2);
+        // get_line 会自动去除 \r 和 \n，所以内容是干净的。
        assert_eq!(idx.get_line(data, 0), Some("hello"));
        assert_eq!(idx.get_line(data, 1), Some("world"));
    }

    #[test]
+    // 测试：文件内容只是一个换行符。
    fn test_only_newline() {
        let data = b"\n";
        let idx = LineIndex::from_bytes(data);
+        // 一个 \n 算作 1 行（内容为空字符串）。
        assert_eq!(idx.line_count(), 1);
+        // 第 0 行内容为空字符串 ""。
        assert_eq!(idx.get_line(data, 0), Some(""));
    }

    #[test]
+    // 测试：连续的换行符（中间有空行）。
    fn test_consecutive_newlines() {
        let data = b"a\n\nb";
        let idx = LineIndex::from_bytes(data);
+        // 3 行：'a'、空行、'b'。
        assert_eq!(idx.line_count(), 3);
        assert_eq!(idx.get_line(data, 0), Some("a"));
+        // 中间的空行，内容为空字符串 ""。
        assert_eq!(idx.get_line(data, 1), Some(""));
        assert_eq!(idx.get_line(data, 2), Some("b"));
    }

    #[test]
+    // 测试：两个换行符（两行空行）。
    fn test_double_newline() {
        let data = b"\n\n";
        let idx = LineIndex::from_bytes(data);
+        // 2 行空行。
        assert_eq!(idx.line_count(), 2);
        assert_eq!(idx.get_line(data, 0), Some(""));
        assert_eq!(idx.get_line(data, 1), Some(""));
    }

    #[test]
+    // 测试：行号越界时应该返回 None。
    fn test_out_of_bounds() {
        let data = b"hello";
        let idx = LineIndex::from_bytes(data);
+        // 只有 1 行，请求第 999 行应该返回 None。
        assert_eq!(idx.get_line(data, 999), None);
    }

    #[test]
+    // 测试：空文件的行数和 get_line 都应正确处理。
    fn test_empty_file_line_count_and_get_line() {
        let idx = LineIndex::from_bytes(b"");
        assert_eq!(idx.line_count(), 0);
+        // 空文件，请求第 0 行也应该返回 None（因为没有行）。
        assert_eq!(idx.get_line(b"", 0), None);
    }
 }
--- a/crates/core/src/io/mod.rs
+++ b/crates/core/src/io/mod.rs
@@ -1,2 +1,15 @@
+// ─── io 模块说明 ───────────────────────────────────────────────────────────────
+// 这个模块（module）负责文件 I/O（输入/输出）相关的功能。
+// 在 Rust 中，一个目录下的 mod.rs 文件相当于这个模块的"入口"，
+// 通过 `pub mod` 语句把子模块声明并导出，让外部代码可以使用。
+// ──────────────────────────────────────────────────────────────────────────────
+
+// `pub mod` 表示"公开声明一个子模块"。
+// pub = public（公开的），mod = module（模块）。
+// 这里声明了 file_reader 子模块，它定义在同目录下的 file_reader.rs 文件中。
+// 同一 crate 内的代码可以通过 crate::io::file_reader 来访问这个模块里的内容。
 pub mod file_reader;
+
+// 同理，声明 line_index 子模块，定义在 line_index.rs 文件中。
+// 这个模块负责维护文件中每一行的起始位置索引，用于快速定位某一行内容。
 pub mod line_index;