feat(core): implement FileReader with memchr line indexing

This commit is contained in:
dailz
2026-04-10 23:01:03 +08:00
parent 73322138c1
commit f173adc018
2 changed files with 271 additions and 7 deletions

View File

@@ -1,13 +1,138 @@
use crate::error::Result;
use std::path::Path;
use crate::error::{CoreError, Result};
use crate::io::line_index::LineIndex;
use std::path::{Path, PathBuf};
pub struct FileReader {/* TODO */}
pub struct FileReader {
path: PathBuf,
data: Vec<u8>,
line_index: LineIndex,
}
impl FileReader {
pub fn open(_path: &Path) -> Result<Self> {
todo!()
pub fn open(path: &Path) -> Result<Self> {
let data = std::fs::read(path)?;
if std::str::from_utf8(&data).is_err() {
return Err(CoreError::Encoding {
line: 0,
bytes: data.iter().take(64).copied().collect(),
});
}
let line_index = LineIndex::from_bytes(&data);
Ok(FileReader {
path: path.to_path_buf(),
data,
line_index,
})
}
pub fn data(&self) -> &[u8] {
&self.data
}
pub fn line_count(&self) -> usize {
self.line_index.line_count()
}
pub fn get_line(&self, idx: usize) -> Option<&str> {
self.line_index.get_line(&self.data, idx)
}
pub fn file_size(&self) -> u64 {
todo!()
self.data.len() as u64
}
pub fn path(&self) -> &Path {
&self.path
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::env::temp_dir;

    /// Fixture file that is removed when dropped, so a failing assertion
    /// does not leave it behind in the temp directory.
    struct TempFile(PathBuf);

    impl TempFile {
        /// Writes `contents` to a fresh fixture path derived from `name`.
        fn create(name: &str, contents: &[u8]) -> Self {
            let file = TempFile(temp_path(name));
            std::fs::write(&file.0, contents).unwrap();
            file
        }

        /// Location of the fixture on disk.
        fn path(&self) -> &Path {
            &self.0
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            // Best-effort cleanup; a file that is already gone is fine.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    /// Builds a path in the system temp directory. The process id is appended
    /// so concurrent test processes do not share fixture files.
    fn temp_path(name: &str) -> PathBuf {
        temp_dir().join(format!("{name}_{}", std::process::id()))
    }

    #[test]
    fn test_empty_file() {
        let file = TempFile::create("file_reader_test_empty", b"");
        let reader = FileReader::open(file.path()).unwrap();
        assert_eq!(reader.line_count(), 0);
    }

    #[test]
    fn test_multi_line_file() {
        let file = TempFile::create("file_reader_test_multi", b"hello\nworld\nfoo");
        let reader = FileReader::open(file.path()).unwrap();
        assert_eq!(reader.line_count(), 3);
        assert_eq!(reader.get_line(0), Some("hello"));
        assert_eq!(reader.get_line(1), Some("world"));
        assert_eq!(reader.get_line(2), Some("foo"));
    }

    #[test]
    fn test_nonexistent_file() {
        let path = temp_path("file_reader_test_nonexistent_xyzzy");
        // Make sure the path really is absent before opening it.
        let _ = std::fs::remove_file(&path);
        match FileReader::open(&path) {
            Err(CoreError::Io { .. }) => {}
            Err(other) => panic!("expected Io variant, got {other:?}"),
            Ok(_) => panic!("expected error, got success"),
        }
    }

    #[test]
    fn test_non_utf8_file() {
        let file = TempFile::create("file_reader_test_nonutf8", &[0xFF, 0xFE]);
        match FileReader::open(file.path()) {
            Err(CoreError::Encoding { line, bytes }) => {
                assert_eq!(line, 0);
                assert_eq!(bytes, vec![0xFF, 0xFE]);
            }
            Err(other) => panic!("expected Encoding variant, got {other:?}"),
            Ok(_) => panic!("expected error, got success"),
        }
    }

    #[test]
    fn test_file_size() {
        let content = b"hello world";
        let file = TempFile::create("file_reader_test_size", content);
        let reader = FileReader::open(file.path()).unwrap();
        assert_eq!(reader.file_size(), content.len() as u64);
    }

    #[test]
    fn test_path() {
        let file = TempFile::create("file_reader_test_path", b"data");
        let reader = FileReader::open(file.path()).unwrap();
        assert_eq!(reader.path(), file.path());
    }

    #[test]
    fn test_single_line_no_newline() {
        let file = TempFile::create("file_reader_test_single", b"hello");
        let reader = FileReader::open(file.path()).unwrap();
        assert_eq!(reader.line_count(), 1);
        assert_eq!(reader.get_line(0), Some("hello"));
    }
}

View File

@@ -1 +1,140 @@
/// Byte offsets of the start of every line in a buffer.
///
/// The index stores offsets only; the buffer it was built from has to be
/// passed back in when a line is looked up.
pub struct LineIndex {
    /// Offset of the first byte of each line, in ascending order.
    line_starts: Vec<usize>,
    /// Whether the indexed buffer ended with `\n`.
    // Recorded at construction but not read by any method in this file,
    // hence the lint suppression.
    #[allow(dead_code)]
    has_trailing_newline: bool,
}
impl LineIndex {
    /// Builds an index by locating every `\n` byte in `data`.
    ///
    /// Empty input yields zero lines. A final `\n` terminates the last line
    /// instead of opening a new empty one, so `b"hello\n"` has one line.
    pub fn from_bytes(data: &[u8]) -> Self {
        if data.is_empty() {
            return LineIndex {
                line_starts: Vec::new(),
                has_trailing_newline: false,
            };
        }

        // The first line starts at offset 0; every later line starts one byte
        // past a newline.
        let mut line_starts = vec![0usize];
        line_starts.extend(memchr::memchr_iter(b'\n', data).map(|pos| pos + 1));

        let has_trailing_newline = data.last() == Some(&b'\n');
        if has_trailing_newline {
            // The start recorded after the final newline equals `data.len()`
            // and does not begin a real line.
            line_starts.pop();
        }

        LineIndex {
            line_starts,
            has_trailing_newline,
        }
    }

    /// Returns the number of indexed lines.
    pub fn line_count(&self) -> usize {
        self.line_starts.len()
    }

    /// Returns the text of the line at zero-based position `idx`, with every
    /// trailing `\r` and `\n` character removed.
    ///
    /// `data` is expected to be the buffer this index was built from.
    ///
    /// Returns `None` when `idx` is out of range, when the recorded offsets do
    /// not fit inside `data` (for example because a different buffer was
    /// passed), or when the line's bytes are not valid UTF-8.
    pub fn get_line<'a>(&self, data: &'a [u8], idx: usize) -> Option<&'a str> {
        let start = *self.line_starts.get(idx)?;
        let end = match self.line_starts.get(idx + 1) {
            // The next line starts one past this line's `\n`; leave it out.
            Some(&next_start) => next_start.saturating_sub(1),
            // Last line: runs to the end of the buffer. A trailing `\n`, if
            // any, is removed by the trim below.
            None => data.len(),
        };

        // Checked slicing: offsets that do not fit `data` give `None` rather
        // than a panic.
        let bytes = data.get(start..end)?;
        let line = std::str::from_utf8(bytes).ok()?;
        Some(line.trim_end_matches(['\r', '\n']))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Indexes `data`, then checks the line count and the text of every line
    /// against `expected`.
    fn assert_lines(data: &[u8], expected: &[&str]) {
        let index = LineIndex::from_bytes(data);
        assert_eq!(index.line_count(), expected.len());
        for (line_no, want) in expected.iter().enumerate() {
            assert_eq!(index.get_line(data, line_no), Some(*want));
        }
    }

    #[test]
    fn test_empty_data() {
        assert_lines(b"", &[]);
    }

    #[test]
    fn test_single_line_no_newline() {
        assert_lines(b"hello", &["hello"]);
    }

    #[test]
    fn test_single_line_with_newline() {
        assert_lines(b"hello\n", &["hello"]);
    }

    #[test]
    fn test_multi_line() {
        assert_lines(b"aaa\nbbb\nccc", &["aaa", "bbb", "ccc"]);
    }

    #[test]
    fn test_crlf_endings() {
        assert_lines(b"hello\r\nworld\r\n", &["hello", "world"]);
    }

    #[test]
    fn test_only_newline() {
        assert_lines(b"\n", &[""]);
    }

    #[test]
    fn test_consecutive_newlines() {
        assert_lines(b"a\n\nb", &["a", "", "b"]);
    }

    #[test]
    fn test_double_newline() {
        assert_lines(b"\n\n", &["", ""]);
    }

    #[test]
    fn test_out_of_bounds() {
        let bytes = b"hello";
        let index = LineIndex::from_bytes(bytes);
        assert_eq!(index.get_line(bytes, 999), None);
    }

    #[test]
    fn test_empty_file_line_count_and_get_line() {
        let index = LineIndex::from_bytes(b"");
        assert_eq!(index.line_count(), 0);
        assert_eq!(index.get_line(b"", 0), None);
    }
}