feat(core): implement FileReader with memchr line indexing
This commit is contained in:
@@ -1,13 +1,138 @@
|
||||
use crate::error::Result;
|
||||
use std::path::Path;
|
||||
use crate::error::{CoreError, Result};
|
||||
use crate::io::line_index::LineIndex;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
pub struct FileReader {/* TODO */}
|
||||
pub struct FileReader {
|
||||
path: PathBuf,
|
||||
data: Vec<u8>,
|
||||
line_index: LineIndex,
|
||||
}
|
||||
|
||||
impl FileReader {
|
||||
pub fn open(_path: &Path) -> Result<Self> {
|
||||
todo!()
|
||||
pub fn open(path: &Path) -> Result<Self> {
|
||||
let data = std::fs::read(path)?;
|
||||
|
||||
if std::str::from_utf8(&data).is_err() {
|
||||
return Err(CoreError::Encoding {
|
||||
line: 0,
|
||||
bytes: data.iter().take(64).copied().collect(),
|
||||
});
|
||||
}
|
||||
|
||||
let line_index = LineIndex::from_bytes(&data);
|
||||
|
||||
Ok(FileReader {
|
||||
path: path.to_path_buf(),
|
||||
data,
|
||||
line_index,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn data(&self) -> &[u8] {
|
||||
&self.data
|
||||
}
|
||||
|
||||
pub fn line_count(&self) -> usize {
|
||||
self.line_index.line_count()
|
||||
}
|
||||
|
||||
pub fn get_line(&self, idx: usize) -> Option<&str> {
|
||||
self.line_index.get_line(&self.data, idx)
|
||||
}
|
||||
|
||||
pub fn file_size(&self) -> u64 {
|
||||
todo!()
|
||||
self.data.len() as u64
|
||||
}
|
||||
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.path
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::env::temp_dir;

    /// Builds a path inside the system temp directory.
    ///
    /// The process id is appended so that two test processes running at the same time do not
    /// read, overwrite or delete each other's files.
    fn temp_path(name: &str) -> PathBuf {
        temp_dir().join(format!("{name}_{}", std::process::id()))
    }

    /// A scratch file that is deleted when the guard goes out of scope, so a failing
    /// assertion does not leave the file behind.
    struct Scratch {
        path: PathBuf,
    }

    impl Scratch {
        /// Creates the file `name` (see [`temp_path`]) holding exactly `contents`.
        fn new(name: &str, contents: &[u8]) -> Self {
            let path = temp_path(name);
            std::fs::write(&path, contents).unwrap();
            Scratch { path }
        }
    }

    impl Drop for Scratch {
        fn drop(&mut self) {
            // Best effort: a file that is already gone is not a test failure.
            let _ = std::fs::remove_file(&self.path);
        }
    }

    #[test]
    fn test_empty_file() {
        let file = Scratch::new("file_reader_test_empty", b"");
        let reader = FileReader::open(&file.path).unwrap();
        assert_eq!(reader.line_count(), 0);
    }

    #[test]
    fn test_multi_line_file() {
        let file = Scratch::new("file_reader_test_multi", b"hello\nworld\nfoo");
        let reader = FileReader::open(&file.path).unwrap();
        assert_eq!(reader.line_count(), 3);
        assert_eq!(reader.get_line(0), Some("hello"));
        assert_eq!(reader.get_line(1), Some("world"));
        assert_eq!(reader.get_line(2), Some("foo"));
    }

    #[test]
    fn test_nonexistent_file() {
        let path = temp_path("file_reader_test_nonexistent_xyzzy");
        // Make sure nothing is left over from an earlier run.
        let _ = std::fs::remove_file(&path);
        let result = FileReader::open(&path);
        match result {
            Err(CoreError::Io { .. }) => {}
            Err(other) => panic!("expected Io variant, got {other:?}"),
            Ok(_) => panic!("expected error, got success"),
        }
    }

    #[test]
    fn test_non_utf8_file() {
        let file = Scratch::new("file_reader_test_nonutf8", &[0xFF, 0xFE]);
        let result = FileReader::open(&file.path);
        match result {
            Err(CoreError::Encoding { line, bytes }) => {
                assert_eq!(line, 0);
                assert_eq!(bytes, vec![0xFF, 0xFE]);
            }
            Err(other) => panic!("expected Encoding variant, got {other:?}"),
            Ok(_) => panic!("expected error, got success"),
        }
    }

    #[test]
    fn test_file_size() {
        let content = b"hello world";
        let file = Scratch::new("file_reader_test_size", content);
        let reader = FileReader::open(&file.path).unwrap();
        assert_eq!(reader.file_size(), content.len() as u64);
    }

    #[test]
    fn test_path() {
        let file = Scratch::new("file_reader_test_path", b"data");
        let reader = FileReader::open(&file.path).unwrap();
        assert_eq!(reader.path(), file.path.as_path());
    }

    #[test]
    fn test_single_line_no_newline() {
        let file = Scratch::new("file_reader_test_single", b"hello");
        let reader = FileReader::open(&file.path).unwrap();
        assert_eq!(reader.line_count(), 1);
        assert_eq!(reader.get_line(0), Some("hello"));
    }
}
|
||||
|
||||
@@ -1 +1,140 @@
|
||||
/// Byte offsets at which the lines of a buffer begin.
///
/// The index stores offsets only; the buffer itself is passed again to the lookup methods.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineIndex {
    /// Offset of the first byte of each line.
    line_starts: Vec<usize>,
    /// Whether the indexed buffer ended with `\n`.
    // Stored when the index is built but not read by the code in this module, so the
    // dead-code lint is silenced for this field only.
    #[allow(dead_code)]
    has_trailing_newline: bool,
}
|
||||
|
||||
impl LineIndex {
|
||||
pub fn from_bytes(data: &[u8]) -> Self {
|
||||
if data.is_empty() {
|
||||
return LineIndex {
|
||||
line_starts: vec![],
|
||||
has_trailing_newline: false,
|
||||
};
|
||||
}
|
||||
|
||||
let mut line_starts = vec![0usize];
|
||||
for pos in memchr::memchr_iter(b'\n', data) {
|
||||
line_starts.push(pos + 1);
|
||||
}
|
||||
|
||||
let trailing = data.last().is_some_and(|&b| b == b'\n');
|
||||
if trailing {
|
||||
line_starts.pop();
|
||||
}
|
||||
|
||||
LineIndex {
|
||||
line_starts,
|
||||
has_trailing_newline: trailing,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn line_count(&self) -> usize {
|
||||
self.line_starts.len()
|
||||
}
|
||||
|
||||
pub fn get_line<'a>(&self, data: &'a [u8], idx: usize) -> Option<&'a str> {
|
||||
if idx >= self.line_starts.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let start = self.line_starts[idx];
|
||||
let end = if idx + 1 < self.line_starts.len() {
|
||||
self.line_starts[idx + 1].saturating_sub(1)
|
||||
} else {
|
||||
data.len()
|
||||
};
|
||||
|
||||
let slice = &data[start..end];
|
||||
std::str::from_utf8(slice)
|
||||
.map(|s| s.trim_end_matches(['\r', '\n']))
|
||||
.ok()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Indexes `data`, then checks the line count followed by the text of every line.
    fn assert_lines(data: &[u8], expected: &[&str]) {
        let index = LineIndex::from_bytes(data);
        assert_eq!(index.line_count(), expected.len());
        for (line_no, want) in expected.iter().enumerate() {
            assert_eq!(index.get_line(data, line_no), Some(*want));
        }
    }

    #[test]
    fn test_empty_data() {
        assert_lines(b"", &[]);
    }

    #[test]
    fn test_single_line_no_newline() {
        assert_lines(b"hello", &["hello"]);
    }

    #[test]
    fn test_single_line_with_newline() {
        assert_lines(b"hello\n", &["hello"]);
    }

    #[test]
    fn test_multi_line() {
        assert_lines(b"aaa\nbbb\nccc", &["aaa", "bbb", "ccc"]);
    }

    #[test]
    fn test_crlf_endings() {
        assert_lines(b"hello\r\nworld\r\n", &["hello", "world"]);
    }

    #[test]
    fn test_only_newline() {
        assert_lines(b"\n", &[""]);
    }

    #[test]
    fn test_consecutive_newlines() {
        assert_lines(b"a\n\nb", &["a", "", "b"]);
    }

    #[test]
    fn test_double_newline() {
        assert_lines(b"\n\n", &["", ""]);
    }

    #[test]
    fn test_out_of_bounds() {
        let bytes = b"hello";
        let index = LineIndex::from_bytes(bytes);
        assert_eq!(index.get_line(bytes, 999), None);
    }

    #[test]
    fn test_empty_file_line_count_and_get_line() {
        let index = LineIndex::from_bytes(b"");
        assert_eq!(index.line_count(), 0);
        assert_eq!(index.get_line(b"", 0), None);
    }
}
|
||||
|
||||
Reference in New Issue
Block a user