diff --git a/crates/core/src/io/file_reader.rs b/crates/core/src/io/file_reader.rs index 936af7a..3a52fd8 100644 --- a/crates/core/src/io/file_reader.rs +++ b/crates/core/src/io/file_reader.rs @@ -1,13 +1,138 @@ -use crate::error::Result; -use std::path::Path; +use crate::error::{CoreError, Result}; +use crate::io::line_index::LineIndex; +use std::path::{Path, PathBuf}; -pub struct FileReader {/* TODO */} +pub struct FileReader { + path: PathBuf, + data: Vec, + line_index: LineIndex, +} impl FileReader { - pub fn open(_path: &Path) -> Result { - todo!() + pub fn open(path: &Path) -> Result { + let data = std::fs::read(path)?; + + if std::str::from_utf8(&data).is_err() { + return Err(CoreError::Encoding { + line: 0, + bytes: data.iter().take(64).copied().collect(), + }); + } + + let line_index = LineIndex::from_bytes(&data); + + Ok(FileReader { + path: path.to_path_buf(), + data, + line_index, + }) } + + pub fn data(&self) -> &[u8] { + &self.data + } + + pub fn line_count(&self) -> usize { + self.line_index.line_count() + } + + pub fn get_line(&self, idx: usize) -> Option<&str> { + self.line_index.get_line(&self.data, idx) + } + pub fn file_size(&self) -> u64 { - todo!() + self.data.len() as u64 + } + + pub fn path(&self) -> &Path { + &self.path + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::env::temp_dir; + + fn temp_path(name: &str) -> PathBuf { + temp_dir().join(name) + } + + #[test] + fn test_empty_file() { + let path = temp_path("file_reader_test_empty"); + std::fs::write(&path, b"").unwrap(); + let reader = FileReader::open(&path).unwrap(); + assert_eq!(reader.line_count(), 0); + let _ = std::fs::remove_file(&path); + } + + #[test] + fn test_multi_line_file() { + let path = temp_path("file_reader_test_multi"); + std::fs::write(&path, b"hello\nworld\nfoo").unwrap(); + let reader = FileReader::open(&path).unwrap(); + assert_eq!(reader.line_count(), 3); + assert_eq!(reader.get_line(0), Some("hello")); + assert_eq!(reader.get_line(1), Some("world")); + assert_eq!(reader.get_line(2), Some("foo")); + let _ = std::fs::remove_file(&path); + } + + #[test] + fn test_nonexistent_file() { + let path = temp_path("file_reader_test_nonexistent_xyzzy"); + let _ = std::fs::remove_file(&path); + let result = FileReader::open(&path); + match result { + Err(CoreError::Io { .. }) => {} + Err(other) => panic!("expected Io variant, got {other:?}"), + Ok(_) => panic!("expected error, got success"), + } + } + + #[test] + fn test_non_utf8_file() { + let path = temp_path("file_reader_test_nonutf8"); + std::fs::write(&path, &[0xFF, 0xFE]).unwrap(); + let result = FileReader::open(&path); + match result { + Err(CoreError::Encoding { line, bytes }) => { + assert_eq!(line, 0); + assert_eq!(bytes, vec![0xFF, 0xFE]); + } + Err(other) => panic!("expected Encoding variant, got {other:?}"), + Ok(_) => panic!("expected error, got success"), + } + let _ = std::fs::remove_file(&path); + } + + #[test] + fn test_file_size() { + let path = temp_path("file_reader_test_size"); + let content = b"hello world"; + std::fs::write(&path, content).unwrap(); + let reader = FileReader::open(&path).unwrap(); + assert_eq!(reader.file_size(), content.len() as u64); + let _ = std::fs::remove_file(&path); + } + + #[test] + fn test_path() { + let path = temp_path("file_reader_test_path"); + std::fs::write(&path, b"data").unwrap(); + let reader = FileReader::open(&path).unwrap(); + assert_eq!(reader.path(), path); + let _ = std::fs::remove_file(&path); + } + + #[test] + fn test_single_line_no_newline() { + let path = temp_path("file_reader_test_single"); + std::fs::write(&path, b"hello").unwrap(); + let reader = FileReader::open(&path).unwrap(); + assert_eq!(reader.line_count(), 1); + assert_eq!(reader.get_line(0), Some("hello")); + let _ = std::fs::remove_file(&path); } } diff --git a/crates/core/src/io/line_index.rs b/crates/core/src/io/line_index.rs index 099a1e8..edca9b9 100644 --- a/crates/core/src/io/line_index.rs +++ b/crates/core/src/io/line_index.rs @@ -1 +1,140 @@ -pub struct LineIndex {/* TODO */} +pub struct LineIndex { + line_starts: Vec, + #[allow(dead_code)] + has_trailing_newline: bool, +} + +impl LineIndex { + pub fn from_bytes(data: &[u8]) -> Self { + if data.is_empty() { + return LineIndex { + line_starts: vec![], + has_trailing_newline: false, + }; + } + + let mut line_starts = vec![0usize]; + for pos in memchr::memchr_iter(b'\n', data) { + line_starts.push(pos + 1); + } + + let trailing = data.last().is_some_and(|&b| b == b'\n'); + if trailing { + line_starts.pop(); + } + + LineIndex { + line_starts, + has_trailing_newline: trailing, + } + } + + pub fn line_count(&self) -> usize { + self.line_starts.len() + } + + pub fn get_line<'a>(&self, data: &'a [u8], idx: usize) -> Option<&'a str> { + if idx >= self.line_starts.len() { + return None; + } + + let start = self.line_starts[idx]; + let end = if idx + 1 < self.line_starts.len() { + self.line_starts[idx + 1].saturating_sub(1) + } else { + data.len() + }; + + let slice = &data[start..end]; + std::str::from_utf8(slice) + .map(|s| s.trim_end_matches(['\r', '\n'])) + .ok() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_empty_data() { + let idx = LineIndex::from_bytes(b""); + assert_eq!(idx.line_count(), 0); + } + + #[test] + fn test_single_line_no_newline() { + let data = b"hello"; + let idx = LineIndex::from_bytes(data); + assert_eq!(idx.line_count(), 1); + assert_eq!(idx.get_line(data, 0), Some("hello")); + } + + #[test] + fn test_single_line_with_newline() { + let data = b"hello\n"; + let idx = LineIndex::from_bytes(data); + assert_eq!(idx.line_count(), 1); + assert_eq!(idx.get_line(data, 0), Some("hello")); + } + + #[test] + fn test_multi_line() { + let data = b"aaa\nbbb\nccc"; + let idx = LineIndex::from_bytes(data); + assert_eq!(idx.line_count(), 3); + assert_eq!(idx.get_line(data, 0), Some("aaa")); + assert_eq!(idx.get_line(data, 1), Some("bbb")); + assert_eq!(idx.get_line(data, 2), Some("ccc")); + } + + #[test] + fn test_crlf_endings() { + let data = b"hello\r\nworld\r\n"; + let idx = LineIndex::from_bytes(data); + assert_eq!(idx.line_count(), 2); + assert_eq!(idx.get_line(data, 0), Some("hello")); + assert_eq!(idx.get_line(data, 1), Some("world")); + } + + #[test] + fn test_only_newline() { + let data = b"\n"; + let idx = LineIndex::from_bytes(data); + assert_eq!(idx.line_count(), 1); + assert_eq!(idx.get_line(data, 0), Some("")); + } + + #[test] + fn test_consecutive_newlines() { + let data = b"a\n\nb"; + let idx = LineIndex::from_bytes(data); + assert_eq!(idx.line_count(), 3); + assert_eq!(idx.get_line(data, 0), Some("a")); + assert_eq!(idx.get_line(data, 1), Some("")); + assert_eq!(idx.get_line(data, 2), Some("b")); + } + + #[test] + fn test_double_newline() { + let data = b"\n\n"; + let idx = LineIndex::from_bytes(data); + assert_eq!(idx.line_count(), 2); + assert_eq!(idx.get_line(data, 0), Some("")); + assert_eq!(idx.get_line(data, 1), Some("")); + } + + #[test] + fn test_out_of_bounds() { + let data = b"hello"; + let idx = LineIndex::from_bytes(data); + assert_eq!(idx.get_line(data, 999), None); + } + + #[test] + fn test_empty_file_line_count_and_get_line() { + let idx = LineIndex::from_bytes(b""); + assert_eq!(idx.line_count(), 0); + assert_eq!(idx.get_line(b"", 0), None); + } +}