fix(io): harden read_cache against zero-length false hits and overflow (closes #13)
- Add early return for len==0 (Ok(&[])) matching std::io semantics - Add slot.len > 0 guard to cache hit predicate to prevent empty-slot false matches - Replace unchecked arithmetic with checked_add/saturating_add for request_end, block_end, and post-read coverage check - Fix misleading comment about get(file,0,0) behavior on miss path - Strengthen clear() to fully reset block_offset and last_access - Register read_cache module in io/mod.rs - Add 4 regression tests: zero-len on fresh/populated cache, zero-len at u64::MAX, overflow error on nonzero read at u64::MAX
This commit is contained in:
@@ -18,4 +18,5 @@ pub mod cache_util;
|
|||||||
pub mod index_cache;
|
pub mod index_cache;
|
||||||
pub mod line_sampler;
|
pub mod line_sampler;
|
||||||
pub mod progressive_reader;
|
pub mod progressive_reader;
|
||||||
|
pub mod read_cache;
|
||||||
pub mod wrap;
|
pub mod wrap;
|
||||||
|
|||||||
@@ -56,9 +56,15 @@ impl LruReadCache {
|
|||||||
/// on a hit, or fills a cache slot on a miss. Cross-block reads go through
|
/// on a hit, or fills a cache slot on a miss. Cross-block reads go through
|
||||||
/// the spill buffer and are not cached.
|
/// the spill buffer and are not cached.
|
||||||
pub fn get(&mut self, file: &File, offset: u64, len: usize) -> io::Result<&[u8]> {
|
pub fn get(&mut self, file: &File, offset: u64, len: usize) -> io::Result<&[u8]> {
|
||||||
|
if len == 0 {
|
||||||
|
return Ok(&[]);
|
||||||
|
}
|
||||||
|
|
||||||
let aligned_key = offset & !(BLOCK_ALIGN as u64 - 1);
|
let aligned_key = offset & !(BLOCK_ALIGN as u64 - 1);
|
||||||
let request_end = offset.saturating_add(len as u64);
|
let request_end = offset.checked_add(len as u64).ok_or_else(|| {
|
||||||
let block_end = aligned_key + BLOCK_ALIGN as u64;
|
io::Error::new(io::ErrorKind::InvalidInput, "read range overflows u64")
|
||||||
|
})?;
|
||||||
|
let block_end = aligned_key.saturating_add(BLOCK_ALIGN as u64);
|
||||||
|
|
||||||
if request_end > block_end {
|
if request_end > block_end {
|
||||||
self.spill_buf.resize(len, 0);
|
self.spill_buf.resize(len, 0);
|
||||||
@@ -74,7 +80,8 @@ impl LruReadCache {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let hit_idx = self.slots.iter().position(|slot| {
|
let hit_idx = self.slots.iter().position(|slot| {
|
||||||
slot.block_offset == aligned_key && request_end <= slot.block_offset + slot.len as u64
|
let slot_end = slot.block_offset.saturating_add(slot.len as u64);
|
||||||
|
slot.len > 0 && slot.block_offset == aligned_key && request_end <= slot_end
|
||||||
});
|
});
|
||||||
|
|
||||||
if let Some(idx) = hit_idx {
|
if let Some(idx) = hit_idx {
|
||||||
@@ -96,8 +103,8 @@ impl LruReadCache {
|
|||||||
let slot = &mut self.slots[evict_idx];
|
let slot = &mut self.slots[evict_idx];
|
||||||
let bytes_read = file.read_at(&mut slot.buf, aligned_key)?;
|
let bytes_read = file.read_at(&mut slot.buf, aligned_key)?;
|
||||||
|
|
||||||
// Note: get(file, 0, 0) on an empty file now returns Err (old code returned Ok(&[])).
|
// Non-empty reads that return 0 are EOF. Zero-length reads are handled above
|
||||||
// No callers pass len == 0, so this is a safe semantic change.
|
// as a successful no-op.
|
||||||
if bytes_read == 0 {
|
if bytes_read == 0 {
|
||||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "read 0 bytes"));
|
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "read 0 bytes"));
|
||||||
}
|
}
|
||||||
@@ -107,7 +114,8 @@ impl LruReadCache {
|
|||||||
slot.last_access = self.tick;
|
slot.last_access = self.tick;
|
||||||
self.tick += 1;
|
self.tick += 1;
|
||||||
|
|
||||||
if request_end > aligned_key + bytes_read as u64 {
|
let bytes_end = aligned_key.saturating_add(bytes_read as u64);
|
||||||
|
if request_end > bytes_end {
|
||||||
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "short read"));
|
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "short read"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -118,7 +126,9 @@ impl LruReadCache {
|
|||||||
/// Invalidate all cache slots and the spill buffer.
|
/// Invalidate all cache slots and the spill buffer.
|
||||||
pub fn clear(&mut self) {
|
pub fn clear(&mut self) {
|
||||||
for slot in &mut self.slots {
|
for slot in &mut self.slots {
|
||||||
|
slot.block_offset = 0;
|
||||||
slot.len = 0;
|
slot.len = 0;
|
||||||
|
slot.last_access = 0;
|
||||||
}
|
}
|
||||||
self.spill_len = 0;
|
self.spill_len = 0;
|
||||||
}
|
}
|
||||||
@@ -314,9 +324,11 @@ mod tests {
|
|||||||
|
|
||||||
cache.clear();
|
cache.clear();
|
||||||
|
|
||||||
// All slots should have len == 0.
|
// All slots should be fully reset.
|
||||||
for slot in &cache.slots {
|
for slot in &cache.slots {
|
||||||
|
assert_eq!(slot.block_offset, 0);
|
||||||
assert_eq!(slot.len, 0);
|
assert_eq!(slot.len, 0);
|
||||||
|
assert_eq!(slot.last_access, 0);
|
||||||
}
|
}
|
||||||
assert_eq!(cache.spill_len, 0);
|
assert_eq!(cache.spill_len, 0);
|
||||||
|
|
||||||
@@ -432,4 +444,52 @@ mod tests {
|
|||||||
assert_eq!(&line2[..4090], &[b'B'; 4090]);
|
assert_eq!(&line2[..4090], &[b'B'; 4090]);
|
||||||
assert_eq!(line2[4090], b'\n');
|
assert_eq!(line2[4090], b'\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn zero_len_read_is_noop_on_fresh_cache() {
|
||||||
|
let f = make_file(b"");
|
||||||
|
let file = File::open(f.path()).unwrap();
|
||||||
|
let mut cache = ReadCache::new();
|
||||||
|
|
||||||
|
let result = cache.get(&file, 0, 0).unwrap();
|
||||||
|
assert!(result.is_empty());
|
||||||
|
assert_eq!(cache.tick, 0);
|
||||||
|
assert!(cache.slots.iter().all(|s| s.len == 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn zero_len_read_is_noop_on_populated_cache() {
|
||||||
|
let f = make_file(b"abc");
|
||||||
|
let file = File::open(f.path()).unwrap();
|
||||||
|
let mut cache = ReadCache::new();
|
||||||
|
|
||||||
|
cache.get(&file, 0, 1).unwrap();
|
||||||
|
let tick_before = cache.tick;
|
||||||
|
|
||||||
|
let result = cache.get(&file, 0, 0).unwrap();
|
||||||
|
assert!(result.is_empty());
|
||||||
|
assert_eq!(cache.tick, tick_before);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn zero_len_read_at_max_offset_is_ok() {
|
||||||
|
let f = make_file(b"");
|
||||||
|
let file = File::open(f.path()).unwrap();
|
||||||
|
let mut cache = ReadCache::new();
|
||||||
|
|
||||||
|
let result = cache.get(&file, u64::MAX, 0).unwrap();
|
||||||
|
assert!(result.is_empty());
|
||||||
|
assert_eq!(cache.tick, 0);
|
||||||
|
assert!(cache.slots.iter().all(|s| s.len == 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn nonzero_read_range_overflow_returns_invalid_input() {
|
||||||
|
let f = make_file(b"abc");
|
||||||
|
let file = File::open(f.path()).unwrap();
|
||||||
|
let mut cache = ReadCache::new();
|
||||||
|
|
||||||
|
let err = cache.get(&file, u64::MAX, 1).unwrap_err();
|
||||||
|
assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user