fix(io): harden read_cache against zero-length false hits and overflow (closes #13)

- Add early return for len==0 (Ok(&[])) matching std::io semantics
- Add slot.len > 0 guard to cache hit predicate to prevent empty-slot
  false matches
- Replace unchecked arithmetic with checked_add/saturating_add for
  request_end, block_end, and post-read coverage check
- Fix misleading comment about get(file,0,0) behavior on miss path
- Strengthen clear() to fully reset block_offset and last_access
- Register read_cache module in io/mod.rs
- Add 4 regression tests: zero-len on fresh/populated cache,
  zero-len at u64::MAX, overflow error on nonzero read at u64::MAX
This commit is contained in:
dailz
2026-06-09 10:48:34 +08:00
parent b58d66f2aa
commit d37ed6df68
2 changed files with 68 additions and 7 deletions

View File

@@ -18,4 +18,5 @@ pub mod cache_util;
pub mod index_cache;
pub mod line_sampler;
pub mod progressive_reader;
pub mod read_cache;
pub mod wrap;

View File

@@ -56,9 +56,15 @@ impl LruReadCache {
/// on a hit, or fills a cache slot on a miss. Cross-block reads go through
/// the spill buffer and are not cached.
pub fn get(&mut self, file: &File, offset: u64, len: usize) -> io::Result<&[u8]> {
if len == 0 {
return Ok(&[]);
}
let aligned_key = offset & !(BLOCK_ALIGN as u64 - 1);
let request_end = offset.saturating_add(len as u64);
let block_end = aligned_key + BLOCK_ALIGN as u64;
let request_end = offset.checked_add(len as u64).ok_or_else(|| {
io::Error::new(io::ErrorKind::InvalidInput, "read range overflows u64")
})?;
let block_end = aligned_key.saturating_add(BLOCK_ALIGN as u64);
if request_end > block_end {
self.spill_buf.resize(len, 0);
@@ -74,7 +80,8 @@ impl LruReadCache {
}
let hit_idx = self.slots.iter().position(|slot| {
slot.block_offset == aligned_key && request_end <= slot.block_offset + slot.len as u64
let slot_end = slot.block_offset.saturating_add(slot.len as u64);
slot.len > 0 && slot.block_offset == aligned_key && request_end <= slot_end
});
if let Some(idx) = hit_idx {
@@ -96,8 +103,8 @@ impl LruReadCache {
let slot = &mut self.slots[evict_idx];
let bytes_read = file.read_at(&mut slot.buf, aligned_key)?;
// Note: get(file, 0, 0) on an empty file now returns Err (old code returned Ok(&[])).
// No callers pass len == 0, so this is a safe semantic change.
// Non-empty reads that return 0 are EOF. Zero-length reads are handled above
// as a successful no-op.
if bytes_read == 0 {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "read 0 bytes"));
}
@@ -107,7 +114,8 @@ impl LruReadCache {
slot.last_access = self.tick;
self.tick += 1;
if request_end > aligned_key + bytes_read as u64 {
let bytes_end = aligned_key.saturating_add(bytes_read as u64);
if request_end > bytes_end {
return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "short read"));
}
@@ -118,7 +126,9 @@ impl LruReadCache {
/// Invalidate all cache slots and the spill buffer.
pub fn clear(&mut self) {
for slot in &mut self.slots {
slot.block_offset = 0;
slot.len = 0;
slot.last_access = 0;
}
self.spill_len = 0;
}
@@ -314,9 +324,11 @@ mod tests {
cache.clear();
// All slots should have len == 0.
// All slots should be fully reset.
for slot in &cache.slots {
assert_eq!(slot.block_offset, 0);
assert_eq!(slot.len, 0);
assert_eq!(slot.last_access, 0);
}
assert_eq!(cache.spill_len, 0);
@@ -432,4 +444,52 @@ mod tests {
assert_eq!(&line2[..4090], &[b'B'; 4090]);
assert_eq!(line2[4090], b'\n');
}
#[test]
fn zero_len_read_is_noop_on_fresh_cache() {
let f = make_file(b"");
let file = File::open(f.path()).unwrap();
let mut cache = ReadCache::new();
let result = cache.get(&file, 0, 0).unwrap();
assert!(result.is_empty());
assert_eq!(cache.tick, 0);
assert!(cache.slots.iter().all(|s| s.len == 0));
}
#[test]
fn zero_len_read_is_noop_on_populated_cache() {
let f = make_file(b"abc");
let file = File::open(f.path()).unwrap();
let mut cache = ReadCache::new();
cache.get(&file, 0, 1).unwrap();
let tick_before = cache.tick;
let result = cache.get(&file, 0, 0).unwrap();
assert!(result.is_empty());
assert_eq!(cache.tick, tick_before);
}
#[test]
fn zero_len_read_at_max_offset_is_ok() {
let f = make_file(b"");
let file = File::open(f.path()).unwrap();
let mut cache = ReadCache::new();
let result = cache.get(&file, u64::MAX, 0).unwrap();
assert!(result.is_empty());
assert_eq!(cache.tick, 0);
assert!(cache.slots.iter().all(|s| s.len == 0));
}
#[test]
fn nonzero_read_range_overflow_returns_invalid_input() {
let f = make_file(b"abc");
let file = File::open(f.path()).unwrap();
let mut cache = ReadCache::new();
let err = cache.get(&file, u64::MAX, 1).unwrap_err();
assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
}
}