feat(core): add detect_level() for plain text logs
Add level detection that tries JSON parsing first (trusts level field), then falls back to keyword scanning with word-boundary checks. Supports SEVERE, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and their abbreviations. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
212
crates/core/src/parser/level.rs
Normal file
212
crates/core/src/parser/level.rs
Normal file
@@ -0,0 +1,212 @@
|
||||
// ─── level.rs ────────────────────────────────────────────────────────────────
|
||||
// Detects the log level from a line of text.
|
||||
//
|
||||
// Two detection strategies:
|
||||
// 1. JSON lines: delegate to `super::json::parse_line` and extract the level field.
|
||||
// 2. Plain-text lines: keyword scan with word-boundary checks.
|
||||
// ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
use crate::types::LogLevel;
|
||||
|
||||
/// Level keywords recognised in plain-text lines, written in upper-case ASCII.
///
/// The list is kept longest-first so that a long alias (e.g. "WARNING") is
/// always considered before the shorter keyword it starts with ("WARN").
/// The scanner keeps the match with the smallest byte offset; when two
/// keywords report the same offset, the one listed earlier here is kept.
const KEYWORDS: &[&str] = &[
    // Long aliases.
    "INFORMATION",
    "WARNING",
    "SEVERE",
    // Five-letter names.
    "FATAL",
    "ERROR",
    "DEBUG",
    "TRACE",
    // Four-letter names.
    "WARN",
    "INFO",
    // Three-letter abbreviations.
    "ERR",
    "WRN",
    "DBG",
    "TRC",
];

/// Maximum number of bytes of a non-JSON line that are searched for a keyword.
///
/// Longer lines are cut at the nearest char boundary at or below this limit
/// before scanning, so the cut never lands inside a multi-byte character.
const SCAN_LIMIT: usize = 200;
|
||||
|
||||
// ─── detect_level ───────────────────────────────────────────────────────────
|
||||
/// Detect the log level of a line.
|
||||
///
|
||||
/// Returns `Some(level)` if a level is found, `None` otherwise.
|
||||
/// For valid JSON lines the result comes from the parsed `level` field
|
||||
/// (which may itself be `None` if the field is absent).
|
||||
pub fn detect_level(line: &str) -> Option<LogLevel> {
|
||||
if line.trim().is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Try JSON first. If parse_line returns Some, the line is valid JSON —
|
||||
// trust its level field (even if None) and do NOT fall through to keyword scan.
|
||||
if let Some(entry) = super::json::parse_line(line) {
|
||||
return entry.level;
|
||||
}
|
||||
|
||||
// Non-JSON: keyword scan.
|
||||
detect_level_from_text(line)
|
||||
}
|
||||
|
||||
// ─── detect_level_from_text ─────────────────────────────────────────────────
|
||||
/// Keyword-based level detection for non-JSON lines.
|
||||
fn detect_level_from_text(line: &str) -> Option<LogLevel> {
|
||||
// Safe truncation to SCAN_LIMIT bytes.
|
||||
let scan = if line.len() > SCAN_LIMIT {
|
||||
let mut end = SCAN_LIMIT;
|
||||
while !line.is_char_boundary(end) {
|
||||
end -= 1;
|
||||
}
|
||||
&line[..end]
|
||||
} else {
|
||||
line
|
||||
};
|
||||
|
||||
let lower = scan.to_ascii_lowercase();
|
||||
|
||||
// Track earliest match.
|
||||
let mut best_pos = usize::MAX;
|
||||
let mut best_keyword = "";
|
||||
|
||||
for &kw in KEYWORDS {
|
||||
let kw_lower = kw.to_ascii_lowercase();
|
||||
let mut start = 0;
|
||||
while let Some(pos) = lower[start..].find(&kw_lower) {
|
||||
let abs_pos = start + pos;
|
||||
if is_word_boundary(&lower, abs_pos, kw_lower.len()) {
|
||||
if abs_pos < best_pos {
|
||||
best_pos = abs_pos;
|
||||
best_keyword = kw;
|
||||
}
|
||||
break; // only need earliest occurrence of this keyword
|
||||
}
|
||||
// Move past this match to keep searching for a later occurrence with boundary.
|
||||
start = abs_pos + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if best_keyword.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// LogLevel::FromStr is case-insensitive and maps all aliases correctly.
|
||||
Some(
|
||||
best_keyword
|
||||
.parse::<LogLevel>()
|
||||
.unwrap_or_else(|e| match e {}),
|
||||
)
|
||||
}
|
||||
|
||||
// ─── is_word_boundary ───────────────────────────────────────────────────────
|
||||
/// Report whether the byte range `start..start + len` of `text` stands alone
/// as a word: the byte just before it and the byte just after it must each
/// be either absent (string edge) or something other than an ASCII letter.
fn is_word_boundary(text: &str, start: usize, len: usize) -> bool {
    let bytes = text.as_bytes();
    let end = start + len;

    // A letter immediately to the left means we are inside a longer word.
    if start != 0 && bytes[start - 1].is_ascii_alphabetic() {
        return false;
    }

    // Accept when the range reaches the end or is followed by a non-letter.
    end >= bytes.len() || !bytes[end].is_ascii_alphabetic()
}
|
||||
|
||||
// ─── tests ──────────────────────────────────────────────────────────────────
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // ── JSON lines: the parsed `level` field decides ────────────────────────

    #[test]
    fn test_detect_level_json_error() {
        let detected = detect_level(r#"{"level":"ERROR","message":"fail"}"#);
        assert_eq!(detected, Some(LogLevel::Error));
    }

    #[test]
    fn test_detect_level_json_info() {
        let detected = detect_level(r#"{"level":"info","msg":"hello"}"#);
        assert_eq!(detected, Some(LogLevel::Info));
    }

    #[test]
    fn test_detect_level_json_no_level() {
        let detected = detect_level(r#"{"message":"no level here"}"#);
        assert!(detected.is_none());
    }

    // ── Plain-text lines: keyword scan ──────────────────────────────────────

    #[test]
    fn test_detect_level_keyword_error() {
        let detected = detect_level("ERROR: connection failed");
        assert_eq!(detected, Some(LogLevel::Error));
    }

    #[test]
    fn test_detect_level_keyword_warn() {
        let detected = detect_level("WARN something happened");
        assert_eq!(detected, Some(LogLevel::Warn));
    }

    #[test]
    fn test_detect_level_keyword_case_insensitive() {
        let detected = detect_level("error: failed");
        assert_eq!(detected, Some(LogLevel::Error));
    }

    #[test]
    fn test_detect_level_empty() {
        assert!(detect_level("").is_none());
    }

    #[test]
    fn test_detect_level_no_level() {
        assert!(detect_level("just some text").is_none());
    }

    #[test]
    fn test_detect_level_false_positive() {
        // "errors" contains "error", but the trailing "s" means it is not a
        // standalone word, so nothing should be detected.
        assert!(detect_level("GET /api/errors/404").is_none());
    }

    #[test]
    fn test_detect_level_prefixed() {
        let detected = detect_level("2024-01-01 ERROR something");
        assert_eq!(detected, Some(LogLevel::Error));
    }

    #[test]
    fn test_detect_level_json_fallback() {
        // Valid JSON with no level field whose message mentions "ERROR":
        // the JSON result is trusted, so the keyword scan must not run.
        let detected = detect_level(r#"{"msg":"ERROR happened"}"#);
        assert!(detected.is_none());
    }

    #[test]
    fn test_detect_level_severe() {
        let detected = detect_level("SEVERE: system crash");
        assert_eq!(detected, Some(LogLevel::Error));
    }

    #[test]
    fn test_detect_level_fatal() {
        let detected = detect_level("FATAL: unrecoverable");
        assert_eq!(detected, Some(LogLevel::Error));
    }

    #[test]
    fn test_detect_level_whitespace_only() {
        assert!(detect_level(" \t ").is_none());
    }

    #[test]
    fn test_detect_level_multibyte_prefix() {
        // 63 three-byte characters give a 189-byte prefix; appending
        // " ERROR something" (16 bytes) makes the line 205 bytes long, so
        // truncation applies. "ERROR" sits at bytes 190..195, inside the
        // 200-byte scan window.
        let prefix: String = "你".repeat(63);
        let line = format!("{prefix} ERROR something");
        let detected = detect_level(&line);
        assert_eq!(detected, Some(LogLevel::Error));
    }
}
|
||||
@@ -10,3 +10,4 @@
|
||||
// Declare and export the json submodule (defined in json.rs).
|
||||
// This module is responsible for parsing JSON-formatted log lines.
|
||||
pub mod json;
|
||||
pub mod level;
|
||||
|
||||
Reference in New Issue
Block a user