1use crate::error::{RlgError, RlgResult};
7use crate::utils::sanitize_log_message;
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10use std::fmt;
11use std::str::FromStr;
12use std::sync::LazyLock;
13
14static CLF_REGEX: LazyLock<Regex> = LazyLock::new(|| {
16 Regex::new(
17 r#"^(?P<host>\S+) (?P<ident>\S+) (?P<user>\S+) \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) (?P<protocol>\S+)" (?P<status>\d{3}) (?P<size>\d+|-)$"#
18).expect("Failed to compile CLF regex")
19});
20
21static CEF_REGEX: LazyLock<Regex> = LazyLock::new(|| {
22 Regex::new(
23 r"^CEF:\d+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|.*$",
24 )
25 .expect("Failed to compile CEF regex")
26});
27
28static W3C_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29 Regex::new(
30 r"^#Fields:.*
31.+$",
32 )
33 .expect("Failed to compile W3C regex")
34});
35
36#[derive(
38 Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize,
39)]
40pub enum LogFormat {
41 CLF,
43 JSON,
45 CEF,
47 ELF,
49 W3C,
51 GELF,
53 ApacheAccessLog,
55 Logstash,
57 Log4jXML,
59 NDJSON,
61 MCP,
63 OTLP,
65 Logfmt,
67 ECS,
69}
70
71macro_rules! define_log_format_strings {
72 ( $( $variant:ident => $display:expr, [ $( $key:expr ),+ ] );+ $(;)? ) => {
73 impl FromStr for LogFormat {
74 type Err = RlgError;
75
76 fn from_str(s: &str) -> Result<Self, Self::Err> {
77 match s.to_lowercase().as_str() {
78 $(
79 $( $key )|+ => Ok(Self::$variant),
80 )+
81 _ => Err(RlgError::FormatParseError(format!(
82 "Unknown log format: {s}"
83 ))),
84 }
85 }
86 }
87
88 impl fmt::Display for LogFormat {
89 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90 let s = match self {
91 $( Self::$variant => $display, )+
92 };
93 write!(f, "{s}")
94 }
95 }
96 };
97}
98
99define_log_format_strings! {
100 CLF => "CLF", ["clf"];
101 JSON => "JSON", ["json"];
102 CEF => "CEF", ["cef"];
103 ELF => "ELF", ["elf"];
104 W3C => "W3C", ["w3c"];
105 GELF => "GELF", ["gelf"];
106 ApacheAccessLog => "Apache Access Log", ["apache", "apacheaccesslog"];
107 Logstash => "Logstash", ["logstash"];
108 Log4jXML => "Log4j XML", ["log4jxml"];
109 NDJSON => "NDJSON", ["ndjson"];
110 MCP => "MCP", ["mcp"];
111 OTLP => "OTLP", ["otlp"];
112 Logfmt => "logfmt", ["logfmt"];
113 ECS => "ECS", ["ecs"];
114}
115
116impl LogFormat {
117 #[must_use]
127 pub fn validate(&self, entry: &str) -> bool {
128 if entry.is_empty() {
129 return false;
130 }
131 match self {
132 Self::CLF => CLF_REGEX.is_match(entry),
133 Self::CEF => CEF_REGEX.is_match(entry),
134 Self::W3C => W3C_REGEX.is_match(entry),
135 Self::JSON
136 | Self::GELF
137 | Self::Logstash
138 | Self::NDJSON
139 | Self::MCP
140 | Self::OTLP
141 | Self::ECS => {
142 serde_json::from_str::<serde_json::Value>(entry).is_ok()
143 }
144 Self::Logfmt => {
145 entry.contains('=') && !entry.starts_with('=')
146 }
147 Self::Log4jXML => {
148 entry.contains("<log4j:event") && entry.contains('>')
149 }
150 Self::ELF | Self::ApacheAccessLog => true, }
152 }
153
154 pub fn format_log(&self, entry: &str) -> RlgResult<String> {
173 let sanitized_entry = sanitize_log_message(entry);
174 match self {
175 Self::CLF
176 | Self::ApacheAccessLog
177 | Self::CEF
178 | Self::ELF
179 | Self::W3C
180 | Self::Log4jXML
181 | Self::Logfmt => Ok(sanitized_entry),
182 Self::JSON
183 | Self::Logstash
184 | Self::NDJSON
185 | Self::GELF
186 | Self::MCP
187 | Self::OTLP
188 | Self::ECS => {
189 let val = serde_json::from_str::<serde_json::Value>(
190 &sanitized_entry,
191 )
192 .map_err(|e| {
193 RlgError::FormattingError(format!(
194 "Invalid JSON: {e}"
195 ))
196 })?;
197
198 Ok(serde_json::to_string_pretty(&val).expect(
201 "serde_json::to_string_pretty cannot fail on a valid Value",
202 ))
203 }
204 }
205 }
206}
207
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed Common Log Format line shared by the tests below.
    const CLF_SAMPLE: &str = r#"127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326"#;

    /// Parsing is case-insensitive and rejects unknown names.
    #[test]
    fn test_log_format_from_str() {
        let lower = "json".parse::<LogFormat>();
        assert_eq!(lower.unwrap(), LogFormat::JSON);

        let upper = "CLF".parse::<LogFormat>();
        assert_eq!(upper.unwrap(), LogFormat::CLF);

        let unknown = "invalid".parse::<LogFormat>();
        assert!(unknown.is_err());
    }

    /// A CLF line and a JSON object are accepted by their own formats.
    #[test]
    fn test_log_format_validate() {
        let clf_ok = LogFormat::CLF.validate(CLF_SAMPLE);
        assert!(clf_ok);

        let json_ok = LogFormat::JSON.validate(r#"{"key": "value"}"#);
        assert!(json_ok);
    }

    /// JSON output keeps its quotes; a CLF line comes back unchanged.
    #[test]
    fn test_log_format_format_log() {
        let json_out = LogFormat::JSON
            .format_log(r#"{"key":"value"}"#)
            .unwrap();
        assert!(json_out.contains('"'));

        let clf_out = LogFormat::CLF.format_log(CLF_SAMPLE).unwrap();
        assert_eq!(clf_out, CLF_SAMPLE);
    }
}