Skip to main content

rlg/
log_format.rs

1// log_format.rs
2// Copyright © 2024-2026 RustLogs (RLG). All rights reserved.
3// SPDX-License-Identifier: Apache-2.0
4// SPDX-License-Identifier: MIT
5
6use crate::error::{RlgError, RlgResult};
7use crate::utils::sanitize_log_message;
8use regex::Regex;
9use serde::{Deserialize, Serialize};
10use std::fmt;
11use std::str::FromStr;
12use std::sync::LazyLock;
13
14/// Compiled regular expressions for log format validation.
15static CLF_REGEX: LazyLock<Regex> = LazyLock::new(|| {
16    Regex::new(
17    r#"^(?P<host>\S+) (?P<ident>\S+) (?P<user>\S+) \[(?P<time>[^\]]+)\] "(?P<method>\S+) (?P<path>\S+) (?P<protocol>\S+)" (?P<status>\d{3}) (?P<size>\d+|-)$"#
18).expect("Failed to compile CLF regex")
19});
20
21static CEF_REGEX: LazyLock<Regex> = LazyLock::new(|| {
22    Regex::new(
23        r"^CEF:\d+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|[^|]+\|.*$",
24    )
25    .expect("Failed to compile CEF regex")
26});
27
28static W3C_REGEX: LazyLock<Regex> = LazyLock::new(|| {
29    Regex::new(
30        r"^#Fields:.*
31.+$",
32    )
33    .expect("Failed to compile W3C regex")
34});
35
36/// `LogFormat` is an enum representing the different structured log formats supported by the `RLG` library.
37#[derive(
38    Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize,
39)]
40pub enum LogFormat {
41    /// Common Log Format (CLF)
42    CLF,
43    /// JavaScript Object Notation (JSON)
44    JSON,
45    /// Common Event Format (CEF)
46    CEF,
47    /// Extended Log Format (ELF)
48    ELF,
49    /// W3C Extended Log Format (W3C)
50    W3C,
51    /// Graylog Extended Log Format (GELF)
52    GELF,
53    /// Apache Access Log Format
54    ApacheAccessLog,
55    /// Logstash Format
56    Logstash,
57    /// Log4j XML Format
58    Log4jXML,
59    /// Network Data JSON (NDJSON)
60    NDJSON,
61    /// Model Context Protocol (MCP) - AI Native
62    MCP,
63    /// OpenTelemetry Logging (OTLP) - AI Native
64    OTLP,
65    /// Logfmt (key=value)
66    Logfmt,
67    /// Elastic Common Schema (ECS)
68    ECS,
69}
70
/// Generates the `FromStr` and `Display` implementations for `LogFormat`.
///
/// Each entry has the shape `Variant => "display text", ["key", ...]` and
/// entries are separated by `;` (a trailing `;` is allowed):
///
/// - `Variant` is the `LogFormat` variant being described.
/// - `"display text"` is the string written by `Display`.
/// - the bracketed keys are the spellings accepted by `FromStr`. The input is
///   lower-cased before matching, so keys must be written in lower case.
macro_rules! define_log_format_strings {
    ( $( $variant:ident => $display:expr, [ $( $key:expr ),+ ] );+ $(;)? ) => {
        impl FromStr for LogFormat {
            type Err = RlgError;

            /// Parses a format name, ignoring letter case.
            ///
            /// Returns `RlgError::FormatParseError` carrying the original
            /// input when no key matches.
            fn from_str(s: &str) -> Result<Self, Self::Err> {
                match s.to_lowercase().as_str() {
                    $(
                        $( $key )|+ => Ok(Self::$variant),
                    )+
                    _ => Err(RlgError::FormatParseError(format!(
                        "Unknown log format: {s}"
                    ))),
                }
            }
        }

        impl fmt::Display for LogFormat {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let name: &str = match self {
                    $( Self::$variant => $display, )+
                };
                // `pad` honours the caller's width, fill, alignment and
                // precision (e.g. `{:<10}`); `write!(f, "{name}")` would
                // silently drop them.
                f.pad(name)
            }
        }
    };
}
98
99define_log_format_strings! {
100    CLF => "CLF", ["clf"];
101    JSON => "JSON", ["json"];
102    CEF => "CEF", ["cef"];
103    ELF => "ELF", ["elf"];
104    W3C => "W3C", ["w3c"];
105    GELF => "GELF", ["gelf"];
106    ApacheAccessLog => "Apache Access Log", ["apache", "apacheaccesslog"];
107    Logstash => "Logstash", ["logstash"];
108    Log4jXML => "Log4j XML", ["log4jxml"];
109    NDJSON => "NDJSON", ["ndjson"];
110    MCP => "MCP", ["mcp"];
111    OTLP => "OTLP", ["otlp"];
112    Logfmt => "logfmt", ["logfmt"];
113    ECS => "ECS", ["ecs"];
114}
115
116impl LogFormat {
117    /// Validates a log entry against the current format.
118    ///
119    /// # Examples
120    ///
121    /// ```
122    /// use rlg::log_format::LogFormat;
123    /// let is_valid = LogFormat::CLF.validate("127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326");
124    /// assert!(is_valid);
125    /// ```
126    #[must_use]
127    pub fn validate(&self, entry: &str) -> bool {
128        if entry.is_empty() {
129            return false;
130        }
131        match self {
132            Self::CLF => CLF_REGEX.is_match(entry),
133            Self::CEF => CEF_REGEX.is_match(entry),
134            Self::W3C => W3C_REGEX.is_match(entry),
135            Self::JSON
136            | Self::GELF
137            | Self::Logstash
138            | Self::NDJSON
139            | Self::MCP
140            | Self::OTLP
141            | Self::ECS => {
142                serde_json::from_str::<serde_json::Value>(entry).is_ok()
143            }
144            Self::Logfmt => {
145                entry.contains('=') && !entry.starts_with('=')
146            }
147            Self::Log4jXML => {
148                entry.contains("<log4j:event") && entry.contains('>')
149            }
150            Self::ELF | Self::ApacheAccessLog => true, // Basic validation for others
151        }
152    }
153
154    /// Formats a log entry according to the log format.
155    ///
156    /// # Errors
157    ///
158    /// This function returns an error if the log entry is not valid JSON for JSON-based formats.
159    ///
160    /// # Panics
161    ///
162    /// This function does not panic under normal usage. The internal `expect` guards
163    /// a `serde_json::to_string_pretty` call on a successfully parsed `Value`, which
164    /// can only fail on out-of-memory conditions.
165    ///
166    /// # Examples
167    ///
168    /// ```
169    /// use rlg::log_format::LogFormat;
170    /// let formatted_log = LogFormat::CLF.format_log("127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326").unwrap();
171    /// ```
172    pub fn format_log(&self, entry: &str) -> RlgResult<String> {
173        let sanitized_entry = sanitize_log_message(entry);
174        match self {
175            Self::CLF
176            | Self::ApacheAccessLog
177            | Self::CEF
178            | Self::ELF
179            | Self::W3C
180            | Self::Log4jXML
181            | Self::Logfmt => Ok(sanitized_entry),
182            Self::JSON
183            | Self::Logstash
184            | Self::NDJSON
185            | Self::GELF
186            | Self::MCP
187            | Self::OTLP
188            | Self::ECS => {
189                let val = serde_json::from_str::<serde_json::Value>(
190                    &sanitized_entry,
191                )
192                .map_err(|e| {
193                    RlgError::FormattingError(format!(
194                        "Invalid JSON: {e}"
195                    ))
196                })?;
197
198                // to_string_pretty on a valid Value writing to String cannot fail
199                // (the only failure mode is I/O error, which String doesn't produce).
200                Ok(serde_json::to_string_pretty(&val).expect(
201                    "serde_json::to_string_pretty cannot fail on a valid Value",
202                ))
203            }
204        }
205    }
206}
207
#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed Common Log Format line shared by the tests below.
    const CLF_SAMPLE: &str = r#"127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326"#;

    /// Format names parse regardless of case; unknown names are rejected.
    #[test]
    fn test_log_format_from_str() {
        let lower_case = "json".parse::<LogFormat>().unwrap();
        assert_eq!(lower_case, LogFormat::JSON);

        let upper_case = "CLF".parse::<LogFormat>().unwrap();
        assert_eq!(upper_case, LogFormat::CLF);

        let unknown = "invalid".parse::<LogFormat>();
        assert!(unknown.is_err());
    }

    /// Well-formed CLF and JSON entries pass validation.
    #[test]
    fn test_log_format_validate() {
        let clf_ok = LogFormat::CLF.validate(CLF_SAMPLE);
        assert!(clf_ok);

        let json_ok = LogFormat::JSON.validate(r#"{"key": "value"}"#);
        assert!(json_ok);
    }

    /// JSON is re-serialized, while a CLF entry passes through untouched.
    #[test]
    fn test_log_format_format_log() {
        let pretty_json = LogFormat::JSON
            .format_log(r#"{"key":"value"}"#)
            .unwrap();
        assert!(pretty_json.contains('"'));

        // CLF should remain unchanged.
        let clf_out = LogFormat::CLF.format_log(CLF_SAMPLE).unwrap();
        assert_eq!(clf_out, CLF_SAMPLE);
    }
}