From a40e26f8cef4103e94be13edcc0044290ac141d7 Mon Sep 17 00:00:00 2001
From: Mx Kookie
Date: Thu, 10 Dec 2020 14:30:38 +0000
Subject: cassiopeia: adding new format lexer built on logos

---
 apps/cassiopeia/src/format/lexer.rs | 116 ++++++++++++++++++++++++++++++++++++
 apps/cassiopeia/src/format/mod.rs   |   3 +
 2 files changed, 119 insertions(+)
 create mode 100644 apps/cassiopeia/src/format/lexer.rs
 create mode 100644 apps/cassiopeia/src/format/mod.rs

(limited to 'apps/cassiopeia/src/format')

diff --git a/apps/cassiopeia/src/format/lexer.rs b/apps/cassiopeia/src/format/lexer.rs
new file mode 100644
index 000000000000..f062ca4238c1
--- /dev/null
+++ b/apps/cassiopeia/src/format/lexer.rs
@@ -0,0 +1,116 @@
+//! Cassiopeia file lexer
+
+use logos::Logos;
+
+/// A basic line lexer type
+///
+/// This lexer splits a line into its keyword (`HEADER`, `START`, `STOP`,
+/// `INVOICE`) and raw payload tokens. It does not attempt to parse the
+/// payload specifics. This is what the content lexer is for.
+#[derive(Logos, Debug, PartialEq)]
+enum Line {
+    /// The literal keyword `HEADER`.
+    #[token("HEADER")]
+    Header,
+    /// The literal keyword `START`.
+    #[token("START")]
+    Start,
+    /// The literal keyword `STOP`.
+    #[token("STOP")]
+    Stop,
+    /// The literal keyword `INVOICE`.
+    #[token("INVOICE")]
+    Invoice,
+    /// A `key=value` pair ending in `,` or `$`; inside `[...]` the `$` is a literal.
+    #[regex(r"\w+=[^,$]+[,$]")]
+    HeaderData,
+    /// A run of digits, `-`, `:`, `+` and spaces, e.g. ` 2020-11-11 13:00:00+01:00`.
+    // FIXME: this will have a leading whitespace that we could remove
+    // with ^\w, but logos does not support this at the moment
+    #[regex(r"[0-9-:+ ]+")]
+    Date,
+    /// A single space, discarded through `logos::skip`.
+    #[token(" ", logos::skip)]
+    Space,
+    /// Error variant, as marked by `#[error]`.
+    #[error]
+    Error,
+}
+
+// Commented-out manual check that prints every token and its slice:
+// pub fn test_this() {
+//     // let mut lex = Line::lexer("HEADER version=0.0.0,location=Berlin,");
+//     let mut lex = Line::lexer("START 2020-11-11 13:00:00+01:00");
+
+//     while let Some(t) = lex.next() {
+//         println!("{:?}: {}", t, lex.slice());
+//     }
+// }
+
+/// `HEADER` line: the keyword, then two comma-terminated `key=value` tokens.
+#[test]
+fn basic_header() {
+    let mut lex = Line::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");
+
+    assert_eq!(lex.next(), Some(Line::Header));
+    assert_eq!(lex.span(), 0..6);
+    assert_eq!(lex.slice(), "HEADER");
+
+    assert_eq!(lex.next(), Some(Line::HeaderData));
+    assert_eq!(lex.span(), 7..21);
+    assert_eq!(lex.slice(), "version=0.0.0,");
+
+    assert_eq!(lex.next(), Some(Line::HeaderData));
+    assert_eq!(lex.span(), 21..49);
+    assert_eq!(lex.slice(), "location=Berlin Lichtenberg,");
+
+    assert_eq!(lex.next(), None);
+}
+
+/// `START` line: the keyword, then one `Date` token that keeps its leading space.
+#[test]
+fn basic_start() {
+    let mut lex = Line::lexer("START 2020-11-11 13:00:00+01:00");
+
+    assert_eq!(lex.next(), Some(Line::Start));
+    assert_eq!(lex.span(), 0..5);
+    assert_eq!(lex.slice(), "START");
+
+    assert_eq!(lex.next(), Some(Line::Date));
+    assert_eq!(lex.span(), 5..31);
+    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
+
+    assert_eq!(lex.next(), None);
+}
+
+/// `STOP` line: the keyword, then one `Date` token that keeps its leading space.
+#[test]
+fn basic_stop() {
+    let mut lex = Line::lexer("STOP 2020-11-11 13:00:00+01:00");
+
+    assert_eq!(lex.next(), Some(Line::Stop));
+    assert_eq!(lex.span(), 0..4);
+    assert_eq!(lex.slice(), "STOP");
+
+    assert_eq!(lex.next(), Some(Line::Date));
+    assert_eq!(lex.span(), 4..30);
+    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
+
+    assert_eq!(lex.next(), None);
+}
+
+/// `INVOICE` line: the keyword, then one `Date` token that keeps its leading space.
+#[test]
+fn basic_invoice() {
+    let mut lex = Line::lexer("INVOICE 2020-11-11 13:00:00+01:00");
+
+    assert_eq!(lex.next(), Some(Line::Invoice));
+    assert_eq!(lex.span(), 0..7);
+    assert_eq!(lex.slice(), "INVOICE");
+
+    assert_eq!(lex.next(), Some(Line::Date));
+    assert_eq!(lex.span(), 7..33);
+    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
+
+    assert_eq!(lex.next(), None);
+}
diff --git a/apps/cassiopeia/src/format/mod.rs b/apps/cassiopeia/src/format/mod.rs
new file mode 100644
index 000000000000..766bb22be13e
--- /dev/null
+++ b/apps/cassiopeia/src/format/mod.rs
@@ -0,0 +1,3 @@
+//! cassiopeia file format handling
+
+pub(crate) mod lexer;
-- 
cgit v1.2.3