diff options
Diffstat (limited to 'apps/koffice/libko/src/cass/format/lexer.rs')
-rw-r--r-- | apps/koffice/libko/src/cass/format/lexer.rs | 151 |
1 file changed, 151 insertions, 0 deletions
diff --git a/apps/koffice/libko/src/cass/format/lexer.rs b/apps/koffice/libko/src/cass/format/lexer.rs new file mode 100644 index 000000000000..bdb89f5180e5 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/lexer.rs @@ -0,0 +1,151 @@ +//! Cassiopeia file lexer + +use logos::{Lexer, Logos}; +use std::iter::Iterator; + +/// A basic line lexer type +/// +/// This lexer distinguishes between comments, and keyword lines. It +/// does not attempt to parse the line specifics. This is what the +/// content lexer is for. +#[derive(Logos, Debug, PartialEq)] +pub(crate) enum Token { + #[token("HEADER")] + Header, + + #[token("START")] + Start, + + #[token("STOP")] + Stop, + + #[token("INVOICE")] + Invoice, + + #[regex(r"\w+=[^,$]+[,$]")] + HeaderData, + + // FIXME: this will have a leading whitespace that we could remove + // with ^\w, but logos does not support this at the moment + #[regex(r"[0-9-:+ ]+")] + Date, + + #[token(" ", logos::skip)] + Space, + + #[regex(";;.*")] + Comment, + + #[error] + Error, +} + +/// A single token type on a line +#[derive(Debug)] +pub(crate) struct LineToken<'l> { + pub(crate) tt: Token, + pub(crate) slice: &'l str, +} + +/// A lexer wrapped for a single line +pub(crate) struct LineLexer<'l> { + lexer: Lexer<'l, Token>, +} + +impl<'l> LineLexer<'l> { + pub(crate) fn get_all(self) -> Vec<LineToken<'l>> { + let mut acc = vec![]; + for l in self { + acc.push(l); + } + acc + } +} + +impl<'l> Iterator for LineLexer<'l> { + type Item = LineToken<'l>; + + fn next(&mut self) -> Option<Self::Item> { + self.lexer.next().map(|tt| Self::Item { + tt, + slice: self.lexer.slice(), + }) + } +} + +/// Take a line of input and lex it into a stream of tokens +pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> { + LineLexer { + lexer: Token::lexer(line), + } +} + +#[test] +fn basic_header() { + let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,"); + + assert_eq!(lex.next(), Some(Token::Header)); + assert_eq!(lex.span(), 
0..6); + assert_eq!(lex.slice(), "HEADER"); + + assert_eq!(lex.next(), Some(Token::HeaderData)); + assert_eq!(lex.span(), 7..21); + assert_eq!(lex.slice(), "version=0.0.0,"); + + assert_eq!(lex.next(), Some(Token::HeaderData)); + assert_eq!(lex.span(), 21..49); + assert_eq!(lex.slice(), "location=Berlin Lichtenberg,"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_start() { + let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Start)); + assert_eq!(lex.span(), 0..5); + assert_eq!(lex.slice(), "START"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 5..31); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_stop() { + let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Stop)); + assert_eq!(lex.span(), 0..4); + assert_eq!(lex.slice(), "STOP"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 4..30); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_invoice() { + let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Invoice)); + assert_eq!(lex.span(), 0..7); + assert_eq!(lex.slice(), "INVOICE"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 7..33); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_comment() { + let mut lex = Token::lexer(";; This file is auto generated!"); + assert_eq!(lex.next(), Some(Token::Comment)); +} |