//! Cassiopeia file lexer

use logos::{Lexer, Logos};
use std::iter::Iterator;

/// A basic line lexer type
///
/// This lexer distinguishes between comment lines and keyword lines.
/// It does not attempt to parse the specifics of each line; that is
/// what the content lexer is for.
#[derive(Logos, Debug, PartialEq)]
pub(crate) enum Token {
    #[token("HEADER")]
    Header,

    #[token("START")]
    Start,

    #[token("STOP")]
    Stop,

    #[token("INVOICE")]
    Invoice,

    #[regex(r"\w+=[^,$]+[,$]")]
    HeaderData,

    // FIXME: this will have a leading whitespace that we could remove
    // with ^\w, but logos does not support this at the moment
    #[regex(r"[0-9-:+ ]+")]
    Date,

    #[token(" ", logos::skip)]
    Space,

    #[regex(";;.*")]
    Comment,

    #[error]
    Error,
}

/// A single token on a line, along with the source slice it covers
#[derive(Debug)]
pub(crate) struct LineToken<'l> {
    pub(crate) tt: Token,
    pub(crate) slice: &'l str,
}

/// A lexer wrapper for a single line
pub(crate) struct LineLexer<'l> {
    lexer: Lexer<'l, Token>,
}

impl<'l> LineLexer<'l> {
    /// Consume the lexer and collect all tokens on this line
    pub(crate) fn get_all(self) -> Vec<LineToken<'l>> {
        let mut acc = vec![];
        for l in self {
            acc.push(l);
        }
        acc
    }
}

impl<'l> Iterator for LineLexer<'l> {
    type Item = LineToken<'l>;

    fn next(&mut self) -> Option<Self::Item> {
        // Pair each raw token with the slice of input it matched
        self.lexer.next().map(|tt| Self::Item {
            tt,
            slice: self.lexer.slice(),
        })
    }
}

/// Take a line of input and lex it into a stream of tokens
pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> {
    LineLexer {
        lexer: Token::lexer(line),
    }
}

#[test]
fn basic_header() {
    let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");

    assert_eq!(lex.next(), Some(Token::Header));
    assert_eq!(lex.span(), 0..6);
    assert_eq!(lex.slice(), "HEADER");

    assert_eq!(lex.next(), Some(Token::HeaderData));
    assert_eq!(lex.span(), 7..21);
    assert_eq!(lex.slice(), "version=0.0.0,");

    assert_eq!(lex.next(), Some(Token::HeaderData));
    assert_eq!(lex.span(), 21..49);
    assert_eq!(lex.slice(), "location=Berlin Lichtenberg,");

    assert_eq!(lex.next(), None);
}

#[test]
fn basic_start() {
    let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), Some(Token::Start));
    assert_eq!(lex.span(), 0..5);
    assert_eq!(lex.slice(), "START");

    assert_eq!(lex.next(), Some(Token::Date));
    assert_eq!(lex.span(), 5..31);
    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), None);
}

#[test]
fn basic_stop() {
    let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), Some(Token::Stop));
    assert_eq!(lex.span(), 0..4);
    assert_eq!(lex.slice(), "STOP");

    assert_eq!(lex.next(), Some(Token::Date));
    assert_eq!(lex.span(), 4..30);
    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), None);
}

#[test]
fn basic_invoice() {
    let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), Some(Token::Invoice));
    assert_eq!(lex.span(), 0..7);
    assert_eq!(lex.slice(), "INVOICE");

    assert_eq!(lex.next(), Some(Token::Date));
    assert_eq!(lex.span(), 7..33);
    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), None);
}

#[test]
fn basic_comment() {
    let mut lex = Token::lexer(";; This file is auto generated!");
    assert_eq!(lex.next(), Some(Token::Comment));
}
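
// A small end-to-end sketch (not part of the original test suite): it
// drives the `lex` wrapper and `LineLexer::get_all` instead of the raw
// `Token::lexer` used by the tests above, showing how callers are
// expected to consume a line. The input and expected tokens mirror
// `basic_start`; note the Date slice keeps its leading space, as the
// FIXME on the Date regex explains.
#[test]
fn lexer_wrapper_collects_tokens() {
    let mut line = String::from("START 2020-11-11 13:00:00+01:00");
    let tokens = lex(&mut line).get_all();

    assert_eq!(tokens.len(), 2);
    assert_eq!(tokens[0].tt, Token::Start);
    assert_eq!(tokens[0].slice, "START");
    assert_eq!(tokens[1].tt, Token::Date);
    assert_eq!(tokens[1].slice, " 2020-11-11 13:00:00+01:00");
}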