diff options
Diffstat (limited to 'apps/koffice/libko/src/cass/format/lexer.rs')
-rw-r--r-- | apps/koffice/libko/src/cass/format/lexer.rs | 151 |
1 file changed, 151 insertions, 0 deletions
diff --git a/apps/koffice/libko/src/cass/format/lexer.rs b/apps/koffice/libko/src/cass/format/lexer.rs new file mode 100644 index 000000000000..bdb89f5180e5 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/lexer.rs @@ -0,0 +1,151 @@ +//! Cassiopeia file lexer + +use logos::{Lexer, Logos}; +use std::iter::Iterator; + +/// A basic line lexer type +/// +/// This lexer distinguishes between comments, and keyword lines. It +/// does not attempt to parse the line specifics. This is what the +/// content lexer is for. +#[derive(Logos, Debug, PartialEq)] +pub(crate) enum Token { + #[token("HEADER")] + Header, + + #[token("START")] + Start, + + #[token("STOP")] + Stop, + + #[token("INVOICE")] + Invoice, + + #[regex(r"\w+=[^,$]+[,$]")] + HeaderData, + + // FIXME: this will have a leading whitespace that we could remove + // with ^\w, but logos does not support this at the moment + #[regex(r"[0-9-:+ ]+")] + Date, + + #[token(" ", logos::skip)] + Space, + + #[regex(";;.*")] + Comment, + + #[error] + Error, +} + +/// A single token type on a line +#[derive(Debug)] +pub(crate) struct LineToken<'l> { + pub(crate) tt: Token, + pub(crate) slice: &'l str, +} + +/// A lexer wrapped for a single line +pub(crate) struct LineLexer<'l> { + lexer: Lexer<'l, Token>, +} + +impl<'l> LineLexer<'l> { + pub(crate) fn get_all(self) -> Vec<LineToken<'l>> { + let mut acc = vec![]; + for l in self { + acc.push(l); + } + acc + } +} + +impl<'l> Iterator for LineLexer<'l> { + type Item = LineToken<'l>; + + fn next(&mut self) -> Option<Self::Item> { + self.lexer.next().map(|tt| Self::Item { + tt, + slice: self.lexer.slice(), + }) + } +} + +/// Take a line of input and lex it into a stream of tokens +pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> { + LineLexer { + lexer: Token::lexer(line), + } +} + +#[test] +fn basic_header() { + let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,"); + + assert_eq!(lex.next(), Some(Token::Header)); + assert_eq!(lex.span(), 
0..6); + assert_eq!(lex.slice(), "HEADER"); + + assert_eq!(lex.next(), Some(Token::HeaderData)); + assert_eq!(lex.span(), 7..21); + assert_eq!(lex.slice(), "version=0.0.0,"); + + assert_eq!(lex.next(), Some(Token::HeaderData)); + assert_eq!(lex.span(), 21..49); + assert_eq!(lex.slice(), "location=Berlin Lichtenberg,"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_start() { + let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Start)); + assert_eq!(lex.span(), 0..5); + assert_eq!(lex.slice(), "START"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 5..31); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_stop() { + let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Stop)); + assert_eq!(lex.span(), 0..4); + assert_eq!(lex.slice(), "STOP"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 4..30); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_invoice() { + let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Invoice)); + assert_eq!(lex.span(), 0..7); + assert_eq!(lex.slice(), "INVOICE"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 7..33); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_comment() { + let mut lex = Token::lexer(";; This file is auto generated!"); + assert_eq!(lex.next(), Some(Token::Comment)); +} |