Diffstat (limited to 'apps/koffice/libko/src/cass/format/lexer.rs')
-rw-r--r--  apps/koffice/libko/src/cass/format/lexer.rs  151
1 file changed, 151 insertions(+), 0 deletions(-)
diff --git a/apps/koffice/libko/src/cass/format/lexer.rs b/apps/koffice/libko/src/cass/format/lexer.rs
new file mode 100644
index 000000000000..bdb89f5180e5
--- /dev/null
+++ b/apps/koffice/libko/src/cass/format/lexer.rs
@@ -0,0 +1,151 @@
+//! Cassiopeia file lexer
+
+use logos::{Lexer, Logos};
+
+/// A basic line lexer type
+///
+/// This lexer distinguishes between comment and keyword lines. It
+/// does not attempt to parse the specifics of each line; that is the
+/// job of the content lexer.
+#[derive(Logos, Debug, PartialEq)]
+pub(crate) enum Token {
+ #[token("HEADER")]
+ Header,
+
+ #[token("START")]
+ Start,
+
+ #[token("STOP")]
+ Stop,
+
+ #[token("INVOICE")]
+ Invoice,
+
+ #[regex(r"\w+=[^,$]+[,$]")]
+ HeaderData,
+
+ // FIXME: this will have a leading whitespace that we could remove
+ // with ^\w, but logos does not support this at the moment
+ #[regex(r"[0-9-:+ ]+")]
+ Date,
+
+ #[token(" ", logos::skip)]
+ Space,
+
+ #[regex(";;.*")]
+ Comment,
+
+ #[error]
+ Error,
+}
+
+/// A single token on a line, together with the source slice it matched
+#[derive(Debug)]
+pub(crate) struct LineToken<'l> {
+ pub(crate) tt: Token,
+ pub(crate) slice: &'l str,
+}
+
+/// A `Token` lexer wrapped to operate on a single line
+pub(crate) struct LineLexer<'l> {
+ lexer: Lexer<'l, Token>,
+}
+
+impl<'l> LineLexer<'l> {
+    /// Consume the lexer and collect all tokens on the line
+    pub(crate) fn get_all(self) -> Vec<LineToken<'l>> {
+        self.collect()
+    }
+}
+
+impl<'l> Iterator for LineLexer<'l> {
+ type Item = LineToken<'l>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.lexer.next().map(|tt| Self::Item {
+ tt,
+ slice: self.lexer.slice(),
+ })
+ }
+}
+
+/// Take a line of input and lex it into a stream of tokens
+pub(crate) fn lex(line: &str) -> LineLexer<'_> {
+ LineLexer {
+ lexer: Token::lexer(line),
+ }
+}
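+
+// A minimal usage sketch (not part of the original test suite): drive
+// the line lexer over a multi-line document, one line at a time, and
+// keep only the token types. The sample input is an assumption
+// modelled on the tests below.
+#[test]
+fn lex_lines_sketch() {
+    let input = ";; tracking data\nSTART 2020-11-11 13:00:00+01:00";
+    let types: Vec<Vec<Token>> = input
+        .lines()
+        .map(|line| lex(line).get_all().into_iter().map(|t| t.tt).collect())
+        .collect();
+
+    assert_eq!(types[0], vec![Token::Comment]);
+    assert_eq!(types[1], vec![Token::Start, Token::Date]);
+}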
+
+#[test]
+fn basic_header() {
+ let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");
+
+ assert_eq!(lex.next(), Some(Token::Header));
+ assert_eq!(lex.span(), 0..6);
+ assert_eq!(lex.slice(), "HEADER");
+
+ assert_eq!(lex.next(), Some(Token::HeaderData));
+ assert_eq!(lex.span(), 7..21);
+ assert_eq!(lex.slice(), "version=0.0.0,");
+
+ assert_eq!(lex.next(), Some(Token::HeaderData));
+ assert_eq!(lex.span(), 21..49);
+ assert_eq!(lex.slice(), "location=Berlin Lichtenberg,");
+
+ assert_eq!(lex.next(), None);
+}
+
+#[test]
+fn basic_start() {
+ let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00");
+
+ assert_eq!(lex.next(), Some(Token::Start));
+ assert_eq!(lex.span(), 0..5);
+ assert_eq!(lex.slice(), "START");
+
+ assert_eq!(lex.next(), Some(Token::Date));
+ assert_eq!(lex.span(), 5..31);
+ assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
+
+ assert_eq!(lex.next(), None);
+}
+
+#[test]
+fn basic_stop() {
+ let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00");
+
+ assert_eq!(lex.next(), Some(Token::Stop));
+ assert_eq!(lex.span(), 0..4);
+ assert_eq!(lex.slice(), "STOP");
+
+ assert_eq!(lex.next(), Some(Token::Date));
+ assert_eq!(lex.span(), 4..30);
+ assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
+
+ assert_eq!(lex.next(), None);
+}
+
+#[test]
+fn basic_invoice() {
+ let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00");
+
+ assert_eq!(lex.next(), Some(Token::Invoice));
+ assert_eq!(lex.span(), 0..7);
+ assert_eq!(lex.slice(), "INVOICE");
+
+ assert_eq!(lex.next(), Some(Token::Date));
+ assert_eq!(lex.span(), 7..33);
+ assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
+
+ assert_eq!(lex.next(), None);
+}
+
+#[test]
+fn basic_comment() {
+ let mut lex = Token::lexer(";; This file is auto generated!");
+ assert_eq!(lex.next(), Some(Token::Comment));
+}
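+
+// A minimal sketch exercising the `LineLexer` wrapper itself, which
+// the token-level tests above do not cover; the assertions mirror
+// `basic_start` and are assumptions in the same spirit.
+#[test]
+fn line_lexer_get_all() {
+    let tokens = lex("START 2020-11-11 13:00:00+01:00").get_all();
+
+    assert_eq!(tokens.len(), 2);
+    assert_eq!(tokens[0].tt, Token::Start);
+    assert_eq!(tokens[0].slice, "START");
+    assert_eq!(tokens[1].tt, Token::Date);
+    assert_eq!(tokens[1].slice, " 2020-11-11 13:00:00+01:00");
+}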