From afd8a74e43fc57662381b16d418a559f471c80f5 Mon Sep 17 00:00:00 2001 From: Mx Kookie Date: Thu, 10 Dec 2020 14:30:38 +0000 Subject: cassiopeia: adding new format lexer built on logos --- apps/cassiopeia/.envrc | 1 + apps/cassiopeia/Cargo.lock | 175 ++++++++++++++++++++++++++++++++++++ apps/cassiopeia/Cargo.toml | 3 +- apps/cassiopeia/shell.nix | 8 ++ apps/cassiopeia/src/format/lexer.rs | 116 ++++++++++++++++++++++++ apps/cassiopeia/src/format/mod.rs | 3 + apps/cassiopeia/src/main.rs | 4 +- 7 files changed, 308 insertions(+), 2 deletions(-) create mode 100644 apps/cassiopeia/.envrc create mode 100644 apps/cassiopeia/Cargo.lock create mode 100644 apps/cassiopeia/shell.nix create mode 100644 apps/cassiopeia/src/format/lexer.rs create mode 100644 apps/cassiopeia/src/format/mod.rs (limited to 'apps/cassiopeia') diff --git a/apps/cassiopeia/.envrc b/apps/cassiopeia/.envrc new file mode 100644 index 000000000000..051d09d292a8 --- /dev/null +++ b/apps/cassiopeia/.envrc @@ -0,0 +1 @@ +eval "$(lorri direnv)" diff --git a/apps/cassiopeia/Cargo.lock b/apps/cassiopeia/Cargo.lock new file mode 100644 index 000000000000..be14f5554b85 --- /dev/null +++ b/apps/cassiopeia/Cargo.lock @@ -0,0 +1,175 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "beef" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "474a626a67200bd107d44179bb3d4fc61891172d11696609264589be6a0e6a43" + +[[package]] +name = "cassiopeia" +version = "0.1.0" +dependencies = [ + "chrono", + "logos", +] + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "libc" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" + +[[package]] +name = "logos" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91c49573597a5d6c094f9031617bb1fed15c0db68c81e6546d313414ce107e4" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-derive" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "797b1f8a0571b331c1b47e7db245af3dc634838da7a92b3bef4e30376ae1c347" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-syntax", + "syn", + "utf8-ranges", +] + +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" 
+version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex-syntax" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189" + +[[package]] +name = "syn" +version = "1.0.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2af957a63d6bd42255c359c93d9bfdb97076bd3b820897ce55ffbfbf107f44" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi", + "winapi", +] + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "utf8-ranges" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/apps/cassiopeia/Cargo.toml b/apps/cassiopeia/Cargo.toml index db7acbd1ba77..2901f8199c00 100644 --- a/apps/cassiopeia/Cargo.toml +++ b/apps/cassiopeia/Cargo.toml @@ -5,4 +5,5 @@ authors = ["Mx Kookie "] edition = "2018" [dependencies] -chrono = "*" \ No newline at end of file +chrono = "*" +logos = "0.11" \ No newline at end of file diff --git a/apps/cassiopeia/shell.nix b/apps/cassiopeia/shell.nix new file mode 100644 index 000000000000..a365404a622b --- /dev/null +++ b/apps/cassiopeia/shell.nix @@ -0,0 +1,8 @@ +with import {}; + +stdenv.mkDerivation { + name = "cassiopeia"; + buildInputs = with pkgs; [ + rustracer rustup clangStdenv + ]; +} diff --git a/apps/cassiopeia/src/format/lexer.rs b/apps/cassiopeia/src/format/lexer.rs new file mode 100644 index 000000000000..f062ca4238c1 --- /dev/null +++ b/apps/cassiopeia/src/format/lexer.rs @@ -0,0 +1,116 @@ +//! Cassiopeia file lexer + +use logos::Logos; + +/// A basic line lexer type +/// +/// This lexer distinguishes between comments, and keyword lines. It +/// does not attempt to parse the line specifics. This is what the +/// content lexer is for. 
+#[derive(Logos, Debug, PartialEq)]
+enum Line {
+    /// The literal `HEADER` keyword.
+    #[token("HEADER")]
+    Header,
+
+    /// The literal `START` keyword.
+    #[token("START")]
+    Start,
+
+    /// The literal `STOP` keyword.
+    #[token("STOP")]
+    Stop,
+
+    /// The literal `INVOICE` keyword.
+    #[token("INVOICE")]
+    Invoice,
+
+    /// A `key=value` pair plus its trailing comma, if any. The comma is
+    /// optional because `$` inside a character class is a literal dollar
+    /// sign, not an end-of-input anchor, so the last pair may omit it.
+    #[regex(r"\w+=[^,]+,?")]
+    HeaderData,
+
+    /// A run of digits, `-`, `:`, `+` and spaces, e.g. a timestamp.
+    // FIXME: this will have a leading whitespace that we could remove
+    // with ^\w, but logos does not support this at the moment
+    #[regex(r"[0-9-:+ ]+")]
+    Date,
+
+    /// A single space between tokens; skipped via `logos::skip`.
+    #[token(" ", logos::skip)]
+    Space,
+
+    /// Marked `#[error]`: input that no other rule matches.
+    #[error]
+    Error,
+}
+
+#[test]
+fn basic_header() {
+    let mut lex = Line::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");
+    assert_eq!(lex.next(), Some(Line::Header));
+    assert_eq!(lex.span(), 0..6);
+    assert_eq!(lex.slice(), "HEADER");
+
+    assert_eq!(lex.next(), Some(Line::HeaderData));
+    assert_eq!(lex.span(), 7..21);
+    assert_eq!(lex.slice(), "version=0.0.0,");
+
+    assert_eq!(lex.next(), Some(Line::HeaderData));
+    assert_eq!(lex.span(), 21..49);
+    assert_eq!(lex.slice(), "location=Berlin Lichtenberg,");
+
+    assert_eq!(lex.next(), None);
+}
+
+#[test]
+fn header_without_trailing_comma() {
+    let mut lex = Line::lexer("HEADER location=Berlin");
+    assert_eq!(lex.next(), Some(Line::Header));
+    assert_eq!(lex.next(), Some(Line::HeaderData));
+    assert_eq!(lex.slice(), "location=Berlin");
+    assert_eq!(lex.next(), None);
+}
+
+#[test]
+fn basic_start() {
+    let mut lex = Line::lexer("START 2020-11-11 13:00:00+01:00");
+    assert_eq!(lex.next(), Some(Line::Start));
+    assert_eq!(lex.span(), 0..5);
+    assert_eq!(lex.slice(), "START");
+
+    assert_eq!(lex.next(), Some(Line::Date));
+    assert_eq!(lex.span(), 5..31);
+    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
+
+    assert_eq!(lex.next(), None);
+}
+
+#[test]
+fn basic_stop() {
+    let mut lex = Line::lexer("STOP 2020-11-11 13:00:00+01:00");
+    assert_eq!(lex.next(), Some(Line::Stop));
+    assert_eq!(lex.span(), 0..4);
+    assert_eq!(lex.slice(), "STOP");
+
+    assert_eq!(lex.next(), Some(Line::Date));
+    assert_eq!(lex.span(), 4..30);
+    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
+
+    assert_eq!(lex.next(), None);
+}
+
+#[test]
+fn basic_invoice() {
+    let mut lex = Line::lexer("INVOICE 2020-11-11 13:00:00+01:00");
+    assert_eq!(lex.next(), Some(Line::Invoice));
+    assert_eq!(lex.span(), 0..7);
+    assert_eq!(lex.slice(), "INVOICE");
+
+    assert_eq!(lex.next(), Some(Line::Date));
+    assert_eq!(lex.span(), 7..33);
+    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
+
+    assert_eq!(lex.next(), None);
+}
diff --git a/apps/cassiopeia/src/format/mod.rs b/apps/cassiopeia/src/format/mod.rs
new file mode 100644
index 000000000000..766bb22be13e
--- /dev/null
+++ b/apps/cassiopeia/src/format/mod.rs
@@ -0,0 +1,3 @@
+//! cassiopeia file format handling
+
+pub(crate) mod lexer;
diff --git a/apps/cassiopeia/src/main.rs b/apps/cassiopeia/src/main.rs
index e7a11a969c03..80aabc7ad912 100644
--- a/apps/cassiopeia/src/main.rs
+++ b/apps/cassiopeia/src/main.rs
@@ -1,3 +1,5 @@
+mod format;
+
 fn main() {
-    println!("Hello, world!");
+
 }
--
cgit v1.2.3