From 5502c6d320f05f43239fc8e2b2839eb3fd5d742a Mon Sep 17 00:00:00 2001 From: Mx Kookie Date: Fri, 11 Dec 2020 13:38:58 +0000 Subject: cassiopeia: implementing basic file parser --- apps/cassiopeia/src/data.rs | 29 +++++++ apps/cassiopeia/src/file.rs | 145 ----------------------------------- apps/cassiopeia/src/format/lexer.rs | 91 +++++++++++++++------- apps/cassiopeia/src/format/mod.rs | 26 ++++++- apps/cassiopeia/src/format/parser.rs | 78 +++++++++++++++++++ apps/cassiopeia/src/main.rs | 5 +- 6 files changed, 198 insertions(+), 176 deletions(-) create mode 100644 apps/cassiopeia/src/data.rs delete mode 100644 apps/cassiopeia/src/file.rs create mode 100644 apps/cassiopeia/src/format/parser.rs (limited to 'apps/cassiopeia') diff --git a/apps/cassiopeia/src/data.rs b/apps/cassiopeia/src/data.rs new file mode 100644 index 000000000000..8ebc67f016c5 --- /dev/null +++ b/apps/cassiopeia/src/data.rs @@ -0,0 +1,29 @@ +//! Typed time file for cassiopeia +//! +//! This data gets generated by the `format` module, and can later be +//! used to generate new files, and perform various lookups and +//! analysis tasks. + +use crate::format::LineCfg; +use chrono::{Date, DateTime, FixedOffset as Offset}; +use std::collections::BTreeMap; + +#[derive(Default)] +pub struct TimeFile { + header: BTreeMap, + sessions: Vec, + invoices: Vec>, +} + +impl TimeFile { + pub(crate) fn append(self, line: LineCfg) -> Self { + println!("{:?}", line); + + self + } +} + +pub struct Session { + start: DateTime, + stop: DateTime, +} diff --git a/apps/cassiopeia/src/file.rs b/apps/cassiopeia/src/file.rs deleted file mode 100644 index 94da234e8661..000000000000 --- a/apps/cassiopeia/src/file.rs +++ /dev/null @@ -1,145 +0,0 @@ -//! Parse the cassiopeia file format -//! -//! Each file is associated with a single project. This way there is -//! no need to associate session enries with multiple customers and -//! projcets. Currently there's also no way to cross-relate sessions -//! between projects or clients, although the metadata in the header -//! is available to do so in the future -//! -//! ## Structure -//! -//! `cassiopeia` files should use the `.cass` extension, although this -//! implementation is not opinionated on that. -//! -//! A line starting with `;` is a comment and can be ignored. A line -//! can have a comment anywhere, which means that everything after it -//! gets ignored. There are no block comments. -//! -//! A regular statements has two parts: a key, and a value. Available -//! keys are: -//! -//! - HEADER -//! - START -//! - STOP -//! - FINISH -//! -//! A file has to have at least one `HEADER` key, containing a certain -//! number of fields to be considered valid. The required number of -//! fields may vary between versions. -//! -//! ### HEADER -//! -//! `cassiopeia` in princpile only needs a single value to parse a -//! file, which is `version`. It is however recommended to add -//! additional metadata to allow future processing into clients and -//! cross-referencing projects. Importantly: header keys that are not -//! expected will be ignored. -//! -//! The general header format is a comma-separated list with a key -//! value pair, separated by an equals sign. You can use spaces in -//! both keys and values without having to escape them or use special -//! quotes. Leading and trailing spaces will be removed. -//! -//! ``` -//! HEADER version=0.0.0,location=Berlin -//! HEADER work schedule=mon tue wed -//! ``` -//! -//! When re-writing the file format, known/ accepted keys should go -//! first. All other unknown keys will be printed alphabetically at -//! the end. This way it's possible for an outdated implementation to -//! pass through unknown keys, or users to add their own keys. - -use chrono::{DateTime, Utc}; -use std::{fs::File, io::Read, path::Path}; - -/// A cassiopeia file that has been successfully parsed -pub struct TimeFile { - path: PathBuf, - content: Vec, -} - -impl TimeFile { - /// Open an existing `.cass` file on disk. Panics! - pub fn open(p: impl Into) -> Self { - let mut f = File::open(p).unwrap(); - let mut cont = String::new(); - f.read_to_string(&mut cont).unwrap(); - } -} - -/// A statement in a `.cass` line -/// -/// While the whole file get's re-written on every run to update -/// version numbers and header values, the structure of the file is -/// preserved. -pub enum Statement { - /// A blank line - Blank, - /// A comment line that is echo-ed back out - Comment(String), - /// Header value - Header(Vec), - /// A session start value - Start(DateTime), - /// A session stop value - Stop(DateTime), - /// A project finish value - Finish(DateTime), -} - -/// A set of header value -pub struct HeaderVal { - /// Header key - key: String, - /// Header value - val: String, -} - -impl HeaderVal { - fn new>(key: S, val: S) -> Self { - Self { - key: key.into(), - val: val.into(), - } - } - - /// Test if a header value is known to this implementation - fn known(&self) -> bool { - match self.key { - "version" => true, - _ => false, - } - } -} - -/// A builder for cass files -#[cfg(tests)] -struct FileBuilder { - acc: Vec, -} - -impl FileBuilder { - fn new() -> Self { - Self { acc: vec![] } - } - - fn header(mut self, data: Vec<(&str, &str)>) -> Self { - self.acc.push(Statement::Header( - data.into_iter() - .map(|(key, val)| HeaderVal::new(key, val)) - .collect(), - )); - - self - } - - fn build(self) -> String { - format!(";; This file was generated by cassiopeia (reference)\n{}", self.acc.into_iter().map(|s| s.render()).collect::().join("\n")) - } -} - -#[test] -fn empty_file() { - let fb = FileBuilder::new().header(vec![("version", "0.3.0"), ("project", "testing")]); -} diff --git a/apps/cassiopeia/src/format/lexer.rs b/apps/cassiopeia/src/format/lexer.rs index f062ca4238c1..bdb89f5180e5 100644 --- a/apps/cassiopeia/src/format/lexer.rs +++ b/apps/cassiopeia/src/format/lexer.rs @@ -1,6 +1,7 @@ //! Cassiopeia file lexer -use logos::Logos; +use logos::{Lexer, Logos}; +use std::iter::Iterator; /// A basic line lexer type /// @@ -8,8 +9,7 @@ use logos::Logos; /// does not attempt to parse the line specifics. This is what the /// content lexer is for. #[derive(Logos, Debug, PartialEq)] -enum Line { - +pub(crate) enum Token { #[token("HEADER")] Header, @@ -21,7 +21,7 @@ enum Line { #[token("INVOICE")] Invoice, - + #[regex(r"\w+=[^,$]+[,$]")] HeaderData, @@ -32,85 +32,120 @@ enum Line { #[token(" ", logos::skip)] Space, - + + #[regex(";;.*")] + Comment, + #[error] Error, } +/// A single token type on a line +#[derive(Debug)] +pub(crate) struct LineToken<'l> { + pub(crate) tt: Token, + pub(crate) slice: &'l str, +} -// pub fn test_this() { -// // let mut lex = Line::lexer("HEADER version=0.0.0,location=Berlin,"); -// let mut lex = Line::lexer("START 2020-11-11 13:00:00+01:00"); +/// A lexer wrapped for a single line +pub(crate) struct LineLexer<'l> { + lexer: Lexer<'l, Token>, +} + +impl<'l> LineLexer<'l> { + pub(crate) fn get_all(self) -> Vec> { + let mut acc = vec![]; + for l in self { + acc.push(l); + } + acc + } +} -// while let Some(t) = lex.next() { -// println!("{:?}: {}", t, lex.slice()); -// } -// } +impl<'l> Iterator for LineLexer<'l> { + type Item = LineToken<'l>; + fn next(&mut self) -> Option { + self.lexer.next().map(|tt| Self::Item { + tt, + slice: self.lexer.slice(), + }) + } +} + +/// Take a line of input and lex it into a stream of tokens +pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> { + LineLexer { + lexer: Token::lexer(line), + } +} #[test] fn basic_header() { - let mut lex = Line::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,"); + let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,"); - assert_eq!(lex.next(), Some(Line::Header)); + assert_eq!(lex.next(), Some(Token::Header)); assert_eq!(lex.span(), 0..6); assert_eq!(lex.slice(), "HEADER"); - assert_eq!(lex.next(), Some(Line::HeaderData)); + assert_eq!(lex.next(), Some(Token::HeaderData)); assert_eq!(lex.span(), 7..21); assert_eq!(lex.slice(), "version=0.0.0,"); - assert_eq!(lex.next(), Some(Line::HeaderData)); + assert_eq!(lex.next(), Some(Token::HeaderData)); assert_eq!(lex.span(), 21..49); assert_eq!(lex.slice(), "location=Berlin Lichtenberg,"); assert_eq!(lex.next(), None); } - #[test] fn basic_start() { - let mut lex = Line::lexer("START 2020-11-11 13:00:00+01:00"); + let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00"); - assert_eq!(lex.next(), Some(Line::Start)); + assert_eq!(lex.next(), Some(Token::Start)); assert_eq!(lex.span(), 0..5); assert_eq!(lex.slice(), "START"); - assert_eq!(lex.next(), Some(Line::Date)); + assert_eq!(lex.next(), Some(Token::Date)); assert_eq!(lex.span(), 5..31); assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); assert_eq!(lex.next(), None); } - #[test] fn basic_stop() { - let mut lex = Line::lexer("STOP 2020-11-11 13:00:00+01:00"); + let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00"); - assert_eq!(lex.next(), Some(Line::Stop)); + assert_eq!(lex.next(), Some(Token::Stop)); assert_eq!(lex.span(), 0..4); assert_eq!(lex.slice(), "STOP"); - assert_eq!(lex.next(), Some(Line::Date)); + assert_eq!(lex.next(), Some(Token::Date)); assert_eq!(lex.span(), 4..30); assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); assert_eq!(lex.next(), None); } - #[test] fn basic_invoice() { - let mut lex = Line::lexer("INVOICE 2020-11-11 13:00:00+01:00"); + let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00"); - assert_eq!(lex.next(), Some(Line::Invoice)); + assert_eq!(lex.next(), Some(Token::Invoice)); assert_eq!(lex.span(), 0..7); assert_eq!(lex.slice(), "INVOICE"); - assert_eq!(lex.next(), Some(Line::Date)); + assert_eq!(lex.next(), Some(Token::Date)); assert_eq!(lex.span(), 7..33); assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); assert_eq!(lex.next(), None); } + +#[test] +fn basic_comment() { + let mut lex = Token::lexer(";; This file is auto generated!"); + assert_eq!(lex.next(), Some(Token::Comment)); +} diff --git a/apps/cassiopeia/src/format/mod.rs b/apps/cassiopeia/src/format/mod.rs index 766bb22be13e..beab2f7aac66 100644 --- a/apps/cassiopeia/src/format/mod.rs +++ b/apps/cassiopeia/src/format/mod.rs @@ -1,3 +1,25 @@ -//! cassiopeia file format handling +//! cassiopeia file format -pub(crate) mod lexer; +mod lexer; +mod parser; + +pub(crate) use lexer::{LineLexer, LineToken, Token}; +pub(crate) use parser::LineCfg; + +use crate::TimeFile; +use std::{fs::File, io::Read}; + +pub(crate) fn load_file(path: &str) { + let mut f = File::open(path).unwrap(); + let mut content = String::new(); + f.read_to_string(&mut content).unwrap(); + + let mut lines: Vec = content.split("\n").map(|l| l.to_owned()).collect(); + + lines + .iter_mut() + .map(|line| lexer::lex(line)) + .map(|lex| parser::parse(lex)) + .filter(|line| line.valid()) + .fold(TimeFile::default(), |file, line| file.append(line)); +} diff --git a/apps/cassiopeia/src/format/parser.rs b/apps/cassiopeia/src/format/parser.rs new file mode 100644 index 000000000000..cc4b1b7c77df --- /dev/null +++ b/apps/cassiopeia/src/format/parser.rs @@ -0,0 +1,78 @@ +//! cassiopeia parser +//! +//! Takes a lexer's token stream as an input, and outputs a fully +//! parsed time file. + +use crate::format::{LineLexer, LineToken, Token}; +use chrono::{DateTime, FixedOffset as Offset}; +use logos::Lexer; +use std::collections::BTreeMap; +use std::iter::Iterator; + +/// A type-parsed line in a time file +#[derive(Debug)] +pub enum LineCfg { + /// A header line with a set of keys and values + Header(BTreeMap), + /// A session start line with a date and time + Start(Option>), + /// A session stop line with a date and time + Stop(Option>), + /// An invoice line with a date + Invoice(Option>), + /// An empty line + Empty, + /// A temporary value that is invalid + #[doc(hidden)] + Ignore, +} + +impl LineCfg { + pub(crate) fn valid(&self) -> bool { + match self { + LineCfg::Ignore => false, + _ => true, + } + } +} + +pub(crate) fn parse<'l>(lex: LineLexer<'l>) -> LineCfg { + use LineCfg::*; + use Token as T; + + #[cfg_attr(rustfmt, rustfmt_skip)] + lex.get_all().into_iter().fold(Ignore, |cfg, tok| match (cfg, tok) { + // If the first token is a comment, we ignore it + (Ignore, LineToken { tt: T::Comment, .. }, ) => Ignore, + // If the first token is a keyword, we wait for more data + (Ignore, LineToken { tt: T::Header, .. }) => Header(Default::default()), + (Ignore, LineToken { tt: T::Start, .. }) => Start(None), + (Ignore, LineToken { tt: T::Stop, .. }) => Stop(None), + (Ignore, LineToken { tt: T::Invoice, .. }) => Invoice(None), + + // If the first token _was_ a keyword, fill in the data + (Header(map), LineToken { tt: T::HeaderData, slice }) => Header(append_data(map, slice)), + (Start(_), LineToken { tt: T::Date, slice }) => Start(parse_date(slice)), + (Stop(_), LineToken { tt: T::Date, slice }) => Stop(parse_date(slice)), + (Invoice(_), LineToken { tt: T::Date, slice }) => Invoice(parse_date(slice)), + + // Pass empty lines through, + (Empty, _) => Empty, + + // Ignore everything else (which will be filtered) + _ => Ignore, + }) +} + +fn append_data(mut map: BTreeMap, slice: &str) -> BTreeMap { + let split = slice.split("=").collect::>(); + map.insert(split[0].into(), split[1].into()); + map +} + +fn parse_date(slice: &str) -> Option> { + Some( + DateTime::parse_from_str(slice, "%Y-%m-%d %H:%M:%S%:z") + .expect("Failed to parse date; invalid format!"), + ) +} diff --git a/apps/cassiopeia/src/main.rs b/apps/cassiopeia/src/main.rs index 80aabc7ad912..b28f3c3438f7 100644 --- a/apps/cassiopeia/src/main.rs +++ b/apps/cassiopeia/src/main.rs @@ -1,5 +1,8 @@ mod format; +mod data; -fn main() { +pub use data::{TimeFile, Session}; +fn main() { + format::load_file("/home/projects/clients/nyantec-nix-workshops/time.cass") } -- cgit v1.2.3