author     Mx Kookie <kookie@spacekookie.de>  2020-12-11 13:38:58 +0000
committer  Mx Kookie <kookie@spacekookie.de>  2020-12-21 05:19:49 +0100
commit     5502c6d320f05f43239fc8e2b2839eb3fd5d742a (patch)
tree       76f5d1d5685f6281789b4559db1bd257855b774b /apps/cassiopeia
parent     afd8a74e43fc57662381b16d418a559f471c80f5 (diff)
cassiopeia: implementing basic file parser
Diffstat (limited to 'apps/cassiopeia')
-rw-r--r--  apps/cassiopeia/src/data.rs            29
-rw-r--r--  apps/cassiopeia/src/file.rs           145
-rw-r--r--  apps/cassiopeia/src/format/lexer.rs    91
-rw-r--r--  apps/cassiopeia/src/format/mod.rs      26
-rw-r--r--  apps/cassiopeia/src/format/parser.rs   78
-rw-r--r--  apps/cassiopeia/src/main.rs             5
6 files changed, 198 insertions(+), 176 deletions(-)
diff --git a/apps/cassiopeia/src/data.rs b/apps/cassiopeia/src/data.rs
new file mode 100644
index 000000000000..8ebc67f016c5
--- /dev/null
+++ b/apps/cassiopeia/src/data.rs
@@ -0,0 +1,29 @@
+//! Typed time file for cassiopeia
+//!
+//! This data gets generated by the `format` module, and can later be
+//! used to generate new files and to perform various lookups and
+//! analysis tasks.
+
+use crate::format::LineCfg;
+use chrono::{Date, DateTime, FixedOffset as Offset};
+use std::collections::BTreeMap;
+
+#[derive(Default)]
+pub struct TimeFile {
+ header: BTreeMap<String, String>,
+ sessions: Vec<Session>,
+ invoices: Vec<Date<Offset>>,
+}
+
+impl TimeFile {
+ pub(crate) fn append(self, line: LineCfg) -> Self {
+ println!("{:?}", line);
+
+ self
+ }
+}
+
+pub struct Session {
+ start: DateTime<Offset>,
+ stop: DateTime<Offset>,
+}
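`Session` pairs a start and a stop timestamp, which is the raw material for the "lookups and analysis tasks" the module docs mention. As a rough, free-standing sketch (not part of this commit, and it re-declares `Session` locally just to stay self-contained), the most obvious analysis is a duration helper:

```rust
use chrono::{DateTime, Duration, FixedOffset as Offset};

// Hypothetical sketch, not in the commit: a session's length falls out of
// chrono directly by subtracting the two timestamps.
pub struct Session {
    start: DateTime<Offset>,
    stop: DateTime<Offset>,
}

impl Session {
    pub fn duration(&self) -> Duration {
        // DateTime<FixedOffset> is Copy, so this subtraction doesn't move out of &self.
        self.stop - self.start
    }
}
```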
diff --git a/apps/cassiopeia/src/file.rs b/apps/cassiopeia/src/file.rs
deleted file mode 100644
index 94da234e8661..000000000000
--- a/apps/cassiopeia/src/file.rs
+++ /dev/null
@@ -1,145 +0,0 @@
-//! Parse the cassiopeia file format
-//!
-//! Each file is associated with a single project. This way there is
-//! no need to associate session entries with multiple customers and
-//! projects. Currently there's also no way to cross-relate sessions
-//! between projects or clients, although the metadata in the header
-//! is available to do so in the future
-//!
-//! ## Structure
-//!
-//! `cassiopeia` files should use the `.cass` extension, although this
-//! implementation is not opinionated on that.
-//!
-//! A line starting with `;` is a comment and can be ignored. A line
-//! can have a comment anywhere, which means that everything after it
-//! gets ignored. There are no block comments.
-//!
-//! A regular statement has two parts: a key and a value. Available
-//! keys are:
-//!
-//! - HEADER
-//! - START
-//! - STOP
-//! - FINISH
-//!
-//! A file has to have at least one `HEADER` key, containing a certain
-//! number of fields to be considered valid. The required number of
-//! fields may vary between versions.
-//!
-//! ### HEADER
-//!
-//! `cassiopeia` in principle only needs a single value to parse a
-//! file, which is `version`. It is however recommended to add
-//! additional metadata to allow future processing into clients and
-//! cross-referencing projects. Importantly: header keys that are not
-//! expected will be ignored.
-//!
-//! The general header format is a comma-separated list with a key
-//! value pair, separated by an equals sign. You can use spaces in
-//! both keys and values without having to escape them or use special
-//! quotes. Leading and trailing spaces will be removed.
-//!
-//! ```
-//! HEADER version=0.0.0,location=Berlin
-//! HEADER work schedule=mon tue wed
-//! ```
-//!
-//! When re-writing the file format, known/accepted keys should go
-//! first. All other unknown keys will be printed alphabetically at
-//! the end. This way it's possible for an outdated implementation to
-//! pass through unknown keys, or users to add their own keys.
-
-use chrono::{DateTime, Utc};
-use std::{fs::File, io::Read, path::Path};
-
-/// A cassiopeia file that has been successfully parsed
-pub struct TimeFile {
- path: PathBuf,
- content: Vec<Statement>,
-}
-
-impl TimeFile {
- /// Open an existing `.cass` file on disk. Panics!
- pub fn open(p: impl Into<Path>) -> Self {
- let mut f = File::open(p).unwrap();
- let mut cont = String::new();
- f.read_to_string(&mut cont).unwrap();
- }
-}
-
-/// A statement in a `.cass` line
-///
-/// While the whole file gets re-written on every run to update
-/// version numbers and header values, the structure of the file is
-/// preserved.
-pub enum Statement {
- /// A blank line
- Blank,
- /// A comment line that is echo-ed back out
- Comment(String),
- /// Header value
- Header(Vec<HeaderVal>),
- /// A session start value
- Start(DateTime<Utc>),
- /// A session stop value
- Stop(DateTime<Utc>),
- /// A project finish value
- Finish(DateTime<Utc>),
-}
-
-/// A set of header values
-pub struct HeaderVal {
- /// Header key
- key: String,
- /// Header value
- val: String,
-}
-
-impl HeaderVal {
- fn new<S: Into<String>>(key: S, val: S) -> Self {
- Self {
- key: key.into(),
- val: val.into(),
- }
- }
-
- /// Test if a header value is known to this implementation
- fn known(&self) -> bool {
- match self.key {
- "version" => true,
- _ => false,
- }
- }
-}
-
-/// A builder for cass files
-#[cfg(tests)]
-struct FileBuilder {
- acc: Vec<Statement>,
-}
-
-impl FileBuilder {
- fn new() -> Self {
- Self { acc: vec![] }
- }
-
- fn header(mut self, data: Vec<(&str, &str)>) -> Self {
- self.acc.push(Statement::Header(
- data.into_iter()
- .map(|(key, val)| HeaderVal::new(key, val))
- .collect(),
- ));
-
- self
- }
-
- fn build(self) -> String {
- format!(";; This file was generated by cassiopeia (reference)\n{}", self.acc.into_iter().map(|s| s.render()).collect::<Vec<_>().join("\n"))
- }
-}
-
-#[test]
-fn empty_file() {
- let fb = FileBuilder::new().header(vec![("version", "0.3.0"), ("project", "testing")]);
-}
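The removed doc comment above is still the best description of the header syntax: comma-separated pairs, an `=` between key and value, spaces allowed in both and trimmed at the ends. A stand-alone sketch of just those rules, using a hypothetical `split_header` helper that belongs to neither the old nor the new implementation:

```rust
// Hypothetical illustration of the header rules described above.
fn split_header(line: &str) -> Vec<(String, String)> {
    line.trim_start_matches("HEADER")
        .split(',')
        .filter(|pair| !pair.trim().is_empty())
        .filter_map(|pair| {
            let mut kv = pair.splitn(2, '=');
            // Leading and trailing spaces are dropped from both key and value.
            Some((kv.next()?.trim().to_owned(), kv.next()?.trim().to_owned()))
        })
        .collect()
}

#[test]
fn header_rules() {
    let pairs = split_header("HEADER version=0.0.0,location=Berlin Lichtenberg,");
    assert_eq!(pairs[0], ("version".to_owned(), "0.0.0".to_owned()));
    assert_eq!(pairs[1], ("location".to_owned(), "Berlin Lichtenberg".to_owned()));
}
```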
diff --git a/apps/cassiopeia/src/format/lexer.rs b/apps/cassiopeia/src/format/lexer.rs
index f062ca4238c1..bdb89f5180e5 100644
--- a/apps/cassiopeia/src/format/lexer.rs
+++ b/apps/cassiopeia/src/format/lexer.rs
@@ -1,6 +1,7 @@
//! Cassiopeia file lexer
-use logos::Logos;
+use logos::{Lexer, Logos};
+use std::iter::Iterator;
/// A basic line lexer type
///
@@ -8,8 +9,7 @@ use logos::Logos;
/// does not attempt to parse the line specifics. This is what the
/// content lexer is for.
#[derive(Logos, Debug, PartialEq)]
-enum Line {
-
+pub(crate) enum Token {
#[token("HEADER")]
Header,
@@ -21,7 +21,7 @@ enum Line {
#[token("INVOICE")]
Invoice,
-
+
#[regex(r"\w+=[^,$]+[,$]")]
HeaderData,
@@ -32,85 +32,120 @@ enum Line {
#[token(" ", logos::skip)]
Space,
-
+
+ #[regex(";;.*")]
+ Comment,
+
#[error]
Error,
}
+/// A single token type on a line
+#[derive(Debug)]
+pub(crate) struct LineToken<'l> {
+ pub(crate) tt: Token,
+ pub(crate) slice: &'l str,
+}
-// pub fn test_this() {
-// // let mut lex = Line::lexer("HEADER version=0.0.0,location=Berlin,");
-// let mut lex = Line::lexer("START 2020-11-11 13:00:00+01:00");
+/// A lexer wrapped for a single line
+pub(crate) struct LineLexer<'l> {
+ lexer: Lexer<'l, Token>,
+}
+
+impl<'l> LineLexer<'l> {
+ pub(crate) fn get_all(self) -> Vec<LineToken<'l>> {
+ let mut acc = vec![];
+ for l in self {
+ acc.push(l);
+ }
+ acc
+ }
+}
-// while let Some(t) = lex.next() {
-// println!("{:?}: {}", t, lex.slice());
-// }
-// }
+impl<'l> Iterator for LineLexer<'l> {
+ type Item = LineToken<'l>;
+ fn next(&mut self) -> Option<Self::Item> {
+ self.lexer.next().map(|tt| Self::Item {
+ tt,
+ slice: self.lexer.slice(),
+ })
+ }
+}
+
+/// Take a line of input and lex it into a stream of tokens
+pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> {
+ LineLexer {
+ lexer: Token::lexer(line),
+ }
+}
#[test]
fn basic_header() {
- let mut lex = Line::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");
+ let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");
- assert_eq!(lex.next(), Some(Line::Header));
+ assert_eq!(lex.next(), Some(Token::Header));
assert_eq!(lex.span(), 0..6);
assert_eq!(lex.slice(), "HEADER");
- assert_eq!(lex.next(), Some(Line::HeaderData));
+ assert_eq!(lex.next(), Some(Token::HeaderData));
assert_eq!(lex.span(), 7..21);
assert_eq!(lex.slice(), "version=0.0.0,");
- assert_eq!(lex.next(), Some(Line::HeaderData));
+ assert_eq!(lex.next(), Some(Token::HeaderData));
assert_eq!(lex.span(), 21..49);
assert_eq!(lex.slice(), "location=Berlin Lichtenberg,");
assert_eq!(lex.next(), None);
}
-
#[test]
fn basic_start() {
- let mut lex = Line::lexer("START 2020-11-11 13:00:00+01:00");
+ let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00");
- assert_eq!(lex.next(), Some(Line::Start));
+ assert_eq!(lex.next(), Some(Token::Start));
assert_eq!(lex.span(), 0..5);
assert_eq!(lex.slice(), "START");
- assert_eq!(lex.next(), Some(Line::Date));
+ assert_eq!(lex.next(), Some(Token::Date));
assert_eq!(lex.span(), 5..31);
assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
assert_eq!(lex.next(), None);
}
-
#[test]
fn basic_stop() {
- let mut lex = Line::lexer("STOP 2020-11-11 13:00:00+01:00");
+ let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00");
- assert_eq!(lex.next(), Some(Line::Stop));
+ assert_eq!(lex.next(), Some(Token::Stop));
assert_eq!(lex.span(), 0..4);
assert_eq!(lex.slice(), "STOP");
- assert_eq!(lex.next(), Some(Line::Date));
+ assert_eq!(lex.next(), Some(Token::Date));
assert_eq!(lex.span(), 4..30);
assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
assert_eq!(lex.next(), None);
}
-
#[test]
fn basic_invoice() {
- let mut lex = Line::lexer("INVOICE 2020-11-11 13:00:00+01:00");
+ let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00");
- assert_eq!(lex.next(), Some(Line::Invoice));
+ assert_eq!(lex.next(), Some(Token::Invoice));
assert_eq!(lex.span(), 0..7);
assert_eq!(lex.slice(), "INVOICE");
- assert_eq!(lex.next(), Some(Line::Date));
+ assert_eq!(lex.next(), Some(Token::Date));
assert_eq!(lex.span(), 7..33);
assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
assert_eq!(lex.next(), None);
}
+
+#[test]
+fn basic_comment() {
+ let mut lex = Token::lexer(";; This file is auto generated!");
+ assert_eq!(lex.next(), Some(Token::Comment));
+}
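The new `LineLexer`/`LineToken` wrapper is what the parser consumes, so a quick way to see its output is to run it over one of the lines from the tests above. A sketch, written as if it sat next to those tests inside `lexer.rs` (the types are `pub(crate)`); the `wrapper_start_line` test name is made up:

```rust
#[test]
fn wrapper_start_line() {
    let mut line = String::from("START 2020-11-11 13:00:00+01:00");
    let tokens = lex(&mut line).get_all();

    // Two tokens come back: the keyword, then the date, whose slice keeps
    // its leading space (just like in basic_start above).
    assert_eq!(tokens.len(), 2);
    assert_eq!(tokens[0].tt, Token::Start);
    assert_eq!(tokens[0].slice, "START");
    assert_eq!(tokens[1].tt, Token::Date);
    assert_eq!(tokens[1].slice, " 2020-11-11 13:00:00+01:00");
}
```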
diff --git a/apps/cassiopeia/src/format/mod.rs b/apps/cassiopeia/src/format/mod.rs
index 766bb22be13e..beab2f7aac66 100644
--- a/apps/cassiopeia/src/format/mod.rs
+++ b/apps/cassiopeia/src/format/mod.rs
@@ -1,3 +1,25 @@
-//! cassiopeia file format handling
+//! cassiopeia file format
-pub(crate) mod lexer;
+mod lexer;
+mod parser;
+
+pub(crate) use lexer::{LineLexer, LineToken, Token};
+pub(crate) use parser::LineCfg;
+
+use crate::TimeFile;
+use std::{fs::File, io::Read};
+
+pub(crate) fn load_file(path: &str) {
+ let mut f = File::open(path).unwrap();
+ let mut content = String::new();
+ f.read_to_string(&mut content).unwrap();
+
+ let mut lines: Vec<String> = content.split("\n").map(|l| l.to_owned()).collect();
+
+ lines
+ .iter_mut()
+ .map(|line| lexer::lex(line))
+ .map(|lex| parser::parse(lex))
+ .filter(|line| line.valid())
+ .fold(TimeFile::default(), |file, line| file.append(line));
+}
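`load_file` is the whole pipeline in one chain: read the file, split it into lines, lex and parse each line, drop everything that isn't `valid()`, and fold the rest into a `TimeFile`; note that the folded `TimeFile` is currently discarded. A sketch of the same chain over an in-memory string that hands the result back to the caller — the `load_str` name is hypothetical and assumes it lives alongside `load_file` in `format/mod.rs`:

```rust
pub(crate) fn load_str(content: &str) -> TimeFile {
    // Identical pipeline to load_file, minus the disk I/O, with the fold
    // result returned instead of dropped.
    let mut lines: Vec<String> = content.split("\n").map(|l| l.to_owned()).collect();

    lines
        .iter_mut()
        .map(|line| lexer::lex(line))
        .map(|lex| parser::parse(lex))
        .filter(|line| line.valid())
        .fold(TimeFile::default(), |file, line| file.append(line))
}
```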
diff --git a/apps/cassiopeia/src/format/parser.rs b/apps/cassiopeia/src/format/parser.rs
new file mode 100644
index 000000000000..cc4b1b7c77df
--- /dev/null
+++ b/apps/cassiopeia/src/format/parser.rs
@@ -0,0 +1,78 @@
+//! cassiopeia parser
+//!
+//! Takes a lexer's token stream as an input, and outputs a fully
+//! parsed time file.
+
+use crate::format::{LineLexer, LineToken, Token};
+use chrono::{DateTime, FixedOffset as Offset};
+use logos::Lexer;
+use std::collections::BTreeMap;
+use std::iter::Iterator;
+
+/// A type-parsed line in a time file
+#[derive(Debug)]
+pub enum LineCfg {
+ /// A header line with a set of keys and values
+ Header(BTreeMap<String, String>),
+ /// A session start line with a date and time
+ Start(Option<DateTime<Offset>>),
+ /// A session stop line with a date and time
+ Stop(Option<DateTime<Offset>>),
+ /// An invoice line with a date
+ Invoice(Option<DateTime<Offset>>),
+ /// An empty line
+ Empty,
+ /// A temporary value that is invalid
+ #[doc(hidden)]
+ Ignore,
+}
+
+impl LineCfg {
+ pub(crate) fn valid(&self) -> bool {
+ match self {
+ LineCfg::Ignore => false,
+ _ => true,
+ }
+ }
+}
+
+pub(crate) fn parse<'l>(lex: LineLexer<'l>) -> LineCfg {
+ use LineCfg::*;
+ use Token as T;
+
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ lex.get_all().into_iter().fold(Ignore, |cfg, tok| match (cfg, tok) {
+ // If the first token is a comment, we ignore it
+ (Ignore, LineToken { tt: T::Comment, .. }, ) => Ignore,
+ // If the first token is a keyword, we wait for more data
+ (Ignore, LineToken { tt: T::Header, .. }) => Header(Default::default()),
+ (Ignore, LineToken { tt: T::Start, .. }) => Start(None),
+ (Ignore, LineToken { tt: T::Stop, .. }) => Stop(None),
+ (Ignore, LineToken { tt: T::Invoice, .. }) => Invoice(None),
+
+ // If the first token _was_ a keyword, fill in the data
+ (Header(map), LineToken { tt: T::HeaderData, slice }) => Header(append_data(map, slice)),
+ (Start(_), LineToken { tt: T::Date, slice }) => Start(parse_date(slice)),
+ (Stop(_), LineToken { tt: T::Date, slice }) => Stop(parse_date(slice)),
+ (Invoice(_), LineToken { tt: T::Date, slice }) => Invoice(parse_date(slice)),
+
+ // Pass empty lines through,
+ (Empty, _) => Empty,
+
+ // Ignore everything else (which will be filtered)
+ _ => Ignore,
+ })
+}
+
+fn append_data(mut map: BTreeMap<String, String>, slice: &str) -> BTreeMap<String, String> {
+ let split = slice.split("=").collect::<Vec<_>>();
+ map.insert(split[0].into(), split[1].into());
+ map
+}
+
+fn parse_date(slice: &str) -> Option<DateTime<Offset>> {
+ Some(
+ DateTime::parse_from_str(slice, "%Y-%m-%d %H:%M:%S%:z")
+ .expect("Failed to parse date; invalid format!"),
+ )
+}
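The fold in `parse` works as a small state machine: the first keyword token selects the `LineCfg` variant, a following data token fills it in, and anything that never leaves `Ignore` is filtered out later by `valid()`. A sketch of exercising it end to end on a header line, written as a hypothetical test inside `parser.rs` (reaching the sibling module via `crate::format::lexer`):

```rust
#[test]
fn parse_header_line() {
    let mut line = String::from("HEADER version=0.0.0,location=Berlin Lichtenberg,");
    let cfg = parse(crate::format::lexer::lex(&mut line));

    assert!(cfg.valid());
    match cfg {
        // append_data keys the map on the text before '=', so "version" is present.
        LineCfg::Header(map) => assert!(map.contains_key("version")),
        other => panic!("expected a header line, got {:?}", other),
    }
}
```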
diff --git a/apps/cassiopeia/src/main.rs b/apps/cassiopeia/src/main.rs
index 80aabc7ad912..b28f3c3438f7 100644
--- a/apps/cassiopeia/src/main.rs
+++ b/apps/cassiopeia/src/main.rs
@@ -1,5 +1,8 @@
mod format;
+mod data;
-fn main() {
+pub use data::{TimeFile, Session};
+fn main() {
+ format::load_file("/home/projects/clients/nyantec-nix-workshops/time.cass")
}