diff options
Diffstat (limited to 'apps/koffice/libko/src/cass/format')
-rw-r--r-- | apps/koffice/libko/src/cass/format/gen.rs    |  32 | ++
-rw-r--r-- | apps/koffice/libko/src/cass/format/ir.rs     |  99 | ++++++
-rw-r--r-- | apps/koffice/libko/src/cass/format/lexer.rs  | 151 | +++++++++
-rw-r--r-- | apps/koffice/libko/src/cass/format/mod.rs    |  76 | +++++
-rw-r--r-- | apps/koffice/libko/src/cass/format/parser.rs |  73 | +++++
5 files changed, 431 insertions(+), 0 deletions(-)
diff --git a/apps/koffice/libko/src/cass/format/gen.rs b/apps/koffice/libko/src/cass/format/gen.rs new file mode 100644 index 000000000000..f77bcdc90e84 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/gen.rs @@ -0,0 +1,32 @@ +//! Cassiopeia line generator +//! +//! This module takes a set of IR lines, and generates strings from +//! them that are in accordance with the way that the parser of the +//! same version expects them. + +use crate::cass::format::ir::{IrItem, IrType}; + +/// Take a line of IR and generate a string to print into a file +pub(crate) fn line(ir: &IrItem) -> String { + let IrItem { tt, lo } = ir; + match tt { + IrType::Ignore => "".into(), + IrType::Header(map) => format!( + "HEADER {}", + map.iter() + .map(|(k, v)| format!("{}={},", k, v)) + .collect::<Vec<_>>() + .join("") + ), + IrType::Start(time) => format!("START {}", time.to_string()), + + // FIXME: find a better way to align the lines here rather + // than having to manually having to pad the 'STOP' commands + IrType::Stop(time) => format!("STOP {}", time.to_string()), + IrType::Invoice(date) => format!("INVOICE {}", date.to_string()), + } +} + +pub(crate) fn head_comment() -> String { + ";; generated by cassiopeia, be careful about editing by hand!".into() +} diff --git a/apps/koffice/libko/src/cass/format/ir.rs b/apps/koffice/libko/src/cass/format/ir.rs new file mode 100644 index 000000000000..d1a3a62c1508 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/ir.rs @@ -0,0 +1,99 @@ +use crate::cass::{format::LineCfg, Date, Time, TimeFile}; +use std::collections::BTreeMap; + +/// A set of IR parsed items that makes up a whole cass file +pub(crate) type IrStream = Vec<IrItem>; + +/// Intermediate representation for parsing and generating files +/// +/// The CASS IR is largely based on the output of the parser's +/// [`LineCfg`](crate::format::LineCfg), but with concrete types used +/// in the data layer (namely [`Date`][date] and [`Time`][time]), +/// while also keeping track of 
the line numbers to allow idempotent +/// file changes. +/// +/// Something not yet implemented is comment pass-through (this needs +/// to happen in the parser first), but will likely be implemented in +/// a future version. +/// +/// [date]: crate::Date +/// [time]: crate::Time +#[derive(Debug, Clone)] +pub(crate) struct IrItem { + pub(crate) tt: IrType, + pub(crate) lo: usize, +} + +/// Disambiguate between different IR line types with their payload +#[derive(Debug, Clone)] +pub(crate) enum IrType { + /// A line with parsed header information + Header(BTreeMap<String, String>), + /// Start a session at a given timestapm + Start(Time), + /// Stop a session at a given timestamp + Stop(Time), + /// Invoice a block of previous work + Invoice(Date), + /// An item that gets ignored + Ignore, +} + +/// Generate a stream of IR items from the raw parser output +pub(crate) fn generate_ir(buf: impl Iterator<Item = LineCfg>) -> IrStream { + buf.enumerate().fold(vec![], |mut buf, (lo, item)| { + #[cfg_attr(rustfmt, rustfmt_skip)] + buf.push(match item { + LineCfg::Header(map) => IrItem { tt: IrType::Header(map.into_iter().map(|(k, v)| (k, v.replace(",", ""))).collect()), lo }, + LineCfg::Start(Some(time)) => IrItem { tt: IrType::Start(time.into()), lo }, + LineCfg::Stop(Some(time)) => IrItem { tt: IrType::Stop(time.into()), lo }, + LineCfg::Invoice(Some(date)) => IrItem { tt: IrType::Invoice(date.into()), lo }, + LineCfg::Ignore => IrItem { tt: IrType::Ignore, lo }, + _ => IrItem { tt: IrType::Ignore, lo }, + }); + + buf + }) +} + +pub(crate) trait MakeIr { + /// Make a new IR line from an object + fn make_ir(&self) -> IrType; +} + +pub(crate) fn clean_ir(ir: &mut IrStream) { + ir.remove(0); // FIXME: this is required to remove the leading + // comment, which will be manually re-generated at + // the moment, but which would just add more blank + // lines between the new comment, and the first line + // in this current format. 
This is very bad, yikes + // yikes yikes, but what can I do, I have a deadline + // (not really) lol + + // FIXME: this hack gets rid of a trailing empty line if it exists + // to make sure we don't have any gaps between work sessions. + if match ir.last() { + Some(IrItem { + tt: IrType::Ignore, .. + }) => true, + _ => false, + } { + ir.pop(); + } +} + +/// Taken an IrType and append it to an existing IR stream +pub(crate) fn append_ir(ir: &mut IrStream, tt: IrType) { + let lo = ir.last().unwrap().lo; + ir.push(IrItem { tt, lo }); +} + +/// Search for the header that contains the version string and update it +pub(crate) fn update_header(ir: &mut IrStream) { + ir.iter_mut().for_each(|item| match item.tt { + IrType::Header(ref mut map) if map.contains_key("version") => { + map.insert("version".into(), crate::cass::meta::VERSION.into()); + } + _ => {} + }); +} diff --git a/apps/koffice/libko/src/cass/format/lexer.rs b/apps/koffice/libko/src/cass/format/lexer.rs new file mode 100644 index 000000000000..bdb89f5180e5 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/lexer.rs @@ -0,0 +1,151 @@ +//! Cassiopeia file lexer + +use logos::{Lexer, Logos}; +use std::iter::Iterator; + +/// A basic line lexer type +/// +/// This lexer distinguishes between comments, and keyword lines. It +/// does not attempt to parse the line specifics. This is what the +/// content lexer is for. 
+#[derive(Logos, Debug, PartialEq)] +pub(crate) enum Token { + #[token("HEADER")] + Header, + + #[token("START")] + Start, + + #[token("STOP")] + Stop, + + #[token("INVOICE")] + Invoice, + + #[regex(r"\w+=[^,$]+[,$]")] + HeaderData, + + // FIXME: this will have a leading whitespace that we could remove + // with ^\w, but logos does not support this at the moment + #[regex(r"[0-9-:+ ]+")] + Date, + + #[token(" ", logos::skip)] + Space, + + #[regex(";;.*")] + Comment, + + #[error] + Error, +} + +/// A single token type on a line +#[derive(Debug)] +pub(crate) struct LineToken<'l> { + pub(crate) tt: Token, + pub(crate) slice: &'l str, +} + +/// A lexer wrapped for a single line +pub(crate) struct LineLexer<'l> { + lexer: Lexer<'l, Token>, +} + +impl<'l> LineLexer<'l> { + pub(crate) fn get_all(self) -> Vec<LineToken<'l>> { + let mut acc = vec![]; + for l in self { + acc.push(l); + } + acc + } +} + +impl<'l> Iterator for LineLexer<'l> { + type Item = LineToken<'l>; + + fn next(&mut self) -> Option<Self::Item> { + self.lexer.next().map(|tt| Self::Item { + tt, + slice: self.lexer.slice(), + }) + } +} + +/// Take a line of input and lex it into a stream of tokens +pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> { + LineLexer { + lexer: Token::lexer(line), + } +} + +#[test] +fn basic_header() { + let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,"); + + assert_eq!(lex.next(), Some(Token::Header)); + assert_eq!(lex.span(), 0..6); + assert_eq!(lex.slice(), "HEADER"); + + assert_eq!(lex.next(), Some(Token::HeaderData)); + assert_eq!(lex.span(), 7..21); + assert_eq!(lex.slice(), "version=0.0.0,"); + + assert_eq!(lex.next(), Some(Token::HeaderData)); + assert_eq!(lex.span(), 21..49); + assert_eq!(lex.slice(), "location=Berlin Lichtenberg,"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_start() { + let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Start)); + 
assert_eq!(lex.span(), 0..5); + assert_eq!(lex.slice(), "START"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 5..31); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_stop() { + let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Stop)); + assert_eq!(lex.span(), 0..4); + assert_eq!(lex.slice(), "STOP"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 4..30); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_invoice() { + let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Invoice)); + assert_eq!(lex.span(), 0..7); + assert_eq!(lex.slice(), "INVOICE"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 7..33); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_comment() { + let mut lex = Token::lexer(";; This file is auto generated!"); + assert_eq!(lex.next(), Some(Token::Comment)); +} diff --git a/apps/koffice/libko/src/cass/format/mod.rs b/apps/koffice/libko/src/cass/format/mod.rs new file mode 100644 index 000000000000..2983653898b6 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/mod.rs @@ -0,0 +1,76 @@ +//! 
cassiopeia file format + +mod gen; +pub(crate) mod ir; +mod lexer; +mod parser; + +pub(crate) use lexer::{LineLexer, LineToken, Token}; +pub(crate) use parser::LineCfg; + +use crate::{ + cass::error::{ParseError, ParseResult}, + cass::TimeFile, +}; +use ir::{IrItem, IrStream}; +use std::{ + fs::{File, OpenOptions}, + io::{Read, Write}, +}; + +/// A crate internal representation of the IR stream and timefile +#[derive(Default)] +pub(crate) struct ParseOutput { + pub(crate) ir: IrStream, + pub(crate) tf: TimeFile, +} + +impl ParseOutput { + fn append(mut self, ir: IrItem) -> ParseResult<Self> { + self.tf.append(ir.clone())?; + self.ir.push(ir); + Ok(self) + } +} + +/// Load a file from disk and parse it into a +/// [`TimeFile`](crate::TimeFile) +pub(crate) fn load_file(path: &str) -> ParseResult<ParseOutput> { + // Load the raw file contents + let mut f = File::open(path)?; + let mut content = String::new(); + f.read_to_string(&mut content)?; + + // Split the file by lines - .cass is a line based format + let mut lines: Vec<String> = content.split("\n").map(|l| l.to_owned()).collect(); + + // Build an iterator over parsed lines + let parsed = lines + .iter_mut() + .map(|line| lexer::lex(line)) + .map(|lex| parser::parse(lex)); + + // Generate the IR from parse output, then build the timefile + ir::generate_ir(parsed) + .into_iter() + .fold(Ok(ParseOutput::default()), |out, ir| match out { + Ok(out) => out.append(ir), + e @ Err(_) => e, + }) +} + +/// Write a file with the updated IR stream +pub(crate) fn write_file(path: &str, ir: &mut IrStream) -> ParseResult<()> { + ir::update_header(ir); + let mut lines = ir.into_iter().map(|ir| gen::line(ir)).collect::<Vec<_>>(); + lines.insert(0, gen::head_comment()); + + // let mut f = OpenOptions::new() + // .write(true) + // .create(true) + // .truncate(true) + // .open(path) + // .ok()?; + // f.write_all(lines.join("\n").as_bytes()).ok()?; + Ok(()) +} diff --git a/apps/koffice/libko/src/cass/format/parser.rs 
b/apps/koffice/libko/src/cass/format/parser.rs new file mode 100644 index 000000000000..8e0602d440d2 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/parser.rs @@ -0,0 +1,73 @@ +//! cassiopeia parser +//! +//! Takes a lexer's token stream as an input, and outputs a fully +//! parsed time file. + +use crate::cass::format::{LineLexer, LineToken, Token}; +use chrono::{DateTime, FixedOffset as Offset, NaiveDate}; +use std::collections::BTreeMap; +use std::iter::Iterator; + +/// A type-parsed line in a time file +#[derive(Debug)] +pub enum LineCfg { + /// A header line with a set of keys and values + Header(BTreeMap<String, String>), + /// A session start line with a date and time + Start(Option<DateTime<Offset>>), + /// A session stop line with a date and time + Stop(Option<DateTime<Offset>>), + /// An invoice line with a date + Invoice(Option<NaiveDate>), + /// A temporary value that is invalid + #[doc(hidden)] + Ignore, +} + +pub(crate) fn parse<'l>(lex: LineLexer<'l>) -> LineCfg { + use LineCfg::*; + use Token as T; + + #[cfg_attr(rustfmt, rustfmt_skip)] + lex.get_all().into_iter().fold(Ignore, |cfg, tok| match (cfg, tok) { + // If the first token is a comment, we ignore it + (Ignore, LineToken { tt: T::Comment, .. }, ) => Ignore, + // If the first token is a keyword, we wait for more data + (Ignore, LineToken { tt: T::Header, .. }) => Header(Default::default()), + (Ignore, LineToken { tt: T::Start, .. }) => Start(None), + (Ignore, LineToken { tt: T::Stop, .. }) => Stop(None), + (Ignore, LineToken { tt: T::Invoice, .. 
}) => Invoice(None), + + // If the first token _was_ a keyword, fill in the data + (Header(map), LineToken { tt: T::HeaderData, slice }) => Header(append_data(map, slice)), + (Start(_), LineToken { tt: T::Date, slice }) => Start(parse_datetime(slice)), + (Stop(_), LineToken { tt: T::Date, slice }) => Stop(parse_datetime(slice)), + (Invoice(_), LineToken { tt: T::Date, slice }) => Invoice(parse_date(slice)), + + // Pass empty lines through, + (empty, _) => empty, + + // Ignore everything else (which will be filtered) + _ => Ignore, + }) +} + +fn append_data(mut map: BTreeMap<String, String>, slice: &str) -> BTreeMap<String, String> { + let split = slice.split("=").collect::<Vec<_>>(); + map.insert(split[0].into(), split[1].into()); + map +} + +fn parse_datetime(slice: &str) -> Option<DateTime<Offset>> { + Some( + DateTime::parse_from_str(slice, "%Y-%m-%d %H:%M:%S%:z") + .expect("Failed to parse date; invalid format!"), + ) +} + +fn parse_date(slice: &str) -> Option<NaiveDate> { + Some( + NaiveDate::parse_from_str(slice, "%Y-%m-%d") + .expect("Failed to parse date; invalid format!"), + ) +} |