aboutsummaryrefslogtreecommitdiff
path: root/apps/koffice/libko/src/cass/format
diff options
context:
space:
mode:
Diffstat (limited to 'apps/koffice/libko/src/cass/format')
-rw-r--r--apps/koffice/libko/src/cass/format/gen.rs32
-rw-r--r--apps/koffice/libko/src/cass/format/ir.rs99
-rw-r--r--apps/koffice/libko/src/cass/format/lexer.rs151
-rw-r--r--apps/koffice/libko/src/cass/format/mod.rs76
-rw-r--r--apps/koffice/libko/src/cass/format/parser.rs73
5 files changed, 431 insertions, 0 deletions
diff --git a/apps/koffice/libko/src/cass/format/gen.rs b/apps/koffice/libko/src/cass/format/gen.rs
new file mode 100644
index 000000000000..f77bcdc90e84
--- /dev/null
+++ b/apps/koffice/libko/src/cass/format/gen.rs
@@ -0,0 +1,32 @@
+//! Cassiopeia line generator
+//!
+//! This module takes a set of IR lines, and generates strings from
+//! them that are in accordance with the way that the parser of the
+//! same version expects them.
+
+use crate::cass::format::ir::{IrItem, IrType};
+
+/// Take a line of IR and generate a string to print into a file
+pub(crate) fn line(ir: &IrItem) -> String {
+ let IrItem { tt, lo } = ir;
+ match tt {
+ IrType::Ignore => "".into(),
+ IrType::Header(map) => format!(
+ "HEADER {}",
+ map.iter()
+ .map(|(k, v)| format!("{}={},", k, v))
+ .collect::<Vec<_>>()
+ .join("")
+ ),
+ IrType::Start(time) => format!("START {}", time.to_string()),
+
+ // FIXME: find a better way to align the lines here rather
+ // than having to manually having to pad the 'STOP' commands
+ IrType::Stop(time) => format!("STOP {}", time.to_string()),
+ IrType::Invoice(date) => format!("INVOICE {}", date.to_string()),
+ }
+}
+
/// Produce the warning comment placed at the very top of every
/// generated cassiopeia file.
pub(crate) fn head_comment() -> String {
    String::from(";; generated by cassiopeia, be careful about editing by hand!")
}
diff --git a/apps/koffice/libko/src/cass/format/ir.rs b/apps/koffice/libko/src/cass/format/ir.rs
new file mode 100644
index 000000000000..d1a3a62c1508
--- /dev/null
+++ b/apps/koffice/libko/src/cass/format/ir.rs
@@ -0,0 +1,99 @@
+use crate::cass::{format::LineCfg, Date, Time, TimeFile};
+use std::collections::BTreeMap;
+
/// A set of IR parsed items that makes up a whole cass file
pub(crate) type IrStream = Vec<IrItem>;

/// Intermediate representation for parsing and generating files
///
/// The CASS IR is largely based on the output of the parser's
/// [`LineCfg`](crate::format::LineCfg), but with concrete types used
/// in the data layer (namely [`Date`][date] and [`Time`][time]),
/// while also keeping track of the line numbers to allow idempotent
/// file changes.
///
/// Something not yet implemented is comment pass-through (this needs
/// to happen in the parser first), but will likely be implemented in
/// a future version.
///
/// [date]: crate::Date
/// [time]: crate::Time
#[derive(Debug, Clone)]
pub(crate) struct IrItem {
    // The parsed line type, together with its payload
    pub(crate) tt: IrType,
    // Zero-based line offset this item was parsed from (assigned via
    // enumerate() in generate_ir)
    pub(crate) lo: usize,
}
+
/// Disambiguate between different IR line types with their payload
#[derive(Debug, Clone)]
pub(crate) enum IrType {
    /// A line with parsed header information
    Header(BTreeMap<String, String>),
    /// Start a session at a given timestamp
    Start(Time),
    /// Stop a session at a given timestamp
    Stop(Time),
    /// Invoice a block of previous work
    Invoice(Date),
    /// An item that gets ignored
    Ignore,
}
+
+/// Generate a stream of IR items from the raw parser output
+pub(crate) fn generate_ir(buf: impl Iterator<Item = LineCfg>) -> IrStream {
+ buf.enumerate().fold(vec![], |mut buf, (lo, item)| {
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ buf.push(match item {
+ LineCfg::Header(map) => IrItem { tt: IrType::Header(map.into_iter().map(|(k, v)| (k, v.replace(",", ""))).collect()), lo },
+ LineCfg::Start(Some(time)) => IrItem { tt: IrType::Start(time.into()), lo },
+ LineCfg::Stop(Some(time)) => IrItem { tt: IrType::Stop(time.into()), lo },
+ LineCfg::Invoice(Some(date)) => IrItem { tt: IrType::Invoice(date.into()), lo },
+ LineCfg::Ignore => IrItem { tt: IrType::Ignore, lo },
+ _ => IrItem { tt: IrType::Ignore, lo },
+ });
+
+ buf
+ })
+}
+
/// Conversion trait for data-layer types that can be lowered into IR
pub(crate) trait MakeIr {
    /// Make a new IR line from an object
    fn make_ir(&self) -> IrType;
}
+
+pub(crate) fn clean_ir(ir: &mut IrStream) {
+ ir.remove(0); // FIXME: this is required to remove the leading
+ // comment, which will be manually re-generated at
+ // the moment, but which would just add more blank
+ // lines between the new comment, and the first line
+ // in this current format. This is very bad, yikes
+ // yikes yikes, but what can I do, I have a deadline
+ // (not really) lol
+
+ // FIXME: this hack gets rid of a trailing empty line if it exists
+ // to make sure we don't have any gaps between work sessions.
+ if match ir.last() {
+ Some(IrItem {
+ tt: IrType::Ignore, ..
+ }) => true,
+ _ => false,
+ } {
+ ir.pop();
+ }
+}
+
+/// Taken an IrType and append it to an existing IR stream
+pub(crate) fn append_ir(ir: &mut IrStream, tt: IrType) {
+ let lo = ir.last().unwrap().lo;
+ ir.push(IrItem { tt, lo });
+}
+
+/// Search for the header that contains the version string and update it
+pub(crate) fn update_header(ir: &mut IrStream) {
+ ir.iter_mut().for_each(|item| match item.tt {
+ IrType::Header(ref mut map) if map.contains_key("version") => {
+ map.insert("version".into(), crate::cass::meta::VERSION.into());
+ }
+ _ => {}
+ });
+}
diff --git a/apps/koffice/libko/src/cass/format/lexer.rs b/apps/koffice/libko/src/cass/format/lexer.rs
new file mode 100644
index 000000000000..bdb89f5180e5
--- /dev/null
+++ b/apps/koffice/libko/src/cass/format/lexer.rs
@@ -0,0 +1,151 @@
+//! Cassiopeia file lexer
+
+use logos::{Lexer, Logos};
+use std::iter::Iterator;
+
/// A basic line lexer type
///
/// This lexer distinguishes between comments, and keyword lines. It
/// does not attempt to parse the line specifics. This is what the
/// content lexer is for.
#[derive(Logos, Debug, PartialEq)]
pub(crate) enum Token {
    /// Keyword introducing a header line
    #[token("HEADER")]
    Header,

    /// Keyword starting a work session
    #[token("START")]
    Start,

    /// Keyword ending a work session
    #[token("STOP")]
    Stop,

    /// Keyword marking an invoice line
    #[token("INVOICE")]
    Invoice,

    /// A single `key=value,` pair from a HEADER line (the trailing
    /// comma is part of the match)
    #[regex(r"\w+=[^,$]+[,$]")]
    HeaderData,

    // FIXME: this will have a leading whitespace that we could remove
    // with ^\w, but logos does not support this at the moment
    #[regex(r"[0-9-:+ ]+")]
    Date,

    /// Single spaces are skipped rather than emitted as tokens
    #[token(" ", logos::skip)]
    Space,

    /// A `;;`-prefixed comment, matching to the end of the line
    #[regex(";;.*")]
    Comment,

    /// Catch-all for input the lexer cannot classify
    #[error]
    Error,
}
+
/// A single token type on a line
#[derive(Debug)]
pub(crate) struct LineToken<'l> {
    // The token type recognised by the lexer
    pub(crate) tt: Token,
    // The raw input slice the token was matched from
    pub(crate) slice: &'l str,
}
+
/// A lexer wrapped for a single line
pub(crate) struct LineLexer<'l> {
    // Underlying logos lexer driving the token stream
    lexer: Lexer<'l, Token>,
}
+
+impl<'l> LineLexer<'l> {
+ pub(crate) fn get_all(self) -> Vec<LineToken<'l>> {
+ let mut acc = vec![];
+ for l in self {
+ acc.push(l);
+ }
+ acc
+ }
+}
+
+impl<'l> Iterator for LineLexer<'l> {
+ type Item = LineToken<'l>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.lexer.next().map(|tt| Self::Item {
+ tt,
+ slice: self.lexer.slice(),
+ })
+ }
+}
+
+/// Take a line of input and lex it into a stream of tokens
+pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> {
+ LineLexer {
+ lexer: Token::lexer(line),
+ }
+}
+
/// A HEADER line lexes into the keyword plus one HeaderData token per
/// `key=value,` pair; the spans are byte offsets into the input.
#[test]
fn basic_header() {
    let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");

    assert_eq!(lex.next(), Some(Token::Header));
    assert_eq!(lex.span(), 0..6);
    assert_eq!(lex.slice(), "HEADER");

    assert_eq!(lex.next(), Some(Token::HeaderData));
    assert_eq!(lex.span(), 7..21);
    assert_eq!(lex.slice(), "version=0.0.0,");

    assert_eq!(lex.next(), Some(Token::HeaderData));
    assert_eq!(lex.span(), 21..49);
    assert_eq!(lex.slice(), "location=Berlin Lichtenberg,");

    assert_eq!(lex.next(), None);
}
+
/// A START line lexes into the keyword plus a Date token; note that
/// the Date slice keeps its leading space (see the FIXME on Token).
#[test]
fn basic_start() {
    let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), Some(Token::Start));
    assert_eq!(lex.span(), 0..5);
    assert_eq!(lex.slice(), "START");

    assert_eq!(lex.next(), Some(Token::Date));
    assert_eq!(lex.span(), 5..31);
    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), None);
}
+
/// A STOP line lexes the same way as START: keyword plus a Date token
/// whose slice keeps its leading space.
#[test]
fn basic_stop() {
    let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), Some(Token::Stop));
    assert_eq!(lex.span(), 0..4);
    assert_eq!(lex.slice(), "STOP");

    assert_eq!(lex.next(), Some(Token::Date));
    assert_eq!(lex.span(), 4..30);
    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), None);
}
+
/// An INVOICE line lexes into the keyword plus a Date token; the
/// lexer does not distinguish date-only from full timestamps.
#[test]
fn basic_invoice() {
    let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), Some(Token::Invoice));
    assert_eq!(lex.span(), 0..7);
    assert_eq!(lex.slice(), "INVOICE");

    assert_eq!(lex.next(), Some(Token::Date));
    assert_eq!(lex.span(), 7..33);
    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), None);
}
+
/// A `;;`-prefixed line lexes into a single Comment token
#[test]
fn basic_comment() {
    let mut lex = Token::lexer(";; This file is auto generated!");
    assert_eq!(lex.next(), Some(Token::Comment));
}
diff --git a/apps/koffice/libko/src/cass/format/mod.rs b/apps/koffice/libko/src/cass/format/mod.rs
new file mode 100644
index 000000000000..2983653898b6
--- /dev/null
+++ b/apps/koffice/libko/src/cass/format/mod.rs
@@ -0,0 +1,76 @@
+//! cassiopeia file format
+
+mod gen;
+pub(crate) mod ir;
+mod lexer;
+mod parser;
+
+pub(crate) use lexer::{LineLexer, LineToken, Token};
+pub(crate) use parser::LineCfg;
+
+use crate::{
+ cass::error::{ParseError, ParseResult},
+ cass::TimeFile,
+};
+use ir::{IrItem, IrStream};
+use std::{
+ fs::{File, OpenOptions},
+ io::{Read, Write},
+};
+
/// A crate internal representation of the IR stream and timefile
#[derive(Default)]
pub(crate) struct ParseOutput {
    // Line-by-line IR, kept around for idempotent re-generation
    pub(crate) ir: IrStream,
    // The data-layer time file built from the same IR items
    pub(crate) tf: TimeFile,
}
+
impl ParseOutput {
    /// Fold one IR item into both the timefile and the IR stream
    ///
    /// The timefile append runs first so that a failed append leaves
    /// the IR stream untouched; the clone is required because the item
    /// is stored in both places.
    fn append(mut self, ir: IrItem) -> ParseResult<Self> {
        self.tf.append(ir.clone())?;
        self.ir.push(ir);
        Ok(self)
    }
}
+
+/// Load a file from disk and parse it into a
+/// [`TimeFile`](crate::TimeFile)
+pub(crate) fn load_file(path: &str) -> ParseResult<ParseOutput> {
+ // Load the raw file contents
+ let mut f = File::open(path)?;
+ let mut content = String::new();
+ f.read_to_string(&mut content)?;
+
+ // Split the file by lines - .cass is a line based format
+ let mut lines: Vec<String> = content.split("\n").map(|l| l.to_owned()).collect();
+
+ // Build an iterator over parsed lines
+ let parsed = lines
+ .iter_mut()
+ .map(|line| lexer::lex(line))
+ .map(|lex| parser::parse(lex));
+
+ // Generate the IR from parse output, then build the timefile
+ ir::generate_ir(parsed)
+ .into_iter()
+ .fold(Ok(ParseOutput::default()), |out, ir| match out {
+ Ok(out) => out.append(ir),
+ e @ Err(_) => e,
+ })
+}
+
+/// Write a file with the updated IR stream
+pub(crate) fn write_file(path: &str, ir: &mut IrStream) -> ParseResult<()> {
+ ir::update_header(ir);
+ let mut lines = ir.into_iter().map(|ir| gen::line(ir)).collect::<Vec<_>>();
+ lines.insert(0, gen::head_comment());
+
+ // let mut f = OpenOptions::new()
+ // .write(true)
+ // .create(true)
+ // .truncate(true)
+ // .open(path)
+ // .ok()?;
+ // f.write_all(lines.join("\n").as_bytes()).ok()?;
+ Ok(())
+}
diff --git a/apps/koffice/libko/src/cass/format/parser.rs b/apps/koffice/libko/src/cass/format/parser.rs
new file mode 100644
index 000000000000..8e0602d440d2
--- /dev/null
+++ b/apps/koffice/libko/src/cass/format/parser.rs
@@ -0,0 +1,73 @@
+//! cassiopeia parser
+//!
+//! Takes a lexer's token stream as an input, and outputs a fully
+//! parsed time file.
+
+use crate::cass::format::{LineLexer, LineToken, Token};
+use chrono::{DateTime, FixedOffset as Offset, NaiveDate};
+use std::collections::BTreeMap;
+use std::iter::Iterator;
+
/// A type-parsed line in a time file
#[derive(Debug)]
pub enum LineCfg {
    /// A header line with a set of keys and values
    Header(BTreeMap<String, String>),
    /// A session start line with a date and time
    Start(Option<DateTime<Offset>>),
    /// A session stop line with a date and time
    Stop(Option<DateTime<Offset>>),
    /// An invoice line with a date (no time component)
    Invoice(Option<NaiveDate>),
    /// A temporary value that is invalid
    #[doc(hidden)]
    Ignore,
}
+
+pub(crate) fn parse<'l>(lex: LineLexer<'l>) -> LineCfg {
+ use LineCfg::*;
+ use Token as T;
+
+ #[cfg_attr(rustfmt, rustfmt_skip)]
+ lex.get_all().into_iter().fold(Ignore, |cfg, tok| match (cfg, tok) {
+ // If the first token is a comment, we ignore it
+ (Ignore, LineToken { tt: T::Comment, .. }, ) => Ignore,
+ // If the first token is a keyword, we wait for more data
+ (Ignore, LineToken { tt: T::Header, .. }) => Header(Default::default()),
+ (Ignore, LineToken { tt: T::Start, .. }) => Start(None),
+ (Ignore, LineToken { tt: T::Stop, .. }) => Stop(None),
+ (Ignore, LineToken { tt: T::Invoice, .. }) => Invoice(None),
+
+ // If the first token _was_ a keyword, fill in the data
+ (Header(map), LineToken { tt: T::HeaderData, slice }) => Header(append_data(map, slice)),
+ (Start(_), LineToken { tt: T::Date, slice }) => Start(parse_datetime(slice)),
+ (Stop(_), LineToken { tt: T::Date, slice }) => Stop(parse_datetime(slice)),
+ (Invoice(_), LineToken { tt: T::Date, slice }) => Invoice(parse_date(slice)),
+
+ // Pass empty lines through,
+ (empty, _) => empty,
+
+ // Ignore everything else (which will be filtered)
+ _ => Ignore,
+ })
+}
+
/// Split a single `key=value` token and insert it into the header map
///
/// Only the first `=` separates key from value, so values may contain
/// `=` themselves (the lexer's `[^,$]+` value class permits it; the
/// previous `split("=")[1]` silently truncated such values). A slice
/// without any `=` is skipped instead of panicking on an out-of-range
/// index.
fn append_data(mut map: BTreeMap<String, String>, slice: &str) -> BTreeMap<String, String> {
    let mut split = slice.splitn(2, '=');
    if let (Some(key), Some(value)) = (split.next(), split.next()) {
        map.insert(key.into(), value.into());
    }
    map
}
+
+fn parse_datetime(slice: &str) -> Option<DateTime<Offset>> {
+ Some(
+ DateTime::parse_from_str(slice, "%Y-%m-%d %H:%M:%S%:z")
+ .expect("Failed to parse date; invalid format!"),
+ )
+}
+
+fn parse_date(slice: &str) -> Option<NaiveDate> {
+ Some(
+ NaiveDate::parse_from_str(slice, "%Y-%m-%d")
+ .expect("Failed to parse date; invalid format!"),
+ )
+}