diff options
Diffstat (limited to 'apps/koffice/libko/src/cass/format')
-rw-r--r-- | apps/koffice/libko/src/cass/format/gen.rs    |  32 | ++
-rw-r--r-- | apps/koffice/libko/src/cass/format/ir.rs     |  99 | ++++++
-rw-r--r-- | apps/koffice/libko/src/cass/format/lexer.rs  | 151 | +++++++++
-rw-r--r-- | apps/koffice/libko/src/cass/format/mod.rs    |  76 | +++++
-rw-r--r-- | apps/koffice/libko/src/cass/format/parser.rs |  73 | +++++
5 files changed, 431 insertions(+), 0 deletions(-)
diff --git a/apps/koffice/libko/src/cass/format/gen.rs b/apps/koffice/libko/src/cass/format/gen.rs new file mode 100644 index 000000000000..f77bcdc90e84 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/gen.rs @@ -0,0 +1,32 @@ +//! Cassiopeia line generator +//! +//! This module takes a set of IR lines, and generates strings from +//! them that are in accordance with the way that the parser of the +//! same version expects them. + +use crate::cass::format::ir::{IrItem, IrType}; + +/// Take a line of IR and generate a string to print into a file +pub(crate) fn line(ir: &IrItem) -> String { + let IrItem { tt, lo } = ir; + match tt { + IrType::Ignore => "".into(), + IrType::Header(map) => format!( + "HEADER {}", + map.iter() + .map(|(k, v)| format!("{}={},", k, v)) + .collect::<Vec<_>>() + .join("") + ), + IrType::Start(time) => format!("START {}", time.to_string()), + + // FIXME: find a better way to align the lines here rather + // than having to manually having to pad the 'STOP' commands + IrType::Stop(time) => format!("STOP {}", time.to_string()), + IrType::Invoice(date) => format!("INVOICE {}", date.to_string()), + } +} + +pub(crate) fn head_comment() -> String { + ";; generated by cassiopeia, be careful about editing by hand!".into() +} diff --git a/apps/koffice/libko/src/cass/format/ir.rs b/apps/koffice/libko/src/cass/format/ir.rs new file mode 100644 index 000000000000..d1a3a62c1508 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/ir.rs @@ -0,0 +1,99 @@ +use crate::cass::{format::LineCfg, Date, Time, TimeFile}; +use std::collections::BTreeMap; + +/// A set of IR parsed items that makes up a whole cass file +pub(crate) type IrStream = Vec<IrItem>; + +/// Intermediate representation for parsing and generating files +/// +/// The CASS IR is largely based on the output of the parser's +/// [`LineCfg`](crate::format::LineCfg), but with concrete types used +/// in the data layer (namely [`Date`][date] and [`Time`][time]), +/// while also keeping track of 
the line numbers to allow idempotent +/// file changes. +/// +/// Something not yet implemented is comment pass-through (this needs +/// to happen in the parser first), but will likely be implemented in +/// a future version. +/// +/// [date]: crate::Date +/// [time]: crate::Time +#[derive(Debug, Clone)] +pub(crate) struct IrItem { + pub(crate) tt: IrType, + pub(crate) lo: usize, +} + +/// Disambiguate between different IR line types with their payload +#[derive(Debug, Clone)] +pub(crate) enum IrType { + /// A line with parsed header information + Header(BTreeMap<String, String>), + /// Start a session at a given timestapm + Start(Time), + /// Stop a session at a given timestamp + Stop(Time), + /// Invoice a block of previous work + Invoice(Date), + /// An item that gets ignored + Ignore, +} + +/// Generate a stream of IR items from the raw parser output +pub(crate) fn generate_ir(buf: impl Iterator<Item = LineCfg>) -> IrStream { + buf.enumerate().fold(vec![], |mut buf, (lo, item)| { + #[cfg_attr(rustfmt, rustfmt_skip)] + buf.push(match item { + LineCfg::Header(map) => IrItem { tt: IrType::Header(map.into_iter().map(|(k, v)| (k, v.replace(",", ""))).collect()), lo }, + LineCfg::Start(Some(time)) => IrItem { tt: IrType::Start(time.into()), lo }, + LineCfg::Stop(Some(time)) => IrItem { tt: IrType::Stop(time.into()), lo }, + LineCfg::Invoice(Some(date)) => IrItem { tt: IrType::Invoice(date.into()), lo }, + LineCfg::Ignore => IrItem { tt: IrType::Ignore, lo }, + _ => IrItem { tt: IrType::Ignore, lo }, + }); + + buf + }) +} + +pub(crate) trait MakeIr { + /// Make a new IR line from an object + fn make_ir(&self) -> IrType; +} + +pub(crate) fn clean_ir(ir: &mut IrStream) { + ir.remove(0); // FIXME: this is required to remove the leading + // comment, which will be manually re-generated at + // the moment, but which would just add more blank + // lines between the new comment, and the first line + // in this current format. 
This is very bad, yikes + // yikes yikes, but what can I do, I have a deadline + // (not really) lol + + // FIXME: this hack gets rid of a trailing empty line if it exists + // to make sure we don't have any gaps between work sessions. + if match ir.last() { + Some(IrItem { + tt: IrType::Ignore, .. + }) => true, + _ => false, + } { + ir.pop(); + } +} + +/// Taken an IrType and append it to an existing IR stream +pub(crate) fn append_ir(ir: &mut IrStream, tt: IrType) { + let lo = ir.last().unwrap().lo; + ir.push(IrItem { tt, lo }); +} + +/// Search for the header that contains the version string and update it +pub(crate) fn update_header(ir: &mut IrStream) { + ir.iter_mut().for_each(|item| match item.tt { + IrType::Header(ref mut map) if map.contains_key("version") => { + map.insert("version".into(), crate::cass::meta::VERSION.into()); + } + _ => {} + }); +} diff --git a/apps/koffice/libko/src/cass/format/lexer.rs b/apps/koffice/libko/src/cass/format/lexer.rs new file mode 100644 index 000000000000..bdb89f5180e5 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/lexer.rs @@ -0,0 +1,151 @@ +//! Cassiopeia file lexer + +use logos::{Lexer, Logos}; +use std::iter::Iterator; + +/// A basic line lexer type +/// +/// This lexer distinguishes between comments, and keyword lines. It +/// does not attempt to parse the line specifics. This is what the +/// content lexer is for. 
+#[derive(Logos, Debug, PartialEq)] +pub(crate) enum Token { + #[token("HEADER")] + Header, + + #[token("START")] + Start, + + #[token("STOP")] + Stop, + + #[token("INVOICE")] + Invoice, + + #[regex(r"\w+=[^,$]+[,$]")] + HeaderData, + + // FIXME: this will have a leading whitespace that we could remove + // with ^\w, but logos does not support this at the moment + #[regex(r"[0-9-:+ ]+")] + Date, + + #[token(" ", logos::skip)] + Space, + + #[regex(";;.*")] + Comment, + + #[error] + Error, +} + +/// A single token type on a line +#[derive(Debug)] +pub(crate) struct LineToken<'l> { + pub(crate) tt: Token, + pub(crate) slice: &'l str, +} + +/// A lexer wrapped for a single line +pub(crate) struct LineLexer<'l> { + lexer: Lexer<'l, Token>, +} + +impl<'l> LineLexer<'l> { + pub(crate) fn get_all(self) -> Vec<LineToken<'l>> { + let mut acc = vec![]; + for l in self { + acc.push(l); + } + acc + } +} + +impl<'l> Iterator for LineLexer<'l> { + type Item = LineToken<'l>; + + fn next(&mut self) -> Option<Self::Item> { + self.lexer.next().map(|tt| Self::Item { + tt, + slice: self.lexer.slice(), + }) + } +} + +/// Take a line of input and lex it into a stream of tokens +pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> { + LineLexer { + lexer: Token::lexer(line), + } +} + +#[test] +fn basic_header() { + let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,"); + + assert_eq!(lex.next(), Some(Token::Header)); + assert_eq!(lex.span(), 0..6); + assert_eq!(lex.slice(), "HEADER"); + + assert_eq!(lex.next(), Some(Token::HeaderData)); + assert_eq!(lex.span(), 7..21); + assert_eq!(lex.slice(), "version=0.0.0,"); + + assert_eq!(lex.next(), Some(Token::HeaderData)); + assert_eq!(lex.span(), 21..49); + assert_eq!(lex.slice(), "location=Berlin Lichtenberg,"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_start() { + let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Start)); + 
assert_eq!(lex.span(), 0..5); + assert_eq!(lex.slice(), "START"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 5..31); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_stop() { + let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Stop)); + assert_eq!(lex.span(), 0..4); + assert_eq!(lex.slice(), "STOP"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 4..30); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_invoice() { + let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), Some(Token::Invoice)); + assert_eq!(lex.span(), 0..7); + assert_eq!(lex.slice(), "INVOICE"); + + assert_eq!(lex.next(), Some(Token::Date)); + assert_eq!(lex.span(), 7..33); + assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00"); + + assert_eq!(lex.next(), None); +} + +#[test] +fn basic_comment() { + let mut lex = Token::lexer(";; This file is auto generated!"); + assert_eq!(lex.next(), Some(Token::Comment)); +} diff --git a/apps/koffice/libko/src/cass/format/mod.rs b/apps/koffice/libko/src/cass/format/mod.rs new file mode 100644 index 000000000000..2983653898b6 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/mod.rs @@ -0,0 +1,76 @@ +//! 
cassiopeia file format + +mod gen; +pub(crate) mod ir; +mod lexer; +mod parser; + +pub(crate) use lexer::{LineLexer, LineToken, Token}; +pub(crate) use parser::LineCfg; + +use crate::{ + cass::error::{ParseError, ParseResult}, + cass::TimeFile, +}; +use ir::{IrItem, IrStream}; +use std::{ + fs::{File, OpenOptions}, + io::{Read, Write}, +}; + +/// A crate internal representation of the IR stream and timefile +#[derive(Default)] +pub(crate) struct ParseOutput { + pub(crate) ir: IrStream, + pub(crate) tf: TimeFile, +} + +impl ParseOutput { + fn append(mut self, ir: IrItem) -> ParseResult<Self> { + self.tf.append(ir.clone())?; + self.ir.push(ir); + Ok(self) + } +} + +/// Load a file from disk and parse it into a +/// [`TimeFile`](crate::TimeFile) +pub(crate) fn load_file(path: &str) -> ParseResult<ParseOutput> { + // Load the raw file contents + let mut f = File::open(path)?; + let mut content = String::new(); + f.read_to_string(&mut content)?; + + // Split the file by lines - .cass is a line based format + let mut lines: Vec<String> = content.split("\n").map(|l| l.to_owned()).collect(); + + // Build an iterator over parsed lines + let parsed = lines + .iter_mut() + .map(|line| lexer::lex(line)) + .map(|lex| parser::parse(lex)); + + // Generate the IR from parse output, then build the timefile + ir::generate_ir(parsed) + .into_iter() + .fold(Ok(ParseOutput::default()), |out, ir| match out { + Ok(out) => out.append(ir), + e @ Err(_) => e, + }) +} + +/// Write a file with the updated IR stream +pub(crate) fn write_file(path: &str, ir: &mut IrStream) -> ParseResult<()> { + ir::update_header(ir); + let mut lines = ir.into_iter().map(|ir| gen::line(ir)).collect::<Vec<_>>(); + lines.insert(0, gen::head_comment()); + + // let mut f = OpenOptions::new() + // .write(true) + // .create(true) + // .truncate(true) + // .open(path) + // .ok()?; + // f.write_all(lines.join("\n").as_bytes()).ok()?; + Ok(()) +} diff --git a/apps/koffice/libko/src/cass/format/parser.rs 
b/apps/koffice/libko/src/cass/format/parser.rs new file mode 100644 index 000000000000..8e0602d440d2 --- /dev/null +++ b/apps/koffice/libko/src/cass/format/parser.rs @@ -0,0 +1,73 @@ +//! cassiopeia parser +//! +//! Takes a lexer's token stream as an input, and outputs a fully +//! parsed time file. + +use crate::cass::format::{LineLexer, LineToken, Token}; +use chrono::{DateTime, FixedOffset as Offset, NaiveDate}; +use std::collections::BTreeMap; +use std::iter::Iterator; + +/// A type-parsed line in a time file +#[derive(Debug)] +pub enum LineCfg { + /// A header line with a set of keys and values + Header(BTreeMap<String, String>), + /// A session start line with a date and time + Start(Option<DateTime<Offset>>), + /// A session stop line with a date and time + Stop(Option<DateTime<Offset>>), + /// An invoice line with a date + Invoice(Option<NaiveDate>), + /// A temporary value that is invalid + #[doc(hidden)] + Ignore, +} + +pub(crate) fn parse<'l>(lex: LineLexer<'l>) -> LineCfg { + use LineCfg::*; + use Token as T; + + #[cfg_attr(rustfmt, rustfmt_skip)] + lex.get_all().into_iter().fold(Ignore, |cfg, tok| match (cfg, tok) { + // If the first token is a comment, we ignore it + (Ignore, LineToken { tt: T::Comment, .. }, ) => Ignore, + // If the first token is a keyword, we wait for more data + (Ignore, LineToken { tt: T::Header, .. }) => Header(Default::default()), + (Ignore, LineToken { tt: T::Start, .. }) => Start(None), + (Ignore, LineToken { tt: T::Stop, .. }) => Stop(None), + (Ignore, LineToken { tt: T::Invoice, .. 
}) => Invoice(None), + + // If the first token _was_ a keyword, fill in the data + (Header(map), LineToken { tt: T::HeaderData, slice }) => Header(append_data(map, slice)), + (Start(_), LineToken { tt: T::Date, slice }) => Start(parse_datetime(slice)), + (Stop(_), LineToken { tt: T::Date, slice }) => Stop(parse_datetime(slice)), + (Invoice(_), LineToken { tt: T::Date, slice }) => Invoice(parse_date(slice)), + + // Pass empty lines through, + (empty, _) => empty, + + // Ignore everything else (which will be filtered) + _ => Ignore, + }) +} + +fn append_data(mut map: BTreeMap<String, String>, slice: &str) -> BTreeMap<String, String> { + let split = slice.split("=").collect::<Vec<_>>(); + map.insert(split[0].into(), split[1].into()); + map +} + +fn parse_datetime(slice: &str) -> Option<DateTime<Offset>> { + Some( + DateTime::parse_from_str(slice, "%Y-%m-%d %H:%M:%S%:z") + .expect("Failed to parse date; invalid format!"), + ) +} + +fn parse_date(slice: &str) -> Option<NaiveDate> { + Some( + NaiveDate::parse_from_str(slice, "%Y-%m-%d") + .expect("Failed to parse date; invalid format!"), + ) +} |