From e30713b84bc9e66f7a8e8d2f51e953472cac28e4 Mon Sep 17 00:00:00 2001 From: Katharina Fey Date: Mon, 22 Jun 2020 06:23:04 +0200 Subject: Committing all the libgit2 progress before throwing it away I don't think libgit2 is the way forward to make any of this work. There's so much work involved in parsing the git k-v store, and the library itself is essentially of zero help for most of the heavy lifting. --- src/git/log.rs | 61 ++++++++++++++++++++++++ src/git/mod.rs | 5 ++ src/git/repo.rs | 19 ++++++++ src/git/tree.rs | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 229 insertions(+) create mode 100644 src/git/log.rs create mode 100644 src/git/mod.rs create mode 100644 src/git/repo.rs create mode 100644 src/git/tree.rs (limited to 'src/git') diff --git a/src/git/log.rs b/src/git/log.rs new file mode 100644 index 0000000..dab9e69 --- /dev/null +++ b/src/git/log.rs @@ -0,0 +1,61 @@ +//! libgit2 log parsing + +use crate::git::{self, tree::FileNode}; +use git2::{Oid, Repository}; +use std::collections::{BTreeMap, BTreeSet}; + +/// A file-commit referenced graph thing +/// +/// git is _weird_! It's essentially just a glorified key-value store +/// and it shows. There are no utilities to figure out how things are +/// related, and all the actual graph things in git are sugar on top +/// of this store. +/// +/// In order to make sense of anything in a repo we need to quite +/// heavily parse the log. This type here is the result of this +/// parsing: you can ask it smart questions like "when did this file +/// change" and it will tell you (sort of). 
+#[derive(Default)] +pub(crate) struct CommitGraph { + order: Vec, + file_refs: BTreeMap>, + commit_refs: BTreeMap, +} + +pub(crate) struct CommitNode { + id: String, + author: String, + touches: BTreeSet, + date: String, +} + +fn build_diff_log(repo: &Repository, log: Vec<(String, Vec)>) -> Vec { + todo!() +} + +/// Walk through all commits from a given ref and build a commit graph (unfinished: fallible git2 calls are unwrapped and will panic on error, and the body ends in `todo!()`) +pub(crate) fn create_commit_log(id: String, repo: &Repository) -> CommitGraph { + let mut walker = repo.revwalk().unwrap(); + walker.push(Oid::from_str(id.as_str()).unwrap()).unwrap(); + let mut v = walker + .into_iter() + .map(|oid| { + let oid = oid.unwrap(); + repo.find_commit(oid).unwrap() + }) + .collect::>(); + v.reverse(); + + let log: Vec<_> = v + .into_iter() + .map(|commit| { + let id = format!("{}", commit.id()); + let tree_u = git::repo::get_tree(&repo, id.as_str()); + let tree = git::tree::parse_tree(tree_u, &repo); + (id, tree.flatten()) + }) + .collect(); + + let diffs = build_diff_log(&repo, log); + todo!() +} diff --git a/src/git/mod.rs b/src/git/mod.rs new file mode 100644 index 0000000..1cd9057 --- /dev/null +++ b/src/git/mod.rs @@ -0,0 +1,5 @@ +//! 
Wrappers for libgit2 + +pub mod log; +pub mod repo; +pub mod tree; diff --git a/src/git/repo.rs b/src/git/repo.rs new file mode 100644 index 0000000..0d04b0d --- /dev/null +++ b/src/git/repo.rs @@ -0,0 +1,19 @@ +use crate::templ_data::repo::RepoData; +use git2::{Oid, Repository, Tree}; + +/// Represents a repo in libgit2 (wraps the opened `git2::Repository` handle) +pub(crate) struct Repo { + pub(crate) inner: Repository, +} + +impl Repo { + pub(crate) fn new(path: &str) -> Self { + Self { + inner: Repository::open(path).expect(&format!("`{}` is not a valid git repo", path)), + } + } +} + +pub(crate) fn get_tree<'r>(repo: &'r Repository, rev: &str) -> Tree<'r> { + repo.revparse_single(rev).unwrap().peel_to_tree().unwrap() +} diff --git a/src/git/tree.rs b/src/git/tree.rs new file mode 100644 index 0000000..457eb40 --- /dev/null +++ b/src/git/tree.rs @@ -0,0 +1,144 @@ +//! Tree handling utilities +//! +//! The way that libgit2 handles trees is super low-level and overkill +//! for what we need. In this module we knock it down a notch or two. +//! +//! This code takes a tree returned by +//! `crate::git::repo::get_tree()`, and transforms it into a +//! `TreeData` type that the template engine can render. 
+ +use crate::templ_data::repo::{CommitData, FileData, TreeData}; +use git2::{self, ObjectType, TreeWalkMode}; +use std::collections::BTreeMap; + +/// A cache of a repository tree +#[derive(Default, Debug, Clone)] +pub(crate) struct Tree { + inner: BTreeMap, +} + +impl Tree { + /// Insert a node into a subtree with its full path (panics if a segment is missing or is not a directory) + fn insert_to_subtree(&mut self, mut path: Vec, name: String, node: TreeNode) { + // If we are given a path, resolve it first + let curr = if path.len() > 0 { + let rest = path.split_off(1); + let mut curr = self.inner.get_mut(&path[0]).unwrap(); + + for dir in rest { + match curr { + TreeNode::Dir(ref mut d) => { + curr = d.children.inner.get_mut(&dir).unwrap(); + } + _ => panic!("Not a tree!"), + } + } + + match curr { + TreeNode::Dir(ref mut d) => &mut d.children, + TreeNode::File(_) => panic!("Not a tree!"), + } + } else { + // If no path was given, we assume the root is meant + self + }; + + curr.inner.insert(name, node); + } + + /// Walk through the tree and only return filenode objects + pub(crate) fn flatten(&self) -> Vec { + self.inner.values().fold(vec![], |mut vec, node| { + match node { + TreeNode::File(f) => vec.push(f.clone()), + TreeNode::Dir(d) => vec.append(&mut d.children.flatten()), + } + + vec + }) + } +} + +#[derive(Clone, Debug)] +pub(crate) enum TreeNode { + File(FileNode), + Dir(DirNode), +} + +impl TreeNode { + fn name(&self) -> String { + match self { + Self::File(f) => f.name.clone(), + Self::Dir(d) => d.name.clone(), + } + } +} + +#[derive(Clone, Debug)] +pub(crate) struct FileNode { + pub id: String, + pub path: Vec, + pub name: String, +} + +#[derive(Clone, Debug)] +pub(crate) struct DirNode { + pub path: Vec, + pub name: String, + pub children: Tree, +} + +impl DirNode { + fn append(&mut self, node: TreeNode) { + self.children.inner.insert(node.name(), node); + } +} + +/// Walk a libgit2 tree in pre-order and build the cached `Tree` from its tree and blob entries (other entry kinds are skipped) +pub(crate) fn parse_tree(tree: git2::Tree, repo: &git2::Repository) -> 
Tree { + let mut root = Tree::default(); + + tree.walk(TreeWalkMode::PreOrder, |path, entry| { + let path: Vec = path + .split("/") + .filter_map(|seg| match seg { + "" => None, + val => Some(val.into()), + }) + .collect(); + let name = entry.name().unwrap().to_string(); + + match entry.kind() { + // For every tree in the tree we create a new TreeNode with the path we know about + Some(ObjectType::Tree) => { + root.insert_to_subtree( + path.clone(), + name.clone(), + TreeNode::Dir(DirNode { + path, + name, + children: Tree::default(), + }), + ); + } + // If we encounter a blob, this is a file that we can simply insert into the tree + Some(ObjectType::Blob) => { + root.insert_to_subtree( + path.clone(), + name.clone(), + TreeNode::File(FileNode { + id: format!("{}", entry.id()), + path, + name, + }), + ); + } + _ => {} + } + + 0 + }) + .unwrap(); + + root +} -- cgit v1.2.3