aboutsummaryrefslogtreecommitdiff
path: root/apps
diff options
context:
space:
mode:
authorKaiden Fey <kookie@spacekookie.de>2020-11-09 20:55:54 +0100
committerMx Kookie <kookie@spacekookie.de>2020-12-21 05:19:47 +0100
commit948a4a76c14956df7ed38172abd0e2469ca7ffa8 (patch)
tree6bda1034109c3a62fc482f0fe9ba85a3ee5cd0ec /apps
parent2a180cc038d39e58a2aa99075695e01d4013b69b (diff)
supergit: implementing tree parsing
This implementation is a bit weird, especially because it changes the API from what it was previously. This works, for now, but some of the relationships between types feel a bit bad. Especially that all queries have to go via the FileTree, and we can't just give out objects that represent some part of the tree that are then loaded when needed. For now this will work though. What's still missing is to turn a Yield::Dir into a new FileTree.
Diffstat (limited to 'apps')
-rw-r--r--apps/servers/octopus/supergit/src/bin/test.rs3
-rw-r--r--apps/servers/octopus/supergit/src/branch.rs50
-rw-r--r--apps/servers/octopus/supergit/src/commit.rs6
-rw-r--r--apps/servers/octopus/supergit/src/files.rs177
-rw-r--r--apps/servers/octopus/supergit/src/lib.rs8
-rw-r--r--apps/servers/octopus/supergit/src/repo.rs32
6 files changed, 204 insertions, 72 deletions
diff --git a/apps/servers/octopus/supergit/src/bin/test.rs b/apps/servers/octopus/supergit/src/bin/test.rs
index c4fcc2fbb3a8..830c8b62f136 100644
--- a/apps/servers/octopus/supergit/src/bin/test.rs
+++ b/apps/servers/octopus/supergit/src/bin/test.rs
@@ -22,5 +22,6 @@ fn main() {
let head = main.get_head();
let tree = head.get_tree();
-
+
+ println!("{:?}", tree.load(""));
}
diff --git a/apps/servers/octopus/supergit/src/branch.rs b/apps/servers/octopus/supergit/src/branch.rs
index dd92aea93cba..432227900247 100644
--- a/apps/servers/octopus/supergit/src/branch.rs
+++ b/apps/servers/octopus/supergit/src/branch.rs
@@ -4,7 +4,26 @@ use std::{mem, sync::Arc};
/// Abstraction for a branch history slice
///
+/// Git implements a directed acyclic graph, where branches can split
+/// and be re-merged later. Traversal always happens from some point
+/// onwards, backwards through the history. Because git repositories
+/// can get quite large and this is a recursive process, it's very
+/// quickly possible to overflow your program stack. To avoid this,
+/// `supergit` uses an iterator design to enumerate commits.
///
+/// Use the API on this type to specify your starting point. By
+/// default, it will be the head of the branch you are looking at.
+/// Note: not all branches have names!
+///
+/// After creating a `BranchIter` you can then call `next()` on it,
+/// yielding `BranchCommit` objects. These can either be single
+/// commits, or various types of merge commits. Each merge commit
+/// yields some set of `Branch` handles, which you can either ignore or
+/// traverse by building another `BranchIter`.
+///
+/// A branch iterator is therefore always first-parent, meaning that
+/// merged branches can simply be ignored by only ever inspecting the
+/// current `Commit` contained by a `BranchCommit`.
#[derive(Clone)]
pub struct Branch {
repo: Arc<Repository>,
@@ -56,6 +75,7 @@ impl Branch {
}
}
+ /// Create a branch iterator that stops when reaching a commit
pub fn get_to(&self, commit: HashId) -> BranchIter {
BranchIter::new(
Arc::clone(&self.repo),
@@ -64,12 +84,10 @@ impl Branch {
)
}
- /// Get the primary branch history as far back as it goes
- pub fn get_all(&self) -> BranchIter {
- BranchIter::new(Arc::clone(&self.repo), self.head.clone(), SegLimit::None)
- }
-
- /// Get a branch segment of a certain length
+ /// Create a step-limited branch iterator
+ ///
+ /// This type of iterator is especially useful when combined with
+ /// `skip()`, to create a paginated view onto commits.
pub fn get(&self, num: usize) -> BranchIter {
BranchIter::new(
Arc::clone(&self.repo),
@@ -78,7 +96,17 @@ impl Branch {
)
}
- /// Get the commit pointed at by HEAD
+ /// Create an endless branch iterator
+ ///
+ /// While the creation of the iterator is instantaneous, actually
+ /// enumerating all commits in a repository can be quite
+ /// computationally intensive and is almost never what you
+ /// actually want.
+ pub fn get_all(&self) -> BranchIter {
+ BranchIter::new(Arc::clone(&self.repo), self.head.clone(), SegLimit::None)
+ }
+
+ /// Get the current HEAD commit
pub fn get_head(&self) -> Commit {
Commit::new(&self.repo, self.head.clone()).unwrap()
}
@@ -89,10 +117,12 @@ impl Branch {
}
}
-/// A branch segment iterator
+/// A branch slice iterator, created via `Branch` handle
///
-/// Each iterator is first-parent, but will notify you about a split
-/// parent by setting
+/// This iterator yields `BranchCommit` objects, that can either be
+/// simple commits, or various types of merge commits with new Branch
+/// handles. This means that without explicitly branching, this
+/// iterator is first-parent.
pub struct BranchIter {
repo: Arc<Repository>,
curr: Option<HashId>,
diff --git a/apps/servers/octopus/supergit/src/commit.rs b/apps/servers/octopus/supergit/src/commit.rs
index 14f2d9bafdc1..bc7383d1ed6d 100644
--- a/apps/servers/octopus/supergit/src/commit.rs
+++ b/apps/servers/octopus/supergit/src/commit.rs
@@ -26,7 +26,7 @@ impl Commit {
self.id.to_string()
}
- /// Get the summary line as a utf-7 string
+ /// Get the summary line as a utf-8 string
pub fn summary(&self) -> String {
self.find().summary().unwrap().into()
}
@@ -55,6 +55,10 @@ impl Commit {
.and_then(|c| Self::new(&self.repo, c.id().into()))
}
+ /// Get the set of parents as a vector
+ ///
+ /// Use this function if you suspect a commit has more than one
+ /// parent.
pub fn parents(&self) -> Vec<Commit> {
self.find()
.parents()
diff --git a/apps/servers/octopus/supergit/src/files.rs b/apps/servers/octopus/supergit/src/files.rs
index d86a82306eef..fa68fbc2f3dc 100644
--- a/apps/servers/octopus/supergit/src/files.rs
+++ b/apps/servers/octopus/supergit/src/files.rs
@@ -1,13 +1,13 @@
use crate::{Branch, BranchIter, Commit, HashId};
-use git2::{ObjectType, TreeWalkMode, TreeWalkResult};
use atomptr::AtomPtr;
+use git2::{ObjectType, TreeWalkMode, TreeWalkResult};
use std::collections::BTreeMap;
use std::{path::PathBuf, sync::Arc};
/// A tree of files
pub struct FileTree {
repo: Arc<git2::Repository>,
- tree: AtomPtr<BTreeMap<String, TreeEntry>>,
+ tree: AtomPtr<BTreeMap<String, Arc<TreeEntry>>>,
}
impl FileTree {
@@ -23,39 +23,78 @@ impl FileTree {
/// Parse a tree from a specific commit
pub(crate) fn parse(self: Arc<Self>, commit: HashId) -> Arc<Self> {
let mut new_tree = BTreeMap::new();
-
+
let tree = (&self.repo)
.find_commit(commit.to_oid())
.unwrap()
.tree()
.unwrap();
- tree.walk(TreeWalkMode::PreOrder, |what, entry| {
- let path_segs: Vec<_> = what.split("/").filter(|s| s != &"").collect();
+ tree.walk(TreeWalkMode::PreOrder, |p, entry| {
+ let path_segs: Vec<_> = p.split("/").filter(|s| s != &"").collect();
let path = if path_segs.len() == 0 {
None
} else {
Some(path_segs)
};
- println!("{:?} {}", path, entry.name().unwrap());
+ let te = TreeEntry::generate(path, entry);
+ new_tree.insert(te.path(), Arc::new(te));
TreeWalkResult::Ok
})
.unwrap();
+
+ // Add a special entry for the root of the repo
+ new_tree.insert(
+ "".into(),
+ Arc::new(TreeEntry::Dir(Directory {
+ id: tree.id().into(),
+ path: "".into(),
+ name: "".into(),
+ })),
+ );
+
+ // This is needed to make borrowchk shut up
drop(tree);
// Atomically swap new tree into place
self.tree.swap(new_tree);
-
+
self
}
+
+ fn get_entry(&self, path: &str) -> Option<Arc<TreeEntry>> {
+ self.tree.get_ref().get(path).map(|e| Arc::clone(&e))
+ }
+
+ /// Load an entry in this `FileTree` from the repository
+ ///
+ /// For a file this yields its contents as `Yield::File`; for a
+ /// directory it yields the paths of its direct children as
+ /// `Yield::Dir`. Returns `None` if the path is not in the tree,
+ /// or the underlying git object can't be found.
+ pub fn load(&self, path: &str) -> Option<Yield> {
+ self.get_entry(path).and_then(|e| e.load(&self.repo))
+ }
}
-/// An entry in a file tree
+/// Data yielded from loading a part of the file tree
+///
+/// This type is returned when fetching a path via `FileTree::load()`,
+/// and can either be a single file read into memory, or an
+/// enumeration of direct children of a directory.
///
-/// It's variants can either be a file (leaf), or a subtree, with it's
-/// own path handles, and children.
-pub enum TreeEntry {
+/// To get all children of a subtree, use `Yield::into_tree()` to
+/// create a new, recursive `FileTree` to enumerate.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Yield {
+ /// Load a single file into a buffer
+ File(Vec<u8>),
+ /// Enumerate children in a directory
+ Dir(Vec<String>),
+}
+
+enum TreeEntry {
/// A single file
File(File),
/// A sub-tree
@@ -63,74 +102,114 @@ pub enum TreeEntry {
}
impl TreeEntry {
- /// Create a tree entry from a path and `git2::TreeEntry`
- fn generate(root: PathBuf, path_segments: Option<Vec<String>>, entry: git2::TreeEntry) -> Self {
+ fn generate(path_segments: Option<Vec<&str>>, entry: &git2::TreeEntry) -> Self {
let path = path_segments.map_or("".into(), |p| path_segs_join(p));
+ let id = entry.id().into();
+ let name = entry.name().unwrap().into();
match entry.kind() {
- Some(ObjectType::Blob) => Self::File(File::new(root, path)),
- Some(ObjectType::Tree) => Self::Dir(Directory::new(root, path)),
+ Some(ObjectType::Blob) => Self::File(File::new(id, path, name)),
+ Some(ObjectType::Tree) => Self::Dir(Directory::new(id, path, name)),
_ => unimplemented!(),
}
}
- /// Load this tree entry from disk, if it is a file
- ///
- /// When calling this function on a directory, nothing will
- /// happen, because directories can't be loaded. If you want to
- /// get a list of children for a directory, use
- /// [`FileTree::enumerate()`]() instead!
- pub fn load(&self) -> Option<Vec<u8>> {
- if !self.is_file() {
- return None;
- }
+ fn load(&self, repo: &Arc<git2::Repository>) -> Option<Yield> {
+ let id = self.id();
- let obj =
+ match self {
+ Self::File(ref f) => repo
+ .find_blob(id.into())
+ .ok()
+ .map(|b| Yield::File(b.content().into())),
+ Self::Dir(ref d) => repo
+ .find_tree(id.into())
+ .ok()
+ .map(|tree| {
+ let mut children = vec![];
+
+ // Iterate the tree, but only as long as there are no
+ // additional path segments
+ tree.walk(TreeWalkMode::PreOrder, |p, entry| {
+ let path_segs: Vec<_> = p.split("/").filter(|s| s != &"").collect();
+ if path_segs.len() > 0 {
+ TreeWalkResult::Skip
+ } else {
+ // Take the current tree path, and append the
+ // name of whatever entry we're currently
+ // iterating over
+ let path = PathBuf::new().join(self.path()).join(entry.name().unwrap());
+ children.push(path.as_path().to_str().unwrap().into());
+ TreeWalkResult::Ok
+ }
+ });
+
+ children
+ })
+ .map(|c| Yield::Dir(c)),
+ }
}
- /// Check if this tree entry is a file
- pub fn is_file(&self) -> bool {
+ fn is_file(&self) -> bool {
match self {
Self::File(_) => true,
Self::Dir(_) => false,
}
}
+
+ fn id(&self) -> HashId {
+ match self {
+ Self::File(ref f) => f.id.clone(),
+ Self::Dir(ref d) => d.id.clone(),
+ }
+ }
+
+ /// Get the repo-internal path (including name)
+ ///
+ /// This is used to index files in a file tree, to allow direct
+ /// (O(log n), via the `BTreeMap`) access to deeply nested items.
+ fn path(&self) -> String {
+ match self {
+ Self::File(ref f) => PathBuf::new().join(&f.path).join(&f.name),
+ Self::Dir(ref d) => PathBuf::new().join(&d.path).join(&d.name),
+ }
+ .as_path()
+ .to_str()
+ .unwrap()
+ .into()
+ }
}
-/// A file to have ever existed in a git repo
-pub struct File {
- root: PathBuf,
+struct File {
+ id: HashId,
path: String,
+ name: String,
}
impl File {
- pub(crate) fn new(root: PathBuf, path: String) -> Self {
- Self { root, path }
- }
-
- /// Get the history of a file from a branch iterator
- pub fn get_history(&self, branch: BranchIter) -> Vec<Commit> {
- todo!()
+ fn new(id: HashId, path: String, name: String) -> Self {
+ Self { id, path, name }
}
}
-/// A subdirectory in a file tree
-///
-/// A directory has a set of children, which can either be Files, or
-/// other directories. Many of the functions to retrieve metadata
-/// (such as the last commit, count, etc) will be deferred to the
-/// children of this directory.
-pub struct Directory {
- root: PathBuf,
+struct Directory {
+ id: HashId,
path: String,
+ name: String,
}
impl Directory {
- pub(crate) fn new(root: PathBuf, path: String) -> Self {
- Self { root, path }
+ fn new(id: HashId, path: String, name: String) -> Self {
+ Self { id, path, name }
+ }
+
+ fn enumerate(&self, repo: git2::Repository) -> Vec<String> {
+ vec![]
}
}
+////////////////////////////////
+
/// Take a vector of path segments, and turn it into a valid offset path
///
/// There are tests to make sure this function works properly.
@@ -139,7 +218,7 @@ impl Directory {
/// * vec![] -> ""
/// * vec!["foo"] -> "foo"
/// * vec!["foo", "bar", "baz"] -> "foo/bar/baz"
-fn path_segs_join(segments: Vec<String>) -> String {
+fn path_segs_join(segments: Vec<&str>) -> String {
segments
.into_iter()
.fold(PathBuf::new(), |buf, seg| buf.join(seg))
diff --git a/apps/servers/octopus/supergit/src/lib.rs b/apps/servers/octopus/supergit/src/lib.rs
index df30d996ebed..17c9094c32dd 100644
--- a/apps/servers/octopus/supergit/src/lib.rs
+++ b/apps/servers/octopus/supergit/src/lib.rs
@@ -5,8 +5,10 @@
//! repository, consider using that library instead.
//!
//! supergit aims to make queries into a git repo as typed and easy as
-//! possible. Start by creating a [`Repository`](), and enumerating
-//! or fetching [`Branch`]()es that you are interested in.
+//! possible. Start by creating a
+//! [`Repository`](struct.Repository.html), and enumerating or
+//! fetching [`Branch`](struct.Branch.html)es that you are interested
+//! in.
//!
//! Unlike `libgit2`, this library can resolve reverse dependencies
//! between files, and their commit history. Some of these functions
@@ -27,7 +29,7 @@ pub(crate) use repo::HashId;
pub use repo::Repository;
mod files;
-pub use files::{File, FileTree};
+pub use files::{Yield, FileTree};
use async_std::sync::{Arc, RwLock};
use std::sync::atomic::{AtomicUsize, Ordering};
diff --git a/apps/servers/octopus/supergit/src/repo.rs b/apps/servers/octopus/supergit/src/repo.rs
index 37991c3a560f..3d802a929e77 100644
--- a/apps/servers/octopus/supergit/src/repo.rs
+++ b/apps/servers/octopus/supergit/src/repo.rs
@@ -2,7 +2,7 @@
use crate::{Branch, BranchCommit};
use git2::{self, Oid};
-use std::sync::Arc;
+use std::{fmt, sync::Arc};
pub type GitResult<T> = Result<T, GitError>;
@@ -10,6 +10,12 @@ pub type GitResult<T> = Result<T, GitError>;
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct HashId(String);
+impl fmt::Display for HashId {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}", self.0)
+ }
+}
+
impl HashId {
pub fn to_oid(&self) -> Oid {
self.clone().into()
@@ -63,6 +69,7 @@ pub struct Repository {
}
impl Repository {
+ /// Open a repository read-only at a specific path
pub fn open(path: &str) -> GitResult<Self> {
Ok(Self {
inner: Arc::new(git2::Repository::open(path)?),
@@ -71,9 +78,12 @@ impl Repository {
/// Parse branch data from repository
///
+ /// If you only care about a single branch, you can also use the
+ /// convenience function `get_branch()`.
+ ///
/// ## Panics
///
- /// If there is an error around getting the name, or head commit.
+ /// This function can panic when branch metadata is missing.
pub fn branches(&self) -> GitResult<Vec<Branch>> {
Ok(self
.inner
@@ -88,11 +98,17 @@ impl Repository {
.collect())
}
- /// Get the files touched by a commit
- pub fn get_files_for(&self, id: HashId) -> GitResult<Vec<()>> {
- let c = self.inner.find_commit(id.into())?;
- let tree = c.tree()?;
-
- todo!()
+ /// Get a single branch by name
+ ///
+ /// This function will enumerate all branches, and then select the
+ /// desired one. If you want to make repeated queries onto the
+ /// branch set, it's recommended you call `branches()`, and cache
+ /// the data yourself.
+ pub fn get_branch(&self, name: String) -> Option<Branch> {
+ self.branches().ok().and_then(|ok| {
+ ok.into_iter()
+ .filter(|b| b.name().is_some())
+ .find(|b| &b.name().unwrap() == &name)
+ })
}
}