supergit: WIP, trying to add branching iterators

This commit is here for the history of my madness. Rather than trying to teach the iterator API to be branching, I should add a management wrapper around it, which can decide for each commit it sees whether it would like to follow a branch or not. This way it's also much easier to terminate a branch when we realise that we've seen it before (or disable that functionality), without having to cram more features into the same abstraction.
author: Mx Kookie <kookie@spacekookie.de> 2020-11-10 18:10:03 +0100
committer: Mx Kookie <kookie@spacekookie.de> 2020-11-11 14:06:08 +0100
commit: 0728c2f325e2eaac2c3b834260a8d0a97afaff63 (patch)
tree: 41bf27b47fe55454bbf8b12b1a5aeaae53e633db /apps/servers/octopus/supergit
parent: 57af96a71849c3aefb3f2923c75447d4e43bca2a (diff)
2 files changed, 162 insertions, 20 deletions
diff --git a/apps/servers/octopus/supergit/src/bin/test.rs b/apps/servers/octopus/supergit/src/bin/test.rs
index cb21d5c25645..6b9a67b75813 100644
--- a/apps/servers/octopus/supergit/src/bin/test.rs
+++ b/apps/servers/octopus/supergit/src/bin/test.rs
@@ -23,11 +23,11 @@ fn main() {
     let head = main.get_head();
     let tree = head.get_tree();
     
-    // println!(
-    //     "{:?}",
-    //     tree.history(main.get_all(), "infra/libkookie/nixpkgs/nixos/modules/module-list.nix")
-    //         .into_iter()
-    //         .map(|c| c.summary())
-    //         .collect::<Vec<_>>()
-    // );
+    println!(
+        "{:#?}",
+        tree.history(main.get_all(), "Cargo.toml")
+            .into_iter()
+            .map(|c| c.summary())
+            .collect::<Vec<_>>()
+    );
 }
diff --git a/apps/servers/octopus/supergit/src/branch.rs b/apps/servers/octopus/supergit/src/branch.rs
index 6b838f4ff420..bbbf96dbf2c5 100644
--- a/apps/servers/octopus/supergit/src/branch.rs
+++ b/apps/servers/octopus/supergit/src/branch.rs
@@ -1,7 +1,13 @@
 use crate::{Commit, HashId};
 use atomptr::AtomPtr;
 use git2::Repository;
-use std::{mem, sync::Arc};
+use std::{
+    mem,
+    sync::{
+        atomic::{AtomicUsize, Ordering},
+        Arc,
+    },
+};
 
 /// Abstraction for a branch history slice
 ///
@@ -125,8 +131,8 @@ impl Branch {
 /// handles.  This means that without explicitly branching, this
 /// iterator is first-parent.
 pub struct BranchIter {
-    rec: AtomPtr<IterMode>,
-    splits: AtomPtr<Vec<HashId>>,
+    mode: AtomPtr<IterMode>,
+    splits: IterData,
     repo: Arc<Repository>,
     curr: Option<HashId>,
     limit: SegLimit,
@@ -136,8 +142,8 @@ impl BranchIter {
     /// Create a new branch segment iterator
     fn new(repo: Arc<Repository>, last: HashId, limit: SegLimit) -> Self {
         Self {
-            rec: AtomPtr::new(IterMode::FirstParent),
-            splits: AtomPtr::new(vec![]),
+            mode: AtomPtr::new(IterMode::FirstParent),
+            splits: IterData::new(),
             repo,
             curr: Some(last),
             limit,
@@ -176,12 +182,75 @@ impl BranchIter {
         }
     }
 
-    /// Get the current commit
+    /// Determine which commit should be looked at next
     ///
-    /// This function looks either at the "curr" field, or takes the
-    /// ID from `cmd`, if it is set to `IterCmd::Jump(...)`, which
-    /// indicates that the previous commit was a merge, and we need to escape
-    fn set_next(&mut self, current: Commit) -> Commit {
+    /// FirstParent and DepthFirst iterators will take the next
+    /// first_parent if available.  DepthFirst iterators will fall
+    /// back to the last split point, if no `first_parent()` exists.
+    /// BreathFirst iterators will always prefer a new branch over
+    /// first_parent, and jump back to the last split if there are no
+    /// parents.
+    fn determine_next(&mut self, current: Commit) -> Commit {
+        let mode = &**self.mode.get_ref();
+        self.curr = match mode {
+            IterMode::FirstParent => match current.first_parent() {
+                Some(p1) => Some(p1.id),
+                None => None,
+            },
+            // DepthFirst iterates normally until we hit the end of
+            // the branch, then we "jump back" to an earlier commit.
+            IterMode::DepthFirst => match current.first_parent() {
+                Some(p1) => Some(p1.id),
+                None => {
+                    // Get the last split point.  If there are no
+                    // more, terminate the iterator.  If there are,
+                    // increment brnum and keep going.  If brnum is
+                    // higher than the parent count, call this
+                    // function again to get the next split point (and
+                    // reset brnum)
+                    self.splits.next().map(|id| {
+                        let brnum = self.splits.incr_brnum();
+
+                        // ALSO: when brnum is LOWER than parent point,
+                        // re-insert split, to allow the commit to be
+                        // jumped to again!
+                        if brnum < current.parent_count() {
+                            self.splits.re_insert(id.clone());
+                        }
+
+                        let com = self.find_commit(&id).unwrap();
+                        if brnum > current.parent_count() {
+                            self.splits.reset_brnum();
+                            self.determine_next(com).id
+                        } else {
+                            id
+                        }
+                    })
+                }
+            },
+            // // When there is only one parent, chose that parent.  When
+            // // there is none, jump back to the last split point,
+            // // according to brnum.  If brnum is then greater than the
+            // // number of branches, reset brnum and re-call this
+            // // function.
+            // IterMode::BreadthFirst if current.parent_count() <= 1 => match current.first_parent() {
+            //     Some(p1) => Some(p1.id),
+            //     None => self.splits.next().map(|id| {
+            //         let brnum = self.splits.incr_brnum();
+            //     }),
+            // },
+
+            // // When there are is more than 1 parent, set this commit
+            // // as a split point, and chose the last parent commit as
+            // // the next commit to walk.  This shifts the active branch
+            // // over.  Important: we set brnum to the _highest_ branch,
+            // // and iterate inwards.
+            // IterMode::BreadthFirst => {
+
+            // },
+            _ => todo!(),
+        };
+
         self.curr = match current.first_parent() {
             Some(p1) => Some(p1.id),
             None => None,
@@ -195,9 +264,28 @@ impl Iterator for BranchIter {
     type Item = BranchCommit;
 
     fn next(&mut self) -> Option<Self::Item> {
-        mem::replace(&mut self.curr, None)
-            .and_then(|id| self.find_commit(&id))
-            .map(|c| self.set_next(c))
+        let mode = &**self.mode.get_ref();
+
+        let id = match mode {
+            // When iterating first-parent OR when going depth first,
+            // while the current branch still has children, take the
+            // next child of the current branch.
+            IterMode::FirstParent => mem::replace(&mut self.curr, None),
+            IterMode::DepthFirst | IterMode::BreadthFirst if self.curr.is_some() => {
+                mem::replace(&mut self.curr, None)
+            }
+            // When going both BreadthFirst or DepthFirst, reaching
+            // the end of the current branch means getting the last
+            // split point.  The difference between these two
+            // strategies is in how the split points and "current
+            // point" are stored.
+            _ if self.curr.is_none() => self.splits.next(),
+            _ => unreachable!(), // can't be reached
+        };
+
+        // The chain of events
+        id.and_then(|id| self.find_commit(&id))
+            .map(|c| self.determine_next(c))
             .and_then(|c| match self.limit {
                 SegLimit::None => Some(c),
                 SegLimit::Commit(ended, _) if ended => None,
@@ -285,17 +373,64 @@ impl BranchCommit {
 }
 
 /// Additional iterator data
+///
+/// This structure tracks split points on the iterator.  When
+/// traversing a branch, the iterator can reach branch points.  While
+/// in `IterMode::FirstParent`, these are irrelevant.  However: when
+/// iterating breadth or depth first these need to be tracked.
+///
+/// ## Depth first
+///
+/// - When a branch is encountered, append it to this data set.
+/// - When the end of the current branch is reached, get the next
+///   split point to resume from
+///
+/// ## Breadth first
+///
+/// - When a branch is encountered, add the previous commit to this set
+/// - When reaching the end of a branch, get the next split point to
+///   resume from
+///
+/// ---
+///
+/// In essense, the usage of this structure for BreadthFirst and
+/// DepthFirst are inverted!
 struct IterData {
+    /// Split points on the iterator
     splits: AtomPtr<Vec<HashId>>,
+    /// Additional data for octopus merges
+    brnum: AtomicUsize,
 }
 
 impl IterData {
     fn new() -> Self {
         Self {
             splits: AtomPtr::new(vec![]),
+            brnum: AtomicUsize::new(0),
         }
     }
 
+    /// Check if the split set is empty
+    fn empty(&self) -> bool {
+        self.splits.get_ref().len() == 0
+    }
+
+    fn set_brnum(&self, num: usize) {
+        self.brnum.swap(num, Ordering::Relaxed);
+    }
+
+    fn incr_brnum(&self) -> usize {
+        self.brnum.fetch_add(1, Ordering::Relaxed) + 1
+    }
+
+    fn decr_brnum(&self) -> usize {
+        self.brnum.fetch_sub(1, Ordering::Relaxed) - 1
+    }
+
+    fn reset_brnum(&self) {
+        self.set_brnum(0);
+    }
+
     fn append(&self, id: HashId) {
         let mut vec = (**self.splits.get_ref()).clone();
         let mut new = vec![id];
@@ -303,6 +438,13 @@ impl IterData {
         self.splits.swap(new);
     }
 
+    /// Insert a hashID to the front of the splits list
+    fn re_insert(&self, id: HashId) {
+        let mut vec = (**self.splits.get_ref()).clone();
+        vec.insert(0, id);
+        self.splits.swap(vec);
+    }
+
     fn next(&self) -> Option<HashId> {
         let mut vec = (**self.splits.get_ref()).clone();
         let next = if vec.len() < 0 {
author	Mx Kookie <kookie@spacekookie.de>	2020-11-10 18:10:03 +0100
committer	Mx Kookie <kookie@spacekookie.de>	2020-11-11 14:06:08 +0100
commit	0728c2f325e2eaac2c3b834260a8d0a97afaff63 (patch)
tree	41bf27b47fe55454bbf8b12b1a5aeaae53e633db /apps/servers/octopus/supergit
parent	57af96a71849c3aefb3f2923c75447d4e43bca2a (diff)