From 0728c2f325e2eaac2c3b834260a8d0a97afaff63 Mon Sep 17 00:00:00 2001 From: Mx Kookie Date: Tue, 10 Nov 2020 18:10:03 +0100 Subject: supergit: WIP, trying to add branching iterators This commit is here for the history of my madness. Rather than trying to teach the iterator API to be branching, I should add a management wrapper around it, which can decide for each commit it sees whether it would like to follow a branch or not. This way it's also much easier to terminate a branch when we realise that we've seen it before (or disable that functionality), without having to cram more features into the same abstraction. --- apps/servers/octopus/supergit/src/bin/test.rs | 14 +-- apps/servers/octopus/supergit/src/branch.rs | 168 ++++++++++++++++++++++++-- 2 files changed, 162 insertions(+), 20 deletions(-) (limited to 'apps/servers/octopus/supergit') diff --git a/apps/servers/octopus/supergit/src/bin/test.rs b/apps/servers/octopus/supergit/src/bin/test.rs index cb21d5c25645..6b9a67b75813 100644 --- a/apps/servers/octopus/supergit/src/bin/test.rs +++ b/apps/servers/octopus/supergit/src/bin/test.rs @@ -23,11 +23,11 @@ fn main() { let head = main.get_head(); let tree = head.get_tree(); - // println!( - // "{:?}", - // tree.history(main.get_all(), "infra/libkookie/nixpkgs/nixos/modules/module-list.nix") - // .into_iter() - // .map(|c| c.summary()) - // .collect::>() - // ); + println!( + "{:#?}", + tree.history(main.get_all(), "Cargo.toml") + .into_iter() + .map(|c| c.summary()) + .collect::>() + ); } diff --git a/apps/servers/octopus/supergit/src/branch.rs b/apps/servers/octopus/supergit/src/branch.rs index 6b838f4ff420..bbbf96dbf2c5 100644 --- a/apps/servers/octopus/supergit/src/branch.rs +++ b/apps/servers/octopus/supergit/src/branch.rs @@ -1,7 +1,13 @@ use crate::{Commit, HashId}; use atomptr::AtomPtr; use git2::Repository; -use std::{mem, sync::Arc}; +use std::{ + mem, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, +}; /// Abstraction for a branch history slice /// @@ -125,8 +131,8 @@ impl Branch { /// handles. This means that without explicitly branching, this /// iterator is first-parent. pub struct BranchIter { - rec: AtomPtr, - splits: AtomPtr>, + mode: AtomPtr, + splits: IterData, repo: Arc, curr: Option, limit: SegLimit, @@ -136,8 +142,8 @@ impl BranchIter { /// Create a new branch segment iterator fn new(repo: Arc, last: HashId, limit: SegLimit) -> Self { Self { - rec: AtomPtr::new(IterMode::FirstParent), - splits: AtomPtr::new(vec![]), + mode: AtomPtr::new(IterMode::FirstParent), + splits: IterData::new(), repo, curr: Some(last), limit, @@ -176,12 +182,75 @@ impl BranchIter { } } - /// Get the current commit + /// Determine which commit should be looked at next /// - /// This function looks either at the "curr" field, or takes the - /// ID from `cmd`, if it is set to `IterCmd::Jump(...)`, which - /// indicates that the previous commit was a merge, and we need to escape - fn set_next(&mut self, current: Commit) -> Commit { + /// FirstParent and DepthFirst iterators will take the next + /// first_parent if available. DepthFirst iterators will fall + /// back to the last split point, if no `first_parent()` exists. + /// BreathFirst iterators will always prefer a new branch over + /// first_parent, and jump back to the last split if there are no + /// parents. + fn determine_next(&mut self, current: Commit) -> Commit { + let mode = &**self.mode.get_ref(); + self.curr = match mode { + IterMode::FirstParent => match current.first_parent() { + Some(p1) => Some(p1.id), + None => None, + }, + // DepthFirst iterates normally until we hit the end of + // the branch, then we "jump back" to an earlier commit. + IterMode::DepthFirst => match current.first_parent() { + Some(p1) => Some(p1.id), + None => { + // Get the last split point. If there are no + // more, terminate the iterator. If there are, + // increment brnum and keep going. If brnum is + // higher than the parent count, call this + // function again to get the next split point (and + // reset brnum) + self.splits.next().map(|id| { + let brnum = self.splits.incr_brnum(); + + // ALSO: when brnum is LOWER than parent point, + // re-insert split, to allow the commit to be + // jumped to again! + if brnum < current.parent_count() { + self.splits.re_insert(id.clone()); + } + + let com = self.find_commit(&id).unwrap(); + if brnum > current.parent_count() { + self.splits.reset_brnum(); + self.determine_next(com).id + } else { + id + } + }) + } + }, + // // When there is only one parent, chose that parent. When + // // there is none, jump back to the last split point, + // // according to brnum. If brnum is then greater than the + // // number of branches, reset brnum and re-call this + // // function. + // IterMode::BreadthFirst if current.parent_count() <= 1 => match current.first_parent() { + // Some(p1) => Some(p1.id), + // None => self.splits.next().map(|id| { + // let brnum = self.splits.incr_brnum(); + // }), + // }, + + // // When there are is more than 1 parent, set this commit + // // as a split point, and chose the last parent commit as + // // the next commit to walk. This shifts the active branch + // // over. Important: we set brnum to the _highest_ branch, + // // and iterate inwards. + // IterMode::BreadthFirst => { + + // }, + _ => todo!(), + }; + self.curr = match current.first_parent() { Some(p1) => Some(p1.id), None => None, @@ -195,9 +264,28 @@ impl Iterator for BranchIter { type Item = BranchCommit; fn next(&mut self) -> Option { - mem::replace(&mut self.curr, None) - .and_then(|id| self.find_commit(&id)) - .map(|c| self.set_next(c)) + let mode = &**self.mode.get_ref(); + + let id = match mode { + // When iterating first-parent OR when going depth first, + // while the current branch still has children, take the + // next child of the current branch. + IterMode::FirstParent => mem::replace(&mut self.curr, None), + IterMode::DepthFirst | IterMode::BreadthFirst if self.curr.is_some() => { + mem::replace(&mut self.curr, None) + } + // When going both BreadthFirst or DepthFirst, reaching + // the end of the current branch means getting the last + // split point. The difference between these two + // strategies is in how the split points and "current + // point" are stored. + _ if self.curr.is_none() => self.splits.next(), + _ => unreachable!(), // can't be reached + }; + + // The chain of events + id.and_then(|id| self.find_commit(&id)) + .map(|c| self.determine_next(c)) .and_then(|c| match self.limit { SegLimit::None => Some(c), SegLimit::Commit(ended, _) if ended => None, @@ -285,17 +373,64 @@ impl BranchCommit { } /// Additional iterator data +/// +/// This structure tracks split points on the iterator. When +/// traversing a branch, the iterator can reach branch points. While +/// in `IterMode::FirstParent`, these are irrelevant. However: when +/// iterating breadth or depth first these need to be tracked. +/// +/// ## Depth first +/// +/// - When a branch is encountered, append it to this data set. +/// - When the end of the current branch is reached, get the next +/// split point to resume from +/// +/// ## Breadth first +/// +/// - When a branch is encountered, add the previous commit to this set +/// - When reaching the end of a branch, get the next split point to +/// resume from +/// +/// --- +/// +/// In essense, the usage of this structure for BreadthFirst and +/// DepthFirst are inverted! struct IterData { + /// Split points on the iterator splits: AtomPtr>, + /// Additional data for octopus merges + brnum: AtomicUsize, } impl IterData { fn new() -> Self { Self { splits: AtomPtr::new(vec![]), + brnum: AtomicUsize::new(0), } } + /// Check if the split set is empty + fn empty(&self) -> bool { + self.splits.get_ref().len() == 0 + } + + fn set_brnum(&self, num: usize) { + self.brnum.swap(num, Ordering::Relaxed); + } + + fn incr_brnum(&self) -> usize { + self.brnum.fetch_add(1, Ordering::Relaxed) + 1 + } + + fn decr_brnum(&self) -> usize { + self.brnum.fetch_sub(1, Ordering::Relaxed) - 1 + } + + fn reset_brnum(&self) { + self.set_brnum(0); + } + fn append(&self, id: HashId) { let mut vec = (**self.splits.get_ref()).clone(); let mut new = vec![id]; @@ -303,6 +438,13 @@ impl IterData { self.splits.swap(new); } + /// Insert a hashID to the front of the splits list + fn re_insert(&self, id: HashId) { + let mut vec = (**self.splits.get_ref()).clone(); + vec.insert(0, id); + self.splits.swap(vec); + } + fn next(&self) -> Option { let mut vec = (**self.splits.get_ref()).clone(); let next = if vec.len() < 0 { -- cgit v1.2.3