Commit b2d51c68450 by Georges Racinet

rust-nodemap: core implementation for shortest

In this implementation, we just make `lookup()` return also the number
of steps that have been needed to come to a conclusion from the
nodetree data, and `validate_candidate()` takes care of the special
cases related to `NULL_NODE`.

This way of doing minimizes code duplication, but it means that
the comparatively slower finding of first non zero nybble will run
for all calls to `find()` where it is not needed.

Still running on the file generated for the mozilla-central repository,
it seems indeed that we now get more ofter 320 ns than 310. The odds that
this could have a significant impact on real life Mercurial performance
are still looking low. Let's wait for actual benchmark runs to see if
an optimization is needed here.

Differential Revision: https://phab.mercurial-scm.org/D7819
parent 167ccaf6062
......@@ -127,6 +127,36 @@ impl<'a> NodePrefixRef<'a> {
pub fn get_nybble(&self, i: usize) -> u8 {
get_nybble(i, self.buf)
}
/// Return the index first nybble that's different from `node`
///
/// If the return value is `None` that means that `self` is
/// a prefix of `node`, but the current method is a bit slower
/// than `is_prefix_of`.
///
/// Returned index is as in `get_nybble`, i.e., starting at 0.
pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
let buf = self.buf;
let until = if self.is_odd {
buf.len() - 1
} else {
buf.len()
};
for i in 0..until {
if buf[i] != node[i] {
if buf[i] & 0xf0 == node[i] & 0xf0 {
return Some(2 * i + 1);
} else {
return Some(2 * i);
}
}
}
if self.is_odd && buf[until] & 0xf0 != node[until] & 0xf0 {
Some(until * 2)
} else {
None
}
}
}
/// A shortcut for full `Node` references
......@@ -235,4 +265,34 @@ mod tests {
assert_eq!(prefix.borrow().get_nybble(7), 9);
Ok(())
}
#[test]
fn test_first_different_nybble_even_prefix() {
let prefix = NodePrefix::from_hex("12ca").unwrap();
let prefref = prefix.borrow();
let mut node: Node = [0; 20];
assert_eq!(prefref.first_different_nybble(&node), Some(0));
node[0] = 0x13;
assert_eq!(prefref.first_different_nybble(&node), Some(1));
node[0] = 0x12;
assert_eq!(prefref.first_different_nybble(&node), Some(2));
node[1] = 0xca;
// now it is a prefix
assert_eq!(prefref.first_different_nybble(&node), None);
}
#[test]
fn test_first_different_nybble_odd_prefix() {
let prefix = NodePrefix::from_hex("12c").unwrap();
let prefref = prefix.borrow();
let mut node: Node = [0; 20];
assert_eq!(prefref.first_different_nybble(&node), Some(0));
node[0] = 0x13;
assert_eq!(prefref.first_different_nybble(&node), Some(1));
node[0] = 0x12;
assert_eq!(prefref.first_different_nybble(&node), Some(2));
node[1] = 0xca;
// now it is a prefix
assert_eq!(prefref.first_different_nybble(&node), None);
}
}
......@@ -16,6 +16,7 @@ use super::{
node::get_nybble, node::NULL_NODE, Node, NodeError, NodePrefix,
NodePrefixRef, Revision, RevlogIndex, NULL_REVISION,
};
use std::cmp::max;
use std::fmt;
use std::mem;
use std::ops::Deref;
......@@ -47,6 +48,20 @@ pub trait NodeMap {
prefix: NodePrefixRef<'a>,
) -> Result<Option<Revision>, NodeMapError>;
/// Give the size of the shortest node prefix that determines
/// the revision uniquely.
///
/// From a binary node prefix, if it is matched in the node map, this
/// returns the number of hexadecimal digits that would had sufficed
/// to find the revision uniquely.
///
/// Returns `None` if no `Revision` could be found for the prefix.
fn shortest_bin<'a>(
&self,
idx: &impl RevlogIndex,
node_prefix: NodePrefixRef<'a>,
) -> Result<Option<usize>, NodeMapError>;
fn find_hex(
&self,
idx: &impl RevlogIndex,
......@@ -54,6 +69,16 @@ pub trait NodeMap {
) -> Result<Option<Revision>, NodeMapError> {
self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
}
/// Same as `shortest_bin`, with the hexadecimal representation of the
/// prefix as input.
fn shortest_hex(
&self,
idx: &impl RevlogIndex,
prefix: &str,
) -> Result<Option<usize>, NodeMapError> {
self.shortest_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
}
}
pub trait MutableNodeMap: NodeMap {
......@@ -217,20 +242,24 @@ fn has_prefix_or_none<'p>(
fn validate_candidate<'p>(
idx: &impl RevlogIndex,
prefix: NodePrefixRef<'p>,
rev: Option<Revision>,
) -> Result<Option<Revision>, NodeMapError> {
if prefix.is_prefix_of(&NULL_NODE) {
// NULL_REVISION always matches a prefix made only of zeros
cand: (Option<Revision>, usize),
) -> Result<(Option<Revision>, usize), NodeMapError> {
let (rev, steps) = cand;
if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
rev.map_or(Ok((None, steps)), |r| {
has_prefix_or_none(idx, prefix, r)
.map(|opt| (opt, max(steps, nz_nybble + 1)))
})
} else {
// the prefix is only made of zeros; NULL_REVISION always matches it
// and any other *valid* result is an ambiguity
match rev {
None => Ok(Some(NULL_REVISION)),
None => Ok((Some(NULL_REVISION), steps + 1)),
Some(r) => match has_prefix_or_none(idx, prefix, r)? {
None => Ok(Some(NULL_REVISION)),
None => Ok((Some(NULL_REVISION), steps + 1)),
_ => Err(NodeMapError::MultipleResults),
},
}
} else {
rev.map_or(Ok(None), |r| has_prefix_or_none(idx, prefix, r))
}
}
......@@ -310,11 +339,13 @@ impl NodeTree {
fn lookup<'p>(
&self,
prefix: NodePrefixRef<'p>,
) -> Result<Option<Revision>, NodeMapError> {
) -> Result<(Option<Revision>, usize), NodeMapError> {
let mut i = 1;
for (leaf, _, _, opt) in self.visit(prefix) {
if leaf {
return Ok(opt);
return Ok((opt, i));
}
i += 1;
}
Err(NodeMapError::MultipleResults)
}
......@@ -542,6 +573,16 @@ impl NodeMap for NodeTree {
prefix: NodePrefixRef<'a>,
) -> Result<Option<Revision>, NodeMapError> {
validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
.map(|(opt, _shortest)| opt)
}
fn shortest_bin<'a>(
&self,
idx: &impl RevlogIndex,
prefix: NodePrefixRef<'a>,
) -> Result<Option<usize>, NodeMapError> {
validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
.map(|(opt, shortest)| opt.map(|_rev| shortest))
}
}
......@@ -667,6 +708,7 @@ mod tests {
assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
assert_eq!(nt.shortest_hex(&idx, "00a"), Ok(Some(3)));
assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
}
......@@ -686,8 +728,10 @@ mod tests {
};
assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
assert_eq!(nt.shortest_hex(&idx, "c")?, Some(1));
assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
assert_eq!(nt.shortest_hex(&idx, "000")?, Some(3));
assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
Ok(())
}
......@@ -723,6 +767,13 @@ mod tests {
self.nt.find_hex(&self.index, prefix)
}
fn shortest_hex(
&self,
prefix: &str,
) -> Result<Option<usize>, NodeMapError> {
self.nt.shortest_hex(&self.index, prefix)
}
/// Drain `added` and restart a new one
fn commit(self) -> Self {
let mut as_vec: Vec<Block> =
......@@ -775,6 +826,28 @@ mod tests {
}
#[test]
fn test_shortest_zero_prefix() {
let mut idx = TestNtIndex::new();
idx.insert(0, "00000abcd").unwrap();
assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
// in the nodetree proper, this will be found at the first nybble
// yet the correct answer for shortest is not 1, nor 1+1, but the
// the first difference with `NULL_NODE`
assert_eq!(idx.shortest_hex("00000a"), Ok(Some(6)));
assert_eq!(idx.shortest_hex("00000ab"), Ok(Some(6)));
// same with odd result
idx.insert(1, "00123").unwrap();
assert_eq!(idx.shortest_hex("001"), Ok(Some(3)));
assert_eq!(idx.shortest_hex("0012"), Ok(Some(3)));
// these are unchanged of course
assert_eq!(idx.shortest_hex("00000a"), Ok(Some(6)));
assert_eq!(idx.shortest_hex("00000ab"), Ok(Some(6)));
}
#[test]
fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
// check that the splitting loop is long enough
let mut nt_idx = TestNtIndex::new();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment