Commit 6b5030b9dbf by Raphaël Gomès

WIP add support for bare hg status

parent de5d34ca01b
......@@ -27,14 +27,12 @@ from . import (
policy,
pycompat,
scmutil,
sparse,
txnutil,
util,
)
from .interfaces import (
dirstate as intdirstate,
util as interfaceutil,
)
from .interfaces import dirstate as intdirstate, util as interfaceutil
parsers = policy.importmod('parsers')
rustmod = policy.importrust('dirstate')
......@@ -1083,6 +1081,76 @@ class dirstate(object):
results[next(iv)] = st
return results
def _rust_status(self, matcher, list_clean, list_ignored, list_unknown):
    """Compute the working copy status through the Rust extension.

    ``matcher`` is passed to ``rustmod.status`` and its ``bad`` callback
    is invoked for every entry of the ``bad`` list reported by Rust.
    ``list_clean``, ``list_ignored`` and ``list_unknown`` are coerced to
    booleans before being handed to Rust.

    Returns a ``(lookup, status)`` pair where ``status`` is a
    ``scmutil.status`` built from the lists returned by Rust.
    """
    ui = self._ui

    # Force Rayon (Rust parallelism library) to respect the number of
    # workers. This is a temporary workaround until Rust code knows
    # how to read the config file.
    cpu_count = ui.configint(b"worker", b"numcpus")
    if cpu_count is not None:
        # setdefault: a value already present in the environment wins.
        encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % cpu_count)

    if not ui.configbool(b"worker", b"enabled", True):
        # Workers disabled: unconditionally restrict Rayon to one thread.
        encoding.environ[b"RAYON_NUM_THREADS"] = b"1"

    rust_result = rustmod.status(
        self._map._rustmap,
        matcher,
        self._rootdir,
        self._ignorefiles(),
        self._checkexec,
        self._lastnormaltime,
        bool(list_clean),
        bool(list_ignored),
        bool(list_unknown),
    )
    (
        lookup,
        modified,
        added,
        removed,
        deleted,
        clean,
        ignored,
        unknown,
        warnings,
        bad,
    ) = rust_result

    if ui.warn:
        for warning in warnings:
            if not isinstance(warning, tuple):
                # A non-tuple warning is the path of a pattern file that
                # could not be read.
                template = _(b"skipping unreadable pattern file '%s': %s\n")
                relpath = pathutil.canonpath(
                    self._rootdir, self._rootdir, warning
                )
                ui.warn(template % (relpath, b"No such file or directory"))
                continue
            # A tuple warning is (file_path, syntax) for an invalid syntax.
            file_path, syntax = warning
            ui.warn(
                _(b"%s: ignoring invalid syntax '%s'\n") % (file_path, syntax)
            )

    for fn, message in bad:
        matcher.bad(fn, encoding.strtolocal(message))

    return (
        lookup,
        scmutil.status(
            modified=modified,
            added=added,
            removed=removed,
            deleted=deleted,
            unknown=unknown,
            ignored=ignored,
            clean=clean,
        ),
    )
def status(self, match, subrepos, ignored, clean, unknown):
'''Determine the status of the working copy relative to the
dirstate and return a pair of (unsure, status), where status is of type
......@@ -1108,65 +1176,33 @@ class dirstate(object):
use_rust = True
allowed_matchers = (matchmod.alwaysmatcher, matchmod.exactmatcher)
allowed_matchers = (
matchmod.alwaysmatcher,
matchmod.exactmatcher,
matchmod.includematcher,
)
if rustmod is None:
use_rust = False
elif subrepos:
use_rust = False
elif bool(listunknown):
# Pathauditor does not exist yet in Rust, unknown files
# can't be trusted.
elif sparse.enabled:
use_rust = False
elif self._ignorefiles() and listignored:
# Rust has no ignore mechanism yet, so don't use Rust for
# commands that need ignore.
elif match.traversedir is not None:
use_rust = False
elif not isinstance(match, allowed_matchers):
# Matchers have yet to be implemented
use_rust = False
if use_rust:
# Force Rayon (Rust parallelism library) to respect the number of
# workers. This is a temporary workaround until Rust code knows
# how to read the config file.
numcpus = self._ui.configint(b"worker", b"numcpus")
if numcpus is not None:
encoding.environ.setdefault(
b'RAYON_NUM_THREADS', b'%d' % numcpus
try:
return self._rust_status(
match, listclean, listignored, listunknown
)
workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
if not workers_enabled:
encoding.environ[b"RAYON_NUM_THREADS"] = b"1"
(
lookup,
modified,
added,
removed,
deleted,
unknown,
clean,
) = rustmod.status(
dmap._rustmap,
match,
self._rootdir,
bool(listclean),
self._lastnormaltime,
self._checkexec,
)
status = scmutil.status(
modified=modified,
added=added,
removed=removed,
deleted=deleted,
unknown=unknown,
ignored=ignored,
clean=clean,
)
return (lookup, status)
except rustmod.FallbackError:
# FIXME warning?
# print("Fallback: ", e)
pass
def noop(f):
pass
......@@ -1248,13 +1284,11 @@ class dirstate(object):
aadd(fn)
elif state == b'r':
radd(fn)
return (
lookup,
scmutil.status(
modified, added, removed, deleted, unknown, ignored, clean
),
status = scmutil.status(
modified, added, removed, deleted, unknown, ignored, clean
)
# print(lookup, status)
return (lookup, status)
def matches(self, match):
'''
......
......@@ -24,7 +24,7 @@ from . import (
)
from .utils import stringutil
rustmod = policy.importrust('filepatterns')
rustmod = policy.importrust('dirstate')
allpatternkinds = (
b're',
......@@ -666,7 +666,10 @@ class _dirchildren(object):
class includematcher(basematcher):
def __init__(self, root, kindpats, badfn=None):
super(includematcher, self).__init__(badfn)
if rustmod is not None:
# We need to pass the patterns to Rust because they can contain
# patterns from the user interface
self._kindpats = kindpats
self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
self._prefix = _prefix(kindpats)
roots, dirs, parents = _rootsdirsandparents(kindpats)
......@@ -719,7 +722,8 @@ class includematcher(basematcher):
return b'this'
if dir in self._parents:
return self._allparentschildren.get(dir) or set()
allparentschildren = self._allparentschildren
return allparentschildren.get(dir) or set()
return set()
@encoding.strmethod
......@@ -1273,15 +1277,6 @@ def _regex(kind, pat, globsuffix):
'''Convert a (normalized) pattern of any kind into a
regular expression.
globsuffix is appended to the regexp of globs.'''
if rustmod is not None:
try:
return rustmod.build_single_regex(kind, pat, globsuffix)
except rustmod.PatternError:
raise error.ProgrammingError(
b'not a regex pattern: %s:%s' % (kind, pat)
)
if not pat and kind in (b'glob', b'relpath'):
return b''
if kind == b're':
......@@ -1554,18 +1549,6 @@ def readpatternfile(filepath, warn, sourceinfo=False):
This is useful to debug ignore patterns.
'''
if rustmod is not None:
result, warnings = rustmod.read_pattern_file(
filepath, bool(warn), sourceinfo,
)
for warning_params in warnings:
# Can't be easily emitted from Rust, because it would require
# a mechanism for both gettext and calling the `warn` function.
warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)
return result
syntaxes = {
b're': b'relre:',
b'regexp': b'relre:',
......
......@@ -4,6 +4,7 @@ version = "0.1.0"
authors = ["Georges Racinet <gracinet@anybox.fr>"]
description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)"
edition = "2018"
build = "build.rs"
[lib]
name = "hg"
......@@ -11,9 +12,19 @@ name = "hg"
[dependencies]
byteorder = "1.3.1"
lazy_static = "1.3.0"
libc = "0.2.66"
memchr = "2.2.0"
rand = "0.6.5"
rand_pcg = "0.1.1"
rayon = "1.2.0"
regex = "1.1.0"
tempfile = "3.1.0"
twox-hash = "1.5.0"
maplit = "1.0.2"
same-file = "1.0.5"
[dev-dependencies]
pretty_assertions = "0.6.1"
[build-dependencies]
cc = "1.0.48"
\ No newline at end of file
use cc;
/// Build script entry point: compiles the C++ shim `src/rust_re2.cpp` as
/// C++11 into `librustre.a` and asks cargo to link the `re2` library.
fn main() {
    let mut shim = cc::Build::new();
    shim.cpp(true);
    shim.flag("-std=c++11");
    shim.file("src/rust_re2.cpp");
    shim.compile("librustre.a");

    println!("cargo:rustc-link-lib=re2");
}
......@@ -8,12 +8,15 @@
//! A multiset of directory names.
//!
//! Used to count the references to directories in a manifest or dirstate.
use crate::utils::hg_path::{HgPath, HgPathBuf};
use crate::{
dirstate::EntryState, utils::files, DirstateEntry, DirstateMapError,
FastHashMap,
dirstate::EntryState,
utils::{
files,
hg_path::{HgPath, HgPathBuf},
},
DirstateEntry, DirstateMapError, FastHashMap,
};
use std::collections::hash_map::{self, Entry};
use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
// could be encapsulated if we cared about API stability more seriously
pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
......@@ -129,6 +132,65 @@ impl DirsMultiset {
}
}
/// This is basically a reimplementation of `DirsMultiset` that stores the
/// children instead of just a count of them, plus a small optional
/// optimization to avoid some directories we don't need.
///
/// All paths are borrowed for `'a`; nothing is copied into the multiset.
#[derive(PartialEq, Debug)]
pub struct DirsChildrenMultiset<'a> {
    // Maps each recorded directory to the set of basenames found under it.
    inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
    // Directories allowed into `inner`; consulted on every insertion.
    only_include: Option<HashSet<&'a HgPath>>,
}
impl<'a> DirsChildrenMultiset<'a> {
    /// Builds the multiset from `paths`, keeping only the directories
    /// listed in `only_include`.
    ///
    /// NOTE(review): when `only_include` is `None` no directory passes the
    /// filter in `add_path`, so the resulting multiset stays empty.
    pub fn new(
        paths: impl Iterator<Item = &'a HgPathBuf>,
        only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
    ) -> Self {
        let allowed: Option<HashSet<&'a HgPath>> = only_include
            .map(|dirs| dirs.iter().map(|dir| dir.as_ref()).collect());

        let mut multiset = Self {
            inner: HashMap::default(),
            only_include: allowed,
        };
        for path in paths {
            multiset.add_path(path);
        }
        multiset
    }

    /// Records, for every `(directory, basename)` pair yielded by
    /// `files::find_dirs_with_base` for `path`, the basename as a child of
    /// the directory, provided the directory is in `only_include`.
    fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
        let path = path.as_ref();
        if path.is_empty() {
            return;
        }

        for (directory, basename) in files::find_dirs_with_base(path) {
            let wanted = match &self.only_include {
                Some(allowed) => allowed.contains(&directory),
                None => false,
            };
            if wanted {
                self.inner.entry(directory).or_default().insert(basename);
            }
        }
    }

    /// Returns the children recorded for `path`, or `None` if `path` was
    /// never recorded as a directory.
    pub fn get(
        &self,
        path: impl AsRef<HgPath>,
    ) -> Option<&HashSet<&'a HgPath>> {
        let key = path.as_ref();
        self.inner.get(key)
    }
}
#[cfg(test)]
mod tests {
use super::*;
......
......@@ -13,7 +13,9 @@ pub use dirstate::{
dirs_multiset::{DirsMultiset, DirsMultisetIter},
dirstate_map::DirstateMap,
parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
status::{status, StatusResult},
status::{
status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
},
CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
StateMap, StateMapIter,
};
......@@ -21,11 +23,13 @@ mod filepatterns;
pub mod matchers;
pub mod revlog;
pub use revlog::*;
pub mod re2;
pub mod utils;
use crate::utils::hg_path::HgPathBuf;
use crate::utils::hg_path::{HgPathBuf, HgPathError};
pub use filepatterns::{
build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
parse_pattern_syntax, read_pattern_file, PatternFileWarning,
PatternSyntax, PatternTuple,
};
use std::collections::HashMap;
use twox_hash::RandomXxHashBuilder64;
......@@ -116,18 +120,31 @@ impl From<DirstatePackError> for DirstateError {
#[derive(Debug)]
pub enum PatternError {
Path(HgPathError),
UnsupportedSyntax(String),
}
#[derive(Debug)]
pub enum PatternFileError {
UnsupportedSyntaxInFile(String, String, usize),
TooLong(usize),
IO(std::io::Error),
Pattern(PatternError, LineNumber),
}
impl From<std::io::Error> for PatternFileError {
fn from(e: std::io::Error) -> Self {
PatternFileError::IO(e)
/// Human-readable rendering of a `PatternError`.
///
/// `Display` is implemented instead of `ToString`: the standard library's
/// blanket `impl<T: Display> ToString for T` keeps `to_string()` available
/// to existing callers, and the error can now also be used with `{}` in
/// format strings.
impl std::fmt::Display for PatternError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            PatternError::UnsupportedSyntax(syntax) => {
                write!(f, "Unsupported syntax {}", syntax)
            }
            PatternError::UnsupportedSyntaxInFile(syntax, file_path, line) => {
                write!(
                    f,
                    "{}:{}: unsupported syntax {}",
                    file_path, line, syntax
                )
            }
            PatternError::TooLong(size) => {
                write!(f, "matcher pattern is too long ({} bytes)", size)
            }
            PatternError::IO(e) => write!(f, "{}", e),
            // `HgPathError` only exposes `to_string()`, so go through it
            // rather than assuming it implements `Display`.
            PatternError::Path(e) => f.write_str(&e.to_string()),
        }
    }
}
......@@ -142,3 +159,15 @@ impl From<std::io::Error> for DirstateError {
DirstateError::IO(e)
}
}
/// Wraps an I/O error in `PatternError::IO`, so that `?` can be used on
/// `std::io` results in functions returning `PatternError`.
impl From<std::io::Error> for PatternError {
    fn from(io_error: std::io::Error) -> Self {
        PatternError::IO(io_error)
    }
}
/// Wraps a path error in `PatternError::Path`, so that `?` can be used on
/// `HgPathError` results in functions returning `PatternError`.
impl From<HgPathError> for PatternError {
    fn from(path_error: HgPathError) -> Self {
        PatternError::Path(path_error)
    }
}
use libc::{c_int, c_void};
/// Opaque pointer to the C++ `RE2` object owned by the shim.
type Re2Ptr = *const c_void;

/// Handle to a compiled RE2 regular expression living on the C++ side.
pub struct Re2(Re2Ptr);

/// Sharing a `Re2` between threads only exposes `is_match`, which calls
/// into the C++ object without mutating the handle.
unsafe impl Sync for Re2 {
    // re2.h says:
    // An "RE2" object is safe for
    // concurrent use by multiple threads
}
// Declarations of the functions exported by the C++ shim. Each signature
// must match its C++ definition exactly: a mismatch is undefined behavior
// that the compiler cannot detect.
extern "C" {
    /// Compiles the `len` bytes at `data` into a new RE2 object and returns
    /// a pointer to it. Check the result with `rustre2_ok`.
    fn rustre2_create(data: *const u8, len: usize) -> Re2Ptr;

    /// Returns whether the pattern behind `re2` compiled successfully.
    fn rustre2_ok(re2: Re2Ptr) -> bool;

    /// Writes a pointer to, and the length of, the error message of `re2`
    /// into `outdata` and `outlen`.
    ///
    /// The C++ definition returns `void`, so no return type is declared
    /// here.
    fn rustre2_error(
        re2: Re2Ptr,
        outdata: *mut *const u8,
        outlen: *mut usize,
    );

    /// Matches the `len` bytes at `data` against `re2`. On the C++ side,
    /// `anchor` 0 means unanchored, 1 anchored at the start, and any other
    /// value anchored at both ends.
    fn rustre2_match(
        re2: Re2Ptr,
        data: *const u8,
        len: usize,
        anchor: c_int,
    ) -> bool;
}
impl Re2 {
    /// Compiles `pattern` with RE2.
    ///
    /// Returns the compiled regex, or the error message reported by RE2
    /// (lossily decoded as UTF-8) if the pattern is invalid.
    ///
    /// NOTE(review): no destructor is declared for the C++ object in this
    /// view, so the allocation looks like it is never released (including
    /// on the error path) - confirm.
    pub fn new(pattern: &[u8]) -> Result<Re2, String> {
        let re2 = unsafe { rustre2_create(pattern.as_ptr(), pattern.len()) };
        if unsafe { rustre2_ok(re2) } {
            return Ok(Re2(re2));
        }

        // Ask the shim for the error message as a (pointer, length) pair.
        let mut data: *const u8 = std::ptr::null();
        let mut len: usize = 0;
        let message = unsafe {
            rustre2_error(re2, &mut data, &mut len);
            std::slice::from_raw_parts(data, len)
        };
        Err(String::from_utf8_lossy(message).to_string())
    }

    /// Returns whether `data` matches the regex, anchored at the start of
    /// `data`.
    pub fn is_match(&self, data: &[u8]) -> bool {
        // 1 selects start-anchored matching in the C++ shim.
        let anchor_start: c_int = 1;
        unsafe {
            rustre2_match(self.0, data.as_ptr(), data.len(), anchor_start)
        }
    }
}
#include <re2/re2.h>
using namespace re2;
// C ABI wrappers around RE2, called from the Rust side.
extern "C" {

// Compiles the `len` bytes at `data` into a heap-allocated RE2 object.
// Compilation failures are not reported here; call rustre2_ok() on the
// result.
RE2* rustre2_create(const char* data, size_t len) {
  RE2::Options o;
  o.set_encoding(RE2::Options::Encoding::EncodingLatin1);
  // Errors are retrieved through rustre2_error() instead of being logged.
  o.set_log_errors(false);
  o.set_max_mem(50000000);
  return new RE2(StringPiece(data, len), o);
}

// Returns whether the pattern behind `re` compiled successfully.
bool rustre2_ok(RE2* re) {
  return re->ok();
}

// Stores a pointer to, and the length of, the error message of `re` in
// `*outdata` / `*outlen`. The pointer refers to the string returned by
// re->error(); it is not copied.
void rustre2_error(RE2* re, const char** outdata, size_t* outlen) {
  const std::string& e = re->error();
  *outdata = e.data();
  *outlen = e.length();
}

// Matches the `len` bytes at `data` against `re`.
// `ianchor`: 0 = unanchored, 1 = anchored at the start, anything else =
// anchored at both ends.
// `data` is only read, hence the pointer-to-const parameter (the Rust
// caller passes a pointer derived from a shared slice).
bool rustre2_match(RE2* re, const char* data, size_t len, int ianchor) {
  const StringPiece sp = StringPiece(data, len);
  RE2::Anchor anchor =
    ianchor == 0 ? RE2::Anchor::UNANCHORED :
    (ianchor == 1 ? RE2::Anchor::ANCHOR_START :
     RE2::Anchor::ANCHOR_BOTH);
  return re->Match(sp, 0, len, anchor, NULL, 0);
}

}
......@@ -7,8 +7,12 @@
//! Contains useful functions, traits, structs, etc. for use in core.
use crate::utils::hg_path::HgPath;
use std::ops::Deref;
pub mod files;
pub mod hg_path;
pub mod path_auditor;
/// Replaces the `from` slice with the `to` slice inside the `buf` slice.
///
......@@ -37,6 +41,26 @@ where
}
}
/// Returns the index of the first occurrence of `needle` in `slice`, or
/// `None` if `needle` does not occur in `slice`.
///
/// An empty `needle` matches at index 0, like `str::find("")` does.
///
/// Useful until rust/issues/56345 is stable
///
/// # Examples
///
/// ```
/// use crate::hg::utils::find_slice_in_slice;
///
/// let haystack = b"This is the haystack".to_vec();
/// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
/// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
/// assert_eq!(find_slice_in_slice(&haystack, b""), Some(0));
/// ```
pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    if needle.is_empty() {
        // `slice::windows` panics on a window size of 0.
        return Some(0);
    }
    slice
        .windows(needle.len())
        .position(|window| window == needle)
}
pub trait SliceExt {
fn trim_end(&self) -> &Self;
fn trim_start(&self) -> &Self;
......@@ -91,3 +115,61 @@ impl SliceExt for [u8] {
}
}
}
/// Lowercase hexadecimal digits, indexed by nibble value.
const HEX_DIGITS: &[u8] = b"0123456789abcdef";

/// Escaped, printable rendering of a value as bytes.
pub trait PrettyPrint {
    fn pretty_print(&self) -> Vec<u8>;
}

impl PrettyPrint for u8 {
    /// Escapes a single byte:
    /// - `'` and `\` are prefixed with a backslash;
    /// - tab, newline and carriage return become two backslashes followed
    ///   by `t`, `n` or `r` (NOTE(review): the doubled backslash comes from
    ///   the raw byte-string literals - confirm it is intended);
    /// - other bytes below 0x20 or at/above 0x7f become `\xNN`;
    /// - every other byte is emitted unchanged.
    fn pretty_print(&self) -> Vec<u8> {
        let byte = *self;
        match byte {
            b'\'' | b'\\' => vec![b'\\', byte],
            b'\t' => br"\\t".to_vec(),
            b'\n' => br"\\n".to_vec(),
            b'\r' => br"\\r".to_vec(),
            _ if byte < b' ' || byte >= 127 => vec![
                b'\\',
                b'x',
                HEX_DIGITS[usize::from(byte >> 4)],
                HEX_DIGITS[usize::from(byte & 0x0f)],
            ],
            _ => vec![byte],
        }
    }
}
impl<'a, T: PrettyPrint> PrettyPrint for &'a [T] {
    /// Concatenates the escaped form of every item, in order.
    fn pretty_print(&self) -> Vec<u8> {
        self.iter()
            .flat_map(|item| item.pretty_print())
            .collect()
    }
}
impl<T: PrettyPrint> PrettyPrint for Vec<T> {
    /// Delegates to the slice implementation.
    fn pretty_print(&self) -> Vec<u8> {
        let items: &[T] = Deref::deref(self);
        items.pretty_print()
    }
}
impl<'a> PrettyPrint for &'a HgPath {
    /// Escapes the raw bytes of the path.
    fn pretty_print(&self) -> Vec<u8> {
        let raw: &[u8] = self.as_bytes();
        raw.pretty_print()
    }
}
......@@ -15,12 +15,33 @@ use std::path::{Path, PathBuf};
pub enum HgPathError {
/// Bytes from the invalid `HgPath`
LeadingSlash(Vec<u8>),
/// Bytes and index of the second slash
ConsecutiveSlashes(Vec<u8>, usize),
/// Bytes and index of the null byte
ContainsNullByte(Vec<u8>, usize),
ConsecutiveSlashes {
bytes: Vec<u8>,
second_slash_index: usize,
},
ContainsNullByte {
bytes: Vec<u8>,
null_byte_index: usize,
},
/// Bytes
DecodeError(Vec<u8>),
/// The rest come from audit errors
EndsWithSlash(HgPathBuf),
ContainsIllegalComponent(HgPathBuf),
IsInsideNestedRepo {
path: HgPathBuf,
nested_repo: HgPathBuf,
},
TraversesSymbolicLink {
path: HgPathBuf,
symlink: HgPathBuf,
},
NotFsCompliant(HgPathBuf),
/// `path` is the smallest invalid path
NotUnderRoot {
path: PathBuf,
root: PathBuf,
},
}
impl ToString for HgPathError {
......@@ -29,17 +50,51 @@ impl ToString for HgPathError {
HgPathError::LeadingSlash(bytes) => {
format!("Invalid HgPath '{:?}': has a leading slash.", bytes)
}
HgPathError::ConsecutiveSlashes(bytes, pos) => format!(
"Invalid HgPath '{:?}': consecutive slahes at pos {}.",
HgPathError::ConsecutiveSlashes {
bytes,
second_slash_index: pos,
} => format!(
"Invalid HgPath '{:?}': consecutive slashes at pos {}.",
bytes, pos
),
HgPathError::ContainsNullByte(bytes, pos) => format!(
HgPathError::ContainsNullByte {
bytes,
null_byte_index: pos,
} => format!(
"Invalid HgPath '{:?}': contains null byte at pos {}.",
bytes, pos
),
HgPathError::DecodeError(bytes) => {
format!("Invalid HgPath '{:?}': could not be decoded.", bytes)
}
HgPathError::EndsWithSlash(path) => {
format!("Audit failed for '{}': ends with a slash.", path)