...
 
Commits (10)
......@@ -27,6 +27,7 @@ from . import (
policy,
pycompat,
scmutil,
sparse,
txnutil,
util,
)
......@@ -1083,15 +1084,13 @@ class dirstate(object):
results[next(iv)] = st
return results
def _rust_status(self, matcher, list_clean):
def _rust_status(self, matcher, list_clean, list_ignored, list_unknown):
# Force Rayon (Rust parallelism library) to respect the number of
# workers. This is a temporary workaround until Rust code knows
# how to read the config file.
numcpus = self._ui.configint(b"worker", b"numcpus")
if numcpus is not None:
encoding.environ.setdefault(
b'RAYON_NUM_THREADS', b'%d' % numcpus
)
encoding.environ.setdefault(b'RAYON_NUM_THREADS', b'%d' % numcpus)
workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
if not workers_enabled:
......@@ -1103,16 +1102,45 @@ class dirstate(object):
added,
removed,
deleted,
unknown,
clean,
ignored,
unknown,
warnings,
bad,
) = rustmod.status(
self._map._rustmap,
matcher,
self._rootdir,
bool(list_clean),
self._lastnormaltime,
self._ignorefiles(),
self._checkexec,
self._lastnormaltime,
bool(list_clean),
bool(list_ignored),
bool(list_unknown),
)
if self._ui.warn:
for item in warnings:
if isinstance(item, tuple):
file_path, syntax = item
msg = _(b"%s: ignoring invalid syntax '%s'\n") % (
file_path,
syntax,
)
self._ui.warn(msg)
else:
msg = _(b"skipping unreadable pattern file '%s': %s\n")
self._ui.warn(
msg
% (
pathutil.canonpath(
self._rootdir, self._rootdir, item
),
b"No such file or directory",
)
)
for (fn, message) in bad:
matcher.bad(fn, encoding.strtolocal(message))
status = scmutil.status(
modified=modified,
......@@ -1120,9 +1148,10 @@ class dirstate(object):
removed=removed,
deleted=deleted,
unknown=unknown,
ignored=[],
ignored=ignored,
clean=clean,
)
# print(lookup, status)
return (lookup, status)
def status(self, match, subrepos, ignored, clean, unknown):
......@@ -1150,26 +1179,78 @@ class dirstate(object):
use_rust = True
allowed_matchers = (matchmod.alwaysmatcher, matchmod.exactmatcher)
allowed_matchers = (
matchmod.alwaysmatcher,
matchmod.exactmatcher,
matchmod.includematcher,
)
if rustmod is None:
use_rust = False
elif subrepos:
use_rust = False
elif bool(listunknown):
# Pathauditor does not exist yet in Rust, unknown files
# can't be trusted.
elif sparse.enabled:
use_rust = False
elif self._ignorefiles() and listignored:
# Rust has no ignore mechanism yet, so don't use Rust for
# commands that need ignore.
elif match.traversedir is not None:
use_rust = False
elif not isinstance(match, allowed_matchers):
# Matchers have yet to be implemented
use_rust = False
if use_rust:
<<<<<<< dest
return self._rust_status(match, listclean)
||||||| base
# Force Rayon (Rust parallelism library) to respect the number of
# workers. This is a temporary workaround until Rust code knows
# how to read the config file.
numcpus = self._ui.configint(b"worker", b"numcpus")
if numcpus is not None:
encoding.environ.setdefault(
b'RAYON_NUM_THREADS', b'%d' % numcpus
)
workers_enabled = self._ui.configbool(b"worker", b"enabled", True)
if not workers_enabled:
encoding.environ[b"RAYON_NUM_THREADS"] = b"1"
(
lookup,
modified,
added,
removed,
deleted,
unknown,
clean,
) = rustmod.status(
dmap._rustmap,
match,
self._rootdir,
bool(listclean),
self._lastnormaltime,
self._checkexec,
)
status = scmutil.status(
modified=modified,
added=added,
removed=removed,
deleted=deleted,
unknown=unknown,
ignored=ignored,
clean=clean,
)
return (lookup, status)
=======
try:
return self._rust_status(
match, listclean, listignored, listunknown
)
except rustmod.FallbackError:
# FIXME warning?
# print("Fallback: ", e)
pass
>>>>>>> source
def noop(f):
pass
......@@ -1251,13 +1332,11 @@ class dirstate(object):
aadd(fn)
elif state == b'r':
radd(fn)
return (
lookup,
scmutil.status(
modified, added, removed, deleted, unknown, ignored, clean
),
status = scmutil.status(
modified, added, removed, deleted, unknown, ignored, clean
)
# print(lookup, status)
return (lookup, status)
def matches(self, match):
'''
......
......@@ -24,7 +24,7 @@ from . import (
)
from .utils import stringutil
rustmod = policy.importrust('filepatterns')
rustmod = policy.importrust('dirstate')
allpatternkinds = (
b're',
......@@ -45,6 +45,7 @@ cwdrelativepatternkinds = (b'relpath', b'glob')
propertycache = util.propertycache
rustmod = policy.importrust('dirstate')
def _rematcher(regex):
'''compile the regexp with the best available regexp engine and return a
......@@ -666,7 +667,10 @@ class _dirchildren(object):
class includematcher(basematcher):
def __init__(self, root, kindpats, badfn=None):
super(includematcher, self).__init__(badfn)
if rustmod is not None:
# We need to pass the patterns to Rust because they can contain
# patterns from the user interface
self._kindpats = kindpats
self._pats, self.matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
self._prefix = _prefix(kindpats)
roots, dirs, parents = _rootsdirsandparents(kindpats)
......@@ -1273,15 +1277,6 @@ def _regex(kind, pat, globsuffix):
'''Convert a (normalized) pattern of any kind into a
regular expression.
globsuffix is appended to the regexp of globs.'''
if rustmod is not None:
try:
return rustmod.build_single_regex(kind, pat, globsuffix)
except rustmod.PatternError:
raise error.ProgrammingError(
b'not a regex pattern: %s:%s' % (kind, pat)
)
if not pat and kind in (b'glob', b'relpath'):
return b''
if kind == b're':
......@@ -1554,18 +1549,6 @@ def readpatternfile(filepath, warn, sourceinfo=False):
This is useful to debug ignore patterns.
'''
if rustmod is not None:
result, warnings = rustmod.read_pattern_file(
filepath, bool(warn), sourceinfo,
)
for warning_params in warnings:
# Can't be easily emitted from Rust, because it would require
# a mechanism for both gettext and calling the `warn` function.
warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)
return result
syntaxes = {
b're': b'relre:',
b'regexp': b'relre:',
......
......@@ -4,6 +4,7 @@ version = "0.1.0"
authors = ["Georges Racinet <gracinet@anybox.fr>"]
description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)"
edition = "2018"
build = "build.rs"
[lib]
name = "hg"
......@@ -11,9 +12,19 @@ name = "hg"
[dependencies]
byteorder = "1.3.1"
lazy_static = "1.3.0"
libc = "0.2.66"
memchr = "2.2.0"
rand = "0.6.5"
rand_pcg = "0.1.1"
rayon = "1.2.0"
regex = "1.1.0"
twox-hash = "1.5.0"
same-file = "1.0.6"
[dev-dependencies]
tempfile = "3.1.0"
maplit = "1.0.2"
pretty_assertions = "0.6.1"
[build-dependencies]
cc = "1.0.48"
use cc;
/// Build script entry point: compiles the C++ shim in `src/rust_re2.cpp`
/// into a static library and asks Cargo to link against `re2`.
fn main() {
    let mut shim = cc::Build::new();
    shim.cpp(true);
    shim.flag("-std=c++11");
    shim.file("src/rust_re2.cpp");
    shim.compile("librustre.a");

    println!("cargo:rustc-link-lib=re2");
}
......@@ -8,12 +8,15 @@
//! A multiset of directory names.
//!
//! Used to count the references to directories in a manifest or dirstate.
use crate::utils::hg_path::{HgPath, HgPathBuf};
use crate::{
dirstate::EntryState, utils::files, DirstateEntry, DirstateMapError,
FastHashMap,
dirstate::EntryState,
utils::{
files,
hg_path::{HgPath, HgPathBuf},
},
DirstateEntry, DirstateMapError, FastHashMap,
};
use std::collections::hash_map::{self, Entry};
use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
// could be encapsulated if we cared about API stability more seriously
pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
......@@ -129,6 +132,65 @@ impl DirsMultiset {
}
}
/// This is basically a reimplementation of `DirsMultiset` that stores the
/// children instead of just a count of them, plus a small optional
/// optimization to avoid some directories we don't need.
///
/// When `only_include` is `Some`, only directories contained in that set are
/// recorded. When it is `None`, no filtering is applied and every directory
/// is recorded.
#[derive(PartialEq, Debug)]
pub struct DirsChildrenMultiset<'a> {
    // Maps a directory to the set of basenames recorded under it.
    inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
    // Optional allow-list of directories worth recording.
    only_include: Option<HashSet<&'a HgPath>>,
}

impl<'a> DirsChildrenMultiset<'a> {
    /// Builds the multiset from `paths`, optionally restricted to the
    /// directories listed in `only_include`.
    pub fn new(
        paths: impl Iterator<Item = &'a HgPathBuf>,
        only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
    ) -> Self {
        let mut new = Self {
            inner: HashMap::default(),
            only_include: only_include
                .map(|s| s.iter().map(|p| p.as_ref()).collect()),
        };

        for path in paths {
            new.add_path(path)
        }

        new
    }

    /// Records every `(directory, basename)` pair yielded by
    /// `files::find_dirs_with_base` for `path`. Empty paths are ignored.
    fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
        if path.as_ref().is_empty() {
            return;
        }
        for (directory, basename) in files::find_dirs_with_base(path.as_ref())
        {
            // Without an allow-list every directory is wanted; previously a
            // missing allow-list caused every directory to be skipped, which
            // left the multiset permanently empty.
            let included = match &self.only_include {
                None => true,
                Some(allowed) => allowed.contains(&directory),
            };
            if !included {
                continue;
            }
            self.inner
                .entry(directory)
                .or_insert_with(HashSet::new)
                .insert(basename);
        }
    }

    /// Returns the children recorded for `path`, if any.
    pub fn get(
        &self,
        path: impl AsRef<HgPath>,
    ) -> Option<&HashSet<&'a HgPath>> {
        self.inner.get(path.as_ref())
    }
}
#[cfg(test)]
mod tests {
use super::*;
......
......@@ -13,7 +13,9 @@ pub use dirstate::{
dirs_multiset::{DirsMultiset, DirsMultisetIter},
dirstate_map::DirstateMap,
parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
status::{status, StatusResult},
status::{
status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
},
CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
StateMap, StateMapIter,
};
......@@ -21,11 +23,13 @@ mod filepatterns;
pub mod matchers;
pub mod revlog;
pub use revlog::*;
pub mod re2;
pub mod utils;
use crate::utils::hg_path::HgPathBuf;
use crate::utils::hg_path::{HgPathBuf, HgPathError};
pub use filepatterns::{
build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
parse_pattern_syntax, read_pattern_file, IgnorePattern,
PatternFileWarning, PatternSyntax,
};
use std::collections::HashMap;
use twox_hash::RandomXxHashBuilder64;
......@@ -116,18 +120,31 @@ impl From<DirstatePackError> for DirstateError {
#[derive(Debug)]
pub enum PatternError {
Path(HgPathError),
UnsupportedSyntax(String),
}
#[derive(Debug)]
pub enum PatternFileError {
UnsupportedSyntaxInFile(String, String, usize),
TooLong(usize),
IO(std::io::Error),
Pattern(PatternError, LineNumber),
}
impl From<std::io::Error> for PatternFileError {
fn from(e: std::io::Error) -> Self {
PatternFileError::IO(e)
impl ToString for PatternError {
    /// Renders the error as a human-readable message. I/O and path errors
    /// delegate to the wrapped error's own `to_string`.
    fn to_string(&self) -> String {
        match self {
            PatternError::IO(io_error) => io_error.to_string(),
            PatternError::Path(path_error) => path_error.to_string(),
            PatternError::TooLong(byte_count) => format!(
                "matcher pattern is too long ({} bytes)",
                byte_count
            ),
            PatternError::UnsupportedSyntax(syntax) => {
                format!("Unsupported syntax {}", syntax)
            }
            PatternError::UnsupportedSyntaxInFile(
                syntax,
                file_path,
                line,
            ) => format!(
                "{}:{}: unsupported syntax {}",
                file_path, line, syntax
            ),
        }
    }
}
......@@ -142,3 +159,15 @@ impl From<std::io::Error> for DirstateError {
DirstateError::IO(e)
}
}
impl From<std::io::Error> for PatternError {
    /// Wraps an I/O error in `PatternError::IO`.
    fn from(io_error: std::io::Error) -> Self {
        PatternError::IO(io_error)
    }
}
impl From<HgPathError> for PatternError {
    /// Wraps a path error in `PatternError::Path`.
    fn from(path_error: HgPathError) -> Self {
        PatternError::Path(path_error)
    }
}
use libc::{c_int, c_void};
type Re2Ptr = *const c_void;
pub struct Re2(Re2Ptr);
// SAFETY: `Re2` only wraps a raw pointer to a C++ `RE2` object, so the
// compiler cannot derive `Sync` by itself. The claim rests entirely on the
// upstream guarantee quoted below.
unsafe impl Sync for Re2 {
// re2.h says:
// An "RE2" object is safe for
// concurrent use by multiple threads
}
// Declarations of the C++ shim functions (built from `src/rust_re2.cpp`).
// Every signature here must match the `extern "C"` definitions in that file.
extern "C" {
    /// Compiles the `len` bytes at `data` into a new regex object. The
    /// result must be checked with `rustre2_ok` before being used to match.
    fn rustre2_create(data: *const u8, len: usize) -> Re2Ptr;
    /// Returns whether the regex compiled successfully.
    fn rustre2_ok(re2: Re2Ptr) -> bool;
    /// Writes a pointer to, and the length of, the compilation error message
    /// into `outdata` / `outlen`. The C++ definition returns `void`, so no
    /// return type is declared here (it was previously declared as `bool`,
    /// which did not match the definition).
    fn rustre2_error(
        re2: Re2Ptr,
        outdata: *mut *const u8,
        outlen: *mut usize,
    );
    /// Matches the `len` bytes at `data`. `anchor` is 0 for unanchored,
    /// 1 for anchored at the start, anything else for anchored at both ends.
    fn rustre2_match(
        re2: Re2Ptr,
        data: *const u8,
        len: usize,
        anchor: c_int,
    ) -> bool;
}
impl Re2 {
    /// Compiles `pattern` through the C++ shim.
    ///
    /// Returns the compiled regex, or the (lossily UTF-8 decoded) error
    /// message reported by the shim when compilation fails.
    ///
    /// NOTE(review): nothing visible in this file ever frees the object
    /// returned by `rustre2_create`, on either the success or the error
    /// path; confirm whether a destructor is provided elsewhere.
    pub fn new(pattern: &[u8]) -> Result<Re2, String> {
        unsafe {
            let handle = rustre2_create(pattern.as_ptr(), pattern.len());
            if !rustre2_ok(handle) {
                let mut message_ptr: *const u8 = std::ptr::null();
                let mut message_len: usize = 0;
                rustre2_error(handle, &mut message_ptr, &mut message_len);
                let raw_message =
                    std::slice::from_raw_parts(message_ptr, message_len);
                return Err(String::from_utf8_lossy(raw_message).to_string());
            }
            Ok(Re2(handle))
        }
    }

    /// Returns whether `data` matches, anchored at the start of `data`.
    pub fn is_match(&self, data: &[u8]) -> bool {
        // The C++ shim maps an anchor value of 1 to `RE2::ANCHOR_START`.
        const ANCHOR_START: c_int = 1;
        unsafe {
            rustre2_match(self.0, data.as_ptr(), data.len(), ANCHOR_START)
        }
    }
}
#include <re2/re2.h>
using namespace re2;
extern "C" {
RE2* rustre2_create(const char* data, size_t len) {
RE2::Options o;
o.set_encoding(RE2::Options::Encoding::EncodingLatin1);
o.set_log_errors(false);
o.set_max_mem(50000000);
return new RE2(StringPiece(data, len), o);
}
bool rustre2_ok(RE2* re) {
return re->ok();
}
void rustre2_error(RE2* re, const char** outdata, size_t* outlen) {
const std::string& e = re->error();
*outdata = e.data();
*outlen = e.length();
}
bool rustre2_match(RE2* re, char* data, size_t len, int ianchor) {
const StringPiece sp = StringPiece(data, len);
RE2::Anchor anchor =
ianchor == 0 ? RE2::Anchor::UNANCHORED :
(ianchor == 1 ? RE2::Anchor::ANCHOR_START :
RE2::Anchor::ANCHOR_BOTH);
return re->Match(sp, 0, len, anchor, NULL, 0);
}
}
......@@ -7,8 +7,32 @@
//! Contains useful functions, traits, structs, etc. for use in core.
use crate::utils::hg_path::HgPath;
use std::ops::Deref;
pub mod files;
pub mod hg_path;
pub mod path_auditor;
/// Useful until rust/issues/56345 is stable
///
/// Returns the index of the first occurrence of `needle` inside `slice`, or
/// `None` if it does not occur. An empty `needle` is found at index 0.
///
/// # Examples
///
/// ```
/// use crate::hg::utils::find_slice_in_slice;
///
/// let haystack = b"This is the haystack".to_vec();
/// assert_eq!(find_slice_in_slice(&haystack, b"the"), Some(8));
/// assert_eq!(find_slice_in_slice(&haystack, b"not here"), None);
/// assert_eq!(find_slice_in_slice(&haystack, b""), Some(0));
/// ```
pub fn find_slice_in_slice<T>(slice: &[T], needle: &[T]) -> Option<usize>
where
    for<'a> &'a [T]: PartialEq,
{
    // `windows` panics when asked for windows of size 0, so the empty needle
    // has to be handled before reaching it.
    if needle.is_empty() {
        return Some(0);
    }
    slice
        .windows(needle.len())
        .position(|window| window == needle)
}
/// Replaces the `from` slice with the `to` slice inside the `buf` slice.
///
......@@ -91,3 +115,61 @@ impl SliceExt for [u8] {
}
}
}
const HEX_DIGITS: &[u8] = b"0123456789abcdef";

/// Renders a value as printable ASCII bytes, escaping anything that is not.
pub trait PrettyPrint {
    fn pretty_print(&self) -> Vec<u8>;
}

impl PrettyPrint for u8 {
    /// Escapes a single byte:
    /// - `'` and `\` are prefixed with a backslash,
    /// - tab, newline and carriage return become `\t`, `\n` and `\r`,
    /// - other bytes below 0x20 or at/above 0x7f become `\xNN` (lowercase
    ///   hex),
    /// - every other byte is emitted unchanged.
    fn pretty_print(&self) -> Vec<u8> {
        match *self {
            c @ b'\'' | c @ b'\\' => vec![b'\\', c],
            // These literals are raw: each one is exactly one backslash
            // followed by a letter. Doubling the backslash here would be
            // indistinguishable from an escaped backslash followed by a
            // plain letter.
            b'\t' => br"\t".to_vec(),
            b'\n' => br"\n".to_vec(),
            b'\r' => br"\r".to_vec(),
            c if c < b' ' || c >= 127 => vec![
                b'\\',
                b'x',
                HEX_DIGITS[(c >> 4) as usize],
                HEX_DIGITS[(c & 0xf) as usize],
            ],
            c => vec![c],
        }
    }
}
impl<'a, T: PrettyPrint> PrettyPrint for &'a [T] {
    /// Concatenates the pretty-printed form of every element, in order.
    fn pretty_print(&self) -> Vec<u8> {
        let mut rendered = Vec::new();
        for element in self.iter() {
            rendered.extend(element.pretty_print());
        }
        rendered
    }
}
impl<T: PrettyPrint> PrettyPrint for Vec<T> {
    /// Pretty-prints the vector's contents through the slice implementation.
    fn pretty_print(&self) -> Vec<u8> {
        let elements: &[T] = self.as_slice();
        elements.pretty_print()
    }
}
impl<'a> PrettyPrint for &'a HgPath {
    /// Pretty-prints the raw bytes of the path.
    fn pretty_print(&self) -> Vec<u8> {
        let raw_bytes = self.as_bytes();
        raw_bytes.pretty_print()
    }
}
......@@ -15,12 +15,33 @@ use std::path::{Path, PathBuf};
pub enum HgPathError {
/// Bytes from the invalid `HgPath`
LeadingSlash(Vec<u8>),
/// Bytes and index of the second slash
ConsecutiveSlashes(Vec<u8>, usize),
/// Bytes and index of the null byte
ContainsNullByte(Vec<u8>, usize),
ConsecutiveSlashes {
bytes: Vec<u8>,
second_slash_index: usize,
},
ContainsNullByte {
bytes: Vec<u8>,
null_byte_index: usize,
},
/// Bytes
DecodeError(Vec<u8>),
/// The rest come from audit errors
EndsWithSlash(HgPathBuf),
ContainsIllegalComponent(HgPathBuf),
IsInsideNestedRepo {
path: HgPathBuf,
nested_repo: HgPathBuf,
},
TraversesSymbolicLink {
path: HgPathBuf,
symlink: HgPathBuf,
},
NotFsCompliant(HgPathBuf),
/// `path` is the smallest invalid path
NotUnderRoot {
path: PathBuf,
root: PathBuf,
},
}
impl ToString for HgPathError {
......@@ -29,17 +50,51 @@ impl ToString for HgPathError {
HgPathError::LeadingSlash(bytes) => {
format!("Invalid HgPath '{:?}': has a leading slash.", bytes)
}
HgPathError::ConsecutiveSlashes(bytes, pos) => format!(
"Invalid HgPath '{:?}': consecutive slahes at pos {}.",
HgPathError::ConsecutiveSlashes {
bytes,
second_slash_index: pos,
} => format!(
"Invalid HgPath '{:?}': consecutive slashes at pos {}.",
bytes, pos
),
HgPathError::ContainsNullByte(bytes, pos) => format!(
HgPathError::ContainsNullByte {
bytes,
null_byte_index: pos,
} => format!(
"Invalid HgPath '{:?}': contains null byte at pos {}.",
bytes, pos
),
HgPathError::DecodeError(bytes) => {
format!("Invalid HgPath '{:?}': could not be decoded.", bytes)
}
HgPathError::EndsWithSlash(path) => {
format!("Audit failed for '{}': ends with a slash.", path)
}
HgPathError::ContainsIllegalComponent(path) => format!(
"Audit failed for '{}': contains an illegal component.",
path
),
HgPathError::IsInsideNestedRepo {
path,
nested_repo: nested,
} => format!(
"Audit failed for '{}': is inside a nested repository '{}'.",
path, nested
),
HgPathError::TraversesSymbolicLink { path, symlink } => format!(
"Audit failed for '{}': traverses symbolic link '{}'.",
path, symlink
),
HgPathError::NotFsCompliant(path) => format!(
"Audit failed for '{}': cannot be turned into a \
filesystem path.",
path
),
HgPathError::NotUnderRoot { path, root } => format!(
"Audit failed for '{}': not under root {}.",
path.display(),
root.display()
),
}
}
}
......@@ -109,13 +164,16 @@ impl HgPath {
pub fn as_bytes(&self) -> &[u8] {
&self.inner
}
pub fn as_bytes_mut(&mut self) -> &mut [u8] {
&mut self.inner
}
pub fn contains(&self, other: u8) -> bool {
self.inner.contains(&other)
}
pub fn starts_with(&self, needle: impl AsRef<HgPath>) -> bool {
pub fn starts_with(&self, needle: impl AsRef<Self>) -> bool {
self.inner.starts_with(needle.as_ref().as_bytes())
}
pub fn join<T: ?Sized + AsRef<HgPath>>(&self, other: &T) -> HgPathBuf {
pub fn join<T: ?Sized + AsRef<Self>>(&self, other: &T) -> HgPathBuf {
let mut inner = self.inner.to_owned();
if inner.len() != 0 && inner.last() != Some(&b'/') {
inner.push(b'/');
......@@ -123,17 +181,28 @@ impl HgPath {
inner.extend(other.as_ref().bytes());
HgPathBuf::from_bytes(&inner)
}
/// Returns the path up to, but excluding, the last `/` separator, after
/// ignoring one trailing `/` if present. A path without any separator has
/// the empty path as its parent.
pub fn parent(&self) -> &Self {
    let bytes = &self.inner[..];
    // Drop a single trailing slash so that `a/` is treated like `a`.
    let trimmed = match bytes.split_last() {
        Some((&b'/', rest)) => rest,
        _ => bytes,
    };
    let parent_len = trimmed
        .iter()
        .rposition(|byte| *byte == b'/')
        .unwrap_or(0);
    HgPath::new(&trimmed[..parent_len])
}
/// Given a base directory, returns the slice of `self` relative to the
/// base directory. If `base` is not a directory (does not end with a
/// `b'/'`), returns `None`.
pub fn relative_to(&self, base: impl AsRef<HgPath>) -> Option<&HgPath> {
pub fn relative_to(&self, base: impl AsRef<Self>) -> Option<&Self> {
let base = base.as_ref();
if base.is_empty() {
return Some(self);
}
let is_dir = base.as_bytes().ends_with(b"/");
if is_dir && self.starts_with(base) {
Some(HgPath::new(&self.inner[base.len()..]))
Some(Self::new(&self.inner[base.len()..]))
} else {
None
}
......@@ -154,17 +223,17 @@ impl HgPath {
for (index, byte) in bytes.iter().enumerate() {
match byte {
0 => {
return Err(HgPathError::ContainsNullByte(
bytes.to_vec(),
index,
))
return Err(HgPathError::ContainsNullByte {
bytes: bytes.to_vec(),
null_byte_index: index,
})
}
b'/' => {
if previous_byte.is_some() && previous_byte == Some(b'/') {
return Err(HgPathError::ConsecutiveSlashes(
bytes.to_vec(),
index,
));
return Err(HgPathError::ConsecutiveSlashes {
bytes: bytes.to_vec(),
second_slash_index: index,
});
}
}
_ => (),
......@@ -348,6 +417,7 @@ pub fn path_to_hg_path_buf<P: AsRef<Path>>(
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn test_path_states() {
......@@ -356,11 +426,17 @@ mod tests {
HgPath::new(b"/").check_state()
);
assert_eq!(
Err(HgPathError::ConsecutiveSlashes(b"a/b//c".to_vec(), 4)),
Err(HgPathError::ConsecutiveSlashes {
bytes: b"a/b//c".to_vec(),
second_slash_index: 4
}),
HgPath::new(b"a/b//c").check_state()
);
assert_eq!(
Err(HgPathError::ContainsNullByte(b"a/b/\0c".to_vec(), 4)),
Err(HgPathError::ContainsNullByte {
bytes: b"a/b/\0c".to_vec(),
null_byte_index: 4
}),
HgPath::new(b"a/b/\0c").check_state()
);
// TODO test HgPathError::DecodeError for the Windows implementation.
......@@ -473,4 +549,19 @@ mod tests {
let base = HgPath::new(b"ends/");
assert_eq!(Some(HgPath::new(b"with/dir/")), path.relative_to(base));
}
#[test]
fn test_parent() {
// The empty path is its own parent.
let path = HgPath::new(b"");
assert_eq!(path.parent(), path);
// A trailing slash is ignored before looking for the parent.
let path = HgPath::new(b"a/");
assert_eq!(path.parent(), HgPath::new(b""));
// The parent does not include the trailing separator.
let path = HgPath::new(b"a/b");
assert_eq!(path.parent(), HgPath::new(b"a"));
// Only the last component is removed.
let path = HgPath::new(b"a/other/b");
assert_eq!(path.parent(), HgPath::new(b"a/other"));
}
}
// path_auditor.rs
//
// Copyright 2020
// Raphaël Gomès <rgomes@octobus.net>,
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
use crate::utils::{
files::{lower_clean, split_drive},
find_slice_in_slice,
hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
};
use std::collections::HashSet;
use std::path::{Path, PathBuf};
/// Ensures that a path is valid for use in the repository i.e. does not use
/// any banned components, does not traverse a symlink, etc.
#[derive(Debug, Default)]
pub struct PathAuditor {
// Paths that already passed `audit_path`; they are accepted again without
// being re-checked.
audited: HashSet<HgPathBuf>,
// Directory prefixes that already passed `check_filesystem`; they are
// skipped when auditing later paths.
audited_dirs: HashSet<HgPathBuf>,
// Filesystem directory that audited prefixes are joined to before being
// inspected on disk.
root: PathBuf,
}
impl PathAuditor {
pub fn new(root: impl AsRef<Path>) -> Self {
Self {
root: root.as_ref().to_owned(),
..Default::default()
}
}
pub fn audit_path(
&mut self,
path: impl AsRef<HgPath>,
) -> Result<(), HgPathError> {
// TODO windows "localpath" normalization
let path = path.as_ref();
if path.is_empty() {
return Ok(());
}
// TODO case normalization
if self.audited.contains(path) {
return Ok(());
}
// AIX ignores "/" at end of path, others raise EISDIR.
let last_byte = path.as_bytes()[path.len() - 1];
if last_byte == b'/' || last_byte == b'\\' {
return Err(HgPathError::EndsWithSlash(path.to_owned()));
}
let parts: Vec<_> = path
.as_bytes()
.split(|c| *c as char == std::path::MAIN_SEPARATOR)
.collect();
if !split_drive(path).0.is_empty()
|| [&b".hg"[..], &b".hg."[..], &b""[..]]
.contains(&lower_clean(parts[0]).as_ref())
|| parts.iter().any(|c| c == b"..")
{
return Err(HgPathError::ContainsIllegalComponent(
path.to_owned(),
));
}
// Windows shortname aliases
for part in parts.iter() {
if part.contains(&b'~') {
let mut split = part.splitn(1, |b| *b == b'~');
let mut first = split.next().unwrap().to_owned();
first.make_ascii_uppercase();
let last = split.next().unwrap();
if last.iter().all(|b| (*b as char).is_digit(10))
&& [&b"HG"[..], &b"HG8B6C"[..]].contains(&first.as_ref())
{
return Err(HgPathError::ContainsIllegalComponent(
path.to_owned(),
));
}
}
}
if find_slice_in_slice(&lower_clean(path.as_bytes()), b".hg").is_some()
{
let lower_parts: Vec<_> =
parts.iter().map(|p| lower_clean(p)).collect();
for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
if lower_parts[1..].contains(pattern) {
let pos = lower_parts
.iter()
.position(|part| part == pattern)
.unwrap();
let base = lower_parts[..pos]
.iter()
.fold(HgPathBuf::new(), |acc, p| {
acc.join(HgPath::new(p))
});
return Err(HgPathError::IsInsideNestedRepo {
path: path.to_owned(),
nested_repo: base,
});
}
}
}
let parts = &parts[..parts.len().saturating_sub(1)];
let mut prefixes = vec![];
// It's important that we check the path parts starting from the root.
// This means we won't accidentally traverse a symlink into some other
// filesystem (which is potentially expensive to access).
for index in 0..parts.len() {
let prefix =
&parts[..index + 1].join(&(std::path::MAIN_SEPARATOR as u8));
let prefix = HgPath::new(prefix);
if self.audited_dirs.contains(prefix) {
continue;
}
self.check_filesystem(&prefix, &path)?;
prefixes.push(prefix.to_owned());
}
self.audited.insert(path.to_owned());
// Only add prefixes to the cache after checking everything: we don't
// want to add "foo/bar/baz" before checking if there's a "foo/.hg"
self.audited_dirs.extend(prefixes);
Ok(())
}
pub fn check_filesystem(
&self,
prefix: impl AsRef<HgPath>,
path: impl AsRef<HgPath>,
) -> Result<(), HgPathError> {
let prefix = prefix.as_ref();
let path = path.as_ref();
let current_path = self.root.join(
hg_path_to_path_buf(prefix)
.map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
);
match std::fs::symlink_metadata(&current_path) {
Err(e) => {
// EINVAL can be raised as invalid path syntax under win32.
// They must be ignored for patterns can be checked too.
if e.kind() != std::io::ErrorKind::NotFound
&& e.kind() != std::io::ErrorKind::InvalidInput
&& e.raw_os_error() != Some(20)
{
eprintln!("{:?}", e.kind());
// Rust does not yet have an `ErrorKind` for
// `NotADirectory` (errno 20)
// It happens if the dirstate contains `foo/bar` and
// foo is not a directory
return Err(HgPathError::NotFsCompliant(path.to_owned()));
}
}
Ok(meta) => {
if meta.file_type().is_symlink() {
return Err(HgPathError::TraversesSymbolicLink {
path: path.to_owned(),
symlink: prefix.to_owned(),
});
}
if meta.file_type().is_dir()
&& current_path.join(".hg").is_dir()
{
return Err(HgPathError::IsInsideNestedRepo {
path: path.to_owned(),
nested_repo: prefix.to_owned(),
});
}
}
};
Ok(())
}
pub fn check(&mut self, path: impl AsRef<HgPath>) -> bool {
self.audit_path(path).is_ok()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::utils::files::get_path_from_bytes;
use crate::utils::hg_path::path_to_hg_path_buf;
#[test]
fn test_path_auditor() {
// The auditor is rooted at `/tmp`; every audited path below is interpreted
// relative to that directory.
let mut auditor = PathAuditor::new(get_path_from_bytes(b"/tmp"));
// A leading `.hg` component is an illegal component.
let path = HgPath::new(b".hg/00changelog.i");
assert_eq!(
auditor.audit_path(path),
Err(HgPathError::ContainsIllegalComponent(path.to_owned()))
);
// A `.hg` component further down the path means a nested repository.
let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
assert_eq!(
auditor.audit_path(path),
Err(HgPathError::IsInsideNestedRepo {
path: path.to_owned(),
nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
})
);
use std::fs::{create_dir, File};
use tempfile::tempdir;
// Build `<tmpdir>/a/in_a` and a symlink `<tmpdir>/b` pointing to `a`.
let base_dir = tempdir().unwrap();
let base_dir_path = base_dir.path();
let a = base_dir_path.join("a");
let b = base_dir_path.join("b");
create_dir(&a).unwrap();
let in_a_path = a.join("in_a");
File::create(in_a_path).unwrap();
// TODO make portable
std::os::unix::fs::symlink(&a, &b).unwrap();
// NOTE(review): `skip(2)` presumably drops the root and `tmp` components
// so that the path becomes relative to the `/tmp` auditor root; this
// assumes the temporary directory lives directly under `/tmp`. Confirm.
let buf = b.join("in_a").components().skip(2).collect::<PathBuf>();
eprintln!("buf: {}", buf.display());
let path = path_to_hg_path_buf(buf).unwrap();
// Going through the symlink `b` must be rejected.
assert_eq!(
auditor.audit_path(&path),
Err(HgPathError::TraversesSymbolicLink {
path: path,
symlink: path_to_hg_path_buf(
b.components().skip(2).collect::<PathBuf>()
)
.unwrap()
})
);
}
}
......@@ -13,12 +13,15 @@ mod copymap;
mod dirs_multiset;
mod dirstate_map;
mod status;
use crate::dirstate::{
dirs_multiset::Dirs, dirstate_map::DirstateMap, status::status_wrapper,
use crate::{
dirstate::{
dirs_multiset::Dirs, dirstate_map::DirstateMap, status::status_wrapper,
},
exceptions,
};
use cpython::{
exc, PyBytes, PyDict, PyErr, PyModule, PyObject, PyResult, PySequence,
Python,
exc, PyBytes, PyDict, PyErr, PyList, PyModule, PyObject, PyResult,
PySequence, Python,
};
use hg::{
utils::hg_path::HgPathBuf, DirstateEntry, DirstateParseError, EntryState,
......@@ -106,6 +109,11 @@ pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
m.add(py, "__package__", package)?;
m.add(py, "__doc__", "Dirstate - Rust implementation")?;
m.add(
py,
"FallbackError",
py.get_type::<exceptions::FallbackError>(),
)?;
m.add_class::<Dirs>(py)?;
m.add_class::<DirstateMap>(py)?;
m.add(
......@@ -117,9 +125,12 @@ pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
dmap: DirstateMap,
root_dir: PyObject,
matcher: PyObject,
list_clean: bool,
ignorefiles: PyList,
check_exec: bool,
last_normal_time: i64,
check_exec: bool
list_clean: bool,
list_ignored: bool,
list_unknown: bool
)
),
)?;
......
......@@ -13,7 +13,7 @@
//!
//! [`GraphError`]: struct.GraphError.html
use cpython::{
exc::{IOError, RuntimeError, ValueError},
exc::{RuntimeError, ValueError},
py_exception, PyErr, Python,
};
use hg;
......@@ -39,34 +39,7 @@ impl GraphError {
}
}
py_exception!(rustext, PatternError, RuntimeError);
py_exception!(rustext, PatternFileError, RuntimeError);
py_exception!(rustext, HgPathPyError, RuntimeError);
impl PatternError {
pub fn pynew(py: Python, inner: hg::PatternError) -> PyErr {
match inner {
hg::PatternError::UnsupportedSyntax(m) => {
PatternError::new(py, ("PatternError", m))
}
}
}
}
impl PatternFileError {
pub fn pynew(py: Python, inner: hg::PatternFileError) -> PyErr {
match inner {
hg::PatternFileError::IO(e) => {
let value = (e.raw_os_error().unwrap_or(2), e.to_string());
PyErr::new::<IOError, _>(py, value)
}
hg::PatternFileError::Pattern(e, l) => match e {
hg::PatternError::UnsupportedSyntax(m) => {
PatternFileError::new(py, ("PatternFileError", m, l))
}
},
}
}
}
py_exception!(rustext, FallbackError, RuntimeError);
py_exception!(shared_ref, AlreadyBorrowed, RuntimeError);
// filepatterns.rs
//
// Copyright 2019, Georges Racinet <gracinet@anybox.fr>,
// Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Bindings for the `hg::filepatterns` module provided by the
//! `hg-core` crate. From Python, this will be seen as `rustext.filepatterns`
//! and can be used as a replacement for the pure `filepatterns` Python
//! module.
use crate::exceptions::{PatternError, PatternFileError};
use cpython::{
PyBytes, PyDict, PyModule, PyObject, PyResult, PyTuple, Python, ToPyObject,
};
use hg::utils::files;
use hg::{build_single_regex, read_pattern_file, LineNumber, PatternTuple};
use std::path::PathBuf;
/// Reads the pattern file at `file_path` and returns a
/// `(patterns, warnings)` tuple to Python.
///
/// The hg-core function always produces 3-tuples; Rust cannot return two
/// differently shaped values from one function, so the shape requested by
/// the caller is chosen here: full 3-tuples when `source_info` is true, only
/// the first element of each tuple otherwise. The conversion is not expected
/// to have a measurable performance impact.
///
/// The Python implementation takes a `warn` callback and emits warnings
/// while parsing. This implementation takes a boolean instead, accumulates
/// the warnings and hands them back to Python on completion. See the
/// `readpatternfile` function in `match.py` for more details.
fn read_pattern_file_wrapper(
    py: Python,
    file_path: PyObject,
    warn: bool,
    source_info: bool,
) -> PyResult<PyTuple> {
    let bytes = file_path.extract::<PyBytes>(py)?;
    let path = files::get_path_from_bytes(bytes.data(py));

    let (patterns, warnings) = read_pattern_file(path, warn)
        .map_err(|e| PatternFileError::pynew(py, e))?;

    if source_info {
        let mut results: Vec<(PyBytes, LineNumber, PyBytes)> = Vec::new();
        for pattern in patterns.iter() {
            results.push((
                PyBytes::new(py, &pattern.0),
                pattern.1,
                PyBytes::new(py, &pattern.2),
            ));
        }
        let py_warnings = warnings_to_py_bytes(py, &warnings);
        return Ok((results, py_warnings).to_py_object(py));
    }

    let mut results: Vec<PyBytes> = Vec::new();
    for pattern in patterns.iter() {
        results.push(PyBytes::new(py, &pattern.0));
    }
    let py_warnings = warnings_to_py_bytes(py, &warnings);
    Ok((results, py_warnings).to_py_object(py))
}
/// Converts each `(path, syntax)` warning into a pair of Python `bytes`
/// objects, preserving order.
fn warnings_to_py_bytes(
    py: Python,
    warnings: &[(PathBuf, Vec<u8>)],
) -> Vec<(PyBytes, PyBytes)> {
    let mut converted = Vec::new();
    for (path, syntax) in warnings.iter() {
        let py_path = PyBytes::new(py, &files::get_bytes_from_path(path));
        let py_syntax = PyBytes::new(py, syntax);
        converted.push((py_path, py_syntax));
    }
    converted
}
/// Extracts the three `bytes` arguments, forwards them to
/// `build_single_regex` and returns the resulting regex as Python `bytes`.
/// A failure from `build_single_regex` is converted with
/// `PatternError::pynew`.
fn build_single_regex_wrapper(
    py: Python,
    kind: PyObject,
    pat: PyObject,
    globsuffix: PyObject,
) -> PyResult<PyBytes> {
    let kind_bytes = kind.extract::<PyBytes>(py)?;
    let pat_bytes = pat.extract::<PyBytes>(py)?;
    let globsuffix_bytes = globsuffix.extract::<PyBytes>(py)?;

    build_single_regex(
        kind_bytes.data(py),
        pat_bytes.data(py),
        globsuffix_bytes.data(py),
    )
    .map(|regex| PyBytes::new(py, &regex))
    .map_err(|e| PatternError::pynew(py, e))
}
/// Creates the `<package>.filepatterns` Python module, registers the
/// `build_single_regex` and `read_pattern_file` functions and the
/// `PatternError` type on it, and inserts it into `sys.modules` under its
/// dotted name.
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
let dotted_name = &format!("{}.filepatterns", package);
let m = PyModule::new(py, dotted_name)?;
m.add(py, "__package__", package)?;
m.add(
py,
"__doc__",
"Patterns files parsing - Rust implementation",
)?;
// Python-callable wrappers around the hg-core functions.
m.add(
py,
"build_single_regex",
py_fn!(
py,
build_single_regex_wrapper(
kind: PyObject,
pat: PyObject,
globsuffix: PyObject
)
),
)?;
m.add(
py,
"read_pattern_file",
py_fn!(
py,
read_pattern_file_wrapper(
file_path: PyObject,
warn: bool,
source_info: bool
)
),
)?;
m.add(py, "PatternError", py.get_type::<PatternError>())?;
// Register the module in `sys.modules` under its full dotted name.
let sys = PyModule::import(py, "sys")?;
let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
sys_modules.set_item(py, dotted_name, &m)?;
Ok(m)
}
......@@ -33,7 +33,6 @@ pub mod dagops;
pub mod dirstate;
pub mod discovery;
pub mod exceptions;
pub mod filepatterns;
pub mod parsers;
pub mod revlog;
pub mod utils;
......@@ -53,25 +52,10 @@ py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| {
m.add(py, "revlog", revlog::init_module(py, &dotted_name)?)?;
m.add(
py,
"filepatterns",
filepatterns::init_module(py, &dotted_name)?,
)?;
m.add(
py,
"parsers",
parsers::init_parsers_module(py, &dotted_name)?,
)?;
m.add(py, "GraphError", py.get_type::<exceptions::GraphError>())?;
m.add(
py,
"PatternFileError",
py.get_type::<exceptions::PatternFileError>(),
)?;
m.add(
py,
"PatternError",
py.get_type::<exceptions::PatternError>(),
)?;
Ok(())
});
......
......@@ -7,14 +7,17 @@
use crate::cindex;
use cpython::{
ObjectProtocol, PyClone, PyDict, PyModule, PyObject, PyResult, PyTuple, Python, PythonObject,
ToPyObject,
ObjectProtocol, PyClone, PyDict, PyModule, PyObject, PyResult, PyTuple,
Python, PythonObject, ToPyObject,
};
use hg::Revision;
use std::cell::RefCell;
/// Return a Struct implementing the Graph trait
pub(crate) fn pyindex_to_graph(py: Python, index: PyObject) -> PyResult<cindex::Index> {
pub(crate) fn pyindex_to_graph(
py: Python,
index: PyObject,
) -> PyResult<cindex::Index> {
match index.extract::<MixedIndex>(py) {
Ok(midx) => Ok(midx.clone_cindex(py)),
Err(_) => cindex::Index::new(py, index),
......
......@@ -355,6 +355,11 @@ A deleted subrepo file is flagged as dirty, like the top level repo
R sub1/sub2/folder/test.txt
! sub1/.hgsub
? sub1/x.hgsub
$ hg status -R sub1
warning: subrepo spec file 'sub1/.hgsub' not found
R .hgsubstate
! .hgsub
? x.hgsub
$ mv sub1/x.hgsub sub1/.hgsub
$ hg update -Cq
$ touch sub1/foo
......