Merge pull request #245623 from tweag/fileset.toSource

File set combinators base: `lib.fileset.toSource`
This commit is contained in:
Silvan Mosberger 2023-09-02 04:07:02 +02:00 committed by GitHub
commit d66929b7b1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 1122 additions and 1 deletions

2
.github/CODEOWNERS vendored
View file

@ -29,6 +29,8 @@
/lib/debug.nix @edolstra @Profpatsch
/lib/asserts.nix @edolstra @Profpatsch
/lib/path.* @infinisil @fricklerhandwerk
/lib/fileset @infinisil
/doc/functions/fileset.section.md @infinisil
# Nixpkgs Internals
/default.nix @Ericson2314

View file

@ -19,6 +19,7 @@ let
{ name = "options"; description = "NixOS / nixpkgs option handling"; }
{ name = "path"; description = "path functions"; }
{ name = "filesystem"; description = "filesystem functions"; }
{ name = "fileset"; description = "file set functions"; }
{ name = "sources"; description = "source filtering functions"; }
{ name = "cli"; description = "command-line serialization functions"; }
{ name = "gvariant"; description = "GVariant formatted string serialization functions"; }

View file

@ -8,4 +8,5 @@ functions/generators.section.md
functions/debug.section.md
functions/prefer-remote-fetch.section.md
functions/nix-gitignore.section.md
functions/fileset.section.md
```

View file

@ -0,0 +1,51 @@
<!-- TODO: Render this document in front of function documentation in case https://github.com/nix-community/nixdoc/issues/19 is ever supported -->
# File sets {#sec-fileset}
The [`lib.fileset`](#sec-functions-library-fileset) library allows you to work with _file sets_.
A file set is a mathematical set of local files that can be added to the Nix store for use in Nix derivations.
File sets are easy and safe to use, providing obvious and composable semantics with good error messages to prevent mistakes.
These sections apply to the entire library.
See the [function reference](#sec-functions-library-fileset) for function-specific documentation.
The file set library is currently very limited but is being expanded to include more functions over time.
## Implicit coercion from paths to file sets {#sec-fileset-path-coercion}
All functions accepting file sets as arguments can also accept [paths](https://nixos.org/manual/nix/stable/language/values.html#type-path) as arguments.
Such path arguments are implicitly coerced to file sets containing all files under that path:
- A path to a file turns into a file set containing that single file.
- A path to a directory turns into a file set containing all files _recursively_ in that directory.
If the path points to a non-existent location, an error is thrown.
::: {.note}
Just like in Git, file sets cannot represent empty directories.
Because of this, a path to a directory that contains no files (recursively) will turn into a file set containing no files.
:::
:::{.note}
File set coercion does _not_ add any of the files under the coerced paths to the store.
Only the [`toSource`](#function-library-lib.fileset.toSource) function adds files to the Nix store, and only those files contained in the `fileset` argument.
This is in contrast to using [paths in string interpolation](https://nixos.org/manual/nix/stable/language/values.html#type-path), which does add the entire referenced path to the store.
:::
### Example {#sec-fileset-path-coercion-example}
Assume we are in a local directory with a file hierarchy like this:
```
├─ a/
│ ├─ x (file)
│ └─ b/
  └─ y (file)
└─ c/
  └─ d/
```
Here's a listing of which files get included when different path expressions get coerced to file sets:
- `./.` as a file set contains both `a/x` and `a/b/y` (`c/` does not contain any files and is therefore omitted).
- `./a` as a file set contains both `a/x` and `a/b/y`.
- `./a/x` as a file set contains only `a/x`.
- `./a/b` as a file set contains only `a/b/y`.
- `./c` as a file set is empty, since neither `c` nor `c/d` contain any files.

View file

@ -70,4 +70,7 @@ tests/filesystem.sh
# Run the lib.path property tests
path/tests/prop.sh
# Run the lib.fileset tests
fileset/tests.sh
```

View file

@ -55,6 +55,7 @@ let
# Eval-time filesystem handling
path = callLibs ./path;
filesystem = callLibs ./filesystem.nix;
fileset = callLibs ./fileset;
sources = callLibs ./sources.nix;
# back-compat aliases

183
lib/fileset/README.md Normal file
View file

@ -0,0 +1,183 @@
# File set library
The main goal of the file set library is to be able to select local files that should be added to the Nix store.
It should have the following properties:
- Easy:
The functions should have obvious semantics, be low in number and be composable.
- Safe:
Throw early and helpful errors when mistakes are detected.
- Lazy:
Only compute values when necessary.
Non-goals are:
- Efficient:
If the abstraction proves itself worthwhile but too slow, it can be still be optimized further.
## Tests
Tests are declared in [`tests.sh`](./tests.sh) and can be run using
```
./tests.sh
```
## Benchmark
A simple benchmark against the HEAD commit can be run using
```
./benchmark.sh HEAD
```
This is intended to be run manually and is not checked by CI.
## Internal representation
The internal representation is versioned in order to allow file sets from different Nixpkgs versions to be composed with each other, see [`internal.nix`](./internal.nix) for the versions and conversions between them.
This section describes only the current representation, but past versions will have to be supported by the code.
### `fileset`
An attribute set with these values:
- `_type` (constant string `"fileset"`):
Tag to indicate this value is a file set.
- `_internalVersion` (constant string equal to the current version):
Version of the representation
- `_internalBase` (path):
Any files outside of this path cannot influence the set of files.
This is always a directory.
- `_internalTree` ([filesetTree](#filesettree)):
A tree representation of all included files under `_internalBase`.
- `__noEval` (error):
An error indicating that directly evaluating file sets is not supported.
## `filesetTree`
One of the following:
- `{ <name> = filesetTree; }`:
A directory with a nested `filesetTree` value for every directory entry.
Even entries that aren't included are present as `null` because it improves laziness and allows using this as a sort of `builtins.readDir` cache.
- `"directory"`:
A directory with all its files included recursively, allowing early cutoff for some operations.
This specific string is chosen to be compatible with `builtins.readDir` for a simpler implementation.
- `"regular"`, `"symlink"`, `"unknown"` or any other non-`"directory"` string:
A nested file with its file type.
These specific strings are chosen to be compatible with `builtins.readDir` for a simpler implementation.
Distinguishing between different file types is not strictly necessary for the functionality this library,
but it does allow nicer printing of file sets.
- `null`:
A file or directory that is excluded from the tree.
It may still exist on the file system.
## API design decisions
This section justifies API design decisions.
### Internal structure
The representation of the file set data type is internal and can be changed over time.
Arguments:
- (+) The point of this library is to provide high-level functions, users don't need to be concerned with how it's implemented
- (+) It allows adjustments to the representation, which is especially useful in the early days of the library.
- (+) It still allows the representation to be stabilized later if necessary and if it has proven itself
### Influence tracking
File set operations internally track the top-most directory that could influence the exact contents of a file set.
Specifically, `toSource` requires that the given `fileset` is completely determined by files within the directory specified by the `root` argument.
For example, even with `dir/file.txt` being the only file in `./.`, `toSource { root = ./dir; fileset = ./.; }` gives an error.
This is because `fileset` may as well be the result of filtering `./.` in a way that excludes `dir`.
Arguments:
- (+) This gives us the guarantee that adding new files to a project never breaks a file set expression.
This is also true in a lesser form for removed files:
only removing files explicitly referenced by paths can break a file set expression.
- (+) This can be removed later, if we discover it's too restrictive
- (-) It leads to errors when a sensible result could sometimes be returned, such as in the above example.
### Empty directories
File sets can only represent a _set_ of local files, directories on their own are not representable.
Arguments:
- (+) There does not seem to be a sensible set of combinators when directories can be represented on their own.
Here's some possibilities:
- `./.` represents the files in `./.` _and_ the directory itself including its subdirectories, meaning that even if there's no files, the entire structure of `./.` is preserved
In that case, what should `fileFilter (file: false) ./.` return?
It could return the entire directory structure unchanged, but with all files removed, which would not be what one would expect.
Trying to have a filter function that also supports directories will lead to the question of:
What should the behavior be if `./foo` itself is excluded but all of its contents are included?
It leads to having to define when directories are recursed into, but then we're effectively back at how the `builtins.path`-based filters work.
- `./.` represents all files in `./.` _and_ the directory itself, but not its subdirectories, meaning that at least `./.` will be preserved even if it's empty.
In that case, `intersect ./. ./foo` should only include files and no directories themselves, since `./.` includes only `./.` as a directory, and same for `./foo`, so there's no overlap in directories.
But intuitively this operation should result in the same as `./foo` everything else is just confusing.
- (+) This matches how Git only supports files, so developers should already be used to it.
- (-) Empty directories (even if they contain nested directories) are neither representable nor preserved when coercing from paths.
- (+) It is very rare that empty directories are necessary.
- (+) We can implement a workaround, allowing `toSource` to take an extra argument for ensuring certain extra directories exist in the result.
- (-) It slows down store imports, since the evaluator needs to traverse the entire tree to remove any empty directories
- (+) This can still be optimized by introducing more Nix builtins if necessary
### String paths
File sets do not support Nix store paths in strings such as `"/nix/store/...-source"`.
Arguments:
- (+) Such paths are usually produced by derivations, which means `toSource` would either:
- Require IFD if `builtins.path` is used as the underlying primitive
- Require importing the entire `root` into the store such that derivations can be used to do the filtering
- (+) The convenient path coercion like `union ./foo ./bar` wouldn't work for absolute paths, requiring more verbose alternate interfaces:
- `let root = "/nix/store/...-source"; in union "${root}/foo" "${root}/bar"`
Verbose and dangerous because if `root` was a path, the entire path would get imported into the store.
- `toSource { root = "/nix/store/...-source"; fileset = union "./foo" "./bar"; }`
Does not allow debug printing intermediate file set contents, since we don't know the paths contents before having a `root`.
- `let fs = lib.fileset.withRoot "/nix/store/...-source"; in fs.union "./foo" "./bar"`
Makes library functions impure since they depend on the contextual root path, questionable composability.
- (+) The point of the file set abstraction is to specify which files should get imported into the store.
This use case makes little sense for files that are already in the store.
This should be a separate abstraction as e.g. `pkgs.drvLayout` instead, which could have a similar interface but be specific to derivations.
Additional capabilities could be supported that can't be done at evaluation time, such as renaming files, creating new directories, setting executable bits, etc.
### Single files
File sets cannot add single files to the store, they can only import files under directories.
Arguments:
- (+) There's no point in using this library for a single file, since you can't do anything other than add it to the store or not.
And it would be unclear how the library should behave if the one file wouldn't be added to the store:
`toSource { root = ./file.nix; fileset = <empty>; }` has no reasonable result because returing an empty store path wouldn't match the file type, and there's no way to have an empty file store path, whatever that would mean.
## To update in the future
Here's a list of places in the library that need to be updated in the future:
- > The file set library is currently very limited but is being expanded to include more functions over time.
in [the manual](../../doc/functions/fileset.section.md)
- > Currently the only way to construct file sets is using implicit coercion from paths.
in [the `toSource` reference](./default.nix)
- > For now filesets are always paths
in [the `toSource` implementation](./default.nix), also update the variable name there
- Once a tracing function exists, `__noEval` in [internal.nix](./internal.nix) should mention it
- If/Once a function to convert `lib.sources` values into file sets exists, the `_coerce` and `toSource` functions should be updated to mention that function in the error when such a value is passed
- If/Once a function exists that can optionally include a path depending on whether it exists, the error message for the path not existing in `_coerce` should mention the new function

94
lib/fileset/benchmark.sh Executable file
View file

@ -0,0 +1,94 @@
#!/usr/bin/env bash
# Benchmarks lib.fileset
# Run:
# [nixpkgs]$ lib/fileset/benchmark.sh HEAD
set -euo pipefail
shopt -s inherit_errexit dotglob
if (( $# == 0 )); then
echo "Usage: $0 HEAD"
echo "Benchmarks the current tree against the HEAD commit. Any git ref will work."
exit 1
fi
compareTo=$1
SCRIPT_FILE=$(readlink -f "${BASH_SOURCE[0]}")
SCRIPT_DIR=$(dirname "$SCRIPT_FILE")
nixpkgs=$(cd "$SCRIPT_DIR/../.."; pwd)
tmp="$(mktemp -d)"
clean_up() {
rm -rf "$tmp"
}
trap clean_up EXIT SIGINT SIGTERM
work="$tmp/work"
mkdir "$work"
cd "$work"
# Create a fairly populated tree
touch f{0..5}
mkdir d{0..5}
mkdir e{0..5}
touch d{0..5}/f{0..5}
mkdir -p d{0..5}/d{0..5}
mkdir -p e{0..5}/e{0..5}
touch d{0..5}/d{0..5}/f{0..5}
mkdir -p d{0..5}/d{0..5}/d{0..5}
mkdir -p e{0..5}/e{0..5}/e{0..5}
touch d{0..5}/d{0..5}/d{0..5}/f{0..5}
mkdir -p d{0..5}/d{0..5}/d{0..5}/d{0..5}
mkdir -p e{0..5}/e{0..5}/e{0..5}/e{0..5}
touch d{0..5}/d{0..5}/d{0..5}/d{0..5}/f{0..5}
bench() {
NIX_PATH=nixpkgs=$1 NIX_SHOW_STATS=1 NIX_SHOW_STATS_PATH=$tmp/stats.json \
nix-instantiate --eval --strict --show-trace >/dev/null \
--expr '(import <nixpkgs/lib>).fileset.toSource { root = ./.; fileset = ./.; }'
cat "$tmp/stats.json"
}
echo "Running benchmark on index" >&2
bench "$nixpkgs" > "$tmp/new.json"
(
echo "Checking out $compareTo" >&2
git -C "$nixpkgs" worktree add --quiet "$tmp/worktree" "$compareTo"
trap 'git -C "$nixpkgs" worktree remove "$tmp/worktree"' EXIT
echo "Running benchmark on $compareTo" >&2
bench "$tmp/worktree" > "$tmp/old.json"
)
declare -a stats=(
".envs.elements"
".envs.number"
".gc.totalBytes"
".list.concats"
".list.elements"
".nrFunctionCalls"
".nrLookups"
".nrOpUpdates"
".nrPrimOpCalls"
".nrThunks"
".sets.elements"
".sets.number"
".symbols.number"
".values.number"
)
different=0
for stat in "${stats[@]}"; do
oldValue=$(jq "$stat" "$tmp/old.json")
newValue=$(jq "$stat" "$tmp/new.json")
if (( oldValue != newValue )); then
percent=$(bc <<< "scale=100; result = 100/$oldValue*$newValue; scale=4; result / 1")
if (( oldValue < newValue )); then
echo -e "Statistic $stat ($newValue) is \e[0;31m$percent% (+$(( newValue - oldValue )))\e[0m of the old value $oldValue" >&2
else
echo -e "Statistic $stat ($newValue) is \e[0;32m$percent% (-$(( oldValue - newValue )))\e[0m of the old value $oldValue" >&2
fi
(( different++ )) || true
fi
done
echo "$different stats differ between the current tree and $compareTo"

131
lib/fileset/default.nix Normal file
View file

@ -0,0 +1,131 @@
{ lib }:
let
inherit (import ./internal.nix { inherit lib; })
_coerce
_toSourceFilter
;
inherit (builtins)
isPath
pathExists
typeOf
;
inherit (lib.path)
hasPrefix
splitRoot
;
inherit (lib.strings)
isStringLike
;
inherit (lib.filesystem)
pathType
;
inherit (lib.sources)
cleanSourceWith
;
in {
/*
Add the local files contained in `fileset` to the store as a single [store path](https://nixos.org/manual/nix/stable/glossary#gloss-store-path) rooted at `root`.
The result is the store path as a string-like value, making it usable e.g. as the `src` of a derivation, or in string interpolation:
```nix
stdenv.mkDerivation {
src = lib.fileset.toSource { ... };
# ...
}
```
The name of the store path is always `source`.
Type:
toSource :: {
root :: Path,
fileset :: FileSet,
} -> SourceLike
Example:
# Import the current directory into the store but only include files under ./src
toSource { root = ./.; fileset = ./src; }
=> "/nix/store/...-source"
# The file set coerced from path ./bar could contain files outside the root ./foo, which is not allowed
toSource { root = ./foo; fileset = ./bar; }
=> <error>
# The root has to be a local filesystem path
toSource { root = "/nix/store/...-source"; fileset = ./.; }
=> <error>
*/
toSource = {
/*
(required) The local directory [path](https://nixos.org/manual/nix/stable/language/values.html#type-path) that will correspond to the root of the resulting store path.
Paths in [strings](https://nixos.org/manual/nix/stable/language/values.html#type-string), including Nix store paths, cannot be passed as `root`.
`root` has to be a directory.
<!-- Ignore the indentation here, this is a nixdoc rendering bug that needs to be fixed -->
:::{.note}
Changing `root` only affects the directory structure of the resulting store path, it does not change which files are added to the store.
The only way to change which files get added to the store is by changing the `fileset` attribute.
:::
*/
root,
/*
(required) The file set whose files to import into the store.
Currently the only way to construct file sets is using [implicit coercion from paths](#sec-fileset-path-coercion).
If a directory does not recursively contain any file, it is omitted from the store path contents.
*/
fileset,
}:
let
# We cannot rename matched attribute arguments, so let's work around it with an extra `let in` statement
# For now filesets are always paths
filesetPath = fileset;
in
let
fileset = _coerce "lib.fileset.toSource: `fileset`" filesetPath;
rootFilesystemRoot = (splitRoot root).root;
filesetFilesystemRoot = (splitRoot fileset._internalBase).root;
in
if ! isPath root then
if isStringLike root then
throw ''
lib.fileset.toSource: `root` "${toString root}" is a string-like value, but it should be a path instead.
Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
else
throw ''
lib.fileset.toSource: `root` is of type ${typeOf root}, but it should be a path instead.''
# Currently all Nix paths have the same filesystem root, but this could change in the future.
# See also ../path/README.md
else if rootFilesystemRoot != filesetFilesystemRoot then
throw ''
lib.fileset.toSource: Filesystem roots are not the same for `fileset` and `root` "${toString root}":
`root`: root "${toString rootFilesystemRoot}"
`fileset`: root "${toString filesetFilesystemRoot}"
Different roots are not supported.''
else if ! pathExists root then
throw ''
lib.fileset.toSource: `root` ${toString root} does not exist.''
else if pathType root != "directory" then
throw ''
lib.fileset.toSource: `root` ${toString root} is a file, but it should be a directory instead. Potential solutions:
- If you want to import the file into the store _without_ a containing directory, use string interpolation or `builtins.path` instead of this function.
- If you want to import the file into the store _with_ a containing directory, set `root` to the containing directory, such as ${toString (dirOf root)}, and set `fileset` to the file path.''
else if ! hasPrefix root fileset._internalBase then
throw ''
lib.fileset.toSource: `fileset` could contain files in ${toString fileset._internalBase}, which is not under the `root` ${toString root}. Potential solutions:
- Set `root` to ${toString fileset._internalBase} or any directory higher up. This changes the layout of the resulting store path.
- Set `fileset` to a file set that cannot contain files outside the `root` ${toString root}. This could change the files included in the result.''
else
cleanSourceWith {
name = "source";
src = root;
filter = _toSourceFilter fileset;
};
}

274
lib/fileset/internal.nix Normal file
View file

@ -0,0 +1,274 @@
{ lib ? import ../. }:
let
inherit (builtins)
isAttrs
isPath
isString
pathExists
readDir
typeOf
split
;
inherit (lib.attrsets)
attrValues
mapAttrs
;
inherit (lib.filesystem)
pathType
;
inherit (lib.lists)
all
elemAt
length
;
inherit (lib.path)
append
splitRoot
;
inherit (lib.path.subpath)
components
;
inherit (lib.strings)
isStringLike
concatStringsSep
substring
stringLength
;
in
# Rare case of justified usage of rec:
# - This file is internal, so the return value doesn't matter, no need to make things overridable
# - The functions depend on each other
# - We want to expose all of these functions for easy testing
rec {
# If you change the internal representation, make sure to:
# - Update this version
# - Adjust _coerce to also accept and coerce older versions
# - Update the description of the internal representation in ./README.md
_currentVersion = 0;
# Create a fileset, see ./README.md#fileset
# Type: path -> filesetTree -> fileset
_create = base: tree: {
_type = "fileset";
_internalVersion = _currentVersion;
_internalBase = base;
_internalTree = tree;
# Double __ to make it be evaluated and ordered first
__noEval = throw ''
lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.'';
};
# Coerce a value to a fileset, erroring when the value cannot be coerced.
# The string gives the context for error messages.
# Type: String -> Path -> fileset
_coerce = context: value:
if value._type or "" == "fileset" then
if value._internalVersion > _currentVersion then
throw ''
${context} is a file set created from a future version of the file set library with a different internal representation:
- Internal version of the file set: ${toString value._internalVersion}
- Internal version of the library: ${toString _currentVersion}
Make sure to update your Nixpkgs to have a newer version of `lib.fileset`.''
else
value
else if ! isPath value then
if isStringLike value then
throw ''
${context} "${toString value}" is a string-like value, but it should be a path instead.
Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
else
throw ''
${context} is of type ${typeOf value}, but it should be a path instead.''
else if ! pathExists value then
throw ''
${context} ${toString value} does not exist.''
else
_singleton value;
# Create a file set from a path.
# Type: Path -> fileset
_singleton = path:
let
type = pathType path;
in
if type == "directory" then
_create path type
else
# This turns a file path ./default.nix into a fileset with
# - _internalBase: ./.
# - _internalTree: {
# "default.nix" = <type>;
# # Other directory entries
# <name> = null;
# }
# See ./README.md#single-files
_create (dirOf path)
(_nestTree
(dirOf path)
[ (baseNameOf path) ]
type
);
/*
Nest a filesetTree under some extra components, while filling out all the other directory entries that aren't included with null
_nestTree ./. [ "foo" "bar" ] tree == {
foo = {
bar = tree;
<other-entries> = null;
}
<other-entries> = null;
}
Type: Path -> [ String ] -> filesetTree -> filesetTree
*/
_nestTree = targetBase: extraComponents: tree:
let
recurse = index: focusPath:
if index == length extraComponents then
tree
else
mapAttrs (_: _: null) (readDir focusPath)
// {
${elemAt extraComponents index} = recurse (index + 1) (append focusPath (elemAt extraComponents index));
};
in
recurse 0 targetBase;
# Expand "directory" filesetTree representation to the equivalent { <name> = filesetTree; }
# Type: Path -> filesetTree -> { <name> = filesetTree; }
_directoryEntries = path: value:
if isAttrs value then
value
else
readDir path;
/*
Simplify a filesetTree recursively:
- Replace all directories that have no files with `null`
This removes directories that would be empty
- Replace all directories with all files with `"directory"`
This speeds up the source filter function
Note that this function is strict, it evaluates the entire tree
Type: Path -> filesetTree -> filesetTree
*/
_simplifyTree = path: tree:
if tree == "directory" || isAttrs tree then
let
entries = _directoryEntries path tree;
simpleSubtrees = mapAttrs (name: _simplifyTree (path + "/${name}")) entries;
subtreeValues = attrValues simpleSubtrees;
in
# This triggers either when all files in a directory are filtered out
# Or when the directory doesn't contain any files at all
if all isNull subtreeValues then
null
# Triggers when we have the same as a `readDir path`, so we can turn it back into an equivalent "directory".
else if all isString subtreeValues then
"directory"
else
simpleSubtrees
else
tree;
# Turn a fileset into a source filter function suitable for `builtins.path`
# Only directories recursively containing at least one files are recursed into
# Type: Path -> fileset -> (String -> String -> Bool)
_toSourceFilter = fileset:
let
# Simplify the tree, necessary to make sure all empty directories are null
# which has the effect that they aren't included in the result
tree = _simplifyTree fileset._internalBase fileset._internalTree;
# Decompose the base into its components
# See ../path/README.md for why we're not just using `toString`
baseComponents = components (splitRoot fileset._internalBase).subpath;
# The base path as a string with a single trailing slash
baseString =
if baseComponents == [] then
# Need to handle the filesystem root specially
"/"
else
"/" + concatStringsSep "/" baseComponents + "/";
baseLength = stringLength baseString;
# Check whether a list of path components under the base path exists in the tree.
# This function is called often, so it should be fast.
# Type: [ String ] -> Bool
inTree = components:
let
recurse = index: localTree:
if isAttrs localTree then
# We have an attribute set, meaning this is a directory with at least one file
if index >= length components then
# The path may have no more components though, meaning the filter is running on the directory itself,
# so we always include it, again because there's at least one file in it.
true
else
# If we do have more components, the filter runs on some entry inside this directory, so we need to recurse
# We do +2 because builtins.split is an interleaved list of the inbetweens and the matches
recurse (index + 2) localTree.${elemAt components index}
else
# If it's not an attribute set it can only be either null (in which case it's not included)
# or a string ("directory" or "regular", etc.) in which case it's included
localTree != null;
in recurse 0 tree;
# Filter suited when there's no files
empty = _: _: false;
# Filter suited when there's some files
# This can't be used for when there's no files, because the base directory is always included
nonEmpty =
path: _:
let
# Add a slash to the path string, turning "/foo" to "/foo/",
# making sure to not have any false prefix matches below.
# Note that this would produce "//" for "/",
# but builtins.path doesn't call the filter function on the `path` argument itself,
# meaning this function can never receive "/" as an argument
pathSlash = path + "/";
in
# Same as `hasPrefix pathSlash baseString`, but more efficient.
# With base /foo/bar we need to include /foo:
# hasPrefix "/foo/" "/foo/bar/"
if substring 0 (stringLength pathSlash) baseString == pathSlash then
true
# Same as `! hasPrefix baseString pathSlash`, but more efficient.
# With base /foo/bar we need to exclude /baz
# ! hasPrefix "/baz/" "/foo/bar/"
else if substring 0 baseLength pathSlash != baseString then
false
else
# Same as `removePrefix baseString path`, but more efficient.
# From the above code we know that hasPrefix baseString pathSlash holds, so this is safe.
# We don't use pathSlash here because we only needed the trailing slash for the prefix matching.
# With base /foo and path /foo/bar/baz this gives
# inTree (split "/" (removePrefix "/foo/" "/foo/bar/baz"))
# == inTree (split "/" "bar/baz")
# == inTree [ "bar" "baz" ]
inTree (split "/" (substring baseLength (-1) path));
in
# Special case because the code below assumes that the _internalBase is always included in the result
# which shouldn't be done when we have no files at all in the base
if tree == null then
empty
else
nonEmpty;
}

View file

@ -0,0 +1,26 @@
# This overlay implements mocking of the lib.path.splitRoot function
# It pretends that the last component named "mock-root" is the root:
#
# splitRoot /foo/mock-root/bar/mock-root/baz
# => {
# root = /foo/mock-root/bar/mock-root;
# subpath = "./baz";
# }
self: super: {
path = super.path // {
splitRoot = path:
let
parts = super.path.splitRoot path;
components = self.path.subpath.components parts.subpath;
count = self.length components;
rootIndex = count - self.lists.findFirstIndex
(component: component == "mock-root")
(self.length components)
(self.reverseList components);
root = self.path.append parts.root (self.path.subpath.join (self.take rootIndex components));
subpath = self.path.subpath.join (self.drop rootIndex components);
in {
inherit root subpath;
};
};
}

350
lib/fileset/tests.sh Executable file
View file

@ -0,0 +1,350 @@
#!/usr/bin/env bash
# Tests lib.fileset
# Run:
# [nixpkgs]$ lib/fileset/tests.sh
# or:
# [nixpkgs]$ nix-build lib/tests/release.nix
set -euo pipefail
shopt -s inherit_errexit dotglob
die() {
# The second to last entry contains the line number of the top-level caller
lineIndex=$(( ${#BASH_LINENO[@]} - 2 ))
echo >&2 -e "test case at ${BASH_SOURCE[0]}:${BASH_LINENO[$lineIndex]} failed:" "$@"
exit 1
}
if test -n "${TEST_LIB:-}"; then
NIX_PATH=nixpkgs="$(dirname "$TEST_LIB")"
else
NIX_PATH=nixpkgs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.."; pwd)"
fi
export NIX_PATH
tmp="$(mktemp -d)"
clean_up() {
rm -rf "$tmp"
}
trap clean_up EXIT SIGINT SIGTERM
work="$tmp/work"
mkdir "$work"
cd "$work"
# Crudely unquotes a JSON string by just taking everything between the first and the second quote.
# We're only using this for resulting /nix/store paths, which can't contain " anyways,
# nor can they contain any other characters that would need to be escaped specially in JSON
# This way we don't need to add a dependency on e.g. jq
crudeUnquoteJSON() {
cut -d \" -f2
}
prefixExpression='let
lib = import <nixpkgs/lib>;
internal = import <nixpkgs/lib/fileset/internal.nix> {
inherit lib;
};
in
with lib;
with internal;
with lib.fileset;'
# Check that a nix expression evaluates successfully (strictly, coercing to json, read-write-mode).
# The expression has `lib.fileset` in scope.
# If a second argument is provided, the result is checked against it as a regex.
# Otherwise, the result is output.
# Usage: expectSuccess NIX [REGEX]
expectSuccess() {
local expr=$1
if [[ "$#" -gt 1 ]]; then
local expectedResultRegex=$2
fi
if ! result=$(nix-instantiate --eval --strict --json --read-write-mode --show-trace \
--expr "$prefixExpression $expr"); then
die "$expr failed to evaluate, but it was expected to succeed"
fi
if [[ -v expectedResultRegex ]]; then
if [[ ! "$result" =~ $expectedResultRegex ]]; then
die "$expr should have evaluated to this regex pattern:\n\n$expectedResultRegex\n\nbut this was the actual result:\n\n$result"
fi
else
echo "$result"
fi
}
# Check that a nix expression fails to evaluate (strictly, coercing to json, read-write-mode).
# And check the received stderr against a regex
# The expression has `lib.fileset` in scope.
# Usage: expectFailure NIX REGEX
expectFailure() {
local expr=$1
local expectedErrorRegex=$2
if result=$(nix-instantiate --eval --strict --json --read-write-mode --show-trace 2>"$tmp/stderr" \
--expr "$prefixExpression $expr"); then
die "$expr evaluated successfully to $result, but it was expected to fail"
fi
stderr=$(<"$tmp/stderr")
if [[ ! "$stderr" =~ $expectedErrorRegex ]]; then
die "$expr should have errored with this regex pattern:\n\n$expectedErrorRegex\n\nbut this was the actual error:\n\n$stderr"
fi
}
# We conditionally use inotifywait in checkFileset.
# Check early whether it's available
# TODO: Darwin support, though not crucial since we have Linux CI
if type inotifywait 2>/dev/null >/dev/null; then
canMonitorFiles=1
else
echo "Warning: Not checking that excluded files don't get accessed since inotifywait is not available" >&2
canMonitorFiles=
fi
# Check whether a file set includes/excludes declared paths as expected, usage:
#
# tree=(
# [a/b] =1 # Declare that file a/b should exist and expect it to be included in the store path
# [c/a] = # Declare that file c/a should exist and expect it to be excluded in the store path
# [c/d/]= # Declare that directory c/d/ should exist and expect it to be excluded in the store path
# )
# checkFileset './a' # Pass the fileset as the argument
declare -A tree
checkFileset() (
# New subshell so that we can have a separate trap handler, see `trap` below
local fileset=$1
# Process the tree into separate arrays for included paths, excluded paths and excluded files.
# Also create all the paths in the local directory
local -a included=()
local -a excluded=()
local -a excludedFiles=()
for p in "${!tree[@]}"; do
# If keys end with a `/` we treat them as directories, otherwise files
if [[ "$p" =~ /$ ]]; then
mkdir -p "$p"
isFile=
else
mkdir -p "$(dirname "$p")"
touch "$p"
isFile=1
fi
case "${tree[$p]}" in
1)
included+=("$p")
;;
0)
excluded+=("$p")
if [[ -n "$isFile" ]]; then
excludedFiles+=("$p")
fi
;;
*)
die "Unsupported tree value: ${tree[$p]}"
esac
done
# Start inotifywait in the background to monitor all excluded files (if any)
if [[ -n "$canMonitorFiles" ]] && (( "${#excludedFiles[@]}" != 0 )); then
coproc watcher {
# inotifywait outputs a string on stderr when ready
# Redirect it to stdout so we can access it from the coproc's stdout fd
# exec so that the coprocess is inotify itself, making the kill below work correctly
# See below why we listen to both open and delete_self events
exec inotifywait --format='%e %w' --event open,delete_self --monitor "${excludedFiles[@]}" 2>&1
}
# This will trigger when this subshell exits, no matter if successful or not
# After exiting the subshell, the parent shell will continue executing
trap 'kill "${watcher_PID}"' exit
# Synchronously wait until inotifywait is ready
while read -r -u "${watcher[0]}" line && [[ "$line" != "Watches established." ]]; do
:
done
fi
# Call toSource with the fileset, triggering open events for all files that are added to the store
expression="toSource { root = ./.; fileset = $fileset; }"
# crudeUnquoteJSON is safe because we get back a store path in a string
storePath=$(expectSuccess "$expression" | crudeUnquoteJSON)
# Remove all files immediately after, triggering delete_self events for all of them
rm -rf -- *
# Only check for the inotify events if we actually started inotify earlier
if [[ -v watcher ]]; then
# Get the first event
read -r -u "${watcher[0]}" event file
# There's only these two possible event timelines:
# - open, ..., open, delete_self, ..., delete_self: If some excluded files were read
# - delete_self, ..., delete_self: If no excluded files were read
# So by looking at the first event we can figure out which one it is!
case "$event" in
OPEN)
die "$expression opened excluded file $file when it shouldn't have"
;;
DELETE_SELF)
# Expected events
;;
*)
die "Unexpected event type '$event' on file $file that should be excluded"
;;
esac
fi
# For each path that should be included, make sure it does occur in the resulting store path
for p in "${included[@]}"; do
if [[ ! -e "$storePath/$p" ]]; then
die "$expression doesn't include path $p when it should have"
fi
done
# For each path that should be excluded, make sure it doesn't occur in the resulting store path
for p in "${excluded[@]}"; do
if [[ -e "$storePath/$p" ]]; then
die "$expression included path $p when it shouldn't have"
fi
done
)
#### Error messages #####
# Absolute paths in strings cannot be passed as `root`
expectFailure 'toSource { root = "/nix/store/foobar"; fileset = ./.; }' 'lib.fileset.toSource: `root` "/nix/store/foobar" is a string-like value, but it should be a path instead.
\s*Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'
# Only paths are accepted as `root`
expectFailure 'toSource { root = 10; fileset = ./.; }' 'lib.fileset.toSource: `root` is of type int, but it should be a path instead.'
# Different filesystem roots in root and fileset are not supported
mkdir -p {foo,bar}/mock-root
expectFailure 'with ((import <nixpkgs/lib>).extend (import <nixpkgs/lib/fileset/mock-splitRoot.nix>)).fileset;
toSource { root = ./foo/mock-root; fileset = ./bar/mock-root; }
' 'lib.fileset.toSource: Filesystem roots are not the same for `fileset` and `root` "'"$work"'/foo/mock-root":
\s*`root`: root "'"$work"'/foo/mock-root"
\s*`fileset`: root "'"$work"'/bar/mock-root"
\s*Different roots are not supported.'
rm -rf *
# `root` needs to exist
expectFailure 'toSource { root = ./a; fileset = ./.; }' 'lib.fileset.toSource: `root` '"$work"'/a does not exist.'
# `root` needs to be a file
touch a
expectFailure 'toSource { root = ./a; fileset = ./a; }' 'lib.fileset.toSource: `root` '"$work"'/a is a file, but it should be a directory instead. Potential solutions:
\s*- If you want to import the file into the store _without_ a containing directory, use string interpolation or `builtins.path` instead of this function.
\s*- If you want to import the file into the store _with_ a containing directory, set `root` to the containing directory, such as '"$work"', and set `fileset` to the file path.'
rm -rf *
# Only paths under `root` should be able to influence the result
mkdir a
expectFailure 'toSource { root = ./a; fileset = ./.; }' 'lib.fileset.toSource: `fileset` could contain files in '"$work"', which is not under the `root` '"$work"'/a. Potential solutions:
\s*- Set `root` to '"$work"' or any directory higher up. This changes the layout of the resulting store path.
\s*- Set `fileset` to a file set that cannot contain files outside the `root` '"$work"'/a. This could change the files included in the result.'
rm -rf *
# Path coercion only works for paths
expectFailure 'toSource { root = ./.; fileset = 10; }' 'lib.fileset.toSource: `fileset` is of type int, but it should be a path instead.'
expectFailure 'toSource { root = ./.; fileset = "/some/path"; }' 'lib.fileset.toSource: `fileset` "/some/path" is a string-like value, but it should be a path instead.
\s*Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'
# Path coercion errors for non-existent paths
expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: `fileset` '"$work"'/a does not exist.'
# File sets cannot be evaluated directly
expectFailure '_create ./. null' 'lib.fileset: Directly evaluating a file set is not supported. Use `lib.fileset.toSource` to turn it into a usable source instead.'
# Future versions of the internal representation are unsupported
expectFailure '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 1; }' '<tests>: value is a file set created from a future version of the file set library with a different internal representation:
\s*- Internal version of the file set: 1
\s*- Internal version of the library: 0
\s*Make sure to update your Nixpkgs to have a newer version of `lib.fileset`.'
# _create followed by _coerce should give the inputs back without any validation
expectSuccess '{
inherit (_coerce "<test>" (_create "base" "tree"))
_internalVersion _internalBase _internalTree;
}' '\{"_internalBase":"base","_internalTree":"tree","_internalVersion":0\}'
#### Resulting store path ####
# The store path name should be "source"
expectSuccess 'toSource { root = ./.; fileset = ./.; }' '"'"${NIX_STORE_DIR:-/nix/store}"'/.*-source"'
# We should be able to import an empty directory and end up with an empty result
tree=(
)
checkFileset './.'
# Directories recursively containing no files are not included
tree=(
[e/]=0
[d/e/]=0
[d/d/e/]=0
[d/d/f]=1
[d/f]=1
[f]=1
)
checkFileset './.'
# Check trees that could cause a naïve string prefix checking implementation to fail
tree=(
[a]=0
[ab/x]=0
[ab/xy]=1
[ab/xyz]=0
[abc]=0
)
checkFileset './ab/xy'
# Check path coercion examples in ../../doc/functions/fileset.section.md
tree=(
[a/x]=1
[a/b/y]=1
[c/]=0
[c/d/]=0
)
checkFileset './.'
tree=(
[a/x]=1
[a/b/y]=1
[c/]=0
[c/d/]=0
)
checkFileset './a'
tree=(
[a/x]=1
[a/b/y]=0
[c/]=0
[c/d/]=0
)
checkFileset './a/x'
tree=(
[a/x]=0
[a/b/y]=1
[c/]=0
[c/d/]=0
)
checkFileset './a/b'
tree=(
[a/x]=0
[a/b/y]=0
[c/]=0
[c/d/]=0
)
checkFileset './c'
# Test the source filter for the somewhat special case of files in the filesystem root
# We can't easily test this with the above functions because we can't write to the filesystem root and we don't want to make any assumptions which files are there in the sandbox
expectSuccess '_toSourceFilter (_create /. null) "/foo" ""' 'false'
expectSuccess '_toSourceFilter (_create /. { foo = "regular"; }) "/foo" ""' 'true'
expectSuccess '_toSourceFilter (_create /. { foo = null; }) "/foo" ""' 'false'
# TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets
echo >&2 tests ok

View file

@ -6,6 +6,7 @@
}:
let
lib = import ../.;
testWithNix = nix:
pkgs.runCommand "nixpkgs-lib-tests-nix-${nix.version}" {
buildInputs = [
@ -24,7 +25,7 @@ let
];
nativeBuildInputs = [
nix
];
] ++ lib.optional pkgs.stdenv.isLinux pkgs.inotify-tools;
strictDeps = true;
} ''
datadir="${nix}/share"
@ -50,6 +51,9 @@ let
echo "Running lib/tests/sources.sh"
TEST_LIB=$PWD/lib bash lib/tests/sources.sh
echo "Running lib/fileset/tests.sh"
TEST_LIB=$PWD/lib bash lib/fileset/tests.sh
echo "Running lib/tests/systems.nix"
[[ $(nix-instantiate --eval --strict lib/tests/systems.nix | tee /dev/stderr) == '[ ]' ]];