lib.fileset: Internal representation v2, ~12x faster unions!

$ ./benchmark.sh HEAD
    [...]
    Mean CPU time 0.04006 (σ = 0.0040146) for 10 runs is 8.193619775953792% (σ = 0.9584251052704821%) of the old value 0.488917 (σ = 0.0294955)
    [...]
This commit is contained in:
Silvan Mosberger 2023-09-14 23:34:21 +02:00
parent 45bf2c7617
commit fe6c1539cc
3 changed files with 32 additions and 48 deletions

View file

@ -41,7 +41,7 @@ An attribute set with these values:
- `_type` (constant string `"fileset"`):
Tag to indicate this value is a file set.
- `_internalVersion` (constant `1`, the current version):
- `_internalVersion` (constant `2`, the current version):
Version of the representation.
- `_internalBase` (path):
@ -67,8 +67,8 @@ An attribute set with these values:
One of the following:
- `{ <name> = filesetTree; }`:
A directory with a nested `filesetTree` value for every directory entry.
Even entries that aren't included are present as `null` because it improves laziness and allows using this as a sort of `builtins.readDir` cache.
A directory with a nested `filesetTree` value for directory entries.
Entries not included may either be omitted or set to `null`, as necessary to improve efficiency or laziness.
- `"directory"`:
A directory with all its files included recursively, allowing early cutoff for some operations.

View file

@ -14,6 +14,7 @@ let
inherit (lib.attrsets)
attrValues
mapAttrs
setAttrByPath
zipAttrsWith
;
@ -63,7 +64,7 @@ rec {
# - Increment this version
# - Add an additional migration function below
# - Update the description of the internal representation in ./README.md
_currentVersion = 1;
_currentVersion = 2;
# Migrations between versions. The 0th element converts from v0 to v1, and so on
migrations = [
@ -79,6 +80,15 @@ rec {
_internalBaseComponents = components parts.subpath;
}
)
# Convert v1 into v2: filesetTree's can now also omit attributes to signal paths not being included
(
filesetV1:
# This change is backwards compatible (but not forwards compatible, so we still need a new version)
filesetV1 // {
_internalVersion = 2;
}
)
];
# Create a fileset, see ./README.md#fileset
@ -174,50 +184,23 @@ rec {
# - _internalBase: ./.
# - _internalTree: {
# "default.nix" = <type>;
# # Other directory entries
# <name> = null;
# }
# See ./README.md#single-files
_create (dirOf path)
(_nestTree
(dirOf path)
[ (baseNameOf path) ]
type
);
{
${baseNameOf path} = type;
};
/*
Nest a filesetTree under some extra components, while filling out all the other directory entries that aren't included with null
_nestTree ./. [ "foo" "bar" ] tree == {
foo = {
bar = tree;
<other-entries> = null;
}
<other-entries> = null;
}
Type: Path -> [ String ] -> filesetTree -> filesetTree
*/
_nestTree = targetBase: extraComponents: tree:
let
recurse = index: focusPath:
if index == length extraComponents then
tree
else
mapAttrs (_: _: null) (readDir focusPath)
// {
${elemAt extraComponents index} = recurse (index + 1) (append focusPath (elemAt extraComponents index));
};
in
recurse 0 targetBase;
# Expand "directory" filesetTree representation to the equivalent { <name> = filesetTree; }
# Expand a directory representation to an equivalent one in attribute set form.
# All directory entries are included in the result.
# Type: Path -> filesetTree -> { <name> = filesetTree; }
_directoryEntries = path: value:
if isAttrs value then
value
if value == "directory" then
readDir path
else
readDir path;
# Set all entries not present to null
mapAttrs (name: value: null) (readDir path)
// value;
/*
Simplify a filesetTree recursively:
@ -368,8 +351,7 @@ rec {
# while the tree under `/foo/baz` gets nested under `{ baz = ...; ... }`
# Therefore allowing combined operations over them.
trees = map (fileset:
_nestTree
commonBase
setAttrByPath
(drop commonBaseComponentsCount fileset._internalBaseComponents)
fileset._internalTree
) filesets;

View file

@ -285,19 +285,21 @@ expectFailure 'union ./. ./.' 'lib.fileset: Directly evaluating a file set is no
# Past versions of the internal representation are supported
expectEqual '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 0; _internalBase = ./.; }' \
'{ _internalBase = ./.; _internalBaseComponents = path.subpath.components (path.splitRoot ./.).subpath; _internalBaseRoot = /.; _internalVersion = 1; _type = "fileset"; }'
'{ _internalBase = ./.; _internalBaseComponents = path.subpath.components (path.splitRoot ./.).subpath; _internalBaseRoot = /.; _internalVersion = 2; _type = "fileset"; }'
expectEqual '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 1; }' \
'{ _type = "fileset"; _internalVersion = 2; }'
# Future versions of the internal representation are unsupported
expectFailure '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 2; }' '<tests>: value is a file set created from a future version of the file set library with a different internal representation:
\s*- Internal version of the file set: 2
\s*- Internal version of the library: 1
expectFailure '_coerce "<tests>: value" { _type = "fileset"; _internalVersion = 3; }' '<tests>: value is a file set created from a future version of the file set library with a different internal representation:
\s*- Internal version of the file set: 3
\s*- Internal version of the library: 2
\s*Make sure to update your Nixpkgs to have a newer version of `lib.fileset`.'
# _create followed by _coerce should give the inputs back without any validation
expectEqual '{
inherit (_coerce "<test>" (_create ./. "directory"))
_internalVersion _internalBase _internalTree;
}' '{ _internalBase = ./.; _internalTree = "directory"; _internalVersion = 1; }'
}' '{ _internalBase = ./.; _internalTree = "directory"; _internalVersion = 2; }'
#### Resulting store path ####