lib.fileset.unions: Don't stack overflow for many files

This commit is contained in:
Silvan Mosberger 2023-09-14 21:41:03 +02:00
parent e04e40d05e
commit 7ab764e575
2 changed files with 39 additions and 39 deletions

View file

@ -14,6 +14,7 @@ let
inherit (lib.attrsets) inherit (lib.attrsets)
attrValues attrValues
mapAttrs mapAttrs
zipAttrsWith
; ;
inherit (lib.filesystem) inherit (lib.filesystem)
@ -25,6 +26,7 @@ let
commonPrefix commonPrefix
drop drop
elemAt elemAt
filter
findFirstIndex findFirstIndex
foldl' foldl'
head head
@ -342,7 +344,9 @@ rec {
# Base paths can be unioned by taking their common prefix, # Base paths can be unioned by taking their common prefix,
# e.g. such that `union /foo/bar /foo/baz` has the base path `/foo` # e.g. such that `union /foo/bar /foo/baz` has the base path `/foo`
# A list of path components common to all base paths # A list of path components common to all base paths.
# Note that commonPrefix can only be fully evaluated,
# so this cannot cause a stack overflow due to a build-up of unevaluated thunks.
commonBaseComponents = foldl' commonBaseComponents = foldl'
(components: el: commonPrefix components el._internalBaseComponents) (components: el: commonPrefix components el._internalBaseComponents)
first._internalBaseComponents first._internalBaseComponents
@ -371,46 +375,29 @@ rec {
) filesets; ) filesets;
# Folds all trees together into a single one using _unionTree # Folds all trees together into a single one using _unionTree
resultTree = foldl' # We do not use a fold here because it would cause a thunk build-up
_unionTree # which could cause a stack overflow for a large number of trees
(head trees) resultTree = _unionTrees trees;
# We could also not do the `tail` here to avoid a list allocation,
# but then we'd have to pay for a potentially expensive
# but unnecessary `_unionTree (head trees) (head trees)` call.
(tail trees);
in in
_create commonBase resultTree; _create commonBase resultTree;
# The union of multiple filesetTree's with the same base path.
# Legend for branch tables in the below tree combinator functions # Later elements are only evaluated if necessary.
# - lhs\rhs : The values for the left hand side (columns) and right hand side (rows) arguments # Type: [ filesetTree ] -> filesetTree
# - null : Value `null`, a file/directory that's not included _unionTrees = trees:
# - attrs : Satisfies `isAttrs value`, an explicitly listed directory containing nested trees let
# - str : Satisfies `isString value`, either "directory" or a file type, a fully included file/directory stringIndex = findFirstIndex isString null trees;
# - rec : A result computed by recursing withoutNull = filter (tree: tree != null) trees;
# - <number> : Indicates that the result is computed by the branch with that number in
# - * : Only the lhs/rhs needs to be evaluated, the result is always the same no matter the other side if stringIndex != null then
# If there's a string, it's always a fully included tree (dir or file),
# The union of two filesetTree's with the same base path. # no need to look at other elements
# The second argument is only evaluated if necessary. elemAt trees stringIndex
# Type: filesetTree -> filesetTree -> filesetTree else if withoutNull == [ ] then
_unionTree = lhs: rhs: # If all trees are null, then the resulting tree is also null
# This branch table shows the correctness of the branch conditions, null
# see the legend above for more details
#
# lhs\rhs | null | attrs | str |
# ------- | ------- | ------- | ----- |
# null | 1 null | 3 attrs | 3 str |
# attrs | 1 attrs | 2 rec | 3 str |
# * str | 1 str | 1 str | 1 str |
if isString lhs || rhs == null then
# Branch 1
lhs
else if isAttrs lhs && isAttrs rhs then
# Branch 2
mapAttrs (name: _unionTree lhs.${name}) rhs
else else
# Branch 3 # The non-null elements have to be attribute sets representing partial trees
rhs; # We need to recurse into those
zipAttrsWith (name: _unionTrees) withoutNull;
} }

View file

@ -457,6 +457,19 @@ checkFileset 'unions [ ./x/a ./x/b ./y/a ./z/b ]'
checkFileset 'union (union ./x/a ./x/b) (union ./y/a ./z/b)' checkFileset 'union (union ./x/a ./x/b) (union ./y/a ./z/b)'
checkFileset 'union (union (union ./x/a ./x/b) ./y/a) ./z/b' checkFileset 'union (union (union ./x/a ./x/b) ./y/a) ./z/b'
# unions should not stack overflow, even if many elements are passed
tree=()
for i in $(seq 1000); do
tree[$i/a]=1
tree[$i/b]=0
done
(
# Locally limit the maximum stack size to 100 * 1024 bytes
# If unions was implemented recursively, this would stack overflow
ulimit -s 100
checkFileset 'unions (mapAttrsToList (name: _: ./. + "/${name}/a") (builtins.readDir ./.))'
)
# TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets # TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets
echo >&2 tests ok echo >&2 tests ok