Merge pull request #47411 from graham-at-target/multi-layered-images-crafted

Multi-Layered Docker Images
This commit is contained in:
lewo 2018-10-01 09:48:24 +02:00 committed by GitHub
commit 56b4db9710
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 926 additions and 6 deletions

View file

@ -682,6 +682,177 @@ hello latest de2bf4786de6 About a minute ago 25.2MB
</example>
</section>
<section xml:id="ssec-pkgs-dockerTools-buildLayeredImage">
<title>buildLayeredImage</title>
<para>
Create a Docker image with many of the store paths being on their own layer
to improve sharing between images.
</para>
<variablelist>
<varlistentry>
<term>
<varname>name</varname>
</term>
<listitem>
<para>
The name of the resulting image.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<varname>tag</varname> <emphasis>optional</emphasis>
</term>
<listitem>
<para>
Tag of the generated image.
</para>
<para>
<emphasis>Default:</emphasis> the output path's hash
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<varname>contents</varname> <emphasis>optional</emphasis>
</term>
<listitem>
<para>
Top level paths in the container. Either a single derivation, or a list
of derivations.
</para>
<para>
<emphasis>Default:</emphasis> <literal>[]</literal>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<varname>config</varname> <emphasis>optional</emphasis>
</term>
<listitem>
<para>
Run-time configuration of the container. A full list of the options is
available in the
<link xlink:href="https://github.com/moby/moby/blob/master/image/spec/v1.2.md#image-json-field-descriptions">
Docker Image Specification v1.2.0 </link>.
</para>
<para>
<emphasis>Default:</emphasis> <literal>{}</literal>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<varname>created</varname> <emphasis>optional</emphasis>
</term>
<listitem>
<para>
Date and time the layers were created. Follows the same
<literal>now</literal> exception supported by
<literal>buildImage</literal>.
</para>
<para>
<emphasis>Default:</emphasis> <literal>1970-01-01T00:00:01Z</literal>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term>
<varname>maxLayers</varname> <emphasis>optional</emphasis>
</term>
<listitem>
<para>
Maximum number of layers to create.
</para>
<para>
<emphasis>Default:</emphasis> <literal>24</literal>
</para>
</listitem>
</varlistentry>
</variablelist>
<section xml:id="dockerTools-buildLayeredImage-arg-contents">
<title>Behavior of <varname>contents</varname> in the final image</title>
<para>
Each path directly listed in <varname>contents</varname> will have a
symlink in the root of the image.
</para>
<para>
For example:
<programlisting><![CDATA[
pkgs.dockerTools.buildLayeredImage {
name = "hello";
contents = [ pkgs.hello ];
}
]]></programlisting>
will create symlinks for all the paths in the <literal>hello</literal>
package:
<screen><![CDATA[
/bin/hello -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/bin/hello
/share/info/hello.info -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/info/hello.info
/share/locale/bg/LC_MESSAGES/hello.mo -> /nix/store/h1zb1padqbbb7jicsvkmrym3r6snphxg-hello-2.10/share/locale/bg/LC_MESSAGES/hello.mo
]]></screen>
</para>
</section>
<section xml:id="dockerTools-buildLayeredImage-arg-config">
<title>Automatic inclusion of <varname>config</varname> references</title>
<para>
The closure of <varname>config</varname> is automatically included in the
closure of the final image.
</para>
<para>
This allows you to make very simple Docker images with very little code.
This container will start up and run <command>hello</command>:
<programlisting><![CDATA[
pkgs.dockerTools.buildLayeredImage {
name = "hello";
config.Cmd = [ "${pkgs.hello}/bin/hello" ];
}
]]></programlisting>
</para>
</section>
<section xml:id="dockerTools-buildLayeredImage-arg-maxLayers">
<title>Adjusting <varname>maxLayers</varname></title>
<para>
Increasing the <varname>maxLayers</varname> increases the number of layers
which have a chance to be shared between different images.
</para>
<para>
Modern Docker installations support up to 128 layers; however, older
versions support as few as 42.
</para>
<para>
If the produced image will not be extended by other Docker builds, it is
safe to set <varname>maxLayers</varname> to <literal>128</literal>.
However, it will then be impossible to extend the image further.
</para>
<para>
The first (<literal>maxLayers-2</literal>) most "popular" paths will have
their own individual layers, then layer #<literal>maxLayers-1</literal>
will contain all the remaining "unpopular" paths, and finally layer
#<literal>maxLayers</literal> will contain the Image configuration.
</para>
<para>
Docker's Layers are not inherently ordered, they are content-addressable
and are not explicitly layered until they are composed in to an Image.
</para>
</section>
</section>
<section xml:id="ssec-pkgs-dockerTools-fetchFromRegistry">
<title>pullImage</title>

View file

@ -58,5 +58,9 @@ import ./make-test.nix ({ pkgs, ... }: {
# Ensure Docker images can use an unstable date
$docker->succeed("docker load --input='${pkgs.dockerTools.examples.bash}'");
$docker->succeed("[ '1970-01-01T00:00:01Z' != \"\$(docker inspect ${pkgs.dockerTools.examples.unstableDate.imageName} | ${pkgs.jq}/bin/jq -r .[].Created)\" ]");
# Ensure Layered Docker images work
$docker->succeed("docker load --input='${pkgs.dockerTools.examples.layered-image}'");
$docker->succeed("docker run --rm ${pkgs.dockerTools.examples.layered-image.imageName}");
'';
})

View file

@ -1,4 +1,5 @@
{
symlinkJoin,
coreutils,
docker,
e2fsprogs,
@ -19,6 +20,7 @@
utillinux,
vmTools,
writeReferencesToFile,
referencesByPopularity,
writeScript,
writeText,
}:
@ -77,7 +79,9 @@ rec {
ln -sT ${docker.src}/components/engine/pkg/tarsum src/github.com/docker/docker/pkg/tarsum
go build
cp tarsum $out
mkdir -p $out/bin
cp tarsum $out/bin/
'';
# buildEnv creates symlinks to dirs, which is hard to edit inside the overlay VM
@ -270,6 +274,81 @@ rec {
perl ${pkgs.pathsFromGraph} closure-* > $out/storePaths
'';
# Create $maxLayers worth of Docker Layers, one layer per store path
# unless there are more paths than $maxLayers. In that case, create
# $maxLayers-1 layers for the most popular paths, and smush the remaining
# store paths into one final layer.
mkManyPureLayers = {
  name,
  # Files to add to the layer.
  closure,
  # The image config JSON; impurities are stripped from it to produce the
  # per-layer `json` metadata template (generic.json).
  configJson,
  # Docker has a 42-layer maximum, we pick 24 to ensure there is plenty
  # of room for extension
  maxLayers ? 24
}:
runCommand "${name}-granular-docker-layers" {
  inherit maxLayers;
  # One store path per line, most "popular" (most referenced) first.
  paths = referencesByPopularity closure;
  buildInputs = [ jshon rsync tarsum ];
  enableParallelBuilding = true;
}
''
  # Delete impurities for store path layers, so they don't get
  # shared and taint other projects.
  cat ${configJson} \
    | jshon -d config \
    | jshon -s "1970-01-01T00:00:01Z" -i created > generic.json

  # WARNING!
  # The following code is fiddly w.r.t. ensuring every layer is
  # created, and that no paths are missed. If you change the
  # following head and tail call lines, double-check that your
  # code behaves properly when the number of layers equals:
  # maxLayers-1, maxLayers, and maxLayers+1

  # One layer per popular path; `cat -n` supplies the layer number.
  head -n $((maxLayers - 1)) $paths | cat -n | xargs -P$NIX_BUILD_CORES -n2 ${./store-path-to-layer.sh}
  # If any paths remain, pack them all into the one final layer.
  if [ $(cat $paths | wc -l) -ge $maxLayers ]; then
    tail -n+$maxLayers $paths | xargs ${./store-path-to-layer.sh} $maxLayers
  fi

  echo "Finished building layer '$name'"

  mv ./layers $out
'';
# Create a "Customisation" layer which adds symlinks at the root of
# the image to the root paths of the closure. Also add the config
# data like what command to run and the environment to run it in.
mkCustomisationLayer = {
  name,
  # Files to add to the layer.
  contents,
  # Config JSON to which the layer's tarsum checksum is added.
  baseJson,
  # Ownership applied to every file in the layer tarball.
  uid ? 0, gid ? 0,
}:
runCommand "${name}-customisation-layer" {
  buildInputs = [ jshon rsync tarsum ];
}
''
  # The symlink forest of `contents` becomes the root of the image.
  cp -r ${contents}/ ./layer

  # Tar up the layer and throw it into 'layer.tar'.
  echo "Packing layer..."
  mkdir $out
  tar -C layer --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=${toString uid} --group=${toString gid} -cf $out/layer.tar .

  # Compute a checksum of the tarball.
  echo "Computing layer checksum..."
  tarhash=$(tarsum < $out/layer.tar)

  # Add a 'checksum' field to the JSON, with the value set to the
  # checksum of the tarball.
  cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json

  # Indicate to docker that we're using schema version 1.0.
  echo -n "1.0" > $out/VERSION
'';
# Create a "layer" (set of files).
mkPureLayer = {
# Name of the layer
@ -287,7 +366,7 @@ rec {
}:
runCommand "docker-layer-${name}" {
inherit baseJson contents extraCommands;
buildInputs = [ jshon rsync ];
buildInputs = [ jshon rsync tarsum ];
}
''
mkdir layer
@ -314,11 +393,11 @@ rec {
# Compute a checksum of the tarball.
echo "Computing layer checksum..."
tarsum=$(${tarsum} < $out/layer.tar)
tarhash=$(tarsum < $out/layer.tar)
# Add a 'checksum' field to the JSON, with the value set to the
# checksum of the tarball.
cat ${baseJson} | jshon -s "$tarsum" -i checksum > $out/json
cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > $out/VERSION
@ -402,8 +481,8 @@ rec {
# Compute the tar checksum and add it to the output json.
echo "Computing checksum..."
ts=$(${tarsum} < $out/layer.tar)
cat ${baseJson} | jshon -s "$ts" -i checksum > $out/json
tarhash=$(${tarsum}/bin/tarsum < $out/layer.tar)
cat ${baseJson} | jshon -s "$tarhash" -i checksum > $out/json
# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > $out/VERSION
@ -411,6 +490,104 @@ rec {
'';
};
# Build a Docker image where (most of) the store paths of the closure get
# their own layer, to maximise layer sharing between images.
buildLayeredImage = {
  # Image Name
  name,
  # Image tag, the Nix's output hash will be used if null
  tag ? null,
  # Files to put on the image (a nix store path or list of paths).
  contents ? [],
  # Docker config; e.g. what command to run on the container.
  config ? {},
  # Time of creation of the image. Passing "now" will make the
  # created date be the time of building.
  created ? "1970-01-01T00:00:01Z",
  # Docker's lowest maximum layer limit is 42-layers for an old
  # version of the AUFS graph driver. We pick 24 to ensure there is
  # plenty of room for extension. I believe the actual maximum is
  # 128.
  maxLayers ? 24
}:
let
  uid = 0;
  gid = 0;
  baseName = baseNameOf name;
  # Normalise `contents` to a single store path of symlinks.
  contentsEnv = symlinkJoin { name = "bulk-layers"; paths = (if builtins.isList contents then contents else [ contents ]); };

  # The image config; when created == "now" the `created` field is
  # rewritten at build time (impure, but explicitly requested).
  configJson = let
    pure = writeText "${baseName}-config.json" (builtins.toJSON {
      inherit created config;
      architecture = "amd64";
      os = "linux";
    });
    impure = runCommand "${baseName}-standard-dynamic-date.json"
      { buildInputs = [ jq ]; }
      ''
        jq ".created = \"$(TZ=utc date --iso-8601="seconds")\"" ${pure} > $out
      '';
  in if created == "now" then impure else pure;

  # Per-store-path layers, ordered by popularity.
  bulkLayers = mkManyPureLayers {
    name = baseName;
    closure = writeText "closure" "${contentsEnv} ${configJson}";
    # One layer will be taken up by the customisationLayer, so
    # take up one less.
    maxLayers = maxLayers - 1;
    inherit configJson;
  };
  # Final layer: root symlinks plus the image configuration.
  customisationLayer = mkCustomisationLayer {
    name = baseName;
    contents = contentsEnv;
    baseJson = configJson;
    inherit uid gid;
  };
  result = runCommand "docker-image-${baseName}.tar.gz" {
    buildInputs = [ jshon pigz coreutils findutils jq ];
    # Image name and tag must be lowercase
    imageName = lib.toLower name;
    imageTag = if tag == null then "" else lib.toLower tag;
    baseJson = configJson;
  } ''
    ${lib.optionalString (tag == null) ''
      outName="$(basename "$out")"
      outHash=$(echo "$outName" | cut -d - -f 1)
      imageTag=$outHash
    ''}
    # Layer directories are numbered; sort them numerically by that
    # path component, then append the customisation layer last.
    find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list
    echo ${customisationLayer} >> layer-list

    mkdir image
    imageJson=$(cat ${configJson} | jq ". + {\"rootfs\": {\"diff_ids\": [], \"type\": \"layers\"}}")
    manifestJson=$(jq -n "[{\"RepoTags\":[\"$imageName:$imageTag\"]}]")
    # Accumulate each layer's diff_id, history entry, and manifest entry.
    for layer in $(cat layer-list); do
      layerChecksum=$(sha256sum $layer/layer.tar | cut -d ' ' -f1)
      layerID=$(sha256sum "$layer/json" | cut -d ' ' -f 1)
      ln -s "$layer" "./image/$layerID"

      manifestJson=$(echo "$manifestJson" | jq ".[0].Layers |= [\"$layerID/layer.tar\"] + .")
      imageJson=$(echo "$imageJson" | jq ".history |= [{\"created\": \"$(jq -r .created ${configJson})\"}] + .")
      imageJson=$(echo "$imageJson" | jq ".rootfs.diff_ids |= [\"sha256:$layerChecksum\"] + .")
    done
    imageJsonChecksum=$(echo "$imageJson" | sha256sum | cut -d ' ' -f1)
    echo "$imageJson" > "image/$imageJsonChecksum.json"
    manifestJson=$(echo "$manifestJson" | jq ".[0].Config = \"$imageJsonChecksum.json\"")
    echo "$manifestJson" > image/manifest.json

    # Legacy `repositories` file; $layerID here is the last (topmost) layer.
    jshon -n object \
      -n object -s "$layerID" -i "$imageTag" \
      -i "$imageName" > image/repositories

    echo "Cooking the image..."
    tar -C image --dereference --hard-dereference --sort=name --mtime="@$SOURCE_DATE_EPOCH" --owner=0 --group=0 --mode=a-w --xform s:'^./':: -c . | pigz -nT > $out

    echo "Finished."
  '';

in
result;
# 1. extract the base image
# 2. create the layer
# 3. add layer deps to the layer itself, diffing with the base image

View file

@ -150,4 +150,11 @@ rec {
contents = [ pkgs.coreutils ];
created = "now";
};
# 10. Create a layered image
layered-image = pkgs.dockerTools.buildLayeredImage {
name = "layered-image";
tag = "latest";
config.Cmd = [ "${pkgs.hello}/bin/hello" ];
};
}

View file

@ -0,0 +1,24 @@
#!/bin/sh
# Pack one or more store paths into a single Docker layer directory:
# ./layers/$layerNumber/{layer.tar,json,VERSION}.
#
# Usage: store-path-to-layer.sh <layerNumber> <storePath>...
#
# Expects ./generic.json (the image config with impurities removed) in
# the working directory, and `tarsum`/`jshon` on PATH.
set -eu

layerNumber=$1
shift

layerPath="./layers/$layerNumber"

# $* (not $@): the paths are joined into one word for display only.
echo "Creating layer #$layerNumber for $*"

mkdir -p "$layerPath"
# -r appends, so running this repeatedly for the same layer accumulates
# paths into one tarball. Ownership/mtime are fixed for reproducibility.
tar -rpf "$layerPath/layer.tar" --hard-dereference --sort=name \
    --mtime="@$SOURCE_DATE_EPOCH" \
    --owner=0 --group=0 "$@"

# Compute a checksum of the tarball.
tarhash=$(tarsum < "$layerPath/layer.tar")

# Add a 'checksum' field to the JSON, with the value set to the
# checksum of the tarball.
jshon -s "$tarhash" -i checksum < ./generic.json > "$layerPath/json"

# Indicate to docker that we're using schema version 1.0.
echo -n "1.0" > "$layerPath/VERSION"

View file

@ -0,0 +1,520 @@
# IMPORTANT: Making changes?
#
# Validate your changes with python3 ./closure-graph.py --test
# Using a simple algorithm, convert the references to a path in to a
# sorted list of dependent paths based on how often they're referenced
# and how deep in the tree they live. Equally-"popular" paths are then
# sorted by name.
#
# The existing writeReferencesToFile prints the paths in a simple
# ascii-based sorting of the paths.
#
# Sorting the paths by graph improves the chances that the difference
# between two builds appear near the end of the list, instead of near
# the beginning. This makes a difference for Nix builds which export a
# closure for another program to consume, if that program implements its
# own level of binary diffing.
#
# For an example, Docker Images. If each store path is a separate layer
# then Docker Images can be very efficiently transferred between systems,
# and we get very good cache reuse between images built with the same
# version of Nixpkgs. However, since Docker only reliably supports a
# small number of layers (42) it is important to pick the individual
# layers carefully. By storing very popular store paths in the first 40
# layers, we improve the chances that the next Docker image will share
# many of those layers.*
#
# Given the dependency tree:
#
# A - B - C - D -\
# \ \ \ \
# \ \ \ \
# \ \ - E ---- F
# \- G
#
# Nodes which have multiple references are duplicated:
#
# A - B - C - D - F
# \ \ \
# \ \ \- E - F
# \ \
# \ \- E - F
# \
# \- G
#
# Each leaf node is now replaced by a counter defaulted to 1:
#
# A - B - C - D - (F:1)
# \ \ \
# \ \ \- E - (F:1)
# \ \
# \ \- E - (F:1)
# \
# \- (G:1)
#
# Then each leaf counter is merged with its parent node, replacing the
# parent node with a counter of 1, and each existing counter being
# incremented by 1. That is to say `- D - (F:1)` becomes `- (D:1, F:2)`:
#
# A - B - C - (D:1, F:2)
# \ \ \
# \ \ \- (E:1, F:2)
# \ \
# \ \- (E:1, F:2)
# \
# \- (G:1)
#
# Then each leaf counter is merged with its parent node again, merging
# any counters, then incrementing each:
#
# A - B - (C:1, D:2, E:2, F:5)
# \ \
# \ \- (E:1, F:2)
# \
# \- (G:1)
#
# And again:
#
# A - (B:1, C:2, D:3, E:4, F:8)
# \
# \- (G:1)
#
# And again:
#
# (A:1, B:2, C:3, D:4, E:5, F:9, G:2)
#
# and then paths have the following "popularity":
#
# A 1
# B 2
# C 3
# D 4
# E 5
# F 9
# G 2
#
# and the popularity contest would result in the paths being printed as:
#
# F
# E
# D
# C
# B
# G
# A
#
# * Note: People who have used a Dockerfile before assume Docker's
# Layers are inherently ordered. However, this is not true -- Docker
# layers are content-addressable and are not explicitly layered until
# they are composed in to an Image.
import sys
import json
import unittest
from pprint import pprint
from collections import defaultdict
# Find paths in the original dataset which are never referenced by
# any other paths
def find_roots(closures):
    """Return the paths of closure entries that no other entry references.

    These are the roots of the dependency graph. The input order of
    `closures` is preserved in the result.

    closures: list of {"path": str, "references": [str, ...]} dicts.
    """
    # Stray `;` removed; manual accumulator replaced with a comprehension.
    return [
        closure['path']
        for closure in closures
        if not any_refer_to(closure['path'], closures)
    ]
class TestFindRoots(unittest.TestCase):
    """Unit tests for find_roots."""

    def test_find_roots(self):
        # bar is referenced by foo, so it is not a root; foo and hello
        # are referenced by nothing (self-references don't count).
        self.assertCountEqual(
            find_roots([
                {
                    "path": "/nix/store/foo",
                    "references": [
                        "/nix/store/foo",
                        "/nix/store/bar"
                    ]
                },
                {
                    "path": "/nix/store/bar",
                    "references": [
                        "/nix/store/bar",
                        "/nix/store/tux"
                    ]
                },
                {
                    "path": "/nix/store/hello",
                    "references": [
                    ]
                }
            ]),
            ["/nix/store/foo", "/nix/store/hello"]
        )
def any_refer_to(path, closures):
    """Return True iff some *other* closure entry lists `path` in its
    references. An entry's reference to itself does not count.
    """
    return any(
        path in entry['references']
        for entry in closures
        if entry['path'] != path
    )
class TestAnyReferTo(unittest.TestCase):
    """Unit tests for any_refer_to."""

    def test_has_references(self):
        # bar appears in foo's references, so it is referred to.
        self.assertTrue(
            any_refer_to(
                "/nix/store/bar",
                [
                    {
                        "path": "/nix/store/foo",
                        "references": [
                            "/nix/store/bar"
                        ]
                    },
                ]
            ),
        )

    def test_no_references(self):
        # foo only appears in its own references; self-references are
        # deliberately ignored.
        self.assertFalse(
            any_refer_to(
                "/nix/store/foo",
                [
                    {
                        "path": "/nix/store/foo",
                        "references": [
                            "/nix/store/foo",
                            "/nix/store/bar"
                        ]
                    },
                ]
            ),
        )
def all_paths(closures):
    """Return every path mentioned anywhere in `closures` (as a `path`
    or inside a `references` list), deduplicated and sorted.

    Bug fix: the original did `paths.sort()` *before* `list(set(paths))`,
    so the sort was discarded and the result order depended on set
    iteration order (hash-randomized across runs). Sorting after
    deduplication makes the output deterministic.
    """
    paths = []
    for closure in closures:
        paths.append(closure['path'])
        paths.extend(closure['references'])
    return sorted(set(paths))
class TestAllPaths(unittest.TestCase):
    """Unit tests for all_paths."""

    def test_returns_all_paths(self):
        self.assertCountEqual(
            all_paths([
                {
                    "path": "/nix/store/foo",
                    "references": [
                        "/nix/store/foo",
                        "/nix/store/bar"
                    ]
                },
                {
                    "path": "/nix/store/bar",
                    "references": [
                        "/nix/store/bar",
                        "/nix/store/tux"
                    ]
                },
                {
                    "path": "/nix/store/hello",
                    "references": [
                    ]
                }
            ]),
            ["/nix/store/foo", "/nix/store/bar", "/nix/store/hello", "/nix/store/tux",]
        )

    def test_no_references(self):
        # Fixed: this test was a copy-paste of
        # TestAnyReferTo.test_no_references and exercised any_refer_to
        # instead of all_paths. It now checks that an entry with no
        # references yields exactly its own path, once.
        self.assertCountEqual(
            all_paths([
                {
                    "path": "/nix/store/hello",
                    "references": []
                }
            ]),
            ["/nix/store/hello"]
        )
# Convert:
#
# [
# { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },
# { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
# { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
# { path: /nix/store/tux, references: [ /nix/store/tux ] }
# ]
#
# To:
# {
# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
# /nix/store/bar: [ /nix/store/baz ],
# /nix/store/baz: [ /nix/store/tux ] },
# /nix/store/tux: [ ]
# }
#
# Note that it drops self-references to avoid loops.
def make_lookup(closures):
    """Build a {path: [references...]} mapping from closure entries.

    Self-references (a path listing itself) are dropped, since they
    would otherwise introduce loops into the graph walk.
    """
    return {
        entry['path']: [
            reference
            for reference in entry['references']
            # paths often self-refer
            if reference != entry['path']
        ]
        for entry in closures
    }
class TestMakeLookup(unittest.TestCase):
    """Unit tests for make_lookup."""

    # Renamed from `test_returns_lookp` (typo). unittest discovers the
    # method via its `test_` prefix either way, so nothing else changes.
    def test_returns_lookup(self):
        self.assertDictEqual(
            make_lookup([
                {
                    "path": "/nix/store/foo",
                    "references": [
                        "/nix/store/foo",
                        "/nix/store/bar"
                    ]
                },
                {
                    "path": "/nix/store/bar",
                    "references": [
                        "/nix/store/bar",
                        "/nix/store/tux"
                    ]
                },
                {
                    "path": "/nix/store/hello",
                    "references": [
                    ]
                }
            ]),
            {
                "/nix/store/foo": [ "/nix/store/bar" ],
                "/nix/store/bar": [ "/nix/store/tux" ],
                "/nix/store/hello": [ ],
            }
        )
# Convert:
#
# /nix/store/foo with
# {
# /nix/store/foo: [ /nix/store/bar, /nix/store/baz ],
# /nix/store/bar: [ /nix/store/baz ],
# /nix/store/baz: [ /nix/store/tux ] },
# /nix/store/tux: [ ]
# }
#
# To:
#
# {
# /nix/store/bar: {
# /nix/store/baz: {
# /nix/store/tux: {}
# }
# },
# /nix/store/baz: {
# /nix/store/tux: {}
# }
# }
def make_graph_segment_from_root(root, lookup):
    """Expand `root` into a nested dict tree of its transitive references,
    using `lookup` ({path: [references...]}) as the edge table.

    Shared dependencies are duplicated: each referrer gets its own
    subtree. NOTE(review): assumes the graph is acyclic (make_lookup
    strips self-references); a cycle would recurse forever.
    """
    return {
        child: make_graph_segment_from_root(child, lookup)
        for child in lookup[root]
    }
class TestMakeGraphSegmentFromRoot(unittest.TestCase):
    """Unit tests for make_graph_segment_from_root."""

    def test_returns_graph(self):
        # hello is unreachable from foo, so it must not appear anywhere
        # in the expanded tree.
        self.assertDictEqual(
            make_graph_segment_from_root("/nix/store/foo", {
                "/nix/store/foo": [ "/nix/store/bar" ],
                "/nix/store/bar": [ "/nix/store/tux" ],
                "/nix/store/tux": [ ],
                "/nix/store/hello": [ ],
            }),
            {
                "/nix/store/bar": {
                    "/nix/store/tux": {}
                }
            }
        )

    def test_returns_graph_tiny(self):
        # A leaf node expands to an empty tree.
        self.assertDictEqual(
            make_graph_segment_from_root("/nix/store/tux", {
                "/nix/store/foo": [ "/nix/store/bar" ],
                "/nix/store/bar": [ "/nix/store/tux" ],
                "/nix/store/tux": [ ],
            }),
            {}
        )
# Convert a graph segment in to a popularity-counted dictionary:
#
# From:
# {
# /nix/store/foo: {
# /nix/store/bar: {
# /nix/store/baz: {
# /nix/store/tux: {}
# }
# }
# /nix/store/baz: {
# /nix/store/tux: {}
# }
# }
# }
#
# to:
# [
# /nix/store/foo: 1
# /nix/store/bar: 2
# /nix/store/baz: 4
# /nix/store/tux: 6
# ]
def graph_popularity_contest(full_graph):
    """Count each path's "popularity" in a nested-dict dependency tree.

    Every occurrence of a node contributes 1, and each occurrence of a
    descendant contributes its own subtree score plus 1 — so deeply and
    widely shared paths score highest. Returns a defaultdict(int) of
    path -> popularity.
    """
    tally = defaultdict(int)
    for node, children in full_graph.items():
        tally[node] += 1
        # Fold the children's scores into this level, bumping each by one
        # for the extra level of depth.
        for child, child_score in graph_popularity_contest(children).items():
            tally[child] += child_score + 1
    return tally
class TestGraphPopularityContest(unittest.TestCase):
    """Unit tests for graph_popularity_contest."""

    def test_counts_popularity(self):
        # tux appears twice, each time two levels deep under foo, so it
        # accumulates the highest score; foo (the single root) the lowest.
        self.assertDictEqual(
            graph_popularity_contest({
                "/nix/store/foo": {
                    "/nix/store/bar": {
                        "/nix/store/baz": {
                            "/nix/store/tux": {}
                        }
                    },
                    "/nix/store/baz": {
                        "/nix/store/tux": {}
                    }
                }
            }),
            {
                "/nix/store/foo": 1,
                "/nix/store/bar": 2,
                "/nix/store/baz": 4,
                "/nix/store/tux": 6,
            }
        )
# Emit a list of packages by popularity, most first:
#
# From:
# [
# /nix/store/foo: 1
# /nix/store/bar: 1
# /nix/store/baz: 2
# /nix/store/tux: 2
# ]
#
# To:
# [ /nix/store/baz /nix/store/tux /nix/store/bar /nix/store/foo ]
def order_by_popularity(paths):
    """Flatten a {path: popularity} dict into a list, most popular first.

    Ties are broken by package_name() in ascending order, so the result
    is fully deterministic. (Equivalent to the original group-by /
    double-reverse dance, expressed as a single composite sort key.)
    """
    return sorted(paths, key=lambda path: (-paths[path], package_name(path)))
class TestOrderByPopularity(unittest.TestCase):
    """Unit tests for order_by_popularity."""

    def test_returns_in_order(self):
        # Higher popularity first; within each popularity tier (baz/tux
        # and bar/foo) paths are ordered by package_name ascending.
        self.assertEqual(
            order_by_popularity({
                "/nix/store/foo": 1,
                "/nix/store/bar": 1,
                "/nix/store/baz": 2,
                "/nix/store/tux": 2,
            }),
            [
                "/nix/store/baz",
                "/nix/store/tux",
                "/nix/store/bar",
                "/nix/store/foo"
            ]
        )
def package_name(path):
    """Sort key for a store path: rotate the '/nix/store/<hash>' segment
    (everything before the first '-') to the end.

    No data is discarded, so two builds of the same package that differ
    only in hash still sort deterministically.
    """
    segments = path.split('-')
    # don't throw away any data, so the order is always the same.
    # even in cases where only the hash at the start has changed.
    return '-'.join(segments[1:] + segments[:1])
def main():
    """CLI entry point: print the closure's store paths, one per line,
    ordered most "popular" first.

    sys.argv[1] is a JSON file (the .attrs.json written by Nix's
    structured attrs), sys.argv[2] the key under which the closure
    graph list is stored.
    """
    filename = sys.argv[1]
    key = sys.argv[2]

    with open(filename) as f:
        data = json.load(f)

    # Data comes in as:
    # [
    #    { path: /nix/store/foo, references: [ /nix/store/foo, /nix/store/bar, /nix/store/baz ] },
    #    { path: /nix/store/bar, references: [ /nix/store/bar, /nix/store/baz ] },
    #    { path: /nix/store/baz, references: [ /nix/store/baz, /nix/store/tux ] },
    #    { path: /nix/store/tux, references: [ /nix/store/tux ] }
    # ]
    #
    # and we want to get out a list of paths ordered by how universally
    # important they are, ie: tux is referenced by every path, transitively,
    # so it should be #1
    #
    # [
    #    /nix/store/tux,
    #    /nix/store/baz,
    #    /nix/store/bar,
    #    /nix/store/foo,
    # ]
    graph = data[key]

    roots = find_roots(graph);
    lookup = make_lookup(graph)

    # Expand each root into its (duplicated) dependency tree.
    full_graph = {}
    for root in roots:
        full_graph[root] = make_graph_segment_from_root(root, lookup)

    ordered = order_by_popularity(graph_popularity_contest(full_graph))

    # Safety net: append any closure path the graph walk missed, so no
    # store path is ever dropped from the output.
    missing = []
    for path in all_paths(graph):
        if path not in ordered:
            missing.append(path)

    ordered.extend(missing)
    print("\n".join(ordered))
# Run the self-tests when invoked with --test (see the note at the top of
# this file); otherwise behave as the CLI described in main().
if "--test" in sys.argv:
    # Don't pass --test otherwise unittest gets mad
    unittest.main(argv = [f for f in sys.argv if f != "--test" ])
else:
    main()

View file

@ -0,0 +1,15 @@
{ runCommand, python3, coreutils }:

# Write the references of `path' to a file, in order of how "popular" each
# reference is. Nix 2 only.
#
# __structuredAttrs makes Nix serialise the exportReferencesGraph result
# as JSON into .attrs.json, which closure-graph.py consumes.
path: runCommand "closure-paths"
  {
    exportReferencesGraph.graph = path;
    __structuredAttrs = true;
    PATH = "${coreutils}/bin:${python3}/bin";
    # All the work happens in this custom builder; the trailing "" below
    # is runCommand's (unused) build-script argument.
    builder = builtins.toFile "builder"
      ''
        . .attrs.sh
        python3 ${./closure-graph.py} .attrs.json graph > ''${outputs[out]}
      '';
  }
  ""

View file

@ -365,6 +365,8 @@ with pkgs;
nukeReferences = callPackage ../build-support/nuke-references { };
referencesByPopularity = callPackage ../build-support/references-by-popularity { };
removeReferencesTo = callPackage ../build-support/remove-references-to { };
vmTools = callPackage ../build-support/vm { };