Merge pull request #207038 from NixOS/make-disk-image-for-uefi

make-disk-image: documentation, UEFI variables recording, improved determinism
This commit is contained in:
Jörg Thalheim 2022-12-26 11:02:28 +00:00 committed by GitHub
commit ea415d1a38
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 302 additions and 13 deletions

4
.github/CODEOWNERS vendored
View file

@ -48,6 +48,10 @@
# Nixpkgs build-support
/pkgs/build-support/writers @lassulus @Profpatsch
# Nixpkgs make-disk-image
/doc/builders/images/makediskimage.section.md @raitobezarius
/nixos/lib/make-disk-image.nix @raitobezarius
# Nixpkgs documentation
/maintainers/scripts/db-to-md.sh @jtojnar @ryantm
/maintainers/scripts/doc @jtojnar @ryantm

View file

@ -10,4 +10,5 @@
<xi:include href="images/ocitools.section.xml" />
<xi:include href="images/snaptools.section.xml" />
<xi:include href="images/portableservice.section.xml" />
<xi:include href="images/makediskimage.section.xml" />
</chapter>

View file

@ -0,0 +1,107 @@
# `<nixpkgs/nixos/lib/make-disk-image.nix>` {#sec-make-disk-image}
`<nixpkgs/nixos/lib/make-disk-image.nix>` is a function to create _disk images_ in multiple formats: raw, QCOW2 (QEMU), QCOW2-Compressed (compressed version), VDI (VirtualBox), VPC (VirtualPC).
This function can create images in two ways:
- using `cptofs` without any virtual machine to create a Nix store disk image,
- using a virtual machine to create a full NixOS installation.
When testing early-boot or lifecycle parts of NixOS such as a bootloader or multiple generations, it is necessary to opt for a full NixOS system installation.
For many web servers and applications, on the other hand, a Nix-store-only disk image is sufficient and is faster to build.
NixOS tests also use this function when preparing the VM. The `cptofs` method is used when `virtualisation.useBootLoader` is false (the default). Otherwise the second method is used.
## Features
For reference, read the function signature source code for documentation on arguments: <https://github.com/NixOS/nixpkgs/blob/master/nixos/lib/make-disk-image.nix>.
Features are separated in various sections depending on if you opt for a Nix-store only image or a full NixOS image.
### Common
- arbitrary NixOS configuration
- automatic or bound disk size: `diskSize` parameter, `additionalSpace` can be set when `diskSize` is `auto` to add a constant of disk space
- multiple partition table layouts: EFI, legacy, legacy + GPT, hybrid, none through `partitionTableType` parameter
- OVMF or EFI firmwares and variables templates can be customized
- root filesystem `fsType` can be customized to whatever `mkfs.${fsType}` exists during operations
- root filesystem label can be customized, defaults to `nix-store` if it's a Nix store image, otherwise `nixos`
- arbitrary code can be executed after disk image was produced with `postVM`
- the current nixpkgs can be realized as a channel in the disk image, which will change the hash of the image when the sources are updated
- additional store paths can be provided through `additionalPaths`
### Full NixOS image
- arbitrary contents with permissions can be placed in the target filesystem using `contents`
- a `/etc/nixos/configuration.nix` can be provided through `configFile`
- bootloaders are supported
- EFI variables can be mutated during image production and the result is exposed in `$out`
- boot partition size can be set through `bootSize` when the partition table is `efi` or `hybrid`
### On bit-to-bit reproducibility
Images are **NOT** deterministic; please do not hesitate to try to fix this. Sources of non-determinism include (not exhaustive):
- bootloader installation has timestamps
- the SQLite Nix store database contains registration times
- `/etc/shadow` is in a non-deterministic order
A `deterministic` flag is available for best-effort determinism.
## Usage
To produce a Nix-store only image:
```nix
let
pkgs = import <nixpkgs> {};
lib = pkgs.lib;
make-disk-image = import <nixpkgs/nixos/lib/make-disk-image.nix>;
in
make-disk-image {
inherit pkgs lib;
config = {};
additionalPaths = [ ];
format = "qcow2";
onlyNixStore = true;
partitionTableType = "none";
installBootLoader = false;
touchEFIVars = false;
diskSize = "auto";
additionalSpace = "0M"; # Defaults to 512M.
copyChannel = false;
}
```
Some arguments can be left out, they are shown explicitly for the sake of the example.
Building this derivation will provide a QCOW2 disk image containing only the Nix store and its registration information.
To produce a NixOS installation image disk with UEFI and bootloader installed:
```nix
let
pkgs = import <nixpkgs> {};
lib = pkgs.lib;
make-disk-image = import <nixpkgs/nixos/lib/make-disk-image.nix>;
evalConfig = import <nixpkgs/nixos/lib/eval-config.nix>;
in
make-disk-image {
inherit pkgs lib;
config = evalConfig {
modules = [
{
fileSystems."/" = { device = "/dev/vda"; fsType = "ext4"; autoFormat = true; };
boot.loader.grub.device = "/dev/vda";
}
];
};
format = "qcow2";
onlyNixStore = false;
partitionTableType = "legacy+gpt";
installBootLoader = true;
touchEFIVars = true;
diskSize = "auto";
additionalSpace = "0M"; # Defaults to 512M.
copyChannel = false;
}
```

View file

@ -331,6 +331,14 @@
</listitem>
</itemizedlist>
</listitem>
<listitem>
<para>
<literal>nixos/lib/make-disk-image.nix</literal> can now
mutate EFI variables, run user-provided EFI firmware or
variable templates. This is now extensively documented in the
NixOS manual.
</para>
</listitem>
<listitem>
<para>
A new <literal>virtualisation.rosetta</literal> module was

View file

@ -93,6 +93,8 @@ In addition to numerous new and upgraded packages, this release has the followin
[headscale's example configuration](https://github.com/juanfont/headscale/blob/main/config-example.yaml)
can be directly written as attribute-set in Nix within this option.
- `nixos/lib/make-disk-image.nix` can now mutate EFI variables, run user-provided EFI firmware or variable templates. This is now extensively documented in the NixOS manual.
- A new `virtualisation.rosetta` module was added to allow running `x86_64` binaries through [Rosetta](https://developer.apple.com/documentation/apple-silicon/about-the-rosetta-translation-environment) inside virtualised NixOS guests on Apple silicon. This feature works by default with the [UTM](https://docs.getutm.app/) virtualisation [package](https://search.nixos.org/packages?channel=unstable&show=utm&from=0&size=1&sort=relevance&type=packages&query=utm).
- The new option `users.motdFile` allows configuring a Message Of The Day that can be updated dynamically.

View file

@ -1,3 +1,85 @@
/* Technical details
`make-disk-image` has a bit of magic to minimize the amount of work to do in a virtual machine.
It relies on the [LKL (Linux Kernel Library) project](https://github.com/lkl/linux) which provides Linux kernel as userspace library.
The Nix-store only image only needs to run LKL tools to produce an image and will never spawn a virtual machine, whereas full images will always require a virtual machine, but also use LKL.
### Image preparation phase
Image preparation phase will produce the initial image layout in a folder:
- devise a root folder based on `$PWD`
- prepare the contents by copying and restoring ACLs in this root folder
- load in the Nix store database all additional paths computed by `pkgs.closureInfo` in a temporary Nix store
- run `nixos-install` in a temporary folder
- transfer from the temporary store the additional paths registered to the installed NixOS
- compute the size of the disk image based on the apparent size of the root folder
- partition the disk image using the corresponding script according to the partition table type
- format the partitions if needed
- use `cptofs` (LKL tool) to copy the root folder inside the disk image
At this step, the disk image already contains the Nix store, it now only needs to be converted to the desired format to be used.
### Image conversion phase
Using `qemu-img`, the disk image is converted from a raw format to the desired format: qcow2(-compressed), vdi, vpc.
### Image Partitioning
#### `none`
No partition table layout is written. The image is a bare filesystem image.
#### `legacy`
The image is partitioned using MBR. There is one primary ext4 partition starting at 1 MiB that fills the rest of the disk image.
This partition layout is unsuitable for UEFI.
#### `legacy+gpt`
This partition table type uses GPT and:
- create a "no filesystem" partition from 1MiB to 2MiB ;
- set `bios_grub` flag on this "no filesystem" partition, which marks it as a [GRUB BIOS partition](https://www.gnu.org/software/parted/manual/html_node/set.html) ;
- create a primary ext4 partition starting at 2MiB and extending to the full disk image ;
- perform optimal alignments checks on each partition
This partition layout is unsuitable for UEFI boot, because it has no ESP (EFI System Partition) partition. It can work with CSM (Compatibility Support Module) which emulates legacy (BIOS) boot for UEFI.
#### `efi`
This partition table type uses GPT and:
- creates a FAT32 ESP partition from 8MiB to specified `bootSize` parameter (256MiB by default), sets it bootable ;
- creates a primary ext4 partition starting after the boot partition and extending to the full disk image
#### `hybrid`
This partition table type uses GPT and:
- creates a "no filesystem" partition from 0 to 1MiB, sets `bios_grub` flag on it ;
- creates a FAT32 ESP partition from 8MiB to specified `bootSize` parameter (256MiB by default), sets it bootable ;
- creates a primary ext4 partition starting after the boot one and extending to the full disk image
This partition layout can be booted by a BIOS able to understand GPT layouts and recognizing the MBR at the start.
### How to run determinism analysis on results?
Build your derivation with `--check` to rebuild it and verify it is the same.
If it fails, you will be left with two folders, one of them having the `.check` suffix.
You can use `diffoscope` to see the differences between the folders.
However, `diffoscope` is currently not able to diff two QCOW2 filesystems, thus, it is advised to use raw format.
Even if you use raw disks, `diffoscope` cannot diff the partition table and partitions recursively.
To solve this, you can run `fdisk -l $image` and generate `dd if=$image of=$image-p$i.raw skip=$start count=$sectors` for each `(start, sectors)` listed in the `fdisk` output. Now, you will have each partition as a separate file and you can compare them in pairs.
*/
{ pkgs
, lib
@ -47,6 +129,18 @@
, # Whether to invoke `switch-to-configuration boot` during image creation
installBootLoader ? true
, # Whether to make the EFI variables available in $out/efi-vars.fd and use them during disk creation
touchEFIVars ? false
, # OVMF firmware derivation
OVMF ? pkgs.OVMF.fd
, # EFI firmware
efiFirmware ? OVMF.firmware
, # EFI variables
efiVariables ? OVMF.variables
, # The root file system type.
fsType ? "ext4"
@ -70,6 +164,22 @@
, # Disk image format, one of qcow2, qcow2-compressed, vdi, vpc, raw.
format ? "raw"
# Whether to fix:
# - GPT Disk Unique Identifier (diskGUID)
# - GPT Partition Unique Identifier: depends on the layout, root partition UUID can be controlled through `rootGPUID` option
# - GPT Partition Type Identifier: fixed according to the layout, e.g. ESP partition, etc. through `parted` invocation.
# - Filesystem Unique Identifier when fsType = ext4 for *root partition*.
# BIOS/MBR support is "best effort" at the moment.
# Boot partitions may not be deterministic.
# It also fixes the last-checked time of the ext4 partition if fsType = ext4.
, deterministic ? true
# GPT Partition Unique Identifier for root partition.
, rootGPUID ? "F222513B-DED1-49FA-B591-20CE86A2FE7F"
# When fsType = ext4, this is the root Filesystem Unique Identifier.
# TODO: support other filesystems someday.
, rootFSUID ? (if fsType == "ext4" then rootGPUID else null)
, # Whether a nix channel based on the current source tree should be
# made available inside the image. Useful for interactive use of nix
# utils, but changes the hash of the image when the sources are
@ -80,15 +190,18 @@
additionalPaths ? []
}:
assert partitionTableType == "legacy" || partitionTableType == "legacy+gpt" || partitionTableType == "efi" || partitionTableType == "hybrid" || partitionTableType == "none";
# We use -E offset=X below, which is only supported by e2fsprogs
assert partitionTableType != "none" -> fsType == "ext4";
assert (lib.assertOneOf "partitionTableType" partitionTableType [ "legacy" "legacy+gpt" "efi" "hybrid" "none" ]);
assert (lib.assertMsg (fsType == "ext4" && deterministic -> rootFSUID != null) "In deterministic mode with a ext4 partition, rootFSUID must be non-null, by default, it is equal to rootGPUID.");
# We use -E offset=X below, which is only supported by e2fsprogs
assert (lib.assertMsg (partitionTableType != "none" -> fsType == "ext4") "to produce a partition table, we need to use -E offset flag which is support only for fsType = ext4");
assert (lib.assertMsg (touchEFIVars -> partitionTableType == "hybrid" || partitionTableType == "efi" || partitionTableType == "legacy+gpt") "EFI variables can be used only with a partition table of type: hybrid, efi or legacy+gpt.");
# If only Nix store image, then: contents must be empty, configFile must be unset, and we should not install bootloader.
assert (lib.assertMsg (onlyNixStore -> contents == [] && configFile == null && !installBootLoader) "In a only Nix store image, the contents must be empty, no configuration must be provided and no bootloader should be installed.");
# Either both or none of {user,group} need to be set
assert lib.all
assert (lib.assertMsg (lib.all
(attrs: ((attrs.user or null) == null)
== ((attrs.group or null) == null))
contents;
assert onlyNixStore -> contents == [] && configFile == null && !installBootLoader;
contents) "Contents of the disk image should set none of {user, group} or both at the same time.");
with lib;
@ -127,6 +240,14 @@ let format' = format; in let
mkpart primary ext4 2MB -1 \
align-check optimal 2 \
print
${optionalString deterministic ''
sgdisk \
--disk-guid=97FD5997-D90B-4AA3-8D16-C1723AEA73C \
--partition-guid=1:1C06F03B-704E-4657-B9CD-681A087A2FDC \
--partition-guid=2:970C694F-AFD0-4B99-B750-CDB7A329AB6F \
--partition-guid=3:${rootGPUID} \
$diskImage
''}
'';
efi = ''
parted --script $diskImage -- \
@ -134,6 +255,13 @@ let format' = format; in let
mkpart ESP fat32 8MiB ${bootSize} \
set 1 boot on \
mkpart primary ext4 ${bootSize} -1
${optionalString deterministic ''
sgdisk \
--disk-guid=97FD5997-D90B-4AA3-8D16-C1723AEA73C \
--partition-guid=1:1C06F03B-704E-4657-B9CD-681A087A2FDC \
--partition-guid=2:${rootGPUID} \
$diskImage
''}
'';
hybrid = ''
parted --script $diskImage -- \
@ -143,10 +271,20 @@ let format' = format; in let
mkpart no-fs 0 1024KiB \
set 2 bios_grub on \
mkpart primary ext4 ${bootSize} -1
${optionalString deterministic ''
sgdisk \
--disk-guid=97FD5997-D90B-4AA3-8D16-C1723AEA73C \
--partition-guid=1:1C06F03B-704E-4657-B9CD-681A087A2FDC \
--partition-guid=2:970C694F-AFD0-4B99-B750-CDB7A329AB6F \
--partition-guid=3:${rootGPUID} \
$diskImage
''}
'';
none = "";
}.${partitionTableType};
useEFIBoot = touchEFIVars;
nixpkgs = cleanSource pkgs.path;
# FIXME: merge with channel.nix / make-channel.nix.
@ -171,7 +309,9 @@ let format' = format; in let
config.system.build.nixos-enter
nix
systemdMinimal
] ++ stdenv.initialPath);
]
++ lib.optional deterministic gptfdisk
++ stdenv.initialPath);
# I'm preserving the line below because I'm going to search for it across nixpkgs to consolidate
# image building logic. The comment right below this now appears in 4 different places in nixpkgs :)
@ -368,20 +508,35 @@ let format' = format; in let
diskImage=$out/${filename}
'';
createEFIVars = ''
efiVars=$out/efi-vars.fd
cp ${efiVariables} $efiVars
chmod 0644 $efiVars
'';
buildImage = pkgs.vmTools.runInLinuxVM (
pkgs.runCommand name {
preVM = prepareImage;
preVM = prepareImage + lib.optionalString touchEFIVars createEFIVars;
buildInputs = with pkgs; [ util-linux e2fsprogs dosfstools ];
postVM = moveOrConvertImage + postVM;
QEMU_OPTS =
concatStringsSep " " (lib.optional useEFIBoot "-drive if=pflash,format=raw,unit=0,readonly=on,file=${efiFirmware}"
++ lib.optionals touchEFIVars [
"-drive if=pflash,format=raw,unit=1,file=$efiVars"
]
);
memSize = 1024;
} ''
export PATH=${binPath}:$PATH
rootDisk=${if partitionTableType != "none" then "/dev/vda${rootPartition}" else "/dev/vda"}
# Some tools assume these exist
ln -s vda /dev/xvda
ln -s vda /dev/sda
# It is necessary to set root filesystem unique identifier in advance, otherwise
# bootloader might get the wrong one and fail to boot.
# At the end, we reset again because we want deterministic timestamps.
${optionalString (fsType == "ext4" && deterministic) ''
tune2fs -T now ${optionalString deterministic "-U ${rootFSUID}"} -c 0 -i 0 $rootDisk
''}
# make systemd-boot find ESP without udev
mkdir /dev/block
ln -s /dev/vda1 /dev/block/254:1
@ -396,6 +551,8 @@ let format' = format; in let
mkdir -p /mnt/boot
mkfs.vfat -n ESP /dev/vda1
mount /dev/vda1 /mnt/boot
${optionalString touchEFIVars "mount -t efivarfs efivarfs /sys/firmware/efi/efivars"}
''}
# Install a configuration.nix
@ -405,7 +562,13 @@ let format' = format; in let
''}
${lib.optionalString installBootLoader ''
# Set up core system link, GRUB, etc.
# In this throwaway resource, we only have /dev/vda, but the actual VM may refer to another disk for bootloader, e.g. /dev/vdb
# Use this option to create a symlink from vda to any arbitrary device you want.
${optionalString (config.boot.loader.grub.device != "/dev/vda") ''
ln -s /dev/vda ${config.boot.loader.grub.device}
''}
# Set up core system link, bootloader (sd-boot, GRUB, uboot, etc.), etc.
NIXOS_INSTALL_BOOTLOADER=1 nixos-enter --root $mountPoint -- /nix/var/nix/profiles/system/bin/switch-to-configuration boot
# The above scripts will generate a random machine-id and we don't want to bake a single ID into all our images
@ -432,8 +595,12 @@ let format' = format; in let
# Make sure resize2fs works. Note that resize2fs has stricter criteria for resizing than a normal
# mount, so the `-c 0` and `-i 0` don't affect it. Setting it to `now` doesn't produce deterministic
# output, of course, but we can fix that when/if we start making images deterministic.
# In deterministic mode, this is fixed to 1970-01-01 (UNIX timestamp 0).
# This two-step approach is necessary otherwise `tune2fs` will want a fresher filesystem to perform
# some changes.
${optionalString (fsType == "ext4") ''
tune2fs -T now -c 0 -i 0 $rootDisk
tune2fs -T now ${optionalString deterministic "-U ${rootFSUID}"} -c 0 -i 0 $rootDisk
${optionalString deterministic "tune2fs -f -T 19700101 $rootDisk"}
''}
''
);