From 08ebd830a559692f9b34a1619cca44b1888468a0 Mon Sep 17 00:00:00 2001
From: Ihor Antonov
Date: Wed, 9 May 2018 10:15:16 -0400
Subject: [PATCH] Fix kernel crash caused by absent root device

---
Reviewer notes (text in this section is ignored by `git am`):

 * stage-1-init.sh: the waitDevice helper is moved from after lustrateRoot
   to just before specialMount. Presumably this makes it available to the
   postDeviceCommands snippets -- confirm against where those snippets are
   spliced into the script.
 * grow-partition.nix: the root device is now waited for with waitDevice
   instead of being tested once with `[ -e ... ]`.
 * virtualisation/amazon-image.nix: the postDeviceCommands block that ran
   `udevadm control --exit` is removed.
 * Relative to the original submission, the added copy of waitDevice quotes
   "$device" in both `test -e` checks so paths containing whitespace or glob
   characters are not word-split. The number of added lines is unchanged,
   so hunk headers and the diffstat remain valid.
 * NOTE(review): leading indentation inside the hunks was reconstructed from
   a whitespace-collapsed copy of this patch. Verify context lines against
   the target tree, or apply with `git apply --ignore-whitespace`.

 .../maintainers/scripts/ec2/amazon-image.nix  |  2 +-
 nixos/modules/system/boot/grow-partition.nix  |  2 +-
 nixos/modules/system/boot/stage-1-init.sh     | 50 ++++++++++---------
 nixos/modules/virtualisation/amazon-image.nix |  7 ---
 4 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/nixos/maintainers/scripts/ec2/amazon-image.nix b/nixos/maintainers/scripts/ec2/amazon-image.nix
index 5ab5d400e05..eeae27ede0f 100644
--- a/nixos/maintainers/scripts/ec2/amazon-image.nix
+++ b/nixos/maintainers/scripts/ec2/amazon-image.nix
@@ -8,7 +8,7 @@ in {
 
   imports = [ ../../../modules/virtualisation/amazon-image.nix ];
 
-  # Required to avoid kernel panics on KVM instances where nvme volume availability can get delayed
+  # Required to provide a good EBS experience, see
   # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html#timeout-nvme-ebs-volumes
   # TODO change value to 4294967295 when kernel is updated to 4.15 or later
   config.boot.kernelParams = [ "nvme_core.io_timeout=255" ];
diff --git a/nixos/modules/system/boot/grow-partition.nix b/nixos/modules/system/boot/grow-partition.nix
index 1e6f9e442b6..8c9b1502558 100644
--- a/nixos/modules/system/boot/grow-partition.nix
+++ b/nixos/modules/system/boot/grow-partition.nix
@@ -30,7 +30,7 @@ with lib;
 
     boot.initrd.postDeviceCommands = ''
       rootDevice="${config.fileSystems."/".device}"
-      if [ -e "$rootDevice" ]; then
+      if waitDevice "$rootDevice"; then
         rootDevice="$(readlink -f "$rootDevice")"
         parentDevice="$rootDevice"
         while [ "''${parentDevice%[0-9]}" != "''${parentDevice}" ]; do
diff --git a/nixos/modules/system/boot/stage-1-init.sh b/nixos/modules/system/boot/stage-1-init.sh
index 964ec68cfe2..1facf419ed0 100644
--- a/nixos/modules/system/boot/stage-1-init.sh
+++ b/nixos/modules/system/boot/stage-1-init.sh
@@ -74,6 +74,32 @@ ln -s /proc/mounts /etc/mtab # to shut up mke2fs
 touch /etc/udev/hwdb.bin # to shut up udev
 touch /etc/initrd-release
 
+# Wait for a device node to appear ($1 = device path); fails on timeout.
+waitDevice() {
+    local device="$1"
+
+    # USB storage devices tend to appear with some delay.  It would be
+    # great if we had a way to synchronously wait for them, but
+    # alas...  So poll once per second, up to 20 times, and return
+    # non-zero if the device still has not appeared.
+    if test ! -e "$device"; then
+        echo -n "waiting for device $device to appear..."
+        try=20
+        while [ $try -gt 0 ]; do
+            sleep 1
+            # also re-try lvm activation now that new block devices might have appeared
+            lvm vgchange -ay
+            # and tell udev to create nodes for the new LVs
+            udevadm trigger --action=add
+            if test -e "$device"; then break; fi
+            echo -n "."
+            try=$((try - 1))
+        done
+        echo
+        [ $try -ne 0 ]
+    fi
+}
+
 # Mount special file systems.
 specialMount() {
   local device="$1"
@@ -377,31 +403,7 @@ lustrateRoot () {
     exec 4>&-
 }
 
-# Function for waiting a device to appear.
-waitDevice() {
-    local device="$1"
 
-    # USB storage devices tend to appear with some delay.  It would be
-    # great if we had a way to synchronously wait for them, but
-    # alas...  So just wait for a few seconds for the device to
-    # appear.
-    if test ! -e $device; then
-        echo -n "waiting for device $device to appear..."
-        try=20
-        while [ $try -gt 0 ]; do
-            sleep 1
-            # also re-try lvm activation now that new block devices might have appeared
-            lvm vgchange -ay
-            # and tell udev to create nodes for the new LVs
-            udevadm trigger --action=add
-            if test -e $device; then break; fi
-            echo -n "."
-            try=$((try - 1))
-        done
-        echo
-        [ $try -ne 0 ]
-    fi
-}
 
 
 # Try to resume - all modules are loaded now.
diff --git a/nixos/modules/virtualisation/amazon-image.nix b/nixos/modules/virtualisation/amazon-image.nix
index f74c42a777f..e9e935e9020 100644
--- a/nixos/modules/virtualisation/amazon-image.nix
+++ b/nixos/modules/virtualisation/amazon-image.nix
@@ -48,13 +48,6 @@ let cfg = config.ec2; in
     boot.loader.grub.extraPerEntryConfig = mkIf (!cfg.hvm) "root (hd0)";
     boot.loader.timeout = 0;
 
-    boot.initrd.postDeviceCommands =
-      ''
-        # Force udev to exit to prevent random "Device or resource busy
-        # while trying to open /dev/xvda" errors from fsck.
-        udevadm control --exit || true
-      '';
-
     boot.initrd.network.enable = true;
 
     # Mount all formatted ephemeral disks and activate all swap devices.