2009-02-05 15:57:07 +00:00
|
|
|
|
#! @shell@
|
2006-11-02 22:48:01 +00:00
|
|
|
|
|
2009-06-10 16:29:48 +00:00
|
|
|
|
# Directory on which the real root filesystem is mounted before stage 2.
targetRoot=/mnt-root

# Console device name (relative to /dev) used for the rescue shell in fail();
# replaced by the first console= value found on the kernel command line.
console=tty1

# When non-empty, info() prints its messages; when empty, info() is silent.
verbose="@verbose@"
|
|
|
|
|
|
|
|
|
|
# Print an informational message, but only in verbose mode.
#
# Globals:   verbose (read) - output is produced only when it is non-empty
# Arguments: the words to print; passed to echo unchanged, so calling
#            info with no arguments prints an empty line
# Returns:   0 when silent, otherwise the exit status of echo
info() {
    if [[ -z "$verbose" ]]; then
        return 0
    fi
    echo "$@"
}
|
2008-08-26 12:45:36 +00:00
|
|
|
|
|
2016-07-06 19:57:14 +00:00
|
|
|
|
extraUtils="@extraUtils@"
export LD_LIBRARY_PATH=@extraUtils@/lib
export PATH=@extraUtils@/bin
ln -s @extraUtils@/bin /bin

# Copy the secrets to their needed location
if [ -d "@extraUtils@/secrets" ]; then
    # Each file below the secrets directory is linked at the same path
    # relative to /.  Reading find's output line by line keeps paths that
    # contain spaces intact, and "&&" stops find from scanning the wrong
    # directory if the cd fails.
    (cd "@extraUtils@/secrets" && find . -type f) | while IFS= read -r secret; do
        secret="${secret#./}"
        mkdir -p "$(dirname "/$secret")"
        # Link at the absolute path so the target matches the directory
        # created above, regardless of the current working directory.
        ln -s "@extraUtils@/secrets/$secret" "/$secret"
    done
fi

# Stop LVM complaining about fd3
export LVM_SUPPRESS_FD_WARNINGS=true
|
2008-08-26 12:45:36 +00:00
|
|
|
|
|
2009-06-10 15:02:39 +00:00
|
|
|
|
# Handle a stage-1 failure.
#
# Exits with status 1 immediately when panicOnFail is set.  Otherwise runs the
# configured pre-fail commands, prints a menu and reads a single key:
#   f  (only if allowShell is set) replace this process with a shell on the console
#   i  (only if allowShell is set) run a shell on the console, then return
#   r  reboot immediately
#   anything else: return so the caller can carry on
#
# Globals: panicOnFail, allowShell, targetRoot, console (read); reply (written)
fail() {
    if [ -n "$panicOnFail" ]; then exit 1; fi

    @preFailCommands@

    # If starting stage 2 failed, allow the user to repair the problem
    # in an interactive shell.
    cat <<EOF

An error occurred in stage 1 of the boot process, which must mount the
root filesystem on \`$targetRoot' and then start stage 2. Press one
of the following keys:

EOF
    if [ -n "$allowShell" ]; then cat <<EOF
i) to launch an interactive shell
f) to start an interactive shell having pid 1 (needed if you want to
start stage 2's init manually)
EOF
    fi
    cat <<EOF
r) to reboot immediately
*) to ignore the error and continue
EOF

    # -r keeps a backslash keypress literal instead of treating it as an escape.
    read -r -n 1 reply

    # Separate [ ] tests joined with && stay unambiguous even when the key
    # pressed is "!" or "(", which the single-test -a form can misparse.
    if [ -n "$allowShell" ] && [ "$reply" = f ]; then
        exec setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console"
    elif [ -n "$allowShell" ] && [ "$reply" = i ]; then
        echo "Starting interactive shell..."
        # If the shell exits with an error, show the menu again.
        setsid @shell@ -c "exec @shell@ < /dev/$console >/dev/$console 2>/dev/$console" || fail
    elif [ "$reply" = r ]; then
        echo "Rebooting..."
        reboot -f
    else
        info "Continuing..."
    fi
}
|
|
|
|
|
|
2012-05-21 19:26:07 +00:00
|
|
|
|
# Call fail whenever the script exits (trap on signal 0).
trap 'fail' 0

# Print a greeting.
info
# printf supplies the ESC byte that introduces the colour sequences; without
# it the "[1;32m" / "[0m" codes would be printed as literal text.
info "$(printf '\033[1;32m<<< NixOS Stage 1 >>>\033[0m')"
info

# Make several required directories.
mkdir -p /etc/udev
touch /etc/fstab # to shut up mount
ln -s /proc/mounts /etc/mtab # to shut up mke2fs
touch /etc/udev/hwdb.bin # to shut up udev
touch /etc/initrd-release
|
2016-08-27 10:29:38 +00:00
|
|
|
|
|
2022-01-10 00:06:03 +00:00
|
|
|
|
# Function for waiting for device(s) to appear.
#
# Arguments: $1 - a device path, or several paths joined with ':'
# Outputs:   progress dots on stdout while waiting
# Returns:   0 if every listed path exists once its wait is over,
#            1 if at least one of them never appeared
waitDevice() {
    local device="$1"
    # Split device string using ':' as a delimiter as bcachefs
    # uses this for multi-device filesystems, i.e. /dev/sda1:/dev/sda2:/dev/sda3
    local IFS=':'
    local dev try
    local status=0

    # USB storage devices tend to appear with some delay.  It would be
    # great if we had a way to synchronously wait for them, but
    # alas...  So just wait for a few seconds for the device to
    # appear.
    # $device is deliberately unquoted so that it is split on ':'.
    for dev in $device; do
        if test ! -e "$dev"; then
            echo -n "waiting for device $dev to appear..."
            try=20
            while [ "$try" -gt 0 ]; do
                sleep 1
                # also re-try lvm activation now that new block devices might have appeared
                lvm vgchange -ay
                # and tell udev to create nodes for the new LVs
                udevadm trigger --action=add
                if test -e "$dev"; then break; fi
                echo -n "."
                try=$((try - 1))
            done
            echo
            # The counter only reaches 0 when the device never showed up.
            # Record the failure instead of relying on the status of the
            # last loop iteration, which a later, existing device would
            # otherwise overwrite with success.
            if [ "$try" -eq 0 ]; then status=1; fi
        fi
    done

    return "$status"
}
|
|
|
|
|
|
2016-08-27 10:29:38 +00:00
|
|
|
|
# Mount special file systems.
#
# Creates the mount point (mode 0755) if needed and mounts the file system
# there without writing to the mtab (-n).
#
# Arguments: $1 - device
#            $2 - mount point
#            $3 - mount options, passed to mount -o
#            $4 - file system type, passed to mount -t
# Returns:   the exit status of mount
specialMount() {
    local device="$1"
    local mountPoint="$2"
    local options="$3"
    local fsType="$4"

    mkdir -m 0755 -p "$mountPoint"
    mount -n -t "$fsType" -o "$options" "$device" "$mountPoint"
}
|
|
|
|
|
source @earlyMountScript@

# Copy initrd secrets from /.initrd-secrets to their actual destinations
if [ -d "/.initrd-secrets" ]; then
    #
    # Secrets are named by their full destination pathname and stored
    # under /.initrd-secrets/
    #
    # Reading find's output line by line keeps paths containing spaces
    # intact, and "&&" stops find from scanning the wrong directory if the
    # cd fails.
    (cd "/.initrd-secrets" && find . -type f) | while IFS= read -r secret; do
        secret="${secret#./}"
        mkdir -p "$(dirname "/$secret")"
        # Copy to the absolute path so the target matches the directory
        # created above, regardless of the current working directory.
        cp "/.initrd-secrets/$secret" "/$secret"
    done
fi

# Log the script output to /dev/kmsg or /run/log/stage-1-init.log.
mkdir -p /tmp
mkfifo /tmp/stage-1-init.log.fifo
# Keep copies of the original stdout/stderr on fds 8 and 9.
logOutFd=8 && logErrFd=9
eval "exec $logOutFd>&1 $logErrFd>&2"
if test -w /dev/kmsg; then
    # tee echoes everything to the saved stdout; the loop forwards each
    # non-empty line to the kernel log.
    tee -i < /tmp/stage-1-init.log.fifo /proc/self/fd/"$logOutFd" | while read -r line; do
        if test -n "$line"; then
            echo "<7>stage-1-init: [$(date)] $line" > /dev/kmsg
        fi
    done &
else
    mkdir -p /run/log
    tee -i < /tmp/stage-1-init.log.fifo /run/log/stage-1-init.log &
fi
# From here on, all output of this script goes through the fifo.
exec > /tmp/stage-1-init.log.fifo 2>&1
|
|
|
|
|
|
2013-07-23 18:43:11 +00:00
|
|
|
|
|
2006-11-24 00:04:29 +00:00
|
|
|
|
# Process the kernel command line.
#
# Values are extracted with parameter expansion (strip the "name=" prefix)
# rather than by splitting on '=', so a value that itself contains '='
# (for instance mount options in boot.persistence.opt=) is kept whole.
export stage2Init=/init
for o in $(cat /proc/cmdline); do
    case $o in
        console=*)
            # Keep only the device name; drop any ",options" suffix.
            params=${o#console=}
            console=${params%%,*}
            ;;
        init=*)
            stage2Init=${o#init=}
            ;;
        boot.persistence=*)
            persistence=${o#boot.persistence=}
            ;;
        boot.persistence.opt=*)
            persistence_opt=${o#boot.persistence.opt=}
            ;;
        boot.trace|debugtrace)
            # Show each command.
            set -x
            ;;
        boot.shell_on_fail)
            allowShell=1
            ;;
        boot.debug1|debug1) # stop right away
            allowShell=1
            fail
            ;;
        boot.debug1devices) # stop after loading modules and creating device nodes
            allowShell=1
            debug1devices=1
            ;;
        boot.debug1mounts) # stop after mounting file systems
            allowShell=1
            debug1mounts=1
            ;;
        boot.panic_on_fail|stage1panic=1)
            panicOnFail=1
            ;;
        root=*)
            # If a root device is specified on the kernel command
            # line, make it available through the symlink /dev/root.
            # Recognise LABEL= and UUID= to support UNetbootin.
            rootSpec=${o#root=}
            case $rootSpec in
                LABEL=*)
                    root="/dev/disk/by-label/${rootSpec#LABEL=}"
                    ;;
                UUID=*)
                    root="/dev/disk/by-uuid/${rootSpec#UUID=}"
                    ;;
                *)
                    root=$rootSpec
                    ;;
            esac
            ln -s "$root" /dev/root
            ;;
        copytoram)
            copytoram=1
            ;;
        findiso=*)
            # if an iso name is supplied, try to find the device where
            # the iso resides on
            isoPath=${o#findiso=}
            ;;
    esac
done
|
|
|
|
|
|
nixos: Add system-wide option to set the hostid
The old boot.spl.hostid option was not working correctly due to an
upstream bug.
Instead, now we will create the /etc/hostid file so that all applications
(including the ZFS kernel modules, ZFS user-space applications and other
unrelated programs) pick up the same system-wide host id. Note that glibc
(and by extension, the `hostid` program) also respects the host id configured in
/etc/hostid, if it exists.
The hostid option is now mandatory when using ZFS because otherwise, ZFS will
require you to force-import your ZFS pools if you want to use them, which is
undesirable because it disables some of the checks that ZFS does to make sure it
is safe to import a ZFS pool.
The /etc/hostid file must also exist when booting the initrd, before the SPL
kernel module is loaded, so that ZFS picks up the hostid correctly.
The complexity in creating the /etc/hostid file is due to having to
write the host ID as a 32-bit binary value, taking into account the
endianness of the machine, while using only shell commands and/or simple
utilities (to avoid exploding the size of the initrd).
2014-10-23 02:59:06 +00:00
|
|
|
|
# Set hostid before modules are loaded.
# This is needed by the spl/zfs modules.
@setHostId@

# Load the required kernel modules.
mkdir -p /lib
ln -s @modulesClosure@/lib/modules /lib/modules
ln -s @modulesClosure@/lib/firmware /lib/firmware
# Tell the kernel which modprobe binary to use when it loads modules itself.
echo @extraUtils@/bin/modprobe > /proc/sys/kernel/modprobe
for i in @kernelModules@; do
    info "loading module $(basename "$i")..."
    modprobe "$i"
done

# Create device nodes in /dev.
@preDeviceCommands@
info "running udev..."
ln -sfn /proc/self/fd /dev/fd
ln -sfn /proc/self/fd/0 /dev/stdin
ln -sfn /proc/self/fd/1 /dev/stdout
ln -sfn /proc/self/fd/2 /dev/stderr
mkdir -p /etc/systemd
ln -sfn @linkUnits@ /etc/systemd/network
mkdir -p /etc/udev
ln -sfn @udevRules@ /etc/udev/rules.d
mkdir -p /dev/.mdadm
systemd-udevd --daemon
udevadm trigger --action=add
udevadm settle

# XXX: Use case usb->lvm will still fail, usb->luks->lvm is covered
@preLVMCommands@

info "starting device mapper and LVM..."
lvm vgchange -ay

# boot.debug1devices on the kernel command line stops here.
if test -n "$debug1devices"; then fail; fi

@postDeviceCommands@
|
|
|
|
|
|
|
|
|
|
|
2009-02-01 19:53:59 +00:00
|
|
|
|
# Check the specified file system, if appropriate.
#
# Arguments: $1 - device
#            $2 - file system type
# Behaviour: runs "fsck -V -a" on the device unless the check is skipped.
#            fsck exit bit 2 triggers a reboot; bit 4 or a status >= 8
#            calls fail.
# Returns:   always 0 (when it returns at all)
checkFS() {
    local device="$1"
    local fsType="$2"
    local fsckResult

    # Only check block devices.
    if [ ! -b "$device" ]; then return 0; fi

    case $fsType in
        # Don't check ROM filesystems.
        iso9660|udf) return 0 ;;
        # Don't check resilient COWs as they validate the fs structures at mount time
        btrfs|zfs|bcachefs) return 0 ;;
        # Skip fsck for apfs as the fsck utility does not support repairing the filesystem (no -a option)
        apfs) return 0 ;;
        # Skip fsck for nilfs2 - not needed by design and no fsck tool for this filesystem.
        nilfs2) return 0 ;;
        # Skip fsck for inherently readonly filesystems.
        squashfs) return 0 ;;
        # If we couldn't figure out the FS type, then skip fsck.
        auto)
            echo 'cannot check filesystem with type "auto"!'
            return 0
            ;;
    esac

    # Device might be already mounted manually
    # e.g. NBD-device or the host filesystem of the file which contains encrypted root fs
    if mount | grep -q "^$device on "; then
        echo "skip checking already mounted $device"
        return 0
    fi

    # Optionally, skip fsck on journaling filesystems.  This option is
    # a hack - it's mostly because e2fsck on ext3 takes much longer to
    # recover the journal than the ext3 implementation in the kernel
    # does (minutes versus seconds).
    if [ -z "@checkJournalingFS@" ]; then
        case $fsType in
            ext3|ext4|reiserfs|xfs|jfs|f2fs) return 0 ;;
        esac
    fi

    echo "checking $device..."

    # btrfs has already returned above, so the flags are always "-V -a".
    fsck -V -a "$device"
    fsckResult=$?

    # Bit 2 of the fsck status is set.
    if [ $((fsckResult & 2)) -ne 0 ]; then
        echo "fsck finished, rebooting..."
        sleep 3
        reboot -f
    fi

    # Bit 4 of the fsck status is set.
    if [ $((fsckResult & 4)) -ne 0 ]; then
        echo "$device has unrepaired errors, please fix them manually."
        fail
    fi

    if [ "$fsckResult" -ge 8 ]; then
        echo "fsck on $device failed."
        fail
    fi

    return 0
}
|
2008-11-28 12:03:56 +00:00
|
|
|
|
|
2007-02-06 16:53:36 +00:00
|
|
|
|
|
2009-02-01 19:53:59 +00:00
|
|
|
|
# Function for mounting a file system.
#
# Appends an entry for the file system to /etc/fstab, checks it with
# checkFS, optionally grows it, and mounts it below /mnt-root.
#
# Arguments: $1 - device
#            $2 - mount point, relative to /mnt-root
#            $3 - comma-separated mount options; options starting with "x-"
#                 are not written to fstab, and "x-nixos.autoresize"
#                 requests a resize of ext2/3/4 and f2fs file systems
#            $4 - file system type, or "auto" to detect it with blkid
# Returns:   always 0; a mount failure is reported through fail
mountFS() {
    local device="$1"
    local mountPoint="$2"
    local options="$3"
    local fsType="$4"
    local opt kind dir

    if [ "$fsType" = auto ]; then
        fsType=$(blkid -o value -s TYPE "$device")
        if [ -z "$fsType" ]; then fsType=auto; fi
    fi

    # Filter out x- options, which busybox doesn't do yet.
    # Every kept option is followed by a comma, so the result ends in ','.
    local optionsFiltered
    optionsFiltered="$(IFS=,; for opt in $options; do case $opt in x-*) ;; *) echo -n "$opt," ;; esac; done)"
    # Prefix (lower|upper|work)dir with /mnt-root (overlayfs)
    local optionsPrefixed
    optionsPrefixed="$( echo "$optionsFiltered" | sed -E 's#\<(lowerdir|upperdir|workdir)=#\1=/mnt-root#g' )"

    echo "$device /mnt-root$mountPoint $fsType $optionsPrefixed" >> /etc/fstab

    checkFS "$device" "$fsType"

    # Optionally resize the filesystem.
    case $options in
        *x-nixos.autoresize*)
            case $fsType in
                ext2|ext3|ext4)
                    # Load the file system module before resizing.  resize2fs
                    # looks for /sys/fs/ext4/features/lazy_itable_init, which
                    # is only present once the module is loaded; without it,
                    # growing a multi-terabyte file system was observed to
                    # take 12-43 minutes instead of well under a minute.
                    modprobe "$fsType"
                    echo "resizing $device..."
                    e2fsck -fp "$device"
                    resize2fs "$device"
                    ;;
                f2fs)
                    echo "resizing $device..."
                    fsck.f2fs -fp "$device"
                    resize.f2fs "$device"
                    ;;
            esac
            ;;
    esac

    # Create backing directories for overlayfs
    if [ "$fsType" = overlay ]; then
        for kind in upper work; do
            dir="$( echo "$optionsPrefixed" | grep -o "${kind}dir=[^,]*" )"
            mkdir -m 0700 -p "${dir##*=}"
        done
    fi

    info "mounting $device on $mountPoint..."

    mkdir -p "/mnt-root$mountPoint"

    # For ZFS and CIFS mounts, retry a few times before giving up.
    # We do this for ZFS as a workaround for issue NixOS/nixpkgs#25383.
    local n=0
    while true; do
        # The device, type and options come from the fstab entry written above.
        mount "/mnt-root$mountPoint" && break
        case $fsType in
            cifs|zfs)
                if [ "$n" -ge 10 ]; then fail; break; fi
                ;;
            *)
                fail
                break
                ;;
        esac
        echo "retrying..."
        sleep 1
        n=$((n + 1))
    done

    # A root file system carrying /etc/NIXOS_LUSTRATE gets lustrated.
    if [ "$mountPoint" = "/" ] && [ -f "/mnt-root/etc/NIXOS_LUSTRATE" ]; then
        lustrateRoot "/mnt-root"
    fi

    return 0
}
|
|
|
|
|
|
2016-08-22 00:15:13 +00:00
|
|
|
|
# Lustrate a root file system: move everything at its top level except nix,
# boot and the staging directory into old-root, mark the root with
# etc/NIXOS, and copy back the paths listed (one per line, relative to the
# root) in the old etc/NIXOS_LUSTRATE.
#
# Arguments: $1 - path of the mounted root file system
# Outputs:   progress messages on stdout (mv -v / cp -av listings)
lustrateRoot () {
    local root="$1"
    local d keeper keeperDir

    echo
    echo -e "\e[1;33m<<< NixOS is now lustrating the root filesystem (cruft goes to /old-root) >>>\e[0m"
    echo

    mkdir -m 0755 -p "$root/old-root.tmp"

    echo
    echo "Moving impurities out of the way:"
    for d in "$root"/*; do
        case $d in
            "$root/nix") continue ;;
            "$root/boot") continue ;; # Don't render the system unbootable
            "$root/old-root.tmp") continue ;;
        esac

        mv -v "$d" "$root/old-root.tmp"
    done

    # Use .tmp to make sure subsequent invocations don't clash
    mv -v "$root/old-root.tmp" "$root/old-root"

    mkdir -m 0755 -p "$root/etc"
    touch "$root/etc/NIXOS"

    exec 4< "$root/old-root/etc/NIXOS_LUSTRATE"

    echo
    echo "Restoring selected impurities:"
    # -r keeps backslashes literal; the "|| [ -n ... ]" part also processes a
    # final line that lacks a trailing newline.
    while read -r -u 4 keeper || [ -n "$keeper" ]; do
        # A blank line would make cp try to copy all of old-root into the root.
        [ -n "$keeper" ] || continue
        keeperDir="$(dirname "$keeper")"
        mkdir -m 0755 -p "$root/$keeperDir"
        cp -av "$root/old-root/$keeper" "$root/$keeper"
    done

    exec 4>&-
}
|
2007-02-06 16:53:36 +00:00
|
|
|
|
|
2016-08-07 22:32:18 +00:00
|
|
|
|
|
|
|
|
|
|
2016-08-07 22:34:23 +00:00
|
|
|
|
# Try to resume from hibernation when the kernel exposes the resume interface.
if [ -e /sys/power/resume ] && [ -e /sys/power/disk ]; then
    if test -n "@resumeDevice@" && waitDevice "@resumeDevice@"; then
        resumeDev="@resumeDevice@"
        resumeInfo="$(udevadm info -q property "$resumeDev" )"
    else
        for sd in @resumeDevices@; do
            # Try to detect resume device. According to Ubuntu bug:
            # https://bugs.launchpad.net/ubuntu/+source/pm-utils/+bug/923326/comments/1
            # when there are multiple swap devices, we can't know where the hibernate
            # image will reside. We can check all of them for swsuspend blkid.
            if waitDevice "$sd"; then
                resumeInfo="$(udevadm info -q property "$sd")"
                if [ "$(echo "$resumeInfo" | sed -n 's/^ID_FS_TYPE=//p')" = "swsuspend" ]; then
                    resumeDev="$sd"
                    break
                fi
            fi
        done
    fi
    if test -n "$resumeDev"; then
        # Hand the device's major:minor numbers to the kernel.
        resumeMajor="$(echo "$resumeInfo" | sed -n 's/^MAJOR=//p')"
        resumeMinor="$(echo "$resumeInfo" | sed -n 's/^MINOR=//p')"
        echo "$resumeMajor:$resumeMinor" > /sys/power/resume 2> /dev/null || echo "failed to resume..."
    fi
fi
|
|
|
|
|
|
2019-09-21 15:08:00 +00:00
|
|
|
|
# If we have a path to an iso file, find the iso and link it to /dev/root
if [ -n "$isoPath" ]; then
    mkdir -p /findiso

    # Scan all devices reported by blkid; if the iso is not found, wait and
    # scan once more.
    for delay in 5 10; do
        # The scan runs in a subshell because it is the right-hand side of a
        # pipeline, so a "break 2" in there cannot leave this for loop.
        # The subshell reports success through its exit status instead, and
        # the loop stops as soon as the iso has been found.
        if blkid | (
            while read -r line; do
                device=$(echo "$line" | sed 's/:.*//')
                type=$(echo "$line" | sed 's/.*TYPE="\([^"]*\)".*/\1/')

                mount -t "$type" "$device" /findiso
                if [ -e "/findiso$isoPath" ]; then
                    # Leave the device mounted; /dev/root points into it.
                    ln -sf "/findiso$isoPath" /dev/root
                    exit 0
                else
                    umount /findiso
                fi
            done
            exit 1
        ); then
            break
        fi

        sleep "$delay"
    done
fi
|
2016-08-07 22:34:23 +00:00
|
|
|
|
|
2006-11-12 18:48:47 +00:00
|
|
|
|
# Try to find and mount the root device.
mkdir -p "$targetRoot"

# File descriptor 3 is used for the file system list so that commands run
# inside the loop cannot consume it through stdin.
exec 3< @fsInfo@

# Every entry consists of four consecutive lines: mount point, device,
# file system type and mount options.  `-r' keeps backslashes in those
# lines literal.
while read -r -u 3 mountPoint; do
    read -r -u 3 device
    read -r -u 3 fsType
    read -r -u 3 options

    # !!! Really quick hack to support bind mounts, i.e., where the
    # "device" should be taken relative to /mnt-root, not /.  Assume
    # that every device that starts with / but doesn't start with /dev
    # is a bind mount.
    pseudoDevice=
    case $device in
        /dev/*)
            ;;
        //*)
            # Don't touch SMB/CIFS paths.
            pseudoDevice=1
            ;;
        /*)
            device=/mnt-root$device
            ;;
        *)
            # Not an absolute path; assume that it's a pseudo-device
            # like an NFS path (e.g. "server:/path").
            pseudoDevice=1
            ;;
    esac

    if test -z "$pseudoDevice" && ! waitDevice "$device"; then
        # If it doesn't appear, try to mount it anyway (and
        # probably fail).  This is a fallback for non-device "devices"
        # that we don't properly recognise.
        echo "Timed out waiting for device $device, trying to mount anyway."
    fi

    # Wait once more for the udev queue to empty, just in case it's
    # doing something with $device right now.
    udevadm settle

    # If copytoram is enabled: skip mounting the ISO and copy its content to a tmpfs.
    if [ -n "$copytoram" ] && [ "$device" = /dev/root ] && [ "$mountPoint" = /iso ]; then
        fsType=$(blkid -o value -s TYPE "$device")
        # Size the tmpfs after the source; fall back to the file size when
        # blockdev cannot report one.
        fsSize=$(blockdev --getsize64 "$device" || stat -Lc '%s' "$device")

        mkdir -p /tmp-iso
        mount -t "$fsType" /dev/root /tmp-iso
        mountFS tmpfs /iso size="$fsSize" tmpfs

        cp -r /tmp-iso/* /mnt-root/iso/

        umount /tmp-iso
        rmdir /tmp-iso
        continue
    fi

    # A tmpfs root combined with $persistence: mount the persistence device
    # as / instead of the tmpfs.
    if [ "$mountPoint" = / ] && [ "$device" = tmpfs ] && [ -n "$persistence" ]; then
        echo persistence...
        waitDevice "$persistence"
        echo enabling persistence...
        mountFS "$persistence" "$mountPoint" "$persistence_opt" "auto"
        continue
    fi

    mountFS "$device" "$mountPoint" "$options" "$fsType"
done

# Done with the file system list.
exec 3>&-
|
|
|
|
|
|
2008-01-24 16:56:09 +00:00
|
|
|
|
|
2009-06-18 16:03:18 +00:00
|
|
|
|
@postMountCommands@
|
|
|
|
|
|
|
|
|
|
|
2014-10-10 15:16:10 +00:00
|
|
|
|
# Emit a udev rule for /dev/root to prevent systemd from complaining.
# The device numbers are taken from /mnt-root/iso when that path exists and
# from the target root otherwise.
if [ -e /mnt-root/iso ]; then
    rootIdFile=/mnt-root/iso
else
    rootIdFile=$targetRoot
fi

# udevadm prints ROOT_MAJOR=/ROOT_MINOR= assignments; the substitution is
# quoted so that its output is evaluated verbatim, without word splitting
# or globbing.
eval "$(udevadm info --export --export-prefix=ROOT_ --device-id-of-file="$rootIdFile")"

# Only write the rule when both numbers are known and the major number is
# not 0.
if [ -n "$ROOT_MAJOR" ] && [ -n "$ROOT_MINOR" ] && [ "$ROOT_MAJOR" != 0 ]; then
    mkdir -p /run/udev/rules.d
    echo 'ACTION=="add|change", SUBSYSTEM=="block", ENV{MAJOR}=="'"$ROOT_MAJOR"'", ENV{MINOR}=="'"$ROOT_MINOR"'", SYMLINK+="root"' > /run/udev/rules.d/61-dev-root-link.rules
fi

# Stop udevd.
udevadm control --exit

# Reset the logging file descriptors.
# Do this just before pkill, which will kill the tee process.
# First point stdout/stderr back at the descriptors numbered by $logOutFd and
# $logErrFd, then close those descriptors.  `eval' is needed because the
# descriptor numbers to close are only known at run time.
exec 1>&$logOutFd 2>&$logErrFd
eval "exec $logOutFd>&- $logErrFd>&-"
|
2016-02-22 05:42:24 +00:00
|
|
|
|
|
2011-09-22 08:26:58 +00:00
|
|
|
|
# Kill any remaining processes, just to be sure we're not taking any
|
2014-05-21 13:19:01 +00:00
|
|
|
|
# with us into stage 2. But keep storage daemons like unionfs-fuse.
|
nixos/stage-1: Don't kill kernel threads
Unfortunately, pkill doesn't distinguish between kernel and user space
processes, so we need to make sure we don't accidentally kill kernel
threads.
Normally, a kernel thread ignores all signals, but there are a few that
do handle them. A quick grep on the kernel source tree (as of kernel 4.6.0) shows
the following source files which use allow_signal():
drivers/isdn/mISDN/l1oip_core.c
drivers/md/md.c
drivers/misc/mic/cosm/cosm_scif_server.c
drivers/misc/mic/cosm_client/cosm_scif_client.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
drivers/staging/rtl8188eu/core/rtw_cmd.c
drivers/staging/rtl8712/rtl8712_cmd.c
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_login.c
drivers/target/iscsi/iscsi_target_nego.c
drivers/usb/atm/usbatm.c
drivers/usb/gadget/function/f_mass_storage.c
fs/jffs2/background.c
fs/lockd/clntlock.c
fs/lockd/svc.c
fs/nfs/nfs4state.c
fs/nfsd/nfssvc.c
While not all of these are necessarily kthreads and some functionality
may still be unimpeded, it's still quite harmful and can cause
unexpected side-effects, especially because some of these kthreads are
storage-related (which we obviously don't want to kill during bootup).
During discussion at #15226, @dezgeg suggested the following
implementation:
for pid in $(pgrep -v -f '@'); do
if [ "$(cat /proc/$pid/cmdline)" != "" ]; then
kill -9 "$pid"
fi
done
This has a few downsides:
* User space processes which use an empty string in their command line
won't be killed.
* It results in errors during bootup because some shell-related
processes are already terminated (maybe it's pgrep itself, haven't
checked).
* The @ is searched within the full command line, not just at the
beginning of the string. Of course, we already had this until now, so
it's not a problem of his implementation.
I posted an alternative implementation which doesn't suffer from the
first point, but even that one wasn't sufficient:
for pid in $(pgrep -v -f '^@'); do
readlink "/proc/$pid/exe" &> /dev/null || continue
echo "$pid"
done | xargs kill -9
This one spawns a subshell, which would be included in the processes to
kill and actually kills itself during the process.
So what we have now is even checking whether the shell process itself is
in the list to kill and avoids killing it just to be sure.
Also, we don't spawn a subshell anymore and use /proc/$pid/exe to
distinguish between user space and kernel processes like in the comments
of the following StackOverflow answer:
http://stackoverflow.com/a/12231039
We don't need to take care of terminating processes, because what we
actually want IS to terminate the processes.
The only point where this (and any previous) approach falls short is if we
have processes that act like fork bombs, because they might spawn
additional processes between the pgrep and the killing. We can only
address this with process/control groups and this still won't save us
because the root user can escape from that as well.
Signed-off-by: aszlig <aszlig@redmoonstudios.org>
Fixes: #15226
2016-05-06 10:36:58 +00:00
|
|
|
|
#
|
|
|
|
|
# Storage daemons are distinguished by an @ in front of their command line:
|
|
|
|
|
# https://www.freedesktop.org/wiki/Software/systemd/RootStorageDaemons/
|
2016-10-28 20:50:44 +00:00
|
|
|
|
# Walk over every process whose command line does not start with `@'.
for pid in $(pgrep -v -f '^@'); do
    # Make sure we don't kill kernel processes, see #15226 and:
    # http://stackoverflow.com/questions/12213445/identifying-kernel-threads
    # A process whose /proc/<pid>/exe link cannot be read is skipped.
    if ! readlink "/proc/$pid/exe" &> /dev/null; then
        continue
    fi

    # Try to avoid killing ourselves.
    if [ $pid -eq $$ ]; then
        continue
    fi

    kill -9 "$pid"
done
|
2008-08-08 22:44:45 +00:00
|
|
|
|
|
2007-05-30 10:32:42 +00:00
|
|
|
|
# If $debug1mounts is set, invoke the failure handler now that the mounts
# are done.
if test -n "$debug1mounts"; then fail; fi

# Restore /proc/sys/kernel/modprobe to its original value.
echo /sbin/modprobe > /proc/sys/kernel/modprobe

# Start stage 2.  `switch_root' deletes all files in the ramfs on the
# current root.  The path has to be valid in the chroot not outside.
if [ ! -e "$targetRoot/$stage2Init" ]; then
    # The init path does not resolve from out here.  Strip trailing path
    # components until one of them is a symlink below $targetRoot; only give
    # up when no component is a symlink.
    stage2Check=${stage2Init}
    while [ "$stage2Check" != "${stage2Check%/*}" ] && [ ! -L "$targetRoot/$stage2Check" ]; do
        stage2Check=${stage2Check%/*}
    done
    if [ ! -L "$targetRoot/$stage2Check" ]; then
        echo "stage 2 init script ($targetRoot/$stage2Init) not found"
        fail
    fi
fi

# Make sure the mount points for the moved file systems exist.
mkdir -m 0755 -p "$targetRoot/proc" "$targetRoot/sys" "$targetRoot/dev" "$targetRoot/run"

# Move the already mounted file systems into the new root.
mount --move /proc "$targetRoot/proc"
mount --move /sys "$targetRoot/sys"
mount --move /dev "$targetRoot/dev"
mount --move /run "$targetRoot/run"

# Replace this shell with switch_root, started with an empty environment.
# `env' is given the full path because PATH is cleared by `-i'.
exec env -i "$(type -P switch_root)" "$targetRoot" "$stage2Init"

fail # should never be reached
|