Kubernetes module start-up ordering (reviewed patch)

Purpose, as visible in the hunks below: the Kubernetes units no longer order
themselves directly after each other.  Intermediate systemd targets are added
(kube-apiserver-online, kubelet, flannel, node-online, cfssl-online) and units
poll the endpoint they depend on in preStart via the new top.lib.mkWaitCurl
helper.  The NixOS tests wait for kubernetes.target before probing the nodes.

Changes made during review, relative to the submitted patch:
  * default.nix: mkWaitCurl gained a short doc comment (hunk counts adjusted).
  * kubelet.nix: node-online uses quoted $(...) instead of nested backticks and
    its explanatory comment was reworded (same number of lines).
  * pki.nix: kube-certmgr-bootstrap fetches the CA from `remote` again, which is
    what the removed code used and what cfssl-online waits for, instead of a
    re-assembled URL.  The cfsslPort binding that only served that URL was
    dropped, so the later pki.nix hunks start one line earlier.  The
    `mkdir -p $(dirname ...)` calls are quoted like the surrounding script.

NOTE(review): indentation and blank context lines were reconstructed from the
hunk line counts because the submitted text had its newlines collapsed; check
against the tree or apply with --ignore-whitespace.
NOTE(review): the `index` hashes of default.nix, kubelet.nix and pki.nix
predate the review edits.
NOTE(review): the last rbac.nix hunk is one context line short of its header;
it appears to continue past the end of the reviewed excerpt.
NOTE(review): assumes `remote` in pki.nix names the cfssl endpoint on the
master, as its use in the removed lines suggests; confirm against the file.

diff --git a/nixos/modules/services/cluster/kubernetes/addon-manager.nix b/nixos/modules/services/cluster/kubernetes/addon-manager.nix
index 17f2dde31a7..abd9e99ba02 100644
--- a/nixos/modules/services/cluster/kubernetes/addon-manager.nix
+++ b/nixos/modules/services/cluster/kubernetes/addon-manager.nix
@@ -72,7 +72,7 @@ in
     systemd.services.kube-addon-manager = {
       description = "Kubernetes addon manager";
       wantedBy = [ "kubernetes.target" ];
-      after = [ "kube-apiserver.service" ];
+      after = [ "kube-apiserver-online.target" "node-online.target" ];
       environment.ADDON_PATH = "/etc/kubernetes/addons/";
       path = [ pkgs.gawk ];
       serviceConfig = {
diff --git a/nixos/modules/services/cluster/kubernetes/apiserver.nix b/nixos/modules/services/cluster/kubernetes/apiserver.nix
index 08f929060aa..567d31f06ef 100644
--- a/nixos/modules/services/cluster/kubernetes/apiserver.nix
+++ b/nixos/modules/services/cluster/kubernetes/apiserver.nix
@@ -293,8 +293,9 @@ in
     in {
       systemd.services.kube-apiserver = {
         description = "Kubernetes APIServer Service";
-        wantedBy = [ "kubernetes.target" ];
-        after = [ "network.target" ];
+        wantedBy = [ "kube-apiserver-online.target" ];
+        after = [ "certmgr.service" ];
+        before = [ "kube-apiserver-online.target" ];
         serviceConfig = {
           Slice = "kubernetes.slice";
           ExecStart = ''${top.package}/bin/kube-apiserver \
@@ -459,7 +460,28 @@ in
       };
 
     }))
+    {
+      systemd.targets.kube-apiserver-online = {
+        wantedBy = [ "kubernetes.target" ];
+        before = [ "kubernetes.target" ];
+      };
 
+      systemd.services.kube-apiserver-online = mkIf top.flannel.enable {
+        description = "apiserver control plane is online";
+        wantedBy = [ "kube-apiserver-online.target" ];
+        after = [ "kube-scheduler.service" "kube-controller-manager.service" ];
+        before = [ "kube-apiserver-online.target" ];
+        preStart = ''
+          ${top.lib.mkWaitCurl (with top.pki.certs.flannelClient; {
+            sleep = 3;
+            path = "/healthz";
+            cacert = top.caFile;
+            inherit cert key;
+          })}
+        '';
+        script = "echo apiserver control plane is online";
+      };
+    }
   ];
 
 }
diff --git a/nixos/modules/services/cluster/kubernetes/controller-manager.nix b/nixos/modules/services/cluster/kubernetes/controller-manager.nix
index 27b28311adb..20f471215db 100644
--- a/nixos/modules/services/cluster/kubernetes/controller-manager.nix
+++ b/nixos/modules/services/cluster/kubernetes/controller-manager.nix
@@ -116,8 +116,17 @@ in
 
     systemd.services.kube-controller-manager = {
       description = "Kubernetes Controller Manager Service";
-      wantedBy = [ "kubernetes.target" ];
+      wantedBy = [ "kube-apiserver-online.target" ];
       after = [ "kube-apiserver.service" ];
+      before = [ "kube-apiserver-online.target" ];
+      preStart = ''
+        ${top.lib.mkWaitCurl (with top.pki.certs.controllerManagerClient; {
+          sleep = 1;
+          path = "/api";
+          cacert = top.caFile;
+          inherit cert key;
+        })}
+      '';
       serviceConfig = {
         RestartSec = "30s";
         Restart = "on-failure";
diff --git a/nixos/modules/services/cluster/kubernetes/default.nix b/nixos/modules/services/cluster/kubernetes/default.nix
index 375e33e91b5..f1f544afc4d 100644
--- a/nixos/modules/services/cluster/kubernetes/default.nix
+++ b/nixos/modules/services/cluster/kubernetes/default.nix
@@ -73,6 +73,22 @@ let
     };
   };
 
+  # Render a shell loop that blocks until curl can fetch ${address}${path}.
+  # It retries every `sleep` seconds; cacert/cert/key add the matching curl
+  # TLS flags when set. `args` is spliced in unquoted after the URL, so callers
+  # may pass extra curl options or a trailing pipeline (e.g. "-o - | grep ...").
+  mkWaitCurl = { address ? cfg.apiserverAddress, sleep ? 2, path ? "", args ? "-o /dev/null",
+                 cacert ? null, cert ? null, key ? null, }: ''
+    while ! ${pkgs.curl}/bin/curl --fail-early -fs \
+      ${if cacert != null then "--cacert ${cacert}" else ""} \
+      ${if cert != null then "--cert ${cert}" else ""} \
+      ${if key != null then "--key ${key}" else ""} \
+      ${address}${path} ${args} ; do
+        sleep ${toString sleep}
+        echo Waiting to be able to reach ${address}${path}
+    done
+  '';
+
   kubeConfigDefaults = {
     server = mkDefault cfg.kubeconfig.server;
     caFile = mkDefault cfg.kubeconfig.caFile;
@@ -162,6 +178,7 @@ in {
         inherit mkCert;
         inherit mkKubeConfig;
         inherit mkKubeConfigOptions;
+        inherit mkWaitCurl;
       };
       type = types.attrs;
     };
diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix
index ef06acb6de3..4aa547c9d3e 100644
--- a/nixos/modules/services/cluster/kubernetes/flannel.nix
+++ b/nixos/modules/services/cluster/kubernetes/flannel.nix
@@ -27,7 +27,12 @@ in
   };
 
   ###### implementation
-  config = mkIf cfg.enable {
+  config = mkIf cfg.enable (let
+    flannelBootstrapPaths = mkIf top.apiserver.enable [
+      top.pki.certs.clusterAdmin.cert
+      top.pki.certs.clusterAdmin.key
+    ];
+  in {
     services.flannel = {
 
       enable = mkDefault true;
@@ -48,8 +53,10 @@ in
       }];
     };
 
-    systemd.services."mk-docker-opts" = {
+    systemd.services.mk-docker-opts = {
       description = "Pre-Docker Actions";
+      wantedBy = [ "flannel.target" ];
+      before = [ "flannel.target" ];
       path = with pkgs; [ gawk gnugrep ];
       script = ''
         ${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker
@@ -68,6 +75,17 @@ in
       };
     };
 
+    systemd.targets.flannel = {
+      wantedBy = [ "node-online.target" ];
+      before = [ "node-online.target" ];
+    };
+
+    systemd.services.flannel = {
+      wantedBy = [ "flannel.target" ];
+      after = [ "kubelet.target" ];
+      before = [ "flannel.target" ];
+    };
+
     systemd.services.docker = {
       environment.DOCKER_OPTS = "-b none";
       serviceConfig.EnvironmentFile = "-/run/flannel/docker";
@@ -93,44 +111,69 @@ in
     };
 
     # give flannel som kubernetes rbac permissions if applicable
-    services.kubernetes.addonManager.bootstrapAddons = mkIf ((storageBackend == "kubernetes") && (elem "RBAC" top.apiserver.authorizationMode)) {
+    systemd.services.flannel-rbac-bootstrap = mkIf (top.apiserver.enable && (elem "RBAC" top.apiserver.authorizationMode)) {
 
-      flannel-cr = {
-        apiVersion = "rbac.authorization.k8s.io/v1beta1";
-        kind = "ClusterRole";
-        metadata = { name = "flannel"; };
-        rules = [{
-          apiGroups = [ "" ];
-          resources = [ "pods" ];
-          verbs = [ "get" ];
-        }
-        {
-          apiGroups = [ "" ];
-          resources = [ "nodes" ];
-          verbs = [ "list" "watch" ];
-        }
-        {
-          apiGroups = [ "" ];
-          resources = [ "nodes/status" ];
-          verbs = [ "patch" ];
-        }];
-      };
+      wantedBy = [ "kube-apiserver-online.target" ];
+      after = [ "kube-apiserver-online.target" ];
+      before = [ "flannel.service" ];
+      path = with pkgs; [ kubectl ];
+      preStart = let
+        files = mapAttrsToList (n: v: pkgs.writeText "${n}.json" (builtins.toJSON v)) {
+          flannel-cr = {
+            apiVersion = "rbac.authorization.k8s.io/v1beta1";
+            kind = "ClusterRole";
+            metadata = { name = "flannel"; };
+            rules = [{
+              apiGroups = [ "" ];
+              resources = [ "pods" ];
+              verbs = [ "get" ];
+            }
+            {
+              apiGroups = [ "" ];
+              resources = [ "nodes" ];
+              verbs = [ "list" "watch" ];
+            }
+            {
+              apiGroups = [ "" ];
+              resources = [ "nodes/status" ];
+              verbs = [ "patch" ];
+            }];
+          };
 
-      flannel-crb = {
-        apiVersion = "rbac.authorization.k8s.io/v1beta1";
-        kind = "ClusterRoleBinding";
-        metadata = { name = "flannel"; };
-        roleRef = {
-          apiGroup = "rbac.authorization.k8s.io";
-          kind = "ClusterRole";
-          name = "flannel";
+          flannel-crb = {
+            apiVersion = "rbac.authorization.k8s.io/v1beta1";
+            kind = "ClusterRoleBinding";
+            metadata = { name = "flannel"; };
+            roleRef = {
+              apiGroup = "rbac.authorization.k8s.io";
+              kind = "ClusterRole";
+              name = "flannel";
+            };
+            subjects = [{
+              kind = "User";
+              name = "flannel-client";
+            }];
+          };
         };
-        subjects = [{
-          kind = "User";
-          name = "flannel-client";
-        }];
-      };
+      in ''
+        ${top.lib.mkWaitCurl (with top.pki.certs.clusterAdmin; {
+          path = "/";
+          cacert = top.caFile;
+          inherit cert key;
+        })}
+        kubectl -s ${top.apiserverAddress} --certificate-authority=${top.caFile} --client-certificate=${top.pki.certs.clusterAdmin.cert} --client-key=${top.pki.certs.clusterAdmin.key} apply -f ${concatStringsSep " \\\n -f " files}
+      '';
+      script = "echo Ok";
+      unitConfig.ConditionPathExists = flannelBootstrapPaths;
 
     };
-  };
+
+    systemd.paths.flannel-rbac-bootstrap = mkIf top.apiserver.enable {
+      wantedBy = [ "flannel-rbac-bootstrap.service" ];
+      pathConfig = {
+        PathExists = flannelBootstrapPaths;
+        PathChanged = flannelBootstrapPaths;
+      };
+    };
+  });
 }
diff --git a/nixos/modules/services/cluster/kubernetes/kubelet.nix b/nixos/modules/services/cluster/kubernetes/kubelet.nix
index 86402cba7c4..b3f3c036564 100644
--- a/nixos/modules/services/cluster/kubernetes/kubelet.nix
+++ b/nixos/modules/services/cluster/kubernetes/kubelet.nix
@@ -252,8 +252,9 @@ in
 
       systemd.services.kubelet = {
         description = "Kubernetes Kubelet Service";
-        wantedBy = [ "kubernetes.target" ];
-        after = [ "network.target" "docker.service" "kube-apiserver.service" ];
+        wantedBy = [ "kubelet.target" ];
+        after = [ "kube-apiserver-online.target" ];
+        before = [ "kubelet.target" ];
         path = with pkgs; [ gitMinimal openssh docker utillinux iproute ethtool thin-provisioning-tools iptables socat ] ++ top.path;
         preStart = ''
           ${concatMapStrings (img: ''
@@ -325,6 +326,30 @@ in
         };
       };
 
+      systemd.services.docker.before = [ "kubelet.service" ];
+
+      systemd.services.node-online = {
+        wantedBy = [ "node-online.target" ];
+        after = [ "flannel.target" "kubelet.target" ];
+        before = [ "node-online.target" ];
+        # Ordering is subtle: flannel needs kubelet to run the pause container before it
+        # negotiates the node CIDR with the apiserver, then reconfigures and restarts dockerd.
+        # Pods created earlier must be recreated once docker0's network changes, so block
+        # until docker's start time is no older than /run/flannel/docker.
+        script = let
+          docker-env = "/run/flannel/docker";
+          flannel-date = "stat --print=%Y ${docker-env}";
+          docker-date = "systemctl show --property=ActiveEnterTimestamp --value docker";
+        in ''
+          while ! test -f ${docker-env} ; do sleep 1 ; done
+          while test "$(${flannel-date})" -gt "$(date +%s --date="$(${docker-date})")" ; do
+            sleep 1
+          done
+        '';
+        serviceConfig.Type = "oneshot";
+        serviceConfig.Slice = "kubernetes.slice";
+      };
+
       # Allways include cni plugins
       services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins];
 
@@ -369,5 +394,16 @@ in
       };
 
     })
+    {
+      systemd.targets.kubelet = {
+        wantedBy = [ "node-online.target" ];
+        before = [ "node-online.target" ];
+      };
+
+      systemd.targets.node-online = {
+        wantedBy = [ "kubernetes.target" ];
+        before = [ "kubernetes.target" ];
+      };
+    }
   ];
 }
diff --git a/nixos/modules/services/cluster/kubernetes/pki.nix b/nixos/modules/services/cluster/kubernetes/pki.nix
index 8ad17d4dfb4..d08d7892bb5 100644
--- a/nixos/modules/services/cluster/kubernetes/pki.nix
+++ b/nixos/modules/services/cluster/kubernetes/pki.nix
@@ -191,13 +191,39 @@ in
         chown cfssl "${cfsslAPITokenPath}" && chmod 400 "${cfsslAPITokenPath}"
       '')]);
 
+    systemd.targets.cfssl-online = {
+      wantedBy = [ "network-online.target" ];
+      after = [ "cfssl.service" "network-online.target" "cfssl-online.service" ];
+    };
+
+    systemd.services.cfssl-online = {
+      description = "Wait for ${remote} to be reachable.";
+      wantedBy = [ "cfssl-online.target" ];
+      before = [ "cfssl-online.target" ];
+      preStart = ''
+        ${top.lib.mkWaitCurl {
+          address = remote;
+          path = "/api/v1/cfssl/info";
+          args = "-kd '{}' -o /dev/null";
+        }}
+      '';
+      script = "echo Ok";
+      serviceConfig = {
+        TimeoutSec = "300";
+      };
+    };
+
     systemd.services.kube-certmgr-bootstrap = {
       description = "Kubernetes certmgr bootstrapper";
-      wantedBy = [ "certmgr.service" ];
-      after = [ "cfssl.target" ];
+      wantedBy = [ "cfssl-online.target" ];
+      after = [ "cfssl-online.target" ];
+      before = [ "certmgr.service" ];
       script = concatStringsSep "\n" [''
         set -e
 
+        mkdir -p "$(dirname "${certmgrAPITokenPath}")"
+        mkdir -p "$(dirname "${top.caFile}")"
+
         # If there's a cfssl (cert issuer) running locally, then don't rely on user to
         # manually paste it in place. Just symlink.
         # otherwise, create the target file, ready for users to insert the token
@@ -209,14 +235,18 @@ in
         fi
       ''
       (optionalString (cfg.pkiTrustOnBootstrap) ''
-        if [ ! -f "${top.caFile}" ] || [ $(cat "${top.caFile}" | wc -c) -lt 1 ]; then
-          ${pkgs.curl}/bin/curl --fail-early -f -kd '{}' ${remote}/api/v1/cfssl/info | \
-            ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}
+        if [ ! -s "${top.caFile}" ]; then
+          ${top.lib.mkWaitCurl {
+            address = remote;
+            path = "/api/v1/cfssl/info";
+            args = "-kd '{}' -o - | ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}";
+          }}
         fi
       '')
       ];
       serviceConfig = {
-        RestartSec = "10s";
+        TimeoutSec = "300";
+        RestartSec = "1s";
         Restart = "on-failure";
       };
     };
@@ -254,6 +284,14 @@ in
     };
 
     systemd.services.certmgr = {
+      wantedBy = [ "cfssl-online.target" ];
+      after = [ "cfssl-online.target" "kube-certmgr-bootstrap.service" ];
+      preStart = ''
+        while ! test -s ${certmgrAPITokenPath} ; do
+          sleep 1
+          echo Waiting for ${certmgrAPITokenPath}
+        done
+      '';
       unitConfig.ConditionPathExists = certmgrPaths;
     };
 
@@ -289,6 +327,12 @@ in
         ''
           export KUBECONFIG=${clusterAdminKubeconfig}
           ${kubectl}/bin/kubectl apply -f ${concatStringsSep " \\\n -f " files}
+
+          ${top.lib.mkWaitCurl (with top.pki.certs.addonManager; {
+            path = "/api/v1/namespaces/kube-system/serviceaccounts/default";
+            cacert = top.caFile;
+            inherit cert key;
+          })}
         '';
       })
     {
@@ -384,6 +428,14 @@ in
     };
 
     systemd.services.flannel = {
+      preStart = ''
+        ${top.lib.mkWaitCurl (with top.pki.certs.flannelClient; {
+          path = "/api/v1/nodes";
+          cacert = top.caFile;
+          inherit cert key;
+          args = "-o - | grep podCIDR >/dev/null";
+        })}
+      '';
       unitConfig.ConditionPathExists = flannelPaths;
     };
 
diff --git a/nixos/modules/services/cluster/kubernetes/proxy.nix b/nixos/modules/services/cluster/kubernetes/proxy.nix
index 83cd3e23100..073756d58ab 100644
--- a/nixos/modules/services/cluster/kubernetes/proxy.nix
+++ b/nixos/modules/services/cluster/kubernetes/proxy.nix
@@ -49,8 +49,16 @@ in
     systemd.services.kube-proxy = {
       description = "Kubernetes Proxy Service";
       wantedBy = [ "kubernetes.target" ];
-      after = [ "kube-apiserver.service" ];
+      after = [ "node-online.target" ];
+      before = [ "kubernetes.target" ];
       path = with pkgs; [ iptables conntrack_tools ];
+      preStart = ''
+        ${top.lib.mkWaitCurl (with top.pki.certs.kubeProxyClient; {
+          path = "/api/v1/nodes/${top.kubelet.hostname}";
+          cacert = top.caFile;
+          inherit cert key;
+        })}
+      '';
       serviceConfig = {
         Slice = "kubernetes.slice";
         ExecStart = ''${top.package}/bin/kube-proxy \
diff --git a/nixos/modules/services/cluster/kubernetes/scheduler.nix b/nixos/modules/services/cluster/kubernetes/scheduler.nix
index 0305b9aefe5..d3302a15402 100644
--- a/nixos/modules/services/cluster/kubernetes/scheduler.nix
+++ b/nixos/modules/services/cluster/kubernetes/scheduler.nix
@@ -59,8 +59,17 @@ in
   config = mkIf cfg.enable {
     systemd.services.kube-scheduler = {
       description = "Kubernetes Scheduler Service";
-      wantedBy = [ "kubernetes.target" ];
+      wantedBy = [ "kube-apiserver-online.target" ];
       after = [ "kube-apiserver.service" ];
+      before = [ "kube-apiserver-online.target" ];
+      preStart = ''
+        ${top.lib.mkWaitCurl (with top.pki.certs.schedulerClient; {
+          sleep = 1;
+          path = "/api";
+          cacert = top.caFile;
+          inherit cert key;
+        })}
+      '';
       serviceConfig = {
         Slice = "kubernetes.slice";
         ExecStart = ''${top.package}/bin/kube-scheduler \
diff --git a/nixos/tests/kubernetes/dns.nix b/nixos/tests/kubernetes/dns.nix
index 46bcb01a526..e7db0a58ab6 100644
--- a/nixos/tests/kubernetes/dns.nix
+++ b/nixos/tests/kubernetes/dns.nix
@@ -77,6 +77,7 @@ let
   singleNodeTest = {
     test = ''
       # prepare machine1 for test
+      $machine1->waitForUnit("kubernetes.target");
       $machine1->waitUntilSucceeds("kubectl get node machine1.${domain} | grep -w Ready");
       $machine1->waitUntilSucceeds("docker load < ${redisImage}");
       $machine1->waitUntilSucceeds("kubectl create -f ${redisPod}");
@@ -102,6 +103,8 @@ let
       # Node token exchange
       $machine1->waitUntilSucceeds("cp -f /var/lib/cfssl/apitoken.secret /tmp/shared/apitoken.secret");
       $machine2->waitUntilSucceeds("cat /tmp/shared/apitoken.secret | nixos-kubernetes-node-join");
+      $machine1->waitForUnit("kubernetes.target");
+      $machine2->waitForUnit("kubernetes.target");
 
       # prepare machines for test
       $machine1->waitUntilSucceeds("kubectl get node machine2.${domain} | grep -w Ready");
diff --git a/nixos/tests/kubernetes/rbac.nix b/nixos/tests/kubernetes/rbac.nix
index 3ce7adcd0d7..967fe506004 100644
--- a/nixos/tests/kubernetes/rbac.nix
+++ b/nixos/tests/kubernetes/rbac.nix
@@ -94,6 +94,8 @@ let
 
   singlenode = base // {
     test = ''
+      $machine1->waitForUnit("kubernetes.target");
+
       $machine1->waitUntilSucceeds("kubectl get node machine1.my.zyx | grep -w Ready");
 
       $machine1->waitUntilSucceeds("docker load < ${kubectlImage}");
@@ -116,6 +118,8 @@ let
       # Node token exchange
       $machine1->waitUntilSucceeds("cp -f /var/lib/cfssl/apitoken.secret /tmp/shared/apitoken.secret");
       $machine2->waitUntilSucceeds("cat /tmp/shared/apitoken.secret | nixos-kubernetes-node-join");
+      $machine1->waitForUnit("kubernetes.target");
+      $machine2->waitForUnit("kubernetes.target");
 
       $machine1->waitUntilSucceeds("kubectl get node machine2.my.zyx | grep -w Ready");