diff --git a/README.md b/README.md index d8778b0..ef30ce2 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,9 @@ There is a [dedicated chatroom](https://matrix.to/#/#forgejo-ci:matrix.org). A m ## Table of content -- Setting up a new [K8S/DRBD/NFS k8s node](k8s.md) -- Maintenance and disaster recovery of a [K8S/DRBD/NFS k8s node](k8s-maintenance.md) +- K8S cluster [files and documentation](https://code.forgejo.org/infrastructure/k8s-cluster) - Setting up a new [LXC/DRBD Host](lxc.md) - Managing services with a [LXC/DRBD/nginx stack](drbd-nginx-lxc.md) -- Installing a [Forgejo instance in the K8S cluster](k8s-forgejo.md) - Installing a [Forgejo runner in an LXC container](runner-lxc.md) - Managing the [Octopuce host](octopuce.md) @@ -319,12 +317,6 @@ stream { Dedicated to https://codeberg.org/forgejo-contrib/forgejo-helm and running from an ephemeral disk -## hetzner{05,06} - -https://hetzner05.forgejo.org & https://hetzner06.forgejo.org run on [EX44](https://www.hetzner.com/dedicated-rootserver/ex44) Hetzner hardware. - -Nodes of [a k8s cluster](k8s.md). - ## Uberspace The website https://forgejo.org is hosted at diff --git a/k3s-host/certmanager.yml b/k3s-host/certmanager.yml deleted file mode 100644 index 5d0a104..0000000 --- a/k3s-host/certmanager.yml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: cert-manager.io/v1 -kind: ClusterIssuer -metadata: - name: letsencrypt-http -spec: - acme: - email: contact@forgejo.org - server: https://acme-v02.api.letsencrypt.org/directory - privateKeySecretRef: - name: letsencrypt-http - solvers: - - http01: - ingress: - class: traefik diff --git a/k3s-host/metallb.yml b/k3s-host/metallb.yml deleted file mode 100644 index a4bd506..0000000 --- a/k3s-host/metallb.yml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: metallb.io/v1beta1 -kind: IPAddressPool -metadata: - name: first-pool -spec: - addresses: - - $failover_ipv4/32 - - $failover_ipv6/128 diff --git a/k3s-host/nfs.yml b/k3s-host/nfs.yml deleted file mode 100644 index 42042c5..0000000 --- a/k3s-host/nfs.yml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: helm.cattle.io/v1 -kind: HelmChart -metadata: - name: nfs - namespace: default -spec: - chart: nfs-subdir-external-provisioner - repo: https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner - targetNamespace: default - set: - nfs.server: $node_nfs_server - nfs.path: /k8s - storageClass.name: nfs diff --git a/k3s-host/pvc.yml b/k3s-host/pvc.yml deleted file mode 100644 index 963271d..0000000 --- a/k3s-host/pvc.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- -apiVersion: v1 -kind: PersistentVolume -metadata: - name: $pvc_name -spec: - capacity: - storage: $pvc_capacity - nfs: - server: $node_nfs_server - path: /k8s/$pvc_name - accessModes: - - ReadWriteMany - persistentVolumeReclaimPolicy: Retain - storageClassName: nfs - mountOptions: - - noatime - - nfsvers=4.2 - volumeMode: Filesystem ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: $pvc_name -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: $pvc_capacity - volumeName: $pvc_name - storageClassName: nfs - volumeMode: Filesystem diff --git a/k3s-host/secrets.sh.example b/k3s-host/secrets.sh.example deleted file mode 100644 index 29cb2a5..0000000 --- a/k3s-host/secrets.sh.example +++ /dev/null @@ -1,4 +0,0 @@ -node_drbd_shared_secret=*** -node_k8s_token=none - - diff --git a/k3s-host/setup.sh b/k3s-host/setup.sh deleted file mode 100755 index 2064583..0000000 --- a/k3s-host/setup.sh +++ /dev/null @@ -1,233 +0,0 @@ -#!/bin/bash - -SELF_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -if ${VERBOSE:-false}; then - set -ex - PS4='${BASH_SOURCE[0]}:$LINENO: ${FUNCNAME[0]}: ' -else - set -e -fi - -source $SELF_DIR/variables.sh -source $SELF_DIR/secrets.sh - -set -o pipefail - -self_node=$(hostname | sed -e 's/hetzner0//') -interface=${node_interface[$self_node]} - -dependencies="retry etckeeper" - -if ! which $dependencies >&/dev/null; then - sudo apt-get -q install -qq -y $dependencies -fi - -function setup_ufw() { - sudo apt-get -q install -qq -y ufw - - sudo ufw --force reset - - sudo ufw default allow incoming - sudo ufw default allow outgoing - sudo ufw default allow routed - - for from in $nodes; do - for to in $nodes; do - if test $from != $to; then - for v in ipv4 ipv6; do - eval from_ip=\${node_$v[$from]} - eval to_ip=\${node_$v[$to]} - sudo ufw allow in on $interface from $from_ip to $to_ip - done - fi - done - done - - for host_ip in ${node_ipv4[$self_node]} ${node_ipv6[$self_node]}; do - sudo ufw allow in on $interface to $host_ip port 22 proto tcp - sudo ufw deny in on $interface log-all to $host_ip - done - - for public_ip in $failover_ipv4 $failover_ipv6; do - sudo ufw allow in on $interface to $public_ip port 22,80,443,2000:3000 proto tcp - sudo ufw deny in on $interface log-all to $public_ip - done - - sudo ufw enable - sudo systemctl start ufw - sudo systemctl enable ufw - sudo ufw status verbose -} - -function setup_drbd() { - if ! test -f /etc/network/interfaces.d/drbd; then - cat <&/dev/null; then - sudo drbdadm create-md $node_drbd_resource - sudo systemctl enable drbd - sudo systemctl start drbd - fi - if ! grep --quiet '^/dev/drbd0 /precious' /etc/fstab; then - echo /dev/drbd0 /precious ext4 noauto,noatime,defaults 0 0 | sudo tee -a /etc/fstab - sudo mkdir -p /precious - fi -} - -function setup_nfs() { - sudo apt-get install -y nfs-kernel-server nfs-common - - if ! test -f /etc/network/interfaces.d/nfs; then - cat <>~/.bashrc - fi - # - # To upgrade, systemctl stop k3s before running this. A node - # that is already part of a cluster does not need the --token - # or --server so there is no need to provide the number of an - # existing node. - # - if ! sudo systemctl --quiet is-active k3s; then - args="" - if test "$existing"; then - if ! test "$node_k8s_token"; then - echo "obtain the token from node $existing with sudo cat /var/lib/rancher/k3s/server/token and set node_k8s_token= in secrets.sh" - exit 1 - fi - args="$args --token $node_k8s_token --server https://$node_k8s_ipv4_prefix.$existing:6443" - fi - if test "$self_node" = $node_k8s_etcd; then - args="$args --disable-apiserver --disable-controller-manager --disable-scheduler" - fi - export INSTALL_K3S_VERSION=$K3S_VERSION - curl -fL https://get.k3s.io | sh -s - server $args --cluster-init --disable=servicelb --disable=traefik --write-kubeconfig-mode=600 --node-ip=$node_k8s_ipv4_prefix.$self_node,$node_k8s_ipv6_prefix::$self_node $node_k8s_cidr --flannel-ipv6-masq - if test "$self_node" = $node_k8s_etcd; then - retry --times 20 -- kubectl taint nodes $(hostname) key1=value1:NoSchedule - fi - if test "$self_node" != $node_k8s_etcd; then - curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash - - fi - fi -} - -function setup_k8s_apply() { - retry --delay 30 --times 10 -- bash -c "$SELF_DIR/subst.sh $1 | kubectl apply --server-side=true -f -" -} - -function setup_k8s_traefik() { - # https://github.com/traefik/traefik-helm-chart?tab=readme-ov-file#deploying-traefik - $SELF_DIR/subst.sh traefik.yml | helm upgrade --install --namespace kube-system traefik -f - --set installCRDs=true --version $TRAEFIK_VERSION oci://ghcr.io/traefik/helm/traefik - setup_k8s_apply traefik-rate-limit.yml -} - -function setup_k8s_nfs() { - setup_k8s_apply nfs.yml -} - -function setup_k8s_metallb() { - helm repo add metallb https://metallb.github.io/metallb - helm upgrade --install metallb --set installCRDs=true metallb/metallb - setup_k8s_apply metallb.yml -} - -function setup_k8s_certmanager() { - helm upgrade --install mycertmanager --set installCRDs=true oci://registry-1.docker.io/bitnamicharts/cert-manager - setup_k8s_apply certmanager.yml -} - -function setup_k8s_pvc() { - export pvc_name=$1 - export pvc_capacity=$2 - export pvc_owner=$3 - - sudo mount -o nfsvers=4.2 $node_nfs_server:/k8s /opt - sudo mkdir -p /opt/$pvc_name - sudo chown $pvc_owner:$pvc_owner /opt/$pvc_name - sudo umount /opt - - setup_k8s_apply pvc.yml -} - -"$@" diff --git a/k3s-host/subst.sh b/k3s-host/subst.sh deleted file mode 100755 index c5afc89..0000000 --- a/k3s-host/subst.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -SELF_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -source $SELF_DIR/variables.sh - -eval "cat < $name-secrets.yml` - -## Storage - -- `../k3s-host/setup.sh setup_k8s_pvc forgejo-$name 4Gi 1000` - -## Pod - -- `../k3s-host/subst.sh forgejo-values.yml | helm upgrade forgejo-$name -f - -f $name-values.yml -f crawler-block-values.yml -f $name-secrets.yml oci://code.forgejo.org/forgejo-helm/forgejo --atomic --wait --install` diff --git a/k8s-forgejo/crawler-block-values.yml b/k8s-forgejo/crawler-block-values.yml deleted file mode 100644 index 9b6610d..0000000 --- a/k8s-forgejo/crawler-block-values.yml +++ /dev/null @@ -1,32 +0,0 @@ -extraDeploy: - - apiVersion: traefik.io/v1alpha1 - # https://doc.traefik.io/traefik/v3.1/routing/providers/kubernetes-crd/#kind-ingressroute - kind: IngressRoute - metadata: - name: forgejo-crawler - annotations: - kubernetes.io/ingress.class: traefik - spec: - entryPoints: - - web - - websecure - routes: - # https://doc.traefik.io/traefik/v3.1/routing/routers/#rule - - match: Host(`next.forgejo.org`) && HeaderRegexp(`user-agent`, `DataForSeoBot`) - kind: Rule - priority: 1000 - services: - - name: noop@internal - kind: TraefikService - middlewares: - - name: forgejo-crawler-blocker - tls: - secretName: tls-forgejo-next-ingress-http - - apiVersion: traefik.io/v1alpha1 - kind: Middleware - metadata: - name: forgejo-crawler-blocker - spec: - ipAllowList: - sourceRange: - - 127.0.0.1/32 diff --git a/k8s-forgejo/forgejo-secrets.yml.example b/k8s-forgejo/forgejo-secrets.yml.example deleted file mode 100644 index a368c36..0000000 --- a/k8s-forgejo/forgejo-secrets.yml.example +++ /dev/null @@ -1,6 +0,0 @@ -gitea: - admin: - password: "***" - config: - mailer: - PASSWD: "***" diff --git a/k8s-forgejo/forgejo-values.yml b/k8s-forgejo/forgejo-values.yml deleted file mode 100644 index b5b644c..0000000 --- a/k8s-forgejo/forgejo-values.yml +++ /dev/null @@ -1,34 +0,0 @@ -strategy: - type: 'Recreate' - -ingress: - enabled: true - annotations: - # https://cert-manager.io/docs/usage/ingress/#supported-annotations - # https://github.com/cert-manager/cert-manager/issues/2239 - cert-manager.io/cluster-issuer: letsencrypt-http - cert-manager.io/private-key-algorithm: ECDSA - cert-manager.io/private-key-size: 384 - kubernetes.io/ingress.class: traefik - traefik.ingress.kubernetes.io/router.entrypoints: websecure - -service: - http: - type: ClusterIP - ipFamilyPolicy: PreferDualStack - clusterIP: ~ - ssh: - type: ClusterIP - clusterIP: ~ - ipFamilyPolicy: PreferDualStack - -redis-cluster: - enabled: false -postgresql: - enabled: false -postgresql-ha: - enabled: false - -persistence: - enabled: true - create: false diff --git a/k8s-forgejo/next-values.yml b/k8s-forgejo/next-values.yml deleted file mode 100644 index 7cd9595..0000000 --- a/k8s-forgejo/next-values.yml +++ /dev/null @@ -1,102 +0,0 @@ -image: - registry: codeberg.org - repository: forgejo-experimental/forgejo - tag: '8.0-test' - rootless: false - -ingress: - annotations: - # https://doc.traefik.io/traefik/v3.1/routing/providers/kubernetes-ingress/#on-ingress - # reference middlewares via `-@kubernetescrd` - traefik.ingress.kubernetes.io/router.middlewares: default-forgejo-ratelimit@kubernetescrd - tls: - - hosts: - - next.forgejo.org - secretName: tls-forgejo-next-ingress-http - hosts: - - host: next.forgejo.org - paths: - - path: / - pathType: Prefix - -service: - ssh: - port: ssh - -extraDeploy: - # Route from traefik to forgejo - - apiVersion: traefik.io/v1alpha1 - kind: IngressRouteTCP - metadata: - name: forgejo-next-ssh - annotations: - kubernetes.io/ingress.class: traefik - spec: - entryPoints: - - ssh-next # name from traefik port - routes: - - match: HostSNI(`*`) - services: - - name: forgejo-next-ssh - port: 2222 # forgejo ssh port on kubernetes service - -persistence: - claimName: forgejo-next - -gitea: - admin: - username: earl-warren - email: 'contact@earl-warren.org' - config: - APP_NAME: "Forgejo v8.0 demo" - APP_SLOGAN: "ARCHIVED USE v8.next.forgejo.org instead" - APP_DISPLAY_NAME_FORMAT: "{APP_NAME} [{APP_SLOGAN}]" - log: - LEVEL: "info" - server: - ROOT_URL: https://next.forgejo.org/ - DOMAIN: next.forgejo.org - SSH_DOMAIN: next.forgejo.org - SSH_PORT: "2020" - LFS_START_SERVER: true - OFFLINE_MODE: true - repository: - ROOT: /data/git/repositories - service: - REGISTER_EMAIL_CONFIRM: true - DEFAULT_KEEP_EMAIL_PRIVATE: true - ENABLE_NOTIFY_MAIL: true - DISABLE_REGISTRATION: true - actions: - ENABLED: false - mirror: - ENABLED: false - federation: - ENABLED: true - admin: - SEND_NOTIFICATION_EMAIL_ON_NEW_USER: true - cors: - ENABLED: true - ALLOW_DOMAIN: "*" - HEADERS: "Access-Control-Allow-Origin" - mailer: - ENABLED: true - FROM: "noreply@forgejo.org" - PROTOCOL: "smtp+starttls" - SMTP_ADDR: "ssl0.ovh.net" - SMTP_PORT: "587" - USER: "next@forgejo.org" - database: - PATH: /data/gitea.db - DB_TYPE: sqlite3 - session: - PROVIDER: db - cache: - ADAPTER: memory - queue: - TYPE: level - indexer: - REPO_INDEXER_ENABLED: true - cron.archive_cleanup: - SCHEDULE: "@hourly" - OLDER_THAN: "2h" diff --git a/k8s-maintenance.md b/k8s-maintenance.md deleted file mode 100644 index e902bb6..0000000 --- a/k8s-maintenance.md +++ /dev/null @@ -1,23 +0,0 @@ -# Disaster recovery and maintenance - -## When a machine or disk is scheduled for replacement. - -* `kubectl drain hetzner05` # evacuate all the pods out of the node to be shutdown -* `kubectl taint nodes hetzner05 key1=value1:NoSchedule` # prevent any pod from being created there (metallb speaker won't be drained, for instance) -* `kubectl delete node hetzner05` # let the cluster know it no longer exists so a new one by the same name can replace it - -## Routing the failover IP - -When the machine to which the failover IP (failover.forgejo.org) is routed is unavailable or to be shutdown, to the [Hetzner server panel](https://robot.hetzner.com/server), to the IPs tab and change the route of the failover IP to another node. All nodes are configured with the failover IP, there is nothing else to do. - -## Manual boot operations - -### On the machine that runs the NFS server - -* `sudo drbdadm primary r1` # Switch the DRBD to primary -* `sudo mount /precious` # DRBD volume shared via NFS -* `sudo ip addr add 10.53.101.100/24 dev enp5s0.4001` # add NFS server IP - -### On the other machines - -* `sudo ip addr del 10.53.101.100/24 dev enp5s0.4001` # remove NFS server IP diff --git a/k8s.md b/k8s.md deleted file mode 100644 index 3c1a63d..0000000 --- a/k8s.md +++ /dev/null @@ -1,107 +0,0 @@ -# K8S node - -Installing a K8S node using [scripts from the k3s-host](k3s-host) directory. - -## Imaging - -Using installimage from the rescue instance. - -- `wipefs -fa /dev/nvme*n1` -- `installimage -r no -n hetzner0?` -- Debian bookworm -- `PART / ext4 100G` -- `PART /srv ext4 all` -- ESC 0 + yes -- reboot - -Partitioning. - -- First disk - - OS - - non precious data such as the LXC containers with runners. -- Second disk - - a partition configured with DRBD - -Debian user. - -- `ssh root@hetzner0?.forgejo.org` -- `useradd --shell /bin/bash --create-home --groups sudo debian` -- `mkdir -p /home/debian/.ssh ; cp -a .ssh/authorized_keys /home/debian/.ssh ; chown -R debian /home/debian/.ssh` -- in `/etc/sudoers` edit `%sudo ALL=(ALL:ALL) NOPASSWD:ALL` - -## Install helpers - -Each node is identifed by the last digit of the hostname. - -```sh -sudo apt-get install git etckeeper -git clone https://code.forgejo.org/infrastructure/documentation -cd documentation/k3s-host -cp variables.sh.example variables.sh -cp secrets.sh.example secrets.sh -``` - -Variables that must be set depending on the role of the node. - -- first server node - - secrets.sh: node_drbd_shared_secret -- other server node - - secrets.sh: node_drbd_shared_secret - - secrets.sh: node_k8s_token: content of /var/lib/rancher/k3s/server/token on the first node - - variables.sh: node_k8s_existing: identifier of the first node (e.g. 5) -- etcd node - - secrets.sh: node_k8s_token: content of /var/lib/rancher/k3s/server/token on the first node - - variables.sh: node_k8s_existing: identifier of the first node (e.g. 5) - - variables.sh: node_k8s_etcd: identifier of the node whose role is just etcd (e.g. 3) - -The other variables depend on the setup. - -## Firewall - -`./setup.sh setup_ufw` - -## DRBD - -DRBD is [configured](https://linbit.com/drbd-user-guide/drbd-guide-9_0-en/#p-work) with: - -`./setup.sh setup_drbd` - -Once two nodes have DRBD setup for the first time, it can be initialized by [pretending all is in sync](https://linbit.com/drbd-user-guide/drbd-guide-9_0-en/#s-skip-initial-resync) to save the initial bitmap sync since there is actually no data at all. - - -```sh -sudo drbdadm primary r1 -sudo drbdadm new-current-uuid --clear-bitmap r1/0 -sudo mount /precious -``` - -## NFS - -`./setup.sh setup_nfs` - -On the node that has the DRBD volume `/precious` mounted, set the IP of the NFS server to be used by k8s: - -```sh -sudo ip addr add 10.53.101.100/24 dev enp5s0.4001 -``` - -## K8S - -For the first node `./setup.sh setup_k8s`. For nodes joining the cluster `./setup.sh setup_k8s 6` where `hetzner06` is an existing node. - -- [metallb](https://metallb.universe.tf) instead of the default load balancer because it does not allow for a public IP different from the `k8s` node IP. - `./setup.sh setup_k8s_metallb` -- [traefik](https://traefik.io/) [v2.10](https://doc.traefik.io/traefik/v3.1/) installed from the [v25.0](https://github.com/traefik/traefik-helm-chart/tree/v31.1.1) helm chart. - `./setup.sh setup_k8s_traefik` -- [cert-manager](https://cert-manager.io/). - `./setup.sh setup_k8s_certmanager` -- NFS storage class - `./setup.sh setup_k8s_nfs` - -## K8S NFS storage creation - -Define the 20GB `forgejo-data` pvc owned by user id 1000. - -```sh -./setup.sh setup_k8s_pvc forgejo-data 20Gi 1000 -```