1
0
Fork 0
mirror of https://code.forgejo.org/infrastructure/documentation synced 2024-11-15 09:31:52 +00:00
Find a file
2024-10-19 12:05:34 +00:00
LICENSE Initial commit 2024-09-14 11:43:39 +00:00
README.md allow everything between cluster nodes 2024-10-19 13:57:23 +02:00

The resources used by the infrastructure are in the https://code.forgejo.org/infrastructure/ organization. There is a dedicated chatroom. A mirror of this repository is available at https://git.pub.solar/forgejo/infrastructure-documentation.

LXC Hosts

All LXC hosts are set up with lxc-helpers.

name=forgejo-host
lxc-helpers.sh lxc_container_run $name -- sudo --user debian bash

See https://github.com/mikesart/inotify-info. Running multiple LXC containers will quickly exhaust the default limit on inotify instances (128 on bookworm).

echo fs.inotify.max_user_instances=8192 | sudo tee -a /etc/sysctl.conf
sudo sysctl -p

Unprivileged

name=forgejo-host
lxc-helpers.sh lxc_container_create --config "unprivileged" $name
echo "lxc.start.auto = 1" | sudo tee -a /var/lib/lxc/$name/config
lxc-helpers.sh lxc_container_start $name
lxc-helpers.sh lxc_container_user_install $name $(id -u) $USER

Docker enabled

name=forgejo-host
lxc-helpers.sh lxc_container_create --config "docker" $name
echo "lxc.start.auto = 1" | sudo tee -a /var/lib/lxc/$name/config
lxc-helpers.sh lxc_container_start $name
lxc-helpers.sh lxc_install_docker $name
lxc-helpers.sh lxc_container_user_install $name $(id -u) $USER

K8S enabled

name=forgejo-host
lxc-helpers.sh lxc_container_create --config "k8s" $name
echo "lxc.start.auto = 1" | sudo tee -a /var/lib/lxc/$name/config
lxc-helpers.sh lxc_container_start $name
lxc-helpers.sh lxc_container_user_install $name $(id -u) $USER

Docker and LXC enabled

name=forgejo-host
ipv4=10.85.12
ipv6=fc33
lxc-helpers.sh lxc_container_create --config "docker lxc" $name
echo "lxc.start.auto = 1" | sudo tee -a /var/lib/lxc/$name/config
lxc-helpers.sh lxc_container_start $name
lxc-helpers.sh lxc_install_docker $name
lxc-helpers.sh lxc_install_lxc $name $ipv4 $ipv6
lxc-helpers.sh lxc_container_user_install $name $(id -u) $USER

firewall

sudo apt-get install ufw
sudo ufw default allow incoming
sudo ufw default allow outgoing
sudo ufw default allow routed

interface=enp5s0

# Add a ufw rule allowing inbound traffic on $interface from each given
# node to every other given node.
# Globals:   interface (read) - network interface the rules apply to
# Arguments: the IP addresses of the cluster nodes
# Outputs:   whatever `sudo ufw` prints
function internode() {
  # keep the loop variables out of the shell the snippet is pasted into
  local from to
  for from in "$@" ; do
    for to in "$@" ; do
      # a node needs no rule to reach itself
      if test "$from" != "$to" ; then
        sudo ufw allow in on "$interface" from "$from" to "$to"
      fi
    done
  done
}

ipv4="65.108.204.171 88.198.58.177"
internode $ipv4

ipv6="2a01:4f9:1a:a082::2 2a01:4f8:222:507::2"
internode $ipv6

for host_ip in $ipv4 $ipv6 ; do
  sudo ufw allow in on $interface to $host_ip port 22 proto tcp
  sudo ufw deny in on $interface log-all to $host_ip
done

failover="188.40.16.47 2a01:4f8:fff2:48::2"

for public_ip in $failover ; do
  sudo ufw allow in on $interface to $public_ip  port 22,80,443,2000:3000 proto tcp
  sudo ufw deny in on $interface log-all to $public_ip
done
sudo systemctl enable ufw
sudo ufw enable
sudo ufw status verbose

nftables

sudo nft list ruleset

Host reverse proxy

The reverse proxy on a host forwards to the designated LXC container with something like the following examples in /etc/nginx/sites-available/example.com, where A.B.C.D is the IP allocated to the LXC container running the web service.

And symlink:

ln -s /etc/nginx/sites-available/example.com /etc/nginx/sites-enabled/example.com

The certificate is obtained once and automatically renewed with:

sudo apt-get install certbot python3-certbot-nginx
sudo certbot -n --agree-tos --email contact@forgejo.org -d example.com --nginx

When removing a configuration, the certificate can also be removed with:

sudo certbot delete --cert-name example.com

Forwarding TCP streams (useful for ssh) requires installing the module:

sudo apt-get install libnginx-mod-stream

Rate limiting crawlers is done by adding the following to /etc/nginx/conf.d/limit.conf:

# http://nginx.org/en/docs/http/ngx_http_limit_req_module.html
# https://blog.nginx.org/blog/rate-limiting-nginx
map $http_user_agent $isbot_ua {
        default 0;
        ~*(GoogleBot|GoogleOther|bingbot|YandexBot) 1;
}
map $isbot_ua $limit_bot {
        0       "";
        1       $binary_remote_addr;
}
limit_req_zone $limit_bot zone=bots:10m rate=1r/m;
limit_req_status 429;

and the following in the location to be rate limited:

    location / {
        limit_req zone=bots burst=2 nodelay;
		...

Host wakeup-on-logs

https://code.forgejo.org/infrastructure/wakeup-on-logs

K8S wakeup-on-logs script

$ cat /etc/wakeup-on-logs/forgejo-v8
#!/bin/bash

set -x

self="${BASH_SOURCE[0]}"
name=$(basename $self)
# keep it lower than https://code.forgejo.org/infrastructure/wakeup-on-logs
# otherwise it will get killed by it
timeout=4m

# Run the given command through lxc-attach on $name, as the debian user and
# with KUBECONFIG set to the k3s configuration file. stdout and stderr are
# both displayed and appended to /var/log/$name.log.
function lxc_run() {
    lxc-attach $name -- \
        sudo --user debian KUBECONFIG=/etc/rancher/k3s/k3s.yaml "$@" 2>&1 \
        | tee -a /var/log/$name.log
}

image=codeberg.org/forgejo-experimental/forgejo
# the major version is whatever follows the last "v" in the script name (forgejo-v8 => 8)
major=${name##*v}
# resolve the $major-rootless tag of the image to its current digest
digest=$(skopeo inspect --format "{{.Digest}}" docker://$image:$major-rootless)
# values file matching this major version
values=https://code.forgejo.org/infrastructure/k8s/raw/branch/main/forgejo-v$major/values.yml
# install or upgrade the forgejo release pinned to that digest, waiting at most $timeout
lxc_run helm upgrade forgejo -f $values -f /home/debian/secrets.yml oci://code.forgejo.org/forgejo-helm/forgejo --atomic --wait --timeout $timeout --install --set image.digest=$digest

Forgejo example

server {
    listen 80;
    listen [::]:80;

    server_name example.com;

    location / {
        proxy_pass http://A.B.C.D:8080;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto https;
        client_max_body_size 2G;
    }
}

GitLab example

server {
    listen 80;
    listen [::]:80;

    server_name example.com;

    location / {
       proxy_set_header Upgrade $http_upgrade;
       proxy_set_header Connection "upgrade";
       proxy_set_header Host $http_host;
       proxy_set_header X-Real-IP $remote_addr;
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
       proxy_set_header X-Forwarded-Proto $scheme;
       proxy_set_header X-Frame-Options SAMEORIGIN;

       client_body_timeout 60;
       client_max_body_size 200M;
       send_timeout 1200;
       lingering_timeout 5;

       proxy_buffering off;
       proxy_connect_timeout 90;
       proxy_send_timeout 300;
       proxy_read_timeout 600s;

       proxy_pass http://example.com;
       proxy_http_version 1.1;
    }
}

Vanilla example

server {
    listen 80;
    listen [::]:80;

    server_name example.com;

    location / {
        proxy_pass http://A.B.C.D;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto https;
    }
}

302 redirection

server {
    listen 80;
    listen [::]:80;

    server_name example.com;
    return 302 https://other.example.com$request_uri;
}

Forgejo runners

The LXC container in which the runner is installed must have capabilities that support the backend.

  • docker:// needs a Docker enabled container
  • lxc:// needs a Docker and LXC enabled container

The runners it contains are not started at boot; they must be started manually. The bash history has the command line to do so.

Installation

version=3.5.0
sudo wget -O /usr/local/bin/forgejo-runner-$version https://code.forgejo.org/forgejo/runner/releases/download/v$version/forgejo-runner-$version-linux-amd64
sudo chmod +x /usr/local/bin/forgejo-runner-$version
echo 'export TERM=xterm-256color' >> .bashrc

Creating a runner

Multiple runners can co-exist on the same machine. To keep things organized they are located in a directory that is the same as the URL from which the token is obtained. For instance DIR=codeberg.org/forgejo-integration means that the token was obtained from the https://codeberg.org/forgejo-integration organization.

If a runner only provides unprivileged docker containers, the labels in config.yml should be: labels: ['docker:docker://node:20-bookworm'].

If a runner provides LXC containers and unprivileged docker containers, the labels in config.yml should be labels: ['self-hosted:lxc://debian:bookworm', 'docker:docker://node:20-bookworm'].

name=myrunner
mkdir -p $DIR ; cd $DIR
# the installed binary is forgejo-runner-$version (see Installation), there is no plain forgejo-runner
forgejo-runner-$version generate-config > config-$name.yml
## edit config-$name.yml and adjust the `labels:`
## Obtain a $TOKEN from https://$DIR
forgejo-runner-$version register --no-interactive --token $TOKEN --name runner --instance https://codeberg.org
# run the daemon in the background; cat -v makes control characters visible in the log
forgejo-runner-$version --config config-$name.yml daemon |& cat -v > runner.log &

Octopuce

Octopuce provides hardware managed by the devops team. It can only be accessed via SSH.

To access the services hosted on the LXC containers, ssh port forwarding to the private IPs can be used. For instance:

# /etc/hosts is only writable by root and a plain >> redirection is performed by the unprivileged shell
echo 127.0.0.1 private.forgejo.org | sudo tee -a /etc/hosts
# sudo is needed to bind the privileged local port 80
sudo ssh -i ~/.ssh/id_rsa -L 80:10.77.0.128:80 debian@forgejo01.octopuce.fr
firefox http://private.forgejo.org

Containers

  • forgejo-host

    Dedicated to http://private.forgejo.org

    • Docker enabled
    • upgrades checklist:
      emacs /home/debian/run-forgejo.sh # change the `image=`
      docker stop forgejo
      sudo rsync -av --numeric-ids --delete --progress /srv/forgejo/ /root/forgejo-backup/
      docker rm forgejo
      bash -x /home/debian/run-forgejo.sh
      docker logs -n 200 -f forgejo
      
  • forgejo-runner-host

    Has runners installed to run against private.forgejo.org

    • Docker and LXC enabled 10.85.12 fc33

Hetzner

All hardware machines are running Debian GNU/Linux bookworm. They are LXC hosts set up with lxc-helpers.

NOTE: only use EX101 with an ASRockRack W680D4U-1L motherboard.

vSwitch

A vSwitch is assigned via the Robot console on all servers for backend communications and configured in /etc/network/interfaces for each of them with something like:

auto enp5s0.4000
iface enp5s0.4000 inet static
  address 10.53.100.2
  netmask 255.255.255.0
  vlan-raw-device enp5s0
  mtu 1400

The IP address ends with the same number as the hardware (hetzner02 => .2).

vSwitch DRBD

The vSwitch on VLAN 4000 is for DRBD exclusively

vSwitch NFS

The vSwitch on VLAN 4001 is for NFS

vSwitch k8s

The vSwitch on VLAN 4002 is for the k8s control plane

DRBD

DRBD is configured like in the following example with hetzner02 as the primary and hetzner03 as the secondary:

$ apt-get install drbd-utils
$ cat /etc/drbd.d/r0.res
resource r0 {
    net {
        # A : write completion is determined when data is written to the local disk and the local TCP transmission buffer
        # B : write completion is determined when data is written to the local disk and remote buffer cache
        # C : write completion is determined when data is written to both the local disk and the remote disk
        protocol C;
        cram-hmac-alg sha1;
        # any secret key for authentication among nodes
        shared-secret "***";
    }
    disk {
        resync-rate 1000M;
    }
    on hetzner02 {
        address 10.53.100.2:7788;
        volume 0 {
            # device name
            device /dev/drbd0;
            # specify disk to be used for device above
            disk /dev/nvme0n1p5;
            # where to create metadata
            # specify the block device name when using a different disk
            meta-disk internal;
        }
    }
    on hetzner03 {
        address 10.53.100.3:7788;
        volume 0 {
            device /dev/drbd0;
            disk /dev/nvme1n1p5;
            meta-disk internal;
        }
    }
}
$ sudo drbdadm create-md r0
$ sudo systemctl enable drbd
$ sudo systemctl start drbd

On hetzner02 (the primary), pretend all is in sync to save the initial bitmap sync since there is actually no data at all.

sudo drbdadm new-current-uuid --clear-bitmap r0/0

The DRBD device is mounted on /var/lib/lxc. In /etc/fstab there is a noauto line:

/dev/drbd0 /var/lib/lxc ext4 noauto,defaults 0 0

To prevent split brain situations a manual step is required at boot time, on the machine that is going to be the primary.

sudo drbdadm primary r0
sudo drbdsetup status
sudo mount /var/lib/lxc
sudo lxc-autostart start
sudo lxc-ls -f
sudo drbdsetup status

hetzner{01,04}

https://hetzner{01,04}.forgejo.org run on EX101 Hetzner hardware.

LXC

lxc-helpers.sh lxc_install_lxc_inside 10.41.13 fc29

Disk partitioning

  • First disk
    • OS
    • a partition mounted on /srv where non precious data goes such as the LXC containers with runners.
  • Second disk
    • configured with DRBD for precious data.

Root filesystem backups

  • hetzner01:/etc/cron.daily/backup-hetzner04 rsync -aHS --delete-excluded --delete --numeric-ids --exclude /proc --exclude /dev --exclude /sys --exclude /precious --exclude /srv --exclude /var/lib/lxc 10.53.100.4:/ /srv/backups/hetzner04/ >& /var/log/$(basename $0).log
  • hetzner04:/etc/cron.daily/backup-hetzner01 rsync -aHS --delete-excluded --delete --numeric-ids --exclude /proc --exclude /dev --exclude /sys --exclude /precious --exclude /srv --exclude /var/lib/lxc 10.53.100.1:/ /srv/backups/hetzner01/ >& /var/log/$(basename $0).log

LXC containers

  • runner-lxc-helpers (hetzner01)

    Dedicated to Forgejo runners for the https://code.forgejo.org/forgejo/lxc-helpers project.

    • K8S enabled
    • code.forgejo.org/forgejo/lxc-helpers/config*.yml
  • forgejo-runners (hetzner01)

    Dedicated to Forgejo runners for the https://codeberg.org/forgejo organization.

    • Docker enabled
    • codeberg.org/forgejo/config*.yml
  • runner01-lxc (hetzner01)

    Dedicated to Forgejo runners for https://code.forgejo.org.

    • Docker and LXC enabled 10.194.201 fc35
    • code.forgejo.org/forgejo/config*.yml
    • code.forgejo.org/actions/config*.yml
    • code.forgejo.org/forgejo-integration/config*.yml
    • code.forgejo.org/forgejo-contrib/config*.yml
    • code.forgejo.org/f3/config*.yml
    • code.forgejo.org/forgefriends/config*.yml
  • forgejo-v9 (hetzner04) same as forgejo-v8

  • forgejo-v8 (hetzner04)

    Dedicated to https://v8.next.forgejo.org, see https://code.forgejo.org/infrastructure/k8s

    • K8S enabled

    • K8S wakeup-on-logs script /etc/wakeup-on-logs/forgejo-v8

    • Values file

    • nginx forwarding of SSH streams in /etc/nginx/modules-enabled/next.forgejo.org.conf

      stream {
      
        # v8 ip's
        upstream v8 {
          least_conn;
          server 10.41.13.27:2222;
        }
      
        # v8 definition
        server {
          listen 2080; # the port to listen on this server
          listen [::]:2080;
          proxy_pass v8; # forward traffic to this upstream group
        }
      }
      
  • forgefriends-forum (hetzner04)

    Dedicated to https://forum.forgefriends.org

    • Docker enabled
  • forgefriends-gitlab (hetzner04)

    Dedicated to https://lab.forgefriends.org

    • Docker enabled
  • forgefriends-cloud (hetzner04)

    Dedicated to https://cloud.forgefriends.org

    • Docker enabled
  • gna-forgejo (hetzner04)

    Dedicated to https://forgejo.gna.org

    • Docker enabled
  • gna-forum (hetzner04)

    Dedicated to https://forum.gna.org

    • Docker enabled

hetzner{02,03}

https://hetzner02.forgejo.org & https://hetzner03.forgejo.org run on EX44 Hetzner hardware.

LXC

lxc-helpers.sh lxc_install_lxc_inside 10.6.83 fc16

Disk partitioning

  • First disk
    • OS
    • a partition configured with DRBD for precious data mounted on /var/lib/lxc
  • Second disk
    • non precious data such as the LXC containers with runners.

Root filesystem backups

  • hetzner03:/etc/cron.daily/backup-hetzner02 rsync -aHS --delete-excluded --delete --numeric-ids --exclude /proc --exclude /dev --exclude /sys --exclude /srv --exclude /var/lib/lxc 10.53.100.2:/ /srv/backups/hetzner02/
  • hetzner02:/etc/cron.daily/backup-hetzner03 rsync -aHS --delete-excluded --delete --numeric-ids --exclude /proc --exclude /dev --exclude /sys --exclude /srv --exclude /var/lib/lxc 10.53.100.3:/ /srv/backups/hetzner03/

Public IP addresses

The public IP addresses attached to the hosts are not failover IPs that can be moved from one host to the next. The DNS entry needs to be updated if the primary host changes.

When additional IP addresses are attached to the server, they are added to /etc/network/interfaces like ipv4 65.21.67.71 and ipv6 2a01:4f9:3081:51ec::102 below.

auto enp5s0
iface enp5s0 inet static
  address 65.21.67.73
  netmask 255.255.255.192
  gateway 65.21.67.65
  # route 65.21.67.64/26 via 65.21.67.65
  up route add -net 65.21.67.64 netmask 255.255.255.192 gw 65.21.67.65 dev enp5s0
  # BEGIN code.forgejo.org
  up ip addr add 65.21.67.71/32 dev enp5s0
  up nft -f /home/debian/code.nftables
  down ip addr del 65.21.67.71/32 dev enp5s0
  # END code.forgejo.org

iface enp5s0 inet6 static
  address 2a01:4f9:3081:51ec::2
  netmask 64
  gateway fe80::1
  # BEGIN code.forgejo.org
  up ip -6 addr add 2a01:4f9:3081:51ec::102/64 dev enp5s0
  down ip -6 addr del 2a01:4f9:3081:51ec::102/64 dev enp5s0
  # END code.forgejo.org

For port forwarding to work, the LXC host must not bind them. For instance the ssh server configuration at /etc/ssh/sshd_config should not bind all IP but only a specific one.

Port 22
AddressFamily inet
ListenAddress 65.21.67.73
#ListenAddress ::

Port forwarding

Forwarding a port to an LXC container can be done with nginx stream for the public IP of code.forgejo.org (65.21.67.71 & 2a01:4f9:3081:51ec::102) to the private IP (10.6.83.195) of the code LXC container in /etc/nginx/modules-enabled/ssh.conf:

stream {

  # code.forgejo.org ip's
  upstream codessh {
    least_conn;
    server 10.6.83.195:22;
  }

  # code.forgejo.org definition
  server {
    listen 65.21.67.71:22; # the port to listen on this server
    listen [2a01:4f9:3081:51ec::102]:22;
    proxy_pass codessh; # forward traffic to this upstream group
    proxy_timeout 3s;
    proxy_connect_timeout 3s;
  }
}

302 redirects

  • On hetzner02
    • try.next.forgejo.org redirects to v(latest stable).next.forgejo.org
    • dev.next.forgejo.org redirects to v(latest dev).next.forgejo.org

Containers

  • forgejo-code on hetzner02

    Dedicated to https://code.forgejo.org

    • Docker enabled
    • upgrades checklist:
      • ssh -t debian@hetzner02.forgejo.org lxc-helpers.sh lxc_container_run forgejo-code -- sudo --user debian bash
        emacs /home/debian/run-forgejo.sh # change the `image=`
        docker stop forgejo
        
      • ssh -t debian@hetzner02.forgejo.org sudo /etc/cron.daily/backup-forgejo-code
      • ssh -t debian@hetzner02.forgejo.org lxc-helpers.sh lxc_container_run forgejo-code -- sudo --user debian bash
        docker rm forgejo
        bash -x /home/debian/run-forgejo.sh
        docker logs -n 200 -f forgejo
        
    • Rotating 30 days backups happen daily /etc/cron.daily/forgejo-code-backup.sh
    • Add code.forgejo.org to the forgejo.org SPF record
  • forgejo-next on hetzner02

    Dedicated to https://next.forgejo.org

    • Docker enabled
    • /etc/cron.hourly/forgejo-upgrade runs /home/debian/run-forgejo.sh > /home/debian/run-forgejo-$(date +%d).log
    • When a new major version is published (8.0 for instance) run-forgejo.sh must be updated with it
    • Reset everything
      docker stop forgejo
      docker rm forgejo
      sudo rm -fr /srv/forgejo.old
      sudo mv /srv/forgejo /srv/forgejo.old
      bash -x /home/debian/run-forgejo.sh
      
    • /home/debian/next.nftables
      add table ip next;
      flush table ip next;
      add chain ip next prerouting {
        type nat hook prerouting priority 0;
        policy accept;
        ip daddr 65.21.67.65 tcp dport { 2020 } dnat to 10.6.83.213;
      };
      
    • Add to iface enp5s0 inet static in /etc/network/interfaces
      up nft -f /home/debian/next.nftables
      
    - `/etc/nginx/sites-available/next.forgejo.org` same as `/etc/nginx/sites-available/code.forgejo.org`
    
    
  • forgejo-v7 on hetzner02

    Dedicated to https://v7.next.forgejo.org

    • Docker enabled
    • /etc/cron.hourly/forgejo-upgrade runs /home/debian/run-forgejo.sh > /home/debian/run-forgejo-$(date +%d).log
    • Reset everything
      docker stop forgejo
      docker rm forgejo
      sudo rm -fr /srv/forgejo.old
      sudo mv /srv/forgejo /srv/forgejo.old
      bash -x /home/debian/run-forgejo.sh
      
    • /home/debian/v7.nftables
      add table ip v7;
      flush table ip v7;
      add chain ip v7 prerouting {
        type nat hook prerouting priority 0;
        policy accept;
        ip daddr 65.21.67.65 tcp dport { 2070 } dnat to 10.6.83.179;
      };
      
    • Add to iface enp5s0 inet static in /etc/network/interfaces
      up nft -f /home/debian/v7.nftables
      
    - `/etc/nginx/sites-available/v7.forgejo.org` same as `/etc/nginx/sites-available/code.forgejo.org`
    
    
  • static-pages on hetzner02

    See the static pages documentation for more information.

    • Unprivileged
  • runner-forgejo-helm on hetzner03

    Dedicated to https://codeberg.org/forgejo-contrib/forgejo-helm and running from an ephemeral disk

hetzner{05,06}

https://hetzner05.forgejo.org & https://hetzner06.forgejo.org run on EX44 Hetzner hardware.

LXC

lxc-helpers.sh lxc_install_lxc_inside 10.47.3 fc11

NFS

server.

sudo apt install nfs-kernel-server nfs-common
cat <<EOF | sudo tee -a /etc/exports
/precious 10.53.101.0/24(rw,fsid=0,no_root_squash,no_subtree_check)
/precious/k8s 10.53.101.0/24(rw,nohide,insecure,no_subtree_check)
EOF
sudo exportfs -av
sudo exportfs -s

client.

sudo apt install nfs-common

Disk partitioning

  • First disk
    • OS
    • non precious data such as the LXC containers with runners.
  • Second disk
    • a partition configured with DRBD

Root filesystem backups

  • hetzner05:/etc/cron.daily/backup-hetzner06 rsync -aHSv --delete-excluded --delete --numeric-ids --exclude /proc --exclude /dev --exclude /sys --exclude /srv --exclude /var/lib/lxc 10.53.100.6:/ /srv/backups/hetzner06/
  • hetzner06:/etc/cron.daily/backup-hetzner05 rsync -aHSv --delete-excluded --delete --numeric-ids --exclude /proc --exclude /dev --exclude /sys --exclude /srv --exclude /var/lib/lxc 10.53.100.5:/ /srv/backups/hetzner05/

Failover IP addresses

The failover IP addresses are configured on all hosts.

auto enp5s0
iface enp5s0 inet static
...
  up ip addr add 188.40.16.47/32 dev enp5s0

iface enp5s0 inet6 static
...
  up ip addr add 2a01:4f8:fff2:48::2/64 dev enp5s0

k8s node

The 10.88.1.5 and fd10::5 IPs are assigned to the interface with VLAN 4002.

auto enp5s0.4002
iface enp5s0.4002 inet static
  address 10.88.1.5
  netmask 255.255.0.0
  vlan-raw-device enp5s0
  mtu 1400
  up ip addr add fd10::5/48 dev enp5s0.4002

k8s first server node

sudo apt-get install curl
master_node_ip=10.88.1.5,fd10::5
curl -fL https://get.k3s.io | sh -s - server --cluster-init --disable=servicelb --write-kubeconfig-mode=644 --node-ip=$master_node_ip --cluster-cidr=10.42.0.0/16,fd01::/48 --service-cidr=10.43.0.0/16,fd02::/112 --flannel-ipv6-masq
curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash -

k8s second server node

The token is found on the first node in the /var/lib/rancher/k3s/server/token file.

sudo apt-get install curl
token=???
master_ip=10.88.1.5
second_node_ip=10.88.1.6,fd10::6
curl -fL https://get.k3s.io | sh -s - server --token $token --server https://$master_ip:6443 --cluster-init --disable=servicelb --write-kubeconfig-mode=644 --node-ip=$second_node_ip --cluster-cidr=10.42.0.0/16,fd01::/48 --service-cidr=10.43.0.0/16,fd02::/112 --flannel-ipv6-masq
curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash -
kubectl taint nodes $(hostname) key1=value1:NoSchedule

k8s dedicated etcd node

dedicated etcd node

The token is found on one of the master nodes in the /var/lib/rancher/k3s/server/token file.

master_ip=10.88.1.5
etcd_node_ip=10.88.1.3,fd10::3
curl -sfL https://get.k3s.io | sh -s - server --token "$token" --server https://$master_ip:6443 --cluster-init --disable=servicelb --disable-apiserver --disable-controller-manager --disable-scheduler --write-kubeconfig-mode=644 --node-ip=$etcd_node_ip --cluster-cidr=10.42.0.0/16,fd01::/48 --service-cidr=10.43.0.0/16,fd02::/112 --flannel-ipv6-masq
kubectl taint nodes $(hostname) key1=value1:NoSchedule

k8s networking

cert-manager.

helm install mycertmanager --set installCRDs=true oci://registry-1.docker.io/bitnamicharts/cert-manager
# wait a few seconds
cat > clusterissuer.yml <<EOF
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-http
spec:
  acme:
    email: contact@forgejo.org
    server: https://acme-v02.api.letsencrypt.org/directory
    privateKeySecretRef:
      name: letsencrypt-http
    solvers:
    - http01:
        ingress:
          class: traefik
EOF
kubectl apply --server-side=true -f clusterissuer.yml

k3s is configured to use metallb instead of the default load balancer because the latter does not allow for a public IP different from the k8s node IP.

metallb.

helm install metallb --set installCRDs=true metallb/metallb
# the file name written here must be the one given to kubectl apply below
cat > metallb.yml <<EOF
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: first-pool
spec:
  addresses:
   - 188.40.16.47/32
   - 2a01:4f8:fff2:48::0/64
EOF
# presumably the delay gives metallb time to start before the pool is created
sleep 120 ; kubectl apply --server-side=true -f metallb.yml

traefik requests specific IPs from metallb with annotations.

# YAML must be indented with spaces only: deployment sits at the same level as ports and service
cat > traefik.yml <<EOF
apiVersion: helm.cattle.io/v1
kind: HelmChartConfig
metadata:
  name: traefik
  namespace: kube-system
spec:
  valuesContent: |-
    ports:
      web:
        port: 80
        redirectTo:
          port: websecure
          priority: 1
    deployment:
      replicas: 2
    service:
      annotations:
        metallb.universe.tf/allow-shared-ip: "key-to-share-188-40-16-47"
        metallb.universe.tf/loadBalancerIPs: 188.40.16.47,2a01:4f8:fff2:48::2
EOF
kubectl apply --server-side=true -f traefik.yml

The forgejo values are configured in ingress for the reverse proxy (traefik) to route the domain and for the ACME issuer (cert-manager) to obtain a certificate, and in service for the ssh port to be bound to the desired IPs of the load balancer (metallb).

ingress:
  enabled: true
  annotations:
    # https://cert-manager.io/docs/usage/ingress/#supported-annotations
    # https://github.com/cert-manager/cert-manager/issues/2239
    cert-manager.io/cluster-issuer: letsencrypt-http
    cert-manager.io/private-key-algorithm: ECDSA
    cert-manager.io/private-key-size: 384
    kubernetes.io/ingress.class: traefik
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
  tls:
    - hosts:
        - t1.forgejo.org
      secretName: tls-forgejo-t1-ingress-http
  hosts:
    - host: t1.forgejo.org
      paths:
        - path: /
          pathType: Prefix

service:
  http:
    type: ClusterIP
    ipFamilyPolicy: PreferDualStack
    port: 3000
  ssh:
    type: LoadBalancer
    annotations:
      metallb.universe.tf/loadBalancerIPs: 188.40.16.47,2a01:4f8:fff2:48::2
      metallb.universe.tf/allow-shared-ip: "key-to-share-188-40-16-47"
    ipFamilyPolicy: PreferDualStack
    port: 2222

Define the nfs storage class.

$ cat nfs.yml
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
  name: nfs
  namespace: default
spec:
  chart: nfs-subdir-external-provisioner
  repo: https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner
  targetNamespace: default
  set:
    nfs.server: 10.53.101.100
    nfs.path: /k8s
    storageClass.name: nfs
$ kubectl apply --server-side=true -f nfs.yml
$ sleep 120 ; kubectl get storageclass nfs

k8s NFS storage creation

Create the directory to be used, with the expected permissions (assuming /k8s is the directory exported via NFS).

sudo mkdir /precious/k8s/forgejo-data
# ownership (uid:gid 1000:1000) is set with chown; chmod only accepts a mode
sudo chown 1000:1000 /precious/k8s/forgejo-data

Define the forgejo-data pvc.

$ cat pv.yml
---
apiVersion: v1
kind: PersistentVolume
metadata:
  name: forgejo-data
spec:
  capacity:
    storage: 20Gi
  nfs:
    server: 10.53.101.100
    path: /k8s/forgejo-data
  accessModes:
    - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  storageClassName: nfs
  mountOptions:
    - noatime
    - nfsvers=4.2
  volumeMode: Filesystem
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: forgejo-data
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 20Gi
  volumeName: forgejo-data
  storageClassName: nfs
  volumeMode: Filesystem
$ kubectl apply --server-side=true -f pv.yml

Instruct the forgejo pod to use the forgejo-data pvc.

persistence:
  enabled: true
  create: false
  claimName: forgejo-data

Disaster recovery and maintenance

When a machine or disk is scheduled for replacement.

  • kubectl drain hetzner05 # evacuate all the pods out of the node to be shutdown

Routing the failover IP

When the machine to which the failover IP (failover.forgejo.org) is routed is unavailable or about to be shut down, go to the Hetzner server panel, open the IPs tab and change the route of the failover IP to another node. All nodes are configured with the failover IP, so there is nothing else to do.

Manual boot operations

On the machine that runs the NFS server

  • sudo drbdadm primary r1 # Switch the DRBD to primary
  • sudo mount /precious # DRBD volume shared via NFS
  • sudo ip addr add 10.53.101.100/24 dev enp5s0.4001 # add NFS server IP

On the other machines

  • sudo ip addr del 10.53.101.100/24 dev enp5s0.4001 # remove NFS server IP

Uberspace

The website https://forgejo.org is hosted at https://uberspace.de/. The https://codeberg.org/forgejo/website/ CI has credentials to push HTML pages there.