From 2519e14f483a5890f325a5fe67e547d306ee354f Mon Sep 17 00:00:00 2001 From: Flegma Date: Fri, 19 Jun 2026 09:21:54 +0200 Subject: [PATCH] chore: automated prune of superseded 5stack images Adds a per-host systemd timer that reclaims disk from old 5stack container image versions left behind under /var/lib/rancher/k3s/agent when ghcr.io/5stackgg/*:latest is re-pulled to a new digest. The prune keeps whatever image currently holds a :latest tag, so the current game-server / game-streamer images are preserved even when they are not running. A plain `crictl rmi --prune` would delete those idle-but-current images (the concern raised on #503); keying off the tag instead avoids that. Only superseded, now-untagged versions are removed. - utils/5stack-image-prune.sh: tag-aware prune via crictl + jq - utils/setup_image_prune.sh: installs the script + a weekly systemd timer (schedule configurable via IMAGE_PRUNE_ON_CALENDAR), idempotent, root-guarded - wired into update.sh, the path every node setup funnels through, so existing nodes pick it up on the next update Refs #503 --- update.sh | 4 +++ utils/5stack-image-prune.sh | 68 +++++++++++++++++++++++++++++++++++++ utils/setup_image_prune.sh | 47 +++++++++++++++++++++++++ utils/utils.sh | 1 + 4 files changed, 120 insertions(+) create mode 100755 utils/5stack-image-prune.sh create mode 100755 utils/setup_image_prune.sh diff --git a/update.sh b/update.sh index 8902b98..0ac0931 100755 --- a/update.sh +++ b/update.sh @@ -3,6 +3,10 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "$SCRIPT_DIR/utils/utils.sh" "$@" +# Keep each host's superseded-image prune timer installed / up to date. This is +# the path every node setup funnels through, so existing nodes pick it up too. 
+setup_image_prune
+
 if [ "$REVERSE_PROXY" = true ]; then
     kubectl --kubeconfig=$KUBECONFIG delete certificate 5stack-ssl -n 5stack 2>/dev/null
 fi
diff --git a/utils/5stack-image-prune.sh b/utils/5stack-image-prune.sh
new file mode 100755
index 0000000..2edd4d1
--- /dev/null
+++ b/utils/5stack-image-prune.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+#
+# 5stack image prune
+#
+# Reclaims disk from superseded 5stack container images. Every 5stack image is
+# deployed as ghcr.io/5stackgg/*:latest, so when a node pulls a new build the
+# :latest tag moves to the new digest and the previous version is left behind
+# untagged (a "dangling" image whose overlayfs snapshot keeps filling
+# /var/lib/rancher/k3s/agent). This removes those superseded versions.
+#
+# The image that currently holds a :latest tag is always kept - including
+# game-server / game-streamer, which usually are NOT running when this fires
+# but must stay ready so a match start does not wait on a re-pull. Because we
+# key off the tag (not "is it running"), a plain `crictl rmi --prune` is not
+# used: that would delete the idle-but-current game-server / game-streamer
+# images too.
+#
+# Installed and scheduled by setup_image_prune (utils/setup_image_prune.sh).
+# Safe to run by hand.
+#
+# Exit status: 0 when there was nothing to do or the prune ran (a skipped image
+# is not counted as an error); 1 when the image list could not be read/parsed.
+
+set -o pipefail
+
+# k3s ships crictl; prefer it on PATH, fall back to `k3s crictl`.
+if command -v crictl >/dev/null 2>&1; then
+  CRICTL=(crictl)
+elif command -v k3s >/dev/null 2>&1; then
+  CRICTL=(k3s crictl)
+else
+  echo "[5stack] image-prune: crictl not found, nothing to do"
+  exit 0
+fi
+
+if ! command -v jq >/dev/null 2>&1; then
+  echo "[5stack] image-prune: jq not found, skipping"
+  exit 0
+fi
+
+# Capture the listing first: pipefail does not reach a process substitution,
+# which would hide a crictl failure and report it as "nothing to prune".
+if ! IMAGES_JSON="$("${CRICTL[@]}" images -o json 2>/dev/null)"; then
+  echo "[5stack] image-prune: could not list images" >&2
+  exit 1
+fi
+
+# Superseded = a 5stack image (matched by tag or digest) that no longer carries
+# a :latest tag. Pinned images (e.g. the pause sandbox) are never touched.
+# startswith() anchors the match at the registry, so a reference from elsewhere
+# whose name merely contains "ghcr.io/5stackgg/" is not selected.
+if ! STALE_IDS="$(
+  jq -r '
+    .images[]?
+    | select(.pinned != true)
+    | select([.repoTags[]?, .repoDigests[]?]
+             | any(startswith("ghcr.io/5stackgg/")))
+    | select((.repoTags // []) | any(endswith(":latest")) | not)
+    | .id
+  ' <<<"$IMAGES_JSON" | sort -u
+)"; then
+  echo "[5stack] image-prune: could not parse the image list" >&2
+  exit 1
+fi
+
+STALE=()
+if [ -n "$STALE_IDS" ]; then
+  mapfile -t STALE <<<"$STALE_IDS"
+fi
+
+if [ "${#STALE[@]}" -eq 0 ]; then
+  echo "[5stack] image-prune: no superseded 5stack images"
+  exit 0
+fi
+
+removed=0
+for id in "${STALE[@]}"; do
+  [ -n "$id" ] || continue
+  if "${CRICTL[@]}" rmi "$id" >/dev/null 2>&1; then
+    echo "[5stack] image-prune: removed $id"
+    removed=$((removed + 1))
+  else
+    # Still referenced by a (terminating) container, or busy - leave it for the
+    # next run. Removing it would not stop a running container anyway.
+    echo "[5stack] image-prune: skipped $id (in use or busy)"
+  fi
+done
+
+echo "[5stack] image-prune: removed $removed superseded image(s)"
diff --git a/utils/setup_image_prune.sh b/utils/setup_image_prune.sh
new file mode 100755
index 0000000..e84257b
--- /dev/null
+++ b/utils/setup_image_prune.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Installs the 5stack image-prune script and its systemd timer on the host.
+# Idempotent: safe to re-run on every update. The schedule honors the
+# IMAGE_PRUNE_ON_CALENDAR env var (a systemd OnCalendar= value, default
+# "weekly") so it can be tuned per-deployment without code changes.
+setup_image_prune() {
+  if [ "$EUID" -ne 0 ]; then
+    warn "skipping image prune timer setup (needs root)"
+    return 0
+  fi
+
+  step "Installing 5stack image prune timer"
+
+  local util_dir
+  util_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+  install -m 0755 "$util_dir/5stack-image-prune.sh" /usr/local/bin/5stack-image-prune.sh
+
+  cat >/etc/systemd/system/5stack-image-prune.service <<'UNIT'
+[Unit]
+Description=5stack prune superseded container images
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/5stack-image-prune.sh
+NoNewPrivileges=yes
+UNIT
+
+  # Unquoted delimiter so the schedule expands now; Persistent= reruns a missed slot.
+  cat >/etc/systemd/system/5stack-image-prune.timer <<UNIT
+[Unit]
+Description=Schedule for the 5stack superseded image prune
+
+[Timer]
+OnCalendar=${IMAGE_PRUNE_ON_CALENDAR:-weekly}
+Persistent=true
+
+[Install]
+WantedBy=timers.target
+UNIT
+  systemctl daemon-reload # pick up the (re)written unit files
+  if systemctl enable --now 5stack-image-prune.timer >/dev/null 2>&1; then
+    ok "image prune timer enabled (OnCalendar=${IMAGE_PRUNE_ON_CALENDAR:-weekly})"
+  else
+    warn "could not enable 5stack-image-prune.timer"
+  fi
+}
diff --git a/utils/utils.sh b/utils/utils.sh
index ef682ab..82a00e2 100755
--- a/utils/utils.sh
+++ b/utils/utils.sh
@@ -19,6 +19,7 @@ source "$SCRIPT_DIR/checkout_repos.sh"
 source "$SCRIPT_DIR/check_dev_dependencies.sh"
 source "$SCRIPT_DIR/watch_ssl_status.sh"
 source "$SCRIPT_DIR/setup_kustomize.sh"
+source "$SCRIPT_DIR/setup_image_prune.sh"
 source "$SCRIPT_DIR/tailscale-api.sh"
 source "$SCRIPT_DIR/interactive_select.sh"
 