Skip to content

Commit

Permalink
Merge pull request #2 from ACRC/upstream/0.2
Browse files Browse the repository at this point in the history
Update to upstream version 0.2
  • Loading branch information
sd109 authored Dec 14, 2023
2 parents 3572c06 + 6d9aee6 commit 836f816
Show file tree
Hide file tree
Showing 18 changed files with 507 additions and 9 deletions.
6 changes: 5 additions & 1 deletion .github/actions/setup/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ inputs:
description: The ref to use for the Azimuth configuration.
required: true
default: main
config-environment:
description: The config environment to use.
required: true
default: ci
azimuth-ops-version:
description: >
The azimuth-ops version to use. If not given, the default version is used.
Expand Down Expand Up @@ -55,7 +59,7 @@ runs:
CI_ENV: |
export OS_CLOUD="${{ inputs.os-cloud-name }}"
export OS_CLIENT_CONFIG_FILE="$PWD/clouds.yaml"
export AZIMUTH_CONFIG_ENVIRONMENT=ci
export AZIMUTH_CONFIG_ENVIRONMENT=${{ inputs.config-environment }}
export AZIMUTH_ENVIRONMENT="${{ inputs.environment-prefix }}-${{ github.run_id }}"
export ANSIBLE_FORCE_COLOR=true
Expand Down
20 changes: 20 additions & 0 deletions .github/actions/test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ inputs:
description: The name to use for the test report artifact.
required: true
default: test-report
debug-bundle-artifact-name:
description: The name to use for the debug bundle artifact.
required: true
default: debug-bundle
runs:
using: composite
steps:
Expand Down Expand Up @@ -49,3 +53,19 @@ runs:
log.html
report.html
if: ${{ always() }}

# Runs even when earlier steps have failed, so the bundle is available for failed runs
- name: Create debug bundle
  if: ${{ always() }}
  shell: bash
  run: |
    set -e
    source ./ci.env
    source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
    ./bin/create-debug-bundle

# Stores the archive as a workflow artifact under the configured artifact name
- name: Upload debug bundle
  if: ${{ always() }}
  uses: actions/upload-artifact@v3
  with:
    path: debug-bundle.tar.gz
    name: ${{ inputs.debug-bundle-artifact-name }}
64 changes: 64 additions & 0 deletions .github/workflows/test-pr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Deploys Azimuth and runs the test suite for pull requests that target main
name: Test Azimuth deployment

on:
  pull_request:
    types:
      - opened
      - synchronize
      - ready_for_review
      - reopened
    branches:
      - main
    paths-ignore:
      - 'docs/**'

# A newer run for the same PR branch cancels the run that is already in progress
concurrency:
  group: ${{ github.head_ref }}
  cancel-in-progress: true

jobs:
  # This job exists so that PRs from outside the main repo are rejected
  fail_on_remote:
    runs-on: ubuntu-latest
    steps:
      - name: PR must be from a branch in the azimuth-config repo
        # github.repository is the repository that the workflow runs in, i.e. the
        # base of the PR, so it is the same for fork PRs and cannot reject them.
        # The repository that holds the PR head branch must be checked instead.
        run: exit ${{ github.event.pull_request.head.repo.full_name == 'stackhpc/azimuth-config' && '0' || '1' }}

  # We want jobs to wait in a queue for a slot to run, so as not to overload the test infra
  # GitHub concurrency _almost_ does this, except the queue length is one :-(
  # There is a feature request for what we need https://github.com/orgs/community/discussions/12835
  # Until that is implemented, the only other viable option is a busy wait
  wait_in_queue:
    needs: [fail_on_remote]
    runs-on: ubuntu-latest
    steps:
      - name: Wait for an available slot
        uses: stackhpc/github-actions/workflow-concurrency@master
        with:
          max-concurrency: 1

  # Draft PRs still pass through the jobs above, but are not deployed or tested
  run_azimuth_tests:
    needs: [wait_in_queue]
    if: ${{ !github.event.pull_request.draft }}
    runs-on: ubuntu-latest
    steps:
      # We need to check out the code under test first in order to use local actions
      - name: Checkout code under test
        uses: actions/checkout@v3

      - name: Set up Azimuth environment
        uses: ./.github/actions/setup
        with:
          os-clouds: ${{ secrets.CLOUD }}
          repository: ${{ github.repository }}
          ref: ${{ github.ref }}

      - name: Provision Azimuth
        uses: ./.github/actions/provision

      - name: Run Azimuth tests
        uses: ./.github/actions/test

      # Always tear the deployment down, even when provisioning or the tests failed
      - name: Destroy Azimuth
        uses: ./.github/actions/destroy
        if: ${{ always() }}
46 changes: 46 additions & 0 deletions .github/workflows/test-tag.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Deploys Azimuth and runs the test suite whenever a tag is pushed
name: Test Azimuth deployment

on:
  push:
    tags:
      - '*'

jobs:
  # Runs have to queue for a slot so that the test infra is not overloaded.
  # GitHub concurrency is _almost_ sufficient, but its queue only has a length of one.
  # The feature that is needed is requested in https://github.com/orgs/community/discussions/12835
  # and until it exists a busy wait is the only other viable option.
  wait_in_queue:
    runs-on: ubuntu-latest
    steps:
      - name: Wait for an available slot
        uses: stackhpc/github-actions/workflow-concurrency@master
        with:
          max-concurrency: 1

  # Tags get a full HA test (for now)
  run_azimuth_tests:
    runs-on: ubuntu-latest
    needs:
      - wait_in_queue
    steps:
      # Local actions can only be used once the code under test has been checked out
      - name: Checkout code under test
        uses: actions/checkout@v3

      - name: Set up Azimuth environment
        uses: ./.github/actions/setup
        with:
          config-environment: ci-ha
          os-clouds: ${{ secrets.CLOUD }}
          repository: ${{ github.repository }}
          ref: ${{ github.ref }}

      - name: Provision Azimuth
        uses: ./.github/actions/provision

      - name: Run Azimuth tests
        uses: ./.github/actions/test

      # Runs regardless of the outcome of the previous steps
      - name: Destroy Azimuth
        if: ${{ always() }}
        uses: ./.github/actions/destroy
65 changes: 65 additions & 0 deletions .github/workflows/update-dependencies.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Proposes updates for the dependencies that dependabot is not able to handle
name: Update dependencies

on:
  # Can be started by hand
  workflow_dispatch:
  # Also runs every night
  schedule:
    - cron: "0 0 * * *"

jobs:
  propose_github_release_updates:
    name: ${{ matrix.key }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One entry per dependency that is tracked via GitHub releases
        include:
          - key: azimuth-ops
            repository: stackhpc/ansible-collection-azimuth-ops
            prereleases: "yes"
            path: ./requirements.yml
            version_jsonpath: collections[0].version

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Matrix entries that do not set prereleases fall back to 'no'
      - name: Check for most recent GitHub release
        id: next
        uses: stackhpc/github-actions/github-latest-release@master
        with:
          repository: ${{ matrix.repository }}
          prereleases: ${{ matrix.prereleases || 'no' }}

      # Writes the version found by the previous step into the configured file
      - name: Update dependency key
        uses: stackhpc/github-actions/config-update@master
        with:
          path: ${{ matrix.path }}
          updates: |
            ${{ matrix.version_jsonpath }}=${{ steps.next.outputs.version }}

      - name: Generate app token for PR
        id: generate-app-token
        uses: stackhpc/github-actions/generate-app-token@master
        with:
          repository: ${{ github.repository }}
          app-id: ${{ secrets.APP_ID }}
          app-private-key: ${{ secrets.APP_PRIVATE_KEY }}

      # The PR is opened using the app token generated by the previous step
      - name: Propose changes via PR if required
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ steps.generate-app-token.outputs.token }}
          branch: update-dependency/${{ matrix.key }}
          delete-branch: true
          commit-message: >-
            Update ${{ matrix.key }} to ${{ steps.next.outputs.version }}
          title: >-
            Update ${{ matrix.key }} to ${{ steps.next.outputs.version }}
          body: >
            This PR was created automatically to update
            ${{ matrix.key }} to ${{ steps.next.outputs.version }}.
          labels: |
            automation
            dependency-update
13 changes: 13 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Documentation build configuration, using version 2 of the config format
version: 2

# The docs are built with mkdocs from mkdocs.yml
mkdocs:
  configuration: mkdocs.yml

build:
  os: ubuntu-22.04
  tools:
    python: '3'

# Python packages that are installed before the docs are built
python:
  install:
    - requirements: requirements-docs.txt
34 changes: 29 additions & 5 deletions bin/check-alerts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,34 @@
# to check that the expected number of alerts are firing after deployment
#####

# Stop at the first failing command, including failures inside a pipeline
set -eo pipefail


# Refuse to run unless both environment variables are set to non-empty values
if [[ -z "$AZIMUTH_CONFIG_ROOT" || -z "$AZIMUTH_CONFIG_ENVIRONMENT_ROOT" ]]; then
    echo "Please activate an environment" >&2
    exit 1
fi


# Prints the value of the Ansible variable named by $1
#
# The debug module is run with the JSON stdout callback, and jq pulls the value
# reported for localhost out of the first task of the first play
ansible_variable() {
    local var_name="$1"
    ANSIBLE_LOAD_CALLBACK_PLUGINS=true ANSIBLE_STDOUT_CALLBACK=json \
        ansible -m debug -a "var=${var_name}" all \
        | jq -r ".plays[0].tasks[0].hosts.localhost.${var_name}"
}


# Find out which install_mode the active environment uses
install_mode="$(ansible_variable install_mode)"

# An HA install should only have the watchdog alert
# Single-node deployments have other expected alerts as well, giving four in total
case "$install_mode" in
    ha) EXPECTED_ALERT_COUNT=1 ;;
    *)  EXPECTED_ALERT_COUNT=4 ;;
esac

echo "Starting port-forward for Prometheus API"
./bin/port-forward prometheus 9090 > /dev/null 2>&1 &
PID="$!"
Expand Down Expand Up @@ -43,11 +71,7 @@ while true; do
echo "$ALERTS" | jq -r '.data.alerts[] | " " + .labels.alertname + " is " + .state'
echo ""

# Demo deployment should have 4 pending alerts:
# watchdog + Kube{ControllerManager,Scheduler,Proxy}Down
# since k3s doesn't have distinct pods for these services
# TODO(mkjpryor) Fix these alerts in K3S deployment :D
if [[ $ALERT_COUNT == 4 ]]; then
if [[ ${ALERT_COUNT} -eq ${EXPECTED_ALERT_COUNT} ]]; then
exit
elif [[ ${RETRIES} -eq ${RETRY_LIMIT} ]]; then
exit 1
Expand Down
102 changes: 102 additions & 0 deletions bin/create-debug-bundle
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/env bash

#####
# Produces an archive that contains information which is useful for debugging
#####

# Stop at the first failing command, including failures inside a pipeline
set -eo pipefail


# Refuse to run unless both environment variables are set to non-empty values
if [[ -z "$AZIMUTH_CONFIG_ROOT" || -z "$AZIMUTH_CONFIG_ENVIRONMENT_ROOT" ]]; then
    echo "Please activate an environment" >&2
    exit 1
fi


# Prints the value of the Ansible variable named by $1
#
# The debug module is run with the JSON stdout callback, and jq pulls the value
# reported for localhost out of the first task of the first play
ansible_variable() {
    local var_name="$1"
    ANSIBLE_LOAD_CALLBACK_PLUGINS=true ANSIBLE_STDOUT_CALLBACK=json \
        ansible -m debug -a "var=${var_name}" all \
        | jq -r ".plays[0].tasks[0].hosts.localhost.${var_name}"
}


# The first argument is the name of the local archive, without the .tar.gz
# extension, and defaults to "debug-bundle"
bundle_name="${1:-debug-bundle}"


# Get the install_mode that is in use
install_mode="$(ansible_variable install_mode)"

# For HA installations, we want to collect some additional information from the seed before
# collecting the regular information from the HA cluster
#
# additional_commands is only set for HA installs; otherwise it stays unset and
# expands to nothing when it is substituted into the collection script
if [ "$install_mode" = "ha" ]; then
cluster_name="$(ansible_variable capi_cluster_release_name)"
# The heredoc delimiter is unquoted, so ${cluster_name} is expanded here, when the
# text is assembled, and the kubeconfig path is already filled in.
# The final export changes KUBECONFIG for any kubectl commands that follow this
# text in the same shell.
additional_commands="$(
cat <<EOF
kubectl get pods --all-namespaces -o wide > debug-bundle/seed-list-pods.txt 2>&1
kubectl get pvc --all-namespaces -o wide > debug-bundle/seed-list-pvcs.txt 2>&1
kubectl get cluster-api -o wide > debug-bundle/seed-list-cluster-api.txt 2>&1
kubectl -n capi-addon-system logs --since=1h deploy/cluster-api-addon-provider > debug-bundle/seed-logs-cluster-api-addon-provider.txt 2>&1
kubectl -n capi-janitor-system logs --since=1h deploy/cluster-api-janitor-openstack > debug-bundle/seed-logs-cluster-api-janitor-openstack.txt 2>&1
kubectl -n capi-kubeadm-bootstrap-system logs --since=1h deploy/capi-kubeadm-bootstrap-controller-manager > debug-bundle/seed-logs-capi-kubeadm-bootstrap-controller-manager.txt 2>&1
kubectl -n capi-kubeadm-control-plane-system logs --since=1h deploy/capi-kubeadm-control-plane-controller-manager > debug-bundle/seed-logs-capi-kubeadm-control-plane-controller-manager.txt 2>&1
kubectl -n capi-system logs --since=1h deploy/capi-controller-manager > debug-bundle/seed-logs-capi-controller-manager.txt 2>&1
kubectl -n capo-system logs --since=1h deploy/capo-controller-manager > debug-bundle/seed-logs-capo-controller-manager.txt 2>&1
export KUBECONFIG=./kubeconfig-${cluster_name}.yaml
EOF
)"
fi


# We produce an archive on the seed, then pull it down
# For some reason, cat-ing the archive at the end of the main command corrupts the file
# but using a separate command doesn't

# Things that we include:
#   * List of pods
#   * List of PVCs
#   * List of installed CaaS cluster types and clusters
#   * List of installed Kubernetes templates and clusters
#   * List of installed app templates
#   * List of installed Cluster API resources
#   * The last hour of logs from some key components
#
# Collection is best-effort. A debug bundle is most useful when the deployment is
# broken, which is also when individual kubectl commands are likely to fail, e.g.
# because a resource type or deployment does not exist. Errexit is therefore turned
# off while the information is collected, so that one failing command does not stop
# the archive from being produced. The error output of a failing command is still
# captured in its file. Errexit stays on for preparing the directory and for
# creating the archive, as failures there are fatal.
echo "[INFO] Collecting debug information"
"$AZIMUTH_CONFIG_ROOT/bin/seed-ssh" <<EOF
set -e
rm -rf debug-bundle
mkdir debug-bundle
set +e
${additional_commands}
kubectl get pods --all-namespaces -o wide > debug-bundle/list-pods.txt 2>&1
kubectl get pvc --all-namespaces -o wide > debug-bundle/list-pvcs.txt 2>&1
kubectl get clustertypes.caas -o wide > debug-bundle/list-caas-clustertypes.txt 2>&1
kubectl get clusters.caas --all-namespaces -o wide > debug-bundle/list-caas-clusters.txt 2>&1
kubectl get clustertemplates -o wide > debug-bundle/list-kube-templates.txt 2>&1
kubectl get clusters --all-namespaces -o wide > debug-bundle/list-kube-clusters.txt 2>&1
kubectl get apptemplates --all-namespaces -o wide > debug-bundle/list-kube-apptemplates.txt 2>&1
kubectl get cluster-api --all-namespaces -o wide > debug-bundle/list-cluster-api.txt 2>&1
kubectl -n azimuth logs --since=1h deploy/azimuth-api > debug-bundle/logs-azimuth-api.txt 2>&1
kubectl -n azimuth logs --since=1h deploy/azimuth-caas-operator > debug-bundle/logs-azimuth-caas-operator.txt 2>&1
kubectl -n azimuth logs --since=1h deploy/azimuth-capi-operator > debug-bundle/logs-azimuth-capi-operator.txt 2>&1
kubectl -n azimuth logs --since=1h deploy/azimuth-identity-operator > debug-bundle/logs-azimuth-identity-operator.txt 2>&1
kubectl -n azimuth logs --since=1h deploy/azimuth-ui > debug-bundle/logs-azimuth-ui.txt 2>&1
kubectl -n azimuth logs --since=1h deploy/zenith-server-registrar > debug-bundle/logs-zenith-server-registrar.txt 2>&1
kubectl -n azimuth logs --since=1h deploy/zenith-server-sshd > debug-bundle/logs-zenith-server-sshd.txt 2>&1
kubectl -n azimuth logs --since=1h deploy/zenith-server-sync > debug-bundle/logs-zenith-server-sync.txt 2>&1
kubectl -n capi-addon-system logs --since=1h deploy/cluster-api-addon-provider > debug-bundle/logs-cluster-api-addon-provider.txt 2>&1
kubectl -n capi-janitor-system logs --since=1h deploy/cluster-api-janitor-openstack > debug-bundle/logs-cluster-api-janitor-openstack.txt 2>&1
kubectl -n capi-kubeadm-bootstrap-system logs --since=1h deploy/capi-kubeadm-bootstrap-controller-manager > debug-bundle/logs-capi-kubeadm-bootstrap-controller-manager.txt 2>&1
kubectl -n capi-kubeadm-control-plane-system logs --since=1h deploy/capi-kubeadm-control-plane-controller-manager > debug-bundle/logs-capi-kubeadm-control-plane-controller-manager.txt 2>&1
kubectl -n capi-system logs --since=1h deploy/capi-controller-manager > debug-bundle/logs-capi-controller-manager.txt 2>&1
kubectl -n capo-system logs --since=1h deploy/capo-controller-manager > debug-bundle/logs-capo-controller-manager.txt 2>&1
set -e
tar -czf debug-bundle.tar.gz -C debug-bundle \$(ls -A debug-bundle)
EOF
# The archive is written locally as <bundle name>.tar.gz
echo "[INFO] Fetching debug bundle"
"$AZIMUTH_CONFIG_ROOT/bin/seed-ssh" -- cat debug-bundle.tar.gz > "$bundle_name.tar.gz"
# Remove the working directory and the archive from the seed
echo "[INFO] Cleaning up interim files"
"$AZIMUTH_CONFIG_ROOT/bin/seed-ssh" -- rm -rf debug-bundle debug-bundle.tar.gz
Loading

0 comments on commit 836f816

Please sign in to comment.