Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: enable nutanix self hosted e2e tests #987

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,16 @@ jobs:
- {"provider": "Docker", "kubernetesVersion": "v1.29.9"}
- {"provider": "Docker", "kubernetesVersion": "v1.30.6"}
- {"provider": "Docker", "kubernetesVersion": "v1.31.2"}
# Uncomment below once we have the ability to run e2e tests on other providers from GHA.
supershal marked this conversation as resolved.
Show resolved Hide resolved
# - {"provider": "Nutanix", "kubernetesVersion": "v1.29.6"}
- {"provider": "Nutanix", "kubernetesVersion": "v1.30.5", "osImage": "nkp-rocky-9.4-release-1.30.5-20240930171619"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be the new 9.5 image

# - {"provider": "AWS", "kubernetesVersion": "v1.29.6"}
fail-fast: false
uses: ./.github/workflows/e2e.yml
with:
focus: Self-hosted
provider: ${{ matrix.config.provider }}
kubernetes-version: ${{ matrix.config.kubernetesVersion }}
runs-on: ${{ matrix.config.provider == 'Nutanix' && 'self-hosted-ncn-dind' || 'ubuntu-22.04' }}
runs-on: 'self-hosted-ncn-dind'
os-image: ${{ matrix.config.provider == 'Nutanix' && matrix.config.osImage || '' }}
secrets: inherit
permissions:
contents: read
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ jobs:
restore-keys: |
${{ runner.os }}-go-

- name: Login to Internal Container Registry
if: inputs.focus == 'Self-hosted'
uses: docker/login-action@v3
with:
registry: ${{ secrets.LOCAL_IMAGE_REGISTRY }}
username: ${{ secrets.LOCAL_IMAGE_REGISTRY_USERNAME }}
password: ${{ secrets.LOCAL_IMAGE_REGISTRY_TOKEN }}

# The default disk size of GitHub-hosted runners is ~14GB, which is not enough to run the e2e tests.
# Clean up the disk; see the upstream discussion at https://github.com/actions/runner-images/issues/2840.
- name: Cleanup Disk Space
Expand Down Expand Up @@ -94,6 +102,7 @@ jobs:
NUTANIX_MACHINE_TEMPLATE_IMAGE_NAME: ${{ inputs.os-image }}
KINDEST_IMAGE_TAG: ${{ inputs.kubernetes-version }}
E2E_KUBERNETES_VERSION: ${{ inputs.kubernetes-version }}
LOCAL_IMAGE_REGISTRY: ${{ inputs.focus == 'Self-hosted' && secrets.LOCAL_IMAGE_REGISTRY || 'ko.local' }}

- if: success() || failure() # always run even if the previous step fails
name: Publish e2e test report
Expand Down
17 changes: 13 additions & 4 deletions .goreleaser.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ before:
$(helm template {{ .ProjectName }} ./charts/{{ .ProjectName }} \
--namespace caren-system \
--set-string image.tag=v{{ trimprefix .Version "v" }}{{ if .IsSnapshot }}-{{ .Runtime.Goarch }}{{ end }} \
--set-string helmRepository.images.bundleInitializer.tag=v{{ trimprefix .Version "v" }}{{ if .IsSnapshot }}-{{ .Runtime.Goarch }} \
--set-string image.repository=ko.local/{{ .ProjectName }}{{ end }} \
--set-string helmRepository.images.bundleInitializer.tag=v{{ trimprefix .Version "v" }}{{ if .IsSnapshot }}-{{ .Runtime.Goarch }}{{ end }} \
{{ if .IsSnapshot }}--set-string image.repository={{ .Env.LOCAL_IMAGE_REGISTRY }}/{{ .ProjectName }}{{ end }} \
{{ if .IsSnapshot }}--set-string helmRepository.images.bundleInitializer.repository={{ .Env.LOCAL_IMAGE_REGISTRY }}/cluster-api-runtime-extensions-helm-chart-bundle-initializer{{ end }} \
)
EOF'
- sed -i -e 's/\${/$${/g' -e 's/v0.0.0-dev/v{{ trimprefix .Version "v" }}/g' runtime-extensions-components.yaml
Expand Down Expand Up @@ -90,14 +91,22 @@ builds:
sh -ec 'if [ {{ .IsSnapshot }} == true ] && [ {{ .Runtime.Goarch }} == {{ .Arch }} ]; then
env SOURCE_DATE_EPOCH=$(date +%s) \
KO_DATA_DATE_EPOCH=$(date +%s) \
KO_DOCKER_REPO=ko.local/{{ .ProjectName }} \
KO_DOCKER_REPO={{ .Env.LOCAL_IMAGE_REGISTRY }}/{{ .ProjectName }} \
ko build \
--bare \
--platform linux/{{ .Arch }} \
-t v{{ trimprefix .Version "v" }}-{{ .Arch }} \
./cmd
docker buildx build \
--platform linux/{{ .Arch }} \
-t {{ .Env.LOCAL_IMAGE_REGISTRY }}/cluster-api-runtime-extensions-helm-chart-bundle-initializer:v{{ trimprefix .Version "v" }}-{{ .Arch }} \
-f ./hack/addons/helm-chart-bundler/Dockerfile \
--load \
.
if [ {{ .Env.LOCAL_IMAGE_REGISTRY }} != "ko.local" ]; then
docker push {{ .Env.LOCAL_IMAGE_REGISTRY }}/cluster-api-runtime-extensions-helm-chart-bundle-initializer:v{{ trimprefix .Version "v" }}-{{ .Arch }}
fi
fi'

archives:
- name_template: '{{ .ProjectName }}_v{{ trimprefix .Version "v" }}_{{ .Os }}_{{ .Arch }}'
builds:
Expand Down
6 changes: 6 additions & 0 deletions make/go.mk
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ override undefine GOARCH
ALL_GO_SUBMODULES := $(shell find -mindepth 2 -maxdepth 2 -name go.mod -printf '%P\n' | sort)
GO_SUBMODULES_NO_DOCS := $(filter-out $(addsuffix /go.mod,docs),$(ALL_GO_SUBMODULES))
THIRD_PARTY_GO_SUBMODULES := $(shell find hack/third-party -mindepth 2 -name go.mod -printf 'hack/third-party/%P\n' | sort)
# Self-hosted tests require the local CAREN images to be available to the workload clusters.
# Export LOCAL_IMAGE_REGISTRY to point at an accessible registry when running self-hosted tests.
# When the e2e tests build the project using goreleaser, the images are pushed to that registry and made available to the workload clusters.
# By default, the CAREN image is stored in the local container engine store as ko.local/cluster-api-runtime-extensions-nutanix:${TAG}
LOCAL_IMAGE_REGISTRY ?= ko.local
export LOCAL_IMAGE_REGISTRY

define go_test
source <(setup-envtest use -p env $(ENVTEST_VERSION)) && \
Expand Down
6 changes: 3 additions & 3 deletions test/e2e/config/caren.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
managementClusterName: caren-e2e

images:
- name: ko.local/cluster-api-runtime-extensions-nutanix:${E2E_IMAGE_TAG}
- name: ${LOCAL_IMAGE_REGISTRY}/cluster-api-runtime-extensions-nutanix:${E2E_IMAGE_TAG}
loadBehavior: mustLoad
- name: ghcr.io/nutanix-cloud-native/cluster-api-runtime-extensions-helm-chart-bundle-initializer:${E2E_IMAGE_TAG}
- name: ${LOCAL_IMAGE_REGISTRY}/cluster-api-runtime-extensions-helm-chart-bundle-initializer:${E2E_IMAGE_TAG}
loadBehavior: mustLoad

providers:
Expand Down Expand Up @@ -211,7 +211,7 @@ variables:
# DOCKER_HUB_PASSWORD: ""

intervals:
default/wait-controllers: ["3m", "10s"]
default/wait-controllers: ["10m", "10s"]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The helm-repository container creates a PV to store the charts. PV creation is slow on the Nutanix workload cluster, which causes flaky tests. Increasing the timeout to ensure helm-repository comes up.

default/wait-cluster: ["10m", "10s"]
default/wait-control-plane: ["10m", "10s"]
default/wait-worker-nodes: ["10m", "10s"]
Expand Down
26 changes: 26 additions & 0 deletions test/e2e/framework/self_hosted.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import (
"sigs.k8s.io/cluster-api/test/framework/clusterctl"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/test/e2e/framework/nutanix"
)

// SelfHostedSpecInput is the input for SelfHostedSpec.
Expand Down Expand Up @@ -180,6 +182,30 @@ func SelfHostedSpec(ctx context.Context, inputGetter func() SelfHostedSpecInput)
if input.InfrastructureProvider != nil {
infrastructureProvider = *input.InfrastructureProvider
}

// For Nutanix provider, reserve an IP address for the workload cluster control plane endpoint -
// remember to unreserve it!
if infrastructureProvider == "nutanix" {
By(
"Reserving an IP address for the workload cluster control plane endpoint",
)
nutanixClient, err := nutanix.NewV4Client(
nutanix.CredentialsFromCAPIE2EConfig(input.E2EConfig),
)
Expect(err).ToNot(HaveOccurred())
//nolint:contextcheck // ReserveIP function does not accept context. It's okay to ignore the context check in tests.
controlPlaneEndpointIP, unreserveControlPlaneEndpointIP, err := nutanix.ReserveIP(
input.E2EConfig.GetVariable("NUTANIX_SUBNET_NAME"),
input.E2EConfig.GetVariable(
"NUTANIX_PRISM_ELEMENT_CLUSTER_NAME",
),
nutanixClient,
)
Expect(err).ToNot(HaveOccurred())
DeferCleanup(unreserveControlPlaneEndpointIP)
clusterctlVariables["CONTROL_PLANE_ENDPOINT_IP"] = controlPlaneEndpointIP
}

clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{
ClusterProxy: input.BootstrapClusterProxy,
ConfigCluster: clusterctl.ConfigClusterInput{
Expand Down
7 changes: 4 additions & 3 deletions test/e2e/self_hosted_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,10 @@ var _ = Describe("Self-hosted", Serial, func() {
WaitForAddonsToBeReadyInWorkloadCluster(
ctx,
WaitForAddonsToBeReadyInWorkloadClusterInput{
AddonsConfig: addonsConfig,
ClusterProxy: proxy,
WorkloadCluster: workloadCluster,
AddonsConfig: addonsConfig,
ClusterProxy: proxy,
WorkloadCluster: workloadCluster,
InfrastructureProvider: lowercaseProvider,
DeploymentIntervals: e2eConfig.GetIntervals(
flavour,
"wait-deployment",
Expand Down
Loading