From 776bc65b7ca09bf4e5102d0c05cfafa7ce58b389 Mon Sep 17 00:00:00 2001
From: Utku Ozdemir
Date: Fri, 29 Nov 2024 21:06:15 +0100
Subject: [PATCH] test: add static infra provider (bare-metal provider)
 integration tests

Add an integration test, meant to be run by a static infra provider,
covering the cluster creation/destruction/re-creation flow.

Signed-off-by: Utku Ozdemir
---
 cmd/integration-test/pkg/tests/cluster.go    |   6 +-
 cmd/integration-test/pkg/tests/extensions.go |  41 +++++
 cmd/integration-test/pkg/tests/infra.go      | 177 +++++++++++++++++++
 cmd/integration-test/pkg/tests/tests.go      |  97 ++++++++++
 4 files changed, 320 insertions(+), 1 deletion(-)

diff --git a/cmd/integration-test/pkg/tests/cluster.go b/cmd/integration-test/pkg/tests/cluster.go
index 80ad7ff6..c2f66b2b 100644
--- a/cmd/integration-test/pkg/tests/cluster.go
+++ b/cmd/integration-test/pkg/tests/cluster.go
@@ -57,6 +57,8 @@ type ClusterOptions struct {
 	ProviderData string
 
 	ScalingTimeout time.Duration
+
+	SkipExtensionCheckOnCreate bool
 }
 
 // MachineOptions are the options for machine creation.
@@ -79,7 +81,9 @@ func CreateCluster(testCtx context.Context, cli *client.Client, options ClusterO
 		require := require.New(t)
 
 		pickUnallocatedMachines(ctx, t, st, options.ControlPlanes+options.Workers, func(machineIDs []resource.ID) {
-			checkExtensionWithRetries(ctx, t, cli, HelloWorldServiceExtensionName, machineIDs...)
+			if !options.SkipExtensionCheckOnCreate {
+				checkExtensionWithRetries(ctx, t, cli, HelloWorldServiceExtensionName, machineIDs...)
+			}
 
 			if options.BeforeClusterCreateFunc != nil {
 				options.BeforeClusterCreateFunc(ctx, t, cli, machineIDs)
diff --git a/cmd/integration-test/pkg/tests/extensions.go b/cmd/integration-test/pkg/tests/extensions.go
index a1f94636..39c14e45 100644
--- a/cmd/integration-test/pkg/tests/extensions.go
+++ b/cmd/integration-test/pkg/tests/extensions.go
@@ -105,3 +105,44 @@ func checkExtension(ctx context.Context, cli *client.Client, machineID resource.
 
 	return fmt.Errorf("extension %q is not found on machine %q", extension, machineStatus.Metadata().ID())
 }
+
+// UpdateExtensions updates the extensions on all the machines of the given cluster.
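+//
+// If a cluster machine has no ExtensionsConfiguration resource yet, one is created; otherwise the existing
+// resource is updated in place with the given list of extensions.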
+func UpdateExtensions(ctx context.Context, cli *client.Client, cluster string, extensions []string) TestFunc {
+	return func(t *testing.T) {
+		clusterMachineList, err := safe.StateListAll[*omni.ClusterMachine](ctx, cli.Omni().State(), state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, cluster)))
+		require.NoError(t, err)
+
+		require.Greater(t, clusterMachineList.Len(), 0)
+
+		for clusterMachine := range clusterMachineList.All() {
+			var extensionsConfig *omni.ExtensionsConfiguration
+
+			extensionsConfig, err = safe.StateGetByID[*omni.ExtensionsConfiguration](ctx, cli.Omni().State(), clusterMachine.Metadata().ID())
+			if err != nil && !state.IsNotFoundError(err) {
+				require.NoError(t, err)
+			}
+
+			updateSpec := func(res *omni.ExtensionsConfiguration) error {
+				res.Metadata().Labels().Set(omni.LabelCluster, cluster)
+				res.Metadata().Labels().Set(omni.LabelClusterMachine, clusterMachine.Metadata().ID())
+
+				res.TypedSpec().Value.Extensions = extensions
+
+				return nil
+			}
+
+			if extensionsConfig == nil {
+				extensionsConfig = omni.NewExtensionsConfiguration(resources.DefaultNamespace, clusterMachine.Metadata().ID())
+
+				require.NoError(t, updateSpec(extensionsConfig))
+
+				require.NoError(t, cli.Omni().State().Create(ctx, extensionsConfig))
+
+				continue
+			}
+
+			_, err = safe.StateUpdateWithConflicts[*omni.ExtensionsConfiguration](ctx, cli.Omni().State(), extensionsConfig.Metadata(), updateSpec)
+			require.NoError(t, err)
+		}
+	}
+}
diff --git a/cmd/integration-test/pkg/tests/infra.go b/cmd/integration-test/pkg/tests/infra.go
index 5307451c..22c79bca 100644
--- a/cmd/integration-test/pkg/tests/infra.go
+++ b/cmd/integration-test/pkg/tests/infra.go
@@ -19,6 +19,8 @@ import (
 	"github.com/siderolabs/go-retry/retry"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	"go.uber.org/zap"
+	"go.uber.org/zap/zaptest"
 
 	"github.com/siderolabs/omni/client/pkg/client"
 	"github.com/siderolabs/omni/client/pkg/omni/resources"
@@ -155,3 +157,178 @@ func AssertMachinesShouldBeDeprovisioned(testCtx context.Context, client *client
 		}
 	}
 }
+
+// AcceptInfraMachines asserts that there are a certain number of machines that are not accepted, all provisioned by the same static infra provider.
+//
+// It then accepts them all and asserts that the states of various resources are updated as expected.
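+//
+// The infra provider ID is discovered from the siderolink.Link annotations, and each machine is accepted by
+// creating an omni.InfraMachineConfig resource with the Accepted field set to true.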
+func AcceptInfraMachines(testCtx context.Context, omniState state.State, expectedCount int) TestFunc {
+	return func(t *testing.T) {
+		logger := zaptest.NewLogger(t)
+
+		ctx, cancel := context.WithTimeout(testCtx, time.Minute*10)
+		defer cancel()
+
+		rtestutils.AssertLength[*siderolink.Link](ctx, t, omniState, expectedCount)
+
+		linkList, err := safe.StateListAll[*siderolink.Link](ctx, omniState)
+		require.NoError(t, err)
+
+		// link count should match the expected count
+		require.Equal(t, expectedCount, linkList.Len())
+
+		ids := make([]resource.ID, 0, linkList.Len())
+
+		var infraProviderID string
+
+		for link := range linkList.All() {
+			ids = append(ids, link.Metadata().ID())
+
+			infraProviderID, _ = link.Metadata().Annotations().Get(omni.LabelInfraProviderID)
+
+			require.NotEmpty(t, infraProviderID)
+
+			rtestutils.AssertResource[*infra.Machine](ctx, t, omniState, link.Metadata().ID(), func(res *infra.Machine, assertion *assert.Assertions) {
+				assertion.False(res.TypedSpec().Value.Accepted)
+			})
+
+			rtestutils.AssertNoResource[*infra.MachineStatus](ctx, t, omniState, link.Metadata().ID())
+
+			rtestutils.AssertNoResource[*omni.Machine](ctx, t, omniState, link.Metadata().ID())
+
+			// Accept the machine
+			infraMachineConfig := omni.NewInfraMachineConfig(resources.DefaultNamespace, link.Metadata().ID())
+
+			infraMachineConfig.TypedSpec().Value.Accepted = true
+
+			require.NoError(t, omniState.Create(ctx, infraMachineConfig))
+		}
+
+		logger.Info("accepted machines", zap.String("infra_provider_id", infraProviderID), zap.Strings("machine_ids", ids))
+
+		providerStatus, err := safe.StateGetByID[*infra.ProviderStatus](ctx, omniState, infraProviderID)
+		require.NoError(t, err)
+
+		_, isStaticProvider := providerStatus.Metadata().Labels().Get(omni.LabelIsStaticInfraProvider)
+		require.True(t, isStaticProvider)
+
+		// Assert that the infra.Machines are now marked as accepted
+		rtestutils.AssertResources(ctx, t, omniState, ids, func(res *infra.Machine, assertion *assert.Assertions) {
+			assertion.True(res.TypedSpec().Value.Accepted)
+		})
+
+		// Assert that omni.Machine resources are now created and marked as managed by the static infra provider
+		rtestutils.AssertResources(ctx, t, omniState, ids, func(res *omni.Machine, assertion *assert.Assertions) {
+			_, isManagedByStaticInfraProvider := res.Metadata().Labels().Get(omni.LabelIsManagedByStaticInfraProvider)
+
+			assertion.True(isManagedByStaticInfraProvider)
+		})
+
+		// Assert that infra.MachineStatus resources are now created, and they are marked as ready to use
+		rtestutils.AssertResources(ctx, t, omniState, ids, func(res *infra.MachineStatus, assertion *assert.Assertions) {
+			assertion.True(res.TypedSpec().Value.ReadyToUse)
+		})
+	}
+}
+
+// AssertInfraMachinesAreAllocated asserts that the machines that belong to the given cluster and are managed by a static infra provider
+// are marked as allocated in the related resources.
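+//
+// "Allocated" here means that the infra.Machine carries the cluster's Talos version and extensions, the
+// infra.MachineStatus reports ReadyToUse as false, and the infra.MachineState reports Installed as true.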
+func AssertInfraMachinesAreAllocated(testCtx context.Context, omniState state.State, clusterID, talosVersion string, extensions []string) TestFunc {
+	return func(t *testing.T) {
+		ctx, cancel := context.WithTimeout(testCtx, time.Minute*10)
+		defer cancel()
+
+		nodeList, err := safe.StateListAll[*omni.MachineSetNode](ctx, omniState, state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, clusterID)))
+		require.NoError(t, err)
+
+		require.Greater(t, nodeList.Len(), 0)
+
+		for machineSetNode := range nodeList.All() {
+			id := machineSetNode.Metadata().ID()
+
+			// there must be an infra.Machine resource for each node
+			rtestutils.AssertResource[*infra.Machine](ctx, t, omniState, id, func(res *infra.Machine, assertion *assert.Assertions) {
+				assertion.Equal(talosVersion, res.TypedSpec().Value.ClusterTalosVersion)
+				assertion.Empty(res.TypedSpec().Value.WipeId)
+				assertion.Equal(extensions, res.TypedSpec().Value.Extensions)
+			})
+
+			// machine is allocated, so the ReadyToUse field is set to false
+			rtestutils.AssertResource[*infra.MachineStatus](ctx, t, omniState, id, func(res *infra.MachineStatus, assertion *assert.Assertions) {
+				assertion.False(res.TypedSpec().Value.ReadyToUse)
+			})
+
+			// omni receives a SequenceEvent from the SideroLink event sink and sets the Installed field to true
+			rtestutils.AssertResource[*infra.MachineState](ctx, t, omniState, id, func(res *infra.MachineState, assertion *assert.Assertions) {
+				assertion.True(res.TypedSpec().Value.Installed)
+			})
+		}
+	}
+}
+
+// AssertAllInfraMachinesAreUnallocated asserts that all infra machines are unallocated.
+func AssertAllInfraMachinesAreUnallocated(testCtx context.Context, omniState state.State) TestFunc {
+	return func(t *testing.T) {
+		logger := zaptest.NewLogger(t)
+
+		ctx, cancel := context.WithTimeout(testCtx, time.Minute*10)
+		defer cancel()
+
+		infraMachineList, err := safe.StateListAll[*infra.Machine](ctx, omniState)
+		require.NoError(t, err)
+
+		require.Greater(t, infraMachineList.Len(), 0)
+
+		for infraMachine := range infraMachineList.All() {
+			id := infraMachine.Metadata().ID()
+
+			rtestutils.AssertResource[*infra.Machine](ctx, t, omniState, id, func(res *infra.Machine, assertion *assert.Assertions) {
+				assertion.Empty(res.TypedSpec().Value.ClusterTalosVersion)
+				assertion.Empty(res.TypedSpec().Value.Extensions)
+
+				if assertion.NotEmpty(res.TypedSpec().Value.WipeId) { // the machine should be marked for wipe
+					logger.Info("machine is marked for wipe", zap.String("machine_id", id), zap.String("wipe_id", res.TypedSpec().Value.WipeId))
+				}
+			})
+
+			// machine is unallocated, so the ReadyToUse field will be set to true
+			rtestutils.AssertResource[*infra.MachineStatus](ctx, t, omniState, id, func(res *infra.MachineStatus, assertion *assert.Assertions) {
+				assertion.True(res.TypedSpec().Value.ReadyToUse)
+			})
+
+			// provider wipes the machine and sets the Installed field to false
+			rtestutils.AssertResource[*infra.MachineState](ctx, t, omniState, id, func(res *infra.MachineState, assertion *assert.Assertions) {
+				assertion.False(res.TypedSpec().Value.Installed)
+			})
+		}
+	}
+}
+
+// DestroyInfraMachines removes siderolink.Link resources for all machines managed by a static infra provider,
+// and asserts that the related infra.Machine and infra.MachineStatus resources are deleted.
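+//
+// Destroying the link of a machine is expected to cascade into the removal of the corresponding infra.Machine
+// and infra.MachineStatus resources.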
+func DestroyInfraMachines(testCtx context.Context, omniState state.State) TestFunc {
+	return func(t *testing.T) {
+		ctx, cancel := context.WithTimeout(testCtx, time.Minute*10)
+		defer cancel()
+
+		machineList, err := safe.StateListAll[*omni.Machine](ctx, omniState, state.WithLabelQuery(resource.LabelExists(omni.LabelIsManagedByStaticInfraProvider)))
+		require.NoError(t, err)
+
+		require.Greater(t, machineList.Len(), 0)
+
+		for machine := range machineList.All() {
+			id := machine.Metadata().ID()
+
+			rtestutils.Destroy[*siderolink.Link](ctx, t, omniState, []string{id})
+
+			rtestutils.AssertNoResource[*infra.Machine](ctx, t, omniState, id)
+			rtestutils.AssertNoResource[*infra.MachineStatus](ctx, t, omniState, id)
+		}
+	}
+}
diff --git a/cmd/integration-test/pkg/tests/tests.go b/cmd/integration-test/pkg/tests/tests.go
index 3cee19e7..823147e7 100644
--- a/cmd/integration-test/pkg/tests/tests.go
+++ b/cmd/integration-test/pkg/tests/tests.go
@@ -70,6 +70,7 @@ type Options struct {
 	AnotherKubernetesVersion string
 	OmnictlPath              string
 	ScalingTimeout           time.Duration
+	StaticInfraProvider      string
 }
 
 func (o Options) defaultInfraProvider() string {
@@ -1331,6 +1332,102 @@ Test flow of cluster creation and scaling using cluster templates.`,
 			),
 			Finalizer: DestroyCluster(ctx, rootClient.Omni().State(), "integration-workload-proxy"),
 		},
+		{
+			Name: "StaticInfraProvider",
+			Description: `
+Tests common Omni operations on machines created by a static infrastructure provider:
+
+- expect all machines to be unaccepted and accept them
+- assert that machines are ready to use
+- create a 3+1 cluster - assert that cluster is healthy and ready
+- assert that machines are not ready to use (occupied)
+- destroy the cluster - assert that machines are wiped, then marked as ready to use
+- create a new 3+1 cluster
+- assert that cluster is healthy and ready
+- remove links of the machines
+- assert that infra resources for those machines are removed
+`,
+			Parallel: true,
+			Subtests: subTests(
+				subTest{
+					"AcceptMachines",
+					AcceptInfraMachines(ctx, rootClient.Omni().State(), options.ExpectedMachines),
+				},
+				subTest{
+					"ClusterShouldBeCreated",
+					CreateCluster(ctx, rootClient, ClusterOptions{
+						Name:          "integration-static-infra-provider",
+						ControlPlanes: 3,
+						Workers:       1,
+
+						MachineOptions: options.MachineOptions,
+						ScalingTimeout: options.ScalingTimeout,
+
+						SkipExtensionCheckOnCreate: true,
+					}),
+				},
+			).Append(
+				TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady(
+					ctx, rootClient,
+					"integration-static-infra-provider",
+					options.MachineOptions.TalosVersion,
+					options.MachineOptions.KubernetesVersion,
+					talosAPIKeyPrepare,
+				)...,
+			).Append(
+				subTest{
+					"ExtensionsShouldBeUpdated",
+					UpdateExtensions(ctx, rootClient, "integration-static-infra-provider", []string{"siderolabs/binfmt-misc", "siderolabs/glibc"}),
+				},
+				subTest{
+					"MachinesShouldBeAllocated",
+					AssertInfraMachinesAreAllocated(ctx, rootClient.Omni().State(), "integration-static-infra-provider",
+						options.MachineOptions.TalosVersion, []string{"siderolabs/binfmt-misc", "siderolabs/glibc"}),
+				},
+			).Append(
+				subTest{
+					"ClusterShouldBeDestroyed",
+					AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-static-infra-provider", false),
+				},
+			).Append(
+				subTest{
+					"MachinesShouldBeUnallocated",
+					AssertAllInfraMachinesAreUnallocated(ctx, rootClient.Omni().State()),
+				},
+			).Append(
+				subTest{
+					"ClusterShouldBeRecreated",
+					CreateCluster(ctx, rootClient, ClusterOptions{
+						Name:          "integration-static-infra-provider",
+						ControlPlanes: 3,
+						Workers:       1,
+
+						MachineOptions: options.MachineOptions,
+						ScalingTimeout: options.ScalingTimeout,
+
+						SkipExtensionCheckOnCreate: true,
+					}),
+				},
+			).Append(
+				TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady(
+					ctx, rootClient,
+					"integration-static-infra-provider",
+					options.MachineOptions.TalosVersion,
+					options.MachineOptions.KubernetesVersion,
+					talosAPIKeyPrepare,
+				)...,
+			).Append(
+				subTest{
+					"ClusterShouldBeDestroyed",
+					AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-static-infra-provider", false),
+				},
+				subTest{
+					"InfraMachinesShouldBeDestroyed",
+					DestroyInfraMachines(ctx, rootClient.Omni().State()),
+				},
+			),
+			Finalizer: DestroyCluster(ctx, rootClient.Omni().State(), "integration-static-infra-provider"),
+		},
 	}
 
 	var re *regexp.Regexp