test: add static infra provider (bare-metal provider) integration tests
Add an integration test, intended to be run with a static infra provider (the bare-metal provider), covering the cluster creation/destruction/re-creation flow.

Signed-off-by: Utku Ozdemir <[email protected]>
utkuozdemir committed Nov 29, 2024
1 parent d879c6e commit 776bc65
Showing 4 changed files with 320 additions and 1 deletion.
6 changes: 5 additions & 1 deletion cmd/integration-test/pkg/tests/cluster.go
@@ -57,6 +57,8 @@ type ClusterOptions struct {
 	ProviderData string
 
 	ScalingTimeout time.Duration
+
+	SkipExtensionCheckOnCreate bool
 }
 
 // MachineOptions are the options for machine creation.
@@ -79,7 +81,9 @@ func CreateCluster(testCtx context.Context, cli *client.Client, options ClusterOptions
 	require := require.New(t)
 
 	pickUnallocatedMachines(ctx, t, st, options.ControlPlanes+options.Workers, func(machineIDs []resource.ID) {
-		checkExtensionWithRetries(ctx, t, cli, HelloWorldServiceExtensionName, machineIDs...)
+		if !options.SkipExtensionCheckOnCreate {
+			checkExtensionWithRetries(ctx, t, cli, HelloWorldServiceExtensionName, machineIDs...)
+		}
 
 		if options.BeforeClusterCreateFunc != nil {
 			options.BeforeClusterCreateFunc(ctx, t, cli, machineIDs)
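
The new option is exercised later in this commit (see tests.go below) when creating clusters from static-infra-provider machines, which may not ship the hello-world test extension:

CreateCluster(ctx, rootClient, ClusterOptions{
	Name:          "integration-static-infra-provider",
	ControlPlanes: 3,
	Workers:       1,

	MachineOptions: options.MachineOptions,
	ScalingTimeout: options.ScalingTimeout,

	SkipExtensionCheckOnCreate: true,
})
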
41 changes: 41 additions & 0 deletions cmd/integration-test/pkg/tests/extensions.go
@@ -105,3 +105,44 @@ func checkExtension(ctx context.Context, cli *client.Client, machineID resource.ID

return fmt.Errorf("extension %q is not found on machine %q", extension, machineStatus.Metadata().ID())
}

// UpdateExtensions updates the extensions on all the machines of the given cluster.
func UpdateExtensions(ctx context.Context, cli *client.Client, cluster string, extensions []string) TestFunc {
return func(t *testing.T) {
clusterMachineList, err := safe.StateListAll[*omni.ClusterMachine](ctx, cli.Omni().State(), state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, cluster)))
require.NoError(t, err)

require.Greater(t, clusterMachineList.Len(), 0)

for clusterMachine := range clusterMachineList.All() {
var extensionsConfig *omni.ExtensionsConfiguration

extensionsConfig, err = safe.StateGetByID[*omni.ExtensionsConfiguration](ctx, cli.Omni().State(), clusterMachine.Metadata().ID())
if err != nil && !state.IsNotFoundError(err) {
require.NoError(t, err)
}

updateSpec := func(res *omni.ExtensionsConfiguration) error {
res.Metadata().Labels().Set(omni.LabelCluster, cluster)
res.Metadata().Labels().Set(omni.LabelClusterMachine, clusterMachine.Metadata().ID())

res.TypedSpec().Value.Extensions = extensions

return nil
}

if extensionsConfig == nil {
extensionsConfig = omni.NewExtensionsConfiguration(resources.DefaultNamespace, clusterMachine.Metadata().ID())

require.NoError(t, updateSpec(extensionsConfig))

require.NoError(t, cli.Omni().State().Create(ctx, extensionsConfig))

continue
}

_, err = safe.StateUpdateWithConflicts[*omni.ExtensionsConfiguration](ctx, cli.Omni().State(), extensionsConfig.Metadata(), updateSpec)
require.NoError(t, err)
}
}
}
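
For reference, the test suite below wires this helper up against the freshly created cluster; the same extension list is then asserted on the infra machines:

UpdateExtensions(ctx, rootClient, "integration-static-infra-provider",
	[]string{"siderolabs/binfmt-misc", "siderolabs/glibc"})
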
177 changes: 177 additions & 0 deletions cmd/integration-test/pkg/tests/infra.go
@@ -19,6 +19,8 @@ import (
 	"github.com/siderolabs/go-retry/retry"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	"go.uber.org/zap"
+	"go.uber.org/zap/zaptest"
 
 	"github.com/siderolabs/omni/client/pkg/client"
 	"github.com/siderolabs/omni/client/pkg/omni/resources"
@@ -155,3 +157,178 @@ func AssertMachinesShouldBeDeprovisioned(testCtx context.Context, client *client.Client
}
}
}

// AcceptInfraMachines asserts that the expected number of machines provisioned by a static infra provider exist and are not yet accepted.
//
// It then accepts them all and asserts that the states of various resources are updated as expected.
func AcceptInfraMachines(testCtx context.Context, omniState state.State, expectedCount int) TestFunc {
return func(t *testing.T) {
logger := zaptest.NewLogger(t)

ctx, cancel := context.WithTimeout(testCtx, time.Minute*10)
defer cancel()

rtestutils.AssertLength[*siderolink.Link](ctx, t, omniState, expectedCount)

linkList, err := safe.StateListAll[*siderolink.Link](ctx, omniState)
require.NoError(t, err)

// link count should match the expected count
require.Equal(t, expectedCount, linkList.Len())

ids := make([]resource.ID, 0, linkList.Len())

var infraProviderID string

for link := range linkList.All() {
ids = append(ids, link.Metadata().ID())

infraProviderID, _ = link.Metadata().Annotations().Get(omni.LabelInfraProviderID)

require.NotEmpty(t, infraProviderID)

rtestutils.AssertResource[*infra.Machine](ctx, t, omniState, link.Metadata().ID(), func(res *infra.Machine, assertion *assert.Assertions) {
assertion.False(res.TypedSpec().Value.Accepted)
})

rtestutils.AssertNoResource[*infra.MachineStatus](ctx, t, omniState, link.Metadata().ID())

rtestutils.AssertNoResource[*omni.Machine](ctx, t, omniState, link.Metadata().ID())

// Accept the machine
infraMachineConfig := omni.NewInfraMachineConfig(resources.DefaultNamespace, link.Metadata().ID())

infraMachineConfig.TypedSpec().Value.Accepted = true

require.NoError(t, omniState.Create(ctx, infraMachineConfig))
}

logger.Info("accepted machines", zap.String("infra_provider_id", infraProviderID), zap.Strings("machine_ids", ids))

providerStatus, err := safe.StateGetByID[*infra.ProviderStatus](ctx, omniState, infraProviderID)
require.NoError(t, err)

_, isStaticProvider := providerStatus.Metadata().Labels().Get(omni.LabelIsStaticInfraProvider)
require.True(t, isStaticProvider)

// Assert that the infra.Machines are now marked as accepted
rtestutils.AssertResources(ctx, t, omniState, ids, func(res *infra.Machine, assertion *assert.Assertions) {
assertion.True(res.TypedSpec().Value.Accepted)
})

// Assert that omni.Machine resources are now created and marked as managed by the static infra provider
rtestutils.AssertResources(ctx, t, omniState, ids, func(res *omni.Machine, assertion *assert.Assertions) {
_, isManagedByStaticInfraProvider := res.Metadata().Labels().Get(omni.LabelIsManagedByStaticInfraProvider)

assertion.True(isManagedByStaticInfraProvider)
})

// Assert that infra.MachineStatus resources are now created, and they are marked as ready to use
rtestutils.AssertResources(ctx, t, omniState, ids, func(res *infra.MachineStatus, assertion *assert.Assertions) {
assertion.True(res.TypedSpec().Value.ReadyToUse)
})
}
}
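
The acceptance step above is a single resource creation; a minimal standalone sketch, assuming an established Omni client cli and a known machineID (both hypothetical names here):

// Accepting a machine amounts to creating an InfraMachineConfig with Accepted set,
// using the same calls the test above makes.
infraMachineConfig := omni.NewInfraMachineConfig(resources.DefaultNamespace, machineID)
infraMachineConfig.TypedSpec().Value.Accepted = true

if err := cli.Omni().State().Create(ctx, infraMachineConfig); err != nil {
	return err // error handling is up to the caller
}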

// AssertInfraMachinesAreAllocated asserts that the machines that belong to the given cluster and are managed by a static infra provider
// are marked as allocated in the related resources.
func AssertInfraMachinesAreAllocated(testCtx context.Context, omniState state.State, clusterID, talosVersion string, extensions []string) TestFunc {
return func(t *testing.T) {
ctx, cancel := context.WithTimeout(testCtx, time.Minute*10)
defer cancel()

nodeList, err := safe.StateListAll[*omni.MachineSetNode](ctx, omniState, state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, clusterID)))
require.NoError(t, err)

require.Greater(t, nodeList.Len(), 0)

for machineSetNode := range nodeList.All() {
id := machineSetNode.Metadata().ID()

// there must be an infra.Machine resource for each node
rtestutils.AssertResource[*infra.Machine](ctx, t, omniState, id, func(res *infra.Machine, assertion *assert.Assertions) {
assertion.Equal(talosVersion, res.TypedSpec().Value.ClusterTalosVersion)
assertion.Empty(res.TypedSpec().Value.WipeId)
assertion.Equal(extensions, res.TypedSpec().Value.Extensions)
})

// machine is allocated, so the ReadyToUse field is set to false
rtestutils.AssertResource[*infra.MachineStatus](ctx, t, omniState, id, func(res *infra.MachineStatus, assertion *assert.Assertions) {
assertion.False(res.TypedSpec().Value.ReadyToUse)
})

// omni receives a SequenceEvent from the SideroLink event sink and sets the Installed field to true
rtestutils.AssertResource[*infra.MachineState](ctx, t, omniState, id, func(res *infra.MachineState, assertion *assert.Assertions) {
assertion.True(res.TypedSpec().Value.Installed)
})
}
}
}
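
In the suite below, this assertion runs right after UpdateExtensions, so it also verifies that the configured extension list propagated to the infra.Machine resources:

AssertInfraMachinesAreAllocated(ctx, rootClient.Omni().State(), "integration-static-infra-provider",
	options.MachineOptions.TalosVersion, []string{"siderolabs/binfmt-misc", "siderolabs/glibc"})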

// AssertAllInfraMachinesAreUnallocated asserts that all infra machines are unallocated.
func AssertAllInfraMachinesAreUnallocated(testCtx context.Context, omniState state.State) TestFunc {
return func(t *testing.T) {
logger := zaptest.NewLogger(t)

ctx, cancel := context.WithTimeout(testCtx, time.Minute*10)
defer cancel()

infraMachineList, err := safe.StateListAll[*infra.Machine](ctx, omniState)
require.NoError(t, err)

require.Greater(t, infraMachineList.Len(), 0)

for infraMachine := range infraMachineList.All() {
id := infraMachine.Metadata().ID()

rtestutils.AssertResource[*infra.Machine](ctx, t, omniState, id, func(res *infra.Machine, assertion *assert.Assertions) {
assertion.Empty(res.TypedSpec().Value.ClusterTalosVersion)
assertion.Empty(res.TypedSpec().Value.Extensions)

if assertion.NotEmpty(res.TypedSpec().Value.WipeId) { // the machine should be marked for wipe
logger.Info("machine is marked for wipe", zap.String("machine_id", id), zap.String("wipe_id", res.TypedSpec().Value.WipeId))
}
})

// machine is unallocated, so the ReadyToUse field will be set to true
rtestutils.AssertResource[*infra.MachineStatus](ctx, t, omniState, id, func(res *infra.MachineStatus, assertion *assert.Assertions) {
assertion.True(res.TypedSpec().Value.ReadyToUse)
})

// provider wipes the machine and sets the Installed field to false
rtestutils.AssertResource[*infra.MachineState](ctx, t, omniState, id, func(res *infra.MachineState, assertion *assert.Assertions) {
assertion.False(res.TypedSpec().Value.Installed)
})
}
}
}

// DestroyInfraMachines removes siderolink.Link resources for all machines managed by a static infra provider,
// and asserts that the related infra.Machine and infra.MachineStatus resources are deleted.
func DestroyInfraMachines(testCtx context.Context, omniState state.State) TestFunc {
return func(t *testing.T) {
ctx, cancel := context.WithTimeout(testCtx, time.Minute*10)
defer cancel()

machineList, err := safe.StateListAll[*omni.Machine](ctx, omniState, state.WithLabelQuery(resource.LabelExists(omni.LabelIsManagedByStaticInfraProvider)))
require.NoError(t, err)

require.Greater(t, machineList.Len(), 0)

for machine := range machineList.All() {
id := machine.Metadata().ID()

rtestutils.Destroy[*siderolink.Link](ctx, t, omniState, []string{id})

rtestutils.AssertNoResource[*infra.Machine](ctx, t, omniState, id)
rtestutils.AssertNoResource[*infra.MachineStatus](ctx, t, omniState, id)
}
}
}
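
Destroying the siderolink.Link is the trigger here: once a link is removed, Omni is expected to clean up the corresponding infra.Machine and infra.MachineStatus. The suite registers this as the final subtest:

subTest{
	"InfraMachinesShouldBeDestroyed",
	DestroyInfraMachines(ctx, rootClient.Omni().State()),
},
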
97 changes: 97 additions & 0 deletions cmd/integration-test/pkg/tests/tests.go
@@ -70,6 +70,7 @@ type Options struct {
 	AnotherKubernetesVersion string
 	OmnictlPath              string
 	ScalingTimeout           time.Duration
+	StaticInfraProvider      string
 }
 
 func (o Options) defaultInfraProvider() string {
@@ -1331,6 +1332,102 @@ Test flow of cluster creation and scaling using cluster templates.`,
),
Finalizer: DestroyCluster(ctx, rootClient.Omni().State(), "integration-workload-proxy"),
},
{
Name: "StaticInfraProvider",
Description: `
Tests common Omni operations on machines created by a static infrastructure provider:
- expect all machines to be unaccepted and accept them
- assert that machines are ready to use
- create a 3+1 cluster - assert that cluster is healthy and ready
- assert that machines are not ready to use (occupied)
- destroy the cluster - assert that machines are wiped, then marked as ready to use
- create a new 3+1 cluster
- assert that cluster is healthy and ready
- remove links of the machines
- assert that infra resources for those machines are removed
`,
Parallel: true,
Subtests: subTests(
subTest{
"AcceptMachines",
AcceptInfraMachines(ctx, rootClient.Omni().State(), options.ExpectedMachines),
},
subTest{
"ClusterShouldBeCreated",
CreateCluster(ctx, rootClient, ClusterOptions{
Name: "integration-static-infra-provider",
ControlPlanes: 3,
Workers: 1,

MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,

SkipExtensionCheckOnCreate: true,
}),
},
).Append(
TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady(
ctx, rootClient,
"integration-static-infra-provider",
options.MachineOptions.TalosVersion,
options.MachineOptions.KubernetesVersion,
talosAPIKeyPrepare,
)...,
).Append(
subTest{
"ExtensionsShouldBeUpdated",
UpdateExtensions(ctx, rootClient, "integration-static-infra-provider", []string{"siderolabs/binfmt-misc", "siderolabs/glibc"}),
},
subTest{
"MachinesShouldBeAllocated",
AssertInfraMachinesAreAllocated(ctx, rootClient.Omni().State(), "integration-static-infra-provider",
options.MachineOptions.TalosVersion, []string{"siderolabs/binfmt-misc", "siderolabs/glibc"}),
},
).Append(
subTest{
"ClusterShouldBeDestroyed",
AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-static-infra-provider", false),
},
).Append(
subTest{
"MachinesShouldBeUnallocated",
AssertAllInfraMachinesAreUnallocated(ctx, rootClient.Omni().State()),
},
).Append(
subTest{
"ClusterShouldBeRecreated",
CreateCluster(ctx, rootClient, ClusterOptions{
Name: "integration-static-infra-provider",
ControlPlanes: 3,
Workers: 1,

MachineOptions: options.MachineOptions,
ScalingTimeout: options.ScalingTimeout,

SkipExtensionCheckOnCreate: true,
}),
},
).Append(
TestBlockClusterAndTalosAPIAndKubernetesShouldBeReady(
ctx, rootClient,
"integration-static-infra-provider",
options.MachineOptions.TalosVersion,
options.MachineOptions.KubernetesVersion,
talosAPIKeyPrepare,
)...,
).Append(
subTest{
"ClusterShouldBeDestroyed",
AssertDestroyCluster(ctx, rootClient.Omni().State(), "integration-static-infra-provider", false),
},
subTest{
"InfraMachinesShouldBeDestroyed",
DestroyInfraMachines(ctx, rootClient.Omni().State()),
},
),
Finalizer: DestroyCluster(ctx, rootClient.Omni().State(), "integration-static-infra-provider"),
},
}

var re *regexp.Regexp
