From 296f6003e992a47a114b71bece0db3c6e73b9156 Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Thu, 9 Jan 2025 12:39:47 +0100 Subject: [PATCH] [Exporter] Use Go SDK to list clusters This helps us with the following: - use paginated output that is more efficient - automatically filter non-interactive clusters instead of doing it ourselves P.S. We still have usage of 2.0 API coming from mounts and some other places --- exporter/exporter_test.go | 17 ++-- exporter/impl_compute.go | 91 +++++++++++++++++++ exporter/importables.go | 37 +------- exporter/importables_test.go | 4 +- .../test-data/clusters-list-response.json | 5 - exporter/util_compute.go | 42 --------- 6 files changed, 103 insertions(+), 93 deletions(-) create mode 100644 exporter/impl_compute.go diff --git a/exporter/exporter_test.go b/exporter/exporter_test.go index a3bc50b27d..7fd255b258 100644 --- a/exporter/exporter_test.go +++ b/exporter/exporter_test.go @@ -192,7 +192,7 @@ func TestImportingMounts(t *testing.T) { { Method: "POST", ReuseRequest: true, - Resource: "/api/2.0/clusters/events", + Resource: "/api/2.1/clusters/events", Response: clusters.EventsResponse{ Events: []clusters.ClusterEvent{}, }, @@ -674,7 +674,7 @@ func TestImportingUsersGroupsSecretScopes(t *testing.T) { }, { Method: "GET", - Resource: "/api/2.0/clusters/list", + Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=UI&filter_by.cluster_sources=API&page_size=100", Response: clusters.ClusterList{}, }, { @@ -808,7 +808,7 @@ func TestImportingNoResourcesError(t *testing.T) { }, { Method: "GET", - Resource: "/api/2.0/clusters/list", + Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=UI&filter_by.cluster_sources=API&page_size=100", Response: clusters.ClusterList{}, }, { @@ -862,7 +862,7 @@ func TestImportingClusters(t *testing.T) { }, { Method: "GET", - Resource: "/api/2.0/clusters/list", + Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=UI&filter_by.cluster_sources=API&page_size=100", Response: getJSONObject("test-data/clusters-list-response.json"), ReuseRequest: true, }, @@ -934,9 +934,10 @@ func TestImportingClusters(t *testing.T) { Response: getJSONObject("test-data/get-cluster-awscluster-response.json"), }, { - Method: "GET", - Resource: "/api/2.0/libraries/cluster-status?cluster_id=awscluster", - Response: getJSONObject("test-data/libraries-cluster-status-test2.json"), + Method: "GET", + Resource: "/api/2.0/libraries/cluster-status?cluster_id=awscluster", + Response: getJSONObject("test-data/libraries-cluster-status-test2.json"), + ReuseRequest: true, }, { Method: "GET", @@ -1588,7 +1589,7 @@ func TestImportingSecrets(t *testing.T) { }, { Method: "GET", - Resource: "/api/2.0/clusters/list", + Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=UI&filter_by.cluster_sources=API&page_size=100", Response: clusters.ClusterList{}, }, { diff --git a/exporter/impl_compute.go b/exporter/impl_compute.go new file mode 100644 index 0000000000..5bc08f22cb --- /dev/null +++ b/exporter/impl_compute.go @@ -0,0 +1,91 @@ +package exporter + +import ( + "log" + "strings" + + sdk_compute "github.com/databricks/databricks-sdk-go/service/compute" +) + +func listClusters(ic *importContext) error { + lastActiveMs := ic.getLastActiveMs() + interactiveClusters := []sdk_compute.ClusterSource{sdk_compute.ClusterSourceUi, sdk_compute.ClusterSourceApi} + + it := ic.workspaceClient.Clusters.List(ic.Context, sdk_compute.ListClustersRequest{ + FilterBy: &sdk_compute.ListClustersFilterBy{ + ClusterSources: interactiveClusters, + }, + PageSize: 100, + }) + i := 0 + for it.HasNext(ic.Context) { + c, err := it.Next(ic.Context) + if err != nil { + return err + } + i++ + + if strings.HasPrefix(c.ClusterName, "terraform-") { + log.Printf("[INFO] Skipping terraform-specific cluster %s", c.ClusterName) + continue + } + if !ic.MatchesName(c.ClusterName) { + log.Printf("[INFO] Skipping %s because it doesn't match %s", c.ClusterName, ic.match) + continue + } + if c.LastRestartedTime > 0 && c.LastRestartedTime < lastActiveMs { + log.Printf("[INFO] Old inactive cluster %s", c.ClusterName) + continue + } + ic.Emit(&resource{ + Resource: "databricks_cluster", + ID: c.ClusterId, + }) + if i%50 == 0 { + log.Printf("[INFO] Scanned %d clusters", i) + } + } + return nil +} + +func (ic *importContext) importCluster(c *sdk_compute.ClusterSpec) { + if c == nil { + return + } + if c.AwsAttributes != nil && c.AwsAttributes.InstanceProfileArn != "" { + ic.Emit(&resource{ + Resource: "databricks_instance_profile", + ID: c.AwsAttributes.InstanceProfileArn, + }) + } + if c.InstancePoolId != "" { + // set enable_elastic_disk to false, and remove aws/gcp/azure_attributes + ic.Emit(&resource{ + Resource: "databricks_instance_pool", + ID: c.InstancePoolId, + }) + } + if c.DriverInstancePoolId != "" { + ic.Emit(&resource{ + Resource: "databricks_instance_pool", + ID: c.DriverInstancePoolId, + }) + } + if c.PolicyId != "" { + ic.Emit(&resource{ + Resource: "databricks_cluster_policy", + ID: c.PolicyId, + }) + } + ic.emitInitScripts(c.InitScripts) + ic.emitSecretsFromSecretsPathMap(c.SparkConf) + ic.emitSecretsFromSecretsPathMap(c.SparkEnvVars) + ic.emitUserOrServicePrincipal(c.SingleUserName) + if c.Kind.String() != "" && c.SingleUserName != "" { + ic.Emit(&resource{ + Resource: "databricks_group", + Attribute: "display_name", + Value: c.SingleUserName, + }) + } +} diff --git a/exporter/importables.go b/exporter/importables.go index 14b01778fc..71bd04fb36 100644 --- a/exporter/importables.go +++ b/exporter/importables.go @@ -28,7 +28,6 @@ import ( "github.com/databricks/databricks-sdk-go/service/vectorsearch" sdk_workspace "github.com/databricks/databricks-sdk-go/service/workspace" tfcatalog "github.com/databricks/terraform-provider-databricks/catalog" - "github.com/databricks/terraform-provider-databricks/clusters" "github.com/databricks/terraform-provider-databricks/common" "github.com/databricks/terraform-provider-databricks/jobs" "github.com/databricks/terraform-provider-databricks/mws" @@ -329,41 +328,7 @@ var resourcesMap map[string]importable = map[string]importable{ {Path: "init_scripts.workspace.destination", Resource: "databricks_repo", Match: "path", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, }, - List: func(ic *importContext) error { - clusters, err := clusters.NewClustersAPI(ic.Context, ic.Client).List() - if err != nil { - return err - } - lastActiveMs := ic.getLastActiveMs() - nonInteractiveClusters := []string{"JOB", "MODELS", "PIPELINE_MAINTENANCE", "PIPELINE", "SQL"} - for offset, c := range clusters { - if slices.Contains(nonInteractiveClusters, string(c.ClusterSource)) { - // TODO: Should we check cluster name as well? - // jobRunClusterNameRegex = regexp.MustCompile(`^job-\d+-run-\d+$`) - // jobRunClusterNameRegex.MatchString(c.ClusterName) - log.Printf("[INFO] Skipping non-interactive cluster %s", c.ClusterID) - continue - } - if strings.HasPrefix(c.ClusterName, "terraform-") { - log.Printf("[INFO] Skipping terraform-specific cluster %s", c.ClusterName) - continue - } - if !ic.MatchesName(c.ClusterName) { - log.Printf("[INFO] Skipping %s because it doesn't match %s", c.ClusterName, ic.match) - continue - } - if c.LastActivityTime > 0 && c.LastActivityTime < lastActiveMs { - log.Printf("[INFO] Older inactive cluster %s", c.ClusterName) - continue - } - ic.Emit(&resource{ - Resource: "databricks_cluster", - ID: c.ClusterID, - }) - log.Printf("[INFO] Scanned %d of %d clusters", offset+1, len(clusters)) - } - return nil - }, + List: listClusters, Import: func(ic *importContext, r *resource) error { var c compute.ClusterSpec s := ic.Resources["databricks_cluster"].Schema diff --git a/exporter/importables_test.go b/exporter/importables_test.go index fe99d93f61..e3f612e83b 100644 --- a/exporter/importables_test.go +++ b/exporter/importables_test.go @@ -402,7 +402,7 @@ func TestClusterListFails(t *testing.T) { qa.HTTPFixturesApply(t, []qa.HTTPFixture{ { Method: "GET", - Resource: "/api/2.0/clusters/list", + Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=UI&filter_by.cluster_sources=API&page_size=100", Status: 404, Response: apierr.NotFound("nope"), }, @@ -417,7 +417,7 @@ func TestClusterList_NoNameMatch(t *testing.T) { qa.HTTPFixturesApply(t, []qa.HTTPFixture{ { Method: "GET", - Resource: "/api/2.0/clusters/list", + Resource: "/api/2.1/clusters/list?filter_by.cluster_sources=UI&filter_by.cluster_sources=API&page_size=100", Response: clusters.ClusterList{ Clusters: []clusters.ClusterInfo{ { diff --git a/exporter/test-data/clusters-list-response.json b/exporter/test-data/clusters-list-response.json index 35cfc9fb30..09ca6029d9 100644 --- a/exporter/test-data/clusters-list-response.json +++ b/exporter/test-data/clusters-list-response.json @@ -4,11 +4,6 @@ "cluster_id": "23443", "cluster_name": "terraform-abc" }, - { - "cluster_id": "23443", - "cluster_name": "job-34234234-run-32423432", - "cluster_source": "JOB" - }, { "autotermination_minutes": 120, "azure_attributes": { diff --git a/exporter/util_compute.go b/exporter/util_compute.go index 2c3f3222ea..fbe15d6258 100644 --- a/exporter/util_compute.go +++ b/exporter/util_compute.go @@ -32,48 +32,6 @@ func (ic *importContext) emitInitScripts(initScripts []compute.InitScriptInfo) { } } -func (ic *importContext) importCluster(c *compute.ClusterSpec) { - if c == nil { - return - } - if c.AwsAttributes != nil && c.AwsAttributes.InstanceProfileArn != "" { - ic.Emit(&resource{ - Resource: "databricks_instance_profile", - ID: c.AwsAttributes.InstanceProfileArn, - }) - } - if c.InstancePoolId != "" { - // set enable_elastic_disk to false, and remove aws/gcp/azure_attributes - ic.Emit(&resource{ - Resource: "databricks_instance_pool", - ID: c.InstancePoolId, - }) - } - if c.DriverInstancePoolId != "" { - ic.Emit(&resource{ - Resource: "databricks_instance_pool", - ID: c.DriverInstancePoolId, - }) - } - if c.PolicyId != "" { - ic.Emit(&resource{ - Resource: "databricks_cluster_policy", - ID: c.PolicyId, - }) - } - ic.emitInitScripts(c.InitScripts) - ic.emitSecretsFromSecretsPathMap(c.SparkConf) - ic.emitSecretsFromSecretsPathMap(c.SparkEnvVars) - ic.emitUserOrServicePrincipal(c.SingleUserName) - if c.Kind.String() != "" && c.SingleUserName != "" { - ic.Emit(&resource{ - Resource: "databricks_group", - Attribute: "display_name", - Value: c.SingleUserName, - }) - } -} - func (ic *importContext) emitSecretsFromSecretPathString(v string) { if res := secretPathRegex.FindStringSubmatch(v); res != nil { ic.Emit(&resource{