From 53780f682f8ba3201119d5a69eafe8b3d12eb61d Mon Sep 17 00:00:00 2001
From: Alex Ott
Date: Tue, 20 Aug 2024 15:51:41 +0200
Subject: [PATCH] [Exporter] Better support for notebooks with /Workspace path (#3901)

## Changes

Jobs, DLT, and other services are switching to paths that start with `/Workspace` for all objects (notebooks, ...), not only for workspace files. This PR improves the exporter's handling of such objects.

Resolves #3195
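To make the intent concrete, here is a minimal, self-contained sketch (not the exporter code itself; the real helper lives in `exporter/util_workspace.go` below, and the sample path comes from the updated `get-job-12.json` fixture) of the normalization that notebook paths now need:

```go
package main

import (
	"fmt"
	"strings"
)

// normalizeWorkspacePath sketches the idea: job/DLT definitions may now refer to
// notebooks with a "/Workspace"-prefixed path, so the prefix has to be dropped
// before the exporter matches the object against notebooks, workspace files, or repos.
func normalizeWorkspacePath(path string) string {
	if strings.HasPrefix(path, "/Workspace/") {
		return strings.TrimPrefix(path, "/Workspace")
	}
	return path
}

func main() {
	// Both spellings should resolve to the same notebook.
	fmt.Println(normalizeWorkspacePath("/Workspace/Users/test@test.com/Test")) // /Users/test@test.com/Test
	fmt.Println(normalizeWorkspacePath("/Users/test@test.com/Test"))           // /Users/test@test.com/Test
}
```

Repo paths (`/Repos/...` or `/Workspace/Repos/...`) are detected separately and emitted as `databricks_repo` instead.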
"job_cluster.new_cluster.init_scripts.dbfs.destination", Resource: "databricks_dbfs_file", Match: "dbfs_path"}, {Path: "job_cluster.new_cluster.init_scripts.volumes.destination", Resource: "databricks_file"}, {Path: "job_cluster.new_cluster.init_scripts.workspace.destination", Resource: "databricks_workspace_file"}, + {Path: "job_cluster.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"}, {Path: "job_cluster.new_cluster.instance_pool_id", Resource: "databricks_instance_pool"}, {Path: "job_cluster.new_cluster.policy_id", Resource: "databricks_cluster_policy"}, {Path: "run_as.service_principal_name", Resource: "databricks_service_principal", Match: "application_id"}, @@ -400,16 +400,17 @@ var resourcesMap map[string]importable = map[string]importable{ {Path: "task.library.requirements", Resource: "databricks_file"}, {Path: "task.library.requirements", Resource: "databricks_workspace_file", Match: "workspace_path"}, {Path: "task.new_cluster.aws_attributes.instance_profile_arn", Resource: "databricks_instance_profile"}, - {Path: "task.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"}, {Path: "task.new_cluster.init_scripts.dbfs.destination", Resource: "databricks_dbfs_file", Match: "dbfs_path"}, {Path: "task.new_cluster.init_scripts.volumes.destination", Resource: "databricks_file"}, {Path: "task.new_cluster.init_scripts.workspace.destination", Resource: "databricks_workspace_file"}, {Path: "task.new_cluster.instance_pool_id", Resource: "databricks_instance_pool"}, + {Path: "task.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"}, {Path: "task.new_cluster.policy_id", Resource: "databricks_cluster_policy"}, {Path: "task.notebook_task.base_parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"}, {Path: "task.notebook_task.base_parameters", Resource: "databricks_file"}, {Path: "task.notebook_task.base_parameters", Resource: "databricks_workspace_file", Match: "workspace_path"}, {Path: "task.notebook_task.notebook_path", Resource: "databricks_notebook"}, + {Path: "task.notebook_task.notebook_path", Resource: "databricks_notebook", Match: "workspace_path"}, {Path: "task.notebook_task.warehouse_id", Resource: "databricks_sql_endpoint"}, {Path: "task.pipeline_task.pipeline_id", Resource: "databricks_pipeline"}, {Path: "task.python_wheel_task.named_parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"}, @@ -469,6 +470,8 @@ var resourcesMap map[string]importable = map[string]importable{ MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "task.notebook_task.notebook_path", Resource: "databricks_repo", Match: "path", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, + {Path: "task.notebook_task.notebook_path", Resource: "databricks_repo", Match: "workspace_path", + MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "task.python_wheel_task.named_parameters", Resource: "databricks_repo", Match: "workspace_path", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "task.python_wheel_task.parameters", Resource: "databricks_repo", Match: "workspace_path", @@ -477,6 +480,8 @@ var resourcesMap map[string]importable = map[string]importable{ MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "task.spark_python_task.python_file", Resource: "databricks_repo", Match: "path", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, + 
{Path: "task.spark_python_task.python_file", Resource: "databricks_repo", Match: "workspace_path", + MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "task.spark_jar_task.parameters", Resource: "databricks_repo", Match: "workspace_path", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "task.spark_submit_task.parameters", Resource: "databricks_repo", Match: "workspace_path", @@ -597,10 +602,12 @@ var resourcesMap map[string]importable = map[string]importable{ ic.emitFilesFromMap(task.RunJobTask.JobParameters) } ic.importCluster(task.NewCluster) - ic.Emit(&resource{ - Resource: "databricks_cluster", - ID: task.ExistingClusterId, - }) + if task.ExistingClusterId != "" { + ic.Emit(&resource{ + Resource: "databricks_cluster", + ID: task.ExistingClusterId, + }) + } ic.emitLibraries(task.Libraries) if task.WebhookNotifications != nil { @@ -720,6 +727,8 @@ var resourcesMap map[string]importable = map[string]importable{ if strings.HasSuffix(pathString, ".notebook_task.0.source") && js.GitSource == nil && d.Get(pathString).(string) == "WORKSPACE" { return true } + // TODO: add should omit for new cluster in the task? + // TODO: double check it } if res := jobClustersRegex.FindStringSubmatch(pathString); res != nil { // analyze job clusters return makeShouldOmitFieldForCluster(jobClustersRegex)(ic, pathString, as, d) @@ -2122,15 +2131,21 @@ var resourcesMap map[string]importable = map[string]importable{ {Path: "configuration", Resource: "databricks_file"}, {Path: "configuration", Resource: "databricks_workspace_file", Match: "workspace_path"}, {Path: "library.notebook.path", Resource: "databricks_notebook"}, + {Path: "library.notebook.path", Resource: "databricks_notebook", Match: "workspace_path"}, {Path: "library.file.path", Resource: "databricks_workspace_file"}, + {Path: "library.file.path", Resource: "databricks_workspace_file", Match: "workspace_path"}, {Path: "library.jar", Resource: "databricks_dbfs_file", Match: "dbfs_path"}, {Path: "library.whl", Resource: "databricks_dbfs_file", Match: "dbfs_path"}, {Path: "configuration", Resource: "databricks_repo", Match: "workspace_path", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "library.notebook.path", Resource: "databricks_repo", Match: "path", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, + {Path: "library.notebook.path", Resource: "databricks_repo", Match: "workspace_path", + MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "library.file.path", Resource: "databricks_repo", Match: "path", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, + {Path: "library.file.path", Resource: "databricks_repo", Match: "workspace_path", + MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, {Path: "cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path", MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName}, }, diff --git a/exporter/test-data/get-job-12.json b/exporter/test-data/get-job-12.json index 0f6ed0108e..a5ec0bf502 100644 --- a/exporter/test-data/get-job-12.json +++ b/exporter/test-data/get-job-12.json @@ -11,7 +11,7 @@ "task_key": "test", "existing_cluster_id": "test2", "notebook_task": { - "notebook_path": "/Users/test@test.com/Test" + "notebook_path": "/Workspace/Users/test@test.com/Test" } } ], diff --git a/exporter/util_workspace.go 
diff --git a/exporter/util_workspace.go b/exporter/util_workspace.go
index 0361100592..388c2b57e1 100644
--- a/exporter/util_workspace.go
+++ b/exporter/util_workspace.go
@@ -41,25 +41,35 @@ func (ic *importContext) emitRepoByPath(path string) {
     }
 }

+func isRepoPath(path string) bool {
+    return strings.HasPrefix(path, "/Repos") || strings.HasPrefix(path, "/Workspace/Repos")
+}
+
+func maybeStringWorkspacePrefix(path string) string {
+    if strings.HasPrefix(path, "/Workspace/") {
+        return path[10:]
+    }
+    return path
+}
+
 func (ic *importContext) emitWorkspaceFileOrRepo(path string) {
-    if strings.HasPrefix(path, "/Repos") {
-        ic.emitRepoByPath(path)
+    if isRepoPath(path) {
+        ic.emitRepoByPath(maybeStringWorkspacePrefix(path))
     } else {
         // TODO: wrap this into ic.shouldEmit...
         // TODO: strip /Workspace prefix if it's provided
         ic.Emit(&resource{
             Resource: "databricks_workspace_file",
-            ID:       path,
+            ID:       maybeStringWorkspacePrefix(path),
         })
     }
 }

 func (ic *importContext) emitNotebookOrRepo(path string) {
-    if strings.HasPrefix(path, "/Repos") {
-        ic.emitRepoByPath(path)
+    if isRepoPath(path) {
+        ic.emitRepoByPath(maybeStringWorkspacePrefix(path))
     } else {
-        // TODO: strip /Workspace prefix if it's provided
-        ic.maybeEmitWorkspaceObject("databricks_notebook", path, nil)
+        ic.maybeEmitWorkspaceObject("databricks_notebook", maybeStringWorkspacePrefix(path), nil)
     }
 }