# [Exporter] Better support for notebooks with /Workspace path (#3901)
## Changes

Jobs, DLT, and other services are switching to paths that start with
`/Workspace` for all objects (notebooks, ...), not only for workspace
files. This PR improves the exporter's handling of such objects.

Resolves #3195
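
For illustration, the whole change hinges on treating a `/Workspace`-prefixed path and its bare form as the same object. A minimal standalone sketch of that normalization (illustrative names; the commit adds equivalent helpers in `exporter/util_workspace.go` below):

```go
package main

import (
	"fmt"
	"strings"
)

// stripWorkspacePrefix is an illustrative stand-in for the helper this PR
// adds: "/Workspace/Test" and "/Test" refer to the same workspace object,
// so the exporter normalizes the former to the latter.
func stripWorkspacePrefix(path string) string {
	if strings.HasPrefix(path, "/Workspace/") {
		return strings.TrimPrefix(path, "/Workspace")
	}
	return path
}

func main() {
	fmt.Println(stripWorkspacePrefix("/Workspace/Test")) // "/Test"
	fmt.Println(stripWorkspacePrefix("/Test"))           // unchanged: "/Test"
}
```

This mirrors what the updated test below expects: the job fixture now returns a `notebook_path` of `/Workspace/Test`, yet the generated HCL still contains `notebook_path = "/Test"`.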

## Tests

- [x] `make test` run locally
- [ ] relevant change in `docs/` folder
- [ ] covered with integration tests in `internal/acceptance`
- [ ] relevant acceptance tests are passing
- [ ] using Go SDK
alexott authored Aug 20, 2024
1 parent 44f7689 commit 53780f6
Showing 4 changed files with 68 additions and 27 deletions.
exporter/exporter_test.go: 42 changes (29 additions & 13 deletions)
```diff
@@ -1160,7 +1160,7 @@ func TestImportingJobs_JobList(t *testing.T) {
 },
 },
 NotebookTask: &jobs.NotebookTask{
-NotebookPath: "/Test",
+NotebookPath: "/Workspace/Test",
 },
 PipelineTask: &jobs.PipelineTask{
 PipelineID: "123",
@@ -1289,6 +1289,7 @@ func TestImportingJobs_JobListMultiTask(t *testing.T) {
 qa.HTTPFixturesApply(t,
 []qa.HTTPFixture{
 meAdminFixture,
+noCurrentMetastoreAttached,
 emptyRepos,
 {
 Method: "GET",
@@ -1523,20 +1524,35 @@ func TestImportingJobs_JobListMultiTask(t *testing.T) {
 defer os.RemoveAll(tmpDir)
 ic.Directory = tmpDir
 
-err := ic.Importables["databricks_job"].List(ic)
+err := ic.Run()
 assert.NoError(t, err)
 
-resources := ic.Scope.Sorted()
-for _, res := range resources {
-if res.Resource != "databricks_job" {
-continue
-}
-// simulate complex HCL write
-err = ic.dataToHcl(ic.Importables["databricks_job"], []string{}, ic.Resources["databricks_job"],
-res, hclwrite.NewEmptyFile().Body())
-
-assert.NoError(t, err)
-}
+content, err := os.ReadFile(tmpDir + "/jobs.tf")
+assert.NoError(t, err)
+contentStr := string(content)
+assert.True(t, strings.Contains(contentStr, `resource "databricks_job" "dummy_14"`))
+assert.True(t, strings.Contains(contentStr, `spark_jar_task {
+main_class_name = "com.databricks.examples.ProjectDriver"
+jar_uri = databricks_dbfs_file._0eee4efe7411a5bdca65d7b79188026c_test_jar.dbfs_path
+}`))
+assert.True(t, strings.Contains(contentStr, `run_job_task {
+job_id = databricks_job.dummy_14.id
+}`))
+assert.True(t, strings.Contains(contentStr, `notebook_task {
+notebook_path = "/Test"
+}`))
+assert.True(t, strings.Contains(contentStr, `library {
+jar = databricks_dbfs_file._0eee4efe7411a5bdca65d7b79188026c_test_jar.dbfs_path
+}`))
+assert.True(t, strings.Contains(contentStr, `job_cluster {
+new_cluster {
+spark_version = "6.4.x-scala2.11"
+policy_id = "123"
+num_workers = 2
+instance_pool_id = databricks_instance_pool.pool_1.id
+}
+job_cluster_key = "shared"
+}`))
 })
 }
 
```
exporter/importables.go: 27 changes (21 additions & 6 deletions)
```diff
@@ -380,10 +380,10 @@ var resourcesMap map[string]importable = map[string]importable{
 },
 Depends: []reference{
 {Path: "job_cluster.new_cluster.aws_attributes.instance_profile_arn", Resource: "databricks_instance_profile"},
+{Path: "job_cluster.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
 {Path: "job_cluster.new_cluster.init_scripts.dbfs.destination", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
 {Path: "job_cluster.new_cluster.init_scripts.volumes.destination", Resource: "databricks_file"},
 {Path: "job_cluster.new_cluster.init_scripts.workspace.destination", Resource: "databricks_workspace_file"},
-{Path: "job_cluster.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
 {Path: "job_cluster.new_cluster.instance_pool_id", Resource: "databricks_instance_pool"},
 {Path: "job_cluster.new_cluster.policy_id", Resource: "databricks_cluster_policy"},
 {Path: "run_as.service_principal_name", Resource: "databricks_service_principal", Match: "application_id"},
@@ -400,16 +400,17 @@ var resourcesMap map[string]importable = map[string]importable{
{Path: "task.library.requirements", Resource: "databricks_file"},
{Path: "task.library.requirements", Resource: "databricks_workspace_file", Match: "workspace_path"},
{Path: "task.new_cluster.aws_attributes.instance_profile_arn", Resource: "databricks_instance_profile"},
{Path: "task.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
{Path: "task.new_cluster.init_scripts.dbfs.destination", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
{Path: "task.new_cluster.init_scripts.volumes.destination", Resource: "databricks_file"},
{Path: "task.new_cluster.init_scripts.workspace.destination", Resource: "databricks_workspace_file"},
{Path: "task.new_cluster.instance_pool_id", Resource: "databricks_instance_pool"},
{Path: "task.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
{Path: "task.new_cluster.policy_id", Resource: "databricks_cluster_policy"},
{Path: "task.notebook_task.base_parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
{Path: "task.notebook_task.base_parameters", Resource: "databricks_file"},
{Path: "task.notebook_task.base_parameters", Resource: "databricks_workspace_file", Match: "workspace_path"},
{Path: "task.notebook_task.notebook_path", Resource: "databricks_notebook"},
{Path: "task.notebook_task.notebook_path", Resource: "databricks_notebook", Match: "workspace_path"},
{Path: "task.notebook_task.warehouse_id", Resource: "databricks_sql_endpoint"},
{Path: "task.pipeline_task.pipeline_id", Resource: "databricks_pipeline"},
{Path: "task.python_wheel_task.named_parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
@@ -469,6 +470,8 @@ var resourcesMap map[string]importable = map[string]importable{
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.notebook_task.notebook_path", Resource: "databricks_repo", Match: "path",
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
+{Path: "task.notebook_task.notebook_path", Resource: "databricks_repo", Match: "workspace_path",
+MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.python_wheel_task.named_parameters", Resource: "databricks_repo", Match: "workspace_path",
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.python_wheel_task.parameters", Resource: "databricks_repo", Match: "workspace_path",
@@ -477,6 +480,8 @@ var resourcesMap map[string]importable = map[string]importable{
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.spark_python_task.python_file", Resource: "databricks_repo", Match: "path",
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
+{Path: "task.spark_python_task.python_file", Resource: "databricks_repo", Match: "workspace_path",
+MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.spark_jar_task.parameters", Resource: "databricks_repo", Match: "workspace_path",
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.spark_submit_task.parameters", Resource: "databricks_repo", Match: "workspace_path",
@@ -597,10 +602,12 @@ var resourcesMap map[string]importable = map[string]importable{
 ic.emitFilesFromMap(task.RunJobTask.JobParameters)
 }
 ic.importCluster(task.NewCluster)
-ic.Emit(&resource{
-Resource: "databricks_cluster",
-ID: task.ExistingClusterId,
-})
+if task.ExistingClusterId != "" {
+ic.Emit(&resource{
+Resource: "databricks_cluster",
+ID: task.ExistingClusterId,
+})
+}
 ic.emitLibraries(task.Libraries)
 
 if task.WebhookNotifications != nil {
@@ -720,6 +727,8 @@ var resourcesMap map[string]importable = map[string]importable{
 if strings.HasSuffix(pathString, ".notebook_task.0.source") && js.GitSource == nil && d.Get(pathString).(string) == "WORKSPACE" {
 return true
 }
+// TODO: add should omit for new cluster in the task?
+// TODO: double check it
 }
 if res := jobClustersRegex.FindStringSubmatch(pathString); res != nil { // analyze job clusters
 return makeShouldOmitFieldForCluster(jobClustersRegex)(ic, pathString, as, d)
@@ -2122,15 +2131,21 @@ var resourcesMap map[string]importable = map[string]importable{
{Path: "configuration", Resource: "databricks_file"},
{Path: "configuration", Resource: "databricks_workspace_file", Match: "workspace_path"},
{Path: "library.notebook.path", Resource: "databricks_notebook"},
{Path: "library.notebook.path", Resource: "databricks_notebook", Match: "workspace_path"},
{Path: "library.file.path", Resource: "databricks_workspace_file"},
{Path: "library.file.path", Resource: "databricks_workspace_file", Match: "workspace_path"},
{Path: "library.jar", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
{Path: "library.whl", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
{Path: "configuration", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "library.notebook.path", Resource: "databricks_repo", Match: "path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "library.notebook.path", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "library.file.path", Resource: "databricks_repo", Match: "path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "library.file.path", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
},
```
exporter/test-data/get-job-12.json: 2 changes (1 addition & 1 deletion)
```diff
@@ -11,7 +11,7 @@
"task_key": "test",
"existing_cluster_id": "test2",
"notebook_task": {
"notebook_path": "/Users/[email protected]/Test"
"notebook_path": "/Workspace/Users/[email protected]/Test"
}
}
],
```
exporter/util_workspace.go: 24 changes (17 additions & 7 deletions)
```diff
@@ -41,25 +41,35 @@ func (ic *importContext) emitRepoByPath(path string) {
 }
 }
 
+func isRepoPath(path string) bool {
+return strings.HasPrefix(path, "/Repos") || strings.HasPrefix(path, "/Workspace/Repos")
+}
+
+func maybeStringWorkspacePrefix(path string) string {
+if strings.HasPrefix(path, "/Workspace/") {
+return path[10:]
+}
+return path
+}
+
 func (ic *importContext) emitWorkspaceFileOrRepo(path string) {
-if strings.HasPrefix(path, "/Repos") {
-ic.emitRepoByPath(path)
+if isRepoPath(path) {
+ic.emitRepoByPath(maybeStringWorkspacePrefix(path))
 } else {
 // TODO: wrap this into ic.shouldEmit...
 // TODO: strip /Workspace prefix if it's provided
 ic.Emit(&resource{
 Resource: "databricks_workspace_file",
-ID: path,
+ID: maybeStringWorkspacePrefix(path),
 })
 }
 }
 
 func (ic *importContext) emitNotebookOrRepo(path string) {
-if strings.HasPrefix(path, "/Repos") {
-ic.emitRepoByPath(path)
+if isRepoPath(path) {
+ic.emitRepoByPath(maybeStringWorkspacePrefix(path))
 } else {
-// TODO: strip /Workspace prefix if it's provided
-ic.maybeEmitWorkspaceObject("databricks_notebook", path, nil)
+ic.maybeEmitWorkspaceObject("databricks_notebook", maybeStringWorkspacePrefix(path), nil)
 }
 }
 
```
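
As a quick sanity check, the new helpers could be exercised with a small table of paths (a hypothetical test sketch, not part of this commit; it assumes the testify `assert` package already used by the exporter tests):

```go
func TestWorkspacePathHelpers(t *testing.T) {
	// Both spellings of a repo path should be recognized.
	assert.True(t, isRepoPath("/Repos/user@domain.com/project"))
	assert.True(t, isRepoPath("/Workspace/Repos/user@domain.com/project"))
	assert.False(t, isRepoPath("/Workspace/Users/user@domain.com/Test"))

	// The /Workspace prefix is stripped; bare paths pass through unchanged.
	assert.Equal(t, "/Users/user@domain.com/Test",
		maybeStringWorkspacePrefix("/Workspace/Users/user@domain.com/Test"))
	assert.Equal(t, "/Test", maybeStringWorkspacePrefix("/Test"))
}
```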
