# [Exporter] Better support for notebooks with /Workspace path (#3901)
## Changes

Jobs, DLT, and other services are switching to paths that start with
`/Workspace` for all objects (notebooks, ...), not only for workspace
files. This PR improves the exporter's handling of such objects.

Resolves #3195
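
For illustration, the whole change hinges on treating a `/Workspace`-prefixed path and its bare form as the same object. A minimal standalone sketch of that normalization (illustrative names; the commit adds equivalent helpers in `exporter/util_workspace.go` below):

```go
package main

import (
	"fmt"
	"strings"
)

// stripWorkspacePrefix is an illustrative stand-in for the helper this PR
// adds: "/Workspace/Test" and "/Test" refer to the same workspace object,
// so the exporter normalizes the former to the latter.
func stripWorkspacePrefix(path string) string {
	if strings.HasPrefix(path, "/Workspace/") {
		return strings.TrimPrefix(path, "/Workspace")
	}
	return path
}

func main() {
	fmt.Println(stripWorkspacePrefix("/Workspace/Test")) // "/Test"
	fmt.Println(stripWorkspacePrefix("/Test"))           // unchanged: "/Test"
}
```

This mirrors what the updated test below expects: the job fixture now returns a `notebook_path` of `/Workspace/Test`, yet the generated HCL still contains `notebook_path = "/Test"`.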

## Tests

- [x] `make test` run locally
- [ ] relevant change in `docs/` folder
- [ ] covered with integration tests in `internal/acceptance`
- [ ] relevant acceptance tests are passing
- [ ] using Go SDK
alexott authored Aug 20, 2024
1 parent 44f7689 commit 53780f6
Showing 4 changed files with 68 additions and 27 deletions.
exporter/exporter_test.go: 42 changes (29 additions & 13 deletions)
```diff
@@ -1160,7 +1160,7 @@ func TestImportingJobs_JobList(t *testing.T) {
 },
 },
 NotebookTask: &jobs.NotebookTask{
-NotebookPath: "/Test",
+NotebookPath: "/Workspace/Test",
 },
 PipelineTask: &jobs.PipelineTask{
 PipelineID: "123",
@@ -1289,6 +1289,7 @@ func TestImportingJobs_JobListMultiTask(t *testing.T) {
 qa.HTTPFixturesApply(t,
 []qa.HTTPFixture{
 meAdminFixture,
+noCurrentMetastoreAttached,
 emptyRepos,
 {
 Method: "GET",
@@ -1523,20 +1524,35 @@ func TestImportingJobs_JobListMultiTask(t *testing.T) {
 defer os.RemoveAll(tmpDir)
 ic.Directory = tmpDir
 
-err := ic.Importables["databricks_job"].List(ic)
+err := ic.Run()
 assert.NoError(t, err)
 
-resources := ic.Scope.Sorted()
-for _, res := range resources {
-if res.Resource != "databricks_job" {
-continue
-}
-// simulate complex HCL write
-err = ic.dataToHcl(ic.Importables["databricks_job"], []string{}, ic.Resources["databricks_job"],
-res, hclwrite.NewEmptyFile().Body())
-
-assert.NoError(t, err)
-}
+content, err := os.ReadFile(tmpDir + "/jobs.tf")
+assert.NoError(t, err)
+contentStr := string(content)
+assert.True(t, strings.Contains(contentStr, `resource "databricks_job" "dummy_14"`))
+assert.True(t, strings.Contains(contentStr, `spark_jar_task {
+main_class_name = "com.databricks.examples.ProjectDriver"
+jar_uri = databricks_dbfs_file._0eee4efe7411a5bdca65d7b79188026c_test_jar.dbfs_path
+}`))
+assert.True(t, strings.Contains(contentStr, `run_job_task {
+job_id = databricks_job.dummy_14.id
+}`))
+assert.True(t, strings.Contains(contentStr, `notebook_task {
+notebook_path = "/Test"
+}`))
+assert.True(t, strings.Contains(contentStr, `library {
+jar = databricks_dbfs_file._0eee4efe7411a5bdca65d7b79188026c_test_jar.dbfs_path
+}`))
+assert.True(t, strings.Contains(contentStr, `job_cluster {
+new_cluster {
+spark_version = "6.4.x-scala2.11"
+policy_id = "123"
+num_workers = 2
+instance_pool_id = databricks_instance_pool.pool_1.id
+}
+job_cluster_key = "shared"
+}`))
 })
 }
 
```
exporter/importables.go: 27 changes (21 additions & 6 deletions)
```diff
@@ -380,10 +380,10 @@ var resourcesMap map[string]importable = map[string]importable{
 },
 Depends: []reference{
 {Path: "job_cluster.new_cluster.aws_attributes.instance_profile_arn", Resource: "databricks_instance_profile"},
+{Path: "job_cluster.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
 {Path: "job_cluster.new_cluster.init_scripts.dbfs.destination", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
 {Path: "job_cluster.new_cluster.init_scripts.volumes.destination", Resource: "databricks_file"},
 {Path: "job_cluster.new_cluster.init_scripts.workspace.destination", Resource: "databricks_workspace_file"},
-{Path: "job_cluster.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
 {Path: "job_cluster.new_cluster.instance_pool_id", Resource: "databricks_instance_pool"},
 {Path: "job_cluster.new_cluster.policy_id", Resource: "databricks_cluster_policy"},
 {Path: "run_as.service_principal_name", Resource: "databricks_service_principal", Match: "application_id"},
@@ -400,16 +400,17 @@ var resourcesMap map[string]importable = map[string]importable{
{Path: "task.library.requirements", Resource: "databricks_file"},
{Path: "task.library.requirements", Resource: "databricks_workspace_file", Match: "workspace_path"},
{Path: "task.new_cluster.aws_attributes.instance_profile_arn", Resource: "databricks_instance_profile"},
{Path: "task.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
{Path: "task.new_cluster.init_scripts.dbfs.destination", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
{Path: "task.new_cluster.init_scripts.volumes.destination", Resource: "databricks_file"},
{Path: "task.new_cluster.init_scripts.workspace.destination", Resource: "databricks_workspace_file"},
{Path: "task.new_cluster.instance_pool_id", Resource: "databricks_instance_pool"},
{Path: "task.new_cluster.driver_instance_pool_id", Resource: "databricks_instance_pool"},
{Path: "task.new_cluster.policy_id", Resource: "databricks_cluster_policy"},
{Path: "task.notebook_task.base_parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
{Path: "task.notebook_task.base_parameters", Resource: "databricks_file"},
{Path: "task.notebook_task.base_parameters", Resource: "databricks_workspace_file", Match: "workspace_path"},
{Path: "task.notebook_task.notebook_path", Resource: "databricks_notebook"},
{Path: "task.notebook_task.notebook_path", Resource: "databricks_notebook", Match: "workspace_path"},
{Path: "task.notebook_task.warehouse_id", Resource: "databricks_sql_endpoint"},
{Path: "task.pipeline_task.pipeline_id", Resource: "databricks_pipeline"},
{Path: "task.python_wheel_task.named_parameters", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
@@ -469,6 +470,8 @@ var resourcesMap map[string]importable = map[string]importable{
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.notebook_task.notebook_path", Resource: "databricks_repo", Match: "path",
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
+{Path: "task.notebook_task.notebook_path", Resource: "databricks_repo", Match: "workspace_path",
+MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.python_wheel_task.named_parameters", Resource: "databricks_repo", Match: "workspace_path",
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.python_wheel_task.parameters", Resource: "databricks_repo", Match: "workspace_path",
@@ -477,6 +480,8 @@ var resourcesMap map[string]importable = map[string]importable{
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.spark_python_task.python_file", Resource: "databricks_repo", Match: "path",
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
+{Path: "task.spark_python_task.python_file", Resource: "databricks_repo", Match: "workspace_path",
+MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.spark_jar_task.parameters", Resource: "databricks_repo", Match: "workspace_path",
 MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
 {Path: "task.spark_submit_task.parameters", Resource: "databricks_repo", Match: "workspace_path",
@@ -597,10 +602,12 @@ var resourcesMap map[string]importable = map[string]importable{
 ic.emitFilesFromMap(task.RunJobTask.JobParameters)
 }
 ic.importCluster(task.NewCluster)
-ic.Emit(&resource{
-Resource: "databricks_cluster",
-ID: task.ExistingClusterId,
-})
+if task.ExistingClusterId != "" {
+ic.Emit(&resource{
+Resource: "databricks_cluster",
+ID: task.ExistingClusterId,
+})
+}
 ic.emitLibraries(task.Libraries)
 
 if task.WebhookNotifications != nil {
@@ -720,6 +727,8 @@ var resourcesMap map[string]importable = map[string]importable{
 if strings.HasSuffix(pathString, ".notebook_task.0.source") && js.GitSource == nil && d.Get(pathString).(string) == "WORKSPACE" {
 return true
 }
+// TODO: add should omit for new cluster in the task?
+// TODO: double check it
 }
 if res := jobClustersRegex.FindStringSubmatch(pathString); res != nil { // analyze job clusters
 return makeShouldOmitFieldForCluster(jobClustersRegex)(ic, pathString, as, d)
@@ -2122,15 +2131,21 @@ var resourcesMap map[string]importable = map[string]importable{
{Path: "configuration", Resource: "databricks_file"},
{Path: "configuration", Resource: "databricks_workspace_file", Match: "workspace_path"},
{Path: "library.notebook.path", Resource: "databricks_notebook"},
{Path: "library.notebook.path", Resource: "databricks_notebook", Match: "workspace_path"},
{Path: "library.file.path", Resource: "databricks_workspace_file"},
{Path: "library.file.path", Resource: "databricks_workspace_file", Match: "workspace_path"},
{Path: "library.jar", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
{Path: "library.whl", Resource: "databricks_dbfs_file", Match: "dbfs_path"},
{Path: "configuration", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "library.notebook.path", Resource: "databricks_repo", Match: "path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "library.notebook.path", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "library.file.path", Resource: "databricks_repo", Match: "path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "library.file.path", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
{Path: "cluster.init_scripts.workspace.destination", Resource: "databricks_repo", Match: "workspace_path",
MatchType: MatchPrefix, SearchValueTransformFunc: appendEndingSlashToDirName},
},
```
exporter/test-data/get-job-12.json: 2 changes (1 addition & 1 deletion)
```diff
@@ -11,7 +11,7 @@
"task_key": "test",
"existing_cluster_id": "test2",
"notebook_task": {
"notebook_path": "/Users/[email protected]/Test"
"notebook_path": "/Workspace/Users/[email protected]/Test"
}
}
],
```
exporter/util_workspace.go: 24 changes (17 additions & 7 deletions)
```diff
@@ -41,25 +41,35 @@ func (ic *importContext) emitRepoByPath(path string) {
 }
 }
 
+func isRepoPath(path string) bool {
+return strings.HasPrefix(path, "/Repos") || strings.HasPrefix(path, "/Workspace/Repos")
+}
+
+func maybeStringWorkspacePrefix(path string) string {
+if strings.HasPrefix(path, "/Workspace/") {
+return path[10:]
+}
+return path
+}
+
 func (ic *importContext) emitWorkspaceFileOrRepo(path string) {
-if strings.HasPrefix(path, "/Repos") {
-ic.emitRepoByPath(path)
+if isRepoPath(path) {
+ic.emitRepoByPath(maybeStringWorkspacePrefix(path))
 } else {
 // TODO: wrap this into ic.shouldEmit...
 // TODO: strip /Workspace prefix if it's provided
 ic.Emit(&resource{
 Resource: "databricks_workspace_file",
-ID: path,
+ID: maybeStringWorkspacePrefix(path),
 })
 }
 }
 
 func (ic *importContext) emitNotebookOrRepo(path string) {
-if strings.HasPrefix(path, "/Repos") {
-ic.emitRepoByPath(path)
+if isRepoPath(path) {
+ic.emitRepoByPath(maybeStringWorkspacePrefix(path))
 } else {
-// TODO: strip /Workspace prefix if it's provided
-ic.maybeEmitWorkspaceObject("databricks_notebook", path, nil)
+ic.maybeEmitWorkspaceObject("databricks_notebook", maybeStringWorkspacePrefix(path), nil)
 }
 }
 
```
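
As a quick sanity check, the new helpers could be exercised with a small table of paths (a hypothetical test sketch, not part of this commit; it assumes the testify `assert` package already used by the exporter tests):

```go
func TestWorkspacePathHelpers(t *testing.T) {
	// Both spellings of a repo path should be recognized.
	assert.True(t, isRepoPath("/Repos/user@domain.com/project"))
	assert.True(t, isRepoPath("/Workspace/Repos/user@domain.com/project"))
	assert.False(t, isRepoPath("/Workspace/Users/user@domain.com/Test"))

	// The /Workspace prefix is stripped; bare paths pass through unchanged.
	assert.Equal(t, "/Users/user@domain.com/Test",
		maybeStringWorkspacePrefix("/Workspace/Users/user@domain.com/Test"))
	assert.Equal(t, "/Test", maybeStringWorkspacePrefix("/Test"))
}
```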
