From a79e9ef93f1ddb1cb2e62dd0b76706b3da353255 Mon Sep 17 00:00:00 2001 From: Renaud Hartert Date: Fri, 27 Dec 2024 17:11:59 +0100 Subject: [PATCH 1/2] Add retrier on 504 --- common/retry.go | 22 +++++++- common/retry_test.go | 85 +++++++++++++++++++++++++++++ permissions/resource_permissions.go | 12 +++- 3 files changed, 114 insertions(+), 5 deletions(-) diff --git a/common/retry.go b/common/retry.go index 4d116ad916..81bd107f70 100644 --- a/common/retry.go +++ b/common/retry.go @@ -2,9 +2,11 @@ package common import ( "context" - "log" + "errors" "regexp" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/logger" "github.com/databricks/databricks-sdk-go/retries" ) @@ -15,7 +17,7 @@ func RetryOnTimeout[T any](ctx context.Context, f func(context.Context) (*T, err msg := err.Error() isTimeout := timeoutRegex.MatchString(msg) if isTimeout { - log.Printf("[DEBUG] Retrying due to timeout: %s", msg) + logger.Debugf(ctx, "Retrying due to timeout: %s", msg) } return isTimeout })) @@ -23,3 +25,19 @@ func RetryOnTimeout[T any](ctx context.Context, f func(context.Context) (*T, err return f(ctx) }) } + +// RetryOn504 returns a [retries.Retrier] that calls the given method +// until it either succeeds or returns an error that is different from +// [apierr.ErrDeadlineExceeded]. +func RetryOn504[T any](ctx context.Context, f func(context.Context) (*T, error)) (*T, error) { + r := retries.New[T](retries.WithTimeout(-1), retries.WithRetryFunc(func(err error) bool { + if !errors.Is(err, apierr.ErrDeadlineExceeded) { + return false + } + logger.Debugf(ctx, "Retrying on error 504") + return true + })) + return r.Run(ctx, func(ctx context.Context) (*T, error) { + return f(ctx) + }) +} diff --git a/common/retry_test.go b/common/retry_test.go index 4ca3d160dd..152bcb7c18 100644 --- a/common/retry_test.go +++ b/common/retry_test.go @@ -5,6 +5,7 @@ import ( "errors" "testing" + "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/experimental/mocks" "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/stretchr/testify/assert" @@ -47,3 +48,87 @@ func TestRetryOnTimeout_NonRetriableError(t *testing.T) { }) assert.ErrorIs(t, err, expected) } + +func TestRetryOn504_noError(t *testing.T) { + wantErr := error(nil) + wantRes := (*workspace.ObjectInfo)(nil) + wantCalls := 1 + + w := mocks.NewMockWorkspaceClient(t) + api := w.GetMockWorkspaceAPI().EXPECT() + api.GetStatusByPath(mock.Anything, mock.Anything).Return(wantRes, wantErr) + + gotCalls := 0 + gotRes, gotErr := RetryOn504(context.Background(), func(ctx context.Context) (*workspace.ObjectInfo, error) { + gotCalls += 1 + return w.WorkspaceClient.Workspace.GetStatusByPath(ctx, "path") + }) + + assert.ErrorIs(t, gotErr, wantErr) + assert.Equal(t, gotRes, wantRes) + assert.Equal(t, gotCalls, wantCalls) +} + +func TestRetryOn504_errorNot504(t *testing.T) { + wantErr := errors.New("test error") + wantRes := (*workspace.ObjectInfo)(nil) + wantCalls := 1 + + w := mocks.NewMockWorkspaceClient(t) + api := w.GetMockWorkspaceAPI().EXPECT() + api.GetStatusByPath(mock.Anything, mock.Anything).Return(wantRes, wantErr) + + gotCalls := 0 + gotRes, gotErr := RetryOn504(context.Background(), func(ctx context.Context) (*workspace.ObjectInfo, error) { + gotCalls += 1 + return w.WorkspaceClient.Workspace.GetStatusByPath(ctx, "path") + }) + + assert.ErrorIs(t, gotErr, wantErr) + assert.Equal(t, gotRes, wantRes) + assert.Equal(t, gotCalls, wantCalls) +} + +func TestRetryOn504_error504ThenFail(t *testing.T) { + wantErr := errors.New("test error") + wantRes := (*workspace.ObjectInfo)(nil) + wantCalls := 2 + + w := mocks.NewMockWorkspaceClient(t) + api := w.GetMockWorkspaceAPI().EXPECT() + call := api.GetStatusByPath(mock.Anything, mock.Anything).Return(nil, apierr.ErrDeadlineExceeded) + call.Repeatability = 1 + api.GetStatusByPath(mock.Anything, mock.Anything).Return(wantRes, wantErr) + + gotCalls := 0 + gotRes, gotErr := RetryOn504(context.Background(), func(ctx context.Context) (*workspace.ObjectInfo, error) { + gotCalls++ + return w.WorkspaceClient.Workspace.GetStatusByPath(ctx, "path") + }) + + assert.ErrorIs(t, gotErr, wantErr) + assert.Equal(t, gotRes, wantRes) + assert.Equal(t, gotCalls, wantCalls) +} + +func TestRetryOn504_error504ThenSuccess(t *testing.T) { + wantErr := error(nil) + wantRes := &workspace.ObjectInfo{} + wantCalls := 2 + + w := mocks.NewMockWorkspaceClient(t) + api := w.GetMockWorkspaceAPI().EXPECT() + call := api.GetStatusByPath(mock.Anything, mock.Anything).Return(nil, apierr.ErrDeadlineExceeded) + call.Repeatability = 1 + api.GetStatusByPath(mock.Anything, mock.Anything).Return(wantRes, wantErr) + + gotCalls := 0 + gotRes, gotErr := RetryOn504(context.Background(), func(ctx context.Context) (*workspace.ObjectInfo, error) { + gotCalls++ + return w.WorkspaceClient.Workspace.GetStatusByPath(ctx, "path") + }) + + assert.ErrorIs(t, gotErr, wantErr) + assert.Equal(t, gotRes, wantRes) + assert.Equal(t, gotCalls, wantCalls) +} diff --git a/permissions/resource_permissions.go b/permissions/resource_permissions.go index 6eb138fb80..ef6f9ab31b 100644 --- a/permissions/resource_permissions.go +++ b/permissions/resource_permissions.go @@ -122,10 +122,16 @@ func (a PermissionsAPI) readRaw(objectID string, mapping resourcePermissions) (* } idParts := strings.Split(objectID, "/") id := idParts[len(idParts)-1] - permissions, err := w.Permissions.Get(a.context, iam.GetPermissionRequest{ - RequestObjectId: id, - RequestObjectType: mapping.requestObjectType, + + // TODO: This a temporary measure to implement retry on 504 until this is + // supported natively in the Go SDK. + permissions, err := common.RetryOn504(a.context, func(ctx context.Context) (*iam.ObjectPermissions, error) { + return w.Permissions.Get(a.context, iam.GetPermissionRequest{ + RequestObjectId: id, + RequestObjectType: mapping.requestObjectType, + }) }) + var apiErr *apierr.APIError // https://github.com/databricks/terraform-provider-databricks/issues/1227 // platform propagates INVALID_STATE error for auto-purged clusters in From 1cf38c5c8e2d1ea191e43989144bd38aa4d51323 Mon Sep 17 00:00:00 2001 From: Renaud Hartert Date: Fri, 27 Dec 2024 17:33:24 +0100 Subject: [PATCH 2/2] Test the resource too --- permissions/resource_permissions_test.go | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/permissions/resource_permissions_test.go b/permissions/resource_permissions_test.go index 12b3219acf..90b6469390 100644 --- a/permissions/resource_permissions_test.go +++ b/permissions/resource_permissions_test.go @@ -484,6 +484,36 @@ func TestResourcePermissionsRead_EmptyListResultsInRemoval(t *testing.T) { }.ApplyNoError(t) } +func TestResourcePermissionsRead_EmptyListResultsInRemovalWith504Errors(t *testing.T) { + qa.ResourceFixture{ + MockWorkspaceClientFunc: func(mwc *mocks.MockWorkspaceClient) { + mwc.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{UserName: "admin"}, nil) + + req := iam.GetPermissionRequest{ + RequestObjectId: "abc", + RequestObjectType: "clusters", + } + + // Fail 3 times with a 504 error. These should be retried + // transparently. + call := mwc.GetMockPermissionsAPI().EXPECT().Get(mock.Anything, req).Return(nil, apierr.ErrDeadlineExceeded) + call.Repeatability = 3 + + mwc.GetMockPermissionsAPI().EXPECT().Get(mock.Anything, req).Return(&iam.ObjectPermissions{ + ObjectId: "/clusters/abc", + ObjectType: "cluster", + }, nil) + }, + Resource: ResourcePermissions(), + Read: true, + Removed: true, + InstanceState: map[string]string{ + "cluster_id": "abc", + }, + ID: "/clusters/abc", + }.ApplyNoError(t) +} + func TestResourcePermissionsDelete(t *testing.T) { d, err := qa.ResourceFixture{ MockWorkspaceClientFunc: func(mwc *mocks.MockWorkspaceClient) {