diff --git a/pkg/health/health.go b/pkg/health/health.go index 5951778..8ff1669 100644 --- a/pkg/health/health.go +++ b/pkg/health/health.go @@ -110,15 +110,21 @@ func IsWorse(current, new HealthStatusCode) bool { return newIndex > currentIndex } -func GetHealthByConfigType(configType string, obj map[string]any, states ...string) HealthStatus { - switch configType { - case "AWS::ECS::Task": - return GetECSTaskHealth(obj) - } +func get(obj map[string]any, keys ...string) string { + v, _, _ := unstructured.NestedString(obj, keys...) + return strings.TrimSpace(v) +} +func isArgoHealth(s HealthStatusCode) bool { + return s == "Suspended" || s == "Degraded" || s == "Progressing" +} + +func GetHealthByConfigType(configType string, obj map[string]any, states ...string) HealthStatus { configClass := strings.Split(configType, "::")[0] switch strings.ToLower(configClass) { + case "aws": + return getAWSHealthByConfigType(configType, obj, states...) case "mongo": return GetMongoHealth(obj) case "kubernetes", "crossplane", "missioncontrol", "flux", "argo": @@ -172,37 +178,56 @@ func GetResourceHealth( Status: HealthStatusUnknown, Message: err.Error(), } - } else { - return health, nil } } - if healthOverride != nil { - health, err := healthOverride.GetResourceHealth(obj) + if health == nil && healthOverride != nil { + health, err = healthOverride.GetResourceHealth(obj) if err != nil { - health = &HealthStatus{ + return &HealthStatus{ Status: HealthStatusUnknown, Message: err.Error(), - } - return health, err - } - if health != nil { - return health, nil + }, err } } - if obj.GetDeletionTimestamp() != nil { - return &HealthStatus{ - Status: HealthStatusTerminating, - }, nil + if health == nil || + health.Status == "" || + isArgoHealth(health.Status) { + // try and get a better status from conditions + defaultHealth, err := GetDefaultHealth(obj) + if err != nil { + return &HealthStatus{ + Status: "HealthParseError", + Message: lo.Elipse(err.Error(), 500), + }, nil + } + if health == nil { + health = defaultHealth + } + if health.Status == "" { + health.Status = defaultHealth.Status + } + + if defaultHealth.Status != "" && isArgoHealth(health.Status) && !isArgoHealth(defaultHealth.Status) { + health.Status = defaultHealth.Status + } + if health.Message == "" { + health.Message = defaultHealth.Message + } } if health == nil { - return &HealthStatus{ + health = &HealthStatus{ Status: HealthStatusUnknown, Ready: true, - }, nil + } } + if obj.GetDeletionTimestamp() != nil { + health.Status = HealthStatusTerminating + health.Ready = false + } + return health, err } @@ -274,5 +299,5 @@ func GetHealthCheckFunc(gvk schema.GroupVersionKind) func(obj *unstructured.Unst return getHPAHealth } } - return GetDefaultHealth + return nil } diff --git a/pkg/health/health_aws.go b/pkg/health/health_aws.go index e1f42e4..cca6002 100644 --- a/pkg/health/health_aws.go +++ b/pkg/health/health_aws.go @@ -1,5 +1,50 @@ package health +import ( + "fmt" + "strings" +) + func GetAWSResourceHealth(_, status string) (health HealthStatus) { return GetHealthFromStatusName(status) } + +func getAWSHealthByConfigType(configType string, obj map[string]any, states ...string) HealthStatus { + switch configType { + case "AWS::ECS::Task": + return GetECSTaskHealth(obj) + case "AWS::Cloudformation::Stack": + return GetHealthFromStatusName(get(obj, "StackStatus"), get(obj, "StackStatusReason")) + case "AWS::EC2::Instance": + return GetHealthFromStatusName(get(obj, "State")) + case "AWS::RDS::DBInstance": + return GetHealthFromStatusName(get(obj, "DBInstanceStatus")) + case "AWS::ElasticLoadBalancing::LoadBalancer": + return GetHealthFromStatusName(get(obj, "State", "Code")) + case "AWS::AutoScaling::AutoScalingGroup": + return GetHealthFromStatusName(get(obj, "Status")) + case "AWS::Lambda::Function": + return GetHealthFromStatusName(get(obj, "State"), get(obj, "StateReasonCode")) + case "AWS::DynamoDB::Table": + return GetHealthFromStatusName(get(obj, "TableStatus")) + case "AWS::ElastiCache::CacheCluster": + return GetHealthFromStatusName(get(obj, "CacheClusterStatus")) + } + + if len(states) > 0 { + return GetHealthFromStatusName(states[0]) + } else { + for k, v := range obj { + _k := strings.ToLower(k) + _v := fmt.Sprintf("%s", v) + if _k == "status" || _k == "state" || + strings.HasSuffix(_k, "status") { + return GetHealthFromStatusName(_v) + } + } + } + return HealthStatus{ + Health: HealthUnknown, + } + +} diff --git a/pkg/health/health_aws_ecs.go b/pkg/health/health_aws_ecs.go index 5a89a19..1a2892f 100644 --- a/pkg/health/health_aws_ecs.go +++ b/pkg/health/health_aws_ecs.go @@ -14,14 +14,14 @@ func GetECSTaskHealth(obj map[string]any) (health HealthStatus) { } if v, ok := obj["HealthStatus"].(string); ok { - hr.Health = Health(lo.CamelCase(v)) + hr.Status = HealthStatusCode(HumanCase(v)) } - switch hr.Status { + switch strings.ToUpper(string(hr.Status)) { case "RUNNING": hr.Health = HealthHealthy hr.Ready = true - case "STOPPED", "DELETED": + case "STOPPED", "DELETED", "UNKNOWN": hr.Ready = true hr.Health = HealthUnknown } @@ -31,15 +31,16 @@ func GetECSTaskHealth(obj map[string]any) (health HealthStatus) { if stopCode != "" { hr.Status = HealthStatusCode(stopCode) } - switch stopCode { - case "TaskFailedToStart": + + switch strings.ToUpper(stopCode) { + case "TASKFAILEDTOSTART": hr.Health = HealthUnhealthy - case "EssentialContainerExited": + case "ESSENTIALCONTAINEREXITED": hr.Status = HealthStatusCrashed hr.Health = HealthUnhealthy - case "UserInitiated": + case "USERINITIATED": hr.Status = HealthStatusStopped - case "ServiceSchedulerInitiated": + case "SERVICESCHEDULERINITIATED": hr.Status = HealthStatusTerminating } @@ -52,15 +53,15 @@ func GetECSTaskHealth(obj map[string]any) (health HealthStatus) { hr.Message = strings.TrimSpace(reason[idx+1:]) } - switch hr.Status { - case "ContainerRuntimeError", "ContainerRuntimeTimeoutError", "OutOfMemoryError": + switch strings.ToUpper(string(hr.Status)) { + case "CONTAINERRUNTIMEERROR", "CONTAINERRUNTIMETIMEOUTERROR", "OUTOFMEMORYERROR": hr.Health = HealthUnhealthy - case "InternalError", "CannotCreateVolumeError", "ResourceNotFoundException", "CannotStartContainerError": + case "INTERNALERROR", "CANNOTCREATEVOLUMEERROR", "RESOURCENOTFOUNDERROR", "CANNOTSTARTCONTAINERERROR": hr.Health = HealthUnhealthy hr.Ready = true - case "SpotInterruptionError", "CannotStopContainerError", "CannotInspectContainerError": + case "SPOTINTERRUPTIONERROR", "CANNOTSTOPCONTAINERERROR", "CANNOTINSPECTCONTAINERERROR": hr.Health = HealthWarning - case "TaskFailedToStart", "ResourceInitializationError", "CannotPullContainer": + case "TASKFAILEDTOSTART", "RESOURCEINITIALIZATIONERROR", "CANNOTPULLCONTAINER": hr.Health = HealthUnhealthy default: hr.Health = HealthUnhealthy diff --git a/pkg/health/health_test.go b/pkg/health/health_test.go index f72f547..a0d66ac 100644 --- a/pkg/health/health_test.go +++ b/pkg/health/health_test.go @@ -83,9 +83,15 @@ func assertAppHealthMsg( expectedStatus health.HealthStatusCode, expectedHealth health.Health, expectedReady bool, - expectedMsg string, overrides ...string, ) { + + var expectedMsg *string + if len(overrides) > 0 { + expectedMsg = lo.ToPtr(overrides[0]) + overrides = overrides[1:] + } + m := make(map[string]string) for k, v := range defaultOverrides { m[k] = v @@ -103,35 +109,12 @@ func assertAppHealthMsg( assert.Equal(t, expectedHealth, health.Health) assert.Equal(t, expectedReady, health.Ready) assert.Equal(t, expectedStatus, health.Status) - assert.Equal(t, expectedMsg, health.Message) + if expectedMsg != nil { + assert.Equal(t, *expectedMsg, health.Message) + } }) } -func assertAppHealth( - t *testing.T, - yamlPath string, - expectedStatus health.HealthStatusCode, - expectedHealth health.Health, - expectedReady bool, - overrides ...string, -) { - m := make(map[string]string) - for k, v := range defaultOverrides { - m[k] = v - } - if len(overrides)%2 == 1 { - assert.FailNow(t, "even number of overrides") - } - for i := 0; i < len(overrides); i += 2 { - m[overrides[i]] = overrides[i+1] - } - health, _ := getHealthStatus(yamlPath, t, m) - assert.NotNil(t, health) - assert.Equal(t, expectedHealth, health.Health) - assert.Equal(t, expectedReady, health.Ready) - assert.Equal(t, expectedStatus, health.Status) -} - func assertAppHealthWithOverwriteMsg( t *testing.T, yamlPath string, @@ -238,7 +221,7 @@ func TestCrossplane(t *testing.T) { "ActivePackageRevision", health.HealthHealthy, true, - "", + "ActivePackageRevision HealthyPackageRevision", ) assertAppHealthMsg( t, @@ -259,8 +242,8 @@ func TestCrossplane(t *testing.T) { } func TestNamespace(t *testing.T) { - assertAppHealth(t, "./testdata/namespace.yaml", health.HealthStatusHealthy, health.HealthUnknown, true) - assertAppHealth( + assertAppHealthMsg(t, "./testdata/namespace.yaml", health.HealthStatusHealthy, health.HealthUnknown, true) + assertAppHealthMsg( t, "./testdata/namespace-terminating.yaml", health.HealthStatusTerminating, @@ -271,7 +254,7 @@ func TestNamespace(t *testing.T) { func TestCertificateRequest(t *testing.T) { // approved but not issued in 1h - assertAppHealth(t, "./testdata/certificate-request-approved.yaml", "Approved", health.HealthUnhealthy, false) + assertAppHealthMsg(t, "./testdata/certificate-request-approved.yaml", "Approved", health.HealthUnhealthy, false) // approved in the last 1h assertAppHealthWithOverwriteMsg(t, "./testdata/certificate-request-approved.yaml", map[string]string{ @@ -288,23 +271,23 @@ func TestCertificate(t *testing.T) { // "2024-10-28T08:05:00Z": time.Now().Add(-time.Hour * 2).Format(time.RFC3339), // }, "IncorrectIssuer", health.HealthUnhealthy, false, `Issuing certificate as Secret was previously issued by "Issuer.cert-manager.io/"`) - // assertAppHealth(t, "./testdata/certificate-expired.yaml", "Expired", health.HealthUnhealthy, true) + // assertAppHealthMsg(t, "./testdata/certificate-expired.yaml", "Expired", health.HealthUnhealthy, true) // assertAppHealthWithOverwrite(t, "./testdata/about-to-expire.yaml", map[string]string{ // "2024-06-26T12:25:46Z": time.Now().Add(time.Hour).UTC().Format("2006-01-02T15:04:05Z"), // }, health.HealthStatusWarning, health.HealthWarning, true) - assertAppHealth(t, "./testdata/certificate-healthy.yaml", "Issued", health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/certificate-healthy.yaml", "Issued", health.HealthHealthy, true) b := "../resource_customizations/cert-manager.io/Certificate/testdata/" - assertAppHealth(t, b+"degraded_configError.yaml", "ConfigError", health.HealthUnhealthy, true) - assertAppHealth(t, b+"progressing_issuing.yaml", "Issuing", health.HealthUnknown, false) + assertAppHealthMsg(t, b+"degraded_configError.yaml", "ConfigError", health.HealthUnhealthy, true) + assertAppHealthMsg(t, b+"progressing_issuing.yaml", "Issuing", health.HealthUnknown, false) } func TestExternalSecrets(t *testing.T) { b := "../resource_customizations/external-secrets.io/ExternalSecret/testdata/" - assertAppHealth(t, b+"degraded.yaml", "SecretSyncedError", health.HealthUnhealthy, false) - assertAppHealth(t, b+"progressing.yaml", "", health.HealthUnknown, false) - assertAppHealth(t, b+"healthy.yaml", "SecretSynced", health.HealthHealthy, true) + assertAppHealthMsg(t, b+"degraded.yaml", "SecretSyncedError", health.HealthUnhealthy, true) + assertAppHealthMsg(t, b+"progressing.yaml", "Progressing", health.HealthUnknown, false) + assertAppHealthMsg(t, b+"healthy.yaml", "SecretSynced", health.HealthHealthy, true) } func TestDeploymentHealth(t *testing.T) { @@ -443,25 +426,25 @@ func TestStatefulSetOnDeleteHealth(t *testing.T) { } func TestDaemonSetOnDeleteHealth(t *testing.T) { - assertAppHealth(t, "./testdata/daemonset-ondelete.yaml", health.HealthStatusRunning, health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/daemonset-ondelete.yaml", health.HealthStatusRunning, health.HealthHealthy, true) } func TestPVCHealth(t *testing.T) { - assertAppHealth(t, "./testdata/pvc-bound.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) - assertAppHealth(t, "./testdata/pvc-pending.yaml", health.HealthStatusProgressing, health.HealthHealthy, false) + assertAppHealthMsg(t, "./testdata/pvc-bound.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/pvc-pending.yaml", health.HealthStatusProgressing, health.HealthHealthy, false) } func TestServiceHealth(t *testing.T) { - assertAppHealth(t, "./testdata/svc-clusterip.yaml", health.HealthStatusUnknown, health.HealthUnknown, true) - assertAppHealth(t, "./testdata/svc-loadbalancer.yaml", health.HealthStatusRunning, health.HealthHealthy, true) - assertAppHealth( + assertAppHealthMsg(t, "./testdata/svc-clusterip.yaml", health.HealthStatusUnknown, health.HealthUnknown, true) + assertAppHealthMsg(t, "./testdata/svc-loadbalancer.yaml", health.HealthStatusRunning, health.HealthHealthy, true) + assertAppHealthMsg( t, "./testdata/svc-loadbalancer-unassigned.yaml", health.HealthStatusCreating, health.HealthUnknown, false, ) - assertAppHealth( + assertAppHealthMsg( t, "./testdata/svc-loadbalancer-nonemptylist.yaml", health.HealthStatusRunning, @@ -471,69 +454,69 @@ func TestServiceHealth(t *testing.T) { } func TestIngressHealth(t *testing.T) { - assertAppHealth(t, "./testdata/ingress.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) - assertAppHealth(t, "./testdata/ingress-unassigned.yaml", health.HealthStatusPending, health.HealthHealthy, false) - assertAppHealth(t, "./testdata/ingress-nonemptylist.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/ingress.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/ingress-unassigned.yaml", health.HealthStatusPending, health.HealthHealthy, false) + assertAppHealthMsg(t, "./testdata/ingress-nonemptylist.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) } func TestCRD(t *testing.T) { b := "../resource_customizations/serving.knative.dev/Service/testdata/" - assertAppHealth(t, "./testdata/knative-service.yaml", "", health.HealthUnknown, false) - assertAppHealth(t, b+"degraded.yaml", "RevisionFailed", health.HealthUnhealthy, false) - assertAppHealth(t, b+"healthy.yaml", "", health.HealthHealthy, true) - assertAppHealth(t, b+"progressing.yaml", "", health.HealthUnknown, false) + assertAppHealthMsg(t, "./testdata/knative-service.yaml", "Progressing", health.HealthUnknown, false) + assertAppHealthMsg(t, b+"degraded.yaml", "RevisionFailed", health.HealthUnhealthy, true) + assertAppHealthMsg(t, b+"healthy.yaml", "", health.HealthHealthy, true) + assertAppHealthMsg(t, b+"progressing.yaml", "Progressing", health.HealthUnknown, false) } func TestCnrmPubSub(t *testing.T) { b := "../resource_customizations/pubsub.cnrm.cloud.google.com/PubSubTopic/testdata/" - assertAppHealth(t, b+"dependency_not_found.yaml", "DependencyNotFound", health.HealthUnhealthy, true) - assertAppHealth(t, b+"dependency_not_ready.yaml", "DependencyNotReady", health.HealthUnknown, false) - assertAppHealth(t, b+"up_to_date.yaml", "UpToDate", health.HealthHealthy, true) - assertAppHealth(t, b+"update_failed.yaml", "UpdateFailed", health.HealthUnhealthy, true) - assertAppHealth(t, b+"update_in_progress.yaml", "", health.HealthUnknown, false) + assertAppHealthMsg(t, b+"dependency_not_found.yaml", "DependencyNotFound", health.HealthUnhealthy, true) + assertAppHealthMsg(t, b+"dependency_not_ready.yaml", "DependencyNotReady", health.HealthUnknown, false) + assertAppHealthMsg(t, b+"up_to_date.yaml", "UpToDate", health.HealthHealthy, true) + assertAppHealthMsg(t, b+"update_failed.yaml", "UpdateFailed", health.HealthUnhealthy, true) + assertAppHealthMsg(t, b+"update_in_progress.yaml", "Progressing", health.HealthUnknown, false) } func TestJob(t *testing.T) { - assertAppHealth(t, "./testdata/job-running.yaml", health.HealthStatusRunning, health.HealthHealthy, false) - assertAppHealth(t, "./testdata/job-failed.yaml", health.HealthStatusError, health.HealthUnhealthy, true) - assertAppHealth(t, "./testdata/job-succeeded.yaml", health.HealthStatusCompleted, health.HealthHealthy, true) - assertAppHealth(t, "./testdata/job-suspended.yaml", health.HealthStatusSuspended, health.HealthUnknown, false) + assertAppHealthMsg(t, "./testdata/job-running.yaml", health.HealthStatusRunning, health.HealthHealthy, false) + assertAppHealthMsg(t, "./testdata/job-failed.yaml", health.HealthStatusError, health.HealthUnhealthy, true) + assertAppHealthMsg(t, "./testdata/job-succeeded.yaml", health.HealthStatusCompleted, health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/job-suspended.yaml", health.HealthStatusSuspended, health.HealthUnknown, false) } func TestHPA(t *testing.T) { - assertAppHealth(t, "./testdata/hpa-v2-healthy.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) - assertAppHealth(t, "./testdata/hpa-v2-degraded.yaml", health.HealthStatusDegraded, health.HealthUnhealthy, false) - assertAppHealth( + assertAppHealthMsg(t, "./testdata/hpa-v2-healthy.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/hpa-v2-degraded.yaml", health.HealthStatusDegraded, health.HealthUnhealthy, false) + assertAppHealthMsg( t, "./testdata/hpa-v2-progressing.yaml", health.HealthStatusProgressing, health.HealthHealthy, false, ) - assertAppHealth(t, "./testdata/hpa-v2beta2-healthy.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) - assertAppHealth( + assertAppHealthMsg(t, "./testdata/hpa-v2beta2-healthy.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) + assertAppHealthMsg( t, "./testdata/hpa-v2beta1-healthy-disabled.yaml", health.HealthStatusHealthy, health.HealthHealthy, true, ) - assertAppHealth(t, "./testdata/hpa-v2beta1-healthy.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) - assertAppHealth(t, "./testdata/hpa-v1-degraded.yaml", health.HealthStatusDegraded, health.HealthUnhealthy, false) - assertAppHealth(t, "./testdata/hpa-v2-degraded.yaml", health.HealthStatusDegraded, health.HealthUnhealthy, false) + assertAppHealthMsg(t, "./testdata/hpa-v2beta1-healthy.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/hpa-v1-degraded.yaml", health.HealthStatusDegraded, health.HealthUnhealthy, false) + assertAppHealthMsg(t, "./testdata/hpa-v2-degraded.yaml", health.HealthStatusDegraded, health.HealthUnhealthy, false) - assertAppHealth(t, "./testdata/hpa-v1-healthy.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) - assertAppHealth(t, "./testdata/hpa-v1-healthy-toofew.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) - assertAppHealth( + assertAppHealthMsg(t, "./testdata/hpa-v1-healthy.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/hpa-v1-healthy-toofew.yaml", health.HealthStatusHealthy, health.HealthHealthy, true) + assertAppHealthMsg( t, "./testdata/hpa-v1-progressing.yaml", health.HealthStatusProgressing, health.HealthHealthy, false, ) - assertAppHealth( + assertAppHealthMsg( t, "./testdata/hpa-v1-progressing-with-no-annotations.yaml", health.HealthStatusProgressing, @@ -553,8 +536,8 @@ func TestReplicaSet(t *testing.T) { } func TestPod(t *testing.T) { - assertAppHealth(t, "./testdata/terminating-stuck.yaml", "TerminatingStalled", health.HealthWarning, false) - assertAppHealth(t, "./testdata/terminating-namespace.yaml", "TerminatingStalled", health.HealthWarning, false) + assertAppHealthMsg(t, "./testdata/terminating-stuck.yaml", "TerminatingStalled", health.HealthWarning, false) + assertAppHealthMsg(t, "./testdata/terminating-namespace.yaml", "TerminatingStalled", health.HealthWarning, false) assertAppHealthWithOverwrite(t, "./testdata/pod-terminating.yaml", map[string]string{ "2024-07-01T06:52:22Z": time.Now().Add(-time.Minute * 20).UTC().Format("2006-01-02T15:04:05Z"), @@ -569,7 +552,7 @@ func TestPod(t *testing.T) { }, health.HealthStatusStarting, health.HealthUnknown, false, "Container nginx is waiting for readiness probe") // Pod not ready - assertAppHealth( + assertAppHealthMsg( t, "./testdata/pod-not-ready-but-container-ready.yaml", health.HealthStatusRunning, @@ -578,7 +561,7 @@ func TestPod(t *testing.T) { ) // Restart Loop - assertAppHealth( + assertAppHealthMsg( t, "./testdata/pod-ready-container-terminated.yaml", health.HealthStatusRunning, @@ -611,56 +594,56 @@ func TestPod(t *testing.T) { "2024-07-17T14:29:51Z": "2024-06-17T14:29:51Z", }, health.HealthStatusRunning, health.HealthHealthy, true) - assertAppHealth(t, "./testdata/pod-old-restarts.yaml", health.HealthStatusRunning, health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/pod-old-restarts.yaml", health.HealthStatusRunning, health.HealthHealthy, true) - assertAppHealth(t, "./testdata/pod-pending.yaml", health.HealthStatusPending, health.HealthUnknown, false) - assertAppHealth( + assertAppHealthMsg(t, "./testdata/pod-pending.yaml", health.HealthStatusPending, health.HealthUnknown, false) + assertAppHealthMsg( t, "./testdata/pod-running-not-ready.yaml", health.HealthStatusStarting, health.HealthUnknown, false, ) - assertAppHealth( + assertAppHealthMsg( t, "./testdata/pod-crashloop.yaml", health.HealthStatusCrashLoopBackoff, health.HealthUnhealthy, false, ) - assertAppHealth( + assertAppHealthMsg( t, "./testdata/pod-crashloop-pending.yaml", health.HealthStatusCrashLoopBackoff, health.HealthUnhealthy, false, ) - assertAppHealth(t, "./testdata/pod-imagepullbackoff.yaml", "ImagePullBackOff", health.HealthUnhealthy, false) - assertAppHealth(t, "./testdata/pod-error.yaml", health.HealthStatusError, health.HealthUnhealthy, true) - assertAppHealth( + assertAppHealthMsg(t, "./testdata/pod-imagepullbackoff.yaml", "ImagePullBackOff", health.HealthUnhealthy, false) + assertAppHealthMsg(t, "./testdata/pod-error.yaml", health.HealthStatusError, health.HealthUnhealthy, true) + assertAppHealthMsg( t, "./testdata/pod-running-restart-always.yaml", health.HealthStatusRunning, health.HealthHealthy, true, ) - assertAppHealth( + assertAppHealthMsg( t, "./testdata/pod-running-restart-never.yaml", health.HealthStatusRunning, health.HealthHealthy, false, ) - assertAppHealth( + assertAppHealthMsg( t, "./testdata/pod-running-restart-onfailure.yaml", health.HealthStatusRunning, health.HealthUnhealthy, false, ) - assertAppHealth(t, "./testdata/pod-failed.yaml", health.HealthStatusError, health.HealthUnhealthy, true) - assertAppHealth(t, "./testdata/pod-succeeded.yaml", health.HealthStatusCompleted, health.HealthHealthy, true) - assertAppHealth( + assertAppHealthMsg(t, "./testdata/pod-failed.yaml", health.HealthStatusError, health.HealthUnhealthy, true) + assertAppHealthMsg(t, "./testdata/pod-succeeded.yaml", health.HealthStatusCompleted, health.HealthHealthy, true) + assertAppHealthMsg( t, "./testdata/pod-init-container-fail.yaml", health.HealthStatusCrashLoopBackoff, @@ -670,10 +653,10 @@ func TestPod(t *testing.T) { } // func TestAPIService(t *testing.T) { -// assertAppHealth(t, "./testdata/apiservice-v1-true.yaml", HealthStatusHealthy, health.HealthHealthy, true) -// assertAppHealth(t, "./testdata/apiservice-v1-false.yaml", HealthStatusProgressing, health.HealthHealthy, true) -// assertAppHealth(t, "./testdata/apiservice-v1beta1-true.yaml", HealthStatusHealthy, health.HealthHealthy, true) -// assertAppHealth(t, "./testdata/apiservice-v1beta1-false.yaml", HealthStatusProgressing, health.HealthHealthy, true) +// assertAppHealthMsg(t, "./testdata/apiservice-v1-true.yaml", HealthStatusHealthy, health.HealthHealthy, true) +// assertAppHealthMsg(t, "./testdata/apiservice-v1-false.yaml", HealthStatusProgressing, health.HealthHealthy, true) +// assertAppHealthMsg(t, "./testdata/apiservice-v1beta1-true.yaml", HealthStatusHealthy, health.HealthHealthy, true) +// assertAppHealthMsg(t, "./testdata/apiservice-v1beta1-false.yaml", HealthStatusProgressing, health.HealthHealthy, true) // } func TestGetArgoWorkflowHealth(t *testing.T) { @@ -729,14 +712,14 @@ func TestGetArgoWorkflowHealth(t *testing.T) { } func TestArgoApplication(t *testing.T) { - assertAppHealth( + assertAppHealthMsg( t, "./testdata/argo-application-healthy.yaml", health.HealthStatusHealthy, health.HealthHealthy, true, ) - assertAppHealth( + assertAppHealthMsg( t, "./testdata/argo-application-missing.yaml", health.HealthStatusMissing, @@ -762,27 +745,27 @@ func TestFluxResources(t *testing.T) { false, "HelmRelease/mission-control-agent/atlas-topology dry-run failed: failed to create typed patch object (mission-control-agent/atlas-topology; helm.toolkit.fluxcd.io/v2, Kind=HelmRelease): .spec.chart.spec.targetNamespace: field not declared in schema\n", ) - assertAppHealth( + assertAppHealthMsg( t, "./testdata/flux-kustomization-healthy.yaml", "ReconciliationSucceeded", health.HealthHealthy, true, ) - assertAppHealth(t, "./testdata/flux-kustomization-unhealthy.yaml", "Progressing", health.HealthUnknown, false) - assertAppHealth(t, "./testdata/flux-kustomization-failed.yaml", "BuildFailed", health.HealthUnhealthy, false) + assertAppHealthMsg(t, "./testdata/flux-kustomization-unhealthy.yaml", "Progressing", health.HealthUnknown, false) + assertAppHealthMsg(t, "./testdata/flux-kustomization-failed.yaml", "BuildFailed", health.HealthUnhealthy, false) status, _ := getHealthStatus("./testdata/flux-kustomization-failed.yaml", t, nil) assert.Contains(t, status.Message, "err='accumulating resources from 'kubernetes_resource_ingress_fail.yaml'") - assertAppHealth( + assertAppHealthMsg( t, "./testdata/flux-helmrelease-healthy.yaml", "ReconciliationSucceeded", health.HealthHealthy, true, ) - assertAppHealth(t, "./testdata/flux-helmrelease-unhealthy.yaml", "UpgradeFailed", health.HealthUnhealthy, true) - assertAppHealth(t, "./testdata/flux-helmrelease-upgradefailed.yaml", "UpgradeFailed", health.HealthUnhealthy, true) + assertAppHealthMsg(t, "./testdata/flux-helmrelease-unhealthy.yaml", "UpgradeFailed", health.HealthUnhealthy, true) + assertAppHealthMsg(t, "./testdata/flux-helmrelease-upgradefailed.yaml", "UpgradeFailed", health.HealthUnhealthy, true) helmreleaseStatus, _ := getHealthStatus("./testdata/flux-helmrelease-upgradefailed.yaml", t, nil) assert.Contains( t, @@ -791,11 +774,11 @@ func TestFluxResources(t *testing.T) { ) assert.Equal(t, helmreleaseStatus.Status, health.HealthStatusUpgradeFailed) - assertAppHealth(t, "./testdata/flux-helmrepository-healthy.yaml", "Succeeded", health.HealthHealthy, true) - assertAppHealth(t, "./testdata/flux-helmrepository-unhealthy.yaml", "Failed", health.HealthUnhealthy, false) + assertAppHealthMsg(t, "./testdata/flux-helmrepository-healthy.yaml", "Succeeded", health.HealthHealthy, true) + assertAppHealthMsg(t, "./testdata/flux-helmrepository-unhealthy.yaml", "Failed", health.HealthUnhealthy, false) - assertAppHealth(t, "./testdata/flux-gitrepository-healthy.yaml", "Succeeded", health.HealthHealthy, true) - assertAppHealth( + assertAppHealthMsg(t, "./testdata/flux-gitrepository-healthy.yaml", "Succeeded", health.HealthHealthy, true) + assertAppHealthMsg( t, "./testdata/flux-gitrepository-unhealthy.yaml", "GitOperationFailed", diff --git a/pkg/health/statusMap.yaml b/pkg/health/statusMap.yaml index 3162340..e21c46f 100644 --- a/pkg/health/statusMap.yaml +++ b/pkg/health/statusMap.yaml @@ -44,23 +44,6 @@ CertificateRequest: status: Issuing Failed: ready: true - status: - acme: - order: - url: https://acme-v02.api.letsencrypt.org/acme/order/45250083/316944902 - conditions: - - lastTransitionTime: '2019-02-15T18:21:10Z' - message: Order validated - reason: OrderValidated - status: 'False' - type: ValidateFailed - - lastTransitionTime: null - message: Certificate issued successfully - reason: CertIssued - status: 'True' - type: Ready - - Kustomization: conditions: Ready: diff --git a/pkg/health/status_name.go b/pkg/health/status_name.go index ad633f3..113eae0 100644 --- a/pkg/health/status_name.go +++ b/pkg/health/status_name.go @@ -2,29 +2,18 @@ package health import ( "strings" - - "github.com/samber/lo" ) -func GetHealthFromStatusName(status string) (health HealthStatus) { +func GetHealthFromStatusName(status string, reasons ...string) (health HealthStatus) { if status == "" { return HealthStatus{} } - status = strings.ReplaceAll(status, "_", " ") - status = strings.ReplaceAll(status, "-", " ") - - words := lo.Words(status) - for i, word := range words { - words[i] = lo.Capitalize(word) - } hr := HealthStatus{ - Status: HealthStatusCode(strings.Join(words, " ")), + Status: HealthStatusCode(HumanCase(status)), } - status = strings.ToLower(status) - - switch status { + switch strings.ToLower(string(hr.Status)) { case "update complete cleanup in progress", "update in progress", "updating", @@ -72,5 +61,12 @@ func GetHealthFromStatusName(status string) (health HealthStatus) { } } + for _, v := range reasons { + if v != "" { + hr.Message = v + break + } + } + return hr } diff --git a/pkg/health/testdata/AWS/Cloudformation/Stack/healthy.yaml b/pkg/health/testdata/AWS/Cloudformation/Stack/healthy.yaml index 58faa73..5202189 100644 --- a/pkg/health/testdata/AWS/Cloudformation/Stack/healthy.yaml +++ b/pkg/health/testdata/AWS/Cloudformation/Stack/healthy.yaml @@ -1,3 +1,5 @@ +annotations: + expected-status: Create Complete CreationTime: "2023-05-09T03:59:08.629Z" DriftInformation: StackDriftStatus: NOT_CHECKED diff --git a/pkg/health/testdata/AWS/Cloudformation/Stack/unhealthy.yaml b/pkg/health/testdata/AWS/Cloudformation/Stack/unhealthy.yaml index 69d7361..d5fcc78 100644 --- a/pkg/health/testdata/AWS/Cloudformation/Stack/unhealthy.yaml +++ b/pkg/health/testdata/AWS/Cloudformation/Stack/unhealthy.yaml @@ -1,5 +1,6 @@ annotations: - message: 'The following resource(s) failed to create: [ECSCluster].' + expected-message: 'The following resource(s) failed to create: [ECSCluster].' + expected-status: Create Failed CreationTime: 2024-06-27T14:23:12.654Z DriftInformation: StackDriftStatus: NOT_CHECKED diff --git a/pkg/health/testdata/AWS/ECS/Task/healthy.yaml b/pkg/health/testdata/AWS/ECS/Task/healthy.yaml new file mode 100644 index 0000000..abc1a97 --- /dev/null +++ b/pkg/health/testdata/AWS/ECS/Task/healthy.yaml @@ -0,0 +1,74 @@ +annotations: + expected-status: Running +Attachments: + - Id: beb2fa58-3e15-4877-b097-b8e0a4c91f04 + Type: ElasticNetworkInterface + Status: ATTACHED + Details: + - Name: subnetId + Value: subnet-099e59970db9eeb65 + - Name: networkInterfaceId + Value: eni-099e55471b484321b + - Name: macAddress + Value: 0a:f5:69:ba:ad:53 + - Name: privateDnsName + Value: ip-172-31-44-167.eu-west-1.compute.internal + - Name: privateIPv4Address + Value: 172.31.44.167 +Attributes: + - Name: ecs.cpu-architecture + Value: x86_64 + TargetType: "" +AvailabilityZone: eu-west-1b +ClusterArn: arn:aws:ecs:eu-west-1:123456789023:cluster/demo-dev-cluster +Connectivity: CONNECTED +ConnectivityAt: 2024-11-01T17:50:30.827Z +Containers: + - Cpu: "0" + Name: demo-another-container + Image: httpd:latest + GpuIds: [] + TaskArn: arn:aws:ecs:eu-west-1:123456789023:task/demo-dev-cluster/d8f8db0d67e34774935b2fbbc82961a3 + RuntimeId: d8f8db0d67e34774935b2fbbc82961a3-1560160490 + LastStatus: RUNNING + ImageDigest: sha256:bbea29057f25d9543e6a96a8e3cc7c7c937206d20eab2323f478fdb2469d536d + ContainerArn: arn:aws:ecs:eu-west-1:123456789023:container/demo-dev-cluster/d8f8db0d67e34774935b2fbbc82961a3/edf9b73b-b387-480e-954f-2f8dcd531a70 + HealthStatus: UNKNOWN + ManagedAgents: [] + NetworkBindings: [] + NetworkInterfaces: + - AttachmentId: beb2fa58-3e15-4877-b097-b8e0a4c91f04 + PrivateIpv4Address: 172.31.44.167 +Cpu: "512" +CreatedAt: 2024-11-01T17:50:27.075Z +DesiredStatus: RUNNING +EnableExecuteCommand: false +EphemeralStorage: + SizeInGiB: 20 +FargateEphemeralStorage: + SizeInGiB: 20 +Group: service:httpd-service +HealthStatus: RUNNING +InferenceAccelerators: [] +LastStatus: RUNNING +LaunchType: FARGATE +Memory: "1024" +Overrides: + ContainerOverrides: + - Name: demo-another-container + Command: [] + Environment: [] + EnvironmentFiles: [] + ResourceRequirements: [] + InferenceAcceleratorOverrides: [] +PlatformFamily: Linux +PlatformVersion: 1.4.0 +PullStartedAt: 2024-11-01T17:50:37.967Z +PullStoppedAt: 2024-11-01T17:50:44.584Z +StartedAt: 2024-11-01T17:50:44.935Z +StartedBy: ecs-svc/4462357315744281090 +StopCode: "" +Tags: [] +TaskArn: arn:aws:ecs:eu-west-1:123456789023:task/demo-dev-cluster/d8f8db0d67e34774935b2fbbc82961a3 +TaskDefinitionArn: arn:aws:ecs:eu-west-1:123456789023:task-definition/demo-httpd-task:1 +Version: 4 diff --git a/pkg/health/testdata/AWS/EKS/healthy.yaml b/pkg/health/testdata/AWS/EKS/healthy.yaml new file mode 100644 index 0000000..327aeb9 --- /dev/null +++ b/pkg/health/testdata/AWS/EKS/healthy.yaml @@ -0,0 +1,58 @@ +annotations: + expected-status: Active +AccessConfig: + AuthenticationMode: API_AND_CONFIG_MAP +Arn: arn:aws:eks:eu-west-1:123456789012:cluster/mission-control-demo-cluster +CertificateAuthority: + +CreatedAt: 2023-05-09T03:48:10.68Z +EncryptionConfig: [] +Endpoint: https://5E144B931E5FDA8DC41D80BC8F05AC7B.sk1.eu-west-1.eks.amazonaws.com +Health: + Issues: [] +Identity: + Oidc: + Issuer: https://oidc.eks.eu-west-1.amazonaws.com/id/5E144B931E5FDA8DC41D80BC8F05AC7B +KubernetesNetworkConfig: + IpFamily: ipv4 + ServiceIpv4Cidr: 172.20.0.0/16 +Logging: + ClusterLogging: + - Types: + - api + - audit + - authenticator + - controllerManager + - scheduler + Enabled: false +Name: mission-control-demo-cluster +PlatformVersion: eks.8 +ResourcesVpcConfig: + VpcId: vpc-09c963a9231399689 + SubnetIds: + - subnet-0e33965610ced8995 + - subnet-017dfefe7e6f269f2 + - subnet-09cba264d3549e8c0 + - subnet-0ae3c610ed5155c39 + - subnet-02a8a8128859f04ad + - subnet-03977c8ae22ce460a + SecurityGroupIds: + - sg-08cc59940602df0fe + PublicAccessCidrs: + - 0.0.0.0/0 + EndpointPublicAccess: true + EndpointPrivateAccess: false + ClusterSecurityGroupId: sg-0fef3b0123b019c30 +RoleArn: arn:aws:iam::123456789012:role/eksctl-mission-control-demo-cluster-cl-ServiceRole-JFHLKKM3MPEE +Status: ACTIVE +Tags: + region: eu-west-1 + account: "123456789012" + aws:cloudformation:stack-id: arn:aws:cloudformation:eu-west-1:123456789012:stack/eksctl-mission-control-demo-cluster-cluster/42e51dc0-ee1c-11ed-a71f-0a5f2673308f + alpha.eksctl.io/cluster-name: mission-control-demo-cluster + aws:cloudformation:logical-id: ControlPlane + aws:cloudformation:stack-name: eksctl-mission-control-demo-cluster-cluster + alpha.eksctl.io/eksctl-version: 0.140.0 + alpha.eksctl.io/cluster-oidc-enabled: "true" + eksctl.cluster.k8s.io/v1alpha1/cluster-name: mission-control-demo-cluster +Version: "1.30" diff --git a/pkg/health/testdata/Kubernetes/Prometheus/healthy.yaml b/pkg/health/testdata/Kubernetes/Prometheus/healthy.yaml new file mode 100644 index 0000000..09ba1a3 --- /dev/null +++ b/pkg/health/testdata/Kubernetes/Prometheus/healthy.yaml @@ -0,0 +1,91 @@ +apiVersion: monitoring.coreos.com/v1 +kind: Prometheus +metadata: + uid: c41090c9-10dd-4a56-8a37-182d20e6ccac + name: kube-prometheus-stack-prometheus + labels: + app: kube-prometheus-stack-prometheus + chart: kube-prometheus-stack-44.3.0 + release: kube-prometheus-stack + heritage: Helm + app.kubernetes.io/part-of: kube-prometheus-stack + app.kubernetes.io/version: 44.3.0 + app.kubernetes.io/instance: kube-prometheus-stack + helm.toolkit.fluxcd.io/name: kube-prometheus-stack + app.kubernetes.io/managed-by: Helm + helm.toolkit.fluxcd.io/namespace: monitoring + namespace: monitoring + annotations: + meta.helm.sh/release-name: kube-prometheus-stack + meta.helm.sh/release-namespace: monitoring + creationTimestamp: 2023-05-22T13:13:39Z +spec: + image: quay.io/prometheus/prometheus:v2.41.0 + paused: false + shards: 1 + thanos: + objectStorageConfig: + key: config.yaml + name: thanos-config + optional: false + storage: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 60Gi + accessModes: + - ReadWriteOnce + storageClassName: gp2 + version: v2.41.0 + alerting: + alertmanagers: + - name: kube-prometheus-stack-alertmanager + port: http-web + namespace: monitoring + apiVersion: v2 + pathPrefix: / + logLevel: info + portName: http-web + replicas: 1 + logFormat: logfmt + retention: 30d + externalUrl: http://prometheus.demo.aws.flanksource.com/ + hostNetwork: false + listenLocal: false + routePrefix: / + ruleSelector: {} + probeSelector: {} + enableAdminAPI: false + scrapeInterval: 30s + walCompression: true + securityContext: + fsGroup: 2000 + runAsUser: 1000 + runAsGroup: 2000 + runAsNonRoot: true + evaluationInterval: 30s + podMonitorSelector: {} + serviceAccountName: kube-prometheus-stack-prometheus + ruleNamespaceSelector: {} + probeNamespaceSelector: {} + serviceMonitorSelector: {} + podMonitorNamespaceSelector: {} + serviceMonitorNamespaceSelector: {} +status: + paused: false + replicas: 1 + conditions: + - type: Available + status: "True" + - type: Reconciled + status: "True" + shardStatuses: + - shardID: "0" + replicas: 1 + updatedReplicas: 1 + availableReplicas: 1 + unavailableReplicas: 0 + updatedReplicas: 1 + availableReplicas: 1 + unavailableReplicas: 0 diff --git a/pkg/health/testdata/Kubernetes/RoleBinding/unknown.yaml b/pkg/health/testdata/Kubernetes/RoleBinding/unknown.yaml new file mode 100644 index 0000000..7ebfd02 --- /dev/null +++ b/pkg/health/testdata/Kubernetes/RoleBinding/unknown.yaml @@ -0,0 +1,28 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + uid: ec37ec40-9121-477d-8e90-88ea0bf99d1d + name: argo-argocd-notifications-controller + labels: + helm.sh/chart: argo-cd-7.7.3 + app.kubernetes.io/name: argocd-notifications-controller + app.kubernetes.io/part-of: argocd + app.kubernetes.io/version: v2.13.0 + app.kubernetes.io/instance: argo + app.kubernetes.io/component: notifications-controller + helm.toolkit.fluxcd.io/name: argo + app.kubernetes.io/managed-by: Helm + helm.toolkit.fluxcd.io/namespace: argo + namespace: argo + annotations: + meta.helm.sh/release-name: argo + meta.helm.sh/release-namespace: argo + creationTimestamp: 2023-10-19T13:44:27Z +roleRef: + kind: Role + name: argo-argocd-notifications-controller + apiGroup: rbac.authorization.k8s.io +subjects: + - kind: ServiceAccount + name: argocd-notifications-controller + namespace: argo diff --git a/pkg/health/utils.go b/pkg/health/utils.go index 63e7e5b..71b2eb2 100644 --- a/pkg/health/utils.go +++ b/pkg/health/utils.go @@ -2,8 +2,10 @@ package health import ( "fmt" + "strings" "time" + "github.com/samber/lo" "k8s.io/apimachinery/pkg/util/json" corev1 "k8s.io/api/core/v1" @@ -160,3 +162,14 @@ func GetStartDeadline(containers ...corev1.Container) time.Duration { func IsContainerStarting(creation time.Time, containers ...corev1.Container) bool { return time.Since(creation) < GetStartDeadline(containers...) } + +func HumanCase(s string) string { + s = strings.ReplaceAll(s, "_", " ") + s = strings.ReplaceAll(s, "-", " ") + s = strings.ReplaceAll(s, "([A-Z])", " $1") + items := strings.Split(strings.TrimSpace(strings.ToLower(s)), " ") + for i := range items { + items[i] = lo.Capitalize(items[i]) + } + return strings.Join(items, " ") +}