From bb263a3beb30fa468f76a24710c61dde15bf1405 Mon Sep 17 00:00:00 2001 From: Assaf Admi <90143867+assafad@users.noreply.github.com> Date: Sun, 1 Jan 2023 08:35:03 +0200 Subject: [PATCH] Add `operator_health_impact` label to CNAO alerts (#1494) Signed-off-by: assafad Signed-off-by: assafad --- data/monitoring/prom-rule.yaml | 4 ++++ hack/prom-rule-ci/prom-rules-tests.yaml | 4 ++++ test/e2e/monitoring/rules_test.go | 7 +++++++ 3 files changed, 15 insertions(+) diff --git a/data/monitoring/prom-rule.yaml b/data/monitoring/prom-rule.yaml index 1d9b68dd2..b9618b61f 100644 --- a/data/monitoring/prom-rule.yaml +++ b/data/monitoring/prom-rule.yaml @@ -20,6 +20,7 @@ spec: for: 5m labels: severity: warning + operator_health_impact: warning kubernetes_operator_part_of: kubevirt kubernetes_operator_component: cluster-network-addons-operator - alert: NetworkAddonsConfigNotReady @@ -30,6 +31,7 @@ spec: for: 5m labels: severity: warning + operator_health_impact: warning kubernetes_operator_part_of: kubevirt kubernetes_operator_component: cluster-network-addons-operator # +help:summary="Total count of duplicate KubeMacPool MAC addresses",type=Gauge @@ -43,6 +45,7 @@ spec: for: 5m labels: severity: warning + operator_health_impact: warning kubernetes_operator_part_of: kubevirt kubernetes_operator_component: cluster-network-addons-operator # +help:summary="Total count of running KubeMacPool manager pods",type=Gauge @@ -59,5 +62,6 @@ spec: for: 5m labels: severity: critical + operator_health_impact: critical kubernetes_operator_part_of: kubevirt kubernetes_operator_component: cluster-network-addons-operator diff --git a/hack/prom-rule-ci/prom-rules-tests.yaml b/hack/prom-rule-ci/prom-rules-tests.yaml index 124ef0a4a..34d2c4bec 100644 --- a/hack/prom-rule-ci/prom-rules-tests.yaml +++ b/hack/prom-rule-ci/prom-rules-tests.yaml @@ -21,6 +21,7 @@ tests: runbook_url: "https://kubevirt.io/monitoring/runbooks/CnaoDown" exp_labels: severity: "warning" + operator_health_impact: "warning" kubernetes_operator_part_of: "kubevirt" kubernetes_operator_component: "cluster-network-addons-operator" # CnaoDown negative tests @@ -53,6 +54,7 @@ tests: runbook_url: "https://kubevirt.io/monitoring/runbooks/NetworkAddonsConfigNotReady" exp_labels: severity: "warning" + operator_health_impact: "warning" kubernetes_operator_part_of: "kubevirt" kubernetes_operator_component: "cluster-network-addons-operator" @@ -88,6 +90,7 @@ tests: runbook_url: "https://kubevirt.io/monitoring/runbooks/KubeMacPoolDuplicateMacsFound" exp_labels: severity: "warning" + operator_health_impact: "warning" kubernetes_operator_part_of: "kubevirt" kubernetes_operator_component: "cluster-network-addons-operator" @@ -121,6 +124,7 @@ tests: runbook_url: "https://kubevirt.io/monitoring/runbooks/KubeMacPoolDown" exp_labels: severity: "critical" + operator_health_impact: "critical" kubernetes_operator_part_of: "kubevirt" kubernetes_operator_component: "cluster-network-addons-operator" diff --git a/test/e2e/monitoring/rules_test.go b/test/e2e/monitoring/rules_test.go index 4adae9d04..106ffeed3 100644 --- a/test/e2e/monitoring/rules_test.go +++ b/test/e2e/monitoring/rules_test.go @@ -59,6 +59,7 @@ var _ = Context("Prometheus Rules", func() { if len(rule.Alert) > 0 { Expect(rule.Labels).ToNot(BeNil()) checkForSeverityLabel(rule) + checkForHealthImpactLabel(rule) checkForPartOfLabel(rule) checkForComponentLabel(rule) } @@ -97,6 +98,12 @@ func checkForSeverityLabel(rule monitoringv1.Rule) { ExpectWithOffset(1, severity).To(BeElementOf("info", "warning", "critical"), fmt.Sprintf("%s severity label is not valid", rule.Alert)) } +func checkForHealthImpactLabel(rule monitoringv1.Rule) { + operatorHealthImpact, ok := rule.Labels["operator_health_impact"] + ExpectWithOffset(1, ok).To(BeTrue(), fmt.Sprintf("%s does not have operator_health_impact label", rule.Alert)) + ExpectWithOffset(1, operatorHealthImpact).To(BeElementOf("none", "warning", "critical"), fmt.Sprintf("%s operator_health_impact label is not valid", rule.Alert)) +} + func checkForPartOfLabel(rule monitoringv1.Rule) { kubernetesOperatorPartOf, ok := rule.Labels["kubernetes_operator_part_of"] ExpectWithOffset(1, ok).To(BeTrue(), fmt.Sprintf("%s does not have kubernetes_operator_part_of label", rule.Alert))