From 8f38753be2d050560d6f52573bf920ce91293357 Mon Sep 17 00:00:00 2001 From: Pranshu Srivastava Date: Tue, 10 Jan 2023 16:48:50 +0530 Subject: [PATCH] Use kube-scheduler's metrics instead of kube-state-metrics Use kube-scheduler's metrics instead of kube-state-metrics, as they are more precise. Refer to the links below for more details. * https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/1748-pod-resource-metrics?rgh-link-date=2022-10-07T13%3A34%3A39Z#expose-new-metrics * https://github.com/kubernetes/kube-state-metrics/pull/1849 --- alerts/resource_alerts.libsonnet | 20 +- dashboards/resources/cluster.libsonnet | 24 +- dashboards/resources/multi-cluster.libsonnet | 6 +- dashboards/resources/namespace.libsonnet | 24 +- dashboards/resources/node.libsonnet | 16 +- dashboards/resources/pod.libsonnet | 2 +- .../resources/workload-namespace.libsonnet | 2 +- dashboards/resources/workload.libsonnet | 2 +- rules/apps.libsonnet | 82 +-- rules/windows.libsonnet | 68 +-- tests.yaml | 537 ++++++++---------- 11 files changed, 374 insertions(+), 409 deletions(-) diff --git a/alerts/resource_alerts.libsonnet b/alerts/resource_alerts.libsonnet index 04a8e3a00..ffdcfd53f 100644 --- a/alerts/resource_alerts.libsonnet +++ b/alerts/resource_alerts.libsonnet @@ -34,18 +34,18 @@ } + if $._config.showMultiCluster then { expr: ||| - sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0 + sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) - 
max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0 and - (sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0 + (sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0 ||| % $._config, annotations+: { description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.' % $._config, }, } else { expr: ||| - sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})) > 0 + sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 and - (sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})) > 0 + (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 ||| % $._config, annotations+: { description: 'Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.' 
% $._config, @@ -63,9 +63,9 @@ } + if $._config.showMultiCluster then { expr: ||| - sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)) > 0 + sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0 and - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)) > 0 + (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0 ||| % $._config, annotations+: { description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.' 
% $._config, @@ -73,9 +73,9 @@ } else { expr: ||| - sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0 + sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 and - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0 + (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 ||| % $._config, annotations+: { description: 'Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.', @@ -199,9 +199,9 @@ { alert: 'CPUThrottlingHigh', expr: ||| - sum(increase(container_cpu_cfs_throttled_periods_total{container!="", %(cpuThrottlingSelector)s}[5m])) by (%(clusterLabel)s, container, pod, namespace) + sum(increase(container_cpu_cfs_throttled_periods_total{container!="", %(cpuThrottlingSelector)s}[5m])) by (container, pod, namespace) / - sum(increase(container_cpu_cfs_periods_total{%(cpuThrottlingSelector)s}[5m])) by (%(clusterLabel)s, container, pod, namespace) + sum(increase(container_cpu_cfs_periods_total{%(cpuThrottlingSelector)s}[5m])) by (container, pod, namespace) > ( %(cpuThrottlingPercent)s / 100 ) ||| % $._config, 'for': '15m', diff --git a/dashboards/resources/cluster.libsonnet b/dashboards/resources/cluster.libsonnet index 9f83c905e..0fb9a44da 100644 --- a/dashboards/resources/cluster.libsonnet +++ b/dashboards/resources/cluster.libsonnet @@ -88,7 +88,7 @@ local var = g.dashboard.variable; 
statPanel( 'CPU Requests Commitment', 'percentunit', - 'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config + 'sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config ) + stat.gridPos.withW(4) + stat.gridPos.withH(3), @@ -96,7 +96,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Limits Commitment', 'percentunit', - 'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config + 'sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config ) + stat.gridPos.withW(4) + stat.gridPos.withH(3), @@ -112,7 +112,7 @@ local var = g.dashboard.variable; statPanel( 'Memory Requests Commitment', 'percentunit', - 'sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config + 'sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config ) + stat.gridPos.withW(4) + stat.gridPos.withH(3), @@ -120,7 +120,7 @@ local var = g.dashboard.variable; statPanel( 'Memory Limits Commitment', 'percentunit', - 'sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / 
sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config + 'sum(namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config ) + stat.gridPos.withW(4) + stat.gridPos.withH(3), @@ -148,19 +148,19 @@ local var = g.dashboard.variable; + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - 
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -265,19 +265,19 @@ local var = g.dashboard.variable; + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 
'sum(namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) diff --git a/dashboards/resources/multi-cluster.libsonnet b/dashboards/resources/multi-cluster.libsonnet index 0043f1613..70145473e 100644 --- a/dashboards/resources/multi-cluster.libsonnet +++ b/dashboards/resources/multi-cluster.libsonnet @@ -68,7 +68,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Requests Commitment', 'percentunit', - 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config + 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config ), statPanel( @@ -110,10 +110,10 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 
'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), prometheus.new('${datasource}', 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) diff --git a/dashboards/resources/namespace.libsonnet b/dashboards/resources/namespace.libsonnet index 7119b2b3a..1e9016137 100644 --- a/dashboards/resources/namespace.libsonnet +++ b/dashboards/resources/namespace.libsonnet @@ -91,7 +91,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Utilisation (from requests)', 'percentunit', - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / 
sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -99,7 +99,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Utilisation (from limits)', 'percentunit', - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -107,7 +107,7 @@ local var = g.dashboard.variable; statPanel( 'Memory Utilisation (from requests)', 'percentunit', - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", 
namespace="$namespace", resource="memory"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -115,7 +115,7 @@ local var = g.dashboard.variable; statPanel( 'Memory Utilisation (from limits)', 'percentunit', - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -200,16 +200,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / 
sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -363,16 +363,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", 
image!=""}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / 
sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config) diff --git a/dashboards/resources/node.libsonnet b/dashboards/resources/node.libsonnet index 806cb2bde..e70f76299 100644 --- a/dashboards/resources/node.libsonnet +++ b/dashboards/resources/node.libsonnet @@ -108,16 +108,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by 
(pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -208,16 +208,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 
'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config) diff --git a/dashboards/resources/pod.libsonnet b/dashboards/resources/pod.libsonnet index b7be85fd5..3bf17000f 100644 --- a/dashboards/resources/pod.libsonnet +++ b/dashboards/resources/pod.libsonnet @@ -80,7 +80,7 @@ local var = g.dashboard.variable; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} + kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} ) ||| % $._config; diff --git a/dashboards/resources/workload-namespace.libsonnet b/dashboards/resources/workload-namespace.libsonnet index 458ee6014..2f618cc48 100644 --- a/dashboards/resources/workload-namespace.libsonnet +++ b/dashboards/resources/workload-namespace.libsonnet @@ -99,7 +99,7 @@ local var = g.dashboard.variable; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} + (kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", 
namespace="$namespace", resource="cpu"}) * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type=~"$type"} ) by (workload, workload_type) diff --git a/dashboards/resources/workload.libsonnet b/dashboards/resources/workload.libsonnet index 21ffcdbd8..415dc6d2a 100644 --- a/dashboards/resources/workload.libsonnet +++ b/dashboards/resources/workload.libsonnet @@ -111,7 +111,7 @@ local var = g.dashboard.variable; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} + (kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type=~"$type"} ) by (pod) diff --git a/rules/apps.libsonnet b/rules/apps.libsonnet index 7cd9c0804..5ee04d083 100644 --- a/rules/apps.libsonnet +++ b/rules/apps.libsonnet @@ -84,23 +84,26 @@ name: 'k8s.rules.container_memory_requests', rules: [ { - record: 'cluster:namespace:pod_memory:active:kube_pod_container_resource_requests', + record: 'cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests', expr: ||| - kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or (kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( 
(kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) + )) ||| % $._config, }, { - record: 'namespace_memory:kube_pod_container_resource_requests:sum', + record: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} - ) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} + ) or + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} + ) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ) ) @@ -112,23 +115,28 @@ name: 'k8s.rules.container_cpu_requests', rules: [ { - record: 'cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests', + record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests', expr: ||| - kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) - group_left() max by (namespace, pod, %(clusterLabel)s) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or ( + kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) ) ||| % $._config, }, { - record: 'namespace_cpu:kube_pod_container_resource_requests:sum', + record: 
'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} - ) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} + ) or + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} + ) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ) ) @@ -140,23 +148,28 @@ name: 'k8s.rules.container_memory_limits', rules: [ { - record: 'cluster:namespace:pod_memory:active:kube_pod_container_resource_limits', + record: 'cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits', expr: ||| - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) - group_left() max by (namespace, pod, %(clusterLabel)s) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or ( + kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) ) ||| % $._config, }, { - record: 'namespace_memory:kube_pod_container_resource_limits:sum', + record: 'namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, 
pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} - ) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 + kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} + ) or + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} + ) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ) ) @@ -168,23 +181,28 @@ name: 'k8s.rules.container_cpu_limits', rules: [ { - record: 'cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits', + record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits', expr: ||| - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) - group_left() max by (namespace, pod, %(clusterLabel)s) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) + kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or ( + kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + ) ||| % $._config, }, { - record: 'namespace_cpu:kube_pod_container_resource_limits:sum', + record: 'namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} - ) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, 
%(clusterLabel)s) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 + kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} + ) or + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} + ) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ) ) diff --git a/rules/windows.libsonnet b/rules/windows.libsonnet index 3e1d4ba8a..02c1df946 100644 --- a/rules/windows.libsonnet +++ b/rules/windows.libsonnet @@ -8,7 +8,7 @@ // This rule gives the number of windows nodes record: 'node:windows_node:sum', expr: ||| - count by (%(clusterLabel)s) ( + count ( windows_system_system_up_time{%(windowsExporterSelector)s} ) ||| % $._config, @@ -17,7 +17,7 @@ // This rule gives the number of CPUs per node. record: 'node:windows_node_num_cpu:sum', expr: ||| - count by (%(clusterLabel)s, instance) (sum by (%(clusterLabel)s, instance, core) ( + count by (instance) (sum by (instance, core) ( windows_cpu_time_total{%(windowsExporterSelector)s} )) ||| % $._config, @@ -26,14 +26,14 @@ // CPU utilisation is % CPU is not idle. record: ':windows_node_cpu_utilisation:avg1m', expr: ||| - 1 - avg by (%(clusterLabel)s) (rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m])) + 1 - avg(rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m])) ||| % $._config, }, { // CPU utilisation is % CPU is not idle. 
record: 'node:windows_node_cpu_utilisation:avg1m', expr: ||| - 1 - avg by (%(clusterLabel)s, instance) ( + 1 - avg by (instance) ( rate(windows_cpu_time_total{%(windowsExporterSelector)s,mode="idle"}[1m]) ) ||| % $._config, @@ -42,9 +42,9 @@ record: ':windows_node_memory_utilisation:', expr: ||| 1 - - sum by (%(clusterLabel)s) (windows_memory_available_bytes{%(windowsExporterSelector)s}) + sum(windows_memory_available_bytes{%(windowsExporterSelector)s}) / - sum by (%(clusterLabel)s) (windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) + sum(windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) ||| % $._config, }, // Add separate rules for Free & Total, so we can aggregate across clusters @@ -52,7 +52,7 @@ { record: ':windows_node_memory_MemFreeCached_bytes:sum', expr: ||| - sum by (%(clusterLabel)s) (windows_memory_available_bytes{%(windowsExporterSelector)s} + windows_memory_cache_bytes{%(windowsExporterSelector)s}) + sum(windows_memory_available_bytes{%(windowsExporterSelector)s} + windows_memory_cache_bytes{%(windowsExporterSelector)s}) ||| % $._config, }, { @@ -64,7 +64,7 @@ { record: ':windows_node_memory_MemTotal_bytes:sum', expr: ||| - sum by (%(clusterLabel)s) (windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) + sum(windows_os_visible_memory_bytes{%(windowsExporterSelector)s}) ||| % $._config, }, { @@ -72,7 +72,7 @@ // SINCE 2018-02-08 record: 'node:windows_node_memory_bytes_available:sum', expr: ||| - sum by (%(clusterLabel)s, instance) ( + sum by (instance) ( (windows_memory_available_bytes{%(windowsExporterSelector)s}) ) ||| % $._config, @@ -81,7 +81,7 @@ // Total memory per node record: 'node:windows_node_memory_bytes_total:sum', expr: ||| - sum by (%(clusterLabel)s, instance) ( + sum by (instance) ( windows_os_visible_memory_bytes{%(windowsExporterSelector)s} ) ||| % $._config, @@ -111,7 +111,7 @@ // Disk utilisation (ms spent, by rate() it's bound by 1 second) record: ':windows_node_disk_utilisation:avg_irate', expr: ||| 
- avg by (%(clusterLabel)s) (irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) + + avg(irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) + irate(windows_logical_disk_write_seconds_total{%(windowsExporterSelector)s}[1m]) ) ||| % $._config, @@ -120,7 +120,7 @@ // Disk utilisation (ms spent, by rate() it's bound by 1 second) record: 'node:windows_node_disk_utilisation:avg_irate', expr: ||| - avg by (%(clusterLabel)s, instance) ( + avg by (instance) ( (irate(windows_logical_disk_read_seconds_total{%(windowsExporterSelector)s}[1m]) + irate(windows_logical_disk_write_seconds_total{%(windowsExporterSelector)s}[1m])) ) @@ -129,7 +129,7 @@ { record: 'node:windows_node_filesystem_usage:', expr: ||| - max by (%(clusterLabel)s,instance,volume)( + max by (instance,volume)( (windows_logical_disk_size_bytes{%(windowsExporterSelector)s} - windows_logical_disk_free_bytes{%(windowsExporterSelector)s}) / windows_logical_disk_size_bytes{%(windowsExporterSelector)s} @@ -139,19 +139,19 @@ { record: 'node:windows_node_filesystem_avail:', expr: ||| - max by (%(clusterLabel)s, instance, volume) (windows_logical_disk_free_bytes{%(windowsExporterSelector)s} / windows_logical_disk_size_bytes{%(windowsExporterSelector)s}) + max by (instance, volume) (windows_logical_disk_free_bytes{%(windowsExporterSelector)s} / windows_logical_disk_size_bytes{%(windowsExporterSelector)s}) ||| % $._config, }, { record: ':windows_node_net_utilisation:sum_irate', expr: ||| - sum by (%(clusterLabel)s) (irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m])) + sum(irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m])) ||| % $._config, }, { record: 'node:windows_node_net_utilisation:sum_irate', expr: ||| - sum by (%(clusterLabel)s, instance) ( + sum by (instance) ( (irate(windows_net_bytes_total{%(windowsExporterSelector)s}[1m])) ) ||| % $._config, @@ -159,14 +159,14 @@ { record: ':windows_node_net_saturation:sum_irate', expr: ||| - sum 
by (%(clusterLabel)s) (irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m])) + - sum by (%(clusterLabel)s) (irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m])) + sum(irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m])) + + sum(irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m])) ||| % $._config, }, { record: 'node:windows_node_net_saturation:sum_irate', expr: ||| - sum by (%(clusterLabel)s, instance) ( + sum by (instance) ( (irate(windows_net_packets_received_discarded_total{%(windowsExporterSelector)s}[1m]) + irate(windows_net_packets_outbound_discarded_total{%(windowsExporterSelector)s}[1m])) ) @@ -180,71 +180,71 @@ { record: 'windows_pod_container_available', expr: ||| - windows_container_available{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) + windows_container_available{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) ||| % $._config, }, { record: 'windows_container_total_runtime', expr: ||| - windows_container_cpu_usage_seconds_total{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) + windows_container_cpu_usage_seconds_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) ||| % $._config, }, { record: 
'windows_container_memory_usage', expr: ||| - windows_container_memory_usage_commit_bytes{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) + windows_container_memory_usage_commit_bytes{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) ||| % $._config, }, { record: 'windows_container_private_working_set_usage', expr: ||| - windows_container_memory_usage_private_working_set_bytes{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) + windows_container_memory_usage_private_working_set_bytes{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) ||| % $._config, }, { record: 'windows_container_network_received_bytes_total', expr: ||| - windows_container_network_receive_bytes_total{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) + windows_container_network_receive_bytes_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) ||| % $._config, }, { record: 'windows_container_network_transmitted_bytes_total', expr: ||| - 
windows_container_network_transmit_bytes_total{%(windowsExporterSelector)s, container_id != ""} * on(container_id, %(clusterLabel)s) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s, container_id != ""}) by(container, container_id, pod, namespace, %(clusterLabel)s) + windows_container_network_transmit_bytes_total{%(windowsExporterSelector)s} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{%(kubeStateMetricsSelector)s}) by(container, container_id, pod, namespace) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_memory_request', expr: ||| - max by (%(clusterLabel)s, namespace, pod, container) ( - kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} - ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) + max by (namespace, pod, container) ( + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} + ) * on(container,pod,namespace) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_memory_limit', expr: ||| - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) + (kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s}) * on(container,pod,namespace) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_request', expr: ||| - max by (%(clusterLabel)s, namespace, pod, container) ( - kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} - ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) + max by (namespace, pod, container) ( + 
kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} + ) * on(container,pod,namespace) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_limit', expr: ||| - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) + (kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s}) * on(container,pod,namespace) (windows_pod_container_available) ||| % $._config, }, { record: 'namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate', expr: ||| - sum by (%(clusterLabel)s, namespace, pod, container) ( + sum by (namespace, pod, container) ( rate(windows_container_total_runtime{}[5m]) ) ||| % $._config, diff --git a/tests.yaml b/tests.yaml index 82dd4bb60..a8c35e060 100644 --- a/tests.yaml +++ b/tests.yaml @@ -8,13 +8,13 @@ tests: # PersistentVolume disk space - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' 
values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -29,24 +29,23 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is only 1.562% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring is only 1.562% free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 
'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -62,11 +61,11 @@ tests: # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0 0 0 0' alert_rule_test: - eval_time: 1m @@ -81,15 +80,15 @@ tests: # Don't alert when PVC has been labelled as fully utilised - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 
'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' - - series: 'kube_persistentvolumeclaim_labels{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -103,11 +102,11 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 
'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' alert_rule_test: - eval_time: 1h @@ -116,23 +115,22 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is only 1.294% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring is only 1.294% free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 
'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -141,33 +139,31 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes persistentvolumeclaim: somepvc severity: warning exp_annotations: summary: "PersistentVolume is filling up." - description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is expected to fill up within four days. Currently 1.263% is available.' + description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring is expected to fill up within four days. Currently 1.263% is available.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolume is filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is only 1.263% free.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring is only 1.263% free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup # Block volume mounts can report 0 for the kubelet_volume_stats_used_bytes metric but it shouldn't trigger the KubePersistentVolumeFillingUp alert. # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. 
- interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0x61' alert_rule_test: - eval_time: 61m @@ -176,13 +172,13 @@ tests: # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", 
access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -190,15 +186,15 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_available_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_available_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' - - series: 'kube_persistentvolumeclaim_labels{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -207,13 +203,13 @@ tests: # PersistentVolume inodes - 
interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -228,24 +224,23 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes only has 1.562% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring only has 1.562% free inodes.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -261,11 +256,11 @@ tests: # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. 
- interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0 0 0 0' alert_rule_test: - eval_time: 1m @@ -280,15 +275,15 @@ tests: # Don't alert when PVC has been labelled as fully utilised - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 512 64 16' - - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024 1024 1024 1024' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '16 64 512 1024' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", 
access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1 1 1 1' - - series: 'kube_persistentvolumeclaim_labels{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1 1 1 1' alert_rule_test: - eval_time: 1m @@ -302,11 +297,11 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' alert_rule_test: - eval_time: 1h @@ -315,23 +310,22 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes only has 1.294% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring only has 1.294% free inodes.' 
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -340,33 +334,31 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes persistentvolumeclaim: somepvc severity: warning exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes is expected to run out of inodes within four days. Currently 1.263% of its inodes are free.' 
+ description: 'Based on recent sampling, the PersistentVolume claimed by somepvc in Namespace monitoring is expected to run out of inodes within four days. Currently 1.263% of its inodes are free.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes persistentvolumeclaim: somepvc severity: critical exp_annotations: summary: "PersistentVolumeInodes are filling up." - description: 'The PersistentVolume claimed by somepvc in Namespace monitoring on Cluster kubernetes only has 1.263% free inodes.' + description: 'The PersistentVolume claimed by somepvc in Namespace monitoring only has 1.263% free inodes.' runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeinodesfillingup # Block volume mounts can report 0 for the kubelet_volume_stats_inodes_used metric but it shouldn't trigger the KubePersistentVolumeInodesFillingUp alert. # See https://github.com/kubernetes/kubernetes/commit/b997e0e4d6ccbead435a47d6ac75b0db3d17252f for details. 
- interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_inodes_used{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_used{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '0x61' alert_rule_test: - eval_time: 61m @@ -375,13 +367,13 @@ tests: # Don't alert when PVC access_mode is ReadOnlyMany - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_inodes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' + - 
series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadOnlyMany", service="kube-state-metrics"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -389,15 +381,15 @@ tests: - interval: 1m input_series: - - series: 'kubelet_volume_stats_inodes_free{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_inodes_free{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024-10x61' - - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_capacity_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '32768+0x61' - - series: 'kubelet_volume_stats_used_bytes{job="kubelet",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc"}' + - series: 'kubelet_volume_stats_used_bytes{job="kubelet",namespace="monitoring",persistentvolumeclaim="somepvc"}' values: '1024+10x61' - - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' + - series: 'kube_persistentvolumeclaim_access_mode{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc", access_mode="ReadWriteOnce", service="kube-state-metrics"}' values: '1x61' - - series: 'kube_persistentvolumeclaim_labels{job="ksm",cluster="kubernetes",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' + - series: 'kube_persistentvolumeclaim_labels{job="ksm",namespace="monitoring",persistentvolumeclaim="somepvc",label_excluded_from_alerts="true"}' values: '1x61' alert_rule_test: - eval_time: 61m @@ -405,19 +397,19 @@ tests: - interval: 1m input_series: - - series: 
'kube_node_status_capacity{resource="pods",instance="172.17.0.5:8443",cluster="kubernetes",node="minikube",job="kube-state-metrics",namespace="kube-system"}' + - series: 'kube_node_status_capacity{resource="pods",instance="172.17.0.5:8443",node="minikube",job="kube-state-metrics", namespace="kube-system"}' values: '3+0x15' - - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",node="minikube",pod="pod-1",service="kube-state-metrics"}' + - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",node="minikube",pod="pod-1",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",phase="Running",pod="pod-1",service="kube-state-metrics"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",phase="Running",pod="pod-1",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",node="minikube",pod="pod-2",service="kube-state-metrics"}' + - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",node="minikube",pod="pod-2",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",phase="Running",pod="pod-2",service="kube-state-metrics"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",phase="Running",pod="pod-2",service="kube-state-metrics"}' values: '1+0x15' - - series: 
'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",node="minikube",pod="pod-3",service="kube-state-metrics"}' + - series: 'kube_pod_info{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",node="minikube",pod="pod-3",service="kube-state-metrics"}' values: '1+0x15' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",cluster="kubernetes",namespace="kube-system",phase="Running",pod="pod-3",service="kube-state-metrics"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="172.17.0.5:8443",job="kube-state-metrics",namespace="kube-system",phase="Running",pod="pod-3",service="kube-state-metrics"}' values: '1+0x15' alert_rule_test: - eval_time: 10m @@ -426,7 +418,6 @@ tests: alertname: KubeletTooManyPods exp_alerts: - exp_labels: - cluster: kubernetes node: minikube severity: info exp_annotations: @@ -436,129 +427,128 @@ tests: - interval: 1m input_series: - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '0.15+0x10' - - series: 
'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.1+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.5E9+0x10' # Duplicate kube_pod_status_phase timeseries for the same pod. 
- - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' values: '1 stale' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' values: '1+0x10' promql_expr_test: - eval_time: 0m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request:sum{namespace="kube-apiserver"}' - eval_time: 0m - expr: namespace_memory:kube_pod_container_resource_requests:sum + expr: 
namespace_memory:kube_pod_resource_request:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_resource_request:sum{namespace="kube-apiserver"}' - eval_time: 1m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request:sum{namespace="kube-apiserver"}' - eval_time: 1m - expr: namespace_memory:kube_pod_container_resource_requests:sum + expr: namespace_memory:kube_pod_resource_request:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_resource_request:sum{namespace="kube-apiserver"}' - interval: 1m input_series: - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm",cluster="test"}' values: '0.15+0x10' - - series: 
'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' values: '0.1+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm",cluster="test"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm",cluster="test"}' values: '0.5E9+0x10' # Duplicate kube_pod_status_phase timeseries for the same pod. 
- - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm",cluster="test"}' values: '1 stale' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Pending",pod="pod-1",service="ksm",cluster="test"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",namespace="kube-apiserver",phase="Completed",pod="pod-2",service="ksm",cluster="test"}' values: '1+0x10' - - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-2",job="kube-state-metrics",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm",cluster="test"}' values: '1+0x10' promql_expr_test: - eval_time: 0m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request:sum{namespace="kube-apiserver",cluster="test"}' - eval_time: 0m - expr: 
namespace_memory:kube_pod_container_resource_requests:sum + expr: namespace_memory:kube_pod_resource_request:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_resource_request:sum{namespace="kube-apiserver",cluster="test"}' - eval_time: 1m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request:sum{namespace="kube-apiserver",cluster="test"}' - eval_time: 1m - expr: namespace_memory:kube_pod_container_resource_requests:sum + expr: namespace_memory:kube_pod_resource_request:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_resource_request:sum{namespace="kube-apiserver",cluster="test"}' - interval: 1m input_series: # Create a histogram where all of the last 10 samples are in the +Inf (> 10 seconds) bucket. 
- - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.005", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.005", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.01", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.01", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.025", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.025", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.05", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.05", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.1", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.1", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.25", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.25", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.5", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="0.5", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="1", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series:
'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="1", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="2.5", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="2.5", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="5", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="5", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="10", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="10", instance="10.0.2.15:10250"}' values: '1+0x10' - - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="+Inf", cluster="kubernetes",instance="10.0.2.15:10250"}' + - series: 'kubelet_pleg_relist_duration_seconds_bucket{job="kubelet", le="+Inf", instance="10.0.2.15:10250"}' values: '30+1x10' - - series: 'kubelet_node_name{endpoint="https-metrics",cluster="kubernetes",instance="10.0.2.15:10250",job="kubelet",namespace="kube-system",node="minikube",service="kubelet"}' + - series: 'kubelet_node_name{endpoint="https-metrics",instance="10.0.2.15:10250",job="kubelet",namespace="kube-system",node="minikube",service="kubelet"}' values: '1 1 1 1 1 1 1 1 1 1' alert_rule_test: - eval_time: 10m alertname: KubeletPlegDurationHigh exp_alerts: - exp_labels: - cluster: "kubernetes" instance: 10.0.2.15:10250 node: minikube quantile: 0.99 @@ -570,14 +560,13 @@ tests: - interval: 1m input_series: - - series:
'kube_node_status_condition{condition="Ready",endpoint="https-main",cluster="kubernetes",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' + - series: 'kube_node_status_condition{condition="Ready",endpoint="https-main",instance="10.0.2.15:10250",job="kube-state-metrics",namespace="monitoring",node="minikube",pod="kube-state-metrics-b894d84cc-d6htw",service="kube-state-metrics",status="true"}' values: '1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1' alert_rule_test: - eval_time: 18m alertname: KubeNodeReadinessFlapping exp_alerts: - exp_labels: - cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -588,27 +577,27 @@ tests: # Verify that node:node_num_cpu:sum triggers no many-to-many errors. - interval: 1m input_series: - - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' + - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' values: '1 1' - - series: 'node_cpu_seconds_total{cpu="1",endpoint="https",instance="instance1",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' + - series: 'node_cpu_seconds_total{cpu="1",endpoint="https",instance="instance1",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' values: '1 1' - - series: 'kube_pod_info{cluster="kubernetes",namespace="openshift-monitoring",node="node-1",pod="node-exporter-1",job="kube-state-metrics",instance="10.129.2.7:8443"}' + - series: 
'kube_pod_info{namespace="openshift-monitoring",node="node-1",pod="node-exporter-1",job="kube-state-metrics",instance="10.129.2.7:8443"}' values: '1 1' - - series: 'kube_pod_info{cluster="kubernetes",namespace="openshift-monitoring",node="node-1",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' + - series: 'kube_pod_info{namespace="openshift-monitoring",node="node-1",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' values: '1 stale' - - series: 'kube_pod_info{cluster="kubernetes",namespace="openshift-monitoring",node="node-2",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' + - series: 'kube_pod_info{namespace="openshift-monitoring",node="node-2",pod="alertmanager-0",job="kube-state-metrics",instance="10.129.2.7:8443"}' values: '1 1' promql_expr_test: - eval_time: 0m expr: node:node_num_cpu:sum exp_samples: - value: 2 - labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}' + labels: 'node:node_num_cpu:sum{node="node-1"}' - eval_time: 1m expr: node:node_num_cpu:sum exp_samples: - value: 2 - labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}' + labels: 'node:node_num_cpu:sum{node="node-1"}' # Verify that node:node_num_cpu:sum doesn't trigger many-to-many errors when # node_namespace_pod:kube_pod_info: has duplicate entries for the same @@ -616,73 +605,72 @@ tests: # it didn't add stale markers to the "old" series on shutdown. 
- interval: 1m input_series: - - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' + - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance1",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-1",service="node-exporter"}' values: '1 1' - - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance2",job="node-exporter",mode="idle",cluster="kubernetes",namespace="openshift-monitoring",pod="node-exporter-2",service="node-exporter"}' + - series: 'node_cpu_seconds_total{cpu="0",endpoint="https",instance="instance2",job="node-exporter",mode="idle",namespace="openshift-monitoring",pod="node-exporter-2",service="node-exporter"}' values: '1 1' - - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-1",namespace="openshift-monitoring",pod="node-exporter-1"}' + - series: 'node_namespace_pod:kube_pod_info:{node="node-1",namespace="openshift-monitoring",pod="node-exporter-1"}' values: '1 1' - - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-2",namespace="openshift-monitoring",pod="node-exporter-2"}' + - series: 'node_namespace_pod:kube_pod_info:{node="node-2",namespace="openshift-monitoring",pod="node-exporter-2"}' values: '1 1' # series for the "old" prometheus instance. - - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-1",namespace="openshift-monitoring",pod="prometheus-0"}' + - series: 'node_namespace_pod:kube_pod_info:{node="node-1",namespace="openshift-monitoring",pod="prometheus-0"}' values: '1' # series for the "new" prometheus instance. 
- - series: 'node_namespace_pod:kube_pod_info:{cluster="kubernetes",node="node-2",namespace="openshift-monitoring",pod="prometheus-0"}' + - series: 'node_namespace_pod:kube_pod_info:{node="node-2",namespace="openshift-monitoring",pod="prometheus-0"}' values: 'stale 1' promql_expr_test: - eval_time: 0m expr: node:node_num_cpu:sum exp_samples: - value: 1 - labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}' + labels: 'node:node_num_cpu:sum{node="node-1"}' - value: 1 - labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-2"}' + labels: 'node:node_num_cpu:sum{node="node-2"}' - eval_time: 1m expr: node:node_num_cpu:sum exp_samples: - value: 1 - labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-1"}' + labels: 'node:node_num_cpu:sum{node="node-1"}' - value: 1 - labels: 'node:node_num_cpu:sum{cluster="kubernetes",node="node-2"}' + labels: 'node:node_num_cpu:sum{node="node-2"}' - interval: 1m input_series: - - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}' + - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}' values: '1 1' - - series: 'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}' + - series: 'kube_pod_owner{endpoint="https",instance="instance2",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="ds-7cc77d965f-cgsdv",service="ksm"}' values: '1 stale' - - series: 
'kube_replicaset_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}' + - series: 'kube_replicaset_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}' values: '1 1' - - series: 'kube_replicaset_owner{endpoint="https",instance="instance2",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}' + - series: 'kube_replicaset_owner{endpoint="https",instance="instance2",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="Deployment",owner_name="ds",pod="ds-777f6bf798-kq7tj",replicaset="ds-7cc77d965f",service="ksm"}' values: '1 stale' promql_expr_test: - eval_time: 0m expr: namespace_workload_pod:kube_pod_owner:relabel exp_samples: - value: 1 - labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}' + labels: 'namespace_workload_pod:kube_pod_owner:relabel{namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}' - eval_time: 1m expr: namespace_workload_pod:kube_pod_owner:relabel exp_samples: - value: 1 - labels: 'namespace_workload_pod:kube_pod_owner:relabel{cluster="kubernetes",namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}' + labels: 'namespace_workload_pod:kube_pod_owner:relabel{namespace="ns1", pod="ds-7cc77d965f-cgsdv", workload="ds", workload_type="deployment"}' - interval: 1m input_series: - - series: 
'kube_pod_status_phase{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",phase="Pending",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' + - series: 'kube_pod_status_phase{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",phase="Pending",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' values: '1+0x20' - - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="false",owner_kind="",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' + - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="false",owner_kind="",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' values: '1+0x20' - - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",cluster="kubernetes",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' + - series: 'kube_pod_owner{endpoint="https",instance="instance1",job="kube-state-metrics",namespace="ns1",owner_is_controller="true",owner_kind="ReplicaSet",owner_name="ds-7cc77d965f",pod="pod-ds-7cc77d965f-cgsdv",service="ksm"}' values: '1+0x20' alert_rule_test: - eval_time: 15m alertname: KubePodNotReady exp_alerts: - exp_labels: - cluster: kubernetes namespace: ns1 pod: pod-ds-7cc77d965f-cgsdv severity: warning @@ -693,120 +681,120 @@ tests: - interval: 1m input_series: - - series: 
'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' + - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' values: '0+3x5' - - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' + - series: 'container_cpu_usage_seconds_total{container="alertmanager",cpu="total",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' values: '0+3x5' # Duplicate timeseries from different instances. 
- - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' - - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' + - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' values: '1+0x5' # Missing node label. - - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' promql_expr_test: - eval_time: 5m expr: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate exp_samples: - value: 5.0e-2 - labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster="kubernetes",namespace="monitoring", pod="alertmanager-main-0", container="alertmanager",node="node1"}' + labels: 'node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="monitoring", pod="alertmanager-main-0", container="alertmanager", node="node1"}' - interval: 1m input_series: - - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' + - series: 
'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' values: '1000+0x5' - - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' + - series: 'container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' values: '1000+0x5' # Duplicate timeseries from different instances. 
- - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' - - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' + - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' values: '1+0x5' # Missing node label. - - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' promql_expr_test: - eval_time: 5m expr: node_namespace_pod_container:container_memory_working_set_bytes exp_samples: - value: 1.0e+3 - labels: 'node_namespace_pod_container:container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' + labels: 
'node_namespace_pod_container:container_memory_working_set_bytes{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' - interval: 1m input_series: - - series: 'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' + - series: 'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' values: '1000+0x5' - - series: 'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' + - series: 
'container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' values: '1000+0x5' # Duplicate timeseries from different instances. - - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' - - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' + - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' values: '1+0x5' # Missing node label. 
- - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' promql_expr_test: - eval_time: 5m expr: node_namespace_pod_container:container_memory_rss exp_samples: - value: 1.0e+3 - labels: 'node_namespace_pod_container:container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' + labels: 'node_namespace_pod_container:container_memory_rss{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' - interval: 1m input_series: - - series: 'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' + - series: 
'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' values: '1000+0x5' - - series: 'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' + - series: 'container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' values: '1000+0x5' # Duplicate timeseries from different instances. 
- - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' - - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' + - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' values: '1+0x5' # Missing node label. - - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' promql_expr_test: - eval_time: 5m expr: node_namespace_pod_container:container_memory_cache exp_samples: - value: 1.0e+3 - labels: 'node_namespace_pod_container:container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' + labels: 
'node_namespace_pod_container:container_memory_cache{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' - interval: 1m input_series: - - series: 'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' + - series: 'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-0",service="kubelet"}' values: '1000+0x5' - - series: 'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' + - series: 
'container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",pod="alertmanager-main-1",service="kubelet"}' values: '1000+0x5' # Duplicate timeseries from different instances. - - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' - - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' + - series: 'kube_pod_info{namespace="monitoring",node="node1",pod="alertmanager-main-0",job="kube-state-metrics",instance="instance2"}' values: '1+0x5' # Missing node label. 
- - series: 'kube_pod_info{cluster="kubernetes",namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' + - series: 'kube_pod_info{namespace="monitoring",pod="alertmanager-main-1",job="kube-state-metrics",instance="instance1"}' values: '1+0x5' promql_expr_test: - eval_time: 5m expr: node_namespace_pod_container:container_memory_swap exp_samples: - value: 1.0e+3 - labels: 'node_namespace_pod_container:container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",cluster="kubernetes",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' + labels: 'node_namespace_pod_container:container_memory_swap{container="alertmanager",endpoint="https",id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3426a9c5_53d6_4736_9ca8_f575828e3e4b.slice/crio-f0d7fb2c909605aad16946ff065a42b25cdcdb812459e712ecdd6bce8a3ed6cb.scope",image="quay.io/prometheus/alertmanager:latest",instance="instance1",job="cadvisor",name="name1",namespace="monitoring",node="node1",pod="alertmanager-main-0",service="kubelet"}' - interval: 1m # Current unequal desired and not progressing. 
input_series: - - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4' - - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' - - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4' - - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4' alert_rule_test: - eval_time: 32m @@ -817,7 +805,6 @@ tests: - exp_labels: job: kube-state-metrics namespace: monitoring - cluster: 
kubernetes daemonset: node-exporter severity: warning exp_annotations: @@ -826,43 +813,18 @@ tests: runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck - eval_time: 34m alertname: KubeDaemonSetRolloutStuck -# KubeDeploymentRolloutStuck -- interval: 1m - input_series: - - series: 'kube_deployment_status_condition{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",deployment="stuck", condition="Progressing", status="false"}' - values: '1+0x17 0+0x5' - alert_rule_test: - - eval_time: 14m - alertname: KubeDeploymentRolloutStuck - - eval_time: 16m - alertname: KubeDeploymentRolloutStuck - exp_alerts: - - exp_labels: - job: kube-state-metrics - namespace: monitoring - cluster: kubernetes - deployment: stuck - severity: warning - condition: Progressing - status: "false" - exp_annotations: - summary: 'Deployment rollout is not progressing.' - description: 'Rollout of deployment monitoring/stuck is not progressing for longer than 15 minutes.' - runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentrolloutstuck - - eval_time: 18m - alertname: KubeDeploymentRolloutStuck - interval: 1m # Misscheduled is non zero. 
input_series: - - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4' - - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0' - - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4' - - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4' alert_rule_test: - eval_time: 32m @@ -873,7 +835,6 @@ tests: - exp_labels: job: kube-state-metrics namespace: monitoring - cluster: 
kubernetes daemonset: node-exporter severity: warning exp_annotations: @@ -885,15 +846,15 @@ tests: - interval: 1m # Updated number unequal desired. input_series: - - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' - - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4' - - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 
3 3 3 3 3 4' alert_rule_test: - eval_time: 32m @@ -904,7 +865,6 @@ tests: - exp_labels: job: kube-state-metrics namespace: monitoring - cluster: kubernetes daemonset: node-exporter severity: warning exp_annotations: @@ -916,15 +876,15 @@ tests: - interval: 1m # Number available unequal desired. input_series: - - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_current_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_number_misscheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' - - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 'kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 0 0 0 1 1 1 1 2 2 2 2 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4' - - series: 'kube_daemonset_status_number_available{job="kube-state-metrics",cluster="kubernetes",namespace="monitoring",daemonset="node-exporter"}' + - series: 
'kube_daemonset_status_number_available{job="kube-state-metrics",namespace="monitoring",daemonset="node-exporter"}' values: '4 4 4 3 3 3 4 3 3 3 4 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4' alert_rule_test: - eval_time: 34m @@ -935,7 +895,6 @@ tests: - exp_labels: job: kube-state-metrics namespace: monitoring - cluster: kubernetes daemonset: node-exporter severity: warning exp_annotations: @@ -947,7 +906,7 @@ tests: - interval: 1m input_series: - - series: 'kubelet_certificate_manager_client_ttl_seconds{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}' + - series: 'kubelet_certificate_manager_client_ttl_seconds{job="kubelet",namespace="monitoring",node="minikube"}' values: '86400-60x1' alert_rule_test: - eval_time: 0m @@ -956,7 +915,6 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -969,7 +927,6 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -979,7 +936,6 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes node: minikube severity: critical exp_annotations: @@ -989,7 +945,7 @@ tests: - interval: 1m input_series: - - series: 'kubelet_certificate_manager_server_ttl_seconds{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}' + - series: 'kubelet_certificate_manager_server_ttl_seconds{job="kubelet",namespace="monitoring",node="minikube"}' values: '86400-60x1' alert_rule_test: - eval_time: 0m @@ -998,7 +954,6 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -1011,7 +966,6 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -1021,7 +975,6 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes node: minikube severity: critical 
exp_annotations: @@ -1031,7 +984,7 @@ tests: - interval: 1m input_series: - - series: 'kubelet_certificate_manager_client_expiration_renew_errors{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}' + - series: 'kubelet_certificate_manager_client_expiration_renew_errors{job="kubelet",namespace="monitoring",node="minikube"}' values: '0+1x20' alert_rule_test: - eval_time: 16m @@ -1040,7 +993,6 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -1051,7 +1003,7 @@ tests: - interval: 1m input_series: - - series: 'kubelet_server_expiration_renew_errors{job="kubelet",cluster="kubernetes",namespace="monitoring",node="minikube"}' + - series: 'kubelet_server_expiration_renew_errors{job="kubelet",namespace="monitoring",node="minikube"}' values: '0+1x20' alert_rule_test: - eval_time: 16m @@ -1060,7 +1012,6 @@ tests: - exp_labels: job: kubelet namespace: monitoring - cluster: kubernetes node: minikube severity: warning exp_annotations: @@ -1070,14 +1021,13 @@ tests: - interval: 1m input_series: - - series: 'kube_job_failed{instance="instance1",condition="true",job="kube-state-metrics",job_name="job-1597623120",cluster="kubernetes",namespace="ns1"}' + - series: 'kube_job_failed{instance="instance1",condition="true",job="kube-state-metrics",job_name="job-1597623120",namespace="ns1"}' values: '1+0x20' alert_rule_test: - eval_time: 15m alertname: KubeJobFailed exp_alerts: - exp_labels: - cluster: "kubernetes" namespace: ns1 job_name: job-1597623120 severity: warning @@ -1091,9 +1041,9 @@ tests: - interval: 1m input_series: - - series: 'kube_job_status_start_time{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' + - series: 'kube_job_status_start_time{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' values: '0+0x200 _x500 0+0x40' - - series: 
'kube_job_status_active{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' + - series: 'kube_job_status_active{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' values: '1x200 _x500 1x40' alert_rule_test: - eval_time: 6h @@ -1102,7 +1052,6 @@ tests: alertname: KubeJobNotCompleted exp_alerts: - exp_labels: - cluster: "kubernetes" namespace: ns1 job_name: job1 severity: warning @@ -1113,9 +1062,9 @@ tests: - interval: 1m input_series: - - series: 'kube_job_status_start_time{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' + - series: 'kube_job_status_start_time{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' values: '0+0x740' - - series: 'kube_job_status_active{cluster="kubernetes",namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' + - series: 'kube_job_status_active{namespace="ns1", job="kube-state-metrics", instance="instance1", job_name="job1"}' values: '1+0x710 0x30' alert_rule_test: - eval_time: 6h @@ -1145,7 +1094,7 @@ tests: - interval: 1m input_series: - - series: 'kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",cluster="kubernetes",namespace="test",pod="static-web",container="script",job="kube-state-metrics"}' + - series: 'kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",namespace="test",pod="static-web",container="script",job="kube-state-metrics"}' values: '1 1 stale _x3 1 1 stale _x2 1+0x4 stale' alert_rule_test: - eval_time: 10m # alert hasn't fired @@ -1157,7 +1106,6 @@ tests: severity: "warning" container: "script" job: "kube-state-metrics" - cluster: "kubernetes" namespace: "test" pod: "static-web" reason: "CrashLoopBackOff" @@ -1166,13 +1114,12 @@ tests: runbook_url: "https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping" summary: "Pod is crash looping." 
- eval_time: 20m - alertname: KubePodCrashLooping # alert fired for a period of 5 minutes after resolution because the alert looks back at the last 5 minutes of data and the range vector doesn't take stale samples into account + alertname: KubePodCrashLooping # alert fired for a period of 5 minutes after resolution because the alert looks back at the last 5 minutes of data and the range vector doesn't take stale samples into account exp_alerts: - exp_labels: severity: "warning" container: "script" job: "kube-state-metrics" - cluster: "kubernetes" namespace: "test" pod: "static-web" reason: "CrashLoopBackOff" @@ -1186,15 +1133,15 @@ tests: # When ResourceQuota has both cpu and requests.cpu, min value of those will be taken into account for quota calculation. - interval: 1m input_series: - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}' values: '1000x10' - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}' values: '50x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="cpu", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="cpu", 
job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="cpu", job="kube-state-metrics"}' values: '100x10' alert_rule_test: - eval_time: 4m @@ -1203,15 +1150,15 @@ tests: alertname: KubeCPUQuotaOvercommit - interval: 1m input_series: - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test", resource="cpu", type="hard", job="kube-state-metrics"}' values: '1000x10' - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test", resource="requests.cpu", type="hard", job="kube-state-metrics"}' values: '200x10' - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test1", resource="requests.cpu", type="hard", job="kube-state-metrics"}' values: '200x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="cpu", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="cpu", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="cpu", job="kube-state-metrics"}' values: '100x10' alert_rule_test: - eval_time: 4m @@ -1229,15 +1176,15 @@ tests: # When ResourceQuota has both memory and requests.memory, min value of those will be taken into account for quota calculation. 
- interval: 1m input_series: - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test", resource="memory", type="hard", job="kube-state-metrics"}' values: '1000x10' - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}' values: '50x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="memory", job="kube-state-metrics"}' values: '100x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="memory", job="kube-state-metrics"}' values: '100x10' alert_rule_test: - eval_time: 4m @@ -1246,15 +1193,15 @@ tests: alertname: KubeMemoryQuotaOvercommit - interval: 1m input_series: - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test", resource="memory", type="hard", job="kube-state-metrics"}' values: '1000x10' - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test", resource="requests.memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test", 
resource="requests.memory", type="hard", job="kube-state-metrics"}' values: '500x10' - - series: 'kube_resourcequota{cluster="kubernetes",namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}' + - series: 'kube_resourcequota{namespace="test1", resource="requests.memory", type="hard", job="kube-state-metrics"}' values: '500x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n1", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{namespace="monitoring", node="n1", resource="memory", job="kube-state-metrics"}' values: '10x10' - - series: 'kube_node_status_allocatable{cluster="kubernetes",namespace="monitoring",node="n2", resource="memory", job="kube-state-metrics"}' + - series: 'kube_node_status_allocatable{namespace="monitoring", node="n2", resource="memory", job="kube-state-metrics"}' values: '10x10' alert_rule_test: - eval_time: 4m