From 67a87ba2b90589fb52e9273a945ce684ac5c112c Mon Sep 17 00:00:00 2001 From: Pranshu Srivastava Date: Tue, 24 Sep 2024 03:01:10 +0530 Subject: [PATCH] Prefer kube-scheduler's resource metrics to kube-state-metrics' Since they are more accurate. --- alerts/resource_alerts.libsonnet | 8 ++--- dashboards/resources/cluster.libsonnet | 12 ++++---- dashboards/resources/multi-cluster.libsonnet | 24 +++++++-------- dashboards/resources/namespace.libsonnet | 24 +++++++-------- dashboards/resources/node.libsonnet | 16 +++++----- dashboards/resources/pod.libsonnet | 14 ++++----- .../resources/workload-namespace.libsonnet | 2 +- dashboards/resources/workload.libsonnet | 2 +- rules/apps.libsonnet | 30 +++++++++---------- rules/windows.libsonnet | 8 ++--- tests.yaml | 16 +++++----- 11 files changed, 78 insertions(+), 78 deletions(-) diff --git a/alerts/resource_alerts.libsonnet b/alerts/resource_alerts.libsonnet index 04a8e3a00..c6422935a 100644 --- a/alerts/resource_alerts.libsonnet +++ b/alerts/resource_alerts.libsonnet @@ -34,7 +34,7 @@ } + if $._config.showMultiCluster then { expr: ||| - sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0 + sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0 and (sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0 ||| % $._config, @@ -43,7 +43,7 @@ }, } else { expr: ||| - sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})) > 0 + sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})) > 0 and (sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})) > 0 ||| % $._config, @@ -63,7 +63,7 @@ } + if $._config.showMultiCluster then { expr: ||| - sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)) > 0 + sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)) > 0 and (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)) > 0 ||| % $._config, @@ -73,7 +73,7 @@ } else { expr: ||| - sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0 + sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0 and (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0 ||| % $._config, diff --git a/dashboards/resources/cluster.libsonnet b/dashboards/resources/cluster.libsonnet index 9f83c905e..de337128b 100644 --- a/dashboards/resources/cluster.libsonnet +++ b/dashboards/resources/cluster.libsonnet @@ -88,7 +88,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Requests Commitment', 'percentunit', - 'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config + 'sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config ) + stat.gridPos.withW(4) + stat.gridPos.withH(3), @@ -96,7 +96,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Limits Commitment', 'percentunit', - 'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config + 'sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config ) + stat.gridPos.withW(4) + stat.gridPos.withH(3), @@ -148,19 +148,19 @@ local var = g.dashboard.variable; + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) diff --git a/dashboards/resources/multi-cluster.libsonnet b/dashboards/resources/multi-cluster.libsonnet index 0043f1613..68c93c053 100644 --- a/dashboards/resources/multi-cluster.libsonnet +++ b/dashboards/resources/multi-cluster.libsonnet @@ -68,13 +68,13 @@ local var = g.dashboard.variable; statPanel( 'CPU Requests Commitment', 'percentunit', - 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config + 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config ), statPanel( 'CPU Limits Commitment', 'percentunit', - 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config + 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config ), statPanel( @@ -86,13 +86,13 @@ local var = g.dashboard.variable; statPanel( 'Memory Requests Commitment', 'percentunit', - 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config + 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config ), statPanel( 'Memory Limits Commitment', 'percentunit', - 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config + 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config ), ], @@ -110,16 +110,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -209,16 +209,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) diff --git a/dashboards/resources/namespace.libsonnet b/dashboards/resources/namespace.libsonnet index 7119b2b3a..1e9016137 100644 --- a/dashboards/resources/namespace.libsonnet +++ b/dashboards/resources/namespace.libsonnet @@ -91,7 +91,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Utilisation (from requests)', 'percentunit', - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -99,7 +99,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Utilisation (from limits)', 'percentunit', - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -107,7 +107,7 @@ local var = g.dashboard.variable; statPanel( 'Memory Utilisation (from requests)', 'percentunit', - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -115,7 +115,7 @@ local var = g.dashboard.variable; statPanel( 'Memory Utilisation (from limits)', 'percentunit', - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -200,16 +200,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -363,16 +363,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config) diff --git a/dashboards/resources/node.libsonnet b/dashboards/resources/node.libsonnet index 806cb2bde..e70f76299 100644 --- a/dashboards/resources/node.libsonnet +++ b/dashboards/resources/node.libsonnet @@ -108,16 +108,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -208,16 +208,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config) diff --git a/dashboards/resources/pod.libsonnet b/dashboards/resources/pod.libsonnet index b7be85fd5..389b1589e 100644 --- a/dashboards/resources/pod.libsonnet +++ b/dashboards/resources/pod.libsonnet @@ -80,7 +80,7 @@ local var = g.dashboard.variable; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} + kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} ) ||| % $._config; @@ -217,10 +217,10 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -356,16 +356,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config) diff --git a/dashboards/resources/workload-namespace.libsonnet b/dashboards/resources/workload-namespace.libsonnet index 458ee6014..2f618cc48 100644 --- a/dashboards/resources/workload-namespace.libsonnet +++ b/dashboards/resources/workload-namespace.libsonnet @@ -99,7 +99,7 @@ local var = g.dashboard.variable; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} + (kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type=~"$type"} ) by (workload, workload_type) diff --git a/dashboards/resources/workload.libsonnet b/dashboards/resources/workload.libsonnet index 21ffcdbd8..415dc6d2a 100644 --- a/dashboards/resources/workload.libsonnet +++ b/dashboards/resources/workload.libsonnet @@ -111,7 +111,7 @@ local var = g.dashboard.variable; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} + (kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type=~"$type"} ) by (pod) diff --git a/rules/apps.libsonnet b/rules/apps.libsonnet index 7cd9c0804..c5a58462f 100644 --- a/rules/apps.libsonnet +++ b/rules/apps.libsonnet @@ -84,21 +84,21 @@ name: 'k8s.rules.container_memory_requests', rules: [ { - record: 'cluster:namespace:pod_memory:active:kube_pod_container_resource_requests', + record: 'cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests', expr: ||| - kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + (kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or (kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s}) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ||| % $._config, }, { - record: 'namespace_memory:kube_pod_container_resource_requests:sum', + record: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} ) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) @@ -112,21 +112,21 @@ name: 'k8s.rules.container_cpu_requests', rules: [ { - record: 'cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests', + record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests', expr: ||| - kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + (kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or (kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s}) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ||| % $._config, }, { - record: 'namespace_cpu:kube_pod_container_resource_requests:sum', + record: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} ) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) @@ -140,16 +140,16 @@ name: 'k8s.rules.container_memory_limits', rules: [ { - record: 'cluster:namespace:pod_memory:active:kube_pod_container_resource_limits', + record: 'cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits', expr: ||| - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + (kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s}) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ||| % $._config, }, { - record: 'namespace_memory:kube_pod_container_resource_limits:sum', + record: 'namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( @@ -168,21 +168,21 @@ name: 'k8s.rules.container_cpu_limits', rules: [ { - record: 'cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits', + record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits', expr: ||| - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + (kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s}) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ||| % $._config, }, { - record: 'namespace_cpu:kube_pod_container_resource_limits:sum', + record: 'namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} + kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} ) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) diff --git a/rules/windows.libsonnet b/rules/windows.libsonnet index 3e1d4ba8a..d86d29167 100644 --- a/rules/windows.libsonnet +++ b/rules/windows.libsonnet @@ -217,28 +217,28 @@ record: 'kube_pod_windows_container_resource_memory_request', expr: ||| max by (%(clusterLabel)s, namespace, pod, container) ( - kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_memory_limit', expr: ||| - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) + (kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s}) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_request', expr: ||| max by (%(clusterLabel)s, namespace, pod, container) ( - kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_limit', expr: ||| - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) + (kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s}) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { diff --git a/tests.yaml b/tests.yaml index 82dd4bb60..71ab61f9a 100644 --- a/tests.yaml +++ b/tests.yaml @@ -459,20 +459,20 @@ tests: values: '1+0x10' promql_expr_test: - eval_time: 0m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 0m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: @@ -504,20 +504,20 @@ tests: values: '1+0x10' promql_expr_test: - eval_time: 0m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 0m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m expr: namespace_memory:kube_pod_container_resource_requests:sum exp_samples: