From ce7c3f8dc6b5b5e243afcfb8bf2ac408dd4009e0 Mon Sep 17 00:00:00 2001 From: Pranshu Srivastava Date: Tue, 10 Jan 2023 16:48:50 +0530 Subject: [PATCH] Use kube-scheduler's metrics instead of kube-state-metrics Use kube-scheduler's metrics instead of kube-state-metrics, as they are more precise. Refer to the links below for more details. * https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/1748-pod-resource-metrics?rgh-link-date=2022-10-07T13%3A34%3A39Z#expose-new-metrics * https://github.com/kubernetes/kube-state-metrics/pull/1849 --- alerts/resource_alerts.libsonnet | 8 +- dashboards/resources/cluster.libsonnet | 24 +++--- dashboards/resources/multi-cluster.libsonnet | 24 +++--- dashboards/resources/namespace.libsonnet | 24 +++--- dashboards/resources/node.libsonnet | 16 ++-- dashboards/resources/pod.libsonnet | 18 +--- .../resources/workload-namespace.libsonnet | 2 +- dashboards/resources/workload.libsonnet | 2 +- rules/apps.libsonnet | 82 +++++++++++-------- rules/windows.libsonnet | 8 +- tests.yaml | 56 ++++++------- 11 files changed, 133 insertions(+), 131 deletions(-) diff --git a/alerts/resource_alerts.libsonnet b/alerts/resource_alerts.libsonnet index 616776f12..4d5ec8ac5 100644 --- a/alerts/resource_alerts.libsonnet +++ b/alerts/resource_alerts.libsonnet @@ -34,7 +34,7 @@ } + if $._config.showMultiCluster then { expr: ||| - sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(kubeStateMetricsSelector)s,%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0 + sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(kubeStateMetricsSelector)s,%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - 
(sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0 and (sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu"}) by (%(clusterLabel)s)) > 0 ||| % $._config, @@ -43,7 +43,7 @@ }, } else { expr: ||| - sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})) > 0 + sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})) > 0 and (sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s})) > 0 ||| % $._config, @@ -63,7 +63,7 @@ } + if $._config.showMultiCluster then { expr: ||| - sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)) > 0 + sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory", 
%(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)) > 0 and (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s)) > 0 ||| % $._config, @@ -73,7 +73,7 @@ } else { expr: ||| - sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0 + sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0 and (sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) - max(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s})) > 0 ||| % $._config, diff --git a/dashboards/resources/cluster.libsonnet b/dashboards/resources/cluster.libsonnet index eb19b7c22..78c1e0f53 100644 --- a/dashboards/resources/cluster.libsonnet +++ b/dashboards/resources/cluster.libsonnet @@ -146,11 +146,11 @@ local template = grafana.template; ) .addPanel( g.panel('CPU Requests Commitment') + - g.statPanel('sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config) + g.statPanel('sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config) ) .addPanel( g.panel('CPU Limits Commitment') + - 
g.statPanel('sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config) + g.statPanel('sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config) ) .addPanel( g.panel('Memory Utilisation') + @@ -158,11 +158,11 @@ local template = grafana.template; ) .addPanel( g.panel('Memory Requests Commitment') + - g.statPanel('sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config) + g.statPanel('sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config) ) .addPanel( g.panel('Memory Limits Commitment') + - g.statPanel('sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config) + g.statPanel('sum(namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config) ) ) .addRow( @@ -179,10 +179,10 @@ local template = grafana.template; g.panel('CPU Quota') + g.tablePanel(podWorkloadColumns + [ 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 
'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, ], tableStyles { 'Value #C': { alias: 'CPU Usage' }, 'Value #D': { alias: 'CPU Requests' }, @@ -209,10 +209,10 @@ local template = grafana.template; g.tablePanel(podWorkloadColumns + [ // Not using container_memory_usage_bytes here because that includes page cache 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, - 
'sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, ], tableStyles { 'Value #C': { alias: 'Memory Usage', unit: 'bytes' }, 'Value #D': { alias: 'Memory Requests', unit: 'bytes' }, diff --git a/dashboards/resources/multi-cluster.libsonnet b/dashboards/resources/multi-cluster.libsonnet index b13276c3c..d0a82e245 100644 --- a/dashboards/resources/multi-cluster.libsonnet +++ b/dashboards/resources/multi-cluster.libsonnet @@ -28,11 +28,11 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson 
) .addPanel( g.panel('CPU Requests Commitment') + - g.statPanel('sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config) + g.statPanel('sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config) ) .addPanel( g.panel('CPU Limits Commitment') + - g.statPanel('sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config) + g.statPanel('sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config) ) .addPanel( g.panel('Memory Utilisation') + @@ -40,11 +40,11 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson ) .addPanel( g.panel('Memory Requests Commitment') + - g.statPanel('sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config) + g.statPanel('sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config) ) .addPanel( g.panel('Memory Limits Commitment') + - 
g.statPanel('sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config) + g.statPanel('sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config) ) ) .addRow( @@ -61,10 +61,10 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('CPU Quota') + g.tablePanel([ 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by 
(%(clusterLabel)s)' % $._config, + 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, 'Value #B': { alias: 'CPU Requests' }, @@ -90,10 +90,10 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.tablePanel([ // Not using container_memory_usage_bytes here because that includes page cache 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, - 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, - 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, + 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or 
kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, + 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, ], tableStyles { 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, diff --git a/dashboards/resources/namespace.libsonnet b/dashboards/resources/namespace.libsonnet index c7a1fc4c9..c5e723eb3 100644 --- a/dashboards/resources/namespace.libsonnet +++ b/dashboards/resources/namespace.libsonnet @@ -143,19 +143,19 @@ local template = grafana.template; }) .addPanel( g.panel('CPU Utilisation (from requests)') + - g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) + g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) ) .addPanel( g.panel('CPU Utilisation (from limits)') + - g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", 
namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) + g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) ) .addPanel( g.panel('Memory Utilisation (from requests)') + - g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config) + g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config) ) .addPanel( g.panel('Memory Utilisation (from limits)') + - g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config) + g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / 
sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config) ) ) .addRow( @@ -201,10 +201,10 @@ local template = grafana.template; g.panel('CPU Quota') + g.tablePanel([ 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 
'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, 'Value #B': { alias: 'CPU Requests' }, @@ -260,10 +260,10 @@ local template = grafana.template; g.panel('Memory Quota') + g.tablePanel([ 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 
'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, 'sum(container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, 'sum(container_memory_swap{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, diff --git a/dashboards/resources/node.libsonnet b/dashboards/resources/node.libsonnet index 55ac10b5d..33bdad2c1 100644 --- a/dashboards/resources/node.libsonnet +++ b/dashboards/resources/node.libsonnet @@ -77,10 +77,10 @@ local template = grafana.template; g.panel('CPU Quota') + g.tablePanel([ 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / 
sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, 'Value #B': { alias: 'CPU Requests' }, @@ -127,10 +127,10 @@ local template = grafana.template; g.panel('Memory Quota') + g.tablePanel([ 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % 
$._config, - 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, 'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, 
'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, diff --git a/dashboards/resources/pod.libsonnet b/dashboards/resources/pod.libsonnet index 1a83bf24f..3408d8960 100644 --- a/dashboards/resources/pod.libsonnet +++ b/dashboards/resources/pod.libsonnet @@ -50,7 +50,7 @@ local template = grafana.template; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} + kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} ) ||| % $._config; @@ -176,16 +176,8 @@ local template = grafana.template; g.panel('CPU Quota') + g.tablePanel([ 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / 
sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, }) ) ) @@ -236,19 +228,11 @@ local template = grafana.template; g.panel('Memory Quota') + g.tablePanel([ 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""}) by (container)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config, 'sum(container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", 
namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config, 'sum(container_memory_swap{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config, ], tableStyles { 'Value #A': { alias: 'Memory Usage (WSS)', unit: 'bytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, - 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, - 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, + 'Value #B': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, + 'Value #C': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, + 'Value #D': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, diff --git a/dashboards/resources/workload-namespace.libsonnet b/dashboards/resources/workload-namespace.libsonnet index d8b062c89..f741c0080 100644 --- a/dashboards/resources/workload-namespace.libsonnet +++ b/dashboards/resources/workload-namespace.libsonnet @@ -137,7 +137,7 @@ local template = grafana.template; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} + (kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type=~"$type"} ) by (workload, workload_type) diff --git a/dashboards/resources/workload.libsonnet b/dashboards/resources/workload.libsonnet index a2581cc3d..25e722fa0 100644 --- a/dashboards/resources/workload.libsonnet +++ b/dashboards/resources/workload.libsonnet @@ -135,7 +135,7 @@
local template = grafana.template; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} + (kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type=~"$type"} ) by (pod) diff --git a/rules/apps.libsonnet b/rules/apps.libsonnet index e24914a37..832860749 100644 --- a/rules/apps.libsonnet +++ b/rules/apps.libsonnet @@ -84,92 +84,110 @@ name: 'k8s.rules.container_resource', rules: [ { - record: 'cluster:namespace:pod_memory:active:kube_pod_container_resource_requests', + record: 'cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests', expr: ||| - kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or (kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) + )) ||| % $._config, }, { - record: 'namespace_memory:kube_pod_container_resource_requests:sum', + record: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} - ) * 
on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} + ) or + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} + ) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ) ) ||| % $._config, }, { - record: 'cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests', + record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests', expr: ||| - kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) - group_left() max by (namespace, pod, %(clusterLabel)s) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or ( + kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) ) ||| % $._config, }, { - record: 'namespace_cpu:kube_pod_container_resource_requests:sum', + record: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} - ) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} + ) or + max by (namespace, 
pod, container, %(clusterLabel)s) ( + kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} + ) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ) ) ||| % $._config, }, { - record: 'cluster:namespace:pod_memory:active:kube_pod_container_resource_limits', + record: 'cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits', expr: ||| - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) - group_left() max by (namespace, pod, %(clusterLabel)s) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or ( + kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) ) ||| % $._config, }, { - record: 'namespace_memory:kube_pod_container_resource_limits:sum', + record: 'namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} - ) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 + kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} + ) or + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} + ) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( + 
(kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ) ) ||| % $._config, }, { - record: 'cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits', + record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits', expr: ||| - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) - group_left() max by (namespace, pod, %(clusterLabel)s) ( - (kube_pod_status_phase{phase=~"Pending|Running"} == 1) - ) + kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or ( + kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + ) ||| % $._config, }, { - record: 'namespace_cpu:kube_pod_container_resource_limits:sum', + record: 'namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} - ) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( - kube_pod_status_phase{phase=~"Pending|Running"} == 1 + kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} + ) or + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} + ) * on (namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) ) ) ) diff --git a/rules/windows.libsonnet b/rules/windows.libsonnet index 3e1d4ba8a..d86d29167 100644 --- a/rules/windows.libsonnet +++ b/rules/windows.libsonnet @@ -217,28 +217,28 @@ record: 
'kube_pod_windows_container_resource_memory_request', expr: ||| max by (%(clusterLabel)s, namespace, pod, container) ( - kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_memory_limit', expr: ||| - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) + (kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s}) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_request', expr: ||| max by (%(clusterLabel)s, namespace, pod, container) ( - kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_limit', expr: ||| - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) + (kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s}) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { diff --git a/tests.yaml b/tests.yaml index f1893b63c..217658790 
100644 --- a/tests.yaml +++ b/tests.yaml @@ -436,17 +436,17 @@ tests: - interval: 1m input_series: - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.1+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: 
'1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.5E9+0x10' # Duplicate kube_pod_status_phase timeseries for the same pod. 
- series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' @@ -459,39 +459,39 @@ tests: values: '1+0x10' promql_expr_test: - eval_time: 0m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 0m - expr: namespace_memory:kube_pod_container_resource_requests:sum + expr: namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m - expr: namespace_memory:kube_pod_container_resource_requests:sum + expr: namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - interval: 1m input_series: - - series:
'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '0.15+0x10' - - series: 'kube_pod_container_resource_requests{resource="cpu",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="cpu",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.1+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '1E9+0x10' - - series: 
'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-2",service="ksm"}' values: '1E9+0x10' - - series: 'kube_pod_container_resource_requests{resource="memory",container="kube-apiserver-67",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' + - series: 'kube_pod_resource_request{resource="memory",endpoint="https-main",instance="ksm-2",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",node="node-1",pod="pod-1",service="ksm"}' values: '0.5E9+0x10' # Duplicate kube_pod_status_phase timeseries for the same pod. 
- series: 'kube_pod_status_phase{endpoint="https-main",instance="ksm-1",job="kube-state-metrics",cluster="kubernetes",namespace="kube-apiserver",phase="Running",pod="pod-1",service="ksm"}' @@ -504,25 +504,25 @@ tests: values: '1+0x10' promql_expr_test: - eval_time: 0m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 0m - expr: namespace_memory:kube_pod_container_resource_requests:sum + expr: namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m - expr: namespace_memory:kube_pod_container_resource_requests:sum + expr: namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + labels: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - interval: 1m input_series: