diff --git a/alerts/resource_alerts.libsonnet b/alerts/resource_alerts.libsonnet index 7ac86cfc9..6faae5299 100644 --- a/alerts/resource_alerts.libsonnet +++ b/alerts/resource_alerts.libsonnet @@ -34,7 +34,7 @@ } + if $._config.showMultiCluster then { expr: ||| - sum(namespace_cpu:kube_pod_resource_request:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0 + sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0 and (sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0 ||| % $._config, @@ -43,7 +43,7 @@ }, } else { expr: ||| - sum(namespace_cpu:kube_pod_resource_request:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 + sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 and (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 ||| % $._config, @@ -63,7 +63,7 @@ } + if $._config.showMultiCluster then { expr: ||| - sum(namespace_memory:kube_pod_resource_request:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0 + sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0 and (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0 ||| % $._config, @@ -73,7 +73,7 @@ } else { expr: ||| - sum(namespace_memory:kube_pod_resource_request:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 + sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 and (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 ||| % $._config, diff --git a/dashboards/resources/cluster.libsonnet b/dashboards/resources/cluster.libsonnet index b98b1a1b8..afbd96d81 100644 --- a/dashboards/resources/cluster.libsonnet +++ b/dashboards/resources/cluster.libsonnet @@ -144,11 +144,11 @@ local template = grafana.template; ) .addPanel( g.panel('CPU Requests Commitment') + - g.statPanel('sum(namespace_cpu:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config) + g.statPanel('sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config) ) .addPanel( g.panel('CPU Limits Commitment') + - g.statPanel('sum(namespace_cpu:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config) + g.statPanel('sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config) ) .addPanel( g.panel('Memory Utilisation') + @@ -156,11 +156,11 @@ local template = grafana.template; ) .addPanel( g.panel('Memory Requests Commitment') + - g.statPanel('sum(namespace_memory:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config) + g.statPanel('sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config) ) .addPanel( g.panel('Memory Limits Commitment') + - g.statPanel('sum(namespace_memory:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config) + g.statPanel('sum(namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="memory",%(clusterLabel)s="$cluster"})' % $._config) ) ) .addRow( @@ -177,10 +177,10 @@ local template = grafana.template; g.panel('CPU Quota') + g.tablePanel(podWorkloadColumns + [ 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(namespace_cpu:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(namespace_cpu:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, ], tableStyles { 'Value #C': { alias: 'CPU Usage' }, 'Value #D': { alias: 'CPU Requests' }, @@ -207,10 +207,10 @@ local template = grafana.template; g.tablePanel(podWorkloadColumns + [ // Not using container_memory_usage_bytes here because that includes page cache 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace)' % $._config, - 'sum(namespace_memory:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_request:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(namespace_memory:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_limit:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, + 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", container!=""}) by (namespace) / sum(namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, ], tableStyles { 'Value #C': { alias: 'Memory Usage', unit: 'bytes' }, 'Value #D': { alias: 'Memory Requests', unit: 'bytes' }, diff --git a/dashboards/resources/multi-cluster.libsonnet b/dashboards/resources/multi-cluster.libsonnet index 3c3f7b3df..d23c8e491 100644 --- a/dashboards/resources/multi-cluster.libsonnet +++ b/dashboards/resources/multi-cluster.libsonnet @@ -26,11 +26,11 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson ) .addPanel( g.panel('CPU Requests Commitment') + - g.statPanel('sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"})' % $._config) + g.statPanel('sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config) ) .addPanel( g.panel('CPU Limits Commitment') + - g.statPanel('sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"})' % $._config) + g.statPanel('sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config) ) .addPanel( g.panel('Memory Utilisation') + @@ -38,11 +38,11 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson ) .addPanel( g.panel('Memory Requests Commitment') + - g.statPanel('sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"})' % $._config) + g.statPanel('sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config) ) .addPanel( g.panel('Memory Limits Commitment') + - g.statPanel('sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"})' % $._config) + g.statPanel('sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config) ) ) .addRow( @@ -59,10 +59,10 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('CPU Quota') + g.tablePanel([ 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, 'Value #B': { alias: 'CPU Requests' }, @@ -88,10 +88,10 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.tablePanel([ // Not using container_memory_usage_bytes here because that includes page cache 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, - 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, - 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, - 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, + 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, + 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, + 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config, ], tableStyles { 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, diff --git a/dashboards/resources/namespace.libsonnet b/dashboards/resources/namespace.libsonnet index 650b55a25..6fdc02668 100644 --- a/dashboards/resources/namespace.libsonnet +++ b/dashboards/resources/namespace.libsonnet @@ -141,19 +141,19 @@ local template = grafana.template; }) .addPanel( g.panel('CPU Utilisation (from requests)') + - g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) + g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) ) .addPanel( g.panel('CPU Utilisation (from limits)') + - g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) + g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config) ) .addPanel( g.panel('Memory Utilisation (from requests)') + - g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config) + g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config) ) .addPanel( g.panel('Memory Utilisation (from limits)') + - g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config) + g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config) ) ) .addRow( @@ -199,10 +199,10 @@ local template = grafana.template; g.panel('CPU Quota') + g.tablePanel([ 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, 'Value #B': { alias: 'CPU Requests' }, @@ -258,10 +258,10 @@ local template = grafana.template; g.panel('Memory Quota') + g.tablePanel([ 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, 'sum(container_memory_cache{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, 'sum(container_memory_swap{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config, diff --git a/dashboards/resources/node.libsonnet b/dashboards/resources/node.libsonnet index 0f865b5a2..3bd83498b 100644 --- a/dashboards/resources/node.libsonnet +++ b/dashboards/resources/node.libsonnet @@ -75,10 +75,10 @@ local template = grafana.template; g.panel('CPU Quota') + g.tablePanel([ 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, ], tableStyles { 'Value #A': { alias: 'CPU Usage' }, 'Value #B': { alias: 'CPU Requests' }, @@ -125,10 +125,10 @@ local template = grafana.template; g.panel('Memory Quota') + g.tablePanel([ 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, - 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, + 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, 'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, 'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, diff --git a/dashboards/resources/pod.libsonnet b/dashboards/resources/pod.libsonnet index c638b3256..bc54649ce 100644 --- a/dashboards/resources/pod.libsonnet +++ b/dashboards/resources/pod.libsonnet @@ -50,7 +50,7 @@ local template = grafana.template; local cpuRequestsQuery = ||| sum( - kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} + kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} ) ||| % $._config; diff --git a/dashboards/resources/workload-namespace.libsonnet b/dashboards/resources/workload-namespace.libsonnet index 2a2a4d7d7..4416a44f4 100644 --- a/dashboards/resources/workload-namespace.libsonnet +++ b/dashboards/resources/workload-namespace.libsonnet @@ -137,7 +137,7 @@ local template = grafana.template; local cpuRequestsQuery = ||| sum( - kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} + (kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"} ) by (workload, workload_type) diff --git a/dashboards/resources/workload.libsonnet b/dashboards/resources/workload.libsonnet index c5de8628d..6e6a93983 100644 --- a/dashboards/resources/workload.libsonnet +++ b/dashboards/resources/workload.libsonnet @@ -135,7 +135,7 @@ local template = grafana.template; local cpuRequestsQuery = ||| sum( - kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} + (kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"} ) by (pod) diff --git a/rules/apps.libsonnet b/rules/apps.libsonnet index 21ee961ec..cffca195e 100644 --- a/rules/apps.libsonnet +++ b/rules/apps.libsonnet @@ -59,72 +59,110 @@ ||| % $._config, }, { - record: 'cluster:namespace:pod_memory:active:kube_pod_resource_request', + record: 'cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests', expr: ||| - kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or (kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + )) ||| % $._config, }, { - record: 'namespace_memory:kube_pod_resource_request:sum', + record: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} + ) or ( + kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) ) ) ) ||| % $._config, }, { - record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_request', + record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests', expr: ||| - kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or ( + kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + ) ||| % $._config, }, { - record: 'namespace_cpu:kube_pod_resource_request:sum', + record: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} + ) or ( + kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) ) ) ) ||| % $._config, }, { - record: 'cluster:namespace:pod_memory:active:kube_pod_resource_limit', + record: 'cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits', expr: ||| - kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} + kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or ( + kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + ) ||| % $._config, }, { - record: 'namespace_memory:kube_pod_resource_limit:sum', + record: 'namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} + ) or ( + kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) ) ) ) ||| % $._config, }, { - record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_limit', + record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits', expr: ||| - kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} + kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or ( + kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + ) ||| % $._config, }, { - record: 'namespace_cpu:kube_pod_resource_limit:sum', + record: 'namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum', expr: ||| sum by (namespace, %(clusterLabel)s) ( sum by (namespace, pod, %(clusterLabel)s) ( max by (namespace, pod, container, %(clusterLabel)s) ( kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} + ) or ( + kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s) + group_left() max by (namespace, pod, %(clusterLabel)s) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) ) ) ) diff --git a/rules/windows.libsonnet b/rules/windows.libsonnet index 226e1e60e..02c1df946 100644 --- a/rules/windows.libsonnet +++ b/rules/windows.libsonnet @@ -217,28 +217,28 @@ record: 'kube_pod_windows_container_resource_memory_request', expr: ||| max by (namespace, pod, container) ( - kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} ) * on(container,pod,namespace) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_memory_limit', expr: ||| - kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} * on(container,pod,namespace) (windows_pod_container_available) + (kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s}) * on(container,pod,namespace) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_request', expr: ||| max by (namespace, pod, container) ( - kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} ) * on(container,pod,namespace) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_limit', expr: ||| - kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} * on(container,pod,namespace) (windows_pod_container_available) + (kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s}) * on(container,pod,namespace) (windows_pod_container_available) ||| % $._config, }, {