fixup! Use kube-scheduler's metrics instead of kube-state-metrics
rexagod committed Jan 11, 2023
1 parent 9ddfce6 commit 758d714
Showing 7 changed files with 31 additions and 31 deletions.
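
Context for the change: every kube_pod_resource_requests / kube_pod_resource_limits query below now interpolates %(kubeSchedulerSelector)s instead of %(kubeStateMetricsSelector)s, since these metrics are exposed by kube-scheduler (typically via its /metrics/resources endpoint) rather than by kube-state-metrics. As a minimal sketch of how those placeholders resolve (the actual values live in the mixin's config and may differ; the selector values shown here are assumptions for illustration only):

  {
    _config+:: {
      // Assumed value: label selector for series scraped from kube-state-metrics.
      kubeStateMetricsSelector: 'job="kube-state-metrics"',
      // Assumed value: label selector for series scraped from the kube-scheduler,
      // which serves kube_pod_resource_requests and kube_pod_resource_limits.
      kubeSchedulerSelector: 'job="kube-scheduler"',
    },
  }

Whatever the real values are, they have to match the job (or equivalent) labels that Prometheus attaches to the scheduler's resource-metrics series, otherwise the rewritten queries return nothing.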
24 changes: 12 additions & 12 deletions dashboards/resources/multi-cluster.libsonnet
@@ -26,23 +26,23 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
)
.addPanel(
g.panel('CPU Requests Commitment') +
-g.statPanel('sum(kube_pod_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config)
+g.statPanel('sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"})' % $._config)
)
.addPanel(
g.panel('CPU Limits Commitment') +
-g.statPanel('sum(kube_pod_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config)
+g.statPanel('sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"})' % $._config)
)
.addPanel(
g.panel('Memory Utilisation') +
g.statPanel('1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(node_memory_MemTotal_bytes{%(nodeExporterSelector)s})' % $._config)
)
.addPanel(
g.panel('Memory Requests Commitment') +
-g.statPanel('sum(kube_pod_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config)
+g.statPanel('sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"})' % $._config)
)
.addPanel(
g.panel('Memory Limits Commitment') +
-g.statPanel('sum(kube_pod_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config)
+g.statPanel('sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"})' % $._config)
)
)
.addRow(
@@ -59,10 +59,10 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s)' % $._config,
-'sum(kube_pod_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
-'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
-'sum(kube_pod_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
-'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
+'sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
+'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
+'sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
+'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate) by (%(clusterLabel)s) / sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
@@ -88,10 +88,10 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.tablePanel([
// Not using container_memory_usage_bytes here because that includes page cache
'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s)' % $._config,
-'sum(kube_pod_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
-'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
-'sum(kube_pod_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
-'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
+'sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
+'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
+'sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
+'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config,
], tableStyles {
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
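For illustration only (not part of this diff): a quick jsonnet check of how one of the rewritten templates renders once a selector value such as the assumed job="kube-scheduler" above is substituted.

  // Standalone sketch; the dashboards pass $._config instead of this local object.
  local cfg = { kubeSchedulerSelector: 'job="kube-scheduler"' };  // assumed value

  'sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, resource="cpu"})' % cfg
  // evaluates to: sum(kube_pod_resource_requests{job="kube-scheduler", resource="cpu"})
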
8 changes: 4 additions & 4 deletions dashboards/resources/namespace.libsonnet
@@ -141,19 +141,19 @@ local template = grafana.template;
})
.addPanel(
g.panel('CPU Utilisation (from requests)') +
-g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
+g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
)
.addPanel(
g.panel('CPU Utilisation (from limits)') +
-g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
+g.statPanel('sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config)
)
.addPanel(
g.panel('Memory Utilisation (from requests)') +
-g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config)
+g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_requests{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config)
)
.addPanel(
g.panel('Memory Utilisation (from limits)') +
-g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config)
+g.statPanel('sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_limits{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config)
)
)
.addRow(
2 changes: 1 addition & 1 deletion dashboards/resources/pod.libsonnet
@@ -50,7 +50,7 @@ local template = grafana.template;

local cpuRequestsQuery = |||
sum(
-kube_pod_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}
+kube_pod_resource_requests{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"}
)
||| % $._config;

2 changes: 1 addition & 1 deletion dashboards/resources/workload-namespace.libsonnet
@@ -137,7 +137,7 @@ local template = grafana.template;

local cpuRequestsQuery = |||
sum(
-kube_pod_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}
+kube_pod_resource_requests{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type="$type"}
) by (workload, workload_type)
2 changes: 1 addition & 1 deletion dashboards/resources/workload.libsonnet
@@ -135,7 +135,7 @@ local template = grafana.template;

local cpuRequestsQuery = |||
sum(
-kube_pod_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}
+kube_pod_resource_requests{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}
* on(namespace,pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type="$type"}
) by (pod)
16 changes: 8 additions & 8 deletions rules/apps.libsonnet
@@ -61,7 +61,7 @@
{
record: 'cluster:namespace:pod_memory:active:kube_pod_resource_requests',
expr: |||
kube_pod_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s)
kube_pod_resource_requests{resource="memory",%(kubeSchedulerSelector)s} * on (namespace, pod, %(clusterLabel)s)
group_left() max by (namespace, pod, %(clusterLabel)s) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
@@ -73,7 +73,7 @@
sum by (namespace, %(clusterLabel)s) (
sum by (namespace, pod, %(clusterLabel)s) (
max by (namespace, pod, container, %(clusterLabel)s) (
kube_pod_resource_requests{resource="memory",%(kubeStateMetricsSelector)s}
kube_pod_resource_requests{resource="memory",%(kubeSchedulerSelector)s}
) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
@@ -84,7 +84,7 @@
{
record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_requests',
expr: |||
kube_pod_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s)
kube_pod_resource_requests{resource="cpu",%(kubeSchedulerSelector)s} * on (namespace, pod, %(clusterLabel)s)
group_left() max by (namespace, pod, %(clusterLabel)s) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
@@ -96,7 +96,7 @@
sum by (namespace, %(clusterLabel)s) (
sum by (namespace, pod, %(clusterLabel)s) (
max by (namespace, pod, container, %(clusterLabel)s) (
kube_pod_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s}
kube_pod_resource_requests{resource="cpu",%(kubeSchedulerSelector)s}
) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
@@ -107,7 +107,7 @@
{
record: 'cluster:namespace:pod_memory:active:kube_pod_resource_limits',
expr: |||
kube_pod_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s)
kube_pod_resource_limits{resource="memory",%(kubeSchedulerSelector)s} * on (namespace, pod, %(clusterLabel)s)
group_left() max by (namespace, pod, %(clusterLabel)s) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
@@ -119,7 +119,7 @@
sum by (namespace, %(clusterLabel)s) (
sum by (namespace, pod, %(clusterLabel)s) (
max by (namespace, pod, container, %(clusterLabel)s) (
kube_pod_resource_limits{resource="memory",%(kubeStateMetricsSelector)s}
kube_pod_resource_limits{resource="memory",%(kubeSchedulerSelector)s}
) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
@@ -130,7 +130,7 @@
{
record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_limits',
expr: |||
kube_pod_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on (namespace, pod, %(clusterLabel)s)
kube_pod_resource_limits{resource="cpu",%(kubeSchedulerSelector)s} * on (namespace, pod, %(clusterLabel)s)
group_left() max by (namespace, pod, %(clusterLabel)s) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
@@ -142,7 +142,7 @@
sum by (namespace, %(clusterLabel)s) (
sum by (namespace, pod, %(clusterLabel)s) (
max by (namespace, pod, container, %(clusterLabel)s) (
kube_pod_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s}
kube_pod_resource_limits{resource="cpu",%(kubeSchedulerSelector)s}
) * on(namespace, pod, %(clusterLabel)s) group_left() max by (namespace, pod, %(clusterLabel)s) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
8 changes: 4 additions & 4 deletions rules/windows.libsonnet
@@ -217,28 +217,28 @@
record: 'kube_pod_windows_container_resource_memory_request',
expr: |||
max by (namespace, pod, container) (
kube_pod_resource_requests{resource="memory",%(kubeStateMetricsSelector)s}
kube_pod_resource_requests{resource="memory",%(kubeSchedulerSelector)s}
) * on(container,pod,namespace) (windows_pod_container_available)
||| % $._config,
},
{
record: 'kube_pod_windows_container_resource_memory_limit',
expr: |||
kube_pod_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_pod_container_available)
kube_pod_resource_limits{resource="memory",%(kubeSchedulerSelector)s} * on(container,pod,namespace) (windows_pod_container_available)
||| % $._config,
},
{
record: 'kube_pod_windows_container_resource_cpu_cores_request',
expr: |||
max by (namespace, pod, container) (
kube_pod_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s}
kube_pod_resource_requests{resource="cpu",%(kubeSchedulerSelector)s}
) * on(container,pod,namespace) (windows_pod_container_available)
||| % $._config,
},
{
record: 'kube_pod_windows_container_resource_cpu_cores_limit',
expr: |||
kube_pod_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace) (windows_pod_container_available)
kube_pod_resource_limits{resource="cpu",%(kubeSchedulerSelector)s} * on(container,pod,namespace) (windows_pod_container_available)
||| % $._config,
},
{
