From 3f8e350477c758a01105d6e9169fd5afb6332d5c Mon Sep 17 00:00:00 2001 From: Konstantin-petrenko Date: Thu, 10 Nov 2022 09:41:32 +0100 Subject: [PATCH] feat: create monitoring for edge nodes - create Helm EdgeApplication charts for node-exporter and grafana-agent - adapt metrics to the default dashboards - add the ability to monitor individual nodes by name --- charts/grafana-agent/Chart.yaml | 13 + charts/grafana-agent/templates/_helpers.tpl | 67 ++++ charts/grafana-agent/templates/configmap.yaml | 33 ++ .../templates/operator-deployment.yaml | 95 +++++ charts/grafana-agent/values.yaml | 78 +++++ charts/node-exporter/Chart.yaml | 18 + charts/node-exporter/README.md | 77 +++++ charts/node-exporter/ci/port-values.yaml | 3 + charts/node-exporter/templates/NOTES.txt | 15 + charts/node-exporter/templates/_helpers.tpl | 107 ++++++ .../templates/edgeapplication.yaml | 271 +++++++++++++++ charts/node-exporter/templates/endpoints.yaml | 17 + .../templates/psp-clusterrole.yaml | 13 + .../templates/psp-clusterrolebinding.yaml | 15 + charts/node-exporter/templates/psp.yaml | 48 +++ .../templates/serviceaccount.yaml | 14 + .../templates/servicemonitor.yaml | 51 +++ .../templates/verticalpodautoscaler.yaml | 33 ++ charts/node-exporter/values.yaml | 326 ++++++++++++++++++ .../grafana-agent/nats_metric_instance.yaml | 2 +- dev/charts/grafana/values.yaml | 60 ++-- dev/charts/mimir/values.yaml | 5 +- devspace.yaml | 90 ++++- node_names.sh | 20 ++ 24 files changed, 1425 insertions(+), 46 deletions(-) create mode 100644 charts/grafana-agent/Chart.yaml create mode 100644 charts/grafana-agent/templates/_helpers.tpl create mode 100644 charts/grafana-agent/templates/configmap.yaml create mode 100644 charts/grafana-agent/templates/operator-deployment.yaml create mode 100644 charts/grafana-agent/values.yaml create mode 100644 charts/node-exporter/Chart.yaml create mode 100644 charts/node-exporter/README.md create mode 100644 charts/node-exporter/ci/port-values.yaml create mode 100644 charts/node-exporter/templates/NOTES.txt create mode 100644 charts/node-exporter/templates/_helpers.tpl create mode 100644 charts/node-exporter/templates/edgeapplication.yaml create mode 100644 charts/node-exporter/templates/endpoints.yaml create mode 100644 charts/node-exporter/templates/psp-clusterrole.yaml create mode 100644 charts/node-exporter/templates/psp-clusterrolebinding.yaml create mode 100644 charts/node-exporter/templates/psp.yaml create mode 100644 charts/node-exporter/templates/serviceaccount.yaml create mode 100644 charts/node-exporter/templates/servicemonitor.yaml create mode 100644 charts/node-exporter/templates/verticalpodautoscaler.yaml create mode 100644 charts/node-exporter/values.yaml create mode 100755 node_names.sh diff --git a/charts/grafana-agent/Chart.yaml b/charts/grafana-agent/Chart.yaml new file mode 100644 index 0000000..91b3a86 --- /dev/null +++ b/charts/grafana-agent/Chart.yaml @@ -0,0 +1,13 @@ +apiVersion: v2 +appVersion: 0.28.0 +description: A Helm chart for Grafana Agent Operator +home: https://grafana.com/docs/agent/v0.28/ +icon: https://raw.githubusercontent.com/grafana/agent/v0.28.0/docs/sources/assets/logo_and_name.png +maintainers: +- email: grafana-agent-team@googlegroups.com + name: Grafana Agent Team +name: grafana-agent-operator +sources: +- https://github.com/grafana/agent/tree/v0.28.0/pkg/operator +type: application +version: 0.2.8 diff --git a/charts/grafana-agent/templates/_helpers.tpl b/charts/grafana-agent/templates/_helpers.tpl new file mode 100644 index 0000000..f464671 ---
/dev/null +++ b/charts/grafana-agent/templates/_helpers.tpl @@ -0,0 +1,67 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "ga-operator.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "ga-operator.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "ga-operator.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "ga-operator.labels" -}} +{{ include "ga-operator.selectorLabels" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: operator +helm.sh/chart: {{ include "ga-operator.chart" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +{{- if .Values.customLabels }} +{{ toYaml .Values.customLabels }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "ga-operator.selectorLabels" -}} +app.kubernetes.io/name: {{ include "ga-operator.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "ga-operator.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "ga-operator.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + diff --git a/charts/grafana-agent/templates/configmap.yaml b/charts/grafana-agent/templates/configmap.yaml new file mode 100644 index 0000000..f11937f --- /dev/null +++ b/charts/grafana-agent/templates/configmap.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-agent-yml + namespace: {{ .Values.namespace | default .Release.Namespace }} + labels: + k8s-app: grafana-agent +data: + agent.yaml: | + server: + log_level: info + metrics: + global: + scrape_interval: 1m + remote_write: + - url: {{ .Values.configmap.agent_yaml.global.remote_write }} + external_labels: + edge_node: ${NODE_NAME} + configs: + - name: node-exporter-edgenodes + scrape_configs: + - job_name: agent + static_configs: + - targets: {{ .Values.configmap.agent_yaml.config.targets }} + relabel_configs: + - source_labels: [__address__] + target_label: instance + replacement: ${NODE_NAME} + remote_write: + - url: {{ .Values.configmap.agent_yaml.config.remote_write.url }} + basic_auth: + username: {{ .Values.configmap.agent_yaml.config.remote_write.basic_auth.username }} + password: {{ .Values.configmap.agent_yaml.config.remote_write.basic_auth.password }} diff --git a/charts/grafana-agent/templates/operator-deployment.yaml b/charts/grafana-agent/templates/operator-deployment.yaml new file mode 100644 index 0000000..c70dc30 --- /dev/null +++ b/charts/grafana-agent/templates/operator-deployment.yaml @@ -0,0 +1,95 @@ +apiVersion: apps.kubeedge.io/v1alpha1 +kind: EdgeApplication +metadata: + name: grafana-agent + labels: + app.kubernetes.io/name: grafana-agent + namespace: {{ .Values.namespace | default .Release.Namespace }} +spec: + workloadTemplate: + manifests: + - apiVersion: apps/v1 + kind: Deployment + metadata: + namespace: {{ .Values.namespace | default .Release.Namespace }} + name: {{ include "ga-operator.fullname" . }} + labels: +{{ include "ga-operator.labels" . | indent 12 }} + {{- with .Values.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + spec: + replicas: 1 + selector: + matchLabels: +{{ include "ga-operator.selectorLabels" . | indent 14 }} + template: + metadata: + labels: +{{ include "ga-operator.selectorLabels" . | indent 16 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 16 }} + {{- end }} + {{- with .Values.podAnnotations }} + annotations: + {{ toYaml . | indent 16 }} + {{- end }} + spec: + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 16 }} + {{- end }} + containers: + - name: {{ include "ga-operator.name" . }} + image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 18 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 18 }} + {{- end }} + {{- if or (and .Values.kubeletService.namespace .Values.kubeletService.serviceName) (.Values.extraArgs) }} + volumeMounts: + - name: agent-yaml + mountPath: /etc/agent/ + args: + {{- if .Values.extraArgs }} + {{- range .Values.extraArgs }} + - {{ . }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.image.pullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 16 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 16 }} + {{- end }} + {{- with .Values.tolerations }} + volumes: + - name: agent-yaml + configMap: + name: grafana-agent-yml + tolerations: + {{- toYaml . | nindent 16 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 16 }} + {{- end }} + workloadScope: + targetNodeGroups: + {{- with .Values.targetNodeGroups }} + {{- toYaml . | nindent 6 }} + {{- end }} diff --git a/charts/grafana-agent/values.yaml b/charts/grafana-agent/values.yaml new file mode 100644 index 0000000..5d55d9d --- /dev/null +++ b/charts/grafana-agent/values.yaml @@ -0,0 +1,78 @@ +# -- Overrides the chart's name +nameOverride: "" +# -- Overrides the chart's computed fullname +fullnameOverride: "" +# -- Annotations for the Deployment +annotations: {} +# -- Annotations for the Deployment Pods +podAnnotations: {} +# -- Annotations for the Deployment Pods +podLabels: {} +# -- Pod security context (runAsUser, etc.) +podSecurityContext: {} +# -- Container security context (allowPrivilegeEscalation, etc.) +containerSecurityContext: {} +configmap: + agent_yaml: + global: + remote_write: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push + config: + targets: ["edgeapplication-node-exporter-prometheus-node-exporter.nodegroup.svc:9100"] + remote_write: + url: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push + basic_auth: + username: nats + password: Mjk0YTA2MzMwMjc4NDYxYzM1YmU1ZDky +rbac: + # -- Toggle to create ClusterRole and ClusterRoleBinding + create: true + # -- Name of a PodSecurityPolicy to use in the ClusterRole. If unset, no PodSecurityPolicy is used. + podSecurityPolicyName: '' +serviceAccount: + # -- Toggle to create ServiceAccount + create: true + # -- Service account name + name: +image: + # -- Image registry + registry: docker.io + # -- Image repo + repository: grafana/agent + # -- Image tag + tag: v0.28.0 + # -- Image pull policy + pullPolicy: IfNotPresent + # -- Image pull secrets + pullSecrets: [] +# -- If both are set, Agent Operator will create and maintain a service for scraping kubelets +# https://grafana.com/docs/agent/latest/operator/getting-started/#monitor-kubelets +kubeletService: + namespace: default + serviceName: kubelet +# -- List of additional cli arguments to configure agent-operator (example: `--log.level`) +# extraArgs: {} +extraArgs: + - -enable-features=integrations-next + - -config.file=/etc/agent/agent.yaml + - -config.expand-env +# -- Resource limits and requests config +resources: {} +# -- nodeSelector configuration +nodeSelector: {} +# -- Tolerations applied to Pods +tolerations: + - effect: NoExecute + key: edgefarm.applications + operator: Exists +# -- Pod affinity configuration +# affinity: {} +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/edge + operator: Exists +targetNodeGroups: + - name: virtual-6a87250b + - name: virtual-e602bbba diff --git a/charts/node-exporter/Chart.yaml b/charts/node-exporter/Chart.yaml new file mode 100644 index 0000000..f2a7fb4 --- /dev/null +++ b/charts/node-exporter/Chart.yaml @@ -0,0 +1,18 @@ +apiVersion: v2 +appVersion: 1.3.1 +description: A Helm chart for prometheus node-exporter +home: https://github.com/prometheus/node_exporter/ +keywords: +- node-exporter +- prometheus +- exporter +maintainers: +- email: gianrubio@gmail.com + name: gianrubio +- email: zanhsieh@gmail.com + name: zanhsieh +name: prometheus-node-exporter +sources: +- https://github.com/prometheus/node_exporter/ +type: application 
+version: 4.4.1 diff --git a/charts/node-exporter/README.md b/charts/node-exporter/README.md new file mode 100644 index 0000000..02de7b1 --- /dev/null +++ b/charts/node-exporter/README.md @@ -0,0 +1,77 @@ +# Prometheus `Node Exporter` + +Prometheus exporter for hardware and OS metrics exposed by *NIX kernels, written in Go with pluggable metric collectors. + +This chart bootstraps a prometheus [`Node Exporter`](http://github.com/prometheus/node_exporter) daemonset on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager. + +## Get Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Chart + +```console +helm install [RELEASE_NAME] prometheus-community/prometheus-node-exporter +``` + +_See [configuration](#configuring) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] [CHART] --install +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### 3.x to 4.x + +Starting from version 4.0.0, the `node exporter` chart is using the [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/). Therefore you have to delete the daemonset before you upgrade. + +```console +kubectl delete daemonset -l app=prometheus-node-exporter +helm upgrade -i prometheus-node-exporter prometheus-community/prometheus-node-exporter +``` + +If you use your own custom [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor) or [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#podmonitor), please ensure to upgrade their `selector` fields accordingly to the new labels. + +### From 2.x to 3.x + +Change the following: + +```yaml +hostRootFsMount: true +``` + +to: + +```yaml +hostRootFsMount: + enabled: true + mountPropagation: HostToContainer +``` + +## Configuring + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments, visit the chart's [values.yaml](./values.yaml), or run these configuration commands: + +```console +helm show values prometheus-community/prometheus-node-exporter +``` diff --git a/charts/node-exporter/ci/port-values.yaml b/charts/node-exporter/ci/port-values.yaml new file mode 100644 index 0000000..dbfb4b6 --- /dev/null +++ b/charts/node-exporter/ci/port-values.yaml @@ -0,0 +1,3 @@ +service: + targetPort: 9102 + port: 9102 diff --git a/charts/node-exporter/templates/NOTES.txt b/charts/node-exporter/templates/NOTES.txt new file mode 100644 index 0000000..df05e3f --- /dev/null +++ b/charts/node-exporter/templates/NOTES.txt @@ -0,0 +1,15 @@ +1. Get the application URL by running these commands: +{{- if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ template "prometheus-node-exporter.namespace" . 
}} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "prometheus-node-exporter.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ template "prometheus-node-exporter.namespace" . }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc -w {{ template "prometheus-node-exporter.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ template "prometheus-node-exporter.namespace" . }} {{ template "prometheus-node-exporter.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ template "prometheus-node-exporter.namespace" . }} -l "app.kubernetes.io/name={{ template "prometheus-node-exporter.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9100 to use your application" + kubectl port-forward --namespace {{ template "prometheus-node-exporter.namespace" . }} $POD_NAME 9100 +{{- end }} diff --git a/charts/node-exporter/templates/_helpers.tpl b/charts/node-exporter/templates/_helpers.tpl new file mode 100644 index 0000000..2103089 --- /dev/null +++ b/charts/node-exporter/templates/_helpers.tpl @@ -0,0 +1,107 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "prometheus-node-exporter.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "prometheus-node-exporter.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* Generate basic labels */}} +{{- define "prometheus-node-exporter.labels" }} +helm.sh/chart: {{ template "prometheus-node-exporter.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: metrics +app.kubernetes.io/part-of: {{ template "prometheus-node-exporter.name" . }} +{{- include "prometheus-node-exporter.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +{{- if .Values.podLabels}} +{{ toYaml .Values.podLabels }} +{{- end }} +{{- if .Values.releaseLabel }} +release: {{ .Release.Name }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "prometheus-node-exporter.selectorLabels" }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/name: {{ template "prometheus-node-exporter.name" . }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "prometheus-node-exporter.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + + +{{/* +Create the name of the service account to use +*/}} +{{- define "prometheus-node-exporter.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "prometheus-node-exporter.fullname" .) .Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +The image to use +*/}} +{{- define "prometheus-node-exporter.image" -}} +{{- if .Values.image.sha -}} +{{- printf "%s:%s@%s" .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.sha }} +{{- else -}} +{{- printf "%s:%s" .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- end }} +{{- end }} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "prometheus-node-exporter.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Create the namespace name of the service monitor +*/}} +{{- define "prometheus-node-exporter.monitor-namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- if .Values.prometheus.monitor.namespace -}} + {{- .Values.prometheus.monitor.namespace -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} + {{- end -}} +{{- end -}} diff --git a/charts/node-exporter/templates/edgeapplication.yaml b/charts/node-exporter/templates/edgeapplication.yaml new file mode 100644 index 0000000..a79937d --- /dev/null +++ b/charts/node-exporter/templates/edgeapplication.yaml @@ -0,0 +1,271 @@ +apiVersion: apps.kubeedge.io/v1alpha1 +kind: EdgeApplication +metadata: + name: node-exporter + labels: + app.kubernetes.io/name: prometheus-node-exporter + namespace: {{ .Values.namespace | default .Release.Namespace }} +spec: + workloadTemplate: + manifests: + - apiVersion: apps/v1 + kind: Deployment + metadata: + name: {{ template "prometheus-node-exporter.fullname" . }} + namespace: {{ .Values.namespace | default .Release.Namespace }} + labels: {{ include "prometheus-node-exporter.labels" . | indent 11 }} + spec: + selector: + matchLabels: + {{- include "prometheus-node-exporter.selectorLabels" . | indent 13 }} + {{- if .Values.updateStrategy }} + strategy: +{{ toYaml .Values.updateStrategy | indent 11 }} + {{- end }} + template: + metadata: + labels: {{ include "prometheus-node-exporter.labels" . | indent 17 }} + {{- if .Values.podAnnotations }} + annotations: + {{- toYaml .Values.podAnnotations | nindent 17 }} + {{- end }} + spec: + automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }} + serviceAccountName: {{ template "prometheus-node-exporter.serviceAccountName" . }} + {{- if .Values.securityContext }} + securityContext: +{{ toYaml .Values.securityContext | indent 15 }} + {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- if .Values.extraInitContainers }} + initContainers: + {{ toYaml .Values.extraInitContainers | nindent 15 }} + {{- end }} + containers: + - name: node-exporter + image: {{ include "prometheus-node-exporter.image" . 
}} + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + {{- if .Values.hostRootFsMount.enabled }} + - --path.rootfs=/host/root + {{- end }} + - --web.listen-address=[$(HOST_IP)]:{{ .Values.service.port }} + {{- if .Values.extraArgs }} + {{ toYaml .Values.extraArgs | indent 21 }} + {{- end }} + {{- with .Values.containerSecurityContext }} + securityContext: {{ toYaml . | nindent 21 }} + {{- end }} + env: + - name: HOST_IP + {{- if .Values.service.listenOnAllInterfaces }} + value: 0.0.0.0 + {{- else }} + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + {{- end }} + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + ports: + - name: {{ .Values.service.portName }} + containerPort: {{ .Values.service.port }} + protocol: TCP + livenessProbe: + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + httpGet: + httpHeaders: + {{- range $_, $header := .Values.livenessProbe.httpGet.httpHeaders }} + - name: {{ $header.name }} + value: {{ $header.value }} + {{- end }} + path: / + port: {{ .Values.service.port }} + scheme: {{ upper .Values.livenessProbe.httpGet.scheme }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + readinessProbe: + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + httpGet: + httpHeaders: + {{- range $_, $header := .Values.readinessProbe.httpGet.httpHeaders }} + - name: {{ $header.name }} + value: {{ $header.value }} + {{- end }} + path: / + port: {{ .Values.service.port }} + scheme: {{ upper .Values.readinessProbe.httpGet.scheme }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + resources: + {{ toYaml .Values.resources | indent 13 }} + volumeMounts: + - name: proc + mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + {{- if .Values.hostRootFsMount.enabled }} + - name: root + mountPath: /host/root + {{- with .Values.hostRootFsMount.mountPropagation }} + mountPropagation: {{ . 
}} + {{- end }} + readOnly: true + {{- end }} + {{- if .Values.extraHostVolumeMounts }} + {{- range $_, $mount := .Values.extraHostVolumeMounts }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} + {{- if $mount.mountPropagation }} + mountPropagation: {{ $mount.mountPropagation }} + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.sidecarVolumeMount }} + {{- range $_, $mount := .Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: true + {{- end }} + {{- end }} + {{- if .Values.configmaps }} + {{- range $_, $mount := .Values.configmaps }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + {{- end }} + {{- if .Values.secrets }} + {{- range $_, $mount := .Values.secrets }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.sidecars }} + {{ toYaml .Values.sidecars | indent 15 }} + {{- if or .Values.sidecarVolumeMount .Values.sidecarHostVolumeMounts }} + volumeMounts: + {{- range $_, $mount := .Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} + {{- end }} + {{- range $_, $mount := .Values.sidecarHostVolumeMounts }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} + {{- if $mount.mountPropagation }} + mountPropagation: {{ $mount.mountPropagation }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: + {{ toYaml .Values.imagePullSecrets | indent 17 }} + {{- end }} + hostNetwork: {{ .Values.hostNetwork }} + hostPID: {{ .Values.hostPID }} + {{- if .Values.affinity }} + affinity: + {{ toYaml .Values.affinity | indent 17 }} + {{- end }} + {{- with .Values.dnsConfig }} + dnsConfig: + {{ toYaml . | indent 17 }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: + {{ toYaml .Values.nodeSelector | indent 17 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: +{{ toYaml . | indent 15 }} + {{- end }} + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + {{- if .Values.hostRootFsMount.enabled }} + - name: root + hostPath: + path: / + {{- end }} + {{- if .Values.extraHostVolumeMounts }} + {{- range $_, $mount := .Values.extraHostVolumeMounts }} + - name: {{ $mount.name }} + hostPath: + path: {{ $mount.hostPath }} + {{- end }} + {{- end }} + {{- if .Values.sidecarVolumeMount }} + {{- range $_, $mount := .Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + emptyDir: + medium: Memory + {{- end }} + {{- end }} + {{- if .Values.sidecarHostVolumeMounts }} + {{- range $_, $mount := .Values.sidecarHostVolumeMounts }} + - name: {{ $mount.name }} + hostPath: + path: {{ $mount.hostPath }} + {{- end }} + {{- end }} + {{- if .Values.configmaps }} + {{- range $_, $mount := .Values.configmaps }} + - name: {{ $mount.name }} + configMap: + name: {{ $mount.name }} + {{- end }} + {{- end }} + {{- if .Values.secrets }} + {{- range $_, $mount := .Values.secrets }} + - name: {{ $mount.name }} + secret: + secretName: {{ $mount.name }} + {{- end }} + {{- end }} + - apiVersion: v1 + kind: Service + metadata: + name: {{ template "prometheus-node-exporter.fullname" . }} + namespace: {{ .Values.namespace | default .Release.Namespace }} + labels: {{ include "prometheus-node-exporter.labels" . 
| indent 11 }} + {{- if .Values.service.annotations }} + annotations: + {{ toYaml .Values.service.annotations | indent 11 }} + {{- end }} + spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + {{- if ( and (eq .Values.service.type "NodePort" ) (not (empty .Values.service.nodePort)) ) }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + targetPort: {{ .Values.service.targetPort }} + protocol: TCP + name: {{ .Values.service.portName }} + selector: + {{- include "prometheus-node-exporter.selectorLabels" . | indent 11 }} + workloadScope: + targetNodeGroups: + {{- with .Values.targetNodeGroups }} + {{- toYaml . | nindent 6 }} + {{- end }} \ No newline at end of file diff --git a/charts/node-exporter/templates/endpoints.yaml b/charts/node-exporter/templates/endpoints.yaml new file mode 100644 index 0000000..ef3e270 --- /dev/null +++ b/charts/node-exporter/templates/endpoints.yaml @@ -0,0 +1,17 @@ +{{- if .Values.endpoints }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ template "prometheus-node-exporter.fullname" . }} + namespace: {{ template "prometheus-node-exporter.namespace" . }} + labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }} +subsets: + - addresses: + {{- range .Values.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.service.portName }} + port: 9100 + protocol: TCP +{{- end }} diff --git a/charts/node-exporter/templates/psp-clusterrole.yaml b/charts/node-exporter/templates/psp-clusterrole.yaml new file mode 100644 index 0000000..e2144cf --- /dev/null +++ b/charts/node-exporter/templates/psp-clusterrole.yaml @@ -0,0 +1,13 @@ +{{- if and .Values.rbac.create .Values.rbac.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: psp-{{ template "prometheus-node-exporter.fullname" . }} + labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }} +rules: +- apiGroups: ['extensions'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "prometheus-node-exporter.fullname" . }} +{{- end }} diff --git a/charts/node-exporter/templates/psp-clusterrolebinding.yaml b/charts/node-exporter/templates/psp-clusterrolebinding.yaml new file mode 100644 index 0000000..49034b0 --- /dev/null +++ b/charts/node-exporter/templates/psp-clusterrolebinding.yaml @@ -0,0 +1,15 @@ +{{- if and .Values.rbac.create .Values.rbac.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: psp-{{ template "prometheus-node-exporter.fullname" . }} + labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: psp-{{ template "prometheus-node-exporter.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ template "prometheus-node-exporter.fullname" . }} + namespace: {{ template "prometheus-node-exporter.namespace" . }} +{{- end }} diff --git a/charts/node-exporter/templates/psp.yaml b/charts/node-exporter/templates/psp.yaml new file mode 100644 index 0000000..b47d258 --- /dev/null +++ b/charts/node-exporter/templates/psp.yaml @@ -0,0 +1,48 @@ +{{- if and .Values.rbac.create .Values.rbac.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "prometheus-node-exporter.fullname" . 
}} + namespace: {{ template "prometheus-node-exporter.namespace" . }} + labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }} + {{- with .Values.rbac.pspAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end}} +spec: + privileged: false + # Allow core volume types. + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' + - 'hostPath' + hostNetwork: true + hostIPC: false + hostPID: true + hostPorts: + - min: 0 + max: 65535 + runAsUser: + # Permits the container to run with root privileges as well. + rule: 'RunAsAny' + seLinux: + # This policy assumes the nodes are using AppArmor rather than SELinux. + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. + - min: 0 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} diff --git a/charts/node-exporter/templates/serviceaccount.yaml b/charts/node-exporter/templates/serviceaccount.yaml new file mode 100644 index 0000000..dc3fee6 --- /dev/null +++ b/charts/node-exporter/templates/serviceaccount.yaml @@ -0,0 +1,14 @@ +{{- if .Values.rbac.create -}} +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "prometheus-node-exporter.serviceAccountName" . }} + namespace: {{ template "prometheus-node-exporter.namespace" . }} + labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }} + annotations: +{{ toYaml .Values.serviceAccount.annotations | indent 4 }} +imagePullSecrets: +{{ toYaml .Values.serviceAccount.imagePullSecrets | indent 2 }} +{{- end -}} +{{- end -}} diff --git a/charts/node-exporter/templates/servicemonitor.yaml b/charts/node-exporter/templates/servicemonitor.yaml new file mode 100644 index 0000000..3408c67 --- /dev/null +++ b/charts/node-exporter/templates/servicemonitor.yaml @@ -0,0 +1,51 @@ +{{- if .Values.prometheus.monitor.enabled }} +apiVersion: {{ .Values.prometheus.monitor.apiVersion | default "monitoring.coreos.com/v1" }} +kind: ServiceMonitor +metadata: + name: {{ template "prometheus-node-exporter.fullname" . }} + namespace: {{ template "prometheus-node-exporter.monitor-namespace" . }} + labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }} + {{- if .Values.prometheus.monitor.additionalLabels }} + {{- toYaml .Values.prometheus.monitor.additionalLabels | nindent 4 }} + {{- end }} +spec: + jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.monitor.jobLabel }} + selector: + matchLabels: + {{- if .Values.prometheus.monitor.selectorOverride }} + {{ toYaml .Values.prometheus.monitor.selectorOverride | indent 6 }} + {{ else }} + {{ include "prometheus-node-exporter.selectorLabels" . | indent 6 }} + {{- end }} + endpoints: + - port: {{ .Values.service.portName }} + scheme: {{ .Values.prometheus.monitor.scheme }} + {{- with .Values.prometheus.monitor.basicAuth }} + basicAuth: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.monitor.bearerTokenFile }} + bearerTokenFile: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.tlsConfig }} + tlsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.monitor.proxyUrl }} + proxyUrl: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.scrapeTimeout }} + scrapeTimeout: {{ . 
}} + {{- end }} + {{- with .Values.prometheus.monitor.relabelings }} + relabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.monitor.metricRelabelings }} + metricRelabelings: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/charts/node-exporter/templates/verticalpodautoscaler.yaml b/charts/node-exporter/templates/verticalpodautoscaler.yaml new file mode 100644 index 0000000..918dc0b --- /dev/null +++ b/charts/node-exporter/templates/verticalpodautoscaler.yaml @@ -0,0 +1,33 @@ +{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.verticalPodAutoscaler.enabled) }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ template "prometheus-node-exporter.fullname" . }} + namespace: {{ template "prometheus-node-exporter.namespace" . }} + labels: {{ include "prometheus-node-exporter.labels" . | indent 4 }} +spec: + resourcePolicy: + containerPolicies: + - containerName: {{ template "prometheus-node-exporter.name" . }} + {{- if .Values.verticalPodAutoscaler.controlledResources }} + controlledResources: {{ .Values.verticalPodAutoscaler.controlledResources }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{ toYaml .Values.verticalPodAutoscaler.maxAllowed | nindent 8 }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{ toYaml .Values.verticalPodAutoscaler.minAllowed | nindent 8 }} + {{- end }} + targetRef: + apiVersion: apps/v1 + kind: DaemonSet + name: {{ template "prometheus-node-exporter.fullname" . }} + {{- if .Values.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- if .Values.verticalPodAutoscaler.updatePolicy.updateMode }} + updateMode: {{ .Values.verticalPodAutoscaler.updatePolicy.updateMode }} + {{- end }} + {{- end }} +{{- end }} diff --git a/charts/node-exporter/values.yaml b/charts/node-exporter/values.yaml new file mode 100644 index 0000000..6b7bc3e --- /dev/null +++ b/charts/node-exporter/values.yaml @@ -0,0 +1,326 @@ +# Default values for prometheus-node-exporter. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. +image: + repository: quay.io/prometheus/node-exporter + # Overrides the image tag whose default is {{ printf "v%s" .Chart.AppVersion }} + tag: "" + pullPolicy: IfNotPresent + sha: "" +imagePullSecrets: [] +# - name: "image-pull-secret" + +service: + type: ClusterIP + port: 9100 + targetPort: 9100 + nodePort: + portName: metrics + listenOnAllInterfaces: true + annotations: + prometheus.io/scrape: "true" +# Additional environment variables that will be passed to the daemonset +env: {} +## env: +## VARIABLE: value + +prometheus: + monitor: + enabled: false + additionalLabels: {} + namespace: "" + jobLabel: "" + scheme: http + basicAuth: {} + bearerTokenFile: + tlsConfig: {} + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + ## Override serviceMonitor selector + ## + selectorOverride: {} + relabelings: [] + metricRelabelings: [] + interval: "" + scrapeTimeout: 10s + ## prometheus.monitor.apiVersion ApiVersion for the serviceMonitor Resource(defaults to "monitoring.coreos.com/v1") + apiVersion: "" +## Customize the updateStrategy if set +updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 +resources: {} +# We usually recommend not to specify default resources and to leave this as a conscious +# choice for the user. 
This also increases chances charts run on environments with little +# resources, such as Minikube. If you do want to specify resources, uncomment the following +# lines, adjust them as necessary, and remove the curly braces after 'resources:'. +# limits: +# cpu: 200m +# memory: 50Mi +# requests: +# cpu: 100m +# memory: 30Mi + +serviceAccount: + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the fullname template + name: + annotations: {} + imagePullSecrets: [] + automountServiceAccountToken: false +securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 +containerSecurityContext: {} +# capabilities: +# add: +# - SYS_TIME + +rbac: + ## If true, create & use RBAC resources + ## + create: true + ## If true, create & use Pod Security Policy resources + ## https://kubernetes.io/docs/concepts/policy/pod-security-policy/ + pspEnabled: true + pspAnnotations: {} +# for deployments that have node_exporter deployed outside of the cluster, list +# their addresses here +endpoints: [] +# Expose the service to the host network +hostNetwork: true +# Share the host process ID namespace +hostPID: true +# Mount the node's root file system (/) at /host/root in the container +hostRootFsMount: + enabled: true + # Defines how new mounts in existing mounts on the node or in the container + # are propagated to the container or node, respectively. Possible values are + # None, HostToContainer, and Bidirectional. If this field is omitted, then + # None is used. More information on: + # https://kubernetes.io/docs/concepts/storage/volumes/#mount-propagation + mountPropagation: HostToContainer +## Assign a group of affinity scheduling rules +## +affinity: {} +# nodeAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# nodeSelectorTerms: +# - matchFields: +# - key: metadata.name +# operator: In +# values: +# - target-host-name + +# Annotations to be added to node exporter pods +podAnnotations: + # Fix for very slow GKE cluster upgrades + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" +# Extra labels to be added to node exporter pods +podLabels: {} +## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box +releaseLabel: false +# Custom DNS configuration to be added to prometheus-node-exporter pods +dnsConfig: {} +# nameservers: +# - 1.2.3.4 +# searches: +# - ns1.svc.cluster-domain.example +# - my.dns.search.suffix +# options: +# - name: ndots +# value: "2" +# - name: edns0 + +## Assign a nodeSelector if operating a hybrid cluster +## +nodeSelector: {} +# beta.kubernetes.io/arch: amd64 +# beta.kubernetes.io/os: linux + +tolerations: + - effect: NoExecute + key: edgefarm.applications + operator: Exists +## Assign a PriorityClassName to pods if set +# priorityClassName: "" + +## Additional container arguments +## +extraArgs: [] +# - --collector.diskstats.ignored-devices=^(ram|loop|fd|(h|s|v)d[a-z]|nvme\\d+n\\d+p)\\d+$ +# - --collector.textfile.directory=/run/prometheus + +## Additional mounts from the host to node-exporter container +## +extraHostVolumeMounts: [] +# - name: +# hostPath: +# mountPath: +# readOnly: true|false +# mountPropagation: None|HostToContainer|Bidirectional + +## Additional configmaps to be mounted. 
+## +configmaps: [] +# - name: +# mountPath: +secrets: [] +# - name: +# mountPath: +## Override the deployment namespace +## +namespaceOverride: "" +## Additional containers for export metrics to text file +## +sidecars: [] +## - name: nvidia-dcgm-exporter +## image: nvidia/dcgm-exporter:1.4.3 + +## Volume for sidecar containers +## +sidecarVolumeMount: [] +## - name: collector-textfiles +## mountPath: /run/prometheus +## readOnly: false + +## Additional mounts from the host to sidecar containers +## +sidecarHostVolumeMounts: [] +# - name: +# hostPath: +# mountPath: +# readOnly: true|false +# mountPropagation: None|HostToContainer|Bidirectional + +## Additional InitContainers to initialize the pod +## +extraInitContainers: [] +## Liveness probe +## +livenessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: [] + scheme: http + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 +## Readiness probe +## +readinessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: [] + scheme: http + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 +# Enable vertical pod autoscaler support for prometheus-node-exporter +verticalPodAutoscaler: + enabled: false + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi +# updatePolicy: +# Specifies whether recommended updates are applied when a Pod is started and whether recommended updates +# are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto". +# updateMode: Auto + +targetNodeGroups: + - name: virtual-6a87250b + - name: virtual-e602bbba +--- +# -- Overrides the chart's name +nameOverride: "" +# -- Overrides the chart's computed fullname +fullnameOverride: "" +# -- Annotations for the Deployment +annotations: {} +# -- Annotations for the Deployment Pods +podAnnotations: {} +# -- Annotations for the Deployment Pods +podLabels: {} +# -- Pod security context (runAsUser, etc.) +podSecurityContext: {} +# -- Container security context (allowPrivilegeEscalation, etc.) +containerSecurityContext: {} +configmap: + agent_yaml: + global: + remote_write: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push + config: + targets: ["edgeapplication-node-exporter-prometheus-node-exporter.nodegroup.svc:9100"] + remote_write: + url: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push + basic_auth: + username: nats + password: Mjk0YTA2MzMwMjc4NDYxYzM1YmU1ZDky +rbac: + # -- Toggle to create ClusterRole and ClusterRoleBinding + create: true + # -- Name of a PodSecurityPolicy to use in the ClusterRole. If unset, no PodSecurityPolicy is used. 
+ podSecurityPolicyName: '' +serviceAccount: + # -- Toggle to create ServiceAccount + create: true + # -- Service account name + name: +image: + # -- Image registry + registry: docker.io + # -- Image repo + repository: grafana/agent + # -- Image tag + tag: v0.28.0 + # -- Image pull policy + pullPolicy: IfNotPresent + # -- Image pull secrets + pullSecrets: [] +# -- If both are set, Agent Operator will create and maintain a service for scraping kubelets +# https://grafana.com/docs/agent/latest/operator/getting-started/#monitor-kubelets +kubeletService: + namespace: default + serviceName: kubelet +# -- List of additional cli arguments to configure agent-operator (example: `--log.level`) +# extraArgs: {} +extraArgs: + - -enable-features=integrations-next + - -config.file=/etc/agent/agent.yaml + - -config.expand-env +# -- Resource limits and requests config +resources: {} +# -- nodeSelector configuration +nodeSelector: {} +# -- Tolerations applied to Pods +tolerations: + - effect: NoExecute + key: edgefarm.applications + operator: Exists +# -- Pod affinity configuration +# affinity: {} +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/edge + operator: Exists +targetNodeGroups: + - name: virtual-6a87250b + - name: virtual-e602bbba diff --git a/dev/charts/grafana-agent/nats_metric_instance.yaml b/dev/charts/grafana-agent/nats_metric_instance.yaml index 92580bf..8b58f2f 100644 --- a/dev/charts/grafana-agent/nats_metric_instance.yaml +++ b/dev/charts/grafana-agent/nats_metric_instance.yaml @@ -9,7 +9,7 @@ metadata: app.kubernetes.io/name: mimir spec: remoteWrite: - - url: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push + - url: http://grafana-mimir-nginx.monitoring.svc:10080/api/v1/push basicAuth: username: name: primary-credentials-metrics diff --git a/dev/charts/grafana/values.yaml b/dev/charts/grafana/values.yaml index 6f14a21..987b76d 100644 --- a/dev/charts/grafana/values.yaml +++ b/dev/charts/grafana/values.yaml @@ -6,46 +6,46 @@ ingress: path: / pathType: Prefix hosts: - - grafana.10-5-0-30.nip.io + - grafana.localhost datasources: - datasources.yaml: - apiVersion: 1 - datasources: - - name: mimir - type: prometheus - url: http://grafana-mimir-nginx.monitoring.svc:80/prometheus - access: proxy - basicAuth: true - basicAuthUser: nats - withCredentials: true - isDefault: true - editable: true - secureJsonData: - basicAuthPassword: test + datasources.yaml: + apiVersion: 1 + datasources: + - name: mimir + type: prometheus + url: http://grafana-mimir-nginx.monitoring.svc:80/prometheus + access: proxy + basicAuth: true + basicAuthUser: nats + withCredentials: true + isDefault: true + editable: true + secureJsonData: + basicAuthPassword: test dashboardProviders: - dashboardproviders.yaml: - apiVersion: 1 - providers: - - name: 'default' - orgId: 1 - folder: '' - type: file - disableDeletion: false - editable: true - options: - path: /var/lib/grafana/dashboards/default + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: "default" + orgId: 1 + folder: "" + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/default dashboards: default: nats-dashboard: url: https://raw.githubusercontent.com/edgefarm/grafana-dashboards/main/dashboards/nats.json - token: '' + token: "" node-exporter-dashboard: url: https://raw.githubusercontent.com/edgefarm/grafana-dashboards/main/dashboards/node_exporter.json - token: '' - + token: "" + #if you 
want a persistent disk # persistence: # type: pvc @@ -56,4 +56,4 @@ dashboards: # size: 1Gi # # annotations: {} # finalizers: -# - kubernetes.io/pvc-protection +# - kubernetes.io/pvc-protection \ No newline at end of file diff --git a/dev/charts/mimir/values.yaml b/dev/charts/mimir/values.yaml index 85e2c32..9213ff1 100644 --- a/dev/charts/mimir/values.yaml +++ b/dev/charts/mimir/values.yaml @@ -1,6 +1,6 @@ nginx: basicAuth: - enabled: true + enabled: false username: nats password: test htpasswd: >- @@ -14,4 +14,5 @@ metaMonitoring: metrics: enabled: true remote: - url: 'http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push' \ No newline at end of file + url: 'http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push' + diff --git a/devspace.yaml b/devspace.yaml index 04d945f..ff9accf 100644 --- a/devspace.yaml +++ b/devspace.yaml @@ -1,5 +1,9 @@ version: v2beta1 +vars: + mimir_password: + command: date +%s | sha256sum | base64 | head -c 32 ; echo + pipelines: deploy-agent: |- # install grafana agent @@ -18,14 +22,20 @@ pipelines: deploy-mimir-grafana: |- # install grafana and mimir + create_deployments prometheus-crd create_deployments grafana create_deployments grafana-mimir deploy-monitoring: |- # install all monitoring setup - run_pipelines deploy-agent - run_pipelines deploy-nats deploy-node-exporter run_pipelines deploy-mimir-grafana + run_pipelines deploy-edge-applications + + deploy-edge-applications: |- + # install grafana-agent and node-exporter edge applications + bash node_names.sh + create_deployments edgeapplication-node-exporter + create_deployments edgeapplication-grafana-agent deployments: grafana-mimir: @@ -35,6 +45,10 @@ deployments: repo: https://grafana.github.io/helm-charts valuesFiles: - ./dev/charts/mimir/values.yaml + values: + nginx: + basicAuth: + password: ${mimir_password} namespace: monitoring grafana: @@ -44,21 +58,54 @@ deployments: repo: https://grafana.github.io/helm-charts valuesFiles: - ./dev/charts/grafana/values.yaml + values: + datasources: + datasources.yaml: + datasources: + - name: mimir + type: prometheus + url: http://grafana-mimir-nginx.monitoring.svc:80/prometheus + access: proxy + basicAuth: true + basicAuthUser: nats + withCredentials: true + isDefault: true + editable: true + secureJsonData: + basicAuthPassword: ${mimir_password} namespace: monitoring - grafana-agent: + edgeapplication-node-exporter: helm: chart: - name: grafana-agent-operator - repo: https://grafana.github.io/helm-charts - valuesFiles: - - ./dev/charts/grafana-agent/values.yaml - namespace: monitoring + name: ./charts/node-exporter + namespace: nodegroup - monitoring-secret-for-grafana-agent: + edgeapplication-grafana-agent: + helm: + chart: + name: ./charts/grafana-agent/ + values: + configmap: + agent_yaml: + config: + remote_write: + basic_auth: + username: nats + password: ${mimir_password} + namespace: nodegroup + + prometheus-crd: kubectl: manifests: - - ./dev/charts/grafana-agent/secret.yaml + - https://raw.githubusercontent.com/grafana/agent/main/production/operator/crds/monitoring.coreos.com_podmonitors.yaml + - https://raw.githubusercontent.com/grafana/agent/main/production/operator/crds/monitoring.coreos.com_probes.yaml + - https://raw.githubusercontent.com/grafana/agent/main/production/operator/crds/monitoring.coreos.com_servicemonitors.yaml + - https://raw.githubusercontent.com/grafana/agent/main/production/operator/crds/monitoring.grafana.com_grafanaagents.yaml + -
https://raw.githubusercontent.com/grafana/agent/main/production/operator/crds/monitoring.grafana.com_integrations.yaml + - https://raw.githubusercontent.com/grafana/agent/main/production/operator/crds/monitoring.grafana.com_logsinstances.yaml + - https://raw.githubusercontent.com/grafana/agent/main/production/operator/crds/monitoring.grafana.com_metricsinstances.yaml + - https://raw.githubusercontent.com/grafana/agent/main/production/operator/crds/monitoring.grafana.com_podlogs.yaml namespace: monitoring nats: @@ -70,6 +117,16 @@ deployments: - ./dev/charts/nats/values.yaml namespace: nats +# old setup, we may need it again in the future + grafana-agent: + helm: + chart: + name: grafana-agent-operator + repo: https://grafana.github.io/helm-charts + valuesFiles: + - ./dev/charts/grafana-agent/values.yaml + namespace: monitoring + node-exporter: helm: chart: @@ -79,13 +136,12 @@ deployments: - ./dev/charts/node-exporter/values.yaml namespace: monitoring - service-monitor-for-node-exporter: + monitoring-secret-for-grafana-agent: kubectl: manifests: - - ./dev/charts/node-exporter/service-monitor.yaml + - ./dev/charts/grafana-agent/secret.yaml namespace: monitoring - monitoring-resources-for-nats: kubectl: manifests: @@ -93,3 +149,11 @@ deployments: - ./dev/charts/grafana-agent/rbac_grafana_agent.yaml - ./dev/charts/grafana-agent/nats_grafana_agent.yaml namespace: monitoring + + service-monitor-for-node-exporter: + kubectl: + manifests: + - ./dev/charts/node-exporter/service-monitor.yaml + namespace: monitoring + + \ No newline at end of file diff --git a/node_names.sh b/node_names.sh new file mode 100755 index 0000000..eb3c95d --- /dev/null +++ b/node_names.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# set -o xtrace # uncomment for more verbose output +set -o errexit +set -o pipefail +nodes=$(kubectl get nodes --selector=node-role.kubernetes.io/agent | awk '(NR>1)' | awk '{print $1;}') +IFS=$'\r\n' +array=($nodes) +array=("${array[@]/#/ - name: }") +touch node.yaml +
+yq 'del(.targetNodeGroups.[])' -i ./charts/node-exporter/values.yaml +yq 'del(.targetNodeGroups.[])' -i ./charts/grafana-agent/values.yaml +
+for str in ${array[@]}; do + echo $str >> node.yaml +done +
+sed -i -e 's/.*targetNodeGroups:.*/targetNodeGroups:/' ./charts/node-exporter/values.yaml ./charts/grafana-agent/values.yaml +sed -i -e '/targetNodeGroups:/r node.yaml' ./charts/node-exporter/values.yaml ./charts/grafana-agent/values.yaml +rm -f node.yaml