Skip to content

Commit

Permalink
feat: create monitoring for edge nodes
Browse files Browse the repository at this point in the history
- create helm edgeapplication charts for node-exporter and grafana-agent 
- adopt metrics to default dashboards
- add the ability to monitor different nodes by name
  • Loading branch information
Konstantin-petrenko authored Nov 10, 2022
1 parent b509d27 commit 3f8e350
Show file tree
Hide file tree
Showing 24 changed files with 1,425 additions and 46 deletions.
13 changes: 13 additions & 0 deletions charts/grafana-agent/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Helm chart metadata for the Grafana Agent edge deployment.
# NOTE(review): name/description still say "operator", while the chart's
# values point at the grafana/agent image — confirm whether a rename is wanted.
apiVersion: v2
# Upstream application version; quoted so it is always read as a string.
appVersion: "0.28.0"
description: A Helm chart for Grafana Agent Operator
home: https://grafana.com/docs/agent/v0.28/
icon: https://raw.githubusercontent.com/grafana/agent/v0.28.0/docs/sources/assets/logo_and_name.png
maintainers:
  # email and name belong to the same list entry; the chart's own `name`
  # below is a separate top-level key.
  - email: grafana-agent-team@googlegroups.com
    name: Grafana Agent Team
name: grafana-agent-operator
sources:
  - https://github.com/grafana/agent/tree/v0.28.0/pkg/operator
type: application
# Chart (packaging) version, independent of appVersion.
version: "0.2.8"
67 changes: 67 additions & 0 deletions charts/grafana-agent/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{{/*
Short chart name: .Values.nameOverride when set, otherwise the chart's own
name. Capped at 63 characters, with a trailing "-" removed.
*/}}
{{- define "ga-operator.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Fully qualified app name, resolved in this order:
  1. .Values.fullnameOverride, when set;
  2. the release name alone, when it already contains the chart name;
  3. "<release>-<chart name>" otherwise.
The result is cut to 63 characters (some Kubernetes name fields are limited
to this by the DNS naming spec) and a trailing "-" is removed.
*/}}
{{- define "ga-operator.fullname" -}}
{{- $base := .Values.nameOverride | default .Chart.Name }}
{{- $full := printf "%s-%s" .Release.Name $base }}
{{- if .Values.fullnameOverride }}
{{- $full = .Values.fullnameOverride }}
{{- else if contains $base .Release.Name }}
{{- $full = .Release.Name }}
{{- end }}
{{- $full | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
"<chart name>-<chart version>" for the helm.sh/chart label. "+" is replaced
with "_", and the value is cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "ga-operator.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels: the selector labels plus managed-by, component and chart
labels. The version label is emitted only when the chart declares an
appVersion; .Values.customLabels are appended verbatim when provided.
*/}}
{{- define "ga-operator.labels" -}}
{{ include "ga-operator.selectorLabels" . }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/component: operator
helm.sh/chart: {{ include "ga-operator.chart" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
{{- with .Values.customLabels }}
{{ toYaml . }}
{{- end }}
{{- end }}

{{/*
Selector labels: the name/instance pair used both as the Deployment selector
and as the pod template labels.
*/}}
{{- define "ga-operator.selectorLabels" -}}
{{- $appName := include "ga-operator.name" . -}}
app.kubernetes.io/name: {{ $appName }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Name of the service account to use: .Values.serviceAccount.name when set.
Otherwise the chart fullname if the chart creates the account, or "default"
if it does not.
*/}}
{{- define "ga-operator.serviceAccountName" -}}
{{- $fallback := "default" }}
{{- if .Values.serviceAccount.create }}
{{- $fallback = include "ga-operator.fullname" . }}
{{- end }}
{{- .Values.serviceAccount.name | default $fallback }}
{{- end }}

33 changes: 33 additions & 0 deletions charts/grafana-agent/templates/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# ConfigMap carrying the Grafana Agent configuration file (agent.yaml).
# The NODE_NAME placeholders inside agent.yaml are left untouched by Helm; they
# are meant to be resolved by the agent itself from its environment.
# NOTE(review): nesting inside agent.yaml was reconstructed (external_labels
# placed under metrics.global) — confirm against the deployed configuration.
# NOTE(review): basic_auth credentials end up in a plain ConfigMap; consider a
# Secret-backed alternative.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-agent-yml
  namespace: {{ .Values.namespace | default .Release.Namespace }}
  labels:
    k8s-app: grafana-agent
data:
  agent.yaml: |
    server:
      log_level: info
    metrics:
      global:
        scrape_interval: 1m
        remote_write:
          - url: {{ .Values.configmap.agent_yaml.global.remote_write | quote }}
        external_labels:
          edge_node: ${NODE_NAME}
      configs:
        - name: node-exporter-edgenodes
          scrape_configs:
            - job_name: agent
              static_configs:
                - targets: {{ .Values.configmap.agent_yaml.config.targets | toJson }}
              relabel_configs:
                - source_labels: [__address__]
                  target_label: instance
                  replacement: ${NODE_NAME}
          remote_write:
            - url: {{ .Values.configmap.agent_yaml.config.remote_write.url | quote }}
              basic_auth:
                username: {{ .Values.configmap.agent_yaml.config.remote_write.basic_auth.username | quote }}
                password: {{ .Values.configmap.agent_yaml.config.remote_write.basic_auth.password | quote }}
95 changes: 95 additions & 0 deletions charts/grafana-agent/templates/operator-deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# EdgeApplication wrapping the Grafana Agent Deployment. The Deployment is the
# single entry under workloadTemplate.manifests; workloadScope selects the node
# groups listed in .Values.targetNodeGroups.
apiVersion: apps.kubeedge.io/v1alpha1
kind: EdgeApplication
metadata:
  name: grafana-agent
  labels:
    app.kubernetes.io/name: grafana-agent
  namespace: {{ .Values.namespace | default .Release.Namespace }}
spec:
  workloadTemplate:
    manifests:
      - apiVersion: apps/v1
        kind: Deployment
        metadata:
          namespace: {{ .Values.namespace | default .Release.Namespace }}
          name: {{ include "ga-operator.fullname" . }}
          labels:
            {{- include "ga-operator.labels" . | nindent 12 }}
          {{- with .Values.annotations }}
          # Indented to sit under this nested Deployment's metadata.annotations.
          annotations:
            {{- toYaml . | nindent 12 }}
          {{- end }}
        spec:
          replicas: 1
          selector:
            matchLabels:
              {{- include "ga-operator.selectorLabels" . | nindent 14 }}
          template:
            metadata:
              labels:
                {{- include "ga-operator.selectorLabels" . | nindent 16 }}
                {{- with .Values.podLabels }}
                {{- toYaml . | nindent 16 }}
                {{- end }}
              {{- with .Values.podAnnotations }}
              annotations:
                {{- toYaml . | nindent 16 }}
              {{- end }}
            spec:
              {{- with .Values.podSecurityContext }}
              securityContext:
                {{- toYaml . | nindent 16 }}
              {{- end }}
              containers:
                - name: {{ include "ga-operator.name" . }}
                  image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
                  imagePullPolicy: {{ .Values.image.pullPolicy }}
                  env:
                    # Exposes the scheduling node's name to the container.
                    - name: NODE_NAME
                      valueFrom:
                        fieldRef:
                          fieldPath: spec.nodeName
                  {{- with .Values.containerSecurityContext }}
                  securityContext:
                    {{- toYaml . | nindent 20 }}
                  {{- end }}
                  {{- with .Values.resources }}
                  resources:
                    {{- toYaml . | nindent 20 }}
                  {{- end }}
                  # Rendered only when arguments are configured, so an empty
                  # "args:" key is never emitted.
                  {{- with .Values.extraArgs }}
                  args:
                    {{- range . }}
                    - {{ . | quote }}
                    {{- end }}
                  {{- end }}
                  # Always mounted: pairs with the unconditional agent-yaml
                  # volume declared below.
                  volumeMounts:
                    - name: agent-yaml
                      mountPath: /etc/agent/
              {{- with .Values.image.pullSecrets }}
              imagePullSecrets:
                {{- toYaml . | nindent 16 }}
              {{- end }}
              {{- with .Values.nodeSelector }}
              nodeSelector:
                {{- toYaml . | nindent 16 }}
              {{- end }}
              # Declared independently of tolerations: the container mounts
              # this volume whether or not tolerations are configured.
              volumes:
                - name: agent-yaml
                  configMap:
                    name: grafana-agent-yml
              {{- with .Values.tolerations }}
              tolerations:
                {{- toYaml . | nindent 16 }}
              {{- end }}
              {{- with .Values.affinity }}
              affinity:
                {{- toYaml . | nindent 16 }}
              {{- end }}
  workloadScope:
    targetNodeGroups:
      {{- with .Values.targetNodeGroups }}
      {{- toYaml . | nindent 6 }}
      {{- end }}
78 changes: 78 additions & 0 deletions charts/grafana-agent/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Default values for the Grafana Agent edge chart.

# -- Overrides the chart's name
nameOverride: ""
# -- Overrides the chart's computed fullname
fullnameOverride: ""
# -- Annotations for the Deployment
annotations: {}
# -- Annotations for the Deployment Pods
podAnnotations: {}
# -- Labels for the Deployment Pods
podLabels: {}
# -- Pod security context (runAsUser, etc.)
podSecurityContext: {}
# -- Container security context (allowPrivilegeEscalation, etc.)
containerSecurityContext: {}

# -- Settings rendered into the agent.yaml ConfigMap
configmap:
  agent_yaml:
    global:
      # -- Remote-write URL used in the agent's global metrics section
      remote_write: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push
    config:
      # -- Scrape targets (list of host:port strings)
      targets: ["edgeapplication-node-exporter-prometheus-node-exporter.nodegroup.svc:9100"]
      remote_write:
        url: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push
        basic_auth:
          # WARNING: credentials are committed in plain text here and are
          # rendered into a ConfigMap. Override them per environment and
          # rotate the committed values.
          username: nats
          password: Mjk0YTA2MzMwMjc4NDYxYzM1YmU1ZDky

rbac:
  # -- Toggle to create ClusterRole and ClusterRoleBinding
  create: true
  # -- Name of a PodSecurityPolicy to use in the ClusterRole. If unset, no PodSecurityPolicy is used.
  podSecurityPolicyName: ''

serviceAccount:
  # -- Toggle to create ServiceAccount
  create: true
  # -- Service account name (empty: derived by the chart's helper)
  name: ""

image:
  # -- Image registry
  registry: docker.io
  # -- Image repo
  repository: grafana/agent
  # -- Image tag
  tag: v0.28.0
  # -- Image pull policy
  pullPolicy: IfNotPresent
  # -- Image pull secrets
  pullSecrets: []

# -- If both are set, Agent Operator will create and maintain a service for scraping kubelets
# https://grafana.com/docs/agent/latest/operator/getting-started/#monitor-kubelets
kubeletService:
  namespace: default
  serviceName: kubelet

# -- List of additional cli arguments passed to the agent container (example: `--log.level`)
# extraArgs: {}
extraArgs:
  - -enable-features=integrations-next
  - -config.file=/etc/agent/agent.yaml
  - -config.expand-env

# -- Resource limits and requests config
resources: {}
# -- nodeSelector configuration
nodeSelector: {}
# -- Tolerations applied to Pods
tolerations:
  - effect: NoExecute
    key: edgefarm.applications
    operator: Exists
# -- Pod affinity configuration
# affinity: {}
affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: node-role.kubernetes.io/edge
              operator: Exists
# -- Node groups the EdgeApplication is scoped to
targetNodeGroups:
  - name: virtual-6a87250b
  - name: virtual-e602bbba
18 changes: 18 additions & 0 deletions charts/node-exporter/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Helm chart metadata for the Prometheus node-exporter chart.
apiVersion: v2
# Upstream application version; quoted so it is always read as a string.
appVersion: "1.3.1"
description: A Helm chart for prometheus node-exporter
home: https://github.com/prometheus/node_exporter/
keywords:
  - node-exporter
  - prometheus
  - exporter
maintainers:
  # Each maintainer is one list entry with its own email and name; the chart's
  # own `name` below is a separate top-level key.
  - email: gianrubio@gmail.com
    name: gianrubio
  - email: zanhsieh@gmail.com
    name: zanhsieh
name: prometheus-node-exporter
sources:
  - https://github.com/prometheus/node_exporter/
type: application
# Chart (packaging) version, independent of appVersion.
version: "4.4.1"
77 changes: 77 additions & 0 deletions charts/node-exporter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Prometheus `Node Exporter`

Prometheus exporter for hardware and OS metrics exposed by *NIX kernels, written in Go with pluggable metric collectors.

This chart bootstraps a prometheus [`Node Exporter`](http://github.com/prometheus/node_exporter) daemonset on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.

## Get Repository Info

```console
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
```

_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._

## Install Chart

```console
helm install [RELEASE_NAME] prometheus-community/prometheus-node-exporter
```

_See [configuration](#configuring) below._

_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._

## Uninstall Chart

```console
helm uninstall [RELEASE_NAME]
```

This removes all the Kubernetes components associated with the chart and deletes the release.

_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._

## Upgrading Chart

```console
helm upgrade [RELEASE_NAME] [CHART] --install
```

_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._

### From 3.x to 4.x

Starting from version 4.0.0, the `node exporter` chart is using the [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/). Therefore you have to delete the daemonset before you upgrade.

```console
kubectl delete daemonset -l app=prometheus-node-exporter
helm upgrade -i prometheus-node-exporter prometheus-community/prometheus-node-exporter
```

If you use your own custom [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor) or [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#podmonitor), please make sure to update their `selector` fields to match the new labels.

### From 2.x to 3.x

Change the following:

```yaml
hostRootFsMount: true
```
to:
```yaml
hostRootFsMount:
enabled: true
mountPropagation: HostToContainer
```
## Configuring
See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments, visit the chart's [values.yaml](./values.yaml), or run these configuration commands:
```console
helm show values prometheus-community/prometheus-node-exporter
```
3 changes: 3 additions & 0 deletions charts/node-exporter/ci/port-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Values override kept under the chart's ci/ directory: sets both the service
# port and the target port to 9102.
service:
  targetPort: 9102
  port: 9102
Loading

0 comments on commit 3f8e350

Please sign in to comment.