From 49f8f299245fea30d90ce10f38c940014570d5f4 Mon Sep 17 00:00:00 2001 From: Armin Schlegel Date: Fri, 9 Dec 2022 14:45:53 +0100 Subject: [PATCH] feat: add logging system to monitoring installation (#21) * fix: rename variables * chore: add Readme.md * feat: added grafana dashboards for edge system and journal logs --- README.md | 107 ++++++++++++++++-- charts/grafana-agent/templates/configmap.yaml | 2 +- charts/grafana-agent/values.yaml | 47 +++++--- dev/charts/grafana/values.yaml | 12 +- devspace.yaml | 79 +++++++++---- 5 files changed, 192 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 254f058..e84f60b 100644 --- a/README.md +++ b/README.md @@ -32,12 +32,12 @@ # About The Project -TODO +`edgefarm.monitoring` uses different open source tools to provide monitoring of edge nodes, default k8s nodes and monitoring of applications on them. ## Features -- feature 1 -- feature 2 +- monitoring for k8s nodes and applications on them +- monitoring for edge nodes and applications on them ![Product Name Screen Shot][product-screenshot] @@ -49,7 +49,14 @@ Follow those simple steps, to install edgefarm.monitor in your cluster. ## ✔️ Prerequisites -TODO +- [edgefarm.core](https://github.com/edgefarm/edgefarm.core) +- [devspace](https://devspace.sh/) +- [kind](https://kind.sigs.k8s.io) +- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) +- [kustomize](https://kustomize.io/) +- [helm](https://helm.sh/) +- [mkcert](https://github.com/FiloSottile/mkcert) +- [jq](https://stedolan.github.io/jq/) ## 💡 [Do not skip] Mandatory step @@ -57,13 +64,90 @@ TODO, maybe eliiminate this section ## 🎯 Installation -TODO - -It will take around 5 minutes to complete, ... +To init and deploy monitoring, execute the following commands. +The installation takes about 3 minutes. + +Have a look at the `help` command to get an overview of all available commands. 
+ +```console +$ devspace run help + General monitoring commands: + devspace run-pipeline init Create password for monitoring and store it locally + devspace run-pipeline deploy-monitoring Creates full setup of monitoring + devspace run-pipeline purge-monitoring Remove monitoring setup +``` + +And spin up the monitoring: + +```console +devspace run-pipeline init +devspace run-pipeline deploy-monitoring +``` + +Once done, you'll find the following pods running: + +```console +$ kubectl get pods -n monitoring +NAME READY STATUS RESTARTS AGE +grafana-74796596fd-sxmzn 1/1 Running 0 79s +grafana-agent-operator-5f657cdf4-82z5x 1/1 Running 0 39s +grafana-mimir-alertmanager-0 1/1 Running 0 74s +grafana-mimir-compactor-0 0/1 Running 0 74s +grafana-mimir-distributor-689cdd7965-7hp7g 1/1 Running 0 74s +grafana-mimir-ingester-0 0/1 Running 0 74s +grafana-mimir-ingester-1 0/1 Running 0 74s +grafana-mimir-ingester-2 0/1 Running 0 74s +grafana-mimir-make-minio-buckets--1-g5wwf 0/1 Completed 0 74s +grafana-mimir-minio-67599d86b-ptc99 1/1 Running 0 74s +grafana-mimir-nginx-57db45f4c5-ltj89 1/1 Running 0 74s +grafana-mimir-overrides-exporter-7c89b68f5-kkpcm 1/1 Running 0 74s +grafana-mimir-querier-c4f8875f8-cflhs 1/1 Running 0 74s +grafana-mimir-querier-c4f8875f8-hqvcz 1/1 Running 0 74s +grafana-mimir-query-frontend-75b98c69c5-hnpz2 1/1 Running 0 74s +grafana-mimir-query-scheduler-6c484c5bfb-hw9xg 1/1 Running 0 74s +grafana-mimir-query-scheduler-6c484c5bfb-x6q24 1/1 Running 0 74s +grafana-mimir-ruler-7c789cbc4d-pcghw 1/1 Running 0 74s +grafana-mimir-store-gateway-0 0/1 Running 0 74s +main-nats-monitoring-0 2/2 Running 0 36s +``` + +```console +$ kubectl get pods -n loki +NAME READY STATUS RESTARTS AGE +loki-loki-distributed-compactor-5f76749b97-z9dz8 1/1 Running 0 79s +loki-loki-distributed-distributor-66c884fcd7-6rtgv 1/1 Running 0 79s +loki-loki-distributed-distributor-66c884fcd7-xhrz2 1/1 Running 0 79s +loki-loki-distributed-gateway-85f85b8675-h4ljb 1/1 Running 0 79s 
+loki-loki-distributed-ingester-0 1/1 Running 0 79s +loki-loki-distributed-ingester-1 1/1 Running 0 79s +loki-loki-distributed-querier-0 1/1 Running 0 79s +loki-loki-distributed-querier-1 1/1 Running 0 79s +loki-loki-distributed-query-frontend-66b88985cc-sktxb 1/1 Running 0 79s +loki-loki-distributed-table-manager-576c8cb5f6-kgwmz 1/1 Running 0 79s +promtail-hrnrx 1/1 Running 0 80s +promtail-xvnf5 1/1 Running 0 80s +``` +After this you can access http://grafana.localhost/login to see metrics and logs. +By default: +The user is admin. +The password can be found in: +- The file that was created by this command +```console +devspace run-pipeline init +``` +$HOME/.devspace/edgefarn.monitor/monitoring_password/password + +- Grafana secret +```console +kubectl get secrets -n monitoring grafana -o jsonpath="{.data.admin-password}" | base64 -d | xargs echo +``` # Usage -TODO +To uninstall the monitoring setup, run: +```console +devspace run-pipeline purge-monitoring +``` # Examples @@ -85,16 +169,15 @@ TODO TODO + Code contributions are very much **welcome**. 1. Fork the Project 2. Create your Branch (`git checkout -b AmazingFeature`) 3. Commit your Changes (`git commit -m 'Add some AmazingFeature") 4. Push to the Branch (`git push origin AmazingFeature`) -5. Open a Pull Request targetting the `staging` branch. - - +5. Open a Pull Request targeting the `beta` branch. 
-# Acknowledgements +# 🫶 Acknowledgements TODO diff --git a/charts/grafana-agent/templates/configmap.yaml b/charts/grafana-agent/templates/configmap.yaml index b2fa776..873f1a5 100644 --- a/charts/grafana-agent/templates/configmap.yaml +++ b/charts/grafana-agent/templates/configmap.yaml @@ -47,7 +47,7 @@ data: {{- end }} - job_name: leaf-nats static_configs: - - targets: {{ .Values.configmap.agent_yaml.config.job.leaf_nats.targets }} + - targets: {{ .Values.configmap.agent_yaml.metrics.configs.edgenodes_exporter.scrape_configs.job.leaf_nats.targets | toJson }}{{/* toJson renders the list as a valid YAML flow sequence even with several targets; a bare list prints as "[a b]", which YAML reads as one string */}} relabel_configs: - source_labels: [__address__] target_label: instance diff --git a/charts/grafana-agent/values.yaml b/charts/grafana-agent/values.yaml index ac78249..997f889 100644 --- a/charts/grafana-agent/values.yaml +++ b/charts/grafana-agent/values.yaml @@ -17,22 +17,37 @@ configmap: agent_yaml: global: remote_write: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push - config: - job: - node_exporter: - targets: - - "edge-node-exporter-prometheus-node-exporter.edge-monitoring.svc:9100" - cadvisor: - targets: - - "edge-cadvisor.edge-monitoring.svc:8080" - leaf_nats: - targets: - - "leaf-nats.nats.svc:7777" - remote_write: - url: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push - basic_auth: - username: nats - password: Bb3eNlCLPGNuVzqXBc4MnyHRvDt5eJbuYDE10h9Q + metrics: + configs: + edgenodes_exporter: + scrape_configs: + job: + node_exporter: + targets: ["edge-node-exporter-prometheus-node-exporter.edge-monitoring.svc:9100"] + cadvisor: + targets: ["edge-cadvisor.edge-monitoring.svc:8080"] + leaf_nats: + targets: + - "leaf-nats.nats.svc:7777" + remote_write: + url: http://grafana-mimir-nginx.monitoring.svc:80/api/v1/push + basic_auth: + username: test + password: Bb3eNlCLPGNuVzqXBc4MnyHRvDt5eJbuYDE10h9Q + + logs: + configs: + default: + clients: + url: http://loki-loki-distributed-gateway.loki.svc/loki/api/v1/push + basic_auth: + username: test + password: Bb3eNlCLPGNuVzqXBc4MnyHRvDt5eJbuYDE10h9Q + 
scrape_configs: + job: + system: + targets: localhost + rbac: # -- Toggle to create ClusterRole and ClusterRoleBinding create: true diff --git a/dev/charts/grafana/values.yaml b/dev/charts/grafana/values.yaml index 4562b60..48b40a7 100644 --- a/dev/charts/grafana/values.yaml +++ b/dev/charts/grafana/values.yaml @@ -42,21 +42,21 @@ dashboards: nats-dashboard: url: https://raw.githubusercontent.com/edgefarm/grafana-dashboards/main/dashboards/nats.json token: "" - node-exporter-dashboard: - url: https://raw.githubusercontent.com/edgefarm/grafana-dashboards/main/dashboards/node_exporter.json - token: "" node-exporter-custom-dashboard: url: https://raw.githubusercontent.com/edgefarm/grafana-dashboards/main/dashboards/node_exporter_custom.json token: "" docker-containers-dashboard: url: https://raw.githubusercontent.com/edgefarm/grafana-dashboards/main/dashboards/docker_containers.json token: "" - cadvisor-dashboard: - url: https://raw.githubusercontent.com/edgefarm/grafana-dashboards/main/dashboards/cadvisor.json - token: "" cadvisor-custom: url: https://raw.githubusercontent.com/edgefarm/grafana-dashboards/main/dashboards/cadvisor_custom.json token: "" + edge_system_logs: + url: https://raw.githubusercontent.com/edgefarm/grafana-dashboards/main/dashboards/edge_system_logs.json + token: "" + edge_journal_logs: + url: https://raw.githubusercontent.com/edgefarm/grafana-dashboards/main/dashboards/edge_journal_logs.json + token: "" #if you want persistend disk # persistence: # type: pvc diff --git a/devspace.yaml b/devspace.yaml index 9b0d768..23e34b3 100644 --- a/devspace.yaml +++ b/devspace.yaml @@ -2,27 +2,25 @@ version: v2beta1 name: edgefarm-network vars: - mimir_password_dir: - command: echo $HOME/.devspace/edgefarn.monitor/mimir_password/ - - mimir_user: - value: nats - - mimir_password: + monitoring_password_dir: + command: echo $HOME/.devspace/edgefarn.monitor/monitoring_password/ + monitoring_password: command: |- #!/bin/bash set -e - echo $(cat 
${mimir_password_dir}/password) + echo $(cat ${monitoring_password_dir}/password) + monitoring_user: + value: edgefarm-monitor functions: - init_mimir_password: |- - # usage init_mimir_password + init_monitoring_password: |- + # usage init_monitoring_password #!/usr/bin/env bash set -e set -u if [ "$#" -ne 1 ]; then echo "Illegal number of parameters" - echo "mimir_password_psk" + echo "monitoring_password_psk" exit 1 fi if ! [ -f "${1}/password" ]; then @@ -31,16 +29,37 @@ functions: date +%s | sha256sum | base64 | head -c 32 > ${1}/password fi +commands: + help: |- + #!/bin/bash + set -e + GREEN='\033[0;32m' + BRED='\033[3;31m' + BGREEN='\033[1;32m' + GREY='\033[0;36m' + BOLD='\033[1m' + NC='\033[0m' # No Color + echo -e "${BGREEN}Usage of ${BRED}edgefarm.monitoring:${NC}" + echo -e "${GREEN} General monitoring commands:${NC}" + echo -e "${BOLD} devspace run-pipeline init # ${GREY}Create password for monitoring and store it locally${NC}" + echo -e "${BOLD} devspace run-pipeline deploy-monitoring # ${GREY}Creates full setup of monitoring${NC}" + echo -e "${BOLD} devspace run-pipeline purge-monitoring # ${GREY}Remove monitoring setup${NC}" + pipelines: init: |- - # generating password for mimir - init_mimir_password ${mimir_password_dir} + # generating password for monitoring + init_monitoring_password ${monitoring_password_dir} # deploy-node-exporter: |- # # deploy node exporter # create_deployments node-exporter # create_deployments service-monitor-for-node-exporter + deploy-logs: |- + # install logging components; "upgrade --install" keeps re-runs of this pipeline from failing on an existing release + create_deployments promtail + helm upgrade --install loki-loki-distributed --namespace loki ./dev/charts/loki-distributed --set loki.storageConfig.aws.s3=http://${monitoring_user}:${monitoring_password}@grafana-mimir-minio.monitoring.svc:9000 --set gateway.basicAuth.username=${monitoring_user} --set gateway.basicAuth.password=${monitoring_password} + deploy-monitoring: |- # install all monitoring setup: grafana, grafana-operator and mimir create_deployments 
prometheus-crd @@ -56,6 +75,17 @@ pipelines: create_deployments edge-cadvisor create_deployments edge-grafana-agent + # install logging services + run_pipelines deploy-logs + + purge-monitoring: |- + purge_deployments edge-node-exporter edge-cadvisor edge-grafana-agent main-nats-monitoring + purge_deployments grafana-mimir grafana prometheus-crd grafana-agent-operator + purge_deployments promtail + helm uninstall loki-loki-distributed -n loki + kubectl delete pvc -n monitoring --all + kubectl delete pvc -n loki --all + deployments: grafana-mimir: helm: @@ -120,11 +150,20 @@ deployments: values: configmap: agent_yaml: - config: - remote_write: - basic_auth: - username: ${mimir_user} - password: ${mimir_password} + metrics: + configs: + edgenodes_exporter: + remote_write: + basic_auth: + username: ${monitoring_user} + password: ${monitoring_password} + logs: + configs: + default: + clients: + basic_auth: + username: ${monitoring_user} + password: ${monitoring_password} namespace: edge-monitoring edge-cadvisor: @@ -162,8 +201,8 @@ deployments: path: ./charts/main-nats-monitoring values: mimir: - user: ${mimir_user} - password: ${mimir_password} + user: ${monitoring_user} + password: ${monitoring_password} agent: image: grafana/agent:v0.28.0 namespace: monitoring