From e89d922e8bf0315c4c86bb1581deb4fbc65fa3f7 Mon Sep 17 00:00:00 2001 From: Uche Madu Date: Sat, 11 Nov 2023 12:59:09 +0100 Subject: [PATCH] fix: grafana dashboard ref --- argocd-app/monitoring/values-dev.yaml | 30 +++++++++++++-------------- argocd-app/my-airflow/values-dev.yaml | 4 ++++ 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/argocd-app/monitoring/values-dev.yaml b/argocd-app/monitoring/values-dev.yaml index 111231f..0d2e559 100644 --- a/argocd-app/monitoring/values-dev.yaml +++ b/argocd-app/monitoring/values-dev.yaml @@ -46,21 +46,21 @@ kube-prometheus-stack: path: /var/lib/grafana/dashboards/custom dashboards: - # default: - # prometheus-overview: - # gnetId: 3662 # imports dashboard from grafana.com - # revision: 2 - # datasource: Prometheus - # alertmanager: - # gnetId: 9578 # imports dashboard from grafana.com - # revision: 4 - # node-exporter: - # gnetId: 1860 # imports dashboard from grafana.com - # revision: 33 - # datasource: Prometheus - # argocd: - # gnetId: 14584 # imports dashboard from grafana.com - # revision: 1 + default: + prometheus-overview: + gnetId: 3662 # imports dashboard from grafana.com + revision: 2 + datasource: Prometheus + alertmanager: + gnetId: 9578 # imports dashboard from grafana.com + revision: 4 + node-exporter: + gnetId: 1860 # imports dashboard from grafana.com + revision: 33 + datasource: Prometheus + argocd: + gnetId: 14584 # imports dashboard from grafana.com + revision: 1 custom: airflow-cluster: file: dashboards/airflow-cluster-dashboard.json diff --git a/argocd-app/my-airflow/values-dev.yaml b/argocd-app/my-airflow/values-dev.yaml index b849e06..3b12444 100644 --- a/argocd-app/my-airflow/values-dev.yaml +++ b/argocd-app/my-airflow/values-dev.yaml @@ -133,3 +133,7 @@ airflow: applyCustomEnv: false jobAnnotations: "argocd.argoproj.io/hook": Sync + + statsd: + overrideMappings: + - "mappings:\r\n # Airflow StatsD metrics mappings (https://airflow.apache.org/docs/apache-airflow/stable/logging-monitoring/metrics.html)\r\n # === Counters ===\r\n - match: \"(.+)\\\\.(.+)_start$\"\r\n match_metric_type: counter\r\n name: \"af_agg_job_start\"\r\n match_type: regex\r\n labels:\r\n airflow_id: \"$1\"\r\n job_name: \"$2\"\r\n - match: \"(.+)\\\\.(.+)_end$\"\r\n match_metric_type: counter\r\n name: \"af_agg_job_end\"\r\n match_type: regex\r\n labels:\r\n airflow_id: \"$1\"\r\n job_name: \"$2\"\r\n - match: \"(.+)\\\\.operator_failures_(.+)$\"\r\n match_metric_type: counter\r\n name: \"af_agg_operator_failures\"\r\n match_type: regex\r\n labels:\r\n airflow_id: \"$1\"\r\n operator_name: \"$2\"\r\n - match: \"(.+)\\\\.operator_successes_(.+)$\"\r\n match_metric_type: counter\r\n name: \"af_agg_operator_successes\"\r\n match_type: regex\r\n labels:\r\n airflow_id: \"$1\"\r\n operator_name: \"$2\"\r\n - match: \"*.ti_failures\"\r\n match_metric_type: counter\r\n name: \"af_agg_ti_failures\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.ti_successes\"\r\n match_metric_type: counter\r\n name: \"af_agg_ti_successes\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.zombies_killed\"\r\n match_metric_type: counter\r\n name: \"af_agg_zombies_killed\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.scheduler_heartbeat\"\r\n match_metric_type: counter\r\n name: \"af_agg_scheduler_heartbeat\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.dag_processing.processes\"\r\n match_metric_type: counter\r\n name: \"af_agg_dag_processing_processes\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.scheduler.tasks.killed_externally\"\r\n match_metric_type: counter\r\n name: \"af_agg_scheduler_tasks_killed_externally\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.scheduler.tasks.running\"\r\n match_metric_type: counter\r\n name: \"af_agg_scheduler_tasks_running\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.scheduler.tasks.starving\"\r\n match_metric_type: counter\r\n name: \"af_agg_scheduler_tasks_starving\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.scheduler.orphaned_tasks.cleared\"\r\n match_metric_type: counter\r\n name: \"af_agg_scheduler_orphaned_tasks_cleared\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.scheduler.orphaned_tasks.adopted\"\r\n match_metric_type: counter\r\n name: \"af_agg_scheduler_orphaned_tasks_adopted\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.scheduler.critical_section_busy\"\r\n match_metric_type: counter\r\n name: \"af_agg_scheduler_critical_section_busy\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.sla_email_notification_failure\"\r\n match_metric_type: counter\r\n name: \"af_agg_sla_email_notification_failure\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.ti.start.*.*\"\r\n match_metric_type: counter\r\n name: \"af_agg_ti_start\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_id: \"$2\"\r\n task_id: \"$3\"\r\n - match: \"*.ti.finish.*.*.*\"\r\n match_metric_type: counter\r\n name: \"af_agg_ti_finish\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_id: \"$2\"\r\n task_id: \"$3\"\r\n state: \"$4\"\r\n - match: \"*.dag.callback_exceptions\"\r\n match_metric_type: counter\r\n name: \"af_agg_dag_callback_exceptions\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.celery.task_timeout_error\"\r\n match_metric_type: counter\r\n name: \"af_agg_celery_task_timeout_error\"\r\n labels:\r\n airflow_id: \"$1\"\r\n\r\n # === Gauges ===\r\n - match: \"*.dagbag_size\"\r\n match_metric_type: gauge\r\n name: \"af_agg_dagbag_size\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.dag_processing.import_errors\"\r\n match_metric_type: gauge\r\n name: \"af_agg_dag_processing_import_errors\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.dag_processing.total_parse_time\"\r\n match_metric_type: gauge\r\n name: \"af_agg_dag_processing_total_parse_time\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.dag_processing.last_runtime.*\"\r\n match_metric_type: gauge\r\n name: \"af_agg_dag_processing_last_runtime\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_file: \"$2\"\r\n - match: \"*.dag_processing.last_run.seconds_ago.*\"\r\n match_metric_type: gauge\r\n name: \"af_agg_dag_processing_last_run_seconds\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_file: \"$2\"\r\n - match: \"*.dag_processing.processor_timeouts\"\r\n match_metric_type: gauge\r\n name: \"af_agg_dag_processing_processor_timeouts\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.executor.open_slots\"\r\n match_metric_type: gauge\r\n name: \"af_agg_executor_open_slots\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.executor.queued_tasks\"\r\n match_metric_type: gauge\r\n name: \"af_agg_executor_queued_tasks\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.executor.running_tasks\"\r\n match_metric_type: gauge\r\n name: \"af_agg_executor_running_tasks\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.pool.open_slots.*\"\r\n match_metric_type: gauge\r\n name: \"af_agg_pool_open_slots\"\r\n labels:\r\n airflow_id: \"$1\"\r\n pool_name: \"$2\"\r\n - match: \"*.pool.queued_slots.*\"\r\n match_metric_type: gauge\r\n name: \"af_agg_pool_queued_slots\"\r\n labels:\r\n airflow_id: \"$1\"\r\n pool_name: \"$2\"\r\n - match: \"*.pool.running_slots.*\"\r\n match_metric_type: gauge\r\n name: \"af_agg_pool_running_slots\"\r\n labels:\r\n airflow_id: \"$1\"\r\n pool_name: \"$2\"\r\n - match: \"*.pool.starving_tasks.*\"\r\n match_metric_type: gauge\r\n name: \"af_agg_pool_starving_tasks\"\r\n labels:\r\n airflow_id: \"$1\"\r\n pool_name: \"$2\"\r\n - match: \"*.smart_sensor_operator.poked_tasks\"\r\n match_metric_type: gauge\r\n name: \"af_agg_smart_sensor_operator_poked_tasks\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.smart_sensor_operator.poked_success\"\r\n match_metric_type: gauge\r\n name: \"af_agg_smart_sensor_operator_poked_success\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.smart_sensor_operator.poked_exception\"\r\n match_metric_type: gauge\r\n name: \"af_agg_smart_sensor_operator_poked_exception\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.smart_sensor_operator.exception_failures\"\r\n match_metric_type: gauge\r\n name: \"af_agg_smart_sensor_operator_exception_failures\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.smart_sensor_operator.infra_failures\"\r\n match_metric_type: gauge\r\n name: \"af_agg_smart_sensor_operator_infra_failures\"\r\n labels:\r\n airflow_id: \"$1\"\r\n\r\n # === Timers ===\r\n - match: \"*.dagrun.dependency-check.*\"\r\n match_metric_type: observer\r\n name: \"af_agg_dagrun_dependency_check\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_id: \"$2\"\r\n - match: \"*.dag.*.*.duration\"\r\n match_metric_type: observer\r\n name: \"af_agg_dag_task_duration\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_id: \"$2\"\r\n task_id: \"$3\"\r\n - match: \"*.dag_processing.last_duration.*\"\r\n match_metric_type: observer\r\n name: \"af_agg_dag_processing_duration\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_file: \"$2\"\r\n - match: \"*.dagrun.duration.success.*\"\r\n match_metric_type: observer\r\n name: \"af_agg_dagrun_duration_success\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_id: \"$2\"\r\n - match: \"*.dagrun.duration.failed.*\"\r\n match_metric_type: observer\r\n name: \"af_agg_dagrun_duration_failed\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_id: \"$2\"\r\n - match: \"*.dagrun.schedule_delay.*\"\r\n match_metric_type: observer\r\n name: \"af_agg_dagrun_schedule_delay\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_id: \"$2\"\r\n - match: \"*.scheduler.critical_section_duration\"\r\n match_metric_type: observer\r\n name: \"af_agg_scheduler_critical_section_duration\"\r\n labels:\r\n airflow_id: \"$1\"\r\n - match: \"*.dagrun.*.first_task_scheduling_delay\"\r\n match_metric_type: observer\r\n name: \"af_agg_dagrun_first_task_scheduling_delay\"\r\n labels:\r\n airflow_id: \"$1\"\r\n dag_id: \"$2\"" \ No newline at end of file