Skip to content

Commit

Permalink
Merge pull request #145 from uche-madu/develop
Browse files Browse the repository at this point in the history
fix: grafana dashboard ref
  • Loading branch information
uche-madu authored Nov 11, 2023
2 parents e205cb4 + e89d922 commit b782f5a
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 15 deletions.
30 changes: 15 additions & 15 deletions argocd-app/monitoring/values-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,21 @@ kube-prometheus-stack:
path: /var/lib/grafana/dashboards/custom

dashboards:
# default:
# prometheus-overview:
# gnetId: 3662 # imports dashboard from grafana.com
# revision: 2
# datasource: Prometheus
# alertmanager:
# gnetId: 9578 # imports dashboard from grafana.com
# revision: 4
# node-exporter:
# gnetId: 1860 # imports dashboard from grafana.com
# revision: 33
# datasource: Prometheus
# argocd:
# gnetId: 14584 # imports dashboard from grafana.com
# revision: 1
default:
  # Dashboards for the "default" provider. Each entry is imported from
  # grafana.com by its dashboard ID (gnetId) at a pinned revision.
  # Where set, `datasource` names the datasource given for that dashboard.
  prometheus-overview:
    gnetId: 3662
    revision: 2
    datasource: Prometheus
  alertmanager:
    gnetId: 9578
    revision: 4
  node-exporter:
    gnetId: 1860
    revision: 33
    datasource: Prometheus
  argocd:
    gnetId: 14584
    revision: 1
custom:
airflow-cluster:
file: dashboards/airflow-cluster-dashboard.json
Expand Down
4 changes: 4 additions & 0 deletions argocd-app/my-airflow/values-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,7 @@ airflow:
applyCustomEnv: false
jobAnnotations:
"argocd.argoproj.io/hook": Sync

statsd:
  # Airflow StatsD metrics mappings
  # (https://airflow.apache.org/docs/apache-airflow/stable/logging-monitoring/metrics.html)
  #
  # Each list item is one statsd-exporter mapping entry, written as structured
  # YAML rather than a single pre-serialized string. There is deliberately no
  # nested `mappings:` key here: the items themselves are the mapping entries.
  # NOTE(review): assumes the chart wraps this list under `mappings:` when it
  # renders the exporter config - confirm against the chart's statsd template.
  overrideMappings:
    # === Counters ===
    # Regex matches are single-quoted so the backslash stays literal.
    - match: '(.+)\.(.+)_start$'
      match_metric_type: counter
      name: "af_agg_job_start"
      match_type: regex
      labels:
        airflow_id: "$1"
        job_name: "$2"
    - match: '(.+)\.(.+)_end$'
      match_metric_type: counter
      name: "af_agg_job_end"
      match_type: regex
      labels:
        airflow_id: "$1"
        job_name: "$2"
    - match: '(.+)\.operator_failures_(.+)$'
      match_metric_type: counter
      name: "af_agg_operator_failures"
      match_type: regex
      labels:
        airflow_id: "$1"
        operator_name: "$2"
    - match: '(.+)\.operator_successes_(.+)$'
      match_metric_type: counter
      name: "af_agg_operator_successes"
      match_type: regex
      labels:
        airflow_id: "$1"
        operator_name: "$2"
    # Glob matches are quoted because a leading `*` is a YAML alias sigil.
    - match: "*.ti_failures"
      match_metric_type: counter
      name: "af_agg_ti_failures"
      labels:
        airflow_id: "$1"
    - match: "*.ti_successes"
      match_metric_type: counter
      name: "af_agg_ti_successes"
      labels:
        airflow_id: "$1"
    - match: "*.zombies_killed"
      match_metric_type: counter
      name: "af_agg_zombies_killed"
      labels:
        airflow_id: "$1"
    - match: "*.scheduler_heartbeat"
      match_metric_type: counter
      name: "af_agg_scheduler_heartbeat"
      labels:
        airflow_id: "$1"
    - match: "*.dag_processing.processes"
      match_metric_type: counter
      name: "af_agg_dag_processing_processes"
      labels:
        airflow_id: "$1"
    - match: "*.scheduler.tasks.killed_externally"
      match_metric_type: counter
      name: "af_agg_scheduler_tasks_killed_externally"
      labels:
        airflow_id: "$1"
    - match: "*.scheduler.tasks.running"
      match_metric_type: counter
      name: "af_agg_scheduler_tasks_running"
      labels:
        airflow_id: "$1"
    - match: "*.scheduler.tasks.starving"
      match_metric_type: counter
      name: "af_agg_scheduler_tasks_starving"
      labels:
        airflow_id: "$1"
    - match: "*.scheduler.orphaned_tasks.cleared"
      match_metric_type: counter
      name: "af_agg_scheduler_orphaned_tasks_cleared"
      labels:
        airflow_id: "$1"
    - match: "*.scheduler.orphaned_tasks.adopted"
      match_metric_type: counter
      name: "af_agg_scheduler_orphaned_tasks_adopted"
      labels:
        airflow_id: "$1"
    - match: "*.scheduler.critical_section_busy"
      match_metric_type: counter
      name: "af_agg_scheduler_critical_section_busy"
      labels:
        airflow_id: "$1"
    - match: "*.sla_email_notification_failure"
      match_metric_type: counter
      name: "af_agg_sla_email_notification_failure"
      labels:
        airflow_id: "$1"
    - match: "*.ti.start.*.*"
      match_metric_type: counter
      name: "af_agg_ti_start"
      labels:
        airflow_id: "$1"
        dag_id: "$2"
        task_id: "$3"
    - match: "*.ti.finish.*.*.*"
      match_metric_type: counter
      name: "af_agg_ti_finish"
      labels:
        airflow_id: "$1"
        dag_id: "$2"
        task_id: "$3"
        state: "$4"
    - match: "*.dag.callback_exceptions"
      match_metric_type: counter
      name: "af_agg_dag_callback_exceptions"
      labels:
        airflow_id: "$1"
    - match: "*.celery.task_timeout_error"
      match_metric_type: counter
      name: "af_agg_celery_task_timeout_error"
      labels:
        airflow_id: "$1"

    # === Gauges ===
    - match: "*.dagbag_size"
      match_metric_type: gauge
      name: "af_agg_dagbag_size"
      labels:
        airflow_id: "$1"
    - match: "*.dag_processing.import_errors"
      match_metric_type: gauge
      name: "af_agg_dag_processing_import_errors"
      labels:
        airflow_id: "$1"
    - match: "*.dag_processing.total_parse_time"
      match_metric_type: gauge
      name: "af_agg_dag_processing_total_parse_time"
      labels:
        airflow_id: "$1"
    - match: "*.dag_processing.last_runtime.*"
      match_metric_type: gauge
      name: "af_agg_dag_processing_last_runtime"
      labels:
        airflow_id: "$1"
        dag_file: "$2"
    - match: "*.dag_processing.last_run.seconds_ago.*"
      match_metric_type: gauge
      name: "af_agg_dag_processing_last_run_seconds"
      labels:
        airflow_id: "$1"
        dag_file: "$2"
    - match: "*.dag_processing.processor_timeouts"
      match_metric_type: gauge
      name: "af_agg_dag_processing_processor_timeouts"
      labels:
        airflow_id: "$1"
    - match: "*.executor.open_slots"
      match_metric_type: gauge
      name: "af_agg_executor_open_slots"
      labels:
        airflow_id: "$1"
    - match: "*.executor.queued_tasks"
      match_metric_type: gauge
      name: "af_agg_executor_queued_tasks"
      labels:
        airflow_id: "$1"
    - match: "*.executor.running_tasks"
      match_metric_type: gauge
      name: "af_agg_executor_running_tasks"
      labels:
        airflow_id: "$1"
    - match: "*.pool.open_slots.*"
      match_metric_type: gauge
      name: "af_agg_pool_open_slots"
      labels:
        airflow_id: "$1"
        pool_name: "$2"
    - match: "*.pool.queued_slots.*"
      match_metric_type: gauge
      name: "af_agg_pool_queued_slots"
      labels:
        airflow_id: "$1"
        pool_name: "$2"
    - match: "*.pool.running_slots.*"
      match_metric_type: gauge
      name: "af_agg_pool_running_slots"
      labels:
        airflow_id: "$1"
        pool_name: "$2"
    - match: "*.pool.starving_tasks.*"
      match_metric_type: gauge
      name: "af_agg_pool_starving_tasks"
      labels:
        airflow_id: "$1"
        pool_name: "$2"
    - match: "*.smart_sensor_operator.poked_tasks"
      match_metric_type: gauge
      name: "af_agg_smart_sensor_operator_poked_tasks"
      labels:
        airflow_id: "$1"
    - match: "*.smart_sensor_operator.poked_success"
      match_metric_type: gauge
      name: "af_agg_smart_sensor_operator_poked_success"
      labels:
        airflow_id: "$1"
    - match: "*.smart_sensor_operator.poked_exception"
      match_metric_type: gauge
      name: "af_agg_smart_sensor_operator_poked_exception"
      labels:
        airflow_id: "$1"
    - match: "*.smart_sensor_operator.exception_failures"
      match_metric_type: gauge
      name: "af_agg_smart_sensor_operator_exception_failures"
      labels:
        airflow_id: "$1"
    - match: "*.smart_sensor_operator.infra_failures"
      match_metric_type: gauge
      name: "af_agg_smart_sensor_operator_infra_failures"
      labels:
        airflow_id: "$1"

    # === Timers ===
    - match: "*.dagrun.dependency-check.*"
      match_metric_type: observer
      name: "af_agg_dagrun_dependency_check"
      labels:
        airflow_id: "$1"
        dag_id: "$2"
    - match: "*.dag.*.*.duration"
      match_metric_type: observer
      name: "af_agg_dag_task_duration"
      labels:
        airflow_id: "$1"
        dag_id: "$2"
        task_id: "$3"
    - match: "*.dag_processing.last_duration.*"
      match_metric_type: observer
      name: "af_agg_dag_processing_duration"
      labels:
        airflow_id: "$1"
        dag_file: "$2"
    - match: "*.dagrun.duration.success.*"
      match_metric_type: observer
      name: "af_agg_dagrun_duration_success"
      labels:
        airflow_id: "$1"
        dag_id: "$2"
    - match: "*.dagrun.duration.failed.*"
      match_metric_type: observer
      name: "af_agg_dagrun_duration_failed"
      labels:
        airflow_id: "$1"
        dag_id: "$2"
    - match: "*.dagrun.schedule_delay.*"
      match_metric_type: observer
      name: "af_agg_dagrun_schedule_delay"
      labels:
        airflow_id: "$1"
        dag_id: "$2"
    - match: "*.scheduler.critical_section_duration"
      match_metric_type: observer
      name: "af_agg_scheduler_critical_section_duration"
      labels:
        airflow_id: "$1"
    - match: "*.dagrun.*.first_task_scheduling_delay"
      match_metric_type: observer
      name: "af_agg_dagrun_first_task_scheduling_delay"
      labels:
        airflow_id: "$1"
        dag_id: "$2"

0 comments on commit b782f5a

Please sign in to comment.