---
# Prometheus release version. Quoted so it is always loaded as a string.
prometheus_version: "2.27.0"

# Service account: the home directory and primary group both derive from
# prometheus_user.
prometheus_user: "prometheus_adm"
prometheus_user_home: "/home/{{ prometheus_user }}"
prometheus_group: "{{ prometheus_user }}"
prometheus_extra_groups: []
prometheus_packages: []

# Directory layout: every templated path below is built from
# prometheus_base_dir (directly, or via prometheus_config_dir).
prometheus_base_dir: "/opt/prometheus"
prometheus_config_dir: "{{ prometheus_base_dir }}/conf"
prometheus_rules_dir: "{{ prometheus_config_dir }}/rules"
prometheus_file_sd_dir: "{{ prometheus_config_dir }}/file_sd"
prometheus_data_dir: "{{ prometheus_base_dir }}/data"
prometheus_read_only_dirs: []
# Web bind address and port. The IP is quoted so it is always a string.
prometheus_web_bind_ip: "127.0.0.1"
prometheus_web_bind_port: 61000
# NOTE(review): the port is repeated literally in this URL; keep it in sync
# with prometheus_web_bind_port when overriding either value.
prometheus_web_external_url: "http://localhost:61000/"
# TLS for the web endpoint; disabled by default.
prometheus_web_tls_enabled: false
# Certificate and key locations, both below prometheus_base_dir.
prometheus_web_tls_cert_path: "{{ prometheus_base_dir }}/tls/certs/mycert.pem"
prometheus_web_tls_key_path: "{{ prometheus_base_dir }}/tls/private/mykey.pem"
# NOTE(review): presumably the source files that get copied to the paths
# above -- confirm against the role's tasks.
prometheus_web_tls_cert_source: mycert.pem
prometheus_web_tls_key_source: mykey.pem
# @var prometheus_web_http_server:description: See official [documentation](https://github.com/prometheus/exporter-toolkit/blob/master/docs/web-configuration.md).
# @var prometheus_web_http_server: $ "_unset_"
# @var prometheus_web_basic_auth_users:description: See official [documentation](https://github.com/prometheus/exporter-toolkit/blob/master/docs/web-configuration.md).
# @var prometheus_web_basic_auth_users: $ "_unset_"
prometheus_log_level: error

# Retention settings; both values are quoted strings.
prometheus_storage_retention: "30d"
prometheus_storage_retention_size: "0"
# @var prometheus_storage_retention_size:description: >
# __[EXPERIMENTAL]__ Maximum number of bytes that can be stored for blocks.
# Units supported: KB, MB, GB, TB, PB.
# @end
# Empty by default; each entry is a name/value pair, as in the example.
prometheus_config_flags_extra: []
# @var prometheus_config_flags_extra:example: >
# prometheus_config_flags_extra:
#   - name: alertmanager.timeout
#     value: 10s
# @end
# Empty by default; see the example for the shape of one entry.
prometheus_alertmanager_config: []
# @var prometheus_alertmanager_config:example: >
# prometheus_alertmanager_config:
#   - scheme: https
#     path_prefix: alertmanager/
#     basic_auth:
#       username: user
#       password: pass
#     static_configs:
#       - targets: ["127.0.0.1:9093"]
#     proxy_url: "127.0.0.2"
# @end
# Empty by default; see the example for the shape of one entry.
prometheus_alert_relabel_configs: []
# @var prometheus_alert_relabel_configs:example: >
# prometheus_alert_relabel_configs:
#   - action: labeldrop
#     regex: replica
# @end
# Mapping of global scrape and rule-evaluation timing settings.
prometheus_global:
  scrape_interval: 15s
  scrape_timeout: 10s
  evaluation_interval: 15s
# Empty by default; see the example for the shape of one entry.
prometheus_remote_write: []
# @var prometheus_remote_write:example: >
# prometheus_remote_write:
#   - url: https://dev.kausal.co/prom/push
#     basic_auth:
#       password: FOO
# @end
# Empty by default; see the example for the shape of one entry.
prometheus_remote_read: []
# @var prometheus_remote_read:example: >
# prometheus_remote_read:
#   - url: https://demo.cloudalchemy.org:9201/read
#     basic_auth:
#       password: FOO
# @end
# The environment label falls back from ansible_fqdn to ansible_host and
# finally to inventory_hostname.
prometheus_external_labels:
  environment: "{{ ansible_fqdn | default(ansible_host) | default(inventory_hostname) }}"
# Empty by default; see the example for the shape of one entry.
prometheus_targets: []
# @var prometheus_targets:example: >
# prometheus_targets:
#   - name: node
#     config:
#       - targets:
#           - localhost:9100
#         labels:
#           env: test
# @end
# Default scrape configuration: a single job named "prometheus".
prometheus_scrape_configs:
  - job_name: "prometheus"
    # Prefix "/metrics" with the path component of prometheus_web_external_url
    # when that path is longer than one character; otherwise use no prefix.
    metrics_path: "{{ prometheus_web_external_url | urlsplit('path') if (prometheus_web_external_url | urlsplit('path')) | length > 1 else '' }}/metrics"
    static_configs:
      - targets:
          # NOTE(review): the port is a literal here; keep it in sync with
          # prometheus_web_bind_port when overriding that value.
          - "{{ ansible_fqdn | default(ansible_host) | default('localhost') }}:61000"
# Glob patterns for rule files.
# NOTE(review): the base directory these are resolved against is not visible
# here -- confirm against the role's tasks.
prometheus_alert_rules_files:
  - prometheus/rules/*.rules

# Glob patterns for static target files (YAML and JSON).
prometheus_static_targets_files:
  - prometheus/targets/*.yml
  - prometheus/targets/*.json
# Default alert rules: an always-firing Watchdog and an InstanceDown check.
prometheus_alert_rules:
  - alert: Watchdog
    expr: vector(1)
    for: 10m
    labels:
      severity: warning
    annotations:
      description: >-
        This is an alert meant to ensure that the entire alerting pipeline is functional.
        This alert is always firing. There are integrations with various notification
        mechanisms that send a notification when this alert is not firing anymore.
      summary: "Ensure entire alerting pipeline is functional"
  - alert: InstanceDown
    expr: "up == 0"
    for: 5m
    labels:
      severity: critical
    annotations:
      # {% raw %} stops Jinja from rendering the {{ $labels.* }} expressions,
      # so they are passed through literally.
      description: "{% raw %}{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.{% endraw %}"
      summary: "{% raw %}Instance {{ $labels.instance }} down{% endraw %}"
# Empty by default.
# NOTE(review): presumably additional rules combined with
# prometheus_alert_rules -- confirm against the role's templates.
prometheus_alert_rules_extra: []