---
# Default variables for the Prometheus role.
# Comment lines starting with "@var" are documentation annotations; multi-line
# annotations (those ending in ">") are closed with "@end".

# Quoted so the version is always loaded as a string.
prometheus_version: "2.27.0"

# Service account, groups and extra packages.
prometheus_user: "prometheus_adm"
prometheus_user_home: "/home/{{ prometheus_user }}"
prometheus_group: "{{ prometheus_user }}"
prometheus_extra_groups: []
prometheus_packages: []

# Directory layout; the templated paths below derive from prometheus_base_dir.
prometheus_base_dir: "/opt/prometheus"
prometheus_config_dir: "{{ prometheus_base_dir }}/conf"
prometheus_rules_dir: "{{ prometheus_config_dir }}/rules"
prometheus_file_sd_dir: "{{ prometheus_config_dir }}/file_sd"
prometheus_data_dir: "{{ prometheus_base_dir }}/data"
prometheus_read_only_dirs: []

# Web listener settings.
prometheus_web_bind_ip: "127.0.0.1"
prometheus_web_bind_port: 9090
prometheus_web_external_url: ""

# @var prometheus_web_config:description: See official [documentation](https://github.com/prometheus/exporter-toolkit/blob/master/docs/web-configuration.md).
prometheus_web_config:
  tls_server_config: {}
  http_server_config: {}
  basic_auth_users: {}

# Storage retention limits.
prometheus_storage_retention: "30d"
prometheus_storage_retention_size: "0"
# @var prometheus_storage_retention_size:description: >
# __[EXPERIMENTAL]__ Maximum number of bytes that can be stored for blocks.
# Units supported: KB, MB, GB, TB, PB.
# @end

prometheus_config_flags_extra: []
# @var prometheus_config_flags_extra:example: >
# prometheus_config_flags_extra:
#   - name: storage.tsdb.retention
#     value: 15d
#   - name: alertmanager.timeout
#     value: 10s
# @end

prometheus_alertmanager_config: []
# @var prometheus_alertmanager_config:example: >
# prometheus_alertmanager_config:
#   - scheme: https
#     path_prefix: alertmanager/
#     basic_auth:
#       username: user
#       password: pass
#     static_configs:
#       - targets: ["127.0.0.1:9093"]
#     proxy_url: "127.0.0.2"
# @end

prometheus_alert_relabel_configs: []
# @var prometheus_alert_relabel_configs:example: >
# prometheus_alert_relabel_configs:
#   - action: labeldrop
#     regex: replica
# @end

prometheus_global:
  scrape_interval: 15s
  scrape_timeout: 10s
  evaluation_interval: 15s

prometheus_remote_write: []
# @var prometheus_remote_write:example: >
# prometheus_remote_write:
#   - url: https://dev.kausal.co/prom/push
#     basic_auth:
#       password: FOO
# @end

prometheus_remote_read: []
# @var prometheus_remote_read:example: >
# prometheus_remote_read:
#   - url: https://demo.cloudalchemy.org:9201/read
#     basic_auth:
#       password: FOO
# @end

prometheus_external_labels:
  environment: "{{ ansible_fqdn | default(ansible_host) | default(inventory_hostname) }}"

prometheus_targets: []
# @var prometheus_targets:example: >
# prometheus_targets:
#   - name: node
#     config:
#       - targets:
#           - localhost:9100
#         labels:
#           env: test
# @end

# Default scrape job for Prometheus itself. The metrics path is prefixed with
# the path component of prometheus_web_external_url.
# NOTE(review): the target port is hard-coded to 9090 and does not follow
# prometheus_web_bind_port - confirm whether this variable must be overridden
# when the bind port is changed.
prometheus_scrape_configs:
  - job_name: "prometheus"
    metrics_path: "{{ prometheus_web_external_url | urlsplit('path') }}/metrics"
    static_configs:
      - targets:
          - "{{ ansible_fqdn | default(ansible_host) | default('localhost') }}:9090"

prometheus_alert_rules_files:
  - "prometheus/rules/*.rules"

prometheus_static_targets_files:
  - "prometheus/targets/*.yml"
  - "prometheus/targets/*.json"

# Default alert rules. Expressions wrapped in {% raw %} keep their "{{ }}"
# placeholders out of Jinja rendering so they reach the rule file unchanged.
prometheus_alert_rules:
  - alert: Watchdog
    expr: vector(1)
    for: 10m
    labels:
      severity: warning
    annotations:
      # Literal block scalar with stripped trailing newline; line breaks below
      # are part of the value.
      description: |-
        This is an alert meant to ensure that the entire alerting pipeline is functional.
        This alert is always firing, therefore it should always be firing in Alertmanager
        and always fire against a receiver. There are integrations with various notification
        mechanisms that send a notification when this alert is not firing. For example the
        "DeadMansSnitch" integration in PagerDuty.
      summary: "Ensure entire alerting pipeline is functional"
  - alert: InstanceDown
    expr: "up == 0"
    for: 5m
    labels:
      severity: critical
    annotations:
      description: "{% raw %}{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.{% endraw %}"
      summary: "{% raw %}Instance {{ $labels.instance }} down{% endraw %}"

prometheus_alert_rules_extra: []