Robert Kaussow
0abd03b77b
All checks were successful
continuous-integration/drone/push Build is passing
138 lines
4.4 KiB
YAML
138 lines
4.4 KiB
YAML
---
|
|
# Prometheus version. Quoted so the value always stays a string, even if a
# later version number would otherwise look like a float.
prometheus_version: "2.27.0"
# User and group settings; the home directory and the primary group are both
# derived from prometheus_user.
prometheus_user: "prometheus_adm"
prometheus_user_home: "/home/{{ prometheus_user }}"
prometheus_group: "{{ prometheus_user }}"
prometheus_extra_groups: []

prometheus_packages: []
|
|
|
|
# Directory layout. The config and data directories are built from
# prometheus_base_dir; the rules and file_sd directories are built from
# prometheus_config_dir.
prometheus_base_dir: "/opt/prometheus"
prometheus_config_dir: "{{ prometheus_base_dir }}/conf"
prometheus_rules_dir: "{{ prometheus_config_dir }}/rules"
prometheus_file_sd_dir: "{{ prometheus_config_dir }}/file_sd"
prometheus_data_dir: "{{ prometheus_base_dir }}/data"
prometheus_read_only_dirs: []
|
|
|
|
# Web bind address and port.
prometheus_web_bind_ip: "127.0.0.1"
prometheus_web_bind_port: 61000
# The default external URL follows prometheus_web_bind_port so that changing
# the port does not leave a stale port number in the URL.
prometheus_web_external_url: "http://localhost:{{ prometheus_web_bind_port }}/"
|
|
|
|
# TLS settings for the web endpoint; disabled by default.
prometheus_web_tls_enabled: false
# Certificate and key paths are built from prometheus_base_dir.
prometheus_web_tls_cert_path: "{{ prometheus_base_dir }}/tls/certs/mycert.pem"
prometheus_web_tls_key_path: "{{ prometheus_base_dir }}/tls/private/mykey.pem"
# NOTE(review): presumably the file names the certificate and key are copied
# from - confirm against the role's tasks.
prometheus_web_tls_cert_source: mycert.pem
prometheus_web_tls_key_source: mykey.pem
|
|
|
|
# @var prometheus_web_http_server:description: See official [documentation](https://github.com/prometheus/exporter-toolkit/blob/master/docs/web-configuration.md).
|
|
# @var prometheus_web_http_server: $ "_unset_"
|
|
|
|
# @var prometheus_web_basic_auth_users:description: See official [documentation](https://github.com/prometheus/exporter-toolkit/blob/master/docs/web-configuration.md).
|
|
# @var prometheus_web_basic_auth_users: $ "_unset_"
|
|
|
|
# Log level; defaults to error.
prometheus_log_level: error

prometheus_storage_retention: "30d"
prometheus_storage_retention_size: "0"
# @var prometheus_storage_retention_size:description: >
# __[EXPERIMENTAL]__ Maximum number of bytes that can be stored for blocks.
# Units supported: KB, MB, GB, TB, PB.
# @end
|
|
|
|
# Extra flags as a list of name/value entries; empty by default.
prometheus_config_flags_extra: []
# @var prometheus_config_flags_extra:example: >
# prometheus_config_flags_extra:
#   - name: alertmanager.timeout
#     value: 10s
# @end
|
|
|
|
# Alertmanager endpoint definitions; empty by default.
prometheus_alertmanager_config: []
# @var prometheus_alertmanager_config:example: >
# prometheus_alertmanager_config:
#   - scheme: https
#     path_prefix: alertmanager/
#     basic_auth:
#       username: user
#       password: pass
#     static_configs:
#       - targets: ["127.0.0.1:9093"]
#     proxy_url: "127.0.0.2"
# @end
|
|
|
|
# Alert relabel rules; empty by default.
prometheus_alert_relabel_configs: []
# @var prometheus_alert_relabel_configs:example: >
# prometheus_alert_relabel_configs:
#   - action: labeldrop
#     regex: replica
# @end
|
|
|
|
# Global interval and timeout defaults, kept as one mapping.
prometheus_global:
  scrape_interval: 15s
  scrape_timeout: 10s
  evaluation_interval: 15s
|
|
|
|
# Remote write endpoints; empty by default.
prometheus_remote_write: []
# @var prometheus_remote_write:example: >
# prometheus_remote_write:
#   - url: https://dev.kausal.co/prom/push
#     basic_auth:
#       password: FOO
# @end
|
|
|
|
# Remote read endpoints; empty by default.
prometheus_remote_read: []
# @var prometheus_remote_read:example: >
# prometheus_remote_read:
#   - url: https://demo.cloudalchemy.org:9201/read
#     basic_auth:
#       password: FOO
# @end
|
|
|
|
# External labels. `environment` resolves to ansible_fqdn, falling back to
# ansible_host and then inventory_hostname.
prometheus_external_labels:
  environment: "{{ ansible_fqdn | default(ansible_host) | default(inventory_hostname) }}"
|
|
|
|
# Target definitions; empty by default.
prometheus_targets: []
# @var prometheus_targets:example: >
# prometheus_targets:
#   - name: node
#     config:
#       - targets:
#           - localhost:9100
#         labels:
#           env: test
# @end
|
|
|
|
# Scrape jobs. The default job is named "prometheus"; its metrics path is
# prefixed with the path component of prometheus_web_external_url when that
# path is longer than one character.
prometheus_scrape_configs:
  - job_name: "prometheus"
    metrics_path: "{{ prometheus_web_external_url | urlsplit('path') if (prometheus_web_external_url | urlsplit('path')) | length > 1 else '' }}/metrics"
    static_configs:
      - targets:
          # Follow prometheus_web_bind_port instead of repeating its default,
          # so the target stays correct when the port is overridden.
          - "{{ ansible_fqdn | default(ansible_host) | default('localhost') }}:{{ prometheus_web_bind_port }}"
|
|
|
|
# Glob patterns for alert rule files and static target files.
prometheus_alert_rules_files:
  - "prometheus/rules/*.rules"

prometheus_static_targets_files:
  - prometheus/targets/*.yml
  - prometheus/targets/*.json
|
|
|
|
# Default alert rules. Annotation values that contain Prometheus template
# expressions are wrapped in {% raw %} so Ansible's Jinja does not evaluate
# them.
prometheus_alert_rules:
  # vector(1) always returns a sample, so this rule fires permanently.
  - alert: Watchdog
    expr: vector(1)
    for: 10m
    labels:
      severity: warning
    annotations:
      description: >-
        This is an alert meant to ensure that the entire alerting pipeline is functional.
        This alert is always firing, therefore it should always be firing in Alertmanager
        and always fire against a receiver. There are integrations with various notification
        mechanisms that send a notification when this alert is not firing.
      summary: "Ensure entire alerting pipeline is functional"
  - alert: InstanceDown
    expr: "up == 0"
    for: 5m
    labels:
      severity: critical
    annotations:
      description: "{% raw %}{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.{% endraw %}"
      summary: "{% raw %}Instance {{ $labels.instance }} down{% endraw %}"

prometheus_alert_rules_extra: []
|