diff --git a/nixos/machines/pepe/prometheus.nix b/nixos/machines/pepe/prometheus.nix
index ab63b8e..34a9b68 100644
--- a/nixos/machines/pepe/prometheus.nix
+++ b/nixos/machines/pepe/prometheus.nix
@@ -22,6 +22,34 @@
     # keep data for 30 days
     extraFlags = [ "--storage.tsdb.retention.time=30d" ];
 
+    # Alerting rules; JSON is valid YAML, so builtins.toJSON output can be
+    # used directly as a Prometheus rule file.
+    ruleFiles = [
+      (pkgs.writeText "prometheus-rules.yml" (builtins.toJSON {
+        groups = [
+          {
+            name = "core";
+            rules = [
+              {
+                # Fires when a scrape target has failed its scrapes for 5 minutes.
+                alert = "InstanceDown";
+                expr = "up == 0";
+                for = "5m";
+                labels.severity = "page";
+                annotations = {
+                  summary = "Instance {{ $labels.instance }} down";
+                  description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
+                };
+              }
+            ];
+          }
+        ];
+      }))
+    ];
+
+    # TODO: configure services.prometheus.alertmanager so that firing alerts
+    # are routed somewhere (it was only stubbed out as a comment here).
+
     exporters = {
       systemd.enable = true;
       node = {
@@ -81,18 +109,6 @@
           };
         }];
       }
-      {
-        # see https://www.home-assistant.io/integrations/prometheus/
-        job_name = "telgraf";
-        metrics_path = "/metrics";
-        static_configs = [{
-          targets = [ "localhost:9273" ];
-          labels = {
-            service = "telegraf";
-            server = "pepe";
-          };
-        }];
-      }
     ];
   };
 }
diff --git a/nixos/machines/pepe/telegraf.nix b/nixos/machines/pepe/telegraf.nix
index e2b2901..f051719 100644
--- a/nixos/machines/pepe/telegraf.nix
+++ b/nixos/machines/pepe/telegraf.nix
@@ -1,3 +1,4 @@
+{ pkgs, ... }:
 let
   urls = [
     { url = "https://bitwarden.ingolf-wagner.de"; path = ""; }
@@ -30,7 +31,63 @@ in
         systemd_units = { };
         internet_speed.interval = "50m";
         nginx.urls = [ "http://localhost/nginx_status" ];
+        ping = [{ urls = [ "10.100.0.1" ]; }];
       };
     };
   };
+
+  # Scrape job for the local Telegraf metrics endpoint; defined here instead
+  # of prometheus.nix so all Telegraf-related settings live in one file.
+  services.prometheus.scrapeConfigs = [
+    {
+      # NOTE(review): "telgraf" is a typo, but job_name becomes the `job` label
+      # of the scraped series, so it is left unchanged to keep existing data.
+      job_name = "telgraf";
+      metrics_path = "/metrics";
+      static_configs = [{
+        targets = [ "localhost:9273" ];
+        labels = {
+          service = "telegraf";
+          server = "pepe";
+        };
+      }];
+    }
+  ];
+
+  # Alerting rules for metrics collected by Telegraf.
+  services.prometheus.ruleFiles = [
+    (pkgs.writeText "telegraf.yml" (builtins.toJSON {
+      groups = [
+        {
+          name = "telegraf";
+          rules = [
+            {
+              # The comparison keeps only series whose status code is not 200;
+              # `0 * (...) + 1` then normalises the alert value to 1.
+              alert = "HttpResponseNotOk";
+              expr = "0 * (http_response_http_response_code != 200) + 1";
+              for = "5m";
+              labels.severity = "page";
+              annotations = {
+                summary = "{{ $labels.exported_server }} does not return Ok";
+                description = "{{ $labels.exported_server }} does not return Ok for more than 5 minutes";
+              };
+            }
+            {
+              # Match any Let's Encrypt intermediate (R3, R10, E5, ...) rather than
+              # only "R3", which is no longer the issuer of new certificates.
+              alert = "CertificateExpires";
+              expr = ''x509_cert_expiry{issuer_common_name=~"[RE][0-9]+"} < ${toString (60 * 60 * 24 * 5)}'';
+              for = "1d";
+              labels.severity = "page";
+              annotations = {
+                summary = "{{ $labels.san }} does Expire Soon";
+                description = "{{ $labels.san }} does expire in less than 5 days";
+              };
+            }
+          ];
+        }
+      ];
+    }))
+  ];
 }