improving prometheus

This commit is contained in:
Ingolf Wagner 2023-03-06 02:57:01 +01:00
parent 3b0afdecf3
commit d78761f434
No known key found for this signature in database
GPG key ID: 76BF5F1928B9618B
2 changed files with 84 additions and 12 deletions

View file

@ -22,6 +22,37 @@
# keep data for 30 days # keep data for 30 days
extraFlags = [ "--storage.tsdb.retention.time=30d" ]; extraFlags = [ "--storage.tsdb.retention.time=30d" ];
# Alerting rules handed to Prometheus via `ruleFiles`. The rule set is
# serialised with builtins.toJSON and written to a store file named
# "prometheus-rules.yml".
ruleFiles =
  let
    # Raised for every series where `up == 0` has held for 5 minutes.
    instanceDown = {
      alert = "InstanceDown";
      expr = "up == 0";
      for = "5m";
      labels = { severity = "page"; };
      # `{{ $labels.* }}` is left verbatim for Prometheus to expand.
      annotations.summary = "Instance {{ $labels.instance }} down";
      annotations.description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
    };

    # The single rule group defined here.
    coreGroup = {
      name = "core";
      rules = [ instanceDown ];
    };
  in
  [
    (pkgs.writeText "prometheus-rules.yml"
      (builtins.toJSON { groups = [ coreGroup ]; }))
  ];
#alertmanager = {
# enable = true;
# configuration = {
#};
#};
exporters = { exporters = {
systemd.enable = true; systemd.enable = true;
node = { node = {
@ -81,18 +112,6 @@
}; };
}]; }];
} }
{
# see https://www.home-assistant.io/integrations/prometheus/
job_name = "telgraf";
metrics_path = "/metrics";
static_configs = [{
targets = [ "localhost:9273" ];
labels = {
service = "telegraf";
server = "pepe";
};
}];
}
]; ];
}; };
} }

View file

@ -1,3 +1,4 @@
{ pkgs, ... }:
let let
urls = [ urls = [
{ url = "https://bitwarden.ingolf-wagner.de"; path = ""; } { url = "https://bitwarden.ingolf-wagner.de"; path = ""; }
@ -30,7 +31,59 @@ in
systemd_units = { }; systemd_units = { };
internet_speed.interval = "50m"; internet_speed.interval = "50m";
nginx.urls = [ "http://localhost/nginx_status" ]; nginx.urls = [ "http://localhost/nginx_status" ];
ping = [{ urls = [ "10.100.0.1" ]; }];
}; };
}; };
}; };
# Scrape job for the metrics endpoint at localhost:9273; every sample is
# tagged with service = "telegraf" and server = "pepe".
# NOTE(review): the earlier comment on this job pointed at
# https://www.home-assistant.io/integrations/prometheus/ — presumably a
# leftover copied from another job; confirm and drop or replace the link.
services.prometheus.scrapeConfigs =
  let
    # Static target plus the labels attached to everything scraped from it.
    telegrafTarget = {
      targets = [ "localhost:9273" ];
      labels.service = "telegraf";
      labels.server = "pepe";
    };
  in
  [
    {
      # NOTE(review): "telgraf" looks like a typo for "telegraf". Kept
      # unchanged because renaming the job changes the `job` label on all
      # of its series.
      job_name = "telgraf";
      metrics_path = "/metrics";
      static_configs = [ telegrafTarget ];
    }
  ];
# Alert rules for the telegraf-provided metrics. The rule group is
# serialised with builtins.toJSON and written to a store file named
# "telegraf.yml".
services.prometheus.ruleFiles =
  let
    # Threshold used by the certificate rule below (evaluates to 432000).
    # Assumes x509_cert_expiry is reported in seconds — TODO confirm.
    fiveDaysInSeconds = 5 * 24 * 60 * 60;

    httpResponseNotOk = {
      alert = "HttpResponseNotOk";
      # The `!= 200` comparison keeps only series whose response code is
      # not 200; `0 * (...) + 1` then turns each remaining sample into the
      # constant value 1.
      expr = "0 * (http_response_http_response_code != 200) + 1";
      for = "5m";
      labels = { severity = "page"; };
      annotations.summary = "{{ $labels.exported_server }} does not return Ok";
      annotations.description = "{{ $labels.exported_server }} does not return Ok for more than 5 minutes";
    };

    certificateExpires = {
      # NOTE(review): "CertificatExpires" looks like a typo for
      # "CertificateExpires"; left untouched because the alert name may be
      # referenced outside this file.
      alert = "CertificatExpires";
      # Only certificates whose issuer common name is "R3" are matched;
      # the condition has to hold for one day before the alert fires.
      expr = ''x509_cert_expiry{issuer_common_name="R3"} < ${toString fiveDaysInSeconds}'';
      for = "1d";
      labels = { severity = "page"; };
      annotations.summary = "{{ $labels.san }} does Expire Soon";
      annotations.description = "{{ $labels.san }} does expire in less than 5 days";
    };

    telegrafGroup = {
      name = "telegraf";
      rules = [ httpResponseNotOk certificateExpires ];
    };
  in
  [
    (pkgs.writeText "telegraf.yml"
      (builtins.toJSON { groups = [ telegrafGroup ]; }))
  ];
} }