refactor otlp

This commit is contained in:
Ingolf Wagner 2024-05-15 00:25:32 +02:00
parent fce4a39b94
commit 661c350544
Signed by: palo
GPG key ID: 76BF5F1928B9618B
3 changed files with 64 additions and 182 deletions

View file

@ -1,32 +1,65 @@
{ pkgs, config, ... }:
let
telegraf_sink = 8088;
prometheus_port = 8090;
in
{
imports = [
# telemetry sink
{
# Connects telegraf to the OpenTelemetry collector: the collector's influxdb
# receiver endpoint and telegraf's influxdb_v2 output URL both use the same
# loopback port (`telegraf_sink` from the enclosing `let`).
services.opentelemetry-collector.settings = {
receivers.influxdb.endpoint = "127.0.0.1:${toString telegraf_sink }";
# Registers the influxdb receiver in the metrics pipeline.
# NOTE(review): presumably merged by the module system with the receivers
# lists declared elsewhere in this file — confirm.
service.pipelines.metrics.receivers = [ "influxdb" ];
};
# telegraf writes to the receiver endpoint configured above
services.telegraf.extraConfig.outputs.influxdb_v2.urls = [ "http://127.0.0.1:${toString telegraf_sink}" ];
}
# prometheus export
{
# Exposes the collector's metrics through its prometheus exporter on a
# loopback port (`prometheus_port` from the enclosing `let`) and adds a
# prometheus scrape job that targets that same port.
services.opentelemetry-collector.settings = {
exporters.prometheus.endpoint = "127.0.0.1:${toString prometheus_port}";
# Registers the prometheus exporter in the metrics pipeline.
# NOTE(review): presumably merged by the module system with the exporters
# lists declared elsewhere in this file — confirm.
service.pipelines.metrics.exporters = [ "prometheus" ];
};
services.prometheus.scrapeConfigs = [
{
# scrape job pointing at the exporter endpoint configured above
job_name = "opentelemetry";
metrics_path = "/metrics";
scrape_interval = "10s";
static_configs = [{ targets = [ "localhost:${toString prometheus_port}" ]; }];
}
];
}
# todo : move to netdata component
# netdata sink
{
# Adds a "netdata" scrape job to the collector's prometheus receiver.
# The job requests `/api/v1/allmetrics` with `format=prometheus` from
# 127.0.0.1:19999 every 10s.
services.opentelemetry-collector.settings.receivers.prometheus.config.scrape_configs = [
{
job_name = "netdata";
scrape_interval = "10s";
metrics_path = "/api/v1/allmetrics";
params.format = [ "prometheus" ];
static_configs = [{
targets = [ "127.0.0.1:19999" ];
# static labels attached to this target; `server` is this machine's hostname
labels = {
service = "netdata";
server = config.networking.hostName;
};
}];
}
];
}
];
services.opentelemetry-collector = {
enable = true;
package = pkgs.unstable.opentelemetry-collector-contrib;
settings = {
receivers = {
# provide an influxdb sink
influxdb = {
endpoint = "127.0.0.1:8088";
};
# scrape opentelemetry-collector's metrics
prometheus.config.scrape_configs = [
{
job_name = "netdata";
scrape_interval = "10s";
metrics_path = "/api/v1/allmetrics";
params.format = [ "prometheus" ];
static_configs = [{
targets = [ "127.0.0.1:19999" ];
labels = {
service = "netdata";
server = config.networking.hostName;
};
}];
}
# todo: this should be collected another way (opentelemetry internal?)
{
job_name = "otelcol";
scrape_interval = "10s";
@ -45,6 +78,7 @@
}
];
}
{
job_name = "node";
static_configs = [{
@ -59,25 +93,24 @@
];
};
exporters = {
# provide a prometheus sink under `/metrics` for the local prometheus to scrape
prometheus = {
endpoint = "127.0.0.1:8090";
};
otlp = {
endpoint = "10.100.0.2:4317"; # chungus
tls.insecure = true;
};
# ship to chungus
exporters.otlp = {
# todo : move this to orbi and route from orbi to chungus
endpoint = "10.100.0.2:4317"; # chungus
tls.insecure = true;
};
service = {
pipelines.metrics = {
receivers = [ "influxdb" "prometheus" ];
exporters = [ "prometheus" "otlp" ];
receivers = [ "prometheus" ];
exporters = [ "otlp" ];
};
# todo : this should be collected automatically
# open telemetries own metrics?
telemetry.metrics.address = "0.0.0.0:8100";
};
};
};
}

View file

@ -17,46 +17,6 @@
services.prometheus = {
checkConfig = "syntax-only";
enable = true;
# keep data for 90 days
extraFlags = [ "--storage.tsdb.retention.time=90d" ];
# Prometheus rule file, serialized with builtins.toJSON and written to a store
# file named "prometheus-rules.yml". It holds two groups:
#  - "core": an InstanceDown alert
#  - "home-assistant": recording rules about open windows
ruleFiles = [
(pkgs.writeText "prometheus-rules.yml" (builtins.toJSON {
groups = [
{
name = "core";
rules = [
{
# alerts when `up == 0` has held for 5 minutes
alert = "InstanceDown";
expr = "up == 0";
for = "5m";
labels.severity = "page";
annotations = {
summary = "Instance {{ $labels.instance }} down";
description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
};
}
];
}
# todo : move this to open telemetry
{
name = "home-assistant";
rules = [
{
# sum of the binary sensor states of the window contacts 02 to 07
record = "home_open_window_sum";
expr = ''sum( homeassistant_binary_sensor_state{entity=~"binary_sensor\\.window_02_contact|binary_sensor\\.window_03_contact|binary_sensor\\.window_04_contact|binary_sensor\\.window_05_contact|binary_sensor\\.window_06_contact|binary_sensor\\.window_07_contact"} )'';
}
] ++ (map
# generates one recording rule per threshold n in [ 1 2 3 ]
(number:
{
record = "home_at_least_n_windows_open";
expr = ''home_open_window_sum >= bool ${toString number}'';
# NOTE(review): `number` is a Nix integer here, so toJSON emits a
# JSON number for this label — confirm prometheus accepts that.
labels.n = number;
}) [ 1 2 3 ]);
}
];
}))
];
exporters = {
node = {
@ -66,52 +26,5 @@
};
};
scrapeConfigs = [
{
job_name = "opentelemetry";
metrics_path = "/metrics";
scrape_interval = "10s";
static_configs = [{ targets = [ "localhost:8090" ]; }];
}
#{
# job_name = "netdata";
# metrics_path = "/api/v1/allmetrics";
# params.format = [ "prometheus" ];
# scrape_interval = "5s";
# static_configs = [
# {
# targets = [ "localhost:19999" ];
# labels = {
# service = "netdata";
# server = config.networking.hostName;
# };
# }
# ];
#}
#{
# job_name = "node";
# static_configs = [{
# targets = [ "localhost:${toString config.services.prometheus.exporters.node.port}" ];
# labels = {
# service = "node-exporter";
# server = config.networking.hostName;
# };
# }];
#}
#{
# # see https://www.home-assistant.io/integrations/prometheus/
# job_name = "home-assistant";
# scrape_interval = "60s";
# metrics_path = "/api/prometheus";
# bearer_token_file = toString config.sops.secrets.hass_long_term_token.path;
# static_configs = [{
# targets = [ "localhost:8123" ];
# labels = {
# service = "hass";
# server = config.networking.hostName;
# };
# }];
#}
];
};
}

View file

@ -1,29 +1,11 @@
{ config, pkgs, ... }:
let
# Sites monitored by telegraf. Each entry is a base `url` plus a `path`.
# The x509_cert input uses "${url}:443" and the http_response input uses
# "${url}/${path}".
urls = [
{ url = "https://bitwarden.ingolf-wagner.de"; path = ""; }
{ url = "https://flix.ingolf-wagner.de"; path = "web/index.html"; }
{ url = "https://git.ingolf-wagner.de"; path = ""; }
{ url = "https://ingolf-wagner.de"; path = ""; }
{ url = "https://nextcloud.ingolf-wagner.de"; path = "login"; }
{ url = "https://tech.ingolf-wagner.de"; path = ""; }
{ url = "https://matrix.ingolf-wagner.de"; path = ""; }
];
in
{
systemd.services.telegraf.path = [ pkgs.inetutils ];
services.telegraf = {
enable = true;
extraConfig = {
#outputs.prometheus_client = {
# listen = ":9273";
# metric_version = 2;
#};
outputs.influxdb_v2 = {
urls = [ "http://127.0.0.1:8088" ];
};
global_tags = {
service = "telegraf";
@ -34,58 +16,12 @@ in
inputs = {
cpu = { };
diskio = { };
smart.attributes = true;
x509_cert = [{
sources = (map (url: "${url.url}:443") urls);
interval = "30m"; # agent.interval = "10s" is default
}];
http_response =
let fullUrls = map ({ url, path }: "${url}/${path}") urls;
in [{ urls = fullUrls; }];
processes = { };
system = { };
systemd_units = { };
internet_speed.interval = "10m";
nginx.urls = [ "http://localhost/nginx_status" ];
ping = [{ urls = [ "10.100.0.1" ]; }]; # actually important to make pepe visible over wireguard
};
};
};
# todo : do this in prometheus
# Alert rules for metrics produced by telegraf, serialized with
# builtins.toJSON and written to a store file named "telegraf.yml".
services.prometheus.ruleFiles = [
(pkgs.writeText "telegraf.yml" (builtins.toJSON {
groups = [
{
name = "telegraf";
rules = [
{
# alerts when an HTTP check has returned a status other than 200 for 5 minutes
# NOTE(review): the `0 * (...) + 1` wrapper presumably forces the
# alert value to 1 for every matching series — confirm intent.
alert = "HttpResponseNotOk";
expr = "0 * (http_response_http_response_code != 200) + 1";
for = "5m";
labels.severity = "page";
annotations = {
summary = "{{ $labels.exported_server }} does not return Ok";
description = "{{ $labels.exported_server }} does not return Ok for more than 5 minutes";
};
}
{
# threshold is 60 * 60 * 24 * 5 = 432000, i.e. 5 days if the metric is in seconds
# only series with issuer_common_name="R3" are matched
alert = "CertificatExpires";
expr = ''x509_cert_expiry{issuer_common_name="R3"} < ${toString (60 * 60 * 24 * 5)}'';
for = "1d";
labels.severity = "page";
annotations = {
summary = "{{ $labels.san }} does Expire Soon";
description = "{{ $labels.san }} does expire in less than 5 days";
};
}
];
}
];
}))
];
}