diff --git a/nixos/machines/cherry/telemetry/opentelemetry.nix b/nixos/machines/cherry/telemetry/opentelemetry.nix index fdc360c..c9b4322 100644 --- a/nixos/machines/cherry/telemetry/opentelemetry.nix +++ b/nixos/machines/cherry/telemetry/opentelemetry.nix @@ -1,32 +1,65 @@ { pkgs, config, ... }: +let + telegraf_sink = 8088; + prometheus_port = 8090; +in { + imports = [ + # telemetry sink + { + services.opentelemetry-collector.settings = { + receivers.influxdb.endpoint = "127.0.0.1:${toString telegraf_sink }"; + service.pipelines.metrics.receivers = [ "influxdb" ]; + }; + services.telegraf.extraConfig.outputs.influxdb_v2.urls = [ "http://127.0.0.1:${toString telegraf_sink}" ]; + } + + # prometheus export + { + services.opentelemetry-collector.settings = { + exporters.prometheus.endpoint = "127.0.0.1:${toString prometheus_port}"; + service.pipelines.metrics.exporters = [ "prometheus" ]; + }; + services.prometheus.scrapeConfigs = [ + { + job_name = "opentelemetry"; + metrics_path = "/metrics"; + scrape_interval = "10s"; + static_configs = [{ targets = [ "localhost:${toString prometheus_port}" ]; }]; + } + ]; + } + + # todo : move to netdata component + # netdata sink + { + services.opentelemetry-collector.settings.receivers.prometheus.config.scrape_configs = [ + { + job_name = "netdata"; + scrape_interval = "10s"; + metrics_path = "/api/v1/allmetrics"; + params.format = [ "prometheus" ]; + static_configs = [{ + targets = [ "127.0.0.1:19999" ]; + labels = { + service = "netdata"; + server = config.networking.hostName; + }; + }]; + } + ]; + } + ]; + services.opentelemetry-collector = { enable = true; package = pkgs.unstable.opentelemetry-collector-contrib; settings = { receivers = { - - # provide a influxdb sink - influxdb = { - endpoint = "127.0.0.1:8088"; - }; - # scrape opentelemetry-colectors metrics prometheus.config.scrape_configs = [ - { - job_name = "netdata"; - scrape_interval = "10s"; - metrics_path = "/api/v1/allmetrics"; - params.format = [ "prometheus" ]; - static_configs = [{ - targets = [ "127.0.0.1:19999" ]; - labels = { - service = "netdata"; - server = config.networking.hostName; - }; - }]; - } + # todo: this should be collected another way (opentelemetry internal?) { job_name = "otelcol"; scrape_interval = "10s"; @@ -45,6 +78,7 @@ } ]; } + { job_name = "node"; static_configs = [{ @@ -59,25 +93,24 @@ ]; }; - exporters = { - # provide prometheus sink under `/metrics` to - prometheus = { - endpoint = "127.0.0.1:8090"; - }; - otlp = { - endpoint = "10.100.0.2:4317"; # chungus - tls.insecure = true; - }; + # ship to chungus + exporters.otlp = { + # todo : move this to orbi and route from orbi to chungus + endpoint = "10.100.0.2:4317"; # chungus + tls.insecure = true; }; service = { pipelines.metrics = { - receivers = [ "influxdb" "prometheus" ]; - exporters = [ "prometheus" "otlp" ]; + receivers = [ "prometheus" ]; + exporters = [ "otlp" ]; }; + + # todo : this should be automatically be collected # open telemetries own metrics? telemetry.metrics.address = "0.0.0.0:8100"; }; + }; }; } diff --git a/nixos/machines/cherry/telemetry/prometheus.nix b/nixos/machines/cherry/telemetry/prometheus.nix index 73b43a3..7fbd163 100644 --- a/nixos/machines/cherry/telemetry/prometheus.nix +++ b/nixos/machines/cherry/telemetry/prometheus.nix @@ -17,46 +17,6 @@ services.prometheus = { checkConfig = "syntax-only"; enable = true; - # keep data for 30 days - extraFlags = [ "--storage.tsdb.retention.time=90d" ]; - - ruleFiles = [ - (pkgs.writeText "prometheus-rules.yml" (builtins.toJSON { - groups = [ - { - name = "core"; - rules = [ - { - alert = "InstanceDown"; - expr = "up == 0"; - for = "5m"; - labels.severity = "page"; - annotations = { - summary = "Instance {{ $labels.instance }} down"; - description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."; - }; - } - ]; - } - # todo : move this to open telemetry - { - name = "home-assistant"; - rules = [ - { - record = "home_open_window_sum"; - expr = ''sum( homeassistant_binary_sensor_state{entity=~"binary_sensor\\.window_02_contact|binary_sensor\\.window_03_contact|binary_sensor\\.window_04_contact|binary_sensor\\.window_05_contact|binary_sensor\\.window_06_contact|binary_sensor\\.window_07_contact"} )''; - } - ] ++ (map - (number: - { - record = "home_at_least_n_windows_open"; - expr = ''home_open_window_sum >= bool ${toString number}''; - labels.n = number; - }) [ 1 2 3 ]); - } - ]; - })) - ]; exporters = { node = { @@ -66,52 +26,5 @@ }; }; - scrapeConfigs = [ - { - job_name = "opentelemetry"; - metrics_path = "/metrics"; - scrape_interval = "10s"; - static_configs = [{ targets = [ "localhost:8090" ]; }]; - } - #{ - # job_name = "netdata"; - # metrics_path = "/api/v1/allmetrics"; - # params.format = [ "prometheus" ]; - # scrape_interval = "5s"; - # static_configs = [ - # { - # targets = [ "localhost:19999" ]; - # labels = { - # service = "netdata"; - # server = config.networking.hostName; - # }; - # } - # ]; - #} - #{ - # job_name = "node"; - # static_configs = [{ - # targets = [ "localhost:${toString config.services.prometheus.exporters.node.port}" ]; - # labels = { - # service = "node-exporter"; - # server = config.networking.hostName; - # }; - # }]; - #} - #{ - # # see https://www.home-assistant.io/integrations/prometheus/ - # job_name = "home-assistant"; - # scrape_interval = "60s"; - # metrics_path = "/api/prometheus"; - # bearer_token_file = toString config.sops.secrets.hass_long_term_token.path; - # static_configs = [{ - # targets = [ "localhost:8123" ]; - # labels = { - # service = "hass"; - # server = config.networking.hostName; - # }; - # }]; - #} - ]; }; } diff --git a/nixos/machines/cherry/telemetry/telegraf.nix b/nixos/machines/cherry/telemetry/telegraf.nix index de9cac8..863b2ac 100644 --- a/nixos/machines/cherry/telemetry/telegraf.nix +++ b/nixos/machines/cherry/telemetry/telegraf.nix @@ -1,29 +1,11 @@ { config, pkgs, ... }: -let - urls = [ - { url = "https://bitwarden.ingolf-wagner.de"; path = ""; } - { url = "https://flix.ingolf-wagner.de"; path = "web/index.html"; } - { url = "https://git.ingolf-wagner.de"; path = ""; } - { url = "https://ingolf-wagner.de"; path = ""; } - { url = "https://nextcloud.ingolf-wagner.de"; path = "login"; } - { url = "https://tech.ingolf-wagner.de"; path = ""; } - { url = "https://matrix.ingolf-wagner.de"; path = ""; } - ]; - -in { + systemd.services.telegraf.path = [ pkgs.inetutils ]; services.telegraf = { enable = true; extraConfig = { - #outputs.prometheus_client = { - # listen = ":9273"; - # metric_version = 2; - #}; - outputs.influxdb_v2 = { - urls = [ "http://127.0.0.1:8088" ]; - }; global_tags = { service = "telegraf"; @@ -34,58 +16,12 @@ in inputs = { cpu = { }; diskio = { }; - smart.attributes = true; - x509_cert = [{ - sources = (map (url: "${url.url}:443") urls); - interval = "30m"; # agent.interval = "10s" is default - }]; - http_response = - let fullUrls = map ({ url, path }: "${url}/${path}") urls; - in [{ urls = fullUrls; }]; processes = { }; system = { }; systemd_units = { }; - internet_speed.interval = "10m"; - nginx.urls = [ "http://localhost/nginx_status" ]; ping = [{ urls = [ "10.100.0.1" ]; }]; # actually important to make pepe visible over wireguard }; }; }; - # todo : do this prometheus - services.prometheus.ruleFiles = [ - (pkgs.writeText "telegraf.yml" (builtins.toJSON { - groups = [ - { - name = "telegraf"; - rules = [ - { - alert = "HttpResponseNotOk"; - expr = "0 * (http_response_http_response_code != 200) + 1"; - for = "5m"; - labels.severity = "page"; - annotations = { - summary = "{{ $labels.exported_server }} does not return Ok"; - description = "{{ $labels.exported_server }} does not return Ok for more than 5 minutes"; - }; - } - { - alert = "CertificatExpires"; - expr = ''x509_cert_expiry{issuer_common_name="R3"} < ${toString (60 * 60 * 24 * 5)}''; - for = "1d"; - labels.severity = "page"; - annotations = { - summary = "{{ $labels.san }} does Expire Soon"; - description = "{{ $labels.san }} does expire in less than 5 days"; - }; - } - ]; - } - ]; - })) - ]; - - - - }