From 4903ab6fd934e8e7b46ac03572c500fab758510b Mon Sep 17 00:00:00 2001 From: Ingolf Wagner Date: Wed, 15 May 2024 11:59:24 +0200 Subject: [PATCH] make monitoring a component and easy to configure --- nixos/components/monitor/default.nix | 5 +- nixos/components/monitor/opentelemetry.nix | 124 ++++++++++++++++++ nixos/components/monitor/prometheus.nix | 45 +++++++ nixos/components/monitor/telegraf.nix | 46 +++++++ nixos/legacy/media-tdarr.nix | 4 +- nixos/legacy/media-unmanic.nix | 4 +- nixos/machines/cherry/configuration.nix | 8 +- .../cherry/telemetry/opentelemetry.nix | 64 --------- .../telemetry/prometheus-exporter-node.nix | 17 --- .../machines/cherry/telemetry/prometheus.nix | 44 ------- nixos/machines/cherry/telemetry/telegraf.nix | 33 ----- nixos/machines/chungus/configuration.nix | 12 +- nixos/machines/chungus/hass-mqtt.nix | 2 +- .../chungus/telemetry/opentelemetry-hass.nix | 38 ++++++ .../chungus/telemetry/opentelemetry.nix | 101 -------------- .../machines/chungus/telemetry/prometheus.nix | 96 -------------- .../chungus/telemetry/telegraf-smart.nix | 11 +- nixos/machines/chungus/telemetry/telegraf.nix | 60 --------- nixos/machines/cream/configuration.nix | 3 + nixos/machines/orbi/configuration.nix | 9 +- nixos/machines/orbi/media-tdarr.nix | 4 +- nixos/machines/orbi/media-unmanic.nix | 4 +- .../machines/orbi/telemetry/opentelemetry.nix | 51 ------- .../orbi/telemetry/system-prometheus.nix | 70 ---------- nixos/machines/orbi/telemetry/telegraf.nix | 33 ----- nixos/machines/robi/media-tdarr.nix | 4 +- 26 files changed, 292 insertions(+), 600 deletions(-) create mode 100644 nixos/components/monitor/opentelemetry.nix create mode 100644 nixos/components/monitor/prometheus.nix create mode 100644 nixos/components/monitor/telegraf.nix delete mode 100644 nixos/machines/cherry/telemetry/opentelemetry.nix delete mode 100644 nixos/machines/cherry/telemetry/prometheus-exporter-node.nix delete mode 100644 nixos/machines/cherry/telemetry/prometheus.nix delete mode 
100644 nixos/machines/cherry/telemetry/telegraf.nix create mode 100644 nixos/machines/chungus/telemetry/opentelemetry-hass.nix delete mode 100644 nixos/machines/chungus/telemetry/opentelemetry.nix delete mode 100644 nixos/machines/orbi/telemetry/opentelemetry.nix delete mode 100644 nixos/machines/orbi/telemetry/system-prometheus.nix delete mode 100644 nixos/machines/orbi/telemetry/telegraf.nix diff --git a/nixos/components/monitor/default.nix b/nixos/components/monitor/default.nix index 69192a7..bc91357 100644 --- a/nixos/components/monitor/default.nix +++ b/nixos/components/monitor/default.nix @@ -3,7 +3,6 @@ with lib; with types; { - options.components.monitor = { enable = mkOption { type = bool; @@ -13,8 +12,10 @@ with types; imports = [ ./netdata.nix + ./opentelemetry.nix + ./prometheus.nix + ./telegraf.nix ]; - config = mkIf config.components.monitor.enable { }; } diff --git a/nixos/components/monitor/opentelemetry.nix b/nixos/components/monitor/opentelemetry.nix new file mode 100644 index 0000000..f84107a --- /dev/null +++ b/nixos/components/monitor/opentelemetry.nix @@ -0,0 +1,124 @@ +{ pkgs, config, lib, ... 
}: +with lib; +with types; +let + cfg = config.components.monitor.opentelemetry; +in +{ + options.components.monitor.opentelemetry = { + receiver.endpoint = mkOption { + type = nullOr str; + default = null; + description = "endpoint to receive the opentelementry data from other collectors"; + }; + exporter.endpoint = mkOption { + type = nullOr str; + default = null; + description = "endpoint to ship opentelementry data too"; + }; + metrics.endpoint = mkOption { + type = str; + default = "127.0.0.1:8100"; + description = "endpoint on where to provide opentelementry metrics"; + }; + }; + + config = mkMerge [ + (mkIf config.components.monitor.enable { + services.opentelemetry-collector = { + enable = true; + package = pkgs.unstable.opentelemetry-collector-contrib; + }; + }) + + # add default tags to metrics + # todo : make sure we filter out metrics from otlp receivers + (mkIf config.components.monitor.enable { + services.opentelemetry-collector.settings = { + service.pipelines.metrics.processors = [ "metricstransform" ]; + processors.metricstransform.transforms = [ + { + include = ".*"; + match_type = "regexp"; + action = "update"; + operations = [{ + action = "add_label"; + new_label = "machine"; + new_value = config.networking.hostName; + }]; + } + ]; + }; + }) + + # ship to next instance + (mkIf (config.components.monitor.opentelemetry.exporter.endpoint != null) { + services.opentelemetry-collector.settings = { + exporters.otlp = { + endpoint = cfg.exporter.endpoint; + tls.insecure = true; + }; + service = { + pipelines.metrics = { + exporters = [ "otlp" ]; + }; + #pipelines.logs = { + # exporters = [ "otlp" ]; + #}; + }; + }; + }) + + # ship from other instance + (mkIf (config.components.monitor.opentelemetry.receiver.endpoint != null) { + services.opentelemetry-collector.settings = { + receivers.otlp.protocols.grpc.endpoint = cfg.receiver.endpoint; + service = { + pipelines.metrics = { + receivers = [ "otlp" ]; + }; + #pipelines.logs = { + # exporters = [ "otlp" 
]; + #}; + }; + }; + }) + + # scrape opentelemetry-collector's metrics + # todo: this should be collected another way (opentelemetry internal?) + (mkIf config.components.monitor.enable { + services.opentelemetry-collector.settings = { + receivers = { + prometheus.config.scrape_configs = [ + { + job_name = "otelcol"; + scrape_interval = "10s"; + static_configs = [{ + targets = [ cfg.metrics.endpoint ]; + }]; + metric_relabel_configs = [ + { + source_labels = [ "__name__" ]; + regex = ".*grpc_io.*"; + action = "drop"; + } + ]; + } + ]; + }; + + service = { + pipelines.metrics = { + receivers = [ "prometheus" ]; + }; + + # todo : this should automatically be collected + # opentelemetry's own metrics? + telemetry.metrics.address = cfg.metrics.endpoint; + }; + + }; + }) + ]; + +} diff --git a/nixos/components/monitor/prometheus.nix b/nixos/components/monitor/prometheus.nix new file mode 100644 index 0000000..52e5e69 --- /dev/null +++ b/nixos/components/monitor/prometheus.nix @@ -0,0 +1,45 @@ +{ config, lib, ...
}: +with lib; +with types; +let + cfg = config.components.monitor.prometheus; +in +{ + options.components.monitor.prometheus = { + enable = mkOption { + type = lib.types.bool; + default = config.components.monitor.enable; + }; + port = mkOption { + type = int; + default = 8090; + description = "port to provide Prometheus export"; + }; + }; + + config = mkMerge [ + + (mkIf config.components.monitor.prometheus.enable { + services.prometheus = { + checkConfig = "syntax-only"; + enable = true; + }; + }) + + (mkIf config.components.monitor.prometheus.enable { + services.opentelemetry-collector.settings = { + exporters.prometheus.endpoint = "127.0.0.1:${toString cfg.port}"; + service.pipelines.metrics.exporters = [ "prometheus" ]; + }; + services.prometheus.scrapeConfigs = [ + { + job_name = "opentelemetry"; + metrics_path = "/metrics"; + scrape_interval = "10s"; + static_configs = [{ targets = [ "localhost:${toString cfg.port}" ]; }]; + } + ]; + }) + + ]; +} diff --git a/nixos/components/monitor/telegraf.nix b/nixos/components/monitor/telegraf.nix new file mode 100644 index 0000000..c56bb85 --- /dev/null +++ b/nixos/components/monitor/telegraf.nix @@ -0,0 +1,46 @@ +{ config, pkgs, lib, ... 
}: +with lib; +with types; +let + cfg = config.components.monitor; +in +{ + options.components.monitor = { + influxDBPort = mkOption { + type = int; + default = 8088; + description = "Port to listen on influxDB input"; + }; + }; + + config = lib.mkMerge [ + (mkIf config.components.monitor.enable { + # opentelemetry wiring + services.opentelemetry-collector.settings = { + receivers.influxdb.endpoint = "127.0.0.1:${toString cfg.influxDBPort}"; + service.pipelines.metrics.receivers = [ "influxdb" ]; + }; + services.telegraf.extraConfig.outputs.influxdb_v2.urls = [ "http://127.0.0.1:${toString cfg.influxDBPort}" ]; + }) + + (mkIf config.components.monitor.enable { + + systemd.services.telegraf.path = [ pkgs.inetutils ]; + + services.telegraf = { + enable = true; + extraConfig = { + # https://github.com/influxdata/telegraf/tree/master/plugins/inputs < all them plugins + inputs = { + cpu = { }; + diskio = { }; + processes = { }; + system = { }; + systemd_units = { }; + ping = [{ urls = [ "10.100.0.1" ]; }]; # actually important to make machine visible over wireguard + }; + }; + }; + }) + ]; +} diff --git a/nixos/legacy/media-tdarr.nix b/nixos/legacy/media-tdarr.nix index 6086f9d..cb8dde7 100644 --- a/nixos/legacy/media-tdarr.nix +++ b/nixos/legacy/media-tdarr.nix @@ -31,8 +31,8 @@ }; }; - networking.firewall.interfaces.wq0.allowedTCPPorts = [ 8266 ]; - networking.firewall.interfaces.wq0.allowedUDPPorts = [ 8266 ]; + networking.firewall.interfaces.wg0.allowedTCPPorts = [ 8266 ]; + networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ]; networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ]; networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ]; diff --git a/nixos/legacy/media-unmanic.nix b/nixos/legacy/media-unmanic.nix index 1d85089..4662769 100644 --- a/nixos/legacy/media-unmanic.nix +++ b/nixos/legacy/media-unmanic.nix @@ -20,8 +20,8 @@ }; }; - #networking.firewall.interfaces.wq0.allowedTCPPorts = [ 8266 ]; -
#networking.firewall.interfaces.wq0.allowedUDPPorts = [ 8266 ]; + #networking.firewall.interfaces.wg0.allowedTCPPorts = [ 8266 ]; + #networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ]; #networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ]; #networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ]; diff --git a/nixos/machines/cherry/configuration.nix b/nixos/machines/cherry/configuration.nix index 6aac578..d46806c 100644 --- a/nixos/machines/cherry/configuration.nix +++ b/nixos/machines/cherry/configuration.nix @@ -16,11 +16,6 @@ ./37c3.nix - ./telemetry/opentelemetry.nix - ./telemetry/prometheus.nix - ./telemetry/prometheus-exporter-node.nix - ./telemetry/telegraf.nix - ]; @@ -36,6 +31,9 @@ components.network.wifi.enable = true; components.terminal.enable = true; + components.monitor.enable = true; + components.monitor.opentelemetry.exporter.endpoint = "10.100.0.1:4317"; # orbi + home-manager.users.mainUser.home.sessionPath = [ "$HOME/.timewarrior/scripts" ]; sops.secrets.yubikey_u2fAuthFile = { }; diff --git a/nixos/machines/cherry/telemetry/opentelemetry.nix b/nixos/machines/cherry/telemetry/opentelemetry.nix deleted file mode 100644 index 2795172..0000000 --- a/nixos/machines/cherry/telemetry/opentelemetry.nix +++ /dev/null @@ -1,64 +0,0 @@ -{ pkgs, config, ... }: -{ - services.opentelemetry-collector = { - enable = true; - package = pkgs.unstable.opentelemetry-collector-contrib; - settings = { - # add default tags - # todo : make sure we filter out metrics from otlp receivers - processors.metricstransform.transforms = [ - { - include = ".*"; - match_type = "regexp"; - action = "update"; - operations = [{ - action = "add_label"; - new_label = "server"; - new_value = config.networking.hostName; - }]; - } - ]; - - receivers = { - # scrape opentelemetry-colectors metrics - prometheus.config.scrape_configs = [ - # todo: this should be collected another way (opentelemetry internal?) 
- { - job_name = "otelcol"; - scrape_interval = "10s"; - static_configs = [{ - targets = [ "127.0.0.1:8100" ]; - }]; - metric_relabel_configs = [ - { - source_labels = [ "__name__" ]; - regex = ".*grpc_io.*"; - action = "drop"; - } - ]; - } - ]; - }; - - # ship to chungus - exporters.otlp = { - # todo : move this to orbi and route from orbi to chungus - endpoint = "10.100.0.2:4317"; # chungus - tls.insecure = true; - }; - - service = { - pipelines.metrics = { - receivers = [ "prometheus" ]; - processors = [ "metricstransform" ]; - exporters = [ "otlp" ]; - }; - - # todo : this should be automatically be collected - # open telemetries own metrics? - telemetry.metrics.address = "0.0.0.0:8100"; - }; - - }; - }; -} diff --git a/nixos/machines/cherry/telemetry/prometheus-exporter-node.nix b/nixos/machines/cherry/telemetry/prometheus-exporter-node.nix deleted file mode 100644 index 5047e10..0000000 --- a/nixos/machines/cherry/telemetry/prometheus-exporter-node.nix +++ /dev/null @@ -1,17 +0,0 @@ -{ config, ... }: -{ - services.opentelemetry-collector.settings.receivers.prometheus.config.scrape_configs = [ - { - job_name = "node-exporter"; - static_configs = [{ - targets = [ "127.0.0.1:${toString config.services.prometheus.exporters.node.port}" ]; - }]; - } - ]; - services.prometheus.exporters.node = { - enable = true; - enabledCollectors = [ "systemd" ]; - port = 9002; - }; - -} diff --git a/nixos/machines/cherry/telemetry/prometheus.nix b/nixos/machines/cherry/telemetry/prometheus.nix deleted file mode 100644 index 3bc35ae..0000000 --- a/nixos/machines/cherry/telemetry/prometheus.nix +++ /dev/null @@ -1,44 +0,0 @@ -{ config, pkgs, lib, ... 
}: -let - prometheus_port = 8090; -in -{ - - services.nginx = { - enable = true; - statusPage = true; - virtualHosts = { - "prometheus.${config.networking.hostName}.private" = { - extraConfig = '' - allow ${config.tinc.private.subnet}; - deny all; - ''; - locations."/" = { proxyPass = "http://localhost:${toString config.services.prometheus.port}"; }; - }; - }; - }; - - services.prometheus = { - checkConfig = "syntax-only"; - enable = true; - }; - - - imports = [ - # open telemetry connection - { - services.opentelemetry-collector.settings = { - exporters.prometheus.endpoint = "127.0.0.1:${toString prometheus_port}"; - service.pipelines.metrics.exporters = [ "prometheus" ]; - }; - services.prometheus.scrapeConfigs = [ - { - job_name = "opentelemetry"; - metrics_path = "/metrics"; - scrape_interval = "10s"; - static_configs = [{ targets = [ "localhost:${toString prometheus_port}" ]; }]; - } - ]; - } - ]; -} diff --git a/nixos/machines/cherry/telemetry/telegraf.nix b/nixos/machines/cherry/telemetry/telegraf.nix deleted file mode 100644 index 7c9d77f..0000000 --- a/nixos/machines/cherry/telemetry/telegraf.nix +++ /dev/null @@ -1,33 +0,0 @@ -{ config, pkgs, ... 
}: -let - telegraf_sink = 8088; -in -{ - imports = [ - { - services.opentelemetry-collector.settings = { - receivers.influxdb.endpoint = "127.0.0.1:${toString telegraf_sink }"; - service.pipelines.metrics.receivers = [ "influxdb" ]; - }; - services.telegraf.extraConfig.outputs.influxdb_v2.urls = [ "http://127.0.0.1:${toString telegraf_sink}" ]; - } - ]; - - systemd.services.telegraf.path = [ pkgs.inetutils ]; - - services.telegraf = { - enable = true; - extraConfig = { - # https://github.com/influxdata/telegraf/tree/master/plugins/inputs < all them plugins - inputs = { - cpu = { }; - diskio = { }; - processes = { }; - system = { }; - systemd_units = { }; - ping = [{ urls = [ "10.100.0.1" ]; }]; # actually important to make pepe visible over wireguard - }; - }; - }; - -} diff --git a/nixos/machines/chungus/configuration.nix b/nixos/machines/chungus/configuration.nix index 53e01f4..0a51cbb 100644 --- a/nixos/machines/chungus/configuration.nix +++ b/nixos/machines/chungus/configuration.nix @@ -39,10 +39,10 @@ ./loki-promtail.nix ./grafana.nix - ./telemetry/opentelemetry.nix - ./telemetry/prometheus.nix - ./telemetry/telegraf.nix ./telemetry/telegraf-smart.nix + ./telemetry/telegraf.nix + #./telemetry/opentelemetry-hass.nix + ./telemetry/prometheus.nix #./home-display.nix @@ -75,8 +75,12 @@ components.network.wifi.enable = false; components.terminal.enable = true; - services.printing.enable = false; + components.monitor.enable = true; + networking.firewall.interfaces.wg0.allowedTCPPorts = [ 4317 ]; + networking.firewall.interfaces.wg0.allowedUDPPorts = [ 4317 ]; + components.monitor.opentelemetry.receiver.endpoint = "0.0.0.0:4317"; + services.printing.enable = false; #virtualisation.containers.storage.settings = { # # fixes: Error: 'overlay' is not supported over zfs, a mount_program is required: backing file system is unsupported for this graph driver diff --git a/nixos/machines/chungus/hass-mqtt.nix b/nixos/machines/chungus/hass-mqtt.nix index fd66888..9bb6aea 100644 
--- a/nixos/machines/chungus/hass-mqtt.nix +++ b/nixos/machines/chungus/hass-mqtt.nix @@ -11,6 +11,6 @@ # open for tasmota networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 1883 ]; - networking.firewall.interfaces.wq0.allowedTCPPorts = [ 1883 ]; + networking.firewall.interfaces.wg0.allowedTCPPorts = [ 1883 ]; } diff --git a/nixos/machines/chungus/telemetry/opentelemetry-hass.nix b/nixos/machines/chungus/telemetry/opentelemetry-hass.nix new file mode 100644 index 0000000..ec8aff0 --- /dev/null +++ b/nixos/machines/chungus/telemetry/opentelemetry-hass.nix @@ -0,0 +1,38 @@ +{ config, ... }: +{ + + #{ + # name = "home-assistant"; + # rules = [ + # { + # record = "home_open_window_sum"; + # expr = ''sum( homeassistant_binary_sensor_state{entity=~"binary_sensor\\.window_02_contact|binary_sensor\\.window_03_contact|binary_sensor\\.window_04_contact|binary_sensor\\.window_05_contact|binary_sensor\\.window_06_contact|binary_sensor\\.window_07_contact"} )''; + # } + # ] ++ (map + # (number: + # { + # record = "home_at_least_n_windows_open"; + # expr = ''home_open_window_sum >= bool ${toString number}''; + # labels.n = number; + # }) [ 1 2 3 ]); + #}; + + sops.secrets.hass_long_term_token.owner = "prometheus"; + + services.opentelemetry-collector.settings = { + service.pipelines.metrics.receivers = [ "prometheus" ]; + receivers.prometheus.config.scrape_configs = [ + { + # see https://www.home-assistant.io/integrations/prometheus/ + job_name = "home-assistant"; + scrape_interval = "60s"; + metrics_path = "/api/prometheus"; + bearer_token_file = toString config.sops.secrets.hass_long_term_token.path; + static_configs = [{ + targets = [ "localhost:8123" ]; + }]; + } + ]; + + }; +} diff --git a/nixos/machines/chungus/telemetry/opentelemetry.nix b/nixos/machines/chungus/telemetry/opentelemetry.nix deleted file mode 100644 index 124afd1..0000000 --- a/nixos/machines/chungus/telemetry/opentelemetry.nix +++ /dev/null @@ -1,101 +0,0 @@ -{ pkgs, config, ... 
}: -{ - - networking.firewall.interfaces.wg0.allowedTCPPorts = [ 4317 ]; - networking.firewall.interfaces.wg0.allowedUDPPorts = [ 4317 ]; - - services.opentelemetry-collector = { - enable = true; - package = pkgs.unstable.opentelemetry-collector-contrib; - settings = { - receivers = { - - # receive metrics from other open telemetry collectors - otlp.protocols.grpc.endpoint = "0.0.0.0:4317"; - - # provide a influxdb sink - influxdb = { - endpoint = "127.0.0.1:8088"; - }; - - # scrape opentelemetry-colectors metrics - prometheus.config.scrape_configs = [ - { - job_name = "netdata"; - scrape_interval = "10s"; - metrics_path = "/api/v1/allmetrics"; - params.format = [ "prometheus" ]; - static_configs = [{ - targets = [ "127.0.0.1:19999" ]; - labels = { - service = "netdata"; - server = config.networking.hostName; - }; - }]; - } - { - job_name = "otelcol"; - scrape_interval = "10s"; - static_configs = [{ - targets = [ "127.0.0.1:8100" ]; - labels = { - service = "otelcol"; - server = config.networking.hostName; - }; - }]; - metric_relabel_configs = [ - { - source_labels = [ "__name__" ]; - regex = ".*grpc_io.*"; - action = "drop"; - } - ]; - } - { - job_name = "node"; - static_configs = [{ - targets = [ "127.0.0.1:${toString config.services.prometheus.exporters.node.port}" ]; - labels = { - # todo : this is not really needed (right?) 
- service = "node-exporter"; - # todo : use a processor for this - server = config.networking.hostName; - }; - }]; - } - { - # see https://www.home-assistant.io/integrations/prometheus/ - job_name = "home-assistant"; - scrape_interval = "60s"; - metrics_path = "/api/prometheus"; - bearer_token_file = toString config.sops.secrets.hass_long_term_token.path; - static_configs = [{ - targets = [ "localhost:8123" ]; - labels = { - service = "hass"; - server = config.networking.hostName; - }; - }]; - } - ]; - }; - - exporters = { - # provide prometheus sink under `/metrics` to - prometheus = { - endpoint = "127.0.0.1:8090"; - }; - }; - - service = { - pipelines.metrics = { - #receivers = [ "otlp" "influxdb" "prometheus" ]; - receivers = [ "otlp" "influxdb" ]; - exporters = [ "prometheus" ]; - }; - # open telemetries own metrics? - telemetry.metrics.address = "0.0.0.0:8100"; - }; - }; - }; -} diff --git a/nixos/machines/chungus/telemetry/prometheus.nix b/nixos/machines/chungus/telemetry/prometheus.nix index c574852..2e74728 100644 --- a/nixos/machines/chungus/telemetry/prometheus.nix +++ b/nixos/machines/chungus/telemetry/prometheus.nix @@ -1,7 +1,5 @@ { config, pkgs, lib, ... 
}: { - sops.secrets.hass_long_term_token.owner = "prometheus"; - services.nginx = { enable = true; statusPage = true; @@ -21,99 +19,5 @@ enable = true; # keep data for 30 days extraFlags = [ "--storage.tsdb.retention.time=90d" ]; - - ruleFiles = [ - (pkgs.writeText "prometheus-rules.yml" (builtins.toJSON { - groups = [ - { - name = "core"; - rules = [ - { - alert = "InstanceDown"; - expr = "up == 0"; - for = "5m"; - labels.severity = "page"; - annotations = { - summary = "Instance {{ $labels.instance }} down"; - description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."; - }; - } - ]; - } - # todo : move this to open telemetry - { - name = "home-assistant"; - rules = [ - { - record = "home_open_window_sum"; - expr = ''sum( homeassistant_binary_sensor_state{entity=~"binary_sensor\\.window_02_contact|binary_sensor\\.window_03_contact|binary_sensor\\.window_04_contact|binary_sensor\\.window_05_contact|binary_sensor\\.window_06_contact|binary_sensor\\.window_07_contact"} )''; - } - ] ++ (map - (number: - { - record = "home_at_least_n_windows_open"; - expr = ''home_open_window_sum >= bool ${toString number}''; - labels.n = number; - }) [ 1 2 3 ]); - } - ]; - })) - ]; - - exporters = { - node = { - enable = true; - enabledCollectors = [ "systemd" ]; - port = 9002; - }; - }; - - scrapeConfigs = [ - { - job_name = "opentelemetry"; - metrics_path = "/metrics"; - scrape_interval = "10s"; - static_configs = [{ targets = [ "localhost:8090" ]; }]; - } - #{ - # job_name = "netdata"; - # metrics_path = "/api/v1/allmetrics"; - # params.format = [ "prometheus" ]; - # scrape_interval = "5s"; - # static_configs = [ - # { - # targets = [ "localhost:19999" ]; - # labels = { - # service = "netdata"; - # server = config.networking.hostName; - # }; - # } - # ]; - #} - #{ - # job_name = "node"; - # static_configs = [{ - # targets = [ "localhost:${toString config.services.prometheus.exporters.node.port}" ]; - # labels = { - # service = 
"node-exporter"; - # server = config.networking.hostName; - # }; - # }]; - #} - #{ - # # see https://www.home-assistant.io/integrations/prometheus/ - # job_name = "home-assistant"; - # scrape_interval = "60s"; - # metrics_path = "/api/prometheus"; - # bearer_token_file = toString config.sops.secrets.hass_long_term_token.path; - # static_configs = [{ - # targets = [ "localhost:8123" ]; - # labels = { - # service = "hass"; - # server = config.networking.hostName; - # }; - # }]; - #} - ]; }; } diff --git a/nixos/machines/chungus/telemetry/telegraf-smart.nix b/nixos/machines/chungus/telemetry/telegraf-smart.nix index 0df46d1..040fb5d 100644 --- a/nixos/machines/chungus/telemetry/telegraf-smart.nix +++ b/nixos/machines/chungus/telemetry/telegraf-smart.nix @@ -1,15 +1,14 @@ { pkgs, ... }: { + services.smartd.enable = true; environment.systemPackages = [ pkgs.smartmontools pkgs.nvme-cli ]; - services.telegraf = { - enable = true; - extraConfig.inputs.smart = { - attributes = true; - use_sudo = true; - }; + services.telegraf.extraConfig.inputs.smart = { + attributes = true; + use_sudo = true; }; + systemd.services.telegraf.path = [ pkgs.smartmontools pkgs.nvme-cli "/run/wrappers" ]; security.sudo.configFile = '' diff --git a/nixos/machines/chungus/telemetry/telegraf.nix b/nixos/machines/chungus/telemetry/telegraf.nix index de9cac8..6b6101b 100644 --- a/nixos/machines/chungus/telemetry/telegraf.nix +++ b/nixos/machines/chungus/telemetry/telegraf.nix @@ -9,32 +9,12 @@ let { url = "https://tech.ingolf-wagner.de"; path = ""; } { url = "https://matrix.ingolf-wagner.de"; path = ""; } ]; - in { - systemd.services.telegraf.path = [ pkgs.inetutils ]; - services.telegraf = { - enable = true; extraConfig = { - #outputs.prometheus_client = { - # listen = ":9273"; - # metric_version = 2; - #}; - outputs.influxdb_v2 = { - urls = [ "http://127.0.0.1:8088" ]; - }; - - global_tags = { - service = "telegraf"; - server = config.networking.hostName; - }; - # 
https://github.com/influxdata/telegraf/tree/master/plugins/inputs < all them plugins inputs = { - cpu = { }; - diskio = { }; - smart.attributes = true; x509_cert = [{ sources = (map (url: "${url.url}:443") urls); interval = "30m"; # agent.interval = "10s" is default @@ -42,50 +22,10 @@ in http_response = let fullUrls = map ({ url, path }: "${url}/${path}") urls; in [{ urls = fullUrls; }]; - processes = { }; - system = { }; - systemd_units = { }; internet_speed.interval = "10m"; nginx.urls = [ "http://localhost/nginx_status" ]; - ping = [{ urls = [ "10.100.0.1" ]; }]; # actually important to make pepe visible over wireguard }; }; }; - # todo : do this prometheus - services.prometheus.ruleFiles = [ - (pkgs.writeText "telegraf.yml" (builtins.toJSON { - groups = [ - { - name = "telegraf"; - rules = [ - { - alert = "HttpResponseNotOk"; - expr = "0 * (http_response_http_response_code != 200) + 1"; - for = "5m"; - labels.severity = "page"; - annotations = { - summary = "{{ $labels.exported_server }} does not return Ok"; - description = "{{ $labels.exported_server }} does not return Ok for more than 5 minutes"; - }; - } - { - alert = "CertificatExpires"; - expr = ''x509_cert_expiry{issuer_common_name="R3"} < ${toString (60 * 60 * 24 * 5)}''; - for = "1d"; - labels.severity = "page"; - annotations = { - summary = "{{ $labels.san }} does Expire Soon"; - description = "{{ $labels.san }} does expire in less than 5 days"; - }; - } - ]; - } - ]; - })) - ]; - - - - } diff --git a/nixos/machines/cream/configuration.nix b/nixos/machines/cream/configuration.nix index 5f7c48a..8d24b81 100644 --- a/nixos/machines/cream/configuration.nix +++ b/nixos/machines/cream/configuration.nix @@ -37,6 +37,9 @@ components.network.wifi.enable = true; components.terminal.enable = true; + components.monitor.enable = true; + components.monitor.opentelemetry.exporter.endpoint = "10.100.0.1:4317"; # orbi + home-manager.users.mainUser.home.sessionPath = [ "$HOME/.timewarrior/scripts" ]; 
sops.secrets.yubikey_u2fAuthFile = { }; diff --git a/nixos/machines/orbi/configuration.nix b/nixos/machines/orbi/configuration.nix index e2f46b6..8dd9ed0 100644 --- a/nixos/machines/orbi/configuration.nix +++ b/nixos/machines/orbi/configuration.nix @@ -41,9 +41,6 @@ #./loki.nix #./loki-promtail.nix #./grafana.nix - ./telemetry/telegraf.nix - ./telemetry/opentelemetry.nix - #./telemetry/prometheus.nix #./cache.nix ]; @@ -57,6 +54,12 @@ components.network.nginx.landingpage.enable = false; components.network.wifi.enable = false; + components.monitor.enable = true; + networking.firewall.interfaces.wg0.allowedTCPPorts = [ 4317 ]; + networking.firewall.interfaces.wg0.allowedUDPPorts = [ 4317 ]; + components.monitor.opentelemetry.receiver.endpoint = "0.0.0.0:4317"; + components.monitor.opentelemetry.exporter.endpoint = "10.100.0.2:4317"; # chungus + security.acme.acceptTerms = true; security.acme.defaults.email = "contact@ingolf-wagner.de"; diff --git a/nixos/machines/orbi/media-tdarr.nix b/nixos/machines/orbi/media-tdarr.nix index 84c0fb0..3249efa 100644 --- a/nixos/machines/orbi/media-tdarr.nix +++ b/nixos/machines/orbi/media-tdarr.nix @@ -34,8 +34,8 @@ }; }; - #networking.firewall.interfaces.wq0.allowedTCPPorts = [ 8266 ]; - #networking.firewall.interfaces.wq0.allowedUDPPorts = [ 8266 ]; + #networking.firewall.interfaces.wg0.allowedTCPPorts = [ 8266 ]; + #networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ]; #networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ]; #networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ]; diff --git a/nixos/machines/orbi/media-unmanic.nix b/nixos/machines/orbi/media-unmanic.nix index 1d85089..4662769 100644 --- a/nixos/machines/orbi/media-unmanic.nix +++ b/nixos/machines/orbi/media-unmanic.nix @@ -20,8 +20,8 @@ }; }; - #networking.firewall.interfaces.wq0.allowedTCPPorts = [ 8266 ]; - #networking.firewall.interfaces.wq0.allowedUDPPorts = [ 8266 ]; + #networking.firewall.interfaces.wg0.allowedTCPPorts =
[ 8266 ]; + #networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ]; #networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ]; #networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ]; diff --git a/nixos/machines/orbi/telemetry/opentelemetry.nix b/nixos/machines/orbi/telemetry/opentelemetry.nix deleted file mode 100644 index 6b389e1..0000000 --- a/nixos/machines/orbi/telemetry/opentelemetry.nix +++ /dev/null @@ -1,51 +0,0 @@ -{ pkgs, ... }: -{ - services.opentelemetry-collector = { - enable = true; - package = pkgs.unstable.opentelemetry-collector-contrib; - settings = { - receivers = { - # provide a influxdb sink - influxdb = { - endpoint = "127.0.0.1:8088"; - }; - # scrape opentelemetry-colectors metrics - prometheus.config.scrape_configs = [ - { - job_name = "netdata"; - scrape_interval = "10s"; - metrics_path = "/api/v1/allmetrics"; - params.format = [ "prometheus" ]; - static_configs = [{ targets = [ "127.0.0.1:19999" ]; }]; - } - { - job_name = "otelcol"; - scrape_interval = "10s"; - static_configs = [{ targets = [ "127.0.0.1:8100" ]; }]; - metric_relabel_configs = [ - { - source_labels = [ "__name__" ]; - regex = ".*grpc_io.*"; - action = "drop"; - } - ]; - } - ]; - }; - exporters = { - # provide prometheus sink under `/metrics` to - prometheus = { - endpoint = "127.0.0.1:8090"; - }; - }; - service = { - pipelines.metrics = { - receivers = [ "influxdb" "prometheus" ]; - exporters = [ "prometheus" ]; - }; - # open telemetries own metrics? - telemetry.metrics.address = "0.0.0.0:8100"; - }; - }; - }; -} diff --git a/nixos/machines/orbi/telemetry/system-prometheus.nix b/nixos/machines/orbi/telemetry/system-prometheus.nix deleted file mode 100644 index 2fa4218..0000000 --- a/nixos/machines/orbi/telemetry/system-prometheus.nix +++ /dev/null @@ -1,70 +0,0 @@ -{ config, pkgs, lib, ... 
}: { - - services.nginx = { - enable = true; - statusPage = true; - virtualHosts = { - "prometheus.robi.private" = { - extraConfig = '' - allow ${config.tinc.private.subnet}; - deny all; - ''; - locations."/" = { proxyPass = "http://localhost:${toString config.services.prometheus.port}"; }; - }; - }; - }; - - services.prometheus = { - enable = true; - # keep data for 30 days - extraFlags = [ "--storage.tsdb.retention.time=30d" ]; - - exporters = { - node = { - enable = true; - enabledCollectors = [ "systemd" ]; - port = 9002; - }; - }; - - scrapeConfigs = [ - { - job_name = "netdata"; - metrics_path = "/api/v1/allmetrics"; - params.format = [ "prometheus" ]; - scrape_interval = "5s"; - static_configs = [ - { - targets = [ "localhost:19999" ]; - labels = { - service = "netdata"; - server = "robi"; - }; - } - ]; - } - { - job_name = "systemd"; - static_configs = [{ - targets = [ "localhost:${toString config.services.prometheus.exporters.node.port}" ]; - labels = { - service = "node-exporter"; - server = "robi"; - }; - }]; - } - { - # see https://www.home-assistant.io/integrations/prometheus/ - job_name = "telgraf"; - metrics_path = "/metrics"; - static_configs = [{ - targets = [ "localhost:9273" ]; - labels = { - service = "telegraf"; - server = "robi"; - }; - }]; - } - ]; - }; -} diff --git a/nixos/machines/orbi/telemetry/telegraf.nix b/nixos/machines/orbi/telemetry/telegraf.nix deleted file mode 100644 index f6e32ff..0000000 --- a/nixos/machines/orbi/telemetry/telegraf.nix +++ /dev/null @@ -1,33 +0,0 @@ -{ - services.telegraf = { - enable = true; - extraConfig = { - outputs = { - prometheus_client = { - listen = ":9273"; - metric_version = 2; - }; - influxdb_v2 = { - urls = [ "http://127.0.0.1:8088" ]; - }; - }; - # https://github.com/influxdata/telegraf/tree/master/plugins/inputs < all them plugins - inputs = { - cpu = { - percpu = true; - totalcpu = true; - }; - disk = { }; - diskio = { }; - kernel = { }; - mem = { }; - processes = { }; - netstat = { }; - net = { 
}; - system = { }; - systemd_units = { }; - nginx.urls = [ "http://localhost/nginx_status" ]; - }; - }; - }; -} diff --git a/nixos/machines/robi/media-tdarr.nix b/nixos/machines/robi/media-tdarr.nix index 84c0fb0..3249efa 100644 --- a/nixos/machines/robi/media-tdarr.nix +++ b/nixos/machines/robi/media-tdarr.nix @@ -34,8 +34,8 @@ }; }; - #networking.firewall.interfaces.wq0.allowedTCPPorts = [ 8266 ]; - #networking.firewall.interfaces.wq0.allowedUDPPorts = [ 8266 ]; + #networking.firewall.interfaces.wg0.allowedTCPPorts = [ 8266 ]; + #networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ]; #networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ]; #networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ];