From b890a69e30a8ba240644e1be1925672480c59c9c Mon Sep 17 00:00:00 2001 From: Ingolf Wagner Date: Sat, 18 May 2024 12:02:21 +0200 Subject: [PATCH] monitor containers --- nixos/components/monitor/default.nix | 20 +++-- .../{promtail.nix => logs-promtail.nix} | 7 +- ...{export-zfs.nix => metrics-export-zfs.nix} | 2 +- .../{netdata.nix => metrics-netdata.nix} | 8 +- ...{prometheus.nix => metrics-prometheus.nix} | 2 +- .../{telegraf.nix => metrics-telegraf.nix} | 12 ++- nixos/components/monitor/opentelemetry.nix | 84 ++++++++++++++----- .../chungus/telemetry/opentelemetry-hass.nix | 2 +- nixos/machines/orbi/container-monitoring.nix | 22 +++++ nixos/machines/orbi/media-nextcloud.nix | 8 +- nixos/machines/orbi/media-tdarr.nix | 54 ------------ nixos/machines/orbi/service-photoprism.nix | 5 +- .../machines/orbi/social-matrix-terranix.nix | 5 +- 13 files changed, 134 insertions(+), 97 deletions(-) rename nixos/components/monitor/{promtail.nix => logs-promtail.nix} (98%) rename nixos/components/monitor/{export-zfs.nix => metrics-export-zfs.nix} (93%) rename nixos/components/monitor/{netdata.nix => metrics-netdata.nix} (74%) rename nixos/components/monitor/{prometheus.nix => metrics-prometheus.nix} (94%) rename nixos/components/monitor/{telegraf.nix => metrics-telegraf.nix} (78%) create mode 100644 nixos/machines/orbi/container-monitoring.nix delete mode 100644 nixos/machines/orbi/media-tdarr.nix diff --git a/nixos/components/monitor/default.nix b/nixos/components/monitor/default.nix index a7fe06a..d8007eb 100644 --- a/nixos/components/monitor/default.nix +++ b/nixos/components/monitor/default.nix @@ -1,4 +1,4 @@ -{ lib, ... }: +{ lib, config, ... 
}: with lib; with types; { @@ -8,15 +8,23 @@ with types; type = bool; default = true; }; + metrics.enable = mkOption { + type = bool; + default = config.components.monitor.enable; + }; + logs.enable = mkOption { + type = bool; + default = config.components.monitor.enable; + }; }; imports = [ - ./export-zfs.nix - ./netdata.nix + ./logs-promtail.nix + ./metrics-export-zfs.nix + ./metrics-netdata.nix + ./metrics-prometheus.nix + ./metrics-telegraf.nix ./opentelemetry.nix - ./prometheus.nix - ./promtail.nix - ./telegraf.nix ]; config = mkIf config.components.monitor.enable { }; diff --git a/nixos/components/monitor/promtail.nix b/nixos/components/monitor/logs-promtail.nix similarity index 98% rename from nixos/components/monitor/promtail.nix rename to nixos/components/monitor/logs-promtail.nix index 986e546..7ef67fd 100644 --- a/nixos/components/monitor/promtail.nix +++ b/nixos/components/monitor/logs-promtail.nix @@ -8,7 +8,7 @@ in options.components.monitor.promtail = { enable = mkOption { type = lib.types.bool; - default = config.components.monitor.enable; + default = config.components.monitor.logs.enable; }; port = mkOption { type = int; @@ -32,10 +32,7 @@ in services.promtail = { enable = true; configuration = { - server = { - http_listen_port = 28183; - grpc_listen_port = 0; - }; + server. 
disable = true; positions.filename = "/var/cache/promtail/positions.yaml"; clients = [ diff --git a/nixos/components/monitor/export-zfs.nix b/nixos/components/monitor/metrics-export-zfs.nix similarity index 93% rename from nixos/components/monitor/export-zfs.nix rename to nixos/components/monitor/metrics-export-zfs.nix index c416faf..63892ae 100644 --- a/nixos/components/monitor/export-zfs.nix +++ b/nixos/components/monitor/metrics-export-zfs.nix @@ -4,7 +4,7 @@ with types; { options.components.monitor.exporters.zfs.enable = mkOption { type = lib.types.bool; - default = config.components.monitor.enable; + default = config.components.monitor.metrics.enable; }; config = mkMerge [ diff --git a/nixos/components/monitor/netdata.nix b/nixos/components/monitor/metrics-netdata.nix similarity index 74% rename from nixos/components/monitor/netdata.nix rename to nixos/components/monitor/metrics-netdata.nix index b6f801d..6132405 100644 --- a/nixos/components/monitor/netdata.nix +++ b/nixos/components/monitor/metrics-netdata.nix @@ -2,8 +2,14 @@ with lib; with types; { + options.components.monitor.netdata = { + enable = mkOption { + type = bool; + default = config.components.monitor.metrics.enable; + }; + }; - config = lib.mkIf config.components.monitor.enable { + config = mkIf config.components.monitor.netdata.enable { # netdata sink services.opentelemetry-collector.settings.receivers.prometheus.config.scrape_configs = [ diff --git a/nixos/components/monitor/prometheus.nix b/nixos/components/monitor/metrics-prometheus.nix similarity index 94% rename from nixos/components/monitor/prometheus.nix rename to nixos/components/monitor/metrics-prometheus.nix index 52e5e69..095bc59 100644 --- a/nixos/components/monitor/prometheus.nix +++ b/nixos/components/monitor/metrics-prometheus.nix @@ -8,7 +8,7 @@ in options.components.monitor.prometheus = { enable = mkOption { type = lib.types.bool; - default = config.components.monitor.enable; + default = 
config.components.monitor.metrics.enable; }; port = mkOption { type = int; diff --git a/nixos/components/monitor/telegraf.nix b/nixos/components/monitor/metrics-telegraf.nix similarity index 78% rename from nixos/components/monitor/telegraf.nix rename to nixos/components/monitor/metrics-telegraf.nix index c56bb85..ab04632 100644 --- a/nixos/components/monitor/telegraf.nix +++ b/nixos/components/monitor/metrics-telegraf.nix @@ -2,10 +2,14 @@ with lib; with types; let - cfg = config.components.monitor; + cfg = config.components.monitor.telegraf; in { - options.components.monitor = { + options.components.monitor.telegraf = { + enable = mkOption { + type = lib.types.bool; + default = config.components.monitor.metrics.enable; + }; influxDBPort = mkOption { type = int; default = 8088; @@ -14,7 +18,7 @@ in }; config = lib.mkMerge [ - (mkIf config.components.monitor.enable { + (mkIf config.components.monitor.telegraf.enable { # opentelemetry wireing services.opentelemetry-collector.settings = { receivers.influxdb.endpoint = "127.0.0.1:${toString cfg.influxDBPort}"; @@ -23,7 +27,7 @@ in services.telegraf.extraConfig.outputs.influxdb_v2.urls = [ "http://127.0.0.1:${toString cfg.influxDBPort}" ]; }) - (mkIf config.components.monitor.enable { + (mkIf config.components.monitor.telegraf.enable { systemd.services.telegraf.path = [ pkgs.inetutils ]; diff --git a/nixos/components/monitor/opentelemetry.nix b/nixos/components/monitor/opentelemetry.nix index 0878fa1..5f4cf0b 100644 --- a/nixos/components/monitor/opentelemetry.nix +++ b/nixos/components/monitor/opentelemetry.nix @@ -29,6 +29,7 @@ in }; config = mkMerge [ + (mkIf config.components.monitor.enable { services.opentelemetry-collector = { enable = true; @@ -41,9 +42,6 @@ in (mkIf config.components.monitor.enable { services.opentelemetry-collector.settings = { - service.pipelines.metrics.processors = [ "metricstransform" "resourcedetection/system" ]; - service.pipelines.logs.processors = [ "resourcedetection/system" ]; - 
processors = { # https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/processor/resourcedetectionprocessor/README.md @@ -68,6 +66,20 @@ in }; }; }) + (mkIf config.components.monitor.metrics.enable { + services.opentelemetry-collector.settings = { + service.pipelines.metrics.processors = [ + "metricstransform" + "resourcedetection/system" + ]; + }; + }) + (mkIf config.components.monitor.logs.enable { + services.opentelemetry-collector.settings = { + service.pipelines.logs.processors = [ "resourcedetection/system" ]; + }; + }) + (mkIf (config.components.monitor.opentelemetry.exporter.debug != null) { services.opentelemetry-collector.settings = { @@ -90,35 +102,62 @@ in endpoint = cfg.exporter.endpoint; tls.insecure = true; }; - service = { - pipelines.metrics = { - exporters = [ "otlp" ]; - }; - pipelines.logs = { - exporters = [ "otlp" ]; - }; - }; }; }) + (mkIf + ( + config.components.monitor.opentelemetry.exporter.endpoint != null && + config.components.monitor.logs.enable + ) + { + services.opentelemetry-collector.settings = { + service.pipelines.logs.exporters = [ "otlp" ]; + }; + }) + (mkIf + ( + config.components.monitor.opentelemetry.exporter.endpoint != null && + config.components.monitor.metrics.enable + ) + { + services.opentelemetry-collector.settings = { + service.pipelines.metrics.exporters = [ "otlp" ]; + }; + }) # ship from other instance (mkIf (config.components.monitor.opentelemetry.receiver.endpoint != null) { services.opentelemetry-collector.settings = { receivers.otlp.protocols.grpc.endpoint = cfg.receiver.endpoint; - service = { - pipelines.metrics = { - receivers = [ "otlp" ]; - }; - pipelines.logs = { - receivers = [ "otlp" ]; - }; - }; }; }) + (mkIf + ( + config.components.monitor.opentelemetry.receiver.endpoint != null && + config.components.monitor.logs.enable + ) + { + services.opentelemetry-collector.settings = { + service.pipelines.logs.receivers = [ "otlp" ]; + }; + }) + (mkIf + ( + 
config.components.monitor.opentelemetry.receiver.endpoint != null && + config.components.monitor.metrics.enable + ) + { + services.opentelemetry-collector.settings = { + service.pipelines.metrics.receivers = [ "otlp" ]; + }; + }) + + # scrape opentelemetry-colectors metrics # todo: this should be collected another way (opentelemetry internal?) - (mkIf config.components.monitor.enable { + # todo: enable me only when metrics.endpoint is set. + (mkIf config.components.monitor.metrics.enable { services.opentelemetry-collector.settings = { receivers = { prometheus.config.scrape_configs = [ @@ -151,6 +190,11 @@ in }; }) + (mkIf (! config.components.monitor.metrics.enable) { + services.opentelemetry-collector.settings = { + service.telemetry.metrics.level = "none"; + }; + }) ]; } diff --git a/nixos/machines/chungus/telemetry/opentelemetry-hass.nix b/nixos/machines/chungus/telemetry/opentelemetry-hass.nix index ec8aff0..a57bec6 100644 --- a/nixos/machines/chungus/telemetry/opentelemetry-hass.nix +++ b/nixos/machines/chungus/telemetry/opentelemetry-hass.nix @@ -29,7 +29,7 @@ metrics_path = "/api/prometheus"; bearer_token_file = toString config.sops.secrets.hass_long_term_token.path; static_configs = [{ - targets = [ "localhost:8123" ]; + targets = [ "127.0.0.1:8123" ]; }]; } ]; diff --git a/nixos/machines/orbi/container-monitoring.nix b/nixos/machines/orbi/container-monitoring.nix new file mode 100644 index 0000000..d9793b6 --- /dev/null +++ b/nixos/machines/orbi/container-monitoring.nix @@ -0,0 +1,22 @@ +{ lib, config, ... }: +with lib; +with types; +{ + imports = [ + ../../components/monitor + ]; + + # todo: create the components.monitor.container module. 
+ options.promtail.port = mkOption { + type = int; + }; + + config = { + components.monitor.enable = true; + components.monitor.metrics.enable = false; + components.monitor.promtail.port = config.promtail.port; + components.monitor.opentelemetry.exporter.endpoint = "127.0.0.1:4317"; + }; + + +} diff --git a/nixos/machines/orbi/media-nextcloud.nix b/nixos/machines/orbi/media-nextcloud.nix index 8af0423..5444f8f 100644 --- a/nixos/machines/orbi/media-nextcloud.nix +++ b/nixos/machines/orbi/media-nextcloud.nix @@ -86,14 +86,18 @@ in privateNetwork = false; autoStart = true; - config = { config, pkgs, lib, ... }: { + + config = { config, lib, ... }: { + nixpkgs.pkgs = pkgs; + imports = [ ./container-monitoring.nix ]; + promtail.port = 3502; + system.stateVersion = "23.11"; # Configuring nameservers for containers is currently broken. # Therefore in some cases internet connectivity can be broken inside the containers. # A temporary workaround is to manually write the /etc/nixos/resolv.conf file like this: #environment.etc."resolv.conf".text = "nameserver 8.8.8.8"; - system.stateVersion = "23.11"; users.users.nextcloud.uid = nextcloudUid; diff --git a/nixos/machines/orbi/media-tdarr.nix b/nixos/machines/orbi/media-tdarr.nix deleted file mode 100644 index 3249efa..0000000 --- a/nixos/machines/orbi/media-tdarr.nix +++ /dev/null @@ -1,54 +0,0 @@ -{ config, lib, pkgs, ... 
}: -{ - - # https://docs.tdarr.io/docs/installation/docker/run-compose - virtualisation.oci-containers = { - containers.tdarr = { - volumes = [ - "/media/arr/tdarr/server:/app/server" - "/media/arr/tdarr/configs:/app/configs" - "/media/arr/tdarr/logs:/app/logs" - "/media/arr/tdarr/transcode_cache:/temp" - "/media:/media" - ]; - environment = { - serverIP = "0.0.0.0"; - serverPort = "8266"; - webUIPort = "8265"; - internalNode = "true"; - inContainer = "true"; - nodeName = "robi"; - TZ = "Europe/Berlin"; - PUID = toString config.users.users.media.uid; - PGID = toString config.users.groups.media.gid; - }; - ports = [ - "127.0.0.1:8265:8265" # WebUI - # "8266:8266" # server port - ]; - image = "ghcr.io/haveagitgat/tdarr:latest"; # Warning: if the tag does not change, the image will not be updated - extraOptions = [ - #"--network=bridge" - #"--privileged" - ]; - }; - }; - - #networking.firewall.interfaces.wg0.allowedTCPPorts = [ 8266 ]; - #networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ]; - - #networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ]; - #networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ]; - - services.nginx.virtualHosts."tdarr.${config.networking.hostName}.private" = { - extraConfig = '' - allow ${config.tinc.private.subnet}; - deny all; - ''; - locations."/" = { - proxyPass = "http://localhost:8265"; - proxyWebsockets = true; - }; - }; - -} diff --git a/nixos/machines/orbi/service-photoprism.nix b/nixos/machines/orbi/service-photoprism.nix index 5bb486f..e7e6233 100644 --- a/nixos/machines/orbi/service-photoprism.nix +++ b/nixos/machines/orbi/service-photoprism.nix @@ -13,7 +13,10 @@ in privateNetwork = false; autoStart = true; - config = { config, pkgs, lib, ... }: { + config = { config, lib, ... 
}: { + nixpkgs.pkgs = pkgs; + imports = [ ./container-monitoring.nix ]; + promtail.port = 3503; system.stateVersion = "23.11"; # Photoprism diff --git a/nixos/machines/orbi/social-matrix-terranix.nix b/nixos/machines/orbi/social-matrix-terranix.nix index 78789a7..56eb448 100644 --- a/nixos/machines/orbi/social-matrix-terranix.nix +++ b/nixos/machines/orbi/social-matrix-terranix.nix @@ -68,7 +68,10 @@ in }; }; - config = { config, pkgs, lib, ... }: { + config = { config, lib, ... }: { + nixpkgs.pkgs = pkgs; + imports = [ ./container-monitoring.nix ]; + promtail.port = 3504; system.stateVersion = "23.11"; services.postgresql = {