make monitoring a component and easy to configure
parent a7f72610d4
commit 4903ab6fd9
26 changed files with 292 additions and 600 deletions
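The diff below turns the per-machine telemetry files into a reusable `components.monitor` module. For orientation, a minimal usage sketch for a machine that enables the component and ships its metrics to a central collector; the option names are the ones introduced in this commit, and the endpoint value only mirrors the machine configs further down, so treat it as illustrative:

{ ... }:
{
  # switch on the whole monitoring component (telegraf, opentelemetry, prometheus wiring)
  components.monitor.enable = true;
  # ship collected metrics to a central opentelemetry collector (example address)
  components.monitor.opentelemetry.exporter.endpoint = "10.100.0.1:4317";
}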
@@ -3,7 +3,6 @@ with lib;
with types;
{

options.components.monitor = {
enable = mkOption {
type = bool;
@@ -13,8 +12,10 @@ with types;

imports = [
./netdata.nix
./opentelemetry.nix
./prometheus.nix
./telegraf.nix
];

config = mkIf config.components.monitor.enable { };
}
nixos/components/monitor/opentelemetry.nix (new file, 124 lines)
@@ -0,0 +1,124 @@
{ pkgs, config, lib, ... }:
with lib;
with types;
let
  cfg = config.components.monitor.opentelemetry;
in
{
  options.components.monitor.opentelemetry = {
    receiver.endpoint = mkOption {
      type = nullOr str;
      default = null;
      description = "endpoint on which to receive OpenTelemetry data from other collectors";
    };
    exporter.endpoint = mkOption {
      type = nullOr str;
      default = null;
      description = "endpoint to ship OpenTelemetry data to";
    };
    metrics.endpoint = mkOption {
      type = str;
      default = "127.0.0.1:8100";
      description = "endpoint on which the collector exposes its own telemetry metrics";
    };
  };

  config = mkMerge [
    (mkIf config.components.monitor.enable {
      services.opentelemetry-collector = {
        enable = true;
        package = pkgs.unstable.opentelemetry-collector-contrib;
      };
    })

    # add default tags to metrics
    # todo: make sure we filter out metrics from otlp receivers
    (mkIf config.components.monitor.enable {
      services.opentelemetry-collector.settings = {
        service.pipelines.metrics.processors = [ "metricstransform" ];
        processors.metricstransform.transforms = [
          {
            include = ".*";
            match_type = "regexp";
            action = "update";
            operations = [{
              action = "add_label";
              new_label = "machine";
              new_value = config.networking.hostName;
            }];
          }
        ];
      };
    })

    # ship to the next instance
    (mkIf (config.components.monitor.opentelemetry.exporter.endpoint != null) {
      services.opentelemetry-collector.settings = {
        exporters.otlp = {
          endpoint = cfg.exporter.endpoint;
          tls.insecure = true;
        };
        service = {
          pipelines.metrics = {
            exporters = [ "otlp" ];
          };
          #pipelines.logs = {
          #  exporters = [ "otlp" ];
          #};
        };
      };
    })

    # receive from other instances
    (mkIf (config.components.monitor.opentelemetry.receiver.endpoint != null) {
      services.opentelemetry-collector.settings = {
        receivers.otlp.protocols.grpc.endpoint = cfg.receiver.endpoint;
        service = {
          pipelines.metrics = {
            receivers = [ "otlp" ];
          };
          #pipelines.logs = {
          #  exporters = [ "otlp" ];
          #};
        };
      };
    })

    # scrape the opentelemetry-collector's own metrics
    # todo: this should be collected another way (opentelemetry internal?)
    (mkIf config.components.monitor.enable {
      services.opentelemetry-collector.settings = {
        receivers = {
          prometheus.config.scrape_configs = [
            {
              job_name = "otelcol";
              scrape_interval = "10s";
              static_configs = [{
                targets = [ cfg.metrics.endpoint ];
              }];
              metric_relabel_configs = [
                {
                  source_labels = [ "__name__" ];
                  regex = ".*grpc_io.*";
                  action = "drop";
                }
              ];
            }
          ];
        };

        service = {
          pipelines.metrics = {
            receivers = [ "prometheus" ];
          };

          # todo: this should be collected automatically
          # opentelemetry's own metrics?
          telemetry.metrics.address = cfg.metrics.endpoint;
        };
      };
    })
  ];
}
nixos/components/monitor/prometheus.nix (new file, 45 lines)
@@ -0,0 +1,45 @@
{ config, lib, ... }:
with lib;
with types;
let
  cfg = config.components.monitor.prometheus;
in
{
  options.components.monitor.prometheus = {
    enable = mkOption {
      type = lib.types.bool;
      default = config.components.monitor.enable;
    };
    port = mkOption {
      type = int;
      default = 8090;
      description = "port on which the collector's Prometheus exporter is exposed";
    };
  };

  config = mkMerge [

    (mkIf config.components.monitor.prometheus.enable {
      services.prometheus = {
        checkConfig = "syntax-only";
        enable = true;
      };
    })

    (mkIf config.components.monitor.prometheus.enable {
      services.opentelemetry-collector.settings = {
        exporters.prometheus.endpoint = "127.0.0.1:${toString cfg.port}";
        service.pipelines.metrics.exporters = [ "prometheus" ];
      };
      services.prometheus.scrapeConfigs = [
        {
          job_name = "opentelemetry";
          metrics_path = "/metrics";
          scrape_interval = "10s";
          static_configs = [{ targets = [ "localhost:${toString cfg.port}" ]; }];
        }
      ];
    })

  ];
}
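On the receiving side, `components.monitor.prometheus.enable` defaults to `components.monitor.enable`, so an aggregating machine only needs the component switched on, the OTLP receiver endpoint set, and the port opened. A minimal sketch, mirroring the machine configs further down in this diff (interface name and port are taken from there, not mandated by the module):

{ ... }:
{
  components.monitor.enable = true;
  # accept OTLP metrics from other collectors over the wireguard interface
  components.monitor.opentelemetry.receiver.endpoint = "0.0.0.0:4317";
  networking.firewall.interfaces.wg0.allowedTCPPorts = [ 4317 ];
  networking.firewall.interfaces.wg0.allowedUDPPorts = [ 4317 ];
}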
nixos/components/monitor/telegraf.nix (new file, 46 lines)
@@ -0,0 +1,46 @@
{ config, pkgs, lib, ... }:
with lib;
with types;
let
  cfg = config.components.monitor;
in
{
  options.components.monitor = {
    influxDBPort = mkOption {
      type = int;
      default = 8088;
      description = "port on which the influxdb receiver listens for telegraf output";
    };
  };

  config = lib.mkMerge [
    (mkIf config.components.monitor.enable {
      # opentelemetry wiring
      services.opentelemetry-collector.settings = {
        receivers.influxdb.endpoint = "127.0.0.1:${toString cfg.influxDBPort}";
        service.pipelines.metrics.receivers = [ "influxdb" ];
      };
      services.telegraf.extraConfig.outputs.influxdb_v2.urls = [ "http://127.0.0.1:${toString cfg.influxDBPort}" ];
    })

    (mkIf config.components.monitor.enable {

      systemd.services.telegraf.path = [ pkgs.inetutils ];

      services.telegraf = {
        enable = true;
        extraConfig = {
          # https://github.com/influxdata/telegraf/tree/master/plugins/inputs <- all available plugins
          inputs = {
            cpu = { };
            diskio = { };
            processes = { };
            system = { };
            systemd_units = { };
            ping = [{ urls = [ "10.100.0.1" ]; }]; # actually important to make machine visible over wireguard
          };
        };
      };
    })
  ];
}
@@ -31,8 +31,8 @@
};
};

networking.firewall.interfaces.wq0.allowedTCPPorts = [ 8266 ];
networking.firewall.interfaces.wq0.allowedUDPPorts = [ 8266 ];
networking.firewall.interfaces.wg0.allowedTCPPorts = [ 8266 ];
networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ];

networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ];
networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ];
@@ -20,8 +20,8 @@
};
};

#networking.firewall.interfaces.wq0.allowedTCPPorts = [ 8266 ];
#networking.firewall.interfaces.wq0.allowedUDPPorts = [ 8266 ];
#networking.firewall.interfaces.wg0.allowedTCPPorts = [ 8266 ];
#networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ];

#networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ];
#networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ];
@@ -16,11 +16,6 @@

./37c3.nix

./telemetry/opentelemetry.nix
./telemetry/prometheus.nix
./telemetry/prometheus-exporter-node.nix
./telemetry/telegraf.nix

];

@@ -36,6 +31,9 @@
components.network.wifi.enable = true;
components.terminal.enable = true;

components.monitor.enable = true;
components.monitor.opentelemetry.exporter.endpoint = "10.100.0.1:4317"; # orbi

home-manager.users.mainUser.home.sessionPath = [ "$HOME/.timewarrior/scripts" ];

sops.secrets.yubikey_u2fAuthFile = { };

@@ -1,64 +0,0 @@
{ pkgs, config, ... }:
{
  services.opentelemetry-collector = {
    enable = true;
    package = pkgs.unstable.opentelemetry-collector-contrib;
    settings = {
      # add default tags
      # todo : make sure we filter out metrics from otlp receivers
      processors.metricstransform.transforms = [
        {
          include = ".*";
          match_type = "regexp";
          action = "update";
          operations = [{
            action = "add_label";
            new_label = "server";
            new_value = config.networking.hostName;
          }];
        }
      ];

      receivers = {
        # scrape opentelemetry-colectors metrics
        prometheus.config.scrape_configs = [
          # todo: this should be collected another way (opentelemetry internal?)
          {
            job_name = "otelcol";
            scrape_interval = "10s";
            static_configs = [{
              targets = [ "127.0.0.1:8100" ];
            }];
            metric_relabel_configs = [
              {
                source_labels = [ "__name__" ];
                regex = ".*grpc_io.*";
                action = "drop";
              }
            ];
          }
        ];
      };

      # ship to chungus
      exporters.otlp = {
        # todo : move this to orbi and route from orbi to chungus
        endpoint = "10.100.0.2:4317"; # chungus
        tls.insecure = true;
      };

      service = {
        pipelines.metrics = {
          receivers = [ "prometheus" ];
          processors = [ "metricstransform" ];
          exporters = [ "otlp" ];
        };

        # todo : this should be automatically be collected
        # open telemetries own metrics?
        telemetry.metrics.address = "0.0.0.0:8100";
      };

    };
  };
}
@@ -1,17 +0,0 @@
{ config, ... }:
{
  services.opentelemetry-collector.settings.receivers.prometheus.config.scrape_configs = [
    {
      job_name = "node-exporter";
      static_configs = [{
        targets = [ "127.0.0.1:${toString config.services.prometheus.exporters.node.port}" ];
      }];
    }
  ];
  services.prometheus.exporters.node = {
    enable = true;
    enabledCollectors = [ "systemd" ];
    port = 9002;
  };
}
@@ -1,44 +0,0 @@
{ config, pkgs, lib, ... }:
let
  prometheus_port = 8090;
in
{

  services.nginx = {
    enable = true;
    statusPage = true;
    virtualHosts = {
      "prometheus.${config.networking.hostName}.private" = {
        extraConfig = ''
          allow ${config.tinc.private.subnet};
          deny all;
        '';
        locations."/" = { proxyPass = "http://localhost:${toString config.services.prometheus.port}"; };
      };
    };
  };

  services.prometheus = {
    checkConfig = "syntax-only";
    enable = true;
  };

  imports = [
    # open telemetry connection
    {
      services.opentelemetry-collector.settings = {
        exporters.prometheus.endpoint = "127.0.0.1:${toString prometheus_port}";
        service.pipelines.metrics.exporters = [ "prometheus" ];
      };
      services.prometheus.scrapeConfigs = [
        {
          job_name = "opentelemetry";
          metrics_path = "/metrics";
          scrape_interval = "10s";
          static_configs = [{ targets = [ "localhost:${toString prometheus_port}" ]; }];
        }
      ];
    }
  ];
}
@@ -1,33 +0,0 @@
{ config, pkgs, ... }:
let
  telegraf_sink = 8088;
in
{
  imports = [
    {
      services.opentelemetry-collector.settings = {
        receivers.influxdb.endpoint = "127.0.0.1:${toString telegraf_sink }";
        service.pipelines.metrics.receivers = [ "influxdb" ];
      };
      services.telegraf.extraConfig.outputs.influxdb_v2.urls = [ "http://127.0.0.1:${toString telegraf_sink}" ];
    }
  ];

  systemd.services.telegraf.path = [ pkgs.inetutils ];

  services.telegraf = {
    enable = true;
    extraConfig = {
      # https://github.com/influxdata/telegraf/tree/master/plugins/inputs < all them plugins
      inputs = {
        cpu = { };
        diskio = { };
        processes = { };
        system = { };
        systemd_units = { };
        ping = [{ urls = [ "10.100.0.1" ]; }]; # actually important to make pepe visible over wireguard
      };
    };
  };

}
@@ -39,10 +39,10 @@
./loki-promtail.nix
./grafana.nix

./telemetry/opentelemetry.nix
./telemetry/prometheus.nix
./telemetry/telegraf.nix
./telemetry/telegraf-smart.nix
./telemetry/telegraf.nix
#./telemetry/opentelemetry-hass.nix
./telemetry/prometheus.nix

#./home-display.nix

@@ -75,8 +75,12 @@
components.network.wifi.enable = false;
components.terminal.enable = true;

services.printing.enable = false;
components.monitor.enable = true;
networking.firewall.interfaces.wg0.allowedTCPPorts = [ 4317 ];
networking.firewall.interfaces.wg0.allowedUDPPorts = [ 4317 ];
components.monitor.opentelemetry.receiver.endpoint = "0.0.0.0:4317";

services.printing.enable = false;

#virtualisation.containers.storage.settings = {
# # fixes: Error: 'overlay' is not supported over zfs, a mount_program is required: backing file system is unsupported for this graph driver

@@ -11,6 +11,6 @@

# open for tasmota
networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 1883 ];
networking.firewall.interfaces.wq0.allowedTCPPorts = [ 1883 ];
networking.firewall.interfaces.wg0.allowedTCPPorts = [ 1883 ];

}

nixos/machines/chungus/telemetry/opentelemetry-hass.nix (new file, 38 lines)
@@ -0,0 +1,38 @@
{ config, ... }:
{

  #{
  #  name = "home-assistant";
  #  rules = [
  #    {
  #      record = "home_open_window_sum";
  #      expr = ''sum( homeassistant_binary_sensor_state{entity=~"binary_sensor\\.window_02_contact|binary_sensor\\.window_03_contact|binary_sensor\\.window_04_contact|binary_sensor\\.window_05_contact|binary_sensor\\.window_06_contact|binary_sensor\\.window_07_contact"} )'';
  #    }
  #  ] ++ (map
  #    (number:
  #      {
  #        record = "home_at_least_n_windows_open";
  #        expr = ''home_open_window_sum >= bool ${toString number}'';
  #        labels.n = number;
  #      }) [ 1 2 3 ]);
  #};

  sops.secrets.hass_long_term_token.owner = "prometheus";

  services.opentelemetry-collector.settings = {
    service.pipelines.metrics.receivers = [ "prometheus" ];
    receivers.prometheus.config.scrape_configs = [
      {
        # see https://www.home-assistant.io/integrations/prometheus/
        job_name = "home-assistant";
        scrape_interval = "60s";
        metrics_path = "/api/prometheus";
        bearer_token_file = toString config.sops.secrets.hass_long_term_token.path;
        static_configs = [{
          targets = [ "localhost:8123" ];
        }];
      }
    ];
  };
}
@@ -1,101 +0,0 @@
{ pkgs, config, ... }:
{

  networking.firewall.interfaces.wg0.allowedTCPPorts = [ 4317 ];
  networking.firewall.interfaces.wg0.allowedUDPPorts = [ 4317 ];

  services.opentelemetry-collector = {
    enable = true;
    package = pkgs.unstable.opentelemetry-collector-contrib;
    settings = {
      receivers = {

        # receive metrics from other open telemetry collectors
        otlp.protocols.grpc.endpoint = "0.0.0.0:4317";

        # provide a influxdb sink
        influxdb = {
          endpoint = "127.0.0.1:8088";
        };

        # scrape opentelemetry-colectors metrics
        prometheus.config.scrape_configs = [
          {
            job_name = "netdata";
            scrape_interval = "10s";
            metrics_path = "/api/v1/allmetrics";
            params.format = [ "prometheus" ];
            static_configs = [{
              targets = [ "127.0.0.1:19999" ];
              labels = {
                service = "netdata";
                server = config.networking.hostName;
              };
            }];
          }
          {
            job_name = "otelcol";
            scrape_interval = "10s";
            static_configs = [{
              targets = [ "127.0.0.1:8100" ];
              labels = {
                service = "otelcol";
                server = config.networking.hostName;
              };
            }];
            metric_relabel_configs = [
              {
                source_labels = [ "__name__" ];
                regex = ".*grpc_io.*";
                action = "drop";
              }
            ];
          }
          {
            job_name = "node";
            static_configs = [{
              targets = [ "127.0.0.1:${toString config.services.prometheus.exporters.node.port}" ];
              labels = {
                # todo : this is not really needed (right?)
                service = "node-exporter";
                # todo : use a processor for this
                server = config.networking.hostName;
              };
            }];
          }
          {
            # see https://www.home-assistant.io/integrations/prometheus/
            job_name = "home-assistant";
            scrape_interval = "60s";
            metrics_path = "/api/prometheus";
            bearer_token_file = toString config.sops.secrets.hass_long_term_token.path;
            static_configs = [{
              targets = [ "localhost:8123" ];
              labels = {
                service = "hass";
                server = config.networking.hostName;
              };
            }];
          }
        ];
      };

      exporters = {
        # provide prometheus sink under `/metrics` to
        prometheus = {
          endpoint = "127.0.0.1:8090";
        };
      };

      service = {
        pipelines.metrics = {
          #receivers = [ "otlp" "influxdb" "prometheus" ];
          receivers = [ "otlp" "influxdb" ];
          exporters = [ "prometheus" ];
        };
        # open telemetries own metrics?
        telemetry.metrics.address = "0.0.0.0:8100";
      };
    };
  };
}
@@ -1,7 +1,5 @@
{ config, pkgs, lib, ... }: {

sops.secrets.hass_long_term_token.owner = "prometheus";

services.nginx = {
enable = true;
statusPage = true;
@@ -21,99 +19,5 @@
enable = true;
# keep data for 30 days
extraFlags = [ "--storage.tsdb.retention.time=90d" ];

ruleFiles = [
(pkgs.writeText "prometheus-rules.yml" (builtins.toJSON {
groups = [
{
name = "core";
rules = [
{
alert = "InstanceDown";
expr = "up == 0";
for = "5m";
labels.severity = "page";
annotations = {
summary = "Instance {{ $labels.instance }} down";
description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.";
};
}
];
}
# todo : move this to open telemetry
{
name = "home-assistant";
rules = [
{
record = "home_open_window_sum";
expr = ''sum( homeassistant_binary_sensor_state{entity=~"binary_sensor\\.window_02_contact|binary_sensor\\.window_03_contact|binary_sensor\\.window_04_contact|binary_sensor\\.window_05_contact|binary_sensor\\.window_06_contact|binary_sensor\\.window_07_contact"} )'';
}
] ++ (map
(number:
{
record = "home_at_least_n_windows_open";
expr = ''home_open_window_sum >= bool ${toString number}'';
labels.n = number;
}) [ 1 2 3 ]);
}
];
}))
];

exporters = {
node = {
enable = true;
enabledCollectors = [ "systemd" ];
port = 9002;
};
};

scrapeConfigs = [
{
job_name = "opentelemetry";
metrics_path = "/metrics";
scrape_interval = "10s";
static_configs = [{ targets = [ "localhost:8090" ]; }];
}
#{
# job_name = "netdata";
# metrics_path = "/api/v1/allmetrics";
# params.format = [ "prometheus" ];
# scrape_interval = "5s";
# static_configs = [
#   {
#     targets = [ "localhost:19999" ];
#     labels = {
#       service = "netdata";
#       server = config.networking.hostName;
#     };
#   }
# ];
#}
#{
# job_name = "node";
# static_configs = [{
#   targets = [ "localhost:${toString config.services.prometheus.exporters.node.port}" ];
#   labels = {
#     service = "node-exporter";
#     server = config.networking.hostName;
#   };
# }];
#}
#{
# # see https://www.home-assistant.io/integrations/prometheus/
# job_name = "home-assistant";
# scrape_interval = "60s";
# metrics_path = "/api/prometheus";
# bearer_token_file = toString config.sops.secrets.hass_long_term_token.path;
# static_configs = [{
#   targets = [ "localhost:8123" ];
#   labels = {
#     service = "hass";
#     server = config.networking.hostName;
#   };
# }];
#}
];
};
}
@@ -1,15 +1,14 @@
{ pkgs, ... }:
{

services.smartd.enable = true;
environment.systemPackages = [ pkgs.smartmontools pkgs.nvme-cli ];

services.telegraf = {
enable = true;
extraConfig.inputs.smart = {
services.telegraf.extraConfig.inputs.smart = {
attributes = true;
use_sudo = true;
};
};

systemd.services.telegraf.path = [ pkgs.smartmontools pkgs.nvme-cli "/run/wrappers" ];

security.sudo.configFile = ''
@@ -9,32 +9,12 @@ let
{ url = "https://tech.ingolf-wagner.de"; path = ""; }
{ url = "https://matrix.ingolf-wagner.de"; path = ""; }
];

in
{
systemd.services.telegraf.path = [ pkgs.inetutils ];

services.telegraf = {
enable = true;
extraConfig = {
#outputs.prometheus_client = {
# listen = ":9273";
# metric_version = 2;
#};
outputs.influxdb_v2 = {
urls = [ "http://127.0.0.1:8088" ];
};

global_tags = {
service = "telegraf";
server = config.networking.hostName;
};

# https://github.com/influxdata/telegraf/tree/master/plugins/inputs < all them plugins
inputs = {
cpu = { };
diskio = { };
smart.attributes = true;
x509_cert = [{
sources = (map (url: "${url.url}:443") urls);
interval = "30m"; # agent.interval = "10s" is default
@@ -42,50 +22,10 @@ in
http_response =
let fullUrls = map ({ url, path }: "${url}/${path}") urls;
in [{ urls = fullUrls; }];
processes = { };
system = { };
systemd_units = { };
internet_speed.interval = "10m";
nginx.urls = [ "http://localhost/nginx_status" ];
ping = [{ urls = [ "10.100.0.1" ]; }]; # actually important to make pepe visible over wireguard
};
};
};

# todo : do this prometheus
services.prometheus.ruleFiles = [
(pkgs.writeText "telegraf.yml" (builtins.toJSON {
groups = [
{
name = "telegraf";
rules = [
{
alert = "HttpResponseNotOk";
expr = "0 * (http_response_http_response_code != 200) + 1";
for = "5m";
labels.severity = "page";
annotations = {
summary = "{{ $labels.exported_server }} does not return Ok";
description = "{{ $labels.exported_server }} does not return Ok for more than 5 minutes";
};
}
{
alert = "CertificatExpires";
expr = ''x509_cert_expiry{issuer_common_name="R3"} < ${toString (60 * 60 * 24 * 5)}'';
for = "1d";
labels.severity = "page";
annotations = {
summary = "{{ $labels.san }} does Expire Soon";
description = "{{ $labels.san }} does expire in less than 5 days";
};
}
];
}
];
}))
];

}
@@ -37,6 +37,9 @@
components.network.wifi.enable = true;
components.terminal.enable = true;

components.monitor.enable = true;
components.monitor.opentelemetry.exporter.endpoint = "10.100.0.1:4317"; # orbi

home-manager.users.mainUser.home.sessionPath = [ "$HOME/.timewarrior/scripts" ];

sops.secrets.yubikey_u2fAuthFile = { };

@@ -41,9 +41,6 @@
#./loki.nix
#./loki-promtail.nix
#./grafana.nix
./telemetry/telegraf.nix
./telemetry/opentelemetry.nix
#./telemetry/prometheus.nix

#./cache.nix
];
@@ -57,6 +54,12 @@
components.network.nginx.landingpage.enable = false;
components.network.wifi.enable = false;

components.monitor.enable = true;
networking.firewall.interfaces.wg0.allowedTCPPorts = [ 4317 ];
networking.firewall.interfaces.wg0.allowedUDPPorts = [ 4317 ];
components.monitor.opentelemetry.receiver.endpoint = "0.0.0.0:4317";
components.monitor.opentelemetry.exporter.endpoint = "10.100.0.2:4317"; # chungus

security.acme.acceptTerms = true;
security.acme.defaults.email = "contact@ingolf-wagner.de";

@@ -34,8 +34,8 @@
};
};

#networking.firewall.interfaces.wq0.allowedTCPPorts = [ 8266 ];
#networking.firewall.interfaces.wq0.allowedUDPPorts = [ 8266 ];
#networking.firewall.interfaces.wg0.allowedTCPPorts = [ 8266 ];
#networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ];

#networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ];
#networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ];
|
@ -20,8 +20,8 @@
|
|||
};
|
||||
};
|
||||
|
||||
#networking.firewall.interfaces.wq0.allowedTCPPorts = [ 8266 ];
|
||||
#networking.firewall.interfaces.wq0.allowedUDPPorts = [ 8266 ];
|
||||
#networking.firewall.interfaces.wg0.allowedTCPPorts = [ 8266 ];
|
||||
#networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ];
|
||||
|
||||
#networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ];
|
||||
#networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ];
|
||||
|
|
|
@ -1,51 +0,0 @@
|
|||
{ pkgs, ... }:
|
||||
{
|
||||
services.opentelemetry-collector = {
|
||||
enable = true;
|
||||
package = pkgs.unstable.opentelemetry-collector-contrib;
|
||||
settings = {
|
||||
receivers = {
|
||||
# provide a influxdb sink
|
||||
influxdb = {
|
||||
endpoint = "127.0.0.1:8088";
|
||||
};
|
||||
# scrape opentelemetry-colectors metrics
|
||||
prometheus.config.scrape_configs = [
|
||||
{
|
||||
job_name = "netdata";
|
||||
scrape_interval = "10s";
|
||||
metrics_path = "/api/v1/allmetrics";
|
||||
params.format = [ "prometheus" ];
|
||||
static_configs = [{ targets = [ "127.0.0.1:19999" ]; }];
|
||||
}
|
||||
{
|
||||
job_name = "otelcol";
|
||||
scrape_interval = "10s";
|
||||
static_configs = [{ targets = [ "127.0.0.1:8100" ]; }];
|
||||
metric_relabel_configs = [
|
||||
{
|
||||
source_labels = [ "__name__" ];
|
||||
regex = ".*grpc_io.*";
|
||||
action = "drop";
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
exporters = {
|
||||
# provide prometheus sink under `/metrics` to
|
||||
prometheus = {
|
||||
endpoint = "127.0.0.1:8090";
|
||||
};
|
||||
};
|
||||
service = {
|
||||
pipelines.metrics = {
|
||||
receivers = [ "influxdb" "prometheus" ];
|
||||
exporters = [ "prometheus" ];
|
||||
};
|
||||
# open telemetries own metrics?
|
||||
telemetry.metrics.address = "0.0.0.0:8100";
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
|
@@ -1,70 +0,0 @@
{ config, pkgs, lib, ... }: {

  services.nginx = {
    enable = true;
    statusPage = true;
    virtualHosts = {
      "prometheus.robi.private" = {
        extraConfig = ''
          allow ${config.tinc.private.subnet};
          deny all;
        '';
        locations."/" = { proxyPass = "http://localhost:${toString config.services.prometheus.port}"; };
      };
    };
  };

  services.prometheus = {
    enable = true;
    # keep data for 30 days
    extraFlags = [ "--storage.tsdb.retention.time=30d" ];

    exporters = {
      node = {
        enable = true;
        enabledCollectors = [ "systemd" ];
        port = 9002;
      };
    };

    scrapeConfigs = [
      {
        job_name = "netdata";
        metrics_path = "/api/v1/allmetrics";
        params.format = [ "prometheus" ];
        scrape_interval = "5s";
        static_configs = [
          {
            targets = [ "localhost:19999" ];
            labels = {
              service = "netdata";
              server = "robi";
            };
          }
        ];
      }
      {
        job_name = "systemd";
        static_configs = [{
          targets = [ "localhost:${toString config.services.prometheus.exporters.node.port}" ];
          labels = {
            service = "node-exporter";
            server = "robi";
          };
        }];
      }
      {
        # see https://www.home-assistant.io/integrations/prometheus/
        job_name = "telgraf";
        metrics_path = "/metrics";
        static_configs = [{
          targets = [ "localhost:9273" ];
          labels = {
            service = "telegraf";
            server = "robi";
          };
        }];
      }
    ];
  };
}
@@ -1,33 +0,0 @@
{
  services.telegraf = {
    enable = true;
    extraConfig = {
      outputs = {
        prometheus_client = {
          listen = ":9273";
          metric_version = 2;
        };
        influxdb_v2 = {
          urls = [ "http://127.0.0.1:8088" ];
        };
      };
      # https://github.com/influxdata/telegraf/tree/master/plugins/inputs < all them plugins
      inputs = {
        cpu = {
          percpu = true;
          totalcpu = true;
        };
        disk = { };
        diskio = { };
        kernel = { };
        mem = { };
        processes = { };
        netstat = { };
        net = { };
        system = { };
        systemd_units = { };
        nginx.urls = [ "http://localhost/nginx_status" ];
      };
    };
  };
}
@@ -34,8 +34,8 @@
};
};

#networking.firewall.interfaces.wq0.allowedTCPPorts = [ 8266 ];
#networking.firewall.interfaces.wq0.allowedUDPPorts = [ 8266 ];
#networking.firewall.interfaces.wg0.allowedTCPPorts = [ 8266 ];
#networking.firewall.interfaces.wg0.allowedUDPPorts = [ 8266 ];

#networking.firewall.interfaces.enp0s31f6.allowedTCPPorts = [ 8266 ];
#networking.firewall.interfaces.enp0s31f6.allowedUDPPorts = [ 8266 ];