diff options
Diffstat (limited to 'infra/libkookie/nixpkgs/nixos/modules/services/monitoring/prometheus/default.nix')
-rw-r--r-- | infra/libkookie/nixpkgs/nixos/modules/services/monitoring/prometheus/default.nix | 722 |
1 files changed, 722 insertions, 0 deletions
diff --git a/infra/libkookie/nixpkgs/nixos/modules/services/monitoring/prometheus/default.nix b/infra/libkookie/nixpkgs/nixos/modules/services/monitoring/prometheus/default.nix new file mode 100644 index 000000000000..72428957109c --- /dev/null +++ b/infra/libkookie/nixpkgs/nixos/modules/services/monitoring/prometheus/default.nix @@ -0,0 +1,722 @@ +{ config, pkgs, lib, ... }: + +with lib; + +let + cfg = config.services.prometheus; + + workingDir = "/var/lib/" + cfg.stateDir; + + # a wrapper that verifies that the configuration is valid + promtoolCheck = what: name: file: + if cfg.checkConfig then + pkgs.runCommandNoCCLocal + "${name}-${replaceStrings [" "] [""] what}-checked" + { buildInputs = [ cfg.package ]; } '' + ln -s ${file} $out + promtool ${what} $out + '' else file; + + # Pretty-print JSON to a file + writePrettyJSON = name: x: + pkgs.runCommandNoCCLocal name {} '' + echo '${builtins.toJSON x}' | ${pkgs.jq}/bin/jq . > $out + ''; + + generatedPrometheusYml = writePrettyJSON "prometheus.yml" promConfig; + + # This becomes the main config file for Prometheus + promConfig = { + global = filterValidPrometheus cfg.globalConfig; + rule_files = map (promtoolCheck "check rules" "rules") (cfg.ruleFiles ++ [ + (pkgs.writeText "prometheus.rules" (concatStringsSep "\n" cfg.rules)) + ]); + scrape_configs = filterValidPrometheus cfg.scrapeConfigs; + alerting = { + inherit (cfg) alertmanagers; + }; + }; + + prometheusYml = let + yml = if cfg.configText != null then + pkgs.writeText "prometheus.yml" cfg.configText + else generatedPrometheusYml; + in promtoolCheck "check config" "prometheus.yml" yml; + + cmdlineArgs = cfg.extraFlags ++ [ + "--storage.tsdb.path=${workingDir}/data/" + "--config.file=/run/prometheus/prometheus-substituted.yaml" + "--web.listen-address=${cfg.listenAddress}:${builtins.toString cfg.port}" + "--alertmanager.notification-queue-capacity=${toString cfg.alertmanagerNotificationQueueCapacity}" + "--alertmanager.timeout=${toString cfg.alertmanagerTimeout}s" + ] ++ optional (cfg.webExternalUrl != null) "--web.external-url=${cfg.webExternalUrl}" + ++ optional (cfg.retentionTime != null) "--storage.tsdb.retention.time=${cfg.retentionTime}"; + + filterValidPrometheus = filterAttrsListRecursive (n: v: !(n == "_module" || v == null)); + filterAttrsListRecursive = pred: x: + if isAttrs x then + listToAttrs ( + concatMap (name: + let v = x.${name}; in + if pred name v then [ + (nameValuePair name (filterAttrsListRecursive pred v)) + ] else [] + ) (attrNames x) + ) + else if isList x then + map (filterAttrsListRecursive pred) x + else x; + + mkDefOpt = type : defaultStr : description : mkOpt type (description + '' + + Defaults to <literal>${defaultStr}</literal> in prometheus + when set to <literal>null</literal>. + ''); + + mkOpt = type : description : mkOption { + type = types.nullOr type; + default = null; + inherit description; + }; + + promTypes.globalConfig = types.submodule { + options = { + scrape_interval = mkDefOpt types.str "1m" '' + How frequently to scrape targets by default. + ''; + + scrape_timeout = mkDefOpt types.str "10s" '' + How long until a scrape request times out. + ''; + + evaluation_interval = mkDefOpt types.str "1m" '' + How frequently to evaluate rules by default. + ''; + + external_labels = mkOpt (types.attrsOf types.str) '' + The labels to add to any time series or alerts when + communicating with external systems (federation, remote + storage, Alertmanager). + ''; + }; + }; + + promTypes.scrape_config = types.submodule { + options = { + job_name = mkOption { + type = types.str; + description = '' + The job name assigned to scraped metrics by default. + ''; + }; + scrape_interval = mkOpt types.str '' + How frequently to scrape targets from this job. Defaults to the + globally configured default. + ''; + + scrape_timeout = mkOpt types.str '' + Per-target timeout when scraping this job. Defaults to the + globally configured default. + ''; + + metrics_path = mkDefOpt types.str "/metrics" '' + The HTTP resource path on which to fetch metrics from targets. + ''; + + honor_labels = mkDefOpt types.bool "false" '' + Controls how Prometheus handles conflicts between labels + that are already present in scraped data and labels that + Prometheus would attach server-side ("job" and "instance" + labels, manually configured target labels, and labels + generated by service discovery implementations). + + If honor_labels is set to "true", label conflicts are + resolved by keeping label values from the scraped data and + ignoring the conflicting server-side labels. + + If honor_labels is set to "false", label conflicts are + resolved by renaming conflicting labels in the scraped data + to "exported_<original-label>" (for example + "exported_instance", "exported_job") and then attaching + server-side labels. This is useful for use cases such as + federation, where all labels specified in the target should + be preserved. + ''; + + honor_timestamps = mkDefOpt types.bool "true" '' + honor_timestamps controls whether Prometheus respects the timestamps present + in scraped data. + + If honor_timestamps is set to <literal>true</literal>, the timestamps of the metrics exposed + by the target will be used. + + If honor_timestamps is set to <literal>false</literal>, the timestamps of the metrics exposed + by the target will be ignored. + ''; + + scheme = mkDefOpt (types.enum ["http" "https"]) "http" '' + The URL scheme with which to fetch metrics from targets. + ''; + + params = mkOpt (types.attrsOf (types.listOf types.str)) '' + Optional HTTP URL parameters. + ''; + + basic_auth = mkOpt (types.submodule { + options = { + username = mkOption { + type = types.str; + description = '' + HTTP username + ''; + }; + password = mkOption { + type = types.str; + description = '' + HTTP password + ''; + }; + }; + }) '' + Optional http login credentials for metrics scraping. + ''; + + bearer_token = mkOpt types.str '' + Sets the `Authorization` header on every scrape request with + the configured bearer token. It is mutually exclusive with + <option>bearer_token_file</option>. + ''; + + bearer_token_file = mkOpt types.str '' + Sets the `Authorization` header on every scrape request with + the bearer token read from the configured file. It is mutually + exclusive with <option>bearer_token</option>. + ''; + + tls_config = mkOpt promTypes.tls_config '' + Configures the scrape request's TLS settings. + ''; + + proxy_url = mkOpt types.str '' + Optional proxy URL. + ''; + + ec2_sd_configs = mkOpt (types.listOf promTypes.ec2_sd_config) '' + List of EC2 service discovery configurations. + ''; + + dns_sd_configs = mkOpt (types.listOf promTypes.dns_sd_config) '' + List of DNS service discovery configurations. + ''; + + consul_sd_configs = mkOpt (types.listOf promTypes.consul_sd_config) '' + List of Consul service discovery configurations. + ''; + + file_sd_configs = mkOpt (types.listOf promTypes.file_sd_config) '' + List of file service discovery configurations. + ''; + + static_configs = mkOpt (types.listOf promTypes.static_config) '' + List of labeled target groups for this job. + ''; + + relabel_configs = mkOpt (types.listOf promTypes.relabel_config) '' + List of relabel configurations. + ''; + + sample_limit = mkDefOpt types.int "0" '' + Per-scrape limit on number of scraped samples that will be accepted. + If more than this number of samples are present after metric relabelling + the entire scrape will be treated as failed. 0 means no limit. + ''; + }; + }; + + promTypes.static_config = types.submodule { + options = { + targets = mkOption { + type = types.listOf types.str; + description = '' + The targets specified by the target group. + ''; + }; + labels = mkOption { + type = types.attrsOf types.str; + default = {}; + description = '' + Labels assigned to all metrics scraped from the targets. + ''; + }; + }; + }; + + promTypes.ec2_sd_config = types.submodule { + options = { + region = mkOption { + type = types.str; + description = '' + The AWS Region. + ''; + }; + endpoint = mkOpt types.str '' + Custom endpoint to be used. + ''; + + access_key = mkOpt types.str '' + The AWS API key id. If blank, the environment variable + <literal>AWS_ACCESS_KEY_ID</literal> is used. + ''; + + secret_key = mkOpt types.str '' + The AWS API key secret. If blank, the environment variable + <literal>AWS_SECRET_ACCESS_KEY</literal> is used. + ''; + + profile = mkOpt types.str '' + Named AWS profile used to connect to the API. + ''; + + role_arn = mkOpt types.str '' + AWS Role ARN, an alternative to using AWS API keys. + ''; + + refresh_interval = mkDefOpt types.str "60s" '' + Refresh interval to re-read the instance list. + ''; + + port = mkDefOpt types.int "80" '' + The port to scrape metrics from. If using the public IP + address, this must instead be specified in the relabeling + rule. + ''; + + filters = mkOpt (types.listOf promTypes.filter) '' + Filters can be used optionally to filter the instance list by other criteria. + ''; + }; + }; + + promTypes.filter = types.submodule { + options = { + name = mkOption { + type = types.str; + description = '' + See <link xlink:href="https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html">this list</link> + for the available filters. + ''; + }; + + value = mkOption { + type = types.listOf types.str; + default = []; + description = '' + Value of the filter. + ''; + }; + }; + }; + + promTypes.dns_sd_config = types.submodule { + options = { + names = mkOption { + type = types.listOf types.str; + description = '' + A list of DNS SRV record names to be queried. + ''; + }; + + refresh_interval = mkDefOpt types.str "30s" '' + The time after which the provided names are refreshed. + ''; + }; + }; + + promTypes.consul_sd_config = types.submodule { + options = { + server = mkDefOpt types.str "localhost:8500" '' + Consul server to query. + ''; + + token = mkOpt types.str "Consul token"; + + datacenter = mkOpt types.str "Consul datacenter"; + + scheme = mkDefOpt types.str "http" "Consul scheme"; + + username = mkOpt types.str "Consul username"; + + password = mkOpt types.str "Consul password"; + + tls_config = mkOpt promTypes.tls_config '' + Configures the Consul request's TLS settings. + ''; + + services = mkOpt (types.listOf types.str) '' + A list of services for which targets are retrieved. + ''; + + tags = mkOpt (types.listOf types.str) '' + An optional list of tags used to filter nodes for a given + service. Services must contain all tags in the list. + ''; + + node_meta = mkOpt (types.attrsOf types.str) '' + Node metadata used to filter nodes for a given service. + ''; + + tag_separator = mkDefOpt types.str "," '' + The string by which Consul tags are joined into the tag label. + ''; + + allow_stale = mkOpt types.bool '' + Allow stale Consul results + (see <link xlink:href="https://www.consul.io/api/index.html#consistency-modes"/>). + + Will reduce load on Consul. + ''; + + refresh_interval = mkDefOpt types.str "30s" '' + The time after which the provided names are refreshed. + + On large setup it might be a good idea to increase this value + because the catalog will change all the time. + ''; + }; + }; + + promTypes.file_sd_config = types.submodule { + options = { + files = mkOption { + type = types.listOf types.str; + description = '' + Patterns for files from which target groups are extracted. Refer + to the Prometheus documentation for permitted filename patterns + and formats. + ''; + }; + + refresh_interval = mkDefOpt types.str "5m" '' + Refresh interval to re-read the files. + ''; + }; + }; + + promTypes.relabel_config = types.submodule { + options = { + source_labels = mkOpt (types.listOf types.str) '' + The source labels select values from existing labels. Their content + is concatenated using the configured separator and matched against + the configured regular expression. + ''; + + separator = mkDefOpt types.str ";" '' + Separator placed between concatenated source label values. + ''; + + target_label = mkOpt types.str '' + Label to which the resulting value is written in a replace action. + It is mandatory for replace actions. + ''; + + regex = mkDefOpt types.str "(.*)" '' + Regular expression against which the extracted value is matched. + ''; + + modulus = mkOpt types.int '' + Modulus to take of the hash of the source label values. + ''; + + replacement = mkDefOpt types.str "$1" '' + Replacement value against which a regex replace is performed if the + regular expression matches. + ''; + + action = mkDefOpt (types.enum ["replace" "keep" "drop"]) "replace" '' + Action to perform based on regex matching. + ''; + + }; + }; + + promTypes.tls_config = types.submodule { + options = { + ca_file = mkOpt types.str '' + CA certificate to validate API server certificate with. + ''; + + cert_file = mkOpt types.str '' + Certificate file for client cert authentication to the server. + ''; + + key_file = mkOpt types.str '' + Key file for client cert authentication to the server. + ''; + + server_name = mkOpt types.str '' + ServerName extension to indicate the name of the server. + http://tools.ietf.org/html/rfc4366#section-3.1 + ''; + + insecure_skip_verify = mkOpt types.bool '' + Disable validation of the server certificate. + ''; + }; + }; + +in { + + imports = [ + (mkRenamedOptionModule [ "services" "prometheus2" ] [ "services" "prometheus" ]) + ]; + + options.services.prometheus = { + + enable = mkOption { + type = types.bool; + default = false; + description = '' + Enable the Prometheus monitoring daemon. + ''; + }; + + package = mkOption { + type = types.package; + default = pkgs.prometheus; + defaultText = "pkgs.prometheus"; + description = '' + The prometheus package that should be used. + ''; + }; + + port = mkOption { + type = types.port; + default = 9090; + description = '' + Port to listen on. + ''; + }; + + listenAddress = mkOption { + type = types.str; + default = "0.0.0.0"; + description = '' + Address to listen on for the web interface, API, and telemetry. + ''; + }; + + stateDir = mkOption { + type = types.str; + default = "prometheus2"; + description = '' + Directory below <literal>/var/lib</literal> to store Prometheus metrics data. + This directory will be created automatically using systemd's StateDirectory mechanism. + ''; + }; + + extraFlags = mkOption { + type = types.listOf types.str; + default = []; + description = '' + Extra commandline options when launching Prometheus. + ''; + }; + + environmentFile = mkOption { + type = types.nullOr types.path; + default = null; + example = "/root/prometheus.env"; + description = '' + Environment file as defined in <citerefentry> + <refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum> + </citerefentry>. + + Secrets may be passed to the service without adding them to the + world-readable Nix store, by specifying placeholder variables as + the option value in Nix and setting these variables accordingly in the + environment file. + + Environment variables from this file will be interpolated into the + config file using envsubst with this syntax: + <literal>$ENVIRONMENT ''${VARIABLE}</literal> + + <programlisting> + # Example scrape config entry handling an OAuth bearer token + { + job_name = "home_assistant"; + metrics_path = "/api/prometheus"; + scheme = "https"; + bearer_token = "\''${HOME_ASSISTANT_BEARER_TOKEN}"; + [...] + } + </programlisting> + + <programlisting> + # Content of the environment file + HOME_ASSISTANT_BEARER_TOKEN=someoauthbearertoken + </programlisting> + + Note that this file needs to be available on the host on which + <literal>Prometheus</literal> is running. + ''; + }; + + configText = mkOption { + type = types.nullOr types.lines; + default = null; + description = '' + If non-null, this option defines the text that is written to + prometheus.yml. If null, the contents of prometheus.yml is generated + from the structured config options. + ''; + }; + + globalConfig = mkOption { + type = promTypes.globalConfig; + default = {}; + description = '' + Parameters that are valid in all configuration contexts. They + also serve as defaults for other configuration sections + ''; + }; + + rules = mkOption { + type = types.listOf types.str; + default = []; + description = '' + Alerting and/or Recording rules to evaluate at runtime. + ''; + }; + + ruleFiles = mkOption { + type = types.listOf types.path; + default = []; + description = '' + Any additional rules files to include in this configuration. + ''; + }; + + scrapeConfigs = mkOption { + type = types.listOf promTypes.scrape_config; + default = []; + description = '' + A list of scrape configurations. + ''; + }; + + alertmanagers = mkOption { + type = types.listOf types.attrs; + example = literalExample '' + [ { + scheme = "https"; + path_prefix = "/alertmanager"; + static_configs = [ { + targets = [ + "prometheus.domain.tld" + ]; + } ]; + } ] + ''; + default = []; + description = '' + A list of alertmanagers to send alerts to. + See <link xlink:href="https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alertmanager_config">the official documentation</link> for more information. + ''; + }; + + alertmanagerNotificationQueueCapacity = mkOption { + type = types.int; + default = 10000; + description = '' + The capacity of the queue for pending alert manager notifications. + ''; + }; + + alertmanagerTimeout = mkOption { + type = types.int; + default = 10; + description = '' + Alert manager HTTP API timeout (in seconds). + ''; + }; + + webExternalUrl = mkOption { + type = types.nullOr types.str; + default = null; + example = "https://example.com/"; + description = '' + The URL under which Prometheus is externally reachable (for example, + if Prometheus is served via a reverse proxy). + ''; + }; + + checkConfig = mkOption { + type = types.bool; + default = true; + description = '' + Check configuration with <literal>promtool + check</literal>. The call to <literal>promtool</literal> is + subject to sandboxing by Nix. When credentials are stored in + external files (<literal>password_file</literal>, + <literal>bearer_token_file</literal>, etc), they will not be + visible to <literal>promtool</literal> and it will report + errors, despite a correct configuration. + ''; + }; + + retentionTime = mkOption { + type = types.nullOr types.str; + default = null; + example = "15d"; + description = '' + How long to retain samples in storage. + ''; + }; + }; + + config = mkIf cfg.enable { + assertions = [ + ( let + # Match something with dots (an IPv4 address) or something ending in + # a square bracket (an IPv6 addresses) followed by a port number. + legacy = builtins.match "(.*\\..*|.*]):([[:digit:]]+)" cfg.listenAddress; + in { + assertion = legacy == null; + message = '' + Do not specify the port for Prometheus to listen on in the + listenAddress option; use the port option instead: + services.prometheus.listenAddress = ${builtins.elemAt legacy 0}; + services.prometheus.port = ${builtins.elemAt legacy 1}; + ''; + } + ) + ]; + + users.groups.prometheus.gid = config.ids.gids.prometheus; + users.users.prometheus = { + description = "Prometheus daemon user"; + uid = config.ids.uids.prometheus; + group = "prometheus"; + }; + systemd.services.prometheus = { + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + preStart = '' + ${lib.getBin pkgs.envsubst}/bin/envsubst -o "/run/prometheus/prometheus-substituted.yaml" \ + -i "${prometheusYml}" + ''; + serviceConfig = { + ExecStart = "${cfg.package}/bin/prometheus" + + optionalString (length cmdlineArgs != 0) (" \\\n " + + concatStringsSep " \\\n " cmdlineArgs); + User = "prometheus"; + Restart = "always"; + EnvironmentFile = mkIf (cfg.environmentFile != null) [ cfg.environmentFile ]; + RuntimeDirectory = "prometheus"; + RuntimeDirectoryMode = "0700"; + WorkingDirectory = workingDir; + StateDirectory = cfg.stateDir; + }; + }; + }; +} |