aboutsummaryrefslogtreecommitdiff
path: root/nixpkgs/nixos/modules/security/systemd-confinement.nix
diff options
context:
space:
mode:
Diffstat (limited to 'nixpkgs/nixos/modules/security/systemd-confinement.nix')
-rw-r--r-- nixpkgs/nixos/modules/security/systemd-confinement.nix 199
1 files changed, 199 insertions, 0 deletions
diff --git a/nixpkgs/nixos/modules/security/systemd-confinement.nix b/nixpkgs/nixos/modules/security/systemd-confinement.nix
new file mode 100644
index 00000000000..cd4eb81dbe1
--- /dev/null
+++ b/nixpkgs/nixos/modules/security/systemd-confinement.nix
@@ -0,0 +1,199 @@
+{ config, pkgs, lib, ... }:
+
+let
+ toplevelConfig = config;
+ inherit (lib) types;
+ inherit (import ../system/boot/systemd-lib.nix {
+ inherit config pkgs lib;
+ }) mkPathSafeName;
+in {
+ options.systemd.services = lib.mkOption {
+ type = types.attrsOf (types.submodule ({ name, config, ... }: {
+ # Opt-in switch: the submodule's `config` section is only applied
+ # (via lib.mkIf) when this is true.
+ options.confinement.enable = lib.mkOption {
+   default = false;
+   type = types.bool;
+   description = ''
+     If set, all the required runtime store paths for this service are
+     bind-mounted into a <literal>tmpfs</literal>-based <citerefentry>
+     <refentrytitle>chroot</refentrytitle>
+     <manvolnum>2</manvolnum>
+     </citerefentry>.
+   '';
+ };
+
+ # Chooses what seeds the chroot closure: the serialized unit when true,
+ # otherwise only the values of the Exec* options.
+ options.confinement.fullUnit = lib.mkOption {
+   default = false;
+   type = types.bool;
+   description = ''
+     Whether to include the full closure of the systemd unit file into the
+     chroot, instead of just the dependencies for the executables.
+
+     <warning><para>While it may be tempting to just enable this option to
+     make things work quickly, please be aware that this might add paths
+     to the closure of the chroot that you didn't anticipate. It's better
+     to use <option>confinement.packages</option> to <emphasis
+     role="strong">explicitly</emphasis> add additional store paths to the
+     chroot.</para></warning>
+   '';
+ };
+
+ # Extra closure roots for the chroot, in addition to the entries the
+ # submodule's `config` section contributes itself.
+ options.confinement.packages = let
+   # Renders a serviceConfig option name as DocBook <option> markup.
+   scOptionTag = optName: "<option>serviceConfig.${optName}</option>";
+   # Comma-separated head of the enumeration; ExecStart is appended in the
+   # description text after "and".
+   leadingExecOpts = lib.concatMapStringsSep ", " scOptionTag [
+     "ExecReload" "ExecStartPost" "ExecStartPre" "ExecStop"
+     "ExecStopPost"
+   ];
+ in lib.mkOption {
+   default = [];
+   type = types.listOf (types.either types.str types.package);
+   description = ''
+     Additional packages or strings with context to add to the closure of
+     the chroot. By default, this includes all the packages from the
+     ${leadingExecOpts} and ${scOptionTag "ExecStart"} options. If you want to have all the
+     dependencies of this systemd unit, you can use
+     <option>confinement.fullUnit</option>.
+
+     <note><para>The store paths listed in <option>path</option> are
+     <emphasis role="strong">not</emphasis> included in the closure as
+     well as paths from other options except those listed
+     above.</para></note>
+   '';
+ };
+
+ # Program exposed as /bin/sh inside the chroot; `null` means no /bin/sh.
+ # Defaults to the system-wide `environment.binsh` setting.
+ options.confinement.binSh = lib.mkOption {
+   type = types.nullOr types.path;
+   default = toplevelConfig.environment.binsh;
+   defaultText = "config.environment.binsh";
+   # A literal example is rendered verbatim and has to be a valid Nix
+   # expression, so the interpolated path needs its surrounding double
+   # quotes. The value below renders as: "${pkgs.dash}/bin/dash"
+   example = lib.literalExample ''"''${pkgs.dash}/bin/dash"'';
+   description = ''
+     The program to make available as <filename>/bin/sh</filename> inside
+     the chroot. If this is set to <literal>null</literal>, no
+     <filename>/bin/sh</filename> is provided at all.
+
+     This is useful for some applications, which for example use the
+     <citerefentry>
+     <refentrytitle>system</refentrytitle>
+     <manvolnum>3</manvolnum>
+     </citerefentry> library function to execute commands.
+   '';
+ };
+
+ # Isolation level. The submodule's `config` section derives the defaults
+ # of MountAPIVFS and the Private*/Protect* settings from this value.
+ options.confinement.mode = lib.mkOption {
+   default = "full-apivfs";
+   type = types.enum [ "full-apivfs" "chroot-only" ];
+   description = ''
+     The value <literal>full-apivfs</literal> (the default) sets up
+     private <filename class="directory">/dev</filename>, <filename
+     class="directory">/proc</filename>, <filename
+     class="directory">/sys</filename> and <filename
+     class="directory">/tmp</filename> file systems in a separate user
+     name space.
+
+     If this is set to <literal>chroot-only</literal>, only the file
+     system name space is set up along with the call to <citerefentry>
+     <refentrytitle>chroot</refentrytitle>
+     <manvolnum>2</manvolnum>
+     </citerefentry>.
+
+     <note><para>This doesn't cover network namespaces and is solely for
+     file system level isolation.</para></note>
+   '';
+ };
+
+ # Unit settings contributed by confinement. The whole set is wrapped in
+ # lib.mkIf, so it is inert unless `confinement.enable` is set.
+ config = let
+   # Name of the derivation that serves as RootDirectory.
+   rootName = "${mkPathSafeName name}-chroot";
+   inherit (config.confinement) binSh fullUnit;
+   # mkDefault keeps every setting derived from the mode overridable by
+   # the service definition.
+   wantsAPIVFS = lib.mkDefault (config.confinement.mode == "full-apivfs");
+ in lib.mkIf config.confinement.enable {
+   serviceConfig = {
+     # The chroot root is an empty directory built in the store.
+     RootDirectory = pkgs.runCommand rootName {} "mkdir \"$out\"";
+     TemporaryFileSystem = "/";
+     PrivateMounts = lib.mkDefault true;
+
+     # https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt
+     # to change some of these to default to true.
+     #
+     # If we run in chroot-only mode, having something like PrivateDevices
+     # set to true by default will mount /dev within the chroot, whereas
+     # with "chroot-only" it's expected that there are no /dev, /proc and
+     # /sys file systems available.
+     #
+     # However, if this suddenly becomes true, the attack surface will
+     # increase, so let's explicitly set these options to true/false
+     # depending on the mode.
+     MountAPIVFS = wantsAPIVFS;
+     PrivateDevices = wantsAPIVFS;
+     PrivateTmp = wantsAPIVFS;
+     PrivateUsers = wantsAPIVFS;
+     ProtectControlGroups = wantsAPIVFS;
+     ProtectKernelModules = wantsAPIVFS;
+     ProtectKernelTunables = wantsAPIVFS;
+   };
+   confinement.packages = let
+     execOpts = [
+       "ExecReload" "ExecStart" "ExecStartPost" "ExecStartPre" "ExecStop"
+       "ExecStopPost"
+     ];
+     # A serviceConfig value can be a single command or a list of commands.
+     # List values are spliced in element by element: confinement.packages
+     # only accepts strings and packages, so a nested list would fail its
+     # type check.
+     execPkgs = lib.concatMap (opt: let
+       isSet = config.serviceConfig ? ${opt};
+     in lib.optionals isSet (lib.toList config.serviceConfig.${opt})) execOpts;
+     unitAttrs = toplevelConfig.systemd.units."${name}.service";
+     # One JSON string for the whole unit; its string context is what
+     # brings the unit's store paths into the closure.
+     allPkgs = lib.singleton (builtins.toJSON unitAttrs);
+     unitPkgs = if fullUnit then allPkgs else execPkgs;
+   in unitPkgs ++ lib.optional (binSh != null) binSh;
+ };
+ }));
+ };
+
+ # Evaluation-time checks: for every confined service, reject serviceConfig
+ # settings that are not supported together with confinement.
+ config.assertions = let
+   # Common prefix shared by all assertion messages of one service.
+   describe = svcName: optName: "The 'serviceConfig' option '${optName}' for"
+     + " service '${svcName}' is enabled in conjunction with"
+     + " 'confinement.enable'";
+
+   # Assertions for one service; empty when the service is not confined.
+   checksFor = svcName: svc: lib.optionals svc.confinement.enable [
+     # `or false` supplies the fallback for an unset attribute; the
+     # negation applies to the result of that lookup.
+     { assertion = !(svc.serviceConfig.RootDirectoryStartOnly or false);
+       message = "${describe svcName "RootDirectoryStartOnly"}, but right now systemd"
+         + " doesn't support restricting bind-mounts to 'ExecStart'."
+         + " Please either define a separate service or find a way to run"
+         + " commands other than ExecStart within the chroot.";
+     }
+     { assertion = !(svc.serviceConfig.DynamicUser or false);
+       message = "${describe svcName "DynamicUser"}. Please create a dedicated user via"
+         + " the 'users.users' option instead as this combination is"
+         + " currently not supported.";
+     }
+   ];
+ in lib.concatLists (lib.mapAttrsToList checksFor config.systemd.services);
+
+ # For every confined service, add a derivation that contains a
+ # `<name>.service` fragment with one BindReadOnlyPaths= line per store
+ # path in the closure of `confinement.packages`.
+ config.systemd.packages = let
+   # Builds the unit fragment derivation for a single service.
+   mkChrootPaths = name: svc: let
+     safeName = mkPathSafeName name;
+     # Text file whose only purpose is to act as closure root: it holds
+     # the package strings, one per line.
+     rootPaths = pkgs.writeText "${safeName}-string-contexts.txt"
+       (lib.concatStringsSep "\n" svc.confinement.packages);
+   in pkgs.runCommand "${safeName}-chroot-paths" {
+     closureInfo = pkgs.closureInfo { inherit rootPaths; };
+     serviceName = "${name}.service";
+     # The root file itself is skipped when the bind mounts are written.
+     excludedPath = rootPaths;
+   } ''
+     mkdir -p "$out/lib/systemd/system"
+     serviceFile="$out/lib/systemd/system/$serviceName"
+
+     echo '[Service]' > "$serviceFile"
+
+     # /bin/sh is special here, because the option value could contain a
+     # symlink and we need to properly resolve it.
+     ${lib.optionalString (svc.confinement.binSh != null) ''
+       binsh=${lib.escapeShellArg svc.confinement.binSh}
+       realprog="$(readlink -e "$binsh")"
+       echo "BindReadOnlyPaths=$realprog:/bin/sh" >> "$serviceFile"
+     ''}
+
+     while read storePath; do
+     if [ -L "$storePath" ]; then
+     # Currently, systemd can't cope with symlinks in Bind(ReadOnly)Paths,
+     # so let's just bind-mount the target to that location.
+     echo "BindReadOnlyPaths=$(readlink -e "$storePath"):$storePath"
+     elif [ "$storePath" != "$excludedPath" ]; then
+     echo "BindReadOnlyPaths=$storePath"
+     fi
+     done < "$closureInfo/store-paths" >> "$serviceFile"
+   '';
+ in lib.concatLists (lib.mapAttrsToList (name: svc:
+   lib.optional svc.confinement.enable (mkChrootPaths name svc)
+ ) config.systemd.services);
+}