Diffstat (limited to 'nixpkgs/nixos/modules/services/computing/slurm/slurm.nix')
 nixpkgs/nixos/modules/services/computing/slurm/slurm.nix | 412 ++++++++++++++++
 1 file changed, 412 insertions(+), 0 deletions(-)
diff --git a/nixpkgs/nixos/modules/services/computing/slurm/slurm.nix b/nixpkgs/nixos/modules/services/computing/slurm/slurm.nix
new file mode 100644
index 00000000000..c70d999ca96
--- /dev/null
+++ b/nixpkgs/nixos/modules/services/computing/slurm/slurm.nix
@@ -0,0 +1,412 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+
+ cfg = config.services.slurm;
+  # The configuration file can be generated with http://slurm.schedmd.com/configurator.html
+
+ defaultUser = "slurm";
+
+ configFile = pkgs.writeTextDir "slurm.conf"
+ ''
+ ClusterName=${cfg.clusterName}
+ StateSaveLocation=${cfg.stateSaveLocation}
+ SlurmUser=${cfg.user}
+ ${optionalString (cfg.controlMachine != null) ''controlMachine=${cfg.controlMachine}''}
+ ${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
+      ${concatStringsSep "\n" (map (x: "NodeName=${x}") cfg.nodeName)}
+      ${concatStringsSep "\n" (map (x: "PartitionName=${x}") cfg.partitionName)}
+ PlugStackConfig=${plugStackConfig}/plugstack.conf
+ ProctrackType=${cfg.procTrackType}
+ ${cfg.extraConfig}
+ '';
+
+ plugStackConfig = pkgs.writeTextDir "plugstack.conf"
+ ''
+ ${optionalString cfg.enableSrunX11 ''optional ${pkgs.slurm-spank-x11}/lib/x11.so''}
+ ${cfg.extraPlugstackConfig}
+ '';
+
+ cgroupConfig = pkgs.writeTextDir "cgroup.conf"
+ ''
+ ${cfg.extraCgroupConfig}
+ '';
+
+ slurmdbdConf = pkgs.writeTextDir "slurmdbd.conf"
+ ''
+ DbdHost=${cfg.dbdserver.dbdHost}
+ SlurmUser=${cfg.user}
+ StorageType=accounting_storage/mysql
+ StorageUser=${cfg.dbdserver.storageUser}
+ ${optionalString (cfg.dbdserver.storagePass != null) "StoragePass=${cfg.dbdserver.storagePass}"}
+ ${cfg.dbdserver.extraConfig}
+ '';
+
+ # slurm expects some additional config files to be
+ # in the same directory as slurm.conf
+ etcSlurm = pkgs.symlinkJoin {
+ name = "etc-slurm";
+ paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
+ };
+in
+
+{
+
+ ###### interface
+
+ meta.maintainers = [ maintainers.markuskowa ];
+
+ options = {
+
+ services.slurm = {
+
+ server = {
+ enable = mkOption {
+ type = types.bool;
+ default = false;
+ description = ''
+            Whether to enable the slurm control daemon.
+ Note that the standard authentication method is "munge".
+ The "munge" service needs to be provided with a password file in order for
+ slurm to work properly (see <literal>services.munge.password</literal>).
+ '';
+ };
+ };
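+
+      # A minimal control-node sketch (the host names "node1"/"node[1-2]"
+      # are hypothetical; the munge key path is that module's default):
+      #   services.slurm = {
+      #     server.enable = true;
+      #     controlMachine = "node1";
+      #     nodeName = [ "node[1-2] CPUs=1 State=UNKNOWN" ];
+      #     partitionName = [ "debug Nodes=node[1-2] Default=YES MaxTime=INFINITE State=UP" ];
+      #   };
+      #   services.munge.password = "/etc/munge/munge.key";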
+
+ dbdserver = {
+ enable = mkEnableOption "SlurmDBD service";
+
+ dbdHost = mkOption {
+ type = types.str;
+ default = config.networking.hostName;
+ description = ''
+ Hostname of the machine where <literal>slurmdbd</literal>
+ is running (i.e. name returned by <literal>hostname -s</literal>).
+ '';
+ };
+
+ storageUser = mkOption {
+ type = types.str;
+ default = cfg.user;
+ description = ''
+ Database user name.
+ '';
+ };
+
+ storagePass = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ description = ''
+            Database password. Note that this password will be world-readable
+            in the nix store. Use <option>configFile</option> to store the
+            config file and password outside the nix store.
+ '';
+ };
+
+ configFile = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ description = ''
+ Path to <literal>slurmdbd.conf</literal>. The password for the database connection
+            is stored in the config file. Use this option to specify a path
+            outside the nix store. If this option is unset, a configuration file
+ will be generated. See also:
+ <citerefentry><refentrytitle>slurmdbd.conf</refentrytitle>
+ <manvolnum>8</manvolnum></citerefentry>.
+ '';
+ };
+
+ extraConfig = mkOption {
+ type = types.lines;
+ default = "";
+ description = ''
+            Extra configuration for <literal>slurmdbd.conf</literal>.
+ '';
+ };
+ };
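+
+      # A sketch of an accounting host, assuming a local MySQL server and a
+      # manually deployed /etc/slurm/slurmdbd.conf holding the StoragePass:
+      #   services.slurm.dbdserver = {
+      #     enable = true;
+      #     configFile = "/etc/slurm/slurmdbd.conf";
+      #   };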
+
+ client = {
+ enable = mkEnableOption "slurm client daemon";
+ };
+
+ enableStools = mkOption {
+ type = types.bool;
+ default = false;
+ description = ''
+          Whether to provide a slurm.conf file.
+ Enable this option if you do not run a slurm daemon on this host
+ (i.e. <literal>server.enable</literal> and <literal>client.enable</literal> are <literal>false</literal>)
+ but you still want to run slurm commands from this host.
+ '';
+ };
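+
+      # A sketch of a submit-only login host (controller name hypothetical):
+      #   services.slurm = {
+      #     enableStools = true;
+      #     controlMachine = "node1";
+      #   };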
+
+ package = mkOption {
+ type = types.package;
+ default = pkgs.slurm.override { enableX11 = ! cfg.enableSrunX11; };
+ defaultText = "pkgs.slurm";
+ example = literalExample "pkgs.slurm-full";
+ description = ''
+ The package to use for slurm binaries.
+ '';
+ };
+
+ controlMachine = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ example = null;
+ description = ''
+ The short hostname of the machine where SLURM control functions are
+ executed (i.e. the name returned by the command "hostname -s", use "tux001"
+ rather than "tux001.my.com").
+ '';
+ };
+
+ controlAddr = mkOption {
+ type = types.nullOr types.str;
+ default = cfg.controlMachine;
+ example = null;
+ description = ''
+          Name by which <literal>ControlMachine</literal> should be referred to
+          when establishing a communications path.
+ '';
+ };
+
+ clusterName = mkOption {
+ type = types.str;
+ default = "default";
+ example = "myCluster";
+ description = ''
+ Necessary to distinguish accounting records in a multi-cluster environment.
+ '';
+ };
+
+ nodeName = mkOption {
+ type = types.listOf types.str;
+ default = [];
+ example = literalExample ''[ "linux[1-32] CPUs=1 State=UNKNOWN" ];'';
+ description = ''
+ Name that SLURM uses to refer to a node (or base partition for BlueGene
+ systems). Typically this would be the string that "/bin/hostname -s"
+          returns. Note that the node's parameters (e.g. CPUs, State) must follow the name.
+ '';
+ };
+
+ partitionName = mkOption {
+ type = types.listOf types.str;
+ default = [];
+ example = literalExample ''[ "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP" ];'';
+ description = ''
+          Name by which the partition may be referenced. Note that the
+          partition's parameters must follow the name.
+ '';
+ };
+
+ enableSrunX11 = mkOption {
+ default = false;
+ type = types.bool;
+ description = ''
+ If enabled srun will accept the option "--x11" to allow for X11 forwarding
+ from within an interactive session or a batch job. This activates the
+ slurm-spank-x11 module. Note that this option also enables
+ <option>services.openssh.forwardX11</option> on the client.
+
+ This option requires slurm to be compiled without native X11 support.
+ The default behavior is to re-compile the slurm package with native X11
+ support disabled if this option is set to true.
+
+ To use the native X11 support add <literal>PrologFlags=X11</literal> in <option>extraConfig</option>.
+          Note that this method only works with RSA SSH host keys.
+ '';
+ };
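+
+      # A sketch using slurm's native X11 support instead of slurm-spank-x11
+      # (subject to the RSA host-key caveat above):
+      #   services.slurm = {
+      #     enableSrunX11 = false;
+      #     extraConfig = "PrologFlags=X11";
+      #   };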
+
+ procTrackType = mkOption {
+ type = types.str;
+ default = "proctrack/linuxproc";
+ description = ''
+ Plugin to be used for process tracking on a job step basis.
+ The slurmd daemon uses this mechanism to identify all processes
+ which are children of processes it spawns for a user job step.
+ '';
+ };
+
+ stateSaveLocation = mkOption {
+ type = types.str;
+ default = "/var/spool/slurmctld";
+ description = ''
+ Directory into which the Slurm controller, slurmctld, saves its state.
+ '';
+ };
+
+ user = mkOption {
+ type = types.str;
+ default = defaultUser;
+ description = ''
+          Set this option if you want to run the slurmctld daemon
+          as a user other than the default "slurm" user.
+ Note that the UID of this user needs to be the same
+ on all nodes.
+ '';
+ };
+
+ extraConfig = mkOption {
+ default = "";
+ type = types.lines;
+ description = ''
+ Extra configuration options that will be added verbatim at
+ the end of the slurm configuration file.
+ '';
+ };
+
+ extraPlugstackConfig = mkOption {
+ default = "";
+ type = types.lines;
+ description = ''
+ Extra configuration that will be added to the end of <literal>plugstack.conf</literal>.
+ '';
+ };
+
+ extraCgroupConfig = mkOption {
+ default = "";
+ type = types.lines;
+ description = ''
+ Extra configuration for <literal>cgroup.conf</literal>. This file is
+ used when <literal>procTrackType=proctrack/cgroup</literal>.
+ '';
+ };
+
+ extraConfigPaths = mkOption {
+ type = with types; listOf path;
+ default = [];
+ description = ''
+ Slurm expects config files for plugins in the same path
+ as <literal>slurm.conf</literal>. Add extra nix store
+          paths that should be merged into the same directory as
+ <literal>slurm.conf</literal>.
+ '';
+ };
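+
+      # A sketch that merges a hypothetical gres.conf into that directory:
+      #   services.slurm.extraConfigPaths = [
+      #     (pkgs.writeTextDir "gres.conf" "NodeName=node1 Name=gpu File=/dev/nvidia0")
+      #   ];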
+
+
+ };
+
+ };
+
+
+ ###### implementation
+
+ config =
+ let
+ wrappedSlurm = pkgs.stdenv.mkDerivation {
+ name = "wrappedSlurm";
+
+ builder = pkgs.writeText "builder.sh" ''
+ source $stdenv/setup
+ mkdir -p $out/bin
+ find ${getBin cfg.package}/bin -type f -executable | while read EXE
+ do
+ exename="$(basename $EXE)"
+ wrappername="$out/bin/$exename"
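+        # Escape runtime expansions with \$ so they are evaluated when the
+        # wrapper runs, not while this builder generates it.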
+ cat > "$wrappername" <<EOT
+ #!/bin/sh
+      if [ -z "\$SLURM_CONF" ]
+ then
+ SLURM_CONF="${etcSlurm}/slurm.conf" "$EXE" "\$@"
+ else
+        "$EXE" "\$@"
+ fi
+ EOT
+ chmod +x "$wrappername"
+ done
+
+ mkdir -p $out/share
+ ln -s ${getBin cfg.package}/share/man $out/share/man
+ '';
+ };
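+      # The wrappers default SLURM_CONF to the generated directory above but
+      # let callers override it, e.g. (hypothetical path):
+      #   SLURM_CONF=/etc/slurm-test/slurm.conf sinfo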
+
+ in mkIf ( cfg.enableStools ||
+ cfg.client.enable ||
+ cfg.server.enable ||
+ cfg.dbdserver.enable ) {
+
+ environment.systemPackages = [ wrappedSlurm ];
+
+ services.munge.enable = mkDefault true;
+
+ # use a static uid as default to ensure it is the same on all nodes
+ users.users.slurm = mkIf (cfg.user == defaultUser) {
+ name = defaultUser;
+ group = "slurm";
+ uid = config.ids.uids.slurm;
+ };
+
+    users.groups.slurm.gid = config.ids.gids.slurm;
+
+ systemd.services.slurmd = mkIf (cfg.client.enable) {
+ path = with pkgs; [ wrappedSlurm coreutils ]
+ ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
+
+ wantedBy = [ "multi-user.target" ];
+ after = [ "systemd-tmpfiles-clean.service" ];
+
+ serviceConfig = {
+ Type = "forking";
+ KillMode = "process";
+ ExecStart = "${wrappedSlurm}/bin/slurmd";
+ PIDFile = "/run/slurmd.pid";
+ ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
+ };
+
+ preStart = ''
+ mkdir -p /var/spool
+ '';
+ };
+
+    services.openssh.forwardX11 = mkIf (cfg.client.enable && cfg.enableSrunX11) (mkDefault true);
+
+ systemd.services.slurmctld = mkIf (cfg.server.enable) {
+ path = with pkgs; [ wrappedSlurm munge coreutils ]
+ ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
+
+ wantedBy = [ "multi-user.target" ];
+ after = [ "network.target" "munged.service" ];
+ requires = [ "munged.service" ];
+
+ serviceConfig = {
+ Type = "forking";
+ ExecStart = "${wrappedSlurm}/bin/slurmctld";
+ PIDFile = "/run/slurmctld.pid";
+ ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
+ };
+
+ preStart = ''
+ mkdir -p ${cfg.stateSaveLocation}
+ chown -R ${cfg.user}:slurm ${cfg.stateSaveLocation}
+ '';
+ };
+
+ systemd.services.slurmdbd = mkIf (cfg.dbdserver.enable) {
+ path = with pkgs; [ wrappedSlurm munge coreutils ];
+
+ wantedBy = [ "multi-user.target" ];
+ after = [ "network.target" "munged.service" "mysql.service" ];
+ requires = [ "munged.service" "mysql.service" ];
+
+      # slurmdbd locates its config directory by stripping the last path
+      # component off SLURM_CONF, so point it at a file inside the
+      # directory that holds slurmdbd.conf
+ environment.SLURM_CONF =
+ if (cfg.dbdserver.configFile == null) then
+ "${slurmdbdConf}/slurm.conf"
+ else
+ cfg.dbdserver.configFile;
+
+ serviceConfig = {
+ Type = "forking";
+ ExecStart = "${cfg.package}/bin/slurmdbd";
+ PIDFile = "/run/slurmdbd.pid";
+ ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
+ };
+ };
+
+ };
+
+}