aboutsummaryrefslogtreecommitdiff
path: root/nixos/tests/slurm.nix
diff options
context:
space:
mode:
author Lancelot SIX <lsix@lancelotsix.com> 2015-12-25 15:55:07 +0100
committer Lancelot SIX <lsix@lancelotsix.com> 2015-12-25 15:55:07 +0100
commit 4994f0f7d062f932ef36ca6e36d5556334c4c93a (patch)
tree 53dd683a126a553eb2d2dbf36f652fa1c93c7b4d /nixos/tests/slurm.nix
parent ca4c35478979106fe64163c11606516e3b3d2e7e (diff)
slurm service: add tests
Diffstat (limited to 'nixos/tests/slurm.nix')
-rw-r--r-- nixos/tests/slurm.nix 80
1 files changed, 80 insertions, 0 deletions
diff --git a/nixos/tests/slurm.nix b/nixos/tests/slurm.nix
new file mode 100644
index 00000000000..0dd00dfb04c
--- /dev/null
+++ b/nixos/tests/slurm.nix
@@ -0,0 +1,80 @@
+import ./make-test.nix ({ pkgs, ... }:
+# Shared munge secret; the test script writes it to /etc/munge/munge.key on
+# every machine so that all of them use the same key.
+let mungekey = "mungeverryweakkeybuteasytointegratoinatest";
+ # Slurm settings assigned to services.slurm on every machine of the test;
+ # the control machine merges `server.enable = true` with this set.
+ slurmconfig = {
+ client.enable = true;
+ # Host name of the controller; matches the `control` machine under `nodes`.
+ controlMachine = "control";
+ # Two-line value: the bare name "control" followed by a complete
+ # "NodeName=node[1-3] ..." entry.
+ # NOTE(review): presumably the slurm module emits this after a "NodeName="
+ # prefix, so the second line declares the three compute nodes as well --
+ # confirm against the slurm service module.
+ nodeName = ''
+ control
+ NodeName=node[1-3] CPUs=1 State=UNKNOWN
+ '';
+ # A single partition named "debug" that lists node[1-3] (not "control").
+ partitionName = "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP";
+ };
+in {
+ name = "slurm";
+
+ nodes =
+   let
+     # Machine definition shared by node1, node2 and node3: munge plus the
+     # common slurm settings; it does not set server.enable.
+     worker =
+       { config, pkgs, ... }:
+       {
+         services = {
+           munge.enable = true;
+           slurm = slurmconfig;
+         };
+         # TODO: the slurmd and slurmctld ports should be configuration
+         # options that are opened in the firewall automatically. Until
+         # then the firewall is simply switched off.
+         networking.firewall.enable = false;
+       };
+   in {
+     node1 = worker;
+     node2 = worker;
+     node3 = worker;
+
+     # The controller: same base settings, with server.enable added.
+     control =
+       { config, pkgs, ... }:
+       {
+         services = {
+           munge.enable = true;
+           # slurmconfig is the right-hand operand of `//`, so its
+           # attributes take precedence on any top-level name clash.
+           slurm = { server.enable = true; } // slurmconfig;
+         };
+         networking.firewall.enable = false;
+       };
+   };
+
+ # Perl script run by the NixOS test driver. It installs the shared munge
+ # key on every machine, (re)starts the slurm daemons and finally checks
+ # that a job submitted from `control` runs on three distinct hosts.
+ testScript =
+   ''
+     startAll;
+
+     # Every machine taking part in the test.
+     my @cluster = ($control, $node1, $node2, $node3);
+
+     # Set up authentication across the cluster: all machines get the
+     # same munge key, then munged is restarted to pick it up.
+     foreach my $node (@cluster)
+     {
+       $node->waitForUnit("default.target");
+
+       # -p keeps this step from failing if the directory already exists.
+       $node->succeed("mkdir -p /etc/munge");
+       $node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
+       $node->succeed("chmod 0400 /etc/munge/munge.key");
+       $node->succeed("systemctl restart munged");
+     }
+
+     # Restart the slurm services, since they have probably failed at boot
+     # because munge could not initialise without its key.
+
+     subtest "can_start_slurmctld", sub {
+       $control->succeed("systemctl restart slurmctld");
+       $control->waitForUnit("slurmctld.service");
+     };
+
+     subtest "can_start_slurmd", sub {
+       foreach my $node (@cluster)
+       {
+         $node->succeed("systemctl restart slurmd.service");
+         $node->waitForUnit("slurmd.service");
+       }
+     };
+
+     # Test that the cluster works and can distribute jobs.
+
+     subtest "run_distributed_command", sub {
+       # Run `hostname` on 3 nodes of the partition (i.e. on all of its
+       # nodes). The output must contain 3 different names: xargs appends
+       # the distinct-line count to `test 3 -eq`.
+       $control->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
+     };
+   '';
+})