aboutsummaryrefslogtreecommitdiff
path: root/nixos/modules/services/cluster/kubernetes/pki.nix
diff options
context:
space:
mode:
author Christian Albrecht <christian.albrecht@mayflower.de> 2019-03-01 08:44:45 +0100
committer Christian Albrecht <christian.albrecht@mayflower.de> 2019-03-03 19:39:02 +0100
commit 62f03750e48ae7658ea18d7ac75833279da02a5a (patch)
tree 7ea701d69b681a45ebc5e2b9aec90224f8de8fee /nixos/modules/services/cluster/kubernetes/pki.nix
parent f9e2f76a590d11cbeaa10e3953ddc96110bf1b3b (diff)
nixos/kubernetes: Stabilize services startup across machines
by adding targets and curl wait loops to services to ensure services are not started before the services they depend on are reachable.

Extra targets cfssl-online.target and kube-apiserver-online.target synchronize starts across machines, and node-online.target ensures docker is restarted and ready to deploy containers after flannel has negotiated the network CIDR with the apiserver.

Since flannel needs to be started before addon-manager to configure the docker interface, it has to have its own rbac bootstrap service.

The curl wait loops within the other services exist to ensure that each service is able to do its work immediately when it is started, without cluttering the log with messages about failing conditions.

By ensuring kubernetes.target is only reached after starting the cluster, it can be used in the tests as a wait condition.

In kube-certmgr-bootstrap, mkdir is needed for it to not fail to start.

The following is the relevant part of systemctl list-dependencies default.target:

● ├─certmgr.service
● ├─cfssl.service
● ├─docker.service
● ├─etcd.service
● ├─flannel.service
● ├─kubernetes.target
● │ ├─kube-addon-manager.service
● │ ├─kube-proxy.service
● │ ├─kube-apiserver-online.target
● │ │ ├─flannel-rbac-bootstrap.service
● │ │ ├─kube-apiserver-online.service
● │ │ ├─kube-apiserver.service
● │ │ ├─kube-controller-manager.service
● │ │ └─kube-scheduler.service
● │ └─node-online.target
● │   ├─node-online.service
● │   ├─flannel.target
● │   │ ├─flannel.service
● │   │ └─mk-docker-opts.service
● │   └─kubelet.target
● │     └─kubelet.service
● ├─network-online.target
● │ └─cfssl-online.target
● │   ├─certmgr.service
● │   ├─cfssl-online.service
● │   └─kube-certmgr-bootstrap.service
Diffstat (limited to 'nixos/modules/services/cluster/kubernetes/pki.nix')
-rw-r--r-- nixos/modules/services/cluster/kubernetes/pki.nix 65
1 files changed, 59 insertions, 6 deletions
diff --git a/nixos/modules/services/cluster/kubernetes/pki.nix b/nixos/modules/services/cluster/kubernetes/pki.nix
index 8ad17d4dfb4e..d08d7892bb53 100644
--- a/nixos/modules/services/cluster/kubernetes/pki.nix
+++ b/nixos/modules/services/cluster/kubernetes/pki.nix
@@ -119,6 +119,7 @@ in
cfsslCertPathPrefix = "${config.services.cfssl.dataDir}/cfssl";
cfsslCert = "${cfsslCertPathPrefix}.pem";
cfsslKey = "${cfsslCertPathPrefix}-key.pem";
+ cfsslPort = toString config.services.cfssl.port;
certmgrPaths = [
top.caFile
@@ -191,13 +192,39 @@ in
chown cfssl "${cfsslAPITokenPath}" && chmod 400 "${cfsslAPITokenPath}"
'')]);
+ systemd.targets.cfssl-online = {
+ wantedBy = [ "network-online.target" ];
+ after = [ "cfssl.service" "network-online.target" "cfssl-online.service" ];
+ };
+
+ systemd.services.cfssl-online = {
+ description = "Wait for ${remote} to be reachable.";
+ wantedBy = [ "cfssl-online.target" ];
+ before = [ "cfssl-online.target" ];
+ preStart = ''
+ ${top.lib.mkWaitCurl {
+ address = remote;
+ path = "/api/v1/cfssl/info";
+ args = "-kd '{}' -o /dev/null";
+ }}
+ '';
+ script = "echo Ok";
+ serviceConfig = {
+ TimeoutSec = "300";
+ };
+ };
+
systemd.services.kube-certmgr-bootstrap = {
description = "Kubernetes certmgr bootstrapper";
- wantedBy = [ "certmgr.service" ];
- after = [ "cfssl.target" ];
+ wantedBy = [ "cfssl-online.target" ];
+ after = [ "cfssl-online.target" ];
+ before = [ "certmgr.service" ];
script = concatStringsSep "\n" [''
set -e
+ mkdir -p $(dirname ${certmgrAPITokenPath})
+ mkdir -p $(dirname ${top.caFile})
+
# If there's a cfssl (cert issuer) running locally, then don't rely on user to
# manually paste it in place. Just symlink.
# otherwise, create the target file, ready for users to insert the token
@@ -209,14 +236,18 @@ in
fi
''
(optionalString (cfg.pkiTrustOnBootstrap) ''
- if [ ! -f "${top.caFile}" ] || [ $(cat "${top.caFile}" | wc -c) -lt 1 ]; then
- ${pkgs.curl}/bin/curl --fail-early -f -kd '{}' ${remote}/api/v1/cfssl/info | \
- ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}
+ if [ ! -s "${top.caFile}" ]; then
+ ${top.lib.mkWaitCurl {
+ address = "https://${top.masterAddress}:${cfsslPort}";
+ path = "/api/v1/cfssl/info";
+ args = "-kd '{}' -o - | ${pkgs.cfssl}/bin/cfssljson -stdout >${top.caFile}";
+ }}
fi
'')
];
serviceConfig = {
- RestartSec = "10s";
+ TimeoutSec = "300";
+ RestartSec = "1s";
Restart = "on-failure";
};
};
@@ -254,6 +285,14 @@ in
};
systemd.services.certmgr = {
+ wantedBy = [ "cfssl-online.target" ];
+ after = [ "cfssl-online.target" "kube-certmgr-bootstrap.service" ];
+ preStart = ''
+ while ! test -s ${certmgrAPITokenPath} ; do
+ sleep 1
+ echo Waiting for ${certmgrAPITokenPath}
+ done
+ '';
unitConfig.ConditionPathExists = certmgrPaths;
};
@@ -289,6 +328,12 @@ in
''
export KUBECONFIG=${clusterAdminKubeconfig}
${kubectl}/bin/kubectl apply -f ${concatStringsSep " \\\n -f " files}
+
+ ${top.lib.mkWaitCurl (with top.pki.certs.addonManager; {
+ path = "/api/v1/namespaces/kube-system/serviceaccounts/default";
+ cacert = top.caFile;
+ inherit cert key;
+ })}
'';
})
{
@@ -384,6 +429,14 @@ in
};
systemd.services.flannel = {
+ preStart = ''
+ ${top.lib.mkWaitCurl (with top.pki.certs.flannelClient; {
+ path = "/api/v1/nodes";
+ cacert = top.caFile;
+ inherit cert key;
+ args = "-o - | grep podCIDR >/dev/null";
+ })}
+ '';
unitConfig.ConditionPathExists = flannelPaths;
};