4 cryptorand "crypto/rand"
15 "github.com/Sirupsen/logrus"
16 cfconfig "github.com/cloudflare/cfssl/config"
17 "github.com/docker/swarmkit/api"
18 "github.com/docker/swarmkit/connectionbroker"
19 "github.com/docker/swarmkit/identity"
20 "github.com/docker/swarmkit/log"
21 "github.com/docker/swarmkit/watch"
22 "github.com/opencontainers/go-digest"
23 "github.com/pkg/errors"
24 "google.golang.org/grpc/credentials"
26 "golang.org/x/net/context"
// File names of the root CA and node TLS material.
30 rootCACertFilename = "swarm-root-ca.crt"
31 rootCAKeyFilename = "swarm-root-ca.key"
32 nodeTLSCertFilename = "swarm-node.crt"
33 nodeTLSKeyFilename = "swarm-node.key"
34 nodeCSRFilename = "swarm-node.csr"
36 // DefaultRootCN represents the root CN that we should create root CAs with by default
37 DefaultRootCN = "swarm-ca"
38 // ManagerRole represents the Manager node type, and is used for authorization to endpoints
39 ManagerRole = "swarm-manager"
40 // WorkerRole represents the Worker node type, and is used for authorization to endpoints
41 WorkerRole = "swarm-worker"
42 // CARole represents the CA node type, and is used for clients attempting to get new certificates issued
// generatedSecretEntropyBytes is the number of random bytes drawn for a join
// token secret (16 bytes = 128 bits).
45 generatedSecretEntropyBytes = 16
47 // maxGeneratedSecretLength is ceil(log(2^128-1, 36)): the number of base-36 digits needed for a 128-bit secret
48 maxGeneratedSecretLength = 25
49 // ceil(log(2^256-1, 36))
54 // GetCertRetryInterval is how long to wait before retrying a node
55 // certificate or root certificate request.
56 GetCertRetryInterval = 2 * time.Second
59 // SecurityConfig is used to represent a node's security configuration. It includes information about
60 // the RootCA and ServerTLSCreds/ClientTLSCreds transport authenticators to be used for MTLS
61 type SecurityConfig struct {
62 // mu protects against concurrent access to fields inside the structure.
65 // renewalMu makes sure only one certificate renewal attempt happens at
66 // a time. It should never be locked after mu is already locked.
// externalCA is the external CA client; its root CA and TLS config are
// refreshed whenever the root CA or the TLS credentials are updated.
70 externalCA *ExternalCA
// keyReadWriter backs both KeyReader() and KeyWriter().
71 keyReadWriter *KeyReadWriter
// certificate and issuerInfo describe the TLS key pair currently in use and
// its issuer; updateTLSCredentials replaces them together.
73 certificate *tls.Certificate
74 issuerInfo *IssuerInfo
// externalCAClientRootPool is used as RootCAs in the TLS config handed to
// the external CA client.
76 externalCAClientRootPool *x509.CertPool
// ServerTLSCreds and ClientTLSCreds are reloaded in place with a new TLS
// config whenever the TLS credentials are updated.
78 ServerTLSCreds *MutableTLSCreds
79 ClientTLSCreds *MutableTLSCreds
81 // An optional queue for anyone interested in subscribing to SecurityConfig updates
85 // CertificateUpdate represents a change in the underlying TLS configuration that results from
86 // a certificate renewal event.
87 type CertificateUpdate struct {
// validateRootCAAndTLSCert checks that the leaf certificate of tlsKeyPair
// verifies twice: once for the new root CA and once with externalCARootPool as
// the trusted roots. Every DER certificate in tlsKeyPair.Certificate is parsed;
// parse and verification failures are returned wrapped with a descriptive
// message.
// NOTE(review): how the leaf is picked out of the chain and which Roots are
// used for the first verification are on lines not visible here — presumably
// index 0 is the leaf and the first check uses rootCA.Pool; confirm.
92 func validateRootCAAndTLSCert(rootCA *RootCA, externalCARootPool *x509.CertPool, tlsKeyPair *tls.Certificate) error {
94 leafCert *x509.Certificate
95 intermediatePool *x509.CertPool
97 for i, derBytes := range tlsKeyPair.Certificate {
98 parsed, err := x509.ParseCertificate(derBytes)
100 return errors.Wrap(err, "could not validate new root certificates due to parse error")
105 if intermediatePool == nil {
// the intermediate pool is only allocated once there is a cert to put in it
106 intermediatePool = x509.NewCertPool()
108 intermediatePool.AddCert(parsed)
111 opts := x509.VerifyOptions{
113 Intermediates: intermediatePool,
115 if _, err := leafCert.Verify(opts); err != nil {
116 return errors.Wrap(err, "new root CA does not match existing TLS credentials")
// reuse the same options, swapping only the trusted roots, to check the
// external CA root pool
118 opts.Roots = externalCARootPool
119 if _, err := leafCert.Verify(opts); err != nil {
120 return errors.Wrap(err, "new external root pool does not match existing TLS credentials")
125 // NewSecurityConfig initializes and returns a new SecurityConfig.
// It builds the server and client TLS credentials from rootCA and tlsKeyPair,
// and an external CA client whose TLS config presents tlsKeyPair, trusts
// rootCA.Pool and requires at least TLS 1.2. rootCA.Pool is also stored as the
// external CA client root pool.
// NOTE(review): the error checks after each credentials constructor are on
// lines not visible here; presumably a failure is returned to the caller.
126 func NewSecurityConfig(rootCA *RootCA, krw *KeyReadWriter, tlsKeyPair *tls.Certificate, issuerInfo *IssuerInfo) (*SecurityConfig, error) {
127 // Create the Server TLS Credentials for this node. These will not be used by workers.
128 serverTLSCreds, err := rootCA.NewServerTLSCredentials(tlsKeyPair)
133 // Create a TLSConfig to be used when this node connects as a client to another remote node.
134 // We're using ManagerRole as remote serverName for TLS host verification because both workers
135 // and managers always connect to remote managers.
136 clientTLSCreds, err := rootCA.NewClientTLSCredentials(tlsKeyPair, ManagerRole)
141 // Make a new TLS config for the external CA client without a
142 // ServerName value set.
143 externalCATLSConfig := &tls.Config{
144 Certificates: []tls.Certificate{*tlsKeyPair},
145 RootCAs: rootCA.Pool,
146 MinVersion: tls.VersionTLS12,
149 return &SecurityConfig{
153 certificate: tlsKeyPair,
154 issuerInfo: issuerInfo,
156 externalCA: NewExternalCA(rootCA, externalCATLSConfig),
157 ClientTLSCreds: clientTLSCreds,
158 ServerTLSCreds: serverTLSCreds,
159 externalCAClientRootPool: rootCA.Pool,
163 // RootCA returns the root CA held by this SecurityConfig.
164 func (s *SecurityConfig) RootCA() *RootCA {
171 // ExternalCA returns the external CA client held by this SecurityConfig.
172 func (s *SecurityConfig) ExternalCA() *ExternalCA {
176 // KeyWriter returns the object that can write keys to disk
// It is the same underlying KeyReadWriter that KeyReader exposes.
177 func (s *SecurityConfig) KeyWriter() KeyWriter {
178 return s.keyReadWriter
181 // KeyReader returns the object that can read keys from disk
// It is the same underlying KeyReadWriter that KeyWriter exposes.
182 func (s *SecurityConfig) KeyReader() KeyReader {
183 return s.keyReadWriter
186 // UpdateRootCA replaces the root CA with a new root CA
// and the external CA client root pool with externalCARootPool. The update is
// rejected with an error unless the current TLS certificate validates against
// both. On success the external CA is pointed at the new root CA and the TLS
// credentials are rebuilt from the existing certificate and issuer info.
187 func (s *SecurityConfig) UpdateRootCA(rootCA *RootCA, externalCARootPool *x509.CertPool) error {
191 // refuse to update the root CA if the current TLS credentials do not validate against it
192 if err := validateRootCAAndTLSCert(rootCA, externalCARootPool, s.certificate); err != nil {
197 s.externalCAClientRootPool = externalCARootPool
198 s.externalCA.UpdateRootCA(rootCA)
200 return s.updateTLSCredentials(s.certificate, s.issuerInfo)
203 // SetWatch allows you to set a watch queue on the security config, in order to be notified of any changes
204 func (s *SecurityConfig) SetWatch(q *watch.Queue) {
210 // IssuerInfo returns the issuer subject and issuer public key of the current TLS certificate
211 func (s *SecurityConfig) IssuerInfo() *IssuerInfo {
217 // This function expects something else to have taken out a lock on the SecurityConfig.
// It builds new client and server TLS configs from the given certificate and
// the current root CA pool, loads them into ClientTLSCreds and ServerTLSCreds,
// hands the external CA a TLS config that trusts externalCAClientRootPool, and
// then records the certificate and issuer info and publishes an
// api.NodeTLSInfo on the watch queue. Any failure is returned wrapped.
218 func (s *SecurityConfig) updateTLSCredentials(certificate *tls.Certificate, issuerInfo *IssuerInfo) error {
219 certs := []tls.Certificate{*certificate}
220 clientConfig, err := NewClientTLSConfig(certs, s.rootCA.Pool, ManagerRole)
222 return errors.Wrap(err, "failed to create a new client config using the new root CA")
225 serverConfig, err := NewServerTLSConfig(certs, s.rootCA.Pool)
227 return errors.Wrap(err, "failed to create a new server config using the new root CA")
230 if err := s.ClientTLSCreds.loadNewTLSConfig(clientConfig); err != nil {
231 return errors.Wrap(err, "failed to update the client credentials")
234 // Update the external CA to use the new client TLS
235 // config using a copy without a serverName specified.
236 s.externalCA.UpdateTLSConfig(&tls.Config{
238 RootCAs: s.externalCAClientRootPool,
239 MinVersion: tls.VersionTLS12,
242 if err := s.ServerTLSCreds.loadNewTLSConfig(serverConfig); err != nil {
243 return errors.Wrap(err, "failed to update the server TLS credentials")
// the new certificate is only recorded after both credential sets have loaded
246 s.certificate = certificate
247 s.issuerInfo = issuerInfo
249 s.queue.Publish(&api.NodeTLSInfo{
250 TrustRoot: s.rootCA.Certs,
251 CertIssuerPublicKey: s.issuerInfo.PublicKey,
252 CertIssuerSubject: s.issuerInfo.Subject,
258 // UpdateTLSCredentials updates the security config with an updated TLS certificate and issuer info
// by delegating to updateTLSCredentials.
// NOTE(review): updateTLSCredentials expects the caller to hold a lock; the
// lines between the signature and the return are not visible here —
// presumably they take it; confirm.
259 func (s *SecurityConfig) UpdateTLSCredentials(certificate *tls.Certificate, issuerInfo *IssuerInfo) error {
262 return s.updateTLSCredentials(certificate, issuerInfo)
265 // SigningPolicy creates a policy used by the signer to ensure that the only fields
266 // from the remote CSRs we trust are: PublicKey, PublicKeyAlgorithm and SignatureAlgorithm.
267 // It receives the duration a certificate will be valid for
268 func SigningPolicy(certExpiry time.Duration) *cfconfig.Signing {
269 // Expirations shorter than MinNodeCertExpiration are replaced with DefaultNodeCertExpiration
270 if certExpiry < MinNodeCertExpiration {
271 certExpiry = DefaultNodeCertExpiration
275 certExpiry = certExpiry + CertBackdate
277 return &cfconfig.Signing{
278 Default: &cfconfig.SigningProfile{
279 Usage: []string{"signing", "key encipherment", "server auth", "client auth"},
281 Backdate: CertBackdate,
282 // Only trust the key components from the CSR. Everything else should
283 // come directly from API call params.
284 CSRWhitelist: &cfconfig.CSRWhitelist{
286 PublicKeyAlgorithm: true,
287 SignatureAlgorithm: true,
293 // SecurityConfigPaths is used as a helper to hold all the paths of security relevant files
294 type SecurityConfigPaths struct {
// Node and RootCA each hold a cert/key path pair — presumably for the node
// TLS material and the root CA material respectively; see NewConfigPaths.
295 Node, RootCA CertPaths
298 // NewConfigPaths returns the paths to all of the different types of files,
// each formed by joining baseCertDir with the corresponding file name. The
// paths are absolute only if baseCertDir is absolute.
299 func NewConfigPaths(baseCertDir string) *SecurityConfigPaths {
300 return &SecurityConfigPaths{
302 Cert: filepath.Join(baseCertDir, nodeTLSCertFilename),
303 Key: filepath.Join(baseCertDir, nodeTLSKeyFilename)},
305 Cert: filepath.Join(baseCertDir, rootCACertFilename),
306 Key: filepath.Join(baseCertDir, rootCAKeyFilename)},
310 // GenerateJoinToken creates a new join token.
311 func GenerateJoinToken(rootCA *RootCA) string {
312 var secretBytes [generatedSecretEntropyBytes]byte
314 if _, err := cryptorand.Read(secretBytes[:]); err != nil {
315 panic(fmt.Errorf("failed to read random bytes: %v", err))
318 var nn, digest big.Int
319 nn.SetBytes(secretBytes[:])
320 digest.SetString(rootCA.Digest.Hex(), 16)
321 return fmt.Sprintf("SWMTKN-1-%0[1]*s-%0[3]*s", base36DigestLen, digest.Text(joinTokenBase), maxGeneratedSecretLength, nn.Text(joinTokenBase))
324 func getCAHashFromToken(token string) (digest.Digest, error) {
325 split := strings.Split(token, "-")
326 if len(split) != 4 || split[0] != "SWMTKN" || split[1] != "1" || len(split[2]) != base36DigestLen || len(split[3]) != maxGeneratedSecretLength {
327 return "", errors.New("invalid join token")
330 var digestInt big.Int
331 digestInt.SetString(split[2], joinTokenBase)
333 return digest.Parse(fmt.Sprintf("sha256:%0[1]*s", 64, digestInt.Text(16)))
336 // DownloadRootCA tries to retrieve a remote root CA and matches the digest against the provided token.
// The CA is fetched through connBroker with up to 5 attempts, waiting
// GetCertRetryInterval between them, and on success is saved to paths.
// NOTE(review): ctx.Err() is returned from the wait between attempts —
// presumably when ctx is done; the select arm is not visible here.
337 func DownloadRootCA(ctx context.Context, paths CertPaths, token string, connBroker *connectionbroker.Broker) (RootCA, error) {
339 // Derive the expected CA digest from the optional join token that we've been provided.
340 // If we were provided a non-empty token that does not contain a valid hash, return
341 // an error; otherwise, allow the empty digest through.
347 d, err = getCAHashFromToken(token)
352 // Get the remote CA certificate, verify integrity with the
353 // hash provided. Retry up to 5 times, in case the manager we
354 // first try to contact is not responding properly (it may have
355 // just been demoted, for example).
357 for i := 0; i != 5; i++ {
358 rootCA, err = GetRemoteCA(ctx, d, connBroker)
362 log.G(ctx).WithError(err).Errorf("failed to retrieve remote root CA certificate")
365 case <-time.After(GetCertRetryInterval):
367 return RootCA{}, ctx.Err()
374 // Save root CA certificate to disk
375 if err = SaveRootCA(rootCA, paths); err != nil {
379 log.G(ctx).Debugf("retrieved remote CA certificate: %s", paths.Cert)
383 // LoadSecurityConfig loads TLS credentials from disk, or returns an error if
384 // these credentials do not exist or are unusable.
// The cert and key are read through krw, the cert chain is validated against
// rootCA.Pool (allowExpired is passed through to ValidateCertChain), and the
// resulting key pair plus the issuer's raw subject and public key are used to
// build the SecurityConfig.
385 func LoadSecurityConfig(ctx context.Context, rootCA RootCA, krw *KeyReadWriter, allowExpired bool) (*SecurityConfig, error) {
386 ctx = log.WithModule(ctx, "tls")
388 // At this point we've successfully loaded the CA details from disk, or
389 // successfully downloaded them remotely. The next step is to try to
390 // load our certificates.
392 // Read both the Cert and Key from disk
393 cert, key, err := krw.Read()
398 // Check to see if this certificate was signed by our CA, and isn't expired
399 _, chains, err := ValidateCertChain(rootCA.Pool, cert, allowExpired)
403 // ValidateCertChain, if successful, will always return at least 1 chain containing
404 // at least 2 certificates: the leaf and the root.
405 issuer := chains[0][1]
407 // Now that we know this certificate is valid, create a TLS Certificate for our
409 keyPair, err := tls.X509KeyPair(cert, key)
414 secConfig, err := NewSecurityConfig(&rootCA, krw, &keyPair, &IssuerInfo{
415 Subject: issuer.RawSubject,
416 PublicKey: issuer.RawSubjectPublicKeyInfo,
419 log.G(ctx).WithFields(logrus.Fields{
420 "node.id": secConfig.ClientTLSCreds.NodeID(),
421 "node.role": secConfig.ClientTLSCreds.Role(),
422 }).Debug("loaded node credentials")
424 return secConfig, err
427 // CertificateRequestConfig contains the information needed to request a
428 // certificate from a remote CA.
429 type CertificateRequestConfig struct {
430 // Token is the join token that authenticates us with the CA.
432 // Availability allows a user to control the current scheduling status of a node
433 Availability api.NodeSpec_Availability
434 // ConnBroker provides connections to CAs.
435 ConnBroker *connectionbroker.Broker
436 // Credentials provides transport credentials for communicating with the
438 Credentials credentials.TransportCredentials
439 // ForceRemote specifies that only a remote (TCP) connection should
440 // be used to request the certificate. This may be necessary in cases
441 // where the local node is running a manager, but is in the process of
444 // NodeCertificateStatusRequestTimeout determines how long to wait for a node
445 // status RPC result. If not provided (zero value), will default to 5 seconds.
446 NodeCertificateStatusRequestTimeout time.Duration
447 // RetryInterval specifies how long to delay between retries, if non-zero.
// CreateSecurityConfig sets it to GetCertRetryInterval before falling back
// to a remote CA.
448 RetryInterval time.Duration
451 // CreateSecurityConfig creates a new key and cert for this node, either locally
452 // or via a remote CA.
// A certificate for ManagerRole is first issued locally with freshly
// generated IDs. When that fails with ErrNoValidSigner, the certificate is
// requested from a remote CA using config instead. The resulting key pair and
// issuer info are then wrapped in a new SecurityConfig.
453 func (rootCA RootCA) CreateSecurityConfig(ctx context.Context, krw *KeyReadWriter, config CertificateRequestConfig) (*SecurityConfig, error) {
454 ctx = log.WithModule(ctx, "tls")
456 // Create new random IDs for this certificate
457 cn := identity.NewID()
458 org := identity.NewID()
460 proposedRole := ManagerRole
461 tlsKeyPair, issuerInfo, err := rootCA.IssueAndSaveNewCertificates(krw, cn, proposedRole, org)
462 switch errors.Cause(err) {
463 case ErrNoValidSigner:
464 config.RetryInterval = GetCertRetryInterval
465 // Request certificate issuance from a remote CA.
466 // The request is driven by the caller-supplied config, with RetryInterval overridden above
467 tlsKeyPair, issuerInfo, err = rootCA.RequestAndSaveNewCertificates(ctx, krw, config)
469 log.G(ctx).WithError(err).Error("failed to request and save new certificate")
473 log.G(ctx).WithFields(logrus.Fields{
475 "node.role": proposedRole,
476 }).Debug("issued new TLS certificate")
478 log.G(ctx).WithFields(logrus.Fields{
480 "node.role": proposedRole,
481 }).WithError(err).Errorf("failed to issue and save new certificate")
485 secConfig, err := NewSecurityConfig(&rootCA, krw, tlsKeyPair, issuerInfo)
487 log.G(ctx).WithFields(logrus.Fields{
488 "node.id": secConfig.ClientTLSCreds.NodeID(),
489 "node.role": secConfig.ClientTLSCreds.Role(),
490 }).Debugf("new node credentials generated: %s", krw.Target())
492 return secConfig, err
// updateRootThenUpdateCert fetches the root CA from a remote manager, installs
// it on s (which validates it against the existing TLS credentials), saves it
// to rootPaths, and then requests and saves a new certificate using the new
// root CA and s's client credentials. It refuses to run unless the first
// OrganizationalUnit of failedCert is WorkerRole.
//
495 // TODO(cyli): currently we have to only update if it's a worker role - if we have a single root CA update path for
496 // both managers and workers, we won't need to check any more.
497 func updateRootThenUpdateCert(ctx context.Context, s *SecurityConfig, connBroker *connectionbroker.Broker, rootPaths CertPaths, failedCert *x509.Certificate) (*tls.Certificate, *IssuerInfo, error) {
498 if len(failedCert.Subject.OrganizationalUnit) == 0 || failedCert.Subject.OrganizationalUnit[0] != WorkerRole {
499 return nil, nil, errors.New("cannot update root CA since this is not a worker")
501 // try downloading a new root CA if it's an unknown authority issue, in case there was a root rotation completion
502 // and we just didn't get the new root
// NOTE(review): an empty digest is passed — presumably this skips the
// digest check inside GetRemoteCA; confirm.
503 rootCA, err := GetRemoteCA(ctx, "", connBroker)
507 // validate against the existing security config creds
508 if err := s.UpdateRootCA(&rootCA, rootCA.Pool); err != nil {
511 if err := SaveRootCA(rootCA, rootPaths); err != nil {
514 return rootCA.RequestAndSaveNewCertificates(ctx, s.KeyWriter(),
515 CertificateRequestConfig{
516 ConnBroker: connBroker,
517 Credentials: s.ClientTLSCreds,
521 // RenewTLSConfigNow gets a new TLS cert and key, and updates the security config s, which must be non-nil. This is similar to
522 // RenewTLSConfig, except while that monitors for expiry, and periodically renews, this renews once and is blocking
// s.renewalMu is released when the function returns. If the request fails
// with an x509UnknownAuthError, the root CA is refreshed via
// updateRootThenUpdateCert and the certificate request is retried there.
523 func RenewTLSConfigNow(ctx context.Context, s *SecurityConfig, connBroker *connectionbroker.Broker, rootPaths CertPaths) error {
525 defer s.renewalMu.Unlock()
527 ctx = log.WithModule(ctx, "tls")
528 log := log.G(ctx).WithFields(logrus.Fields{
529 "node.id": s.ClientTLSCreds.NodeID(),
530 "node.role": s.ClientTLSCreds.Role(),
533 // Let's request new certs. Renewals don't require a token.
535 tlsKeyPair, issuerInfo, err := rootCA.RequestAndSaveNewCertificates(ctx,
537 CertificateRequestConfig{
538 ConnBroker: connBroker,
539 Credentials: s.ClientTLSCreds,
541 if wrappedError, ok := err.(x509UnknownAuthError); ok {
543 tlsKeyPair, issuerInfo, newErr = updateRootThenUpdateCert(ctx, s, connBroker, rootPaths, wrappedError.failedLeafCert)
545 err = wrappedError.error
551 log.WithError(err).Errorf("failed to renew the certificate")
555 return s.UpdateTLSCredentials(tlsKeyPair, issuerInfo)
558 // calculateRandomExpiry returns a random duration between 50% and 80% of the
559 // original validity period
// The renewal point is picked in whole minutes after validFrom, and the
// duration computed from it is measured from the current time, not from
// validFrom.
560 func calculateRandomExpiry(validFrom, validUntil time.Time) time.Duration {
561 duration := validUntil.Sub(validFrom)
564 // Our lower bound of renewal will be half of the total expiration time
565 minValidity := int(duration.Minutes() * CertLowerRotationRange)
566 // Our upper bound of renewal will be 80% of the total expiration time
567 maxValidity := int(duration.Minutes() * CertUpperRotationRange)
568 // Let's select a random number of minutes between min and max, and set our retry for that
569 // Using randomly selected rotation allows us to avoid certificate thundering herds.
570 if maxValidity-minValidity < 1 {
// the range is empty (Intn would presumably panic on a non-positive
// argument), so fall back to the lower bound
571 randomExpiry = minValidity
573 randomExpiry = rand.Intn(maxValidity-minValidity) + int(minValidity)
576 expiry := validFrom.Add(time.Duration(randomExpiry) * time.Minute).Sub(time.Now())
583 // NewServerTLSConfig returns a tls.Config configured for a TLS Server, given the server's
584 // tls.Certificates and a pool of root CA certificates. It returns an error if rootCAPool is nil.
585 func NewServerTLSConfig(certs []tls.Certificate, rootCAPool *x509.CertPool) (*tls.Config, error) {
586 if rootCAPool == nil {
587 return nil, errors.New("valid root CA pool required")
592 // Since we're using the same CA server to issue Certificates to new nodes, we can't
593 // use tls.RequireAndVerifyClientCert
594 ClientAuth: tls.VerifyClientCertIfGiven,
596 ClientCAs: rootCAPool,
597 PreferServerCipherSuites: true,
598 MinVersion: tls.VersionTLS12,
602 // NewClientTLSConfig returns a tls.Config configured for a TLS Client, given the client's tls.Certificates,
603 // a pool of root CA certificates, and the name of the remote server the client wants to connect to.
// It returns an error if rootCAPool is nil.
604 func NewClientTLSConfig(certs []tls.Certificate, rootCAPool *x509.CertPool, serverName string) (*tls.Config, error) {
605 if rootCAPool == nil {
606 return nil, errors.New("valid root CA pool required")
610 ServerName: serverName,
613 MinVersion: tls.VersionTLS12,
617 // NewClientTLSCredentials returns GRPC credentials for a TLS GRPC client, given a tls.Certificate
618 // and the name of the remote server the client wants to connect to. The root CA's pool is used as the trusted roots.
619 func (rootCA *RootCA) NewClientTLSCredentials(cert *tls.Certificate, serverName string) (*MutableTLSCreds, error) {
620 tlsConfig, err := NewClientTLSConfig([]tls.Certificate{*cert}, rootCA.Pool, serverName)
625 mtls, err := NewMutableTLS(tlsConfig)
630 // NewServerTLSCredentials returns GRPC credentials for a TLS GRPC server, given a tls.Certificate.
631 // The root CA's pool is passed to NewServerTLSConfig as the root CA pool.
632 func (rootCA *RootCA) NewServerTLSCredentials(cert *tls.Certificate) (*MutableTLSCreds, error) {
633 tlsConfig, err := NewServerTLSConfig([]tls.Certificate{*cert}, rootCA.Pool)
638 mtls, err := NewMutableTLS(tlsConfig)
643 // ParseRole parses an apiRole into an internal role string
// (ManagerRole or WorkerRole). Any other value yields an error.
644 func ParseRole(apiRole api.NodeRole) (string, error) {
646 case api.NodeRoleManager:
647 return ManagerRole, nil
648 case api.NodeRoleWorker:
649 return WorkerRole, nil
651 return "", errors.Errorf("failed to parse api role: %v", apiRole)
655 // FormatRole converts an internal role string into an apiRole
// The comparison is case-insensitive. A string that matches neither
// ManagerRole nor WorkerRole yields an error.
656 func FormatRole(role string) (api.NodeRole, error) {
657 switch strings.ToLower(role) {
658 case strings.ToLower(ManagerRole):
659 return api.NodeRoleManager, nil
660 case strings.ToLower(WorkerRole):
661 return api.NodeRoleWorker, nil
663 return 0, errors.Errorf("failed to parse role: %s", role)