1
0
Files
monitor/pkg/k8s/rotator.go
2023-09-06 17:52:39 +08:00

577 lines
17 KiB
Go

package k8s
import (
"bytes"
"context"
"crypto"
"crypto/rand"
"crypto/rsa"
"crypto/tls"
"crypto/x509"
"encoding/base64"
"encoding/pem"
"errors"
"fmt"
"math/big"
"net"
"os"
"path/filepath"
"sync/atomic"
"time"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
)
// logcr is a logger named "cert-rotation" derived from the controller-runtime
// root logger.
// NOTE(review): the code visible in this file logs through `log`, not `logcr` —
// confirm which logger is intended.
var logcr = ctrl.Log.WithName("cert-rotation")
const (
// rotationCheckFrequency is the interval between the periodic certificate
// checks performed by CertRotator.Start.
rotationCheckFrequency = 12 * time.Hour
// lookaheadInterval is how far into the future certificates are validated
// (90 check periods, i.e. 45 days); a certificate that would not verify at
// now+lookaheadInterval is regenerated.
lookaheadInterval = 90 * rotationCheckFrequency
// certValidityDuration is the lifetime given to newly created certificates
// (about ten years).
certValidityDuration = 10 * 365 * 24 * time.Hour
// bits is the RSA key size used when generating private keys.
bits = 2048
// CACertKey is the file name, inside CertRotator.CertDir, under which the CA
// certificate is written.
CACertKey = "ca.crt"
)
// WebhookType is the type of webhook: a validating or mutating admission
// webhook, or a CRD conversion webhook.
type WebhookType int
const (
// Validating indicates the webhook is a ValidatingWebhookConfiguration.
Validating WebhookType = iota
// Mutating indicates the webhook is a MutatingWebhookConfiguration.
Mutating
// CRDConversion indicates the webhook is a CRD conversion webhook.
CRDConversion
)
// AddRotator adds the CertRotator and the ReconcileWH controller to the manager.
//
// It creates a cache scoped to the CA secret's namespace and uses it as the
// rotator's reader, wires the manager client in as the writer, and registers
// an unmanaged "cert-rotator" controller that watches the CA secret and every
// configured webhook resource. Events for a watched webhook resource are
// mapped to a reconcile request for the CA secret.
//
// It returns the first error encountered while building the cache, creating
// the controller, setting up a watch, or registering a runnable.
func AddRotator(mgr manager.Manager, cr *CertRotator) error {
	// Add a new namespace-scoped cache.Cache to the provided manager.
	c, err := cache.New(mgr.GetConfig(), cache.Options{
		Scheme:    mgr.GetScheme(),
		Mapper:    mgr.GetRESTMapper(),
		Namespace: cr.CaKey.Namespace,
	})
	if err != nil {
		// Without this check a failed cache.New would be masked by the
		// following mgr.Add call.
		return err
	}
	if err = mgr.Add(c); err != nil {
		return err
	}

	cr.reader = c
	cr.writer = mgr.GetClient()
	cr.wasCAInjected = new(atomic.Value)
	cr.caNotInjected = make(chan struct{})
	if err = mgr.Add(cr); err != nil {
		return err
	}

	r := &ReconcileWH{
		Client:        mgr.GetClient(),
		secretKey:     cr.CaKey,
		webhooks:      cr.Webhooks,
		wasCAInjected: cr.wasCAInjected,
	}

	// Create a new controller.
	reconciler, err := controller.NewUnmanaged("cert-rotator", mgr, controller.Options{Reconciler: r})
	if err != nil {
		return err
	}

	err = reconciler.Watch(source.NewKindWithCache(&corev1.Secret{}, c),
		&handler.EnqueueRequestForObject{})
	if err != nil {
		return err
	}

	for _, wh := range cr.Webhooks {
		// Per-iteration copy: the map function below outlives this iteration,
		// and Go versions before 1.22 share a single loop variable, which
		// would make every watch filter on the last webhook's name.
		wh := wh

		obj := new(unstructured.Unstructured)
		obj.SetGroupVersionKind(wh.GVK())
		err = reconciler.Watch(source.NewKindWithCache(obj, c),
			handler.EnqueueRequestsFromMapFunc(func(object client.Object) []reconcile.Request {
				if object.GetName() != wh.Name {
					return nil
				}
				return []reconcile.Request{{NamespacedName: r.secretKey}}
			}),
		)
		if err != nil {
			return err
		}
	}

	return mgr.Add(&controllerWrapper{Controller: reconciler})
}
// controllerWrapper embeds a controller.Controller and defines
// NeedLeaderElection so the reported leader-election requirement comes from
// the needLeaderElection field.
type controllerWrapper struct {
controller.Controller
// needLeaderElection is the value returned by NeedLeaderElection. AddRotator
// constructs the wrapper without setting it, so it is false there.
needLeaderElection bool
}
// NeedLeaderElection reports whether the wrapped controller should only run
// on the elected leader.
func (cw controllerWrapper) NeedLeaderElection() bool {
return cw.needLeaderElection
}
// Compile-time assertion that *CertRotator implements manager.Runnable.
var _ manager.Runnable = &CertRotator{}
// WebhookInfo identifies a cluster resource whose CA bundle must be kept up to
// date with the rotator's CA certificate.
type WebhookInfo struct {
// Name is the name of the webhook configuration for a validating or mutating
// webhook, or the CRD name in the case of a CRD conversion webhook.
Name string
// Type selects which kind of resource Name refers to; see GVK.
Type WebhookType
}
// GVK returns the GroupVersionKind of the resource described by w.Type.
// An unrecognised type yields the zero GroupVersionKind.
func (w WebhookInfo) GVK() schema.GroupVersionKind {
	switch w.Type {
	case Validating:
		return schema.GroupVersionKind{
			Group:   "admissionregistration.k8s.io",
			Version: "v1",
			Kind:    "ValidatingWebhookConfiguration",
		}
	case Mutating:
		return schema.GroupVersionKind{
			Group:   "admissionregistration.k8s.io",
			Version: "v1",
			Kind:    "MutatingWebhookConfiguration",
		}
	case CRDConversion:
		return schema.GroupVersionKind{
			Group:   "apiextensions.k8s.io",
			Version: "v1",
			Kind:    "CustomResourceDefinition",
		}
	default:
		return schema.GroupVersionKind{}
	}
}
// SyncingReader is a reader that needs syncing prior to being usable.
type SyncingReader interface {
client.Reader
// WaitForCacheSync is called by CertRotator.Start before the first read; a
// false result is treated there as a failure to sync.
WaitForCacheSync(ctx context.Context) bool
}
// CertRotator contains cert artifacts and a channel to close when the certs are ready.
type CertRotator struct {
// reader is used to fetch the CA secret; AddRotator sets it to a cache scoped
// to CaKey.Namespace.
reader SyncingReader
// writer is used to create or update the CA secret; AddRotator sets it to the
// manager's client.
writer client.Writer
// CaKey is the namespaced name of the secret holding the CA certificate and key.
CaKey types.NamespacedName
// DNSName is the host name the serving certificate is issued for and verified against.
DNSName string
// CertDir is the directory the CA certificate, serving certificate and
// serving key files are written to.
CertDir string
// Webhooks lists the resources that receive the CA bundle.
Webhooks []WebhookInfo
// IsReady is closed by ensureReady once the serving cert file exists and the
// CA has been injected.
IsReady chan struct{}
// wasCAInjected is stored to by ReconcileWH.Reconcile after a successful
// ensureCerts call and polled by ensureReady.
wasCAInjected *atomic.Value
// caNotInjected is closed by ensureReady when its retries are exhausted,
// which makes Start return an error.
caNotInjected chan struct{}
// RequireLeaderElection should be set to true if the CertRotator needs to
// be run in the leader election mode.
RequireLeaderElection bool
}
// NeedLeaderElection reports whether the rotator should only run on the
// elected leader; it returns RequireLeaderElection.
func (cr *CertRotator) NeedLeaderElection() bool {
return cr.RequireLeaderElection
}
// Start runs the CertRotator: it waits for the reader to sync, refreshes the
// certificates once up front, then re-checks them every rotationCheckFrequency
// until ctx is cancelled.
//
// It returns an error if the reader never syncs, if the initial refresh fails,
// or if the CA could not be injected into the webhooks; it returns nil when ctx
// is done.
func (cr *CertRotator) Start(ctx context.Context) error {
	log.Info("starting cert certmgr controller")

	if synced := cr.reader.WaitForCacheSync(ctx); !synced {
		return errors.New("failed waiting for reader to sync")
	}

	// Rotate explicitly on the first round so the certificate is bootstrapped;
	// otherwise the manager exits before a cert can be written.
	startupErr := cr.refreshCertIfNeeded(ctx)
	if startupErr != nil {
		log.Error(startupErr, "could not refresh cert on startup")
		return startupErr
	}

	// Closes IsReady (or caNotInjected) in the background once the outcome is known.
	go cr.ensureReady()

	tick := time.NewTicker(rotationCheckFrequency)
	defer tick.Stop()

	for {
		select {
		case <-ctx.Done():
			log.Info("stopping cert certmgr controller")
			return nil
		case <-cr.caNotInjected:
			return errors.New("could not inject certs to webhooks")
		case <-tick.C:
			// A failed periodic rotation is logged only; the next tick retries.
			if rotateErr := cr.refreshCertIfNeeded(ctx); rotateErr != nil {
				log.Error(rotateErr, "error rotating certs")
			}
		}
	}
}
// refreshCertIfNeeded refreshes the CA and the serving certificate files when
// they are missing or would not be valid at now+lookaheadInterval.
//
// Failed attempts are retried with exponential backoff. When every attempt
// fails, the error from the last attempt is returned; if ctx is cancelled the
// retries stop early.
func (cr *CertRotator) refreshCertIfNeeded(ctx context.Context) error {
	// lastErr remembers the most recent failure so it can be reported instead
	// of the generic backoff timeout error.
	var lastErr error

	refreshFn := func() (bool, error) {
		// Returning a non-nil error aborts the backoff immediately; do so only
		// when the caller has given up.
		if err := ctx.Err(); err != nil {
			return false, err
		}

		lookahead := time.Now().Add(lookaheadInterval)
		caKey, err := cr.refreshCA(ctx, lookahead)
		if err == nil {
			err = cr.refreshFile(ctx, caKey, lookahead)
		}
		if err != nil {
			// Report "not done yet" so that the backoff actually retries.
			lastErr = err
			return false, nil
		}
		return true, nil
	}

	err := wait.ExponentialBackoff(wait.Backoff{
		Duration: 10 * time.Millisecond,
		Factor:   2,
		Jitter:   1,
		Steps:    10,
	}, refreshFn)
	if err != nil && lastErr != nil && ctx.Err() == nil {
		return lastErr
	}
	return err
}
// refreshCA makes sure a usable self-signed CA exists, both in the CA secret
// and as the CACertKey file in CertDir, and returns its certificate and key.
//
// If the secret exists, loads, and still verifies at lookahead, it is reused
// and the CA file is rewritten only when it is missing or differs from the
// secret. Otherwise a new CA is generated, stored in the secret (created or
// updated) and written to disk.
func (cr *CertRotator) refreshCA(ctx context.Context, lookahead time.Time) (*KeyPair, error) {
	// The CA certificate is public, but must not be writable by other users.
	// The mode only applies when the file is created.
	const caFileMode os.FileMode = 0644

	ca := new(corev1.Secret)
	caFile := filepath.Join(cr.CertDir, CACertKey)

	if err := cr.reader.Get(ctx, cr.CaKey, ca); err != nil {
		if !apierrors.IsNotFound(err) {
			return nil, fmt.Errorf("acquiring ca to update certificates %w", err)
		}
		// The secret does not exist yet: prepare an empty one to be created.
		ca.Name = cr.CaKey.Name
		ca.Namespace = cr.CaKey.Namespace
	} else if caKey, err := loadCert(ca); err == nil {
		// A certificate already exists; check whether it is still valid.
		if err = validCert(ca, caKey.Cert, lookahead); err == nil {
			caPEM := ca.Data[corev1.TLSCertKey]
			// Rewrite the file when it is absent, unreadable, or stale
			// (e.g. the secret was rotated by another instance).
			onDisk, readErr := os.ReadFile(caFile)
			if readErr != nil || !bytes.Equal(onDisk, caPEM) {
				if err = os.WriteFile(caFile, caPEM, caFileMode); err != nil {
					log.Error(err, "could not write ca", "path", caFile)
					return nil, err
				}
			}
			return caKey, nil
		}
	}

	// Missing, unparsable, or soon-to-expire CA: generate a fresh one.
	log.Info("refreshing ca", "name", cr.CaKey)
	if err := cr.refreshCerts(ctx, nil, ca); err != nil {
		log.Error(err, "could not refresh ca", "name", cr.CaKey)
		return nil, err
	}
	if err := os.WriteFile(caFile, ca.Data[corev1.TLSCertKey], caFileMode); err != nil {
		log.Error(err, "could not write ca", "path", caFile)
		return nil, err
	}
	log.Info("server ca refreshed", "name", cr.CaKey, "file", caFile)
	return loadCert(ca)
}
// refreshFile makes sure CertDir holds a serving certificate and key signed
// by ca.
//
// The existing pair is kept when it loads and still verifies against ca for
// cr.DNSName at lookahead. Otherwise a new certificate and key are generated
// for cr.DNSName and written to the tls.crt / tls.key files in CertDir.
func (cr *CertRotator) refreshFile(ctx context.Context, ca *KeyPair, lookahead time.Time) error {
	// File modes used when the files are created; an existing file keeps its
	// current mode. The private key must be readable by its owner only.
	const (
		certFileMode os.FileMode = 0644
		keyFileMode  os.FileMode = 0600
	)

	certFile := filepath.Join(cr.CertDir, corev1.TLSCertKey)
	keyFile := filepath.Join(cr.CertDir, corev1.TLSPrivateKeyKey)

	crt, err := tls.LoadX509KeyPair(certFile, keyFile)
	if err == nil {
		certDer, err := x509.ParseCertificate(crt.Certificate[0])
		if err != nil {
			return err
		}
		caPool := x509.NewCertPool()
		caPool.AddCert(ca.Cert)
		_, err = certDer.Verify(x509.VerifyOptions{
			DNSName:     cr.DNSName,
			Roots:       caPool,
			CurrentTime: lookahead,
		})
		if err == nil {
			// Still valid far enough into the future: nothing to do.
			return nil
		}
	}

	// Missing, unreadable, or no longer verifiable: issue a new pair.
	certBytes, key, err := CreateCertPEM(ca, cr.DNSName)
	if err != nil {
		return err
	}
	if err = os.WriteFile(certFile, certBytes, certFileMode); err != nil {
		return err
	}
	if err = os.WriteFile(keyFile, key, keyFileMode); err != nil {
		return err
	}
	log.Info("refresh cert complete", "cert", certFile, "key", keyFile)
	return nil
}
// refreshCerts generates a new certificate and key via CreateCertPEM, replaces
// the data of secret s with them, and persists the secret.
//
// A secret with a zero creation timestamp is treated as new: its type is set
// to TLS and it is created. Otherwise the secret is updated.
func (cr *CertRotator) refreshCerts(ctx context.Context, ca *KeyPair, s *corev1.Secret, hosts ...string) error {
	certPEM, keyPEM, err := CreateCertPEM(ca, hosts...)
	if err != nil {
		return err
	}

	// Overwrite the secret payload with the freshly generated pair.
	payload := make(map[string][]byte, 2)
	payload[corev1.TLSCertKey] = certPEM
	payload[corev1.TLSPrivateKeyKey] = keyPEM
	s.Data = payload

	if s.CreationTimestamp.IsZero() {
		s.Type = corev1.SecretTypeTLS
		return cr.writer.Create(ctx, s)
	}
	return cr.writer.Update(ctx, s)
}
// ensureReady waits, with exponential backoff, until the serving cert file
// exists in CertDir and the CA has been injected into the webhooks.
//
// On success it closes IsReady (when one was supplied). When the retries are
// exhausted it closes caNotInjected instead, which makes Start return an
// error.
func (cr *CertRotator) ensureReady() {
	certFile := filepath.Join(cr.CertDir, corev1.TLSCertKey)

	checkFn := func() (bool, error) {
		// A missing cert file is not an error, just "not ready yet".
		if _, err := os.Stat(certFile); err != nil {
			return false, nil
		}
		return cr.wasCAInjected.Load() != nil, nil
	}

	if err := wait.ExponentialBackoff(wait.Backoff{
		Duration: 1 * time.Second,
		Factor:   2,
		Jitter:   1,
		Steps:    10,
	}, checkFn); err != nil {
		log.Error(err, "max retries for checking CA injection")
		close(cr.caNotInjected)
		return
	}

	log.Info("CA certs are injected to webhooks")
	// IsReady is caller-supplied; closing a nil channel would panic.
	if cr.IsReady != nil {
		close(cr.IsReady)
	}
}
// KeyPair stores cert artifacts: a parsed certificate and its private key.
type KeyPair struct {
// Cert is the parsed X.509 certificate.
Cert *x509.Certificate
// Key is the private key matching Cert.
Key crypto.PrivateKey
}
// loadCert parses the TLS certificate and private key stored in secret s and
// returns them as a KeyPair. It fails when the two do not form a valid pair
// or the leading certificate cannot be parsed.
func loadCert(s *corev1.Secret) (*KeyPair, error) {
	certPEM := s.Data[corev1.TLSCertKey]
	keyPEM := s.Data[corev1.TLSPrivateKeyKey]

	pair, err := tls.X509KeyPair(certPEM, keyPEM)
	if err != nil {
		return nil, err
	}

	// Only the first certificate of the chain is used.
	parsed, err := x509.ParseCertificate(pair.Certificate[0])
	if err != nil {
		return nil, err
	}

	loaded := &KeyPair{Cert: parsed, Key: pair.PrivateKey}
	return loaded, nil
}
// validCert checks that the certificate stored in secret s forms a valid pair
// with its key and verifies against ca at the time lookahead. It returns nil
// when the certificate is valid, or the first error encountered.
func validCert(s *corev1.Secret, ca *x509.Certificate, lookahead time.Time) error {
	roots := x509.NewCertPool()
	roots.AddCert(ca)

	certPEM, keyPEM := s.Data[corev1.TLSCertKey], s.Data[corev1.TLSPrivateKeyKey]
	pair, err := tls.X509KeyPair(certPEM, keyPEM)
	if err != nil {
		return err
	}

	parsed, err := x509.ParseCertificate(pair.Certificate[0])
	if err != nil {
		return err
	}

	// Verify as of the lookahead time so that soon-to-expire certs fail.
	opts := x509.VerifyOptions{
		Roots:       roots,
		CurrentTime: lookahead,
	}
	_, verifyErr := parsed.Verify(opts)
	return verifyErr
}
// CreateCertPEM generates a new RSA key and certificate and returns the
// PEM-encoded certificate and private key, respectively.
//
// When ca is nil the certificate is a self-signed CA (IsCA with cert-sign key
// usage). When ca is non-nil the certificate is a leaf signed by ca: it is not
// a CA, is limited to server/client authentication, and takes hosts[0] as its
// common name. Each entry of hosts is added as an IP SAN if it parses as an IP
// address and as a DNS SAN otherwise.
func CreateCertPEM(ca *KeyPair, hosts ...string) ([]byte, []byte, error) {
	privKey, err := rsa.GenerateKey(rand.Reader, bits)
	if err != nil {
		return nil, nil, err
	}

	validFrom := time.Now().Add(-time.Hour) // valid an hour earlier to avoid flakes due to clock skew
	tpl := &x509.Certificate{
		NotBefore:             validFrom,
		NotAfter:              validFrom.Add(certValidityDuration),
		KeyUsage:              x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
		BasicConstraintsValid: true,
	}

	// Random 128-bit serial number.
	serialLimit := new(big.Int).Lsh(big.NewInt(1), 128)
	if tpl.SerialNumber, err = rand.Int(rand.Reader, serialLimit); err != nil {
		return nil, nil, err
	}

	for _, h := range hosts {
		if ip := net.ParseIP(h); ip != nil {
			tpl.IPAddresses = append(tpl.IPAddresses, ip)
		} else {
			tpl.DNSNames = append(tpl.DNSNames, h)
		}
	}

	if ca == nil {
		// Self-signed CA: only here may the certificate sign other certificates.
		tpl.IsCA = true
		tpl.KeyUsage |= x509.KeyUsageCertSign
		ca = &KeyPair{
			Key:  privKey,
			Cert: tpl,
		}
	} else {
		// Leaf certificate: must not be able to issue further certificates.
		tpl.ExtKeyUsage = []x509.ExtKeyUsage{
			x509.ExtKeyUsageServerAuth,
			x509.ExtKeyUsageClientAuth,
		}
		if len(hosts) > 0 {
			tpl.Subject.CommonName = hosts[0]
		}
	}

	derBytes, err := x509.CreateCertificate(rand.Reader, tpl, ca.Cert, privKey.Public(), ca.Key)
	if err != nil {
		return nil, nil, err
	}

	// PEM-encode the certificate and the PKCS#1 private key.
	certBuf := &bytes.Buffer{}
	if err := pem.Encode(certBuf, &pem.Block{Type: "CERTIFICATE", Bytes: derBytes}); err != nil {
		return nil, nil, fmt.Errorf("encoding cert %w", err)
	}
	keyBuf := &bytes.Buffer{}
	if err := pem.Encode(keyBuf, &pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(privKey)}); err != nil {
		return nil, nil, fmt.Errorf("encoding key %w", err)
	}
	return certBuf.Bytes(), keyBuf.Bytes(), nil
}
// ReconcileWH reconciles the CA secret against the configured webhook
// resources, making sure each of them carries the current CA cert.
type ReconcileWH struct {
client.Client
// secretKey is the namespaced name of the CA secret; requests for any other
// object are ignored by Reconcile.
secretKey types.NamespacedName
// webhooks lists the resources that receive the CA bundle.
webhooks []WebhookInfo
// wasCAInjected is set to true by Reconcile after ensureCerts returns
// without error.
wasCAInjected *atomic.Value
}
// +kubebuilder:rbac:groups="",resources=secrets,verbs=create;get;list;watch;update
// +kubebuilder:rbac:groups="admissionregistration.k8s.io",resources=validatingwebhookconfigurations;mutatingwebhookconfigurations,verbs=list;watch
// +kubebuilder:rbac:groups="admissionregistration.k8s.io",resources=validatingwebhookconfigurations,resourceNames=monitor-validating-webhook-configuration,verbs=get;update
// +kubebuilder:rbac:groups="admissionregistration.k8s.io",resources=mutatingwebhookconfigurations,resourceNames=monitor-mutating-webhook-configuration,verbs=get;update
// +kubebuilder:rbac:groups="apiextensions.k8s.io",resources=customresourcedefinitions,verbs=list;watch
// Reconcile handles requests for the CA secret: it reads the secret and makes
// sure the CA cert it holds is injected into every configured webhook
// resource. Requests for any other object, a missing secret, a secret being
// deleted, or a secret without a certificate entry are no-ops.
func (r *ReconcileWH) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	done := ctrl.Result{}

	if req.NamespacedName != r.secretKey {
		return done, nil
	}

	caSecret := new(corev1.Secret)
	switch err := r.Get(ctx, req.NamespacedName, caSecret); {
	case err == nil:
		// Secret found; carry on below.
	case apierrors.IsNotFound(err):
		// Object not found, return. Created objects are automatically garbage collected.
		// For additional cleanup logic use finalizers.
		return done, nil
	default:
		// Error reading the object - requeue the request.
		return ctrl.Result{Requeue: true}, err
	}

	// Nothing to inject from a secret that is being deleted.
	if !caSecret.GetDeletionTimestamp().IsZero() {
		return done, nil
	}

	caPem, present := caSecret.Data[corev1.TLSCertKey]
	if !present {
		return done, nil
	}

	// Ensure certs on webhooks.
	if err := r.ensureCerts(ctx, caPem); err != nil {
		return done, err
	}

	// Set CAInjected if the reconciler has not exited early.
	r.wasCAInjected.Store(true)
	return done, nil
}
// ensureCerts injects certPem, base64-encoded, as the CA bundle of every
// configured webhook resource and updates the resource.
//
// A failure for one webhook is logged and the remaining webhooks are still
// processed. If injecting into or updating any webhook failed, the last such
// error is returned so that the controller requeues the request and tries
// again; the full detail of every failure is in the logs, not in the returned
// error.
//
// Webhooks that cannot be fetched or that are being deleted are skipped and do
// not count as failures.
// NOTE(review): fetch errors other than "not found" are skipped as well,
// matching the previous behaviour — confirm whether they should requeue.
func (r *ReconcileWH) ensureCerts(ctx context.Context, certPem []byte) error {
	encodedPem := base64.StdEncoding.EncodeToString(certPem)

	// lastErr holds the most recent inject/update failure, if any.
	var lastErr error

	for _, wh := range r.webhooks {
		obj := new(unstructured.Unstructured)
		obj.SetGroupVersionKind(wh.GVK())
		if err := r.Get(ctx, types.NamespacedName{Name: wh.Name}, obj); err != nil {
			log.Error(err, "Webhook not found. Unable to update certificate.", "name", wh.Name)
			continue
		}
		if !obj.GetDeletionTimestamp().IsZero() {
			log.Info("Webhook is being deleted. Unable to update certificate", "name", wh.Name)
			continue
		}

		log.Info("Ensuring CA cert", "name", wh.Name, "GVK", wh.GVK())
		var err error
		switch wh.Type {
		case Validating, Mutating:
			err = injectCertToWebhook(obj, encodedPem)
		case CRDConversion:
			err = injectCertToConversionWebhook(obj, encodedPem)
		default:
			err = fmt.Errorf("incorrect webhook type `%v`", wh.Type)
		}
		if err != nil {
			log.Error(err, "Unable to inject cert to webhook.", "name", wh.Name)
			lastErr = err
			continue
		}

		if err = r.Update(ctx, obj); err != nil {
			log.Error(err, "Error updating webhook with certificate", "name", wh.Name)
			lastErr = err
		}
	}
	return lastErr
}
// injectCertToWebhook sets clientConfig.caBundle to certPem on every entry of
// the `webhooks` list of a validating or mutating webhook configuration.
//
// It returns an error when the `webhooks` field is missing or malformed, or
// when an entry is not an object.
func injectCertToWebhook(wh *unstructured.Unstructured, certPem string) error {
	webhooks, found, err := unstructured.NestedSlice(wh.Object, "webhooks")
	if err != nil {
		return err
	}
	if !found {
		// Name the actual resource: this function serves both validating and
		// mutating webhook configurations.
		return fmt.Errorf("`webhooks` field not found in %s `%s`", wh.GetKind(), wh.GetName())
	}

	for i, h := range webhooks {
		hook, ok := h.(map[string]interface{})
		if !ok {
			return fmt.Errorf("webhook %d is not well-formed", i)
		}
		// hook is the map stored in webhooks[i], so it is modified in place.
		if err = unstructured.SetNestedField(hook, certPem, "clientConfig", "caBundle"); err != nil {
			return err
		}
	}
	return unstructured.SetNestedSlice(wh.Object, webhooks, "webhooks")
}
// injectCertToConversionWebhook sets spec.conversion.webhook.clientConfig.caBundle
// of a CustomResourceDefinition to certPem. It returns an error when the CRD
// has no conversion webhook clientConfig.
func injectCertToConversionWebhook(crd *unstructured.Unstructured, certPem string) error {
	_, hasClientConfig, lookupErr := unstructured.NestedMap(crd.Object, "spec", "conversion", "webhook", "clientConfig")
	switch {
	case lookupErr != nil:
		return lookupErr
	case !hasClientConfig:
		return fmt.Errorf("`clientConfig` field not found in CustomResourceDefinition `%s`", crd.GetName())
	}

	return unstructured.SetNestedField(
		crd.Object, certPem,
		"spec", "conversion", "webhook", "clientConfig", "caBundle",
	)
}