From 0a0fc905a60b98e189cbc0992bbb3b650a924270 Mon Sep 17 00:00:00 2001 From: "Jonathan A. Sternberg" Date: Wed, 1 Apr 2026 15:44:51 -0500 Subject: [PATCH] feat: add persistent storage options to k8s driver The k8s driver now supports the `persistent-volume-claim.requests.storage` option. Setting this option changes the deployment into a statefulset and creates a persistent volume claim where the buildkit data is stored. Signed-off-by: Jonathan A. Sternberg --- driver/kubernetes/driver.go | 182 +++++++++---- driver/kubernetes/factory.go | 17 +- driver/kubernetes/kubeclient/client.go | 62 ++++- driver/kubernetes/manifest/manifest.go | 281 ++++++++++++++------- driver/kubernetes/podchooser/podchooser.go | 34 ++- 5 files changed, 409 insertions(+), 167 deletions(-) diff --git a/driver/kubernetes/driver.go b/driver/kubernetes/driver.go index c0dc40f53b2c..fdae382b6f1e 100644 --- a/driver/kubernetes/driver.go +++ b/driver/kubernetes/driver.go @@ -44,15 +44,17 @@ type Driver struct { // if you add fields, remember to update docs: // https://github.com/docker/docs/blob/main/content/build/drivers/kubernetes.md - minReplicas int - deployment *appsv1.Deployment - configMaps []*corev1.ConfigMap - deploymentClient kubeclient.DeploymentClient - podClient kubeclient.PodClient - configMapClient kubeclient.ConfigMapClient - podChooser podchooser.PodChooser - defaultLoad bool - timeout time.Duration + minReplicas int + deployment *appsv1.Deployment + statefulSet *appsv1.StatefulSet + configMaps []*corev1.ConfigMap + deploymentClient kubeclient.DeploymentClient + statefulSetClient kubeclient.StatefulSetClient + podClient kubeclient.PodClient + configMapClient kubeclient.ConfigMapClient + podChooser podchooser.PodChooser + defaultLoad bool + timeout time.Duration } func (d *Driver) IsMobyDriver() bool { @@ -65,31 +67,18 @@ func (d *Driver) Config() driver.InitConfig { func (d *Driver) Bootstrap(ctx context.Context, l progress.Logger) error { return progress.Wrap("[internal] booting 
buildkit", l, func(sub progress.SubLogger) error { - _, err := d.deploymentClient.Get(ctx, d.deployment.Name, metav1.GetOptions{}) - if err != nil { - if !apierrors.IsNotFound(err) { - return errors.Wrapf(err, "error for bootstrap %q", d.deployment.Name) - } - - for _, cfg := range d.configMaps { - // create ConfigMap first if exists - _, err = d.configMapClient.Create(ctx, cfg, metav1.CreateOptions{}) - if err != nil { - if !apierrors.IsAlreadyExists(err) { - return errors.Wrapf(err, "error while calling configMapClient.Create for %q", cfg.Name) - } - _, err = d.configMapClient.Update(ctx, cfg, metav1.UpdateOptions{}) - if err != nil { - return errors.Wrapf(err, "error while calling configMapClient.Update for %q", cfg.Name) - } - } + if d.deployment != nil { + if err := bootstrap(ctx, d, d.deploymentClient, d.deployment.Name, d.deployment); err != nil { + return err } + } - _, err = d.deploymentClient.Create(ctx, d.deployment, metav1.CreateOptions{}) - if err != nil { - return errors.Wrapf(err, "error while calling deploymentClient.Create for %q", d.deployment.Name) + if d.statefulSet != nil { + if err := bootstrap(ctx, d, d.statefulSetClient, d.statefulSet.Name, d.statefulSet); err != nil { + return err } } + return sub.Wrap( fmt.Sprintf("waiting for %d pods to be ready, timeout: %s", d.minReplicas, units.HumanDuration(d.timeout)), func() error { @@ -98,11 +87,76 @@ func (d *Driver) Bootstrap(ctx context.Context, l progress.Logger) error { }) } +type appClient[S any] interface { + Get(ctx context.Context, name string, opts metav1.GetOptions) (*S, error) + Create(ctx context.Context, spec *S, opts metav1.CreateOptions) (*S, error) + Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error +} + +func bootstrap[S any](ctx context.Context, d *Driver, client appClient[S], name string, spec *S) error { + if _, err := client.Get(ctx, name, metav1.GetOptions{}); err != nil { + if !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "error for 
bootstrap %q", name) + } + + for _, cfg := range d.configMaps { + // create ConfigMap first if exists + if _, err = d.configMapClient.Create(ctx, cfg, metav1.CreateOptions{}); err != nil { + if !apierrors.IsAlreadyExists(err) { + return errors.Wrapf(err, "error while calling configMapClient.Create for %q", cfg.Name) + } + + if _, err = d.configMapClient.Update(ctx, cfg, metav1.UpdateOptions{}); err != nil { + return errors.Wrapf(err, "error while calling configMapClient.Update for %q", cfg.Name) + } + } + } + + if _, err = client.Create(ctx, spec, metav1.CreateOptions{}); err != nil { + return errors.Wrapf(err, "error while calling Create for %q", name) + } + } + return nil +} + func (d *Driver) wait(ctx context.Context) error { + if d.deployment != nil { + if err := d.waitDeployments(ctx); err != nil { + return err + } + } + + if d.statefulSet != nil { + if err := d.waitStatefulSets(ctx); err != nil { + return err + } + } + return nil +} + +func (d *Driver) waitDeployments(ctx context.Context) error { + return wait(ctx, d, d.deploymentClient, d.deployment.Name, func(s *appsv1.Deployment) error { + if s.Status.ReadyReplicas < int32(d.minReplicas) { + return errors.Errorf("expected %d replicas to be ready, got %d", d.minReplicas, s.Status.ReadyReplicas) + } + return nil + }) +} + +func (d *Driver) waitStatefulSets(ctx context.Context) error { + return wait(ctx, d, d.statefulSetClient, d.statefulSet.Name, func(s *appsv1.StatefulSet) error { + if s.Status.ReadyReplicas < int32(d.minReplicas) { + return errors.Errorf("expected %d replicas to be ready, got %d", d.minReplicas, s.Status.ReadyReplicas) + } + return nil + }) +} + +func wait[S any](ctx context.Context, d *Driver, client appClient[S], name string, check func(*S) error) error { // TODO: use watch API var ( err error - depl *appsv1.Deployment + spec *S ) timeoutChan := time.After(d.timeout) @@ -116,31 +170,50 @@ func (d *Driver) wait(ctx context.Context) error { case <-timeoutChan: return err case
<-ticker.C: - depl, err = d.deploymentClient.Get(ctx, d.deployment.Name, metav1.GetOptions{}) + spec, err = client.Get(ctx, name, metav1.GetOptions{}) if err == nil { - if depl.Status.ReadyReplicas >= int32(d.minReplicas) { + if err = check(spec); err == nil { return nil } - err = errors.Errorf("expected %d replicas to be ready, got %d", d.minReplicas, depl.Status.ReadyReplicas) } } } } -func (d *Driver) Info(ctx context.Context) (*driver.Info, error) { - depl, err := d.deploymentClient.Get(ctx, d.deployment.Name, metav1.GetOptions{}) - if err != nil { - // TODO: return err if err != ErrNotFound - return &driver.Info{ - Status: driver.Inactive, - }, nil +func (d *Driver) Info(ctx context.Context) (_ *driver.Info, err error) { + var depl *appsv1.Deployment + if d.deployment != nil { + depl, err = d.deploymentClient.Get(ctx, d.deployment.Name, metav1.GetOptions{}) + if err != nil { + // TODO: return err if err != ErrNotFound + return &driver.Info{ + Status: driver.Inactive, + }, nil + } + if depl.Status.ReadyReplicas <= 0 { + return &driver.Info{ + Status: driver.Stopped, + }, nil + } } - if depl.Status.ReadyReplicas <= 0 { - return &driver.Info{ - Status: driver.Stopped, - }, nil + + var stat *appsv1.StatefulSet + if d.statefulSet != nil { + stat, err = d.statefulSetClient.Get(ctx, d.statefulSet.Name, metav1.GetOptions{}) + if err != nil { + // TODO: return err if err != ErrNotFound + return &driver.Info{ + Status: driver.Inactive, + }, nil + } + if stat.Status.ReadyReplicas <= 0 { + return &driver.Info{ + Status: driver.Stopped, + }, nil + } } - pods, err := podchooser.ListRunningPods(ctx, d.podClient, depl) + + pods, err := podchooser.ListRunningPods(ctx, d.podClient, depl, stat) if err != nil { return nil, err } @@ -182,11 +255,22 @@ func (d *Driver) Rm(ctx context.Context, force, rmVolume, rmDaemon bool) error { return nil } - if err := d.deploymentClient.Delete(ctx, d.deployment.Name, metav1.DeleteOptions{}); err != nil { - if !apierrors.IsNotFound(err) { -
return errors.Wrapf(err, "error while calling deploymentClient.Delete for %q", d.deployment.Name) + if d.deployment != nil { + if err := d.deploymentClient.Delete(ctx, d.deployment.Name, metav1.DeleteOptions{}); err != nil { + if !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "error while calling deploymentClient.Delete for %q", d.deployment.Name) + } } } + + if d.statefulSet != nil { + if err := d.statefulSetClient.Delete(ctx, d.statefulSet.Name, metav1.DeleteOptions{}); err != nil { + if !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "error while calling statefulSetClient.Delete for %q", d.statefulSet.Name) + } + } + } + for _, cfg := range d.configMaps { if err := d.configMapClient.Delete(ctx, cfg.Name, metav1.DeleteOptions{}); err != nil { if !apierrors.IsNotFound(err) { diff --git a/driver/kubernetes/factory.go b/driver/kubernetes/factory.go index c6f50f15a835..284e6ab3946e 100644 --- a/driver/kubernetes/factory.go +++ b/driver/kubernetes/factory.go @@ -131,7 +131,7 @@ func (f *factory) New(ctx context.Context, cfg driver.InitConfig) (driver.Driver d.defaultLoad = defaultLoad d.timeout = timeout - d.deployment, d.configMaps, err = manifest.NewDeployment(deploymentOpt) + d.deployment, d.statefulSet, d.configMaps, err = manifest.NewDeployment(deploymentOpt) if err != nil { return nil, err } @@ -143,20 +143,23 @@ func (f *factory) New(ctx context.Context, cfg driver.InitConfig) (driver.Driver return nil, err } d.deploymentClient = clients.Deployments + d.statefulSetClient = clients.StatefulSets d.podClient = clients.Pods d.configMapClient = clients.ConfigMaps switch loadbalance { case LoadbalanceSticky: d.podChooser = &podchooser.StickyPodChooser{ - Key: cfg.ContextPathHash, - PodClient: d.podClient, - Deployment: d.deployment, + Key: cfg.ContextPathHash, + PodClient: d.podClient, + Deployment: d.deployment, + StatefulSet: d.statefulSet, } case LoadbalanceRandom: d.podChooser = &podchooser.RandomPodChooser{ - PodClient: d.podClient, - 
Deployment: d.deployment, + PodClient: d.podClient, + Deployment: d.deployment, + StatefulSet: d.statefulSet, } } return d, nil @@ -199,6 +202,8 @@ func (f *factory) processDriverOpts(deploymentName string, namespace string, cfg deploymentOpt.RequestsMemory = v case k == "requests.ephemeral-storage": deploymentOpt.RequestsEphemeralStorage = v + case k == "persistent-volume-claim.requests.storage": + deploymentOpt.RequestsPersistentStorage = v case k == "limits.cpu": deploymentOpt.LimitsCPU = v case k == "limits.memory": diff --git a/driver/kubernetes/kubeclient/client.go b/driver/kubernetes/kubeclient/client.go index 83e17686516c..c52c18ecac74 100644 --- a/driver/kubernetes/kubeclient/client.go +++ b/driver/kubernetes/kubeclient/client.go @@ -17,6 +17,12 @@ type DeploymentClient interface { Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error } +type StatefulSetClient interface { + Get(ctx context.Context, name string, opts metav1.GetOptions) (*appsv1.StatefulSet, error) + Create(ctx context.Context, deployment *appsv1.StatefulSet, opts metav1.CreateOptions) (*appsv1.StatefulSet, error) + Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error +} + type ConfigMapClient interface { Create(ctx context.Context, configMap *corev1.ConfigMap, opts metav1.CreateOptions) (*corev1.ConfigMap, error) Update(ctx context.Context, configMap *corev1.ConfigMap, opts metav1.UpdateOptions) (*corev1.ConfigMap, error) @@ -29,9 +35,10 @@ type PodClient interface { } type Clients struct { - Deployments DeploymentClient - ConfigMaps ConfigMapClient - Pods PodClient + Deployments DeploymentClient + StatefulSets StatefulSetClient + ConfigMaps ConfigMapClient + Pods PodClient } func New(config *rest.Config, namespace string) (*Clients, error) { @@ -51,9 +58,10 @@ func New(config *rest.Config, namespace string) (*Clients, error) { } return &Clients{ - Deployments: &deploymentClient{client: appsClient, namespace: namespace}, - ConfigMaps: 
&configMapClient{client: coreClient, namespace: namespace}, - Pods: &podClient{client: coreClient, namespace: namespace}, + Deployments: &deploymentClient{client: appsClient, namespace: namespace}, + StatefulSets: &statefulSetClient{client: appsClient, namespace: namespace}, + ConfigMaps: &configMapClient{client: coreClient, namespace: namespace}, + Pods: &podClient{client: coreClient, namespace: namespace}, }, nil } @@ -110,6 +118,48 @@ func (c *deploymentClient) Delete(ctx context.Context, name string, opts metav1. Error() } +type statefulSetClient struct { + client rest.Interface + namespace string +} + +func (c *statefulSetClient) Get(ctx context.Context, name string, opts metav1.GetOptions) (*appsv1.StatefulSet, error) { + result := &appsv1.StatefulSet{} + err := c.client.Get(). + UseProtobufAsDefault(). + Namespace(c.namespace). + Resource("statefulsets"). + Name(name). + VersionedParams(&opts, ParameterCodec()). + Do(ctx). + Into(result) + return result, err +} + +func (c *statefulSetClient) Create(ctx context.Context, statefulSet *appsv1.StatefulSet, opts metav1.CreateOptions) (*appsv1.StatefulSet, error) { + result := &appsv1.StatefulSet{} + err := c.client.Post(). + UseProtobufAsDefault(). + Namespace(c.namespace). + Resource("statefulsets"). + VersionedParams(&opts, ParameterCodec()). + Body(statefulSet). + Do(ctx). + Into(result) + return result, err +} + +func (c *statefulSetClient) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { + return c.client.Delete(). + UseProtobufAsDefault(). + Namespace(c.namespace). + Resource("statefulsets"). + Name(name). + Body(&opts). + Do(ctx). 
+ Error() +} + type configMapClient struct { client rest.Interface namespace string diff --git a/driver/kubernetes/manifest/manifest.go b/driver/kubernetes/manifest/manifest.go index 931cb48f6a66..8199df000087 100644 --- a/driver/kubernetes/manifest/manifest.go +++ b/driver/kubernetes/manifest/manifest.go @@ -32,28 +32,36 @@ type DeploymentOpt struct { // files mounted at /etc/buildkitd ConfigFiles map[string][]byte - BuildKitRootVolumeMemory string - Rootless bool - NodeSelector map[string]string - CustomAnnotations map[string]string - CustomLabels map[string]string - Tolerations []corev1.Toleration - RequestsCPU string - RequestsMemory string - RequestsEphemeralStorage string - LimitsCPU string - LimitsMemory string - LimitsEphemeralStorage string - Platforms []ocispecs.Platform - Env []corev1.EnvVar // injected into main buildkitd container + BuildKitRootVolumeMemory string + Rootless bool + NodeSelector map[string]string + CustomAnnotations map[string]string + CustomLabels map[string]string + Tolerations []corev1.Toleration + RequestsCPU string + RequestsMemory string + RequestsEphemeralStorage string + RequestsPersistentStorage string + LimitsCPU string + LimitsMemory string + LimitsEphemeralStorage string + Platforms []ocispecs.Platform + Env []corev1.EnvVar // injected into main buildkitd container } const ( - containerName = "buildkitd" - AnnotationPlatform = "buildx.docker.com/platform" - LabelApp = "app" - rootVolumeName = "buildkit-memory" - rootVolumePath = "/var/lib/buildkit" + containerName = "buildkitd" + AnnotationPlatform = "buildx.docker.com/platform" + LabelApp = "app" + rootVolumeName = "buildkit-memory" + rootVolumePath = "/var/lib/buildkit" + persistentVolumeClaimName = "buildkitd" + + probeFailureThreshold = 3 + probeInitialDelaySeconds = 5 + probePeriodSeconds = 30 + probeSuccessThreshold = 1 + probeTimeoutSeconds = 60 ) type ErrReservedAnnotationPlatform struct{} @@ -68,7 +76,7 @@ func (ErrReservedLabelApp) Error() string { return 
fmt.Sprintf("the label %q is reserved and cannot be customized", LabelApp) } -func NewDeployment(opt *DeploymentOpt) (d *appsv1.Deployment, c []*corev1.ConfigMap, err error) { +func NewDeployment(opt *DeploymentOpt) (d *appsv1.Deployment, s *appsv1.StatefulSet, c []*corev1.ConfigMap, err error) { labels := map[string]string{ LabelApp: opt.Name, } @@ -83,67 +91,75 @@ func NewDeployment(opt *DeploymentOpt) (d *appsv1.Deployment, c []*corev1.Config for k, v := range opt.CustomAnnotations { if k == AnnotationPlatform { - return nil, nil, ErrReservedAnnotationPlatform{} + return nil, nil, nil, ErrReservedAnnotationPlatform{} } annotations[k] = v } for k, v := range opt.CustomLabels { if k == LabelApp { - return nil, nil, ErrReservedLabelApp{} + return nil, nil, nil, ErrReservedLabelApp{} } labels[k] = v } - d = &appsv1.Deployment{ - TypeMeta: metav1.TypeMeta{ - APIVersion: appsv1.SchemeGroupVersion.String(), - Kind: "Deployment", - }, + probeHandlerCommand := []string{"buildctl", "debug", "workers"} + podTemplate := corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ - Namespace: opt.Namespace, - Name: opt.Name, Labels: labels, Annotations: annotations, }, - Spec: appsv1.DeploymentSpec{ - Replicas: &replicas, - Selector: &metav1.LabelSelector{ - MatchLabels: labels, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: labels, - Annotations: annotations, - }, - Spec: corev1.PodSpec{ - ServiceAccountName: opt.ServiceAccountName, - SchedulerName: opt.SchedulerName, - Containers: []corev1.Container{ - { - Name: containerName, - Image: opt.Image, - Args: args, - SecurityContext: &corev1.SecurityContext{ - Privileged: &privileged, - }, - ReadinessProbe: &corev1.Probe{ - ProbeHandler: corev1.ProbeHandler{ - Exec: &corev1.ExecAction{ - Command: []string{"buildctl", "debug", "workers"}, - }, - }, + Spec: corev1.PodSpec{ + ServiceAccountName: opt.ServiceAccountName, + SchedulerName: opt.SchedulerName, + Containers: []corev1.Container{ + { + Name: 
containerName, + Image: opt.Image, + Args: args, + SecurityContext: &corev1.SecurityContext{ + Privileged: &privileged, + }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: probeHandlerCommand, }, - Resources: corev1.ResourceRequirements{ - Requests: corev1.ResourceList{}, - Limits: corev1.ResourceList{}, + }, + FailureThreshold: probeFailureThreshold, + InitialDelaySeconds: probeInitialDelaySeconds, + PeriodSeconds: probePeriodSeconds, + SuccessThreshold: probeSuccessThreshold, + TimeoutSeconds: probeTimeoutSeconds, + }, + LivenessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + Exec: &corev1.ExecAction{ + Command: probeHandlerCommand, }, }, + FailureThreshold: probeFailureThreshold, + InitialDelaySeconds: probeInitialDelaySeconds, + PeriodSeconds: probePeriodSeconds, + SuccessThreshold: probeSuccessThreshold, + TimeoutSeconds: probeTimeoutSeconds, + }, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{}, + Limits: corev1.ResourceList{}, }, }, }, }, } + + meta := metav1.ObjectMeta{ + Namespace: opt.Namespace, + Name: opt.Name, + Labels: labels, + Annotations: annotations, + } + for _, cfg := range splitConfigFiles(opt.ConfigFiles) { cc := &corev1.ConfigMap{ TypeMeta: metav1.TypeMeta{ @@ -158,12 +174,12 @@ func NewDeployment(opt *DeploymentOpt) (d *appsv1.Deployment, c []*corev1.Config Data: cfg.files, } - d.Spec.Template.Spec.Containers[0].VolumeMounts = append(d.Spec.Template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{ + podTemplate.Spec.Containers[0].VolumeMounts = append(podTemplate.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{ Name: cfg.name, MountPath: path.Join("/etc/buildkit", cfg.path), }) - d.Spec.Template.Spec.Volumes = append(d.Spec.Template.Spec.Volumes, corev1.Volume{ + podTemplate.Spec.Volumes = append(podTemplate.Spec.Volumes, corev1.Volume{ Name: cfg.name, VolumeSource: corev1.VolumeSource{ ConfigMap: &corev1.ConfigMapVolumeSource{ @@ 
-177,7 +193,7 @@ func NewDeployment(opt *DeploymentOpt) (d *appsv1.Deployment, c []*corev1.Config } if opt.Qemu.Install { - d.Spec.Template.Spec.InitContainers = []corev1.Container{ + podTemplate.Spec.InitContainers = []corev1.Container{ { Name: "qemu", Image: opt.Qemu.Image, @@ -190,73 +206,73 @@ func NewDeployment(opt *DeploymentOpt) (d *appsv1.Deployment, c []*corev1.Config } if opt.Rootless { - if err := toRootless(d); err != nil { - return nil, nil, err + if err := toRootless(&podTemplate); err != nil { + return nil, nil, nil, err } } if len(opt.NodeSelector) > 0 { - d.Spec.Template.Spec.NodeSelector = opt.NodeSelector + podTemplate.Spec.NodeSelector = opt.NodeSelector } if len(opt.Tolerations) > 0 { - d.Spec.Template.Spec.Tolerations = opt.Tolerations + podTemplate.Spec.Tolerations = opt.Tolerations } if opt.RequestsCPU != "" { reqCPU, err := resource.ParseQuantity(opt.RequestsCPU) if err != nil { - return nil, nil, err + return nil, nil, nil, err } - d.Spec.Template.Spec.Containers[0].Resources.Requests[corev1.ResourceCPU] = reqCPU + podTemplate.Spec.Containers[0].Resources.Requests[corev1.ResourceCPU] = reqCPU } if opt.RequestsMemory != "" { reqMemory, err := resource.ParseQuantity(opt.RequestsMemory) if err != nil { - return nil, nil, err + return nil, nil, nil, err } - d.Spec.Template.Spec.Containers[0].Resources.Requests[corev1.ResourceMemory] = reqMemory + podTemplate.Spec.Containers[0].Resources.Requests[corev1.ResourceMemory] = reqMemory } if opt.RequestsEphemeralStorage != "" { - reqEphemeralStorage, err := resource.ParseQuantity(opt.RequestsEphemeralStorage) + reqStorage, err := resource.ParseQuantity(opt.RequestsEphemeralStorage) if err != nil { - return nil, nil, err + return nil, nil, nil, err } - d.Spec.Template.Spec.Containers[0].Resources.Requests[corev1.ResourceEphemeralStorage] = reqEphemeralStorage + podTemplate.Spec.Containers[0].Resources.Requests[corev1.ResourceEphemeralStorage] = reqStorage } if opt.LimitsCPU != "" { limCPU, err := 
resource.ParseQuantity(opt.LimitsCPU) if err != nil { - return nil, nil, err + return nil, nil, nil, err } - d.Spec.Template.Spec.Containers[0].Resources.Limits[corev1.ResourceCPU] = limCPU + podTemplate.Spec.Containers[0].Resources.Limits[corev1.ResourceCPU] = limCPU } if opt.LimitsMemory != "" { limMemory, err := resource.ParseQuantity(opt.LimitsMemory) if err != nil { - return nil, nil, err + return nil, nil, nil, err } - d.Spec.Template.Spec.Containers[0].Resources.Limits[corev1.ResourceMemory] = limMemory + podTemplate.Spec.Containers[0].Resources.Limits[corev1.ResourceMemory] = limMemory } if opt.LimitsEphemeralStorage != "" { - limEphemeralStorage, err := resource.ParseQuantity(opt.LimitsEphemeralStorage) + limStorage, err := resource.ParseQuantity(opt.LimitsEphemeralStorage) if err != nil { - return nil, nil, err + return nil, nil, nil, err } - d.Spec.Template.Spec.Containers[0].Resources.Limits[corev1.ResourceEphemeralStorage] = limEphemeralStorage + podTemplate.Spec.Containers[0].Resources.Limits[corev1.ResourceEphemeralStorage] = limStorage } if opt.BuildKitRootVolumeMemory != "" { buildKitRootVolumeMemory, err := resource.ParseQuantity(opt.BuildKitRootVolumeMemory) if err != nil { - return nil, nil, err + return nil, nil, nil, err } - d.Spec.Template.Spec.Volumes = append(d.Spec.Template.Spec.Volumes, corev1.Volume{ + podTemplate.Spec.Volumes = append(podTemplate.Spec.Volumes, corev1.Volume{ Name: rootVolumeName, VolumeSource: corev1.VolumeSource{ EmptyDir: &corev1.EmptyDirVolumeSource{ @@ -265,45 +281,122 @@ func NewDeployment(opt *DeploymentOpt) (d *appsv1.Deployment, c []*corev1.Config }, }, }) - d.Spec.Template.Spec.Containers[0].VolumeMounts = append(d.Spec.Template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{ + podTemplate.Spec.Containers[0].VolumeMounts = append(podTemplate.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{ Name: rootVolumeName, MountPath: rootVolumePath, }) } if len(opt.Env) > 0 { - 
d.Spec.Template.Spec.Containers[0].Env = append(d.Spec.Template.Spec.Containers[0].Env, opt.Env...) + podTemplate.Spec.Containers[0].Env = append(podTemplate.Spec.Containers[0].Env, opt.Env...) + } + + if opt.IsPersistentStorage() { + s = &appsv1.StatefulSet{ + TypeMeta: metav1.TypeMeta{ + APIVersion: appsv1.SchemeGroupVersion.String(), + Kind: "StatefulSet", + }, + ObjectMeta: meta, + Spec: appsv1.StatefulSetSpec{ + ServiceName: "buildkitd", + PodManagementPolicy: appsv1.ParallelPodManagement, + Replicas: &replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + Template: podTemplate, + }, + } + + if err := setupPersistentStorage(s, opt); err != nil { + return nil, nil, nil, err + } + } else { + d = &appsv1.Deployment{ + TypeMeta: metav1.TypeMeta{ + APIVersion: appsv1.SchemeGroupVersion.String(), + Kind: "Deployment", + }, + ObjectMeta: meta, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + Template: podTemplate, + }, + } } return } -func toRootless(d *appsv1.Deployment) error { - d.Spec.Template.Spec.Containers[0].Args = append( - d.Spec.Template.Spec.Containers[0].Args, +func (opt *DeploymentOpt) IsPersistentStorage() bool { + return opt.RequestsPersistentStorage != "" +} + +func setupPersistentStorage(s *appsv1.StatefulSet, opt *DeploymentOpt) error { + reqStorage, err := resource.ParseQuantity(opt.RequestsPersistentStorage) + if err != nil { + return err + } + + // Rootless already sets up the data mount. 
+ if !opt.Rootless { + s.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{ + { + Name: persistentVolumeClaimName, + ReadOnly: false, + MountPath: "/var/lib/buildkit", + }, + } + } + + s.Spec.VolumeClaimTemplates = []corev1.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: persistentVolumeClaimName, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: reqStorage, + }, + }, + }, + }, + } + return nil +} + +func toRootless(p *corev1.PodTemplateSpec) error { + p.Spec.Containers[0].Args = append( + p.Spec.Containers[0].Args, "--oci-worker-no-process-sandbox", ) - d.Spec.Template.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{ + p.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{ SeccompProfile: &corev1.SeccompProfile{ Type: corev1.SeccompProfileTypeUnconfined, }, } - if d.Spec.Template.Annotations == nil { - d.Spec.Template.Annotations = make(map[string]string, 1) + if p.Annotations == nil { + p.Annotations = make(map[string]string, 1) } - d.Spec.Template.Annotations["container.apparmor.security.beta.kubernetes.io/"+containerName] = "unconfined" + p.Annotations["container.apparmor.security.beta.kubernetes.io/"+containerName] = "unconfined" // Dockerfile has `VOLUME /home/user/.local/share/buildkit` by default too, // but the default VOLUME does not work with rootless on Google's Container-Optimized OS // as it is mounted with `nosuid,nodev`. 
// https://github.com/moby/buildkit/issues/879#issuecomment-1240347038 // https://github.com/moby/buildkit/pull/3097 - const emptyDirVolName = "buildkitd" - d.Spec.Template.Spec.Containers[0].VolumeMounts = append(d.Spec.Template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{ + const emptyDirVolName = persistentVolumeClaimName + p.Spec.Containers[0].VolumeMounts = append(p.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{ Name: emptyDirVolName, MountPath: "/home/user/.local/share/buildkit", }) - d.Spec.Template.Spec.Volumes = append(d.Spec.Template.Spec.Volumes, corev1.Volume{ + p.Spec.Volumes = append(p.Spec.Volumes, corev1.Volume{ Name: emptyDirVolName, VolumeSource: corev1.VolumeSource{ EmptyDir: &corev1.EmptyDirVolumeSource{}, diff --git a/driver/kubernetes/podchooser/podchooser.go b/driver/kubernetes/podchooser/podchooser.go index 80275abe687b..feb87c30a19a 100644 --- a/driver/kubernetes/podchooser/podchooser.go +++ b/driver/kubernetes/podchooser/podchooser.go @@ -20,13 +20,14 @@ type PodChooser interface { } type RandomPodChooser struct { - RandSource rand.Source - PodClient kubeclient.PodClient - Deployment *appsv1.Deployment + RandSource rand.Source + PodClient kubeclient.PodClient + Deployment *appsv1.Deployment + StatefulSet *appsv1.StatefulSet } func (pc *RandomPodChooser) ChoosePod(ctx context.Context) (*corev1.Pod, error) { - pods, err := ListRunningPods(ctx, pc.PodClient, pc.Deployment) + pods, err := ListRunningPods(ctx, pc.PodClient, pc.Deployment, pc.StatefulSet) if err != nil { return nil, err } @@ -44,13 +45,14 @@ func (pc *RandomPodChooser) ChoosePod(ctx context.Context) (*corev1.Pod, error) } type StickyPodChooser struct { - Key string - PodClient kubeclient.PodClient - Deployment *appsv1.Deployment + Key string + PodClient kubeclient.PodClient + Deployment *appsv1.Deployment + StatefulSet *appsv1.StatefulSet } func (pc *StickyPodChooser) ChoosePod(ctx context.Context) (*corev1.Pod, error) { - pods, err := ListRunningPods(ctx, 
pc.PodClient, pc.Deployment) + pods, err := ListRunningPods(ctx, pc.PodClient, pc.Deployment, pc.StatefulSet) if err != nil { return nil, err } @@ -66,16 +68,24 @@ func (pc *StickyPodChooser) ChoosePod(ctx context.Context) (*corev1.Pod, error) // NOTREACHED logrus.Errorf("no pod found for key %q", pc.Key) rpc := &RandomPodChooser{ - PodClient: pc.PodClient, - Deployment: pc.Deployment, + PodClient: pc.PodClient, + Deployment: pc.Deployment, + StatefulSet: pc.StatefulSet, } return rpc.ChoosePod(ctx) } return podMap[chosen], nil } -func ListRunningPods(ctx context.Context, client kubeclient.PodClient, depl *appsv1.Deployment) ([]*corev1.Pod, error) { - selector, err := metav1.LabelSelectorAsSelector(depl.Spec.Selector) +func ListRunningPods(ctx context.Context, client kubeclient.PodClient, depl *appsv1.Deployment, stat *appsv1.StatefulSet) ([]*corev1.Pod, error) { + var labelSelector *metav1.LabelSelector + if depl != nil { + labelSelector = depl.Spec.Selector + } else if stat != nil { + labelSelector = stat.Spec.Selector + } + + selector, err := metav1.LabelSelectorAsSelector(labelSelector) if err != nil { return nil, err }