From dea870f4eafac1efb1d3a70d8196da11bfa6c59c Mon Sep 17 00:00:00 2001 From: Cory Snider Date: Mon, 5 Jun 2023 18:44:51 -0400 Subject: [PATCH] daemon: stop setting container resources to zero Many of the fields in LinuxResources struct are pointers to scalars for some reason, presumably to differentiate between set-to-zero and unset when unmarshaling from JSON, despite zero being outside the acceptable range for the corresponding kernel tunables. When creating the OCI spec for a container, the daemon sets the container's OCI spec CPUShares and BlkioWeight parameters to zero when the corresponding Docker container configuration values are zero, signifying unset, despite the minimum acceptable value for CPUShares being two, and BlkioWeight ten. This has gone unnoticed as runC does not distingiush set-to-zero from unset as it also uses zero internally to represent unset for those fields. However, kata-containers v3.2.0-alpha.3 tries to apply the explicit-zero resource parameters to the container, exactly as instructed, and fails loudly. The OCI runtime-spec is silent on how the runtime should handle the case when those parameters are explicitly set to out-of-range values and kata's behaviour is not unreasonable, so the daemon must therefore be in the wrong. Translate unset values in the Docker container's resources HostConfig to omit the corresponding fields in the container's OCI spec when starting and updating a container in order to maximize compatibility with runtimes. Signed-off-by: Cory Snider --- daemon/daemon_unix.go | 12 +++++-- daemon/oci_linux.go | 6 ++-- daemon/oci_linux_test.go | 37 ++++++++++++++++++++ daemon/update_linux.go | 50 +++++++++++++++++++--------- daemon/update_linux_test.go | 11 ++++++ libcontainerd/remote/client_linux.go | 4 +-- libcontainerd/types/types_linux.go | 2 +- 7 files changed, 98 insertions(+), 24 deletions(-) create mode 100644 daemon/update_linux_test.go diff --git a/daemon/daemon_unix.go b/daemon/daemon_unix.go index 0776c4e2d8..7a0f0dd552 100644 --- a/daemon/daemon_unix.go +++ b/daemon/daemon_unix.go @@ -104,7 +104,10 @@ func getMemoryResources(config containertypes.Resources) *specs.LinuxMemory { memory.KernelTCP = &config.KernelMemoryTCP } - return &memory + if memory != (specs.LinuxMemory{}) { + return &memory + } + return nil } func getPidsLimit(config containertypes.Resources) *specs.LinuxPids { @@ -126,7 +129,7 @@ func getCPUResources(config containertypes.Resources) (*specs.LinuxCPU, error) { if config.CPUShares < 0 { return nil, fmt.Errorf("shares: invalid argument") } - if config.CPUShares >= 0 { + if config.CPUShares > 0 { shares := uint64(config.CPUShares) cpu.Shares = &shares } @@ -167,7 +170,10 @@ func getCPUResources(config containertypes.Resources) (*specs.LinuxCPU, error) { cpu.RealtimeRuntime = &c } - return &cpu, nil + if cpu != (specs.LinuxCPU{}) { + return &cpu, nil + } + return nil, nil } func getBlkioWeightDevices(config containertypes.Resources) ([]specs.LinuxWeightDevice, error) { diff --git a/daemon/oci_linux.go b/daemon/oci_linux.go index e336d2c4df..1bee7b4c01 100644 --- a/daemon/oci_linux.go +++ b/daemon/oci_linux.go @@ -973,13 +973,11 @@ func WithResources(c *container.Container) coci.SpecOpts { if err != nil { return err } - blkioWeight := r.BlkioWeight specResources := &specs.LinuxResources{ Memory: memoryRes, CPU: cpuRes, BlockIO: &specs.LinuxBlockIO{ - Weight: &blkioWeight, WeightDevice: weightDevices, ThrottleReadBpsDevice: readBpsDevice, ThrottleWriteBpsDevice: writeBpsDevice, @@ -988,6 +986,10 @@ func WithResources(c *container.Container) coci.SpecOpts { }, Pids: getPidsLimit(r), } + if r.BlkioWeight != 0 { + w := r.BlkioWeight + specResources.BlockIO.Weight = &w + } if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 { specResources.Devices = s.Linux.Resources.Devices diff --git a/daemon/oci_linux_test.go b/daemon/oci_linux_test.go index ed9fd50d00..787451cbb5 100644 --- a/daemon/oci_linux_test.go +++ b/daemon/oci_linux_test.go @@ -11,6 +11,8 @@ import ( "github.com/docker/docker/daemon/config" "github.com/docker/docker/daemon/network" "github.com/docker/docker/libnetwork" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/sys/unix" "gotest.tools/v3/assert" is "gotest.tools/v3/assert/cmp" @@ -212,3 +214,38 @@ func TestGetSourceMount(t *testing.T) { _, _, err = getSourceMount(cwd) assert.NilError(t, err) } + +func TestDefaultResources(t *testing.T) { + skip.If(t, os.Getuid() != 0, "skipping test that requires root") // TODO: is this actually true? I'm guilty of following the cargo cult here. + + c := &container.Container{ + HostConfig: &containertypes.HostConfig{ + IpcMode: containertypes.IPCModeNone, + }, + } + d := setupFakeDaemon(t, c) + + s, err := d.createSpec(context.Background(), &configStore{}, c) + assert.NilError(t, err) + checkResourcesAreUnset(t, s.Linux.Resources) +} + +func checkResourcesAreUnset(t *testing.T, r *specs.LinuxResources) { + t.Helper() + + if r != nil { + if r.Memory != nil { + assert.Check(t, is.DeepEqual(r.Memory, &specs.LinuxMemory{})) + } + if r.CPU != nil { + assert.Check(t, is.DeepEqual(r.CPU, &specs.LinuxCPU{})) + } + assert.Check(t, is.Nil(r.Pids)) + if r.BlockIO != nil { + assert.Check(t, is.DeepEqual(r.BlockIO, &specs.LinuxBlockIO{}, cmpopts.EquateEmpty())) + } + if r.Network != nil { + assert.Check(t, is.DeepEqual(r.Network, &specs.LinuxNetwork{}, cmpopts.EquateEmpty())) + } + } +} diff --git a/daemon/update_linux.go b/daemon/update_linux.go index c1d3684868..3105402e3c 100644 --- a/daemon/update_linux.go +++ b/daemon/update_linux.go @@ -11,15 +11,19 @@ import ( func toContainerdResources(resources container.Resources) *libcontainerdtypes.Resources { var r libcontainerdtypes.Resources - r.BlockIO = &specs.LinuxBlockIO{ - Weight: &resources.BlkioWeight, + if resources.BlkioWeight != 0 { + r.BlockIO = &specs.LinuxBlockIO{ + Weight: &resources.BlkioWeight, + } } - shares := uint64(resources.CPUShares) - r.CPU = &specs.LinuxCPU{ - Shares: &shares, - Cpus: resources.CpusetCpus, - Mems: resources.CpusetMems, + cpu := specs.LinuxCPU{ + Cpus: resources.CpusetCpus, + Mems: resources.CpusetMems, + } + if resources.CPUShares != 0 { + shares := uint64(resources.CPUShares) + cpu.Shares = &shares } var ( @@ -37,17 +41,33 @@ func toContainerdResources(resources container.Resources) *libcontainerdtypes.Re period = uint64(resources.CPUPeriod) } - r.CPU.Period = &period - r.CPU.Quota = "a - - r.Memory = &specs.LinuxMemory{ - Limit: &resources.Memory, - Reservation: &resources.MemoryReservation, - Kernel: &resources.KernelMemory, + if period != 0 { + cpu.Period = &period + } + if quota != 0 { + cpu.Quota = "a } + if cpu != (specs.LinuxCPU{}) { + r.CPU = &cpu + } + + var memory specs.LinuxMemory + if resources.Memory != 0 { + memory.Limit = &resources.Memory + } + if resources.MemoryReservation != 0 { + memory.Reservation = &resources.MemoryReservation + } + if resources.KernelMemory != 0 { + memory.Kernel = &resources.KernelMemory + } if resources.MemorySwap > 0 { - r.Memory.Swap = &resources.MemorySwap + memory.Swap = &resources.MemorySwap + } + + if memory != (specs.LinuxMemory{}) { + r.Memory = &memory } r.Pids = getPidsLimit(resources) diff --git a/daemon/update_linux_test.go b/daemon/update_linux_test.go new file mode 100644 index 0000000000..4817c1eab7 --- /dev/null +++ b/daemon/update_linux_test.go @@ -0,0 +1,11 @@ +package daemon // import "github.com/docker/docker/daemon" + +import ( + "testing" + + "github.com/docker/docker/api/types/container" +) + +func TestToContainerdResources_Defaults(t *testing.T) { + checkResourcesAreUnset(t, toContainerdResources(container.Resources{})) +} diff --git a/libcontainerd/remote/client_linux.go b/libcontainerd/remote/client_linux.go index dd7aee8fe8..3b7ee1ab6e 100644 --- a/libcontainerd/remote/client_linux.go +++ b/libcontainerd/remote/client_linux.go @@ -21,9 +21,7 @@ func summaryFromInterface(i interface{}) (*libcontainerdtypes.Summary, error) { } func (t *task) UpdateResources(ctx context.Context, resources *libcontainerdtypes.Resources) error { - // go doesn't like the alias in 1.8, this means this need to be - // platform specific - return t.Update(ctx, containerd.WithResources((*specs.LinuxResources)(resources))) + return t.Update(ctx, containerd.WithResources(resources)) } func hostIDFromMap(id uint32, mp []specs.LinuxIDMapping) int { diff --git a/libcontainerd/types/types_linux.go b/libcontainerd/types/types_linux.go index 34360ed5c2..c91bcb9223 100644 --- a/libcontainerd/types/types_linux.go +++ b/libcontainerd/types/types_linux.go @@ -27,7 +27,7 @@ func InterfaceToStats(read time.Time, v interface{}) *Stats { } // Resources defines updatable container resource values. TODO: it must match containerd upcoming API -type Resources specs.LinuxResources +type Resources = specs.LinuxResources // Checkpoints contains the details of a checkpoint type Checkpoints struct{}