2021-08-23 13:14:53 +00:00
//go:build linux || freebsd
2015-11-12 19:55:17 +00:00
2018-02-05 21:05:59 +00:00
package daemon // import "github.com/docker/docker/daemon"
2015-11-12 19:55:17 +00:00
import (
2023-06-23 00:33:17 +00:00
"context"
2015-11-12 19:55:17 +00:00
"fmt"
"os"
"path/filepath"
"strconv"
2022-05-01 22:28:17 +00:00
"syscall"
2015-11-12 19:55:17 +00:00
2023-09-13 15:41:45 +00:00
"github.com/containerd/log"
2015-11-12 19:55:17 +00:00
"github.com/docker/docker/container"
2022-08-17 21:13:49 +00:00
"github.com/docker/docker/daemon/config"
2015-11-12 19:55:17 +00:00
"github.com/docker/docker/daemon/links"
2018-01-11 19:53:06 +00:00
"github.com/docker/docker/errdefs"
2021-05-28 00:15:56 +00:00
"github.com/docker/docker/libnetwork"
2015-11-12 19:55:17 +00:00
"github.com/docker/docker/pkg/idtools"
2022-10-15 13:10:25 +00:00
"github.com/docker/docker/pkg/process"
2015-11-12 19:55:17 +00:00
"github.com/docker/docker/pkg/stringid"
"github.com/docker/docker/runconfig"
2020-03-13 23:38:24 +00:00
"github.com/moby/sys/mount"
2017-04-18 13:26:36 +00:00
"github.com/opencontainers/selinux/go-selinux/label"
2016-10-26 20:30:53 +00:00
"github.com/pkg/errors"
2017-05-23 14:22:32 +00:00
"golang.org/x/sys/unix"
2015-11-12 19:55:17 +00:00
)
func ( daemon * Daemon ) setupLinkedContainers ( container * container . Container ) ( [ ] string , error ) {
var env [ ] string
2015-09-04 00:51:04 +00:00
children := daemon . children ( container )
2015-11-12 19:55:17 +00:00
2016-03-10 04:33:21 +00:00
bridgeSettings := container . NetworkSettings . Networks [ runconfig . DefaultDaemonNetworkMode ( ) . NetworkName ( ) ]
2016-08-23 23:50:15 +00:00
if bridgeSettings == nil || bridgeSettings . EndpointSettings == nil {
2015-11-12 19:55:17 +00:00
return nil , nil
}
2015-09-04 00:51:04 +00:00
for linkAlias , child := range children {
if ! child . IsRunning ( ) {
Remove static errors from errors package.
Moving all strings to the errors package wasn't a good idea after all.
Our custom implementation of Go errors predates everything that's nice
and good about working with errors in Go. Take as an example what we
have to do to get an error message:
```go
func GetErrorMessage(err error) string {
switch err.(type) {
case errcode.Error:
e, _ := err.(errcode.Error)
return e.Message
case errcode.ErrorCode:
ec, _ := err.(errcode.ErrorCode)
return ec.Message()
default:
return err.Error()
}
}
```
This goes against every good practice for Go development. The language already provides a simple, intuitive and standard way to get error messages, that is calling the `Error()` method from an error. Reinventing the error interface is a mistake.
Our custom implementation also makes very hard to reason about errors, another nice thing about Go. I found several (>10) error declarations that we don't use anywhere. This is a clear sign about how little we know about the errors we return. I also found several error usages where the number of arguments was different than the parameters declared in the error, another clear example of how difficult is to reason about errors.
Moreover, our custom implementation didn't really make easier for people to return custom HTTP status code depending on the errors. Again, it's hard to reason about when to set custom codes and how. Take an example what we have to do to extract the message and status code from an error before returning a response from the API:
```go
switch err.(type) {
case errcode.ErrorCode:
daError, _ := err.(errcode.ErrorCode)
statusCode = daError.Descriptor().HTTPStatusCode
errMsg = daError.Message()
case errcode.Error:
// For reference, if you're looking for a particular error
// then you can do something like :
// import ( derr "github.com/docker/docker/errors" )
// if daError.ErrorCode() == derr.ErrorCodeNoSuchContainer { ... }
daError, _ := err.(errcode.Error)
statusCode = daError.ErrorCode().Descriptor().HTTPStatusCode
errMsg = daError.Message
default:
// This part of will be removed once we've
// converted everything over to use the errcode package
// FIXME: this is brittle and should not be necessary.
// If we need to differentiate between different possible error types,
// we should create appropriate error types with clearly defined meaning
errStr := strings.ToLower(err.Error())
for keyword, status := range map[string]int{
"not found": http.StatusNotFound,
"no such": http.StatusNotFound,
"bad parameter": http.StatusBadRequest,
"conflict": http.StatusConflict,
"impossible": http.StatusNotAcceptable,
"wrong login/password": http.StatusUnauthorized,
"hasn't been activated": http.StatusForbidden,
} {
if strings.Contains(errStr, keyword) {
statusCode = status
break
}
}
}
```
You can notice two things in that code:
1. We have to explain how errors work, because our implementation goes against how easy to use Go errors are.
2. At no moment we arrived to remove that `switch` statement that was the original reason to use our custom implementation.
This change removes all our status errors from the errors package and puts them back in their specific contexts.
IT puts the messages back with their contexts. That way, we know right away when errors used and how to generate their messages.
It uses custom interfaces to reason about errors. Errors that need to response with a custom status code MUST implementent this simple interface:
```go
type errorWithStatus interface {
HTTPErrorStatusCode() int
}
```
This interface is very straightforward to implement. It also preserves Go errors real behavior, getting the message is as simple as using the `Error()` method.
I included helper functions to generate errors that use custom status code in `errors/errors.go`.
By doing this, we remove the hard dependency we have eeverywhere to our custom errors package. Yes, you can use it as a helper to generate error, but it's still very easy to generate errors without it.
Please, read this fantastic blog post about errors in Go: http://dave.cheney.net/2014/12/24/inspecting-errors
Signed-off-by: David Calavera <david.calavera@gmail.com>
2016-02-25 15:53:35 +00:00
return nil , fmt . Errorf ( "Cannot link to a non running container: %s AS %s" , child . Name , linkAlias )
2015-09-04 00:51:04 +00:00
}
2015-11-12 19:55:17 +00:00
2016-03-10 04:33:21 +00:00
childBridgeSettings := child . NetworkSettings . Networks [ runconfig . DefaultDaemonNetworkMode ( ) . NetworkName ( ) ]
2016-08-23 23:50:15 +00:00
if childBridgeSettings == nil || childBridgeSettings . EndpointSettings == nil {
2015-09-04 00:51:04 +00:00
return nil , fmt . Errorf ( "container %s not attached to default bridge network" , child . ID )
}
2015-11-12 19:55:17 +00:00
2015-09-04 00:51:04 +00:00
link := links . NewLink (
bridgeSettings . IPAddress ,
childBridgeSettings . IPAddress ,
linkAlias ,
child . Config . Env ,
child . Config . ExposedPorts ,
)
2015-11-12 19:55:17 +00:00
2016-10-13 16:34:19 +00:00
env = append ( env , link . ToEnv ( ) ... )
2015-11-12 19:55:17 +00:00
}
2015-09-04 00:51:04 +00:00
2015-11-12 19:55:17 +00:00
return env , nil
}
2023-08-11 22:23:21 +00:00
func ( daemon * Daemon ) getIPCContainer ( id string ) ( * container . Container , error ) {
2023-08-10 15:39:56 +00:00
// Check if the container exists, is running, and not restarting
2019-08-09 12:10:07 +00:00
ctr , err := daemon . GetContainer ( id )
2015-11-12 19:55:17 +00:00
if err != nil {
2023-08-11 22:23:21 +00:00
return nil , errdefs . InvalidParameter ( err )
2015-11-12 19:55:17 +00:00
}
2023-08-10 15:39:56 +00:00
if ! ctr . IsRunning ( ) {
return nil , errNotRunning ( id )
}
if ctr . IsRestarting ( ) {
return nil , errContainerIsRestarting ( id )
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
}
2023-08-10 15:39:56 +00:00
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
// Check the container ipc is shareable
2019-08-09 12:10:07 +00:00
if st , err := os . Stat ( ctr . ShmPath ) ; err != nil || ! st . IsDir ( ) {
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
if err == nil || os . IsNotExist ( err ) {
2023-08-11 22:23:21 +00:00
return nil , errdefs . InvalidParameter ( errors . New ( "container " + id + ": non-shareable IPC (hint: use IpcMode:shareable for the donor container)" ) )
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
}
// stat() failed?
2023-08-11 22:23:21 +00:00
return nil , errdefs . System ( errors . Wrap ( err , "container " + id ) )
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
}
2019-08-09 12:10:07 +00:00
return ctr , nil
2015-11-12 19:55:17 +00:00
}
2023-08-11 19:33:16 +00:00
func ( daemon * Daemon ) getPIDContainer ( id string ) ( * container . Container , error ) {
2023-08-10 15:39:56 +00:00
ctr , err := daemon . GetContainer ( id )
2016-05-06 18:56:03 +00:00
if err != nil {
daemon: WithNamespaces(): fix incorrect error for PID, IPC namespace
`Daemon.getPidContainer()` was wrapping the error-message with a message
("cannot join PID of a non running container") that did not reflect the
actual reason for the error; `Daemon.GetContainer()` could either return
an invalid parameter (invalid / empty identifier), or a "not found" error
if the specified container-ID could not be found.
In the latter case, we don't want to return a "not found" error through
the API, as this would indicate that the container we're _starting_ was
not found (which is not the case), so we need to convert the error into
an `errdefs.ErrInvalidParameter` (the container-ID specified for the PID
namespace is invalid if the container doesn't exist).
This logic is similar to what we do for IPC namespaces. which received
a similar fix in c3d7a0c6033a2764dd85c3863809ac498ef129f2.
This patch updates the error-types, and moves them into the getIpcContainer
and getPidContainer container functions, both of which should return
an "invalid parameter" if the container was not found.
It's worth noting that, while `WithNamespaces()` may return an "invalid
parameter" error, the `start` endpoint itself may _not_ be. as outlined
in commit bf1fb97575ae0c929075f8340d7deb4ae9f41fae, starting a container
that has an invalid configuration should be considered an internal server
error, and is not an invalid _request_. However, for uses other than
container "start", `WithNamespaces()` should return the correct error
to allow code to handle it accordingly.
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2023-08-11 18:37:58 +00:00
return nil , errdefs . InvalidParameter ( err )
2016-05-06 18:56:03 +00:00
}
2023-08-10 15:39:56 +00:00
if ! ctr . IsRunning ( ) {
return nil , errNotRunning ( id )
2016-05-06 18:56:03 +00:00
}
2023-08-10 15:39:56 +00:00
if ctr . IsRestarting ( ) {
return nil , errContainerIsRestarting ( id )
2016-05-06 18:56:03 +00:00
}
2023-08-10 15:39:56 +00:00
return ctr , nil
2016-05-06 18:56:03 +00:00
}
2023-06-27 10:17:49 +00:00
// setupContainerDirs prepares the container's base directories (mounts root,
// IPC and secrets) and returns the IPC, tmpfs and secret mounts, in that
// order.
//
// IPC mounts are included only when the container's IPC mode is neither
// private nor empty. If anything fails after the secret directory has been
// set up, that directory is cleaned up again before returning.
func (daemon *Daemon) setupContainerDirs(c *container.Container) (_ []container.Mount, err error) {
	if err = daemon.setupContainerMountsRoot(c); err != nil {
		return nil, err
	}
	if err = daemon.setupIPCDirs(c); err != nil {
		return nil, err
	}
	if err = daemon.setupSecretDir(c); err != nil {
		return nil, err
	}

	// The deferred cleanup reads the named result, so every error returned
	// from here on triggers it.
	defer func() {
		if err != nil {
			daemon.cleanupSecretDir(c)
		}
	}()

	var mounts []container.Mount
	if !(c.HostConfig.IpcMode.IsPrivate() || c.HostConfig.IpcMode.IsEmpty()) {
		mounts = append(mounts, c.IpcMounts()...)
	}

	tmpfs, err := c.TmpfsMounts()
	if err != nil {
		return nil, err
	}
	mounts = append(mounts, tmpfs...)

	secrets, err := c.SecretMounts()
	if err != nil {
		return nil, err
	}
	mounts = append(mounts, secrets...)

	return mounts, nil
}
2023-08-11 22:23:21 +00:00
func ( daemon * Daemon ) setupIPCDirs ( c * container . Container ) error {
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
ipcMode := c . HostConfig . IpcMode
2016-03-18 18:50:19 +00:00
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
switch {
case ipcMode . IsContainer ( ) :
2023-08-11 22:23:21 +00:00
ic , err := daemon . getIPCContainer ( ipcMode . Container ( ) )
2015-11-12 19:55:17 +00:00
if err != nil {
2023-08-11 22:23:21 +00:00
return errors . Wrapf ( err , "failed to join IPC namespace" )
2015-11-12 19:55:17 +00:00
}
2016-03-18 18:50:19 +00:00
c . ShmPath = ic . ShmPath
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
case ipcMode . IsHost ( ) :
2016-03-18 18:50:19 +00:00
if _ , err := os . Stat ( "/dev/shm" ) ; err != nil {
return fmt . Errorf ( "/dev/shm is not mounted, but must be for --ipc=host" )
2015-11-12 19:55:17 +00:00
}
2016-03-18 18:50:19 +00:00
c . ShmPath = "/dev/shm"
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
case ipcMode . IsPrivate ( ) , ipcMode . IsNone ( ) :
// c.ShmPath will/should not be used, so make it empty.
// Container's /dev/shm mount comes from OCI spec.
c . ShmPath = ""
case ipcMode . IsEmpty ( ) :
// A container was created by an older version of the daemon.
// The default behavior used to be what is now called "shareable".
fallthrough
case ipcMode . IsShareable ( ) :
2017-11-16 06:20:33 +00:00
rootIDs := daemon . idMapping . RootPair ( )
2016-03-18 18:50:19 +00:00
if ! c . HasMountFor ( "/dev/shm" ) {
shmPath , err := c . ShmResourcePath ( )
if err != nil {
return err
}
2015-11-12 19:55:17 +00:00
2022-01-20 13:25:24 +00:00
if err := idtools . MkdirAllAndChown ( shmPath , 0 o700 , rootIDs ) ; err != nil {
2016-03-18 18:50:19 +00:00
return err
}
2017-07-24 17:16:38 +00:00
shmproperty := "mode=1777,size=" + strconv . FormatInt ( c . HostConfig . ShmSize , 10 )
2017-05-23 14:22:32 +00:00
if err := unix . Mount ( "shm" , shmPath , "tmpfs" , uintptr ( unix . MS_NOEXEC | unix . MS_NOSUID | unix . MS_NODEV ) , label . FormatMountLabel ( shmproperty , c . GetMountLabel ( ) ) ) ; err != nil {
2016-03-18 18:50:19 +00:00
return fmt . Errorf ( "mounting shm tmpfs: %s" , err )
}
2017-05-19 22:06:46 +00:00
if err := os . Chown ( shmPath , rootIDs . UID , rootIDs . GID ) ; err != nil {
2016-03-18 18:50:19 +00:00
return err
}
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
c . ShmPath = shmPath
2015-11-12 19:55:17 +00:00
}
2016-03-18 18:50:19 +00:00
Implement none, private, and shareable ipc modes
Since the commit d88fe447df0e8 ("Add support for sharing /dev/shm/ and
/dev/mqueue between containers") container's /dev/shm is mounted on the
host first, then bind-mounted inside the container. This is done that
way in order to be able to share this container's IPC namespace
(and the /dev/shm mount point) with another container.
Unfortunately, this functionality breaks container checkpoint/restore
(even if IPC is not shared). Since /dev/shm is an external mount, its
contents is not saved by `criu checkpoint`, and so upon restore any
application that tries to access data under /dev/shm is severily
disappointed (which usually results in a fatal crash).
This commit solves the issue by introducing new IPC modes for containers
(in addition to 'host' and 'container:ID'). The new modes are:
- 'shareable': enables sharing this container's IPC with others
(this used to be the implicit default);
- 'private': disables sharing this container's IPC.
In 'private' mode, container's /dev/shm is truly mounted inside the
container, without any bind-mounting from the host, which solves the
issue.
While at it, let's also implement 'none' mode. The motivation, as
eloquently put by Justin Cormack, is:
> I wondered a while back about having a none shm mode, as currently it is
> not possible to have a totally unwriteable container as there is always
> a /dev/shm writeable mount. It is a bit of a niche case (and clearly
> should never be allowed to be daemon default) but it would be trivial to
> add now so maybe we should...
...so here's yet yet another mode:
- 'none': no /dev/shm mount inside the container (though it still
has its own private IPC namespace).
Now, to ultimately solve the abovementioned checkpoint/restore issue, we'd
need to make 'private' the default mode, but unfortunately it breaks the
backward compatibility. So, let's make the default container IPC mode
per-daemon configurable (with the built-in default set to 'shareable'
for now). The default can be changed either via a daemon CLI option
(--default-shm-mode) or a daemon.json configuration file parameter
of the same name.
Note one can only set either 'shareable' or 'private' IPC modes as a
daemon default (i.e. in this context 'host', 'container', or 'none'
do not make much sense).
Some other changes this patch introduces are:
1. A mount for /dev/shm is added to default OCI Linux spec.
2. IpcMode.Valid() is simplified to remove duplicated code that parsed
'container:ID' form. Note the old version used to check that ID does
not contain a semicolon -- this is no longer the case (tests are
modified accordingly). The motivation is we should either do a
proper check for container ID validity, or don't check it at all
(since it is checked in other places anyway). I chose the latter.
3. IpcMode.Container() is modified to not return container ID if the
mode value does not start with "container:", unifying the check to
be the same as in IpcMode.IsContainer().
3. IPC mode unit tests (runconfig/hostconfig_test.go) are modified
to add checks for newly added values.
[v2: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-51345997]
[v3: addressed review at https://github.com/moby/moby/pull/34087#pullrequestreview-53902833]
[v4: addressed the case of upgrading from older daemon, in this case
container.HostConfig.IpcMode is unset and this is valid]
[v5: document old and new IpcMode values in api/swagger.yaml]
[v6: add the 'none' mode, changelog entry to docs/api/version-history.md]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2017-06-27 21:58:50 +00:00
default :
return fmt . Errorf ( "invalid IPC mode: %v" , ipcMode )
2015-11-12 19:55:17 +00:00
}
return nil
}
2016-10-19 16:22:02 +00:00
2018-01-11 22:28:56 +00:00
func ( daemon * Daemon ) setupSecretDir ( c * container . Container ) ( setupErr error ) {
2018-01-17 15:49:58 +00:00
if len ( c . SecretReferences ) == 0 && len ( c . ConfigReferences ) == 0 {
2016-10-29 00:16:07 +00:00
return nil
}
2018-01-17 15:49:58 +00:00
if err := daemon . createSecretsDir ( c ) ; err != nil {
2018-01-11 22:28:56 +00:00
return err
}
defer func ( ) {
if setupErr != nil {
2018-01-17 15:49:58 +00:00
daemon . cleanupSecretDir ( c )
2018-01-11 22:28:56 +00:00
}
} ( )
2016-10-19 16:22:02 +00:00
2017-03-16 21:23:33 +00:00
if c . DependencyStore == nil {
return fmt . Errorf ( "secret store is not initialized" )
}
2016-11-15 15:04:36 +00:00
2017-06-27 01:46:30 +00:00
// retrieve possible remapped range start for root UID, GID
2017-11-16 06:20:33 +00:00
rootIDs := daemon . idMapping . RootPair ( )
2017-06-27 01:46:30 +00:00
2017-03-16 21:23:33 +00:00
for _ , s := range c . SecretReferences {
2016-11-15 15:04:36 +00:00
// TODO (ehazlett): use type switch when more are supported
if s . File == nil {
2023-06-23 00:33:17 +00:00
log . G ( context . TODO ( ) ) . Error ( "secret target type is not a file target" )
2017-03-16 21:23:33 +00:00
continue
2016-11-15 15:04:36 +00:00
}
2017-04-28 18:48:52 +00:00
// secrets are created in the SecretMountPath on the host, at a
// single level
2017-12-18 21:02:23 +00:00
fPath , err := c . SecretFilePath ( * s )
if err != nil {
return errors . Wrap ( err , "error getting secret file path" )
}
2022-01-20 13:25:24 +00:00
if err := idtools . MkdirAllAndChown ( filepath . Dir ( fPath ) , 0 o700 , rootIDs ) ; err != nil {
2016-10-27 18:29:51 +00:00
return errors . Wrap ( err , "error creating secret mount path" )
2016-10-19 16:22:02 +00:00
}
2023-07-30 15:18:56 +00:00
log . G ( context . TODO ( ) ) . WithFields ( log . Fields {
2016-11-15 15:04:36 +00:00
"name" : s . File . Name ,
2016-10-26 20:30:53 +00:00
"path" : fPath ,
} ) . Debug ( "injecting secret" )
2017-06-15 18:11:48 +00:00
secret , err := c . DependencyStore . Secrets ( ) . Get ( s . SecretID )
if err != nil {
return errors . Wrap ( err , "unable to get secret from secret store" )
2016-11-15 15:04:36 +00:00
}
2021-08-24 10:10:50 +00:00
if err := os . WriteFile ( fPath , secret . Spec . Data , s . File . Mode ) ; err != nil {
2016-10-27 18:29:51 +00:00
return errors . Wrap ( err , "error injecting secret" )
2016-10-19 16:22:02 +00:00
}
2016-11-15 15:04:36 +00:00
uid , err := strconv . Atoi ( s . File . UID )
2016-11-03 18:09:13 +00:00
if err != nil {
return err
}
2016-11-15 15:04:36 +00:00
gid , err := strconv . Atoi ( s . File . GID )
2016-11-03 18:09:13 +00:00
if err != nil {
return err
}
2017-05-19 22:06:46 +00:00
if err := os . Chown ( fPath , rootIDs . UID + uid , rootIDs . GID + gid ) ; err != nil {
2016-10-27 18:29:51 +00:00
return errors . Wrap ( err , "error setting ownership for secret" )
2016-10-19 16:22:02 +00:00
}
2018-01-28 07:13:48 +00:00
if err := os . Chmod ( fPath , s . File . Mode ) ; err != nil {
return errors . Wrap ( err , "error setting file mode for secret" )
}
2016-10-19 16:22:02 +00:00
}
2019-08-09 13:01:40 +00:00
for _ , configRef := range c . ConfigReferences {
2017-03-16 21:23:33 +00:00
// TODO (ehazlett): use type switch when more are supported
2019-08-09 13:01:40 +00:00
if configRef . File == nil {
2019-02-07 20:27:08 +00:00
// Runtime configs are not mounted into the container, but they're
// a valid type of config so we should not error when we encounter
// one.
2019-08-09 13:01:40 +00:00
if configRef . Runtime == nil {
2023-06-23 00:33:17 +00:00
log . G ( context . TODO ( ) ) . Error ( "config target type is not a file or runtime target" )
2019-02-07 20:27:08 +00:00
}
// However, in any case, this isn't a file config, so we have no
// further work to do
2017-03-16 21:23:33 +00:00
continue
}
2018-01-17 15:49:58 +00:00
2019-08-09 13:01:40 +00:00
fPath , err := c . ConfigFilePath ( * configRef )
2017-06-27 01:46:30 +00:00
if err != nil {
2018-01-17 15:49:58 +00:00
return errors . Wrap ( err , "error getting config file path for container" )
2017-12-18 21:02:23 +00:00
}
2022-01-20 13:25:24 +00:00
if err := idtools . MkdirAllAndChown ( filepath . Dir ( fPath ) , 0 o700 , rootIDs ) ; err != nil {
2018-01-11 22:28:56 +00:00
return errors . Wrap ( err , "error creating config mount path" )
2017-03-16 21:23:33 +00:00
}
2023-07-30 15:18:56 +00:00
log . G ( context . TODO ( ) ) . WithFields ( log . Fields {
2019-08-09 13:01:40 +00:00
"name" : configRef . File . Name ,
2018-01-11 22:28:56 +00:00
"path" : fPath ,
} ) . Debug ( "injecting config" )
2019-08-09 13:01:40 +00:00
config , err := c . DependencyStore . Configs ( ) . Get ( configRef . ConfigID )
2018-01-11 22:28:56 +00:00
if err != nil {
return errors . Wrap ( err , "unable to get config from config store" )
}
2021-08-24 10:10:50 +00:00
if err := os . WriteFile ( fPath , config . Spec . Data , configRef . File . Mode ) ; err != nil {
2017-03-16 21:23:33 +00:00
return errors . Wrap ( err , "error injecting config" )
}
2019-08-09 13:01:40 +00:00
uid , err := strconv . Atoi ( configRef . File . UID )
2017-03-16 21:23:33 +00:00
if err != nil {
return err
}
2019-08-09 13:01:40 +00:00
gid , err := strconv . Atoi ( configRef . File . GID )
2017-03-16 21:23:33 +00:00
if err != nil {
return err
}
2017-05-19 22:06:46 +00:00
if err := os . Chown ( fPath , rootIDs . UID + uid , rootIDs . GID + gid ) ; err != nil {
2017-03-16 21:23:33 +00:00
return errors . Wrap ( err , "error setting ownership for config" )
}
2019-08-09 13:01:40 +00:00
if err := os . Chmod ( fPath , configRef . File . Mode ) ; err != nil {
2018-01-28 07:13:48 +00:00
return errors . Wrap ( err , "error setting file mode for config" )
}
2018-01-17 15:49:58 +00:00
}
return daemon . remountSecretDir ( c )
}
// createSecretsDir is used to create a dir suitable for storing container secrets.
// In practice this is using a tmpfs mount and is used for both "configs" and "secrets"
func ( daemon * Daemon ) createSecretsDir ( c * container . Container ) error {
// retrieve possible remapped range start for root UID, GID
2017-11-16 06:20:33 +00:00
rootIDs := daemon . idMapping . RootPair ( )
2018-01-17 15:49:58 +00:00
dir , err := c . SecretMountPath ( )
if err != nil {
return errors . Wrap ( err , "error getting container secrets dir" )
}
// create tmpfs
2022-01-20 13:25:24 +00:00
if err := idtools . MkdirAllAndChown ( dir , 0 o700 , rootIDs ) ; err != nil {
2018-01-17 15:49:58 +00:00
return errors . Wrap ( err , "error creating secret local mount path" )
}
tmpfsOwnership := fmt . Sprintf ( "uid=%d,gid=%d" , rootIDs . UID , rootIDs . GID )
if err := mount . Mount ( "tmpfs" , dir , "tmpfs" , "nodev,nosuid,noexec," + tmpfsOwnership ) ; err != nil {
return errors . Wrap ( err , "unable to setup secret mount" )
}
return nil
}
func ( daemon * Daemon ) remountSecretDir ( c * container . Container ) error {
dir , err := c . SecretMountPath ( )
if err != nil {
return errors . Wrap ( err , "error getting container secrets path" )
2017-03-16 21:23:33 +00:00
}
2018-01-17 15:49:58 +00:00
if err := label . Relabel ( dir , c . MountLabel , false ) ; err != nil {
2023-06-23 00:33:17 +00:00
log . G ( context . TODO ( ) ) . WithError ( err ) . WithField ( "dir" , dir ) . Warn ( "Error while attempting to set selinux label" )
2018-01-17 15:49:58 +00:00
}
2017-11-16 06:20:33 +00:00
rootIDs := daemon . idMapping . RootPair ( )
2018-01-17 15:49:58 +00:00
tmpfsOwnership := fmt . Sprintf ( "uid=%d,gid=%d" , rootIDs . UID , rootIDs . GID )
2017-03-16 21:23:33 +00:00
2018-01-17 15:49:58 +00:00
// remount secrets ro
if err := mount . Mount ( "tmpfs" , dir , "tmpfs" , "remount,ro," + tmpfsOwnership ) ; err != nil {
return errors . Wrap ( err , "unable to remount dir as readonly" )
}
return nil
}
func ( daemon * Daemon ) cleanupSecretDir ( c * container . Container ) {
dir , err := c . SecretMountPath ( )
if err != nil {
2023-06-23 00:33:17 +00:00
log . G ( context . TODO ( ) ) . WithError ( err ) . WithField ( "container" , c . ID ) . Warn ( "error getting secrets mount path for container" )
2018-01-17 15:49:58 +00:00
}
if err := mount . RecursiveUnmount ( dir ) ; err != nil {
2023-06-23 00:33:17 +00:00
log . G ( context . TODO ( ) ) . WithField ( "dir" , dir ) . WithError ( err ) . Warn ( "Error while attempting to unmount dir, this may prevent removal of container." )
2018-01-17 15:49:58 +00:00
}
2020-09-23 08:30:53 +00:00
if err := os . RemoveAll ( dir ) ; err != nil {
2023-06-23 00:33:17 +00:00
log . G ( context . TODO ( ) ) . WithField ( "dir" , dir ) . WithError ( err ) . Error ( "Error removing dir." )
2018-01-17 15:49:58 +00:00
}
2017-03-16 21:23:33 +00:00
}
2020-10-24 17:56:21 +00:00
func killProcessDirectly ( container * container . Container ) error {
pid := container . GetPID ( )
if pid == 0 {
2022-05-01 22:28:17 +00:00
// Ensure that we don't kill ourselves
2020-10-24 17:56:21 +00:00
return nil
}
2020-03-27 03:20:41 +00:00
2022-05-01 22:28:17 +00:00
if err := unix . Kill ( pid , syscall . SIGKILL ) ; err != nil {
2020-10-24 17:56:21 +00:00
if err != unix . ESRCH {
2022-05-01 22:28:17 +00:00
return errdefs . System ( err )
2020-10-24 17:56:21 +00:00
}
2022-05-01 22:28:17 +00:00
err = errNoSuchProcess { pid , syscall . SIGKILL }
2023-06-23 00:33:17 +00:00
log . G ( context . TODO ( ) ) . WithError ( err ) . WithField ( "container" , container . ID ) . Debug ( "no such process" )
2022-05-01 22:28:17 +00:00
return err
2020-10-24 17:56:21 +00:00
}
// In case there were some exceptions(e.g., state of zombie and D)
2022-10-15 13:10:25 +00:00
if process . Alive ( pid ) {
2020-10-24 17:56:21 +00:00
// Since we can not kill a zombie pid, add zombie check here
2022-10-15 13:10:25 +00:00
isZombie , err := process . Zombie ( pid )
2020-10-24 17:56:21 +00:00
if err != nil {
2023-06-23 00:33:17 +00:00
log . G ( context . TODO ( ) ) . WithError ( err ) . WithField ( "container" , container . ID ) . Warn ( "Container state is invalid" )
2020-10-24 17:56:21 +00:00
return err
}
if isZombie {
return errdefs . System ( errors . Errorf ( "container %s PID %d is zombie and can not be killed. Use the --init option when creating containers to run an init inside the container that forwards signals and reaps processes" , stringid . TruncateID ( container . ID ) , pid ) )
2015-11-12 19:55:17 +00:00
}
}
return nil
}
func isLinkable ( child * container . Container ) bool {
// A container is linkable only if it belongs to the default network
2016-03-10 04:33:21 +00:00
_ , ok := child . NetworkSettings . Networks [ runconfig . DefaultDaemonNetworkMode ( ) . NetworkName ( ) ]
2015-11-12 19:55:17 +00:00
return ok
}
Remove static errors from errors package.
Moving all strings to the errors package wasn't a good idea after all.
Our custom implementation of Go errors predates everything that's nice
and good about working with errors in Go. Take as an example what we
have to do to get an error message:
```go
func GetErrorMessage(err error) string {
switch err.(type) {
case errcode.Error:
e, _ := err.(errcode.Error)
return e.Message
case errcode.ErrorCode:
ec, _ := err.(errcode.ErrorCode)
return ec.Message()
default:
return err.Error()
}
}
```
This goes against every good practice for Go development. The language already provides a simple, intuitive and standard way to get error messages, that is calling the `Error()` method from an error. Reinventing the error interface is a mistake.
Our custom implementation also makes it very hard to reason about errors, another nice thing about Go. I found several (>10) error declarations that we don't use anywhere. This is a clear sign of how little we know about the errors we return. I also found several error usages where the number of arguments was different from the parameters declared in the error, another clear example of how difficult it is to reason about errors.
Moreover, our custom implementation didn't really make it easier for people to return custom HTTP status codes depending on the errors. Again, it's hard to reason about when to set custom codes and how. Take as an example what we have to do to extract the message and status code from an error before returning a response from the API:
```go
switch err.(type) {
case errcode.ErrorCode:
daError, _ := err.(errcode.ErrorCode)
statusCode = daError.Descriptor().HTTPStatusCode
errMsg = daError.Message()
case errcode.Error:
// For reference, if you're looking for a particular error
// then you can do something like :
// import ( derr "github.com/docker/docker/errors" )
// if daError.ErrorCode() == derr.ErrorCodeNoSuchContainer { ... }
daError, _ := err.(errcode.Error)
statusCode = daError.ErrorCode().Descriptor().HTTPStatusCode
errMsg = daError.Message
default:
// This part of will be removed once we've
// converted everything over to use the errcode package
// FIXME: this is brittle and should not be necessary.
// If we need to differentiate between different possible error types,
// we should create appropriate error types with clearly defined meaning
errStr := strings.ToLower(err.Error())
for keyword, status := range map[string]int{
"not found": http.StatusNotFound,
"no such": http.StatusNotFound,
"bad parameter": http.StatusBadRequest,
"conflict": http.StatusConflict,
"impossible": http.StatusNotAcceptable,
"wrong login/password": http.StatusUnauthorized,
"hasn't been activated": http.StatusForbidden,
} {
if strings.Contains(errStr, keyword) {
statusCode = status
break
}
}
}
```
You can notice two things in that code:
1. We have to explain how errors work, because our implementation goes against how easy to use Go errors are.
2. We never managed to remove that `switch` statement, which was the original reason to use our custom implementation.
This change removes all our status errors from the errors package and puts them back in their specific contexts.
It puts the messages back with their contexts. That way, we know right away when errors are used and how to generate their messages.
It uses custom interfaces to reason about errors. Errors that need to respond with a custom status code MUST implement this simple interface:
```go
type errorWithStatus interface {
HTTPErrorStatusCode() int
}
```
This interface is very straightforward to implement. It also preserves Go errors real behavior, getting the message is as simple as using the `Error()` method.
I included helper functions to generate errors that use custom status code in `errors/errors.go`.
By doing this, we remove the hard dependency we have everywhere on our custom errors package. Yes, you can use it as a helper to generate errors, but it's still very easy to generate errors without it.
Please, read this fantastic blog post about errors in Go: http://dave.cheney.net/2014/12/24/inspecting-errors
Signed-off-by: David Calavera <david.calavera@gmail.com>
2016-02-25 15:53:35 +00:00
2023-08-09 20:18:12 +00:00
// TODO(aker): remove when we make the default bridge network behave like any other network
2016-04-20 22:35:11 +00:00
// enableIPOnPredefinedNetwork always reports false in this build, which this
// file's build constraint limits to linux and freebsd.
// NOTE(review): presumably this gates user-specified IP addresses on
// predefined networks — confirm against the callers.
func enableIPOnPredefinedNetwork ( ) bool {
return false
}
2016-09-21 19:02:20 +00:00
2019-03-20 09:26:43 +00:00
// serviceDiscoveryOnDefaultNetwork indicates if service discovery is supported on the default network
2023-08-09 20:18:12 +00:00
// TODO(aker): remove when we make the default bridge network behave like any other network
2019-03-20 09:26:43 +00:00
func serviceDiscoveryOnDefaultNetwork ( ) bool {
// Hard-coded: this build (linux || freebsd per the file's build constraint)
// always answers false.
return false
}
2023-07-23 21:14:26 +00:00
func setupPathsAndSandboxOptions ( container * container . Container , cfg * config . Config , sboxOptions * [ ] libnetwork . SandboxOption ) error {
2016-10-13 21:51:10 +00:00
var err error
2024-01-03 09:10:51 +00:00
var originResolvConfPath string
2016-10-13 21:51:10 +00:00
Better selection of DNS server
Commit e353e7e3f0ce8eceeff657393cba2876375403fa updated selection of the
`resolv.conf` file to use in situations where systemd-resolvd is used as
a resolver.
If a host uses `systemd-resolvd`, the system's `/etc/resolv.conf` file is
updated to set `127.0.0.53` as DNS, which is the local IP address for
systemd-resolvd. The DNS servers that are configured by the user will now
be stored in `/run/systemd/resolve/resolv.conf`, and systemd-resolvd acts
as a forwarding DNS for those.
Originally, Docker copied the DNS servers as configured in `/etc/resolv.conf`
as default DNS servers in containers, which failed to work if systemd-resolvd
is used (as `127.0.0.53` is not available inside the container's networking
namespace). To resolve this, e353e7e3f0ce8eceeff657393cba2876375403fa instead
detected if systemd-resolvd is in use, and in that case copied the "upstream"
DNS servers from the `/run/systemd/resolve/resolv.conf` configuration.
While this worked for most situations, it had some downsides, among which:
- we're skipping systemd-resolvd altogether, which means that we cannot take
advantage of addition functionality provided by it (such as per-interface
DNS servers)
- when updating DNS servers in the system's configuration, those changes were
not reflected in the container configuration, which could be problematic in
"developer" scenarios, when switching between networks.
This patch changes the way we select which resolv.conf to use as template
for the container's resolv.conf;
- in situations where a custom network is attached to the container, and the
embedded DNS is available, we use `/etc/resolv.conf` unconditionally. If
systemd-resolvd is used, the embedded DNS forwards external DNS lookups to
systemd-resolvd, which in turn is responsible for forwarding requests to
the external DNS servers configured by the user.
- if the container is running in "host mode" networking, we also use the
DNS server that's configured in `/etc/resolv.conf`. In this situation, no
embedded DNS server is available, but the container runs in the host's
networking namespace, and can use the same DNS servers as the host (which
could be systemd-resolvd or DNSMasq
- if the container uses the default (bridge) network, no embedded DNS is
available, and the container has its own networking namespace. In this
situation we check if systemd-resolvd is used, in which case we skip
systemd-resolvd, and configure the upstream DNS servers as DNS for the
container. This situation is the same as is used currently, which means
that dynamically switching DNS servers won't be supported for these
containers.
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2020-05-25 12:02:18 +00:00
// Set the correct paths for /etc/hosts and /etc/resolv.conf, based on the
// networking-mode of the container. Note that containers with "container"
// networking are already handled in "initializeNetworking()" before we reach
// this function, so do not have to be accounted for here.
switch {
case container . HostConfig . NetworkMode . IsHost ( ) :
// In host-mode networking, the container does not have its own networking
// namespace, so both `/etc/hosts` and `/etc/resolv.conf` should be the same
2020-08-10 10:27:24 +00:00
// as on the host itself. The container gets a copy of these files.
Better selection of DNS server
Commit e353e7e3f0ce8eceeff657393cba2876375403fa updated selection of the
`resolv.conf` file to use in situations where systemd-resolvd is used as
a resolver.
If a host uses `systemd-resolvd`, the system's `/etc/resolv.conf` file is
updated to set `127.0.0.53` as DNS, which is the local IP address for
systemd-resolvd. The DNS servers that are configured by the user will now
be stored in `/run/systemd/resolve/resolv.conf`, and systemd-resolvd acts
as a forwarding DNS for those.
Originally, Docker copied the DNS servers as configured in `/etc/resolv.conf`
as default DNS servers in containers, which failed to work if systemd-resolvd
is used (as `127.0.0.53` is not available inside the container's networking
namespace). To resolve this, e353e7e3f0ce8eceeff657393cba2876375403fa instead
detected if systemd-resolvd is in use, and in that case copied the "upstream"
DNS servers from the `/run/systemd/resolve/resolv.conf` configuration.
While this worked for most situations, it had some downsides, among which:
- we're skipping systemd-resolvd altogether, which means that we cannot take
advantage of addition functionality provided by it (such as per-interface
DNS servers)
- when updating DNS servers in the system's configuration, those changes were
not reflected in the container configuration, which could be problematic in
"developer" scenarios, when switching between networks.
This patch changes the way we select which resolv.conf to use as template
for the container's resolv.conf;
- in situations where a custom network is attached to the container, and the
embedded DNS is available, we use `/etc/resolv.conf` unconditionally. If
systemd-resolvd is used, the embedded DNS forwards external DNS lookups to
systemd-resolvd, which in turn is responsible for forwarding requests to
the external DNS servers configured by the user.
- if the container is running in "host mode" networking, we also use the
DNS server that's configured in `/etc/resolv.conf`. In this situation, no
embedded DNS server is available, but the container runs in the host's
networking namespace, and can use the same DNS servers as the host (which
could be systemd-resolvd or DNSMasq
- if the container uses the default (bridge) network, no embedded DNS is
available, and the container has its own networking namespace. In this
situation we check if systemd-resolvd is used, in which case we skip
systemd-resolvd, and configure the upstream DNS servers as DNS for the
container. This situation is the same as is used currently, which means
that dynamically switching DNS servers won't be supported for these
containers.
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2020-05-25 12:02:18 +00:00
* sboxOptions = append (
* sboxOptions ,
2020-08-10 10:27:24 +00:00
libnetwork . OptionOriginHostsPath ( "/etc/hosts" ) ,
Better selection of DNS server
Commit e353e7e3f0ce8eceeff657393cba2876375403fa updated selection of the
`resolv.conf` file to use in situations where systemd-resolvd is used as
a resolver.
If a host uses `systemd-resolvd`, the system's `/etc/resolv.conf` file is
updated to set `127.0.0.53` as DNS, which is the local IP address for
systemd-resolvd. The DNS servers that are configured by the user will now
be stored in `/run/systemd/resolve/resolv.conf`, and systemd-resolvd acts
as a forwarding DNS for those.
Originally, Docker copied the DNS servers as configured in `/etc/resolv.conf`
as default DNS servers in containers, which failed to work if systemd-resolvd
is used (as `127.0.0.53` is not available inside the container's networking
namespace). To resolve this, e353e7e3f0ce8eceeff657393cba2876375403fa instead
detected if systemd-resolvd is in use, and in that case copied the "upstream"
DNS servers from the `/run/systemd/resolve/resolv.conf` configuration.
While this worked for most situations, it had some downsides, among which:
- we're skipping systemd-resolvd altogether, which means that we cannot take
advantage of addition functionality provided by it (such as per-interface
DNS servers)
- when updating DNS servers in the system's configuration, those changes were
not reflected in the container configuration, which could be problematic in
"developer" scenarios, when switching between networks.
This patch changes the way we select which resolv.conf to use as template
for the container's resolv.conf;
- in situations where a custom network is attached to the container, and the
embedded DNS is available, we use `/etc/resolv.conf` unconditionally. If
systemd-resolvd is used, the embedded DNS forwards external DNS lookups to
systemd-resolvd, which in turn is responsible for forwarding requests to
the external DNS servers configured by the user.
- if the container is running in "host mode" networking, we also use the
DNS server that's configured in `/etc/resolv.conf`. In this situation, no
embedded DNS server is available, but the container runs in the host's
networking namespace, and can use the same DNS servers as the host (which
could be systemd-resolvd or DNSMasq
- if the container uses the default (bridge) network, no embedded DNS is
available, and the container has its own networking namespace. In this
situation we check if systemd-resolvd is used, in which case we skip
systemd-resolvd, and configure the upstream DNS servers as DNS for the
container. This situation is the same as is used currently, which means
that dynamically switching DNS servers won't be supported for these
containers.
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2020-05-25 12:02:18 +00:00
)
2024-01-03 09:10:51 +00:00
originResolvConfPath = "/etc/resolv.conf"
Better selection of DNS server
Commit e353e7e3f0ce8eceeff657393cba2876375403fa updated selection of the
`resolv.conf` file to use in situations where systemd-resolvd is used as
a resolver.
If a host uses `systemd-resolvd`, the system's `/etc/resolv.conf` file is
updated to set `127.0.0.53` as DNS, which is the local IP address for
systemd-resolvd. The DNS servers that are configured by the user will now
be stored in `/run/systemd/resolve/resolv.conf`, and systemd-resolvd acts
as a forwarding DNS for those.
Originally, Docker copied the DNS servers as configured in `/etc/resolv.conf`
as default DNS servers in containers, which failed to work if systemd-resolvd
is used (as `127.0.0.53` is not available inside the container's networking
namespace). To resolve this, e353e7e3f0ce8eceeff657393cba2876375403fa instead
detected if systemd-resolvd is in use, and in that case copied the "upstream"
DNS servers from the `/run/systemd/resolve/resolv.conf` configuration.
While this worked for most situations, it had some downsides, among which:
- we're skipping systemd-resolvd altogether, which means that we cannot take
advantage of addition functionality provided by it (such as per-interface
DNS servers)
- when updating DNS servers in the system's configuration, those changes were
not reflected in the container configuration, which could be problematic in
"developer" scenarios, when switching between networks.
This patch changes the way we select which resolv.conf to use as template
for the container's resolv.conf;
- in situations where a custom network is attached to the container, and the
embedded DNS is available, we use `/etc/resolv.conf` unconditionally. If
systemd-resolvd is used, the embedded DNS forwards external DNS lookups to
systemd-resolvd, which in turn is responsible for forwarding requests to
the external DNS servers configured by the user.
- if the container is running in "host mode" networking, we also use the
DNS server that's configured in `/etc/resolv.conf`. In this situation, no
embedded DNS server is available, but the container runs in the host's
networking namespace, and can use the same DNS servers as the host (which
could be systemd-resolvd or DNSMasq
- if the container uses the default (bridge) network, no embedded DNS is
available, and the container has its own networking namespace. In this
situation we check if systemd-resolvd is used, in which case we skip
systemd-resolvd, and configure the upstream DNS servers as DNS for the
container. This situation is the same as is used currently, which means
that dynamically switching DNS servers won't be supported for these
containers.
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2020-05-25 12:02:18 +00:00
case container . HostConfig . NetworkMode . IsUserDefined ( ) :
// The container uses a user-defined network. We use the embedded DNS
// server for container name resolution and to act as a DNS forwarder
// for external DNS resolution.
// We parse the DNS server(s) that are defined in /etc/resolv.conf on
// the host, which may be a local DNS server (for example, if DNSMasq or
// systemd-resolvd are in use). The embedded DNS server forwards DNS
// resolution to the DNS server configured on the host, which in itself
// may act as a forwarder for external DNS servers.
// If systemd-resolvd is used, the "upstream" DNS servers can be found in
// /run/systemd/resolve/resolv.conf. We do not query those DNS servers
// directly, as they can be dynamically reconfigured.
2024-01-03 09:10:51 +00:00
originResolvConfPath = "/etc/resolv.conf"
Better selection of DNS server
Commit e353e7e3f0ce8eceeff657393cba2876375403fa updated selection of the
`resolv.conf` file to use in situations where systemd-resolvd is used as
a resolver.
If a host uses `systemd-resolvd`, the system's `/etc/resolv.conf` file is
updated to set `127.0.0.53` as DNS, which is the local IP address for
systemd-resolvd. The DNS servers that are configured by the user will now
be stored in `/run/systemd/resolve/resolv.conf`, and systemd-resolvd acts
as a forwarding DNS for those.
Originally, Docker copied the DNS servers as configured in `/etc/resolv.conf`
as default DNS servers in containers, which failed to work if systemd-resolvd
is used (as `127.0.0.53` is not available inside the container's networking
namespace). To resolve this, e353e7e3f0ce8eceeff657393cba2876375403fa instead
detected if systemd-resolvd is in use, and in that case copied the "upstream"
DNS servers from the `/run/systemd/resolve/resolv.conf` configuration.
While this worked for most situations, it had some downsides, among which:
- we're skipping systemd-resolvd altogether, which means that we cannot take
advantage of addition functionality provided by it (such as per-interface
DNS servers)
- when updating DNS servers in the system's configuration, those changes were
not reflected in the container configuration, which could be problematic in
"developer" scenarios, when switching between networks.
This patch changes the way we select which resolv.conf to use as template
for the container's resolv.conf;
- in situations where a custom network is attached to the container, and the
embedded DNS is available, we use `/etc/resolv.conf` unconditionally. If
systemd-resolvd is used, the embedded DNS forwards external DNS lookups to
systemd-resolvd, which in turn is responsible for forwarding requests to
the external DNS servers configured by the user.
- if the container is running in "host mode" networking, we also use the
DNS server that's configured in `/etc/resolv.conf`. In this situation, no
embedded DNS server is available, but the container runs in the host's
networking namespace, and can use the same DNS servers as the host (which
could be systemd-resolvd or DNSMasq
- if the container uses the default (bridge) network, no embedded DNS is
available, and the container has its own networking namespace. In this
situation we check if systemd-resolvd is used, in which case we skip
systemd-resolvd, and configure the upstream DNS servers as DNS for the
container. This situation is the same as is used currently, which means
that dynamically switching DNS servers won't be supported for these
containers.
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2020-05-25 12:02:18 +00:00
default :
// For other situations, such as the default bridge network, container
// discovery / name resolution is handled through /etc/hosts, and no
// embedded DNS server is available. Without the embedded DNS, we
// cannot use local DNS servers on the host (for example, if DNSMasq or
// systemd-resolvd is used). If systemd-resolvd is used, we try to
// determine the external DNS servers that are used on the host.
// This situation is not ideal, because DNS servers configured in the
// container are not updated after the container is created, but the
// DNS servers on the host can be dynamically updated.
//
// Copy the host's resolv.conf for the container (/run/systemd/resolve/resolv.conf or /etc/resolv.conf)
2024-01-03 09:10:51 +00:00
originResolvConfPath = cfg . GetResolvConf ( )
}
// Allow tests to point at their own resolv.conf file.
if envPath := os . Getenv ( "DOCKER_TEST_RESOLV_CONF_PATH" ) ; envPath != "" {
log . G ( context . TODO ( ) ) . Infof ( "Using OriginResolvConfPath from env: %s" , envPath )
originResolvConfPath = envPath
Better selection of DNS server
Commit e353e7e3f0ce8eceeff657393cba2876375403fa updated selection of the
`resolv.conf` file to use in situations where systemd-resolvd is used as
a resolver.
If a host uses `systemd-resolvd`, the system's `/etc/resolv.conf` file is
updated to set `127.0.0.53` as DNS, which is the local IP address for
systemd-resolvd. The DNS servers that are configured by the user will now
be stored in `/run/systemd/resolve/resolv.conf`, and systemd-resolvd acts
as a forwarding DNS for those.
Originally, Docker copied the DNS servers as configured in `/etc/resolv.conf`
as default DNS servers in containers, which failed to work if systemd-resolvd
is used (as `127.0.0.53` is not available inside the container's networking
namespace). To resolve this, e353e7e3f0ce8eceeff657393cba2876375403fa instead
detected if systemd-resolvd is in use, and in that case copied the "upstream"
DNS servers from the `/run/systemd/resolve/resolv.conf` configuration.
While this worked for most situations, it had some downsides, among which:
- we're skipping systemd-resolvd altogether, which means that we cannot take
advantage of addition functionality provided by it (such as per-interface
DNS servers)
- when updating DNS servers in the system's configuration, those changes were
not reflected in the container configuration, which could be problematic in
"developer" scenarios, when switching between networks.
This patch changes the way we select which resolv.conf to use as template
for the container's resolv.conf;
- in situations where a custom network is attached to the container, and the
embedded DNS is available, we use `/etc/resolv.conf` unconditionally. If
systemd-resolvd is used, the embedded DNS forwards external DNS lookups to
systemd-resolvd, which in turn is responsible for forwarding requests to
the external DNS servers configured by the user.
- if the container is running in "host mode" networking, we also use the
DNS server that's configured in `/etc/resolv.conf`. In this situation, no
embedded DNS server is available, but the container runs in the host's
networking namespace, and can use the same DNS servers as the host (which
could be systemd-resolvd or DNSMasq
- if the container uses the default (bridge) network, no embedded DNS is
available, and the container has its own networking namespace. In this
situation we check if systemd-resolvd is used, in which case we skip
systemd-resolvd, and configure the upstream DNS servers as DNS for the
container. This situation is the same as is used currently, which means
that dynamically switching DNS servers won't be supported for these
containers.
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2020-05-25 12:02:18 +00:00
}
2024-01-03 09:10:51 +00:00
* sboxOptions = append ( * sboxOptions , libnetwork . OptionOriginResolvConfPath ( originResolvConfPath ) )
2019-01-10 21:58:55 +00:00
2016-10-13 21:51:10 +00:00
container . HostsPath , err = container . GetRootResourcePath ( "hosts" )
if err != nil {
return err
}
* sboxOptions = append ( * sboxOptions , libnetwork . OptionHostsPath ( container . HostsPath ) )
container . ResolvConfPath , err = container . GetRootResourcePath ( "resolv.conf" )
if err != nil {
return err
}
* sboxOptions = append ( * sboxOptions , libnetwork . OptionResolvConfPath ( container . ResolvConfPath ) )
return nil
}
2017-06-12 22:20:23 +00:00
func ( daemon * Daemon ) initializeNetworkingPaths ( container * container . Container , nc * container . Container ) error {
2016-10-13 21:51:10 +00:00
container . HostnamePath = nc . HostnamePath
container . HostsPath = nc . HostsPath
container . ResolvConfPath = nc . ResolvConfPath
2017-06-12 22:20:23 +00:00
return nil
2016-10-13 21:51:10 +00:00
}
2017-12-18 21:02:23 +00:00
func ( daemon * Daemon ) setupContainerMountsRoot ( c * container . Container ) error {
// get the root mount path so we can make it unbindable
p , err := c . MountsResourcePath ( "" )
if err != nil {
return err
}
2022-01-20 13:25:24 +00:00
return idtools . MkdirAllAndChown ( p , 0 o710 , idtools . Identity { UID : idtools . CurrentIdentity ( ) . UID , GID : daemon . IdentityMapping ( ) . RootPair ( ) . GID } )
2017-12-18 21:02:23 +00:00
}