#!/bin/sh
# dockerd-rootless.sh executes dockerd in rootless mode.
#
# Usage: dockerd-rootless.sh [DOCKERD_OPTIONS]
#
# External dependencies:
# * newuidmap and newgidmap need to be installed.
# * /etc/subuid and /etc/subgid need to be configured for the current user.
# * One of slirp4netns (>= v0.4.0), VPNKit, or lxc-user-nic needs to be installed.
#
# Recognized environment variables:
# * DOCKERD_ROOTLESS_ROOTLESSKIT_STATE_DIR=DIR: the rootlesskit state dir. Defaults to "$XDG_RUNTIME_DIR/dockerd-rootless".
# * DOCKERD_ROOTLESS_ROOTLESSKIT_NET=(slirp4netns|vpnkit|pasta|lxc-user-nic): the rootlesskit network driver. Defaults to "slirp4netns" if slirp4netns (>= v0.4.0) is installed. Otherwise defaults to "vpnkit".
# * DOCKERD_ROOTLESS_ROOTLESSKIT_MTU=NUM: the MTU value for the rootlesskit network driver. Defaults to 65520 when slirp4netns is selected automatically (i.e. DOCKERD_ROOTLESS_ROOTLESSKIT_NET is not set), 1500 otherwise.
# * DOCKERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=(builtin|slirp4netns|implicit): the rootlesskit port driver. Defaults to "builtin".
# * DOCKERD_ROOTLESS_ROOTLESSKIT_SLIRP4NETNS_SANDBOX=(auto|true|false): whether to protect slirp4netns with a dedicated mount namespace. Defaults to "auto".
# * DOCKERD_ROOTLESS_ROOTLESSKIT_SLIRP4NETNS_SECCOMP=(auto|true|false): whether to protect slirp4netns with seccomp. Defaults to "auto".
#
# To apply an environment variable via systemd, create ~/.config/systemd/user/docker.service.d/override.conf as follows,
# and run `systemctl --user daemon-reload && systemctl --user restart docker`:
# --- BEGIN ---
# [Service]
# Environment="DOCKERD_ROOTLESS_ROOTLESSKIT_NET=pasta"
# Environment="DOCKERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=implicit"
# --- END ---
#
# Guide to choosing the network driver and the port driver:
#
# Network driver | Port driver    | Net throughput | Port throughput | Src IP | No SUID | Note
# ---------------|----------------|----------------|-----------------|--------|---------|---------------------------------------------------------
# slirp4netns    | builtin        | Slow           | Fast ✅         | ❌     | ✅      | Default in typical setup
# vpnkit         | builtin        | Slow           | Fast ✅         | ❌     | ✅      | Default when slirp4netns is not installed
# slirp4netns    | slirp4netns    | Slow           | Slow            | ✅     | ✅      |
# pasta          | implicit       | Slow           | Fast ✅         | ✅     | ✅      | Experimental; Needs recent version of pasta (2023_12_04)
# lxc-user-nic   | builtin        | Fast ✅        | Fast ✅         | ❌     | ❌      | Experimental
# (bypass4netns) | (bypass4netns) | Fast ✅        | Fast ✅         | ✅     | ✅      | (Not integrated to RootlessKit)
#
# See the documentation for further information: https://docs.docker.com/go/rootless/
  # Abort on the first failing command (-e) and trace every command (-x).
  set -e -x

  # "check", "install" and "uninstall" are not handled by this script: print a
  # hint that names dockerd-rootless-setuptool.sh and stop.
  case "$1" in
    "check" | "install" | "uninstall")
      # "$*" (rather than "$@") keeps the quoted hint a single word; the hint is
      # a diagnostic, so it is written to stderr.
      echo "Did you mean 'dockerd-rootless-setuptool.sh $*' ?" >&2
      exit 1
      ;;
  esac
  # Pre-flight checks on the caller's environment. Both tests also fail when the
  # variable is unset or empty, because the expansion is then the empty string.
  # Diagnostics are written to stderr.
  if ! [ -w "$XDG_RUNTIME_DIR" ]; then
    echo "XDG_RUNTIME_DIR needs to be set and writable" >&2
    exit 1
  fi
  if ! [ -d "$HOME" ]; then
    echo "HOME needs to be set and exist." >&2
    exit 1
  fi
  # Locate the RootlessKit executable on PATH. "docker-rootlesskit" is tried
  # first, then "rootlesskit"; the first name that resolves is stored in
  # $rootlesskit.
  rootlesskit=""
  for f in docker-rootlesskit rootlesskit; do
    if command -v "$f" > /dev/null 2>&1; then
      rootlesskit=$f
      break
    fi
  done
  # Neither name was found: report on stderr and give up.
  if [ -z "$rootlesskit" ]; then
    echo "rootlesskit needs to be installed" >&2
    exit 1
  fi
  # Apply defaults to the recognized environment variables.
  # ":=" assigns the default only when the variable is unset or empty.
  : "${DOCKERD_ROOTLESS_ROOTLESSKIT_STATE_DIR:=$XDG_RUNTIME_DIR/dockerd-rootless}"
  : "${DOCKERD_ROOTLESS_ROOTLESSKIT_NET:=}"
  : "${DOCKERD_ROOTLESS_ROOTLESSKIT_MTU:=}"
  : "${DOCKERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER:=builtin}"
  : "${DOCKERD_ROOTLESS_ROOTLESSKIT_SLIRP4NETNS_SANDBOX:=auto}"
  : "${DOCKERD_ROOTLESS_ROOTLESSKIT_SLIRP4NETNS_SECCOMP:=auto}"

  net=$DOCKERD_ROOTLESS_ROOTLESSKIT_NET
  mtu=$DOCKERD_ROOTLESS_ROOTLESSKIT_MTU

  # No network driver requested: auto-detect one, preferring slirp4netns over
  # vpnkit.
  if [ -z "$net" ]; then
    if command -v slirp4netns > /dev/null 2>&1; then
      # If --netns-type is present in --help, slirp4netns is >= v0.4.0.
      if slirp4netns --help | grep -qw -- --netns-type; then
        net=slirp4netns
        # The large MTU is only applied on this auto-detection path and never
        # overrides a value supplied by the user.
        : "${mtu:=65520}"
      else
        echo "slirp4netns found but seems older than v0.4.0. Falling back to VPNKit." >&2
      fi
    fi
    if [ -z "$net" ]; then
      if command -v vpnkit > /dev/null 2>&1; then
        net=vpnkit
      else
        echo "Either slirp4netns (>= v0.4.0) or vpnkit needs to be installed" >&2
        exit 1
      fi
    fi
  fi

  # Every other case (explicitly requested driver, or vpnkit fallback) gets the
  # conservative default MTU unless one was supplied.
  : "${mtu:=1500}"
  # Daemon binary to execute; the DOCKERD environment variable overrides the
  # default "dockerd".
  dockerd="${DOCKERD:-dockerd}"

  # The script runs twice: first as the parent (marker variable unset), which
  # re-executes itself through RootlessKit, then as the child inside the new
  # namespaces, which finally executes dockerd.
  if [ -z "$_DOCKERD_ROOTLESS_CHILD" ]; then
    _DOCKERD_ROOTLESS_CHILD=1
    export _DOCKERD_ROOTLESS_CHILD
    if [ "$(id -u)" = "0" ]; then
      echo "This script must be executed as a non-privileged user" >&2
      exit 1
    fi
    # `selinuxenabled` always returns false in RootlessKit child, so we execute `selinuxenabled` in the parent.
    # https://github.com/rootless-containers/rootlesskit/issues/94
    if command -v selinuxenabled > /dev/null 2>&1 && selinuxenabled; then
      _DOCKERD_ROOTLESS_SELINUX=1
      export _DOCKERD_ROOTLESS_SELINUX
    fi
    # Re-exec the script via RootlessKit, so as to create unprivileged {user,mount,network} namespaces.
    #
    # --copy-up allows removing/creating files in the directories by creating tmpfs and symlinks
    # * /etc: copy-up is required so as to prevent `/etc/resolv.conf` in the
    #         namespace from being unexpectedly unmounted when `/etc/resolv.conf` is recreated on the host
    #         (by either systemd-networkd or NetworkManager)
    # * /run: copy-up is required so that we can create /run/docker (hardcoded for plugins) in our namespace
    #
    # Every expansion is quoted so that values containing whitespace (e.g. a
    # state dir below an unusual XDG_RUNTIME_DIR) stay a single argument.
    # DOCKERD_ROOTLESS_ROOTLESSKIT_FLAGS is the one exception: it is left
    # unquoted so that several space-separated flags become separate arguments.
    # shellcheck disable=SC2086
    exec "$rootlesskit" \
      --state-dir="$DOCKERD_ROOTLESS_ROOTLESSKIT_STATE_DIR" \
      --net="$net" --mtu="$mtu" \
      --slirp4netns-sandbox="$DOCKERD_ROOTLESS_ROOTLESSKIT_SLIRP4NETNS_SANDBOX" \
      --slirp4netns-seccomp="$DOCKERD_ROOTLESS_ROOTLESSKIT_SLIRP4NETNS_SECCOMP" \
      --disable-host-loopback --port-driver="$DOCKERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER" \
      --copy-up=/etc --copy-up=/run \
      --propagation=rslave \
      $DOCKERD_ROOTLESS_ROOTLESSKIT_FLAGS \
      "$0" "$@"
  else
    # Sanity check: the marker must hold exactly the value the parent exported.
    # Under `set -e` a failing test aborts the script here.
    [ "$_DOCKERD_ROOTLESS_CHILD" = 1 ]
    # remove the symlinks for the existing files in the parent namespace if any,
    # so that we can create our own files in our mount namespace.
    rm -f /run/docker /run/containerd /run/xtables.lock
    if [ -n "$_DOCKERD_ROOTLESS_SELINUX" ]; then
      # iptables requires /run in the child to be relabeled. The actual /run in the parent is unaffected.
      # https://github.com/containers/podman/blob/e6fc34b71aa9d876b1218efe90e14f8b912b0603/libpod/networking_linux.go#L396-L401
      # https://github.com/moby/moby/issues/41230
      chcon system_u:object_r:iptables_var_run_t:s0 /run
    fi
    if [ "$(stat -c %T -f /etc)" = "tmpfs" ] && [ -L "/etc/ssl" ]; then
      # Workaround for "x509: certificate signed by unknown authority" on openSUSE Tumbleweed.
      # https://github.com/rootless-containers/rootlesskit/issues/225
      # Replace the /etc/ssl symlink with a directory and bind-mount the
      # symlink's resolved target onto it.
      realpath_etc_ssl=$(realpath /etc/ssl)
      rm -f /etc/ssl
      mkdir /etc/ssl
      mount --rbind "${realpath_etc_ssl}" /etc/ssl
    fi
    exec "$dockerd" "$@"
  fi