Merge pull request #20187 from cyphar/vendor-runc
[carry 19752] vendor: update runc/libcontainer to v0.0.8
This commit is contained in:
commit
929f62e64d
41 changed files with 1328 additions and 532 deletions
|
@ -59,7 +59,7 @@ clone git github.com/miekg/pkcs11 80f102b5cac759de406949c47f0928b99bd64cdf
|
|||
clone git github.com/docker/go v1.5.1-1-1-gbaf439e
|
||||
clone git github.com/agl/ed25519 d2b94fd789ea21d12fac1a4443dd3a3f79cda72c
|
||||
|
||||
clone git github.com/opencontainers/runc 3d8a20bb772defc28c355534d83486416d1719b4 # libcontainer
|
||||
clone git github.com/opencontainers/runc ce72f86a2b54bc114d6ffb51f6500479b2d42154 # libcontainer
|
||||
clone git github.com/seccomp/libseccomp-golang 1b506fc7c24eec5a3693cdcbed40d9c226cfc6a1
|
||||
# libcontainer deps (see src/github.com/opencontainers/runc/Godeps/Godeps.json)
|
||||
clone git github.com/coreos/go-systemd v4
|
||||
|
|
|
@ -10,80 +10,165 @@ host system and which is (optionally) isolated from other containers in the syst
|
|||
|
||||
#### Using libcontainer
|
||||
|
||||
To create a container you first have to initialize an instance of a factory
|
||||
that will handle the creation and initialization for a container.
|
||||
|
||||
Because containers are spawned in a two step process you will need to provide
|
||||
arguments to a binary that will be executed as the init process for the container.
|
||||
To use the current binary that is spawning the containers and acting as the parent
|
||||
you can use `os.Args[0]` and we have a command called `init` setup.
|
||||
Because containers are spawned in a two step process you will need a binary that
|
||||
will be executed as the init process for the container. In libcontainer, we use
|
||||
the current binary (/proc/self/exe) to be executed as the init process, and use
|
||||
arg "init", we call the first step process "bootstrap", so you always need a "init"
|
||||
function as the entry of "bootstrap".
|
||||
|
||||
```go
|
||||
root, err := libcontainer.New("/var/lib/container", libcontainer.InitArgs(os.Args[0], "init"))
|
||||
func init() {
|
||||
if len(os.Args) > 1 && os.Args[1] == "init" {
|
||||
runtime.GOMAXPROCS(1)
|
||||
runtime.LockOSThread()
|
||||
factory, _ := libcontainer.New("")
|
||||
if err := factory.StartInitialization(); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
panic("--this line should have never been executed, congratulations--")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Then to create a container you first have to initialize an instance of a factory
|
||||
that will handle the creation and initialization for a container.
|
||||
|
||||
```go
|
||||
factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init"))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
logrus.Fatal(err)
|
||||
return
|
||||
}
|
||||
```
|
||||
|
||||
Once you have an instance of the factory created we can create a configuration
|
||||
struct describing how the container is to be created. A sample would look similar to this:
|
||||
struct describing how the container is to be created. A sample would look similar to this:
|
||||
|
||||
```go
|
||||
defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
config := &configs.Config{
|
||||
Rootfs: rootfs,
|
||||
Capabilities: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Namespaces: configs.Namespaces([]configs.Namespace{
|
||||
{Type: configs.NEWNS},
|
||||
{Type: configs.NEWUTS},
|
||||
{Type: configs.NEWIPC},
|
||||
{Type: configs.NEWPID},
|
||||
{Type: configs.NEWNET},
|
||||
}),
|
||||
Cgroups: &configs.Cgroup{
|
||||
Name: "test-container",
|
||||
Parent: "system",
|
||||
AllowAllDevices: false,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
},
|
||||
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Hostname: "testing",
|
||||
Networks: []*configs.Network{
|
||||
{
|
||||
Type: "loopback",
|
||||
Address: "127.0.0.1/0",
|
||||
Gateway: "localhost",
|
||||
},
|
||||
},
|
||||
Rlimits: []configs.Rlimit{
|
||||
{
|
||||
Type: syscall.RLIMIT_NOFILE,
|
||||
Hard: uint64(1024),
|
||||
Soft: uint64(1024),
|
||||
},
|
||||
},
|
||||
Rootfs: "/your/path/to/rootfs",
|
||||
Capabilities: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Namespaces: configs.Namespaces([]configs.Namespace{
|
||||
{Type: configs.NEWNS},
|
||||
{Type: configs.NEWUTS},
|
||||
{Type: configs.NEWIPC},
|
||||
{Type: configs.NEWPID},
|
||||
{Type: configs.NEWUSER},
|
||||
{Type: configs.NEWNET},
|
||||
}),
|
||||
Cgroups: &configs.Cgroup{
|
||||
Name: "test-container",
|
||||
Parent: "system",
|
||||
Resources: &configs.Resources{
|
||||
MemorySwappiness: -1,
|
||||
AllowAllDevices: false,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
},
|
||||
},
|
||||
MaskPaths: []string{
|
||||
"/proc/kcore",
|
||||
},
|
||||
ReadonlyPaths: []string{
|
||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
},
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Hostname: "testing",
|
||||
Mounts: []*configs.Mount{
|
||||
{
|
||||
Source: "proc",
|
||||
Destination: "/proc",
|
||||
Device: "proc",
|
||||
Flags: defaultMountFlags,
|
||||
},
|
||||
{
|
||||
Source: "tmpfs",
|
||||
Destination: "/dev",
|
||||
Device: "tmpfs",
|
||||
Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME,
|
||||
Data: "mode=755",
|
||||
},
|
||||
{
|
||||
Source: "devpts",
|
||||
Destination: "/dev/pts",
|
||||
Device: "devpts",
|
||||
Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC,
|
||||
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
|
||||
},
|
||||
{
|
||||
Device: "tmpfs",
|
||||
Source: "shm",
|
||||
Destination: "/dev/shm",
|
||||
Data: "mode=1777,size=65536k",
|
||||
Flags: defaultMountFlags,
|
||||
},
|
||||
{
|
||||
Source: "mqueue",
|
||||
Destination: "/dev/mqueue",
|
||||
Device: "mqueue",
|
||||
Flags: defaultMountFlags,
|
||||
},
|
||||
{
|
||||
Source: "sysfs",
|
||||
Destination: "/sys",
|
||||
Device: "sysfs",
|
||||
Flags: defaultMountFlags | syscall.MS_RDONLY,
|
||||
},
|
||||
},
|
||||
UidMappings: []configs.IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
Host: 1000,
|
||||
size: 65536,
|
||||
},
|
||||
},
|
||||
GidMappings: []configs.IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
Host: 1000,
|
||||
size: 65536,
|
||||
},
|
||||
},
|
||||
Networks: []*configs.Network{
|
||||
{
|
||||
Type: "loopback",
|
||||
Address: "127.0.0.1/0",
|
||||
Gateway: "localhost",
|
||||
},
|
||||
},
|
||||
Rlimits: []configs.Rlimit{
|
||||
{
|
||||
Type: syscall.RLIMIT_NOFILE,
|
||||
Hard: uint64(1025),
|
||||
Soft: uint64(1025),
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Once you have the configuration populated you can create a container:
|
||||
|
||||
```go
|
||||
container, err := root.Create("container-id", config)
|
||||
container, err := factory.Create("container-id", config)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
return
|
||||
}
|
||||
```
|
||||
|
||||
To spawn bash as the initial process inside the container and have the
|
||||
|
@ -91,23 +176,25 @@ processes pid returned in order to wait, signal, or kill the process:
|
|||
|
||||
```go
|
||||
process := &libcontainer.Process{
|
||||
Args: []string{"/bin/bash"},
|
||||
Env: []string{"PATH=/bin"},
|
||||
User: "daemon",
|
||||
Stdin: os.Stdin,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
Args: []string{"/bin/bash"},
|
||||
Env: []string{"PATH=/bin"},
|
||||
User: "daemon",
|
||||
Stdin: os.Stdin,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
}
|
||||
|
||||
err := container.Start(process)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
logrus.Fatal(err)
|
||||
container.Destroy()
|
||||
return
|
||||
}
|
||||
|
||||
// wait for the process to finish.
|
||||
status, err := process.Wait()
|
||||
_, err := process.Wait()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
|
||||
// destroy the container.
|
||||
|
@ -124,7 +211,6 @@ processes, err := container.Processes()
|
|||
// it's processes.
|
||||
stats, err := container.Stats()
|
||||
|
||||
|
||||
// pause all processes inside the container.
|
||||
container.Pause()
|
||||
|
||||
|
|
|
@ -60,7 +60,7 @@ are required to be mounted within the rootfs that the runtime will setup.
|
|||
After a container's filesystems are mounted within the newly created
|
||||
mount namespace `/dev` will need to be populated with a set of device nodes.
|
||||
It is expected that a rootfs does not need to have any device nodes specified
|
||||
for `/dev` witin the rootfs as the container will setup the correct devices
|
||||
for `/dev` within the rootfs as the container will setup the correct devices
|
||||
that are required for executing a container's process.
|
||||
|
||||
| Path | Mode | Access |
|
||||
|
@ -142,6 +142,7 @@ system resources like cpu, memory, and device access.
|
|||
| perf_event | 1 |
|
||||
| freezer | 1 |
|
||||
| hugetlb | 1 |
|
||||
| pids | 1 |
|
||||
|
||||
|
||||
All cgroup subsystem are joined so that statistics can be collected from
|
||||
|
@ -199,7 +200,7 @@ provide a good default for security and flexibility for the applications.
|
|||
| CAP_SYS_BOOT | 0 |
|
||||
| CAP_LEASE | 0 |
|
||||
| CAP_WAKE_ALARM | 0 |
|
||||
| CAP_BLOCK_SUSPE | 0 |
|
||||
| CAP_BLOCK_SUSPEND | 0 |
|
||||
|
||||
|
||||
Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor)
|
||||
|
|
|
@ -15,6 +15,9 @@ type Manager interface {
|
|||
// Returns the PIDs inside the cgroup set
|
||||
GetPids() ([]int, error)
|
||||
|
||||
// Returns the PIDs inside the cgroup set & all sub-cgroups
|
||||
GetAllPids() ([]int, error)
|
||||
|
||||
// Returns statistics for the cgroup set
|
||||
GetStats() (*Stats, error)
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -23,6 +24,7 @@ var (
|
|||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
|
@ -93,11 +95,10 @@ func getCgroupRoot() (string, error) {
|
|||
}
|
||||
|
||||
type cgroupData struct {
|
||||
root string
|
||||
parent string
|
||||
name string
|
||||
config *configs.Cgroup
|
||||
pid int
|
||||
root string
|
||||
innerPath string
|
||||
config *configs.Cgroup
|
||||
pid int
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) (err error) {
|
||||
|
@ -112,6 +113,22 @@ func (m *Manager) Apply(pid int) (err error) {
|
|||
return err
|
||||
}
|
||||
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := d.path(name)
|
||||
if err != nil {
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
|
@ -135,17 +152,13 @@ func (m *Manager) Apply(pid int) (err error) {
|
|||
paths[sys.Name()] = p
|
||||
}
|
||||
m.Paths = paths
|
||||
|
||||
if paths["cpu"] != "" {
|
||||
if err := CheckCpushares(paths["cpu"], c.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
|
@ -179,15 +192,28 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
|||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
for name, path := range m.Paths {
|
||||
sys, err := subsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
for _, sys := range subsystems {
|
||||
// Generate fake cgroup data.
|
||||
d, err := getCgroupData(container.Cgroups, -1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Get the path, but don't error out if the cgroup wasn't found.
|
||||
path, err := d.path(sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -217,41 +243,28 @@ func (m *Manager) Freeze(state configs.FreezerState) error {
|
|||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
d, err := getCgroupData(m.Cgroups, 0)
|
||||
dir, err := getCgroupPath(m.Cgroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dir, err := d.path("devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return cgroups.GetPids(dir)
|
||||
}
|
||||
|
||||
// pathClean makes a path safe for use with filepath.Join. This is done by not
|
||||
// only cleaning the path, but also (if the path is relative) adding a leading
|
||||
// '/' and cleaning it (then removing the leading '/'). This ensures that a
|
||||
// path resulting from prepending another path will always resolve to lexically
|
||||
// be a subdirectory of the prefixed path. This is all done lexically, so paths
|
||||
// that include symlinks won't be safe as a result of using pathClean.
|
||||
func pathClean(path string) string {
|
||||
// Ensure that all paths are cleaned (especially problematic ones like
|
||||
// "/../../../../../" which can cause lots of issues).
|
||||
path = filepath.Clean(path)
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
dir, err := getCgroupPath(m.Cgroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(dir)
|
||||
}
|
||||
|
||||
// If the path isn't absolute, we need to do more processing to fix paths
|
||||
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
|
||||
// paths to relative ones.
|
||||
if !filepath.IsAbs(path) {
|
||||
path = filepath.Clean(string(os.PathSeparator) + path)
|
||||
// This can't fail, as (by definition) all paths are relative to root.
|
||||
path, _ = filepath.Rel(string(os.PathSeparator), path)
|
||||
func getCgroupPath(c *configs.Cgroup) (string, error) {
|
||||
d, err := getCgroupData(c, 0)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Clean the path again for good measure.
|
||||
return filepath.Clean(path)
|
||||
return d.path("devices")
|
||||
}
|
||||
|
||||
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
||||
|
@ -260,15 +273,25 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// Clean the parent slice path.
|
||||
c.Parent = pathClean(c.Parent)
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
innerPath := cgPath
|
||||
if innerPath == "" {
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return &cgroupData{
|
||||
root: root,
|
||||
parent: c.Parent,
|
||||
name: c.Name,
|
||||
config: c,
|
||||
pid: pid,
|
||||
root: root,
|
||||
innerPath: innerPath,
|
||||
config: c,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -296,11 +319,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
|
|||
return "", err
|
||||
}
|
||||
|
||||
cgPath := filepath.Join(raw.parent, raw.name)
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(cgPath) {
|
||||
if filepath.IsAbs(raw.innerPath) {
|
||||
// Sometimes subsystems can be mounted togethger as 'cpu,cpuacct'.
|
||||
return filepath.Join(raw.root, filepath.Base(mnt), cgPath), nil
|
||||
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
|
||||
}
|
||||
|
||||
parentPath, err := raw.parentPath(subsystem, mnt, root)
|
||||
|
@ -308,7 +330,7 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
|
|||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(parentPath, cgPath), nil
|
||||
return filepath.Join(parentPath, raw.innerPath), nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) join(subsystem string) (string, error) {
|
||||
|
|
|
@ -22,15 +22,10 @@ func (s *BlkioGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *BlkioGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("blkio")
|
||||
_, err := d.join("blkio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -22,15 +22,10 @@ func (s *CpuGroup) Name() string {
|
|||
func (s *CpuGroup) Apply(d *cgroupData) error {
|
||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||
// on a container basis
|
||||
dir, err := d.join("cpu")
|
||||
_, err := d.join("cpu")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
type CpusetGroup struct {
|
||||
|
@ -64,11 +65,6 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
|
|||
if err := s.ensureParent(dir, root); err != nil {
|
||||
return err
|
||||
}
|
||||
// the default values inherit from parent cgroup are already set in
|
||||
// s.ensureParent, cover these if we have our own
|
||||
if err := s.Set(dir, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
|
||||
|
@ -93,7 +89,7 @@ func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []b
|
|||
// it's parent.
|
||||
func (s *CpusetGroup) ensureParent(current, root string) error {
|
||||
parent := filepath.Dir(current)
|
||||
if filepath.Clean(parent) == root {
|
||||
if libcontainerUtils.CleanPath(parent) == root {
|
||||
return nil
|
||||
}
|
||||
// Avoid infinite recursion.
|
||||
|
|
|
@ -15,21 +15,29 @@ func (s *DevicesGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *DevicesGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("devices")
|
||||
_, err := d.join("devices")
|
||||
if err != nil {
|
||||
// We will return error even it's `not found` error, devices
|
||||
// cgroup is hard requirement for container's security.
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
devices := cgroup.Resources.Devices
|
||||
if len(devices) > 0 {
|
||||
for _, dev := range devices {
|
||||
file := "devices.deny"
|
||||
if dev.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
if err := writeFile(path, file, dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if !cgroup.Resources.AllowAllDevices {
|
||||
if err := writeFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
|
|
|
@ -19,15 +19,10 @@ func (s *FreezerGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *FreezerGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("freezer")
|
||||
_, err := d.join("freezer")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -19,15 +19,10 @@ func (s *HugetlbGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *HugetlbGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("hugetlb")
|
||||
_, err := d.join("hugetlb")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -32,8 +32,9 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
|||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.Set(path, d.config); err != nil {
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached.
|
||||
if err := s.SetKernelMemory(path, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -50,7 +51,17 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
|||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
|
||||
// This has to be done separately because it has special constraints (it
|
||||
// can't be done after there are processes attached to the cgroup).
|
||||
if cgroup.Resources.KernelMemory > 0 {
|
||||
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -70,12 +81,6 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.KernelMemory > 0 {
|
||||
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.OomKillDisable {
|
||||
if err := writeFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
|
@ -157,6 +162,7 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
|
||||
maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
|
||||
failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
|
||||
limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".")
|
||||
|
||||
value, err := getCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
|
@ -182,6 +188,14 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = getCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
|
|
@ -15,15 +15,10 @@ func (s *NetClsGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *NetClsGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("net_cls")
|
||||
_, err := d.join("net_cls")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -15,15 +15,10 @@ func (s *NetPrioGroup) Name() string {
|
|||
}
|
||||
|
||||
func (s *NetPrioGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.join("net_prio")
|
||||
_, err := d.join("net_prio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := s.Set(dir, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
57
vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
vendored
Normal file
57
vendor/src/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
vendored
Normal file
|
@ -0,0 +1,57 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PidsGroup struct {
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Name() string {
|
||||
return "pids"
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("pids")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := writeFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("pids"))
|
||||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
value, err := getCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = value
|
||||
return nil
|
||||
}
|
|
@ -36,7 +36,9 @@ type MemoryData struct {
|
|||
Usage uint64 `json:"usage,omitempty"`
|
||||
MaxUsage uint64 `json:"max_usage,omitempty"`
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
Limit uint64 `json:"limit"`
|
||||
}
|
||||
|
||||
type MemoryStats struct {
|
||||
// memory used for cache
|
||||
Cache uint64 `json:"cache,omitempty"`
|
||||
|
@ -49,6 +51,11 @@ type MemoryStats struct {
|
|||
Stats map[string]uint64 `json:"stats,omitempty"`
|
||||
}
|
||||
|
||||
type PidsStats struct {
|
||||
// number of pids in the cgroup
|
||||
Current uint64 `json:"current,omitempty"`
|
||||
}
|
||||
|
||||
type BlkioStatEntry struct {
|
||||
Major uint64 `json:"major,omitempty"`
|
||||
Minor uint64 `json:"minor,omitempty"`
|
||||
|
@ -80,6 +87,7 @@ type HugetlbStats struct {
|
|||
type Stats struct {
|
||||
CpuStats CpuStats `json:"cpu_stats,omitempty"`
|
||||
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
|
||||
PidsStats PidsStats `json:"pids_stats,omitempty"`
|
||||
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
|
||||
// the map is in the format "size of hugepage: stats of the hugepage"
|
||||
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
|
||||
|
|
|
@ -26,6 +26,10 @@ func (m *Manager) GetPids() ([]int, error) {
|
|||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
|
|
@ -55,6 +55,7 @@ var subsystems = subsystemSet{
|
|||
&fs.MemoryGroup{},
|
||||
&fs.CpuGroup{},
|
||||
&fs.CpuacctGroup{},
|
||||
&fs.PidsGroup{},
|
||||
&fs.BlkioGroup{},
|
||||
&fs.HugetlbGroup{},
|
||||
&fs.PerfEventGroup{},
|
||||
|
@ -167,6 +168,23 @@ func (m *Manager) Apply(pid int) error {
|
|||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := getSubsystemPath(m.Cgroups, name)
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
@ -233,7 +251,7 @@ func (m *Manager) Apply(pid int) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// we need to manually join the freezer, net_cls, net_prio and cpuset cgroup in systemd
|
||||
// we need to manually join the freezer, net_cls, net_prio, pids and cpuset cgroup in systemd
|
||||
// because it does not currently support it via the dbus api.
|
||||
if err := joinFreezer(c, pid); err != nil {
|
||||
return err
|
||||
|
@ -246,6 +264,10 @@ func (m *Manager) Apply(pid int) error {
|
|||
return err
|
||||
}
|
||||
|
||||
if err := joinPids(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinCpuset(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -277,17 +299,13 @@ func (m *Manager) Apply(pid int) error {
|
|||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
m.Paths = paths
|
||||
|
||||
if paths["cpu"] != "" {
|
||||
if err := fs.CheckCpushares(paths["cpu"], c.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
|
||||
|
@ -330,68 +348,74 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
|||
}
|
||||
|
||||
func joinCpu(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "cpu")
|
||||
_, err := join(c, "cpu", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
if c.Resources.CpuQuota != 0 {
|
||||
if err = writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(c.Resources.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if c.Resources.CpuPeriod != 0 {
|
||||
if err = writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(c.Resources.CpuPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if c.Resources.CpuRtPeriod != 0 {
|
||||
if err = writeFile(path, "cpu.rt_period_us", strconv.FormatInt(c.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if c.Resources.CpuRtRuntime != 0 {
|
||||
if err = writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(c.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinFreezer(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "freezer", pid)
|
||||
_, err := join(c, "freezer", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
freezer, err := subsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return freezer.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinNetPrio(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "net_prio", pid)
|
||||
_, err := join(c, "net_prio", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
netPrio, err := subsystems.Get("net_prio")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netPrio.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinNetCls(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "net_cls", pid)
|
||||
_, err := join(c, "net_cls", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
netcls, err := subsystems.Get("net_cls")
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinPids(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "pids", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return netcls.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd represents slice heirarchy using `-`, so we need to follow suit when
|
||||
// generating the path of slice. Essentially, test-a-b.slice becomes
|
||||
// test.slice/test-a.slice/test-a-b.slice.
|
||||
func expandSlice(slice string) (string, error) {
|
||||
suffix := ".slice"
|
||||
// Name has to end with ".slice", but can't be just ".slice".
|
||||
if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Path-separators are not allowed.
|
||||
if strings.Contains(slice, "/") {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
var path, prefix string
|
||||
sliceName := strings.TrimSuffix(slice, suffix)
|
||||
for _, component := range strings.Split(sliceName, "-") {
|
||||
// test--a.slice isn't permitted, nor is -test.slice.
|
||||
if component == "" {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Append the component to the path and to the prefix.
|
||||
path += prefix + component + suffix + "/"
|
||||
prefix += component + "-"
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
|
@ -410,6 +434,11 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
|||
slice = c.Parent
|
||||
}
|
||||
|
||||
slice, err = expandSlice(slice)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
|
||||
}
|
||||
|
||||
|
@ -440,6 +469,14 @@ func (m *Manager) GetPids() ([]int, error) {
|
|||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
@ -458,16 +495,23 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
|||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
for name, path := range m.Paths {
|
||||
sys, err := subsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
for _, sys := range subsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, err := getSubsystemPath(container.Cgroups, sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -487,17 +531,13 @@ func getUnitName(c *configs.Cgroup) string {
|
|||
// because systemd will re-write the device settings if it needs to re-apply the cgroup context.
|
||||
// This happens at least for v208 when any sibling unit is started.
|
||||
func joinDevices(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "devices", pid)
|
||||
_, err := join(c, "devices", pid)
|
||||
// Even if it's `not found` error, we'll return err because devices cgroup
|
||||
// is hard requirement for container security.
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
devices, err := subsystems.Get("devices")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return devices.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(c *configs.Cgroup) error {
|
||||
|
@ -510,52 +550,16 @@ func setKernelMemory(c *configs.Cgroup) error {
|
|||
return err
|
||||
}
|
||||
|
||||
if c.Resources.KernelMemory > 0 {
|
||||
err = writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(c.Resources.KernelMemory, 10))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
// This doesn't get called by manager.Set, so we need to do it here.
|
||||
s := &fs.MemoryGroup{}
|
||||
return s.SetKernelMemory(path, c)
|
||||
}
|
||||
|
||||
func joinMemory(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
_, err := join(c, "memory", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
// -1 disables memoryswap
|
||||
if c.Resources.MemorySwap > 0 {
|
||||
err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.Resources.MemorySwap, 10))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if c.Resources.MemoryReservation > 0 {
|
||||
err = writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(c.Resources.MemoryReservation, 10))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if c.Resources.OomKillDisable {
|
||||
if err := writeFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if c.Resources.MemorySwappiness >= 0 && c.Resources.MemorySwappiness <= 100 {
|
||||
err = writeFile(path, "memory.swappiness", strconv.FormatInt(c.Resources.MemorySwappiness, 10))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else if c.Resources.MemorySwappiness == -1 {
|
||||
return nil
|
||||
} else {
|
||||
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", c.Resources.MemorySwappiness)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -577,68 +581,25 @@ func joinCpuset(c *configs.Cgroup, pid int) error {
|
|||
// expects device path instead of major minor numbers, which is also confusing
|
||||
// for users. So we use fs work around for now.
|
||||
func joinBlkio(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "blkio")
|
||||
_, err := join(c, "blkio", pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// systemd doesn't directly support this in the dbus properties
|
||||
if c.Resources.BlkioLeafWeight != 0 {
|
||||
if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(c.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range c.Resources.BlkioWeightDevice {
|
||||
if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range c.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range c.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range c.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range c.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinHugetlb(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "hugetlb", pid)
|
||||
_, err := join(c, "hugetlb", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
hugetlb, err := subsystems.Get("hugetlb")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return hugetlb.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinPerfEvent(c *configs.Cgroup, pid int) error {
|
||||
path, err := join(c, "perf_event", pid)
|
||||
_, err := join(c, "perf_event", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
perfEvent, err := subsystems.Get("perf_event")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return perfEvent.Set(path, c)
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ package cgroups
|
|||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
@ -12,7 +13,6 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/docker/go-units"
|
||||
)
|
||||
|
||||
|
@ -84,10 +84,19 @@ func FindCgroupMountpointDir() (string, error) {
|
|||
// Safe as mountinfo encodes mountpoints with spaces as \040.
|
||||
index := strings.Index(text, " - ")
|
||||
postSeparatorFields := strings.Fields(text[index+3:])
|
||||
if len(postSeparatorFields) < 3 {
|
||||
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
numPostFields := len(postSeparatorFields)
|
||||
|
||||
// This is an error as we can't detect if the mount is for "cgroup"
|
||||
if numPostFields == 0 {
|
||||
return "", fmt.Errorf("Found no fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
if postSeparatorFields[0] == "cgroup" {
|
||||
// Check that the mount is properly formated.
|
||||
if numPostFields < 3 {
|
||||
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
return filepath.Dir(fields[4]), nil
|
||||
}
|
||||
}
|
||||
|
@ -112,11 +121,45 @@ func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
|
|||
return getControllerPath(m.Subsystems[0], cgroups)
|
||||
}
|
||||
|
||||
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
|
||||
res := make([]Mount, 0, len(ss))
|
||||
scanner := bufio.NewScanner(mi)
|
||||
for scanner.Scan() {
|
||||
txt := scanner.Text()
|
||||
sepIdx := strings.IndexByte(txt, '-')
|
||||
if sepIdx == -1 {
|
||||
return nil, fmt.Errorf("invalid mountinfo format")
|
||||
}
|
||||
if txt[sepIdx+2:sepIdx+8] != "cgroup" {
|
||||
continue
|
||||
}
|
||||
fields := strings.Split(txt, " ")
|
||||
m := Mount{
|
||||
Mountpoint: fields[4],
|
||||
Root: fields[3],
|
||||
}
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
if strings.HasPrefix(opt, cgroupNamePrefix) {
|
||||
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
|
||||
}
|
||||
if ss[opt] {
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
}
|
||||
}
|
||||
res = append(res, m)
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func GetCgroupMounts() ([]Mount, error) {
|
||||
mounts, err := mount.GetMounts()
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
all, err := GetAllSubsystems()
|
||||
if err != nil {
|
||||
|
@ -127,24 +170,7 @@ func GetCgroupMounts() ([]Mount, error) {
|
|||
for _, s := range all {
|
||||
allMap[s] = true
|
||||
}
|
||||
|
||||
res := []Mount{}
|
||||
for _, mount := range mounts {
|
||||
if mount.Fstype == "cgroup" {
|
||||
m := Mount{Mountpoint: mount.Mountpoint, Root: mount.Root}
|
||||
|
||||
for _, opt := range strings.Split(mount.VfsOpts, ",") {
|
||||
if strings.HasPrefix(opt, cgroupNamePrefix) {
|
||||
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
|
||||
}
|
||||
if allMap[opt] {
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
}
|
||||
}
|
||||
res = append(res, m)
|
||||
}
|
||||
}
|
||||
return res, nil
|
||||
return getCgroupMountsHelper(allMap, f)
|
||||
}
|
||||
|
||||
// Returns all the cgroup subsystems supported by the kernel
|
||||
|
@ -323,9 +349,14 @@ func GetHugePageSize() ([]string, error) {
|
|||
return pageSizes, nil
|
||||
}
|
||||
|
||||
// GetPids returns all pids, that were added to cgroup at path and to all its
|
||||
// subcgroups.
|
||||
// GetPids returns all pids, that were added to cgroup at path.
|
||||
func GetPids(path string) ([]int, error) {
|
||||
return readProcsFile(path)
|
||||
}
|
||||
|
||||
// GetAllPids returns all pids, that were added to cgroup at path and to all its
|
||||
// subcgroups.
|
||||
func GetAllPids(path string) ([]int, error) {
|
||||
var pids []int
|
||||
// collect pids from all sub-cgroups
|
||||
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
|
||||
|
|
|
@ -11,25 +11,38 @@ const (
|
|||
)
|
||||
|
||||
type Cgroup struct {
|
||||
Name string `json:"name"`
|
||||
// Deprecated, use Path instead
|
||||
Name string `json:"name,omitempty"`
|
||||
|
||||
// name of parent cgroup or slice
|
||||
Parent string `json:"parent"`
|
||||
// name of parent of cgroup or slice
|
||||
// Deprecated, use Path instead
|
||||
Parent string `json:"parent,omitempty"`
|
||||
|
||||
// Path specifies the path to cgroups that are created and/or joined by the container.
|
||||
// The path is assumed to be relative to the host system cgroup mountpoint.
|
||||
Path string `json:"path"`
|
||||
|
||||
// ScopePrefix decribes prefix for the scope name
|
||||
ScopePrefix string `json:"scope_prefix"`
|
||||
|
||||
// Paths represent the absolute cgroups paths to join.
|
||||
// This takes precedence over Path.
|
||||
Paths map[string]string
|
||||
|
||||
// Resources contains various cgroups settings to apply
|
||||
*Resources
|
||||
}
|
||||
|
||||
type Resources struct {
|
||||
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
|
||||
AllowAllDevices bool `json:"allow_all_devices"`
|
||||
// Deprecated
|
||||
AllowAllDevices bool `json:"allow_all_devices,omitempty"`
|
||||
// Deprecated
|
||||
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
|
||||
// Deprecated
|
||||
DeniedDevices []*Device `json:"denied_devices,omitempty"`
|
||||
|
||||
AllowedDevices []*Device `json:"allowed_devices"`
|
||||
|
||||
DeniedDevices []*Device `json:"denied_devices"`
|
||||
Devices []*Device `json:"devices"`
|
||||
|
||||
// Memory limit (in bytes)
|
||||
Memory int64 `json:"memory"`
|
||||
|
@ -37,7 +50,7 @@ type Resources struct {
|
|||
// Memory reservation or soft_limit (in bytes)
|
||||
MemoryReservation int64 `json:"memory_reservation"`
|
||||
|
||||
// Total memory usage (memory + swap); set `-1' to disable swap
|
||||
// Total memory usage (memory + swap); set `-1` to enable unlimited swap
|
||||
MemorySwap int64 `json:"memory_swap"`
|
||||
|
||||
// Kernel memory limit (in bytes)
|
||||
|
@ -64,6 +77,9 @@ type Resources struct {
|
|||
// MEM to use
|
||||
CpusetMems string `json:"cpuset_mems"`
|
||||
|
||||
// Process limit; set <= `0' to disable limit.
|
||||
PidsLimit int64 `json:"pids_limit"`
|
||||
|
||||
// Specifies per cgroup weight, range is from 10 to 1000.
|
||||
BlkioWeight uint16 `json:"blkio_weight"`
|
||||
|
||||
|
|
|
@ -171,6 +171,9 @@ type Config struct {
|
|||
// A default action to be taken if no rules match is also given.
|
||||
Seccomp *Seccomp `json:"seccomp"`
|
||||
|
||||
// NoNewPrivileges controls whether processes in the container can gain additional privileges.
|
||||
NoNewPrivileges bool `json:"no_new_privileges"`
|
||||
|
||||
// Hooks are a collection of actions to perform at various container lifecycle events.
|
||||
// Hooks are not able to be marshaled to json but they are also not needed to.
|
||||
Hooks *Hooks `json:"-"`
|
||||
|
|
|
@ -35,6 +35,9 @@ type Device struct {
|
|||
|
||||
// Gid of the device.
|
||||
Gid uint32 `json:"gid"`
|
||||
|
||||
// Write the file to the allowed list
|
||||
Allow bool `json:"allow"`
|
||||
}
|
||||
|
||||
func (d *Device) CgroupString() string {
|
||||
|
|
|
@ -82,20 +82,6 @@ var (
|
|||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
{
|
||||
Path: "/dev/tty0",
|
||||
Type: 'c',
|
||||
Major: 4,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
{
|
||||
Path: "/dev/tty1",
|
||||
Type: 'c',
|
||||
Major: 4,
|
||||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
{
|
||||
Path: "",
|
||||
|
|
|
@ -6,6 +6,7 @@ package libcontainer
|
|||
|
||||
import (
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
@ -14,8 +15,11 @@ import (
|
|||
type Status int
|
||||
|
||||
const (
|
||||
// The container exists but has not been run yet
|
||||
Created Status = iota
|
||||
|
||||
// The container exists and is running.
|
||||
Running Status = iota + 1
|
||||
Running
|
||||
|
||||
// The container exists, it is in the process of being paused.
|
||||
Pausing
|
||||
|
@ -30,6 +34,25 @@ const (
|
|||
Destroyed
|
||||
)
|
||||
|
||||
func (s Status) String() string {
|
||||
switch s {
|
||||
case Created:
|
||||
return "created"
|
||||
case Running:
|
||||
return "running"
|
||||
case Pausing:
|
||||
return "pausing"
|
||||
case Paused:
|
||||
return "paused"
|
||||
case Checkpointed:
|
||||
return "checkpointed"
|
||||
case Destroyed:
|
||||
return "destroyed"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// BaseState represents the platform agnostic pieces relating to a
|
||||
// running container's state
|
||||
type BaseState struct {
|
||||
|
@ -39,9 +62,12 @@ type BaseState struct {
|
|||
// InitProcessPid is the init process id in the parent namespace.
|
||||
InitProcessPid int `json:"init_process_pid"`
|
||||
|
||||
// InitProcessStartTime is the init process start time.
|
||||
// InitProcessStartTime is the init process start time in clock cycles since boot time.
|
||||
InitProcessStartTime string `json:"init_process_start"`
|
||||
|
||||
// Created is the unix timestamp for the creation time of the container in UTC
|
||||
Created time.Time `json:"created"`
|
||||
|
||||
// Config is the container's configuration.
|
||||
Config configs.Config `json:"config"`
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ import (
|
|||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/golang/protobuf/proto"
|
||||
|
@ -38,6 +39,8 @@ type linuxContainer struct {
|
|||
criuPath string
|
||||
m sync.Mutex
|
||||
criuVersion int
|
||||
state containerState
|
||||
created time.Time
|
||||
}
|
||||
|
||||
// State represents a running container's state
|
||||
|
@ -104,6 +107,12 @@ type Container interface {
|
|||
// errors:
|
||||
// Systemerror - System error.
|
||||
NotifyOOM() (<-chan struct{}, error)
|
||||
|
||||
// NotifyMemoryPressure returns a read-only channel signaling when the container reaches a given pressure level
|
||||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error)
|
||||
}
|
||||
|
||||
// ID returns the container's unique ID
|
||||
|
@ -129,7 +138,7 @@ func (c *linuxContainer) State() (*State, error) {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) Processes() ([]int, error) {
|
||||
pids, err := c.cgroupManager.GetPids()
|
||||
pids, err := c.cgroupManager.GetAllPids()
|
||||
if err != nil {
|
||||
return nil, newSystemError(err)
|
||||
}
|
||||
|
@ -183,22 +192,30 @@ func (c *linuxContainer) Start(process *Process) error {
|
|||
}
|
||||
return newSystemError(err)
|
||||
}
|
||||
if doInit {
|
||||
c.updateState(parent)
|
||||
// generate a timestamp indicating when the container was started
|
||||
c.created = time.Now().UTC()
|
||||
|
||||
c.state = &runningState{
|
||||
c: c,
|
||||
}
|
||||
if c.config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Pid: parent.pid(),
|
||||
Root: c.config.Rootfs,
|
||||
if doInit {
|
||||
if err := c.updateState(parent); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Poststart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
if err := parent.terminate(); err != nil {
|
||||
logrus.Warn(err)
|
||||
if c.config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Pid: parent.pid(),
|
||||
Root: c.config.Rootfs,
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Poststart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
if err := parent.terminate(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
return newSystemError(err)
|
||||
}
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -251,7 +268,7 @@ func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.
|
|||
}
|
||||
|
||||
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*initProcess, error) {
|
||||
t := "_LIBCONTAINER_INITTYPE=standard"
|
||||
t := "_LIBCONTAINER_INITTYPE=" + string(initStandard)
|
||||
cloneFlags := c.config.Namespaces.CloneFlags()
|
||||
if cloneFlags&syscall.CLONE_NEWUSER != 0 {
|
||||
if err := c.addUidGidMappings(cmd.SysProcAttr); err != nil {
|
||||
|
@ -278,7 +295,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
|
|||
}
|
||||
|
||||
func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) {
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=setns")
|
||||
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
|
||||
// for setns process, we dont have to set cloneflags as the process namespaces
|
||||
// will only be set via setns syscall
|
||||
data, err := c.bootstrapData(0, c.initProcess.pid(), p.consolePath)
|
||||
|
@ -321,54 +338,53 @@ func newPipe() (parent *os.File, child *os.File, err error) {
|
|||
func (c *linuxContainer) Destroy() error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status != Destroyed {
|
||||
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
|
||||
}
|
||||
if !c.config.Namespaces.Contains(configs.NEWPID) {
|
||||
if err := killCgroupProcesses(c.cgroupManager); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
}
|
||||
err = c.cgroupManager.Destroy()
|
||||
if rerr := os.RemoveAll(c.root); err == nil {
|
||||
err = rerr
|
||||
}
|
||||
c.initProcess = nil
|
||||
if c.config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Root: c.config.Rootfs,
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Poststop {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return err
|
||||
return c.state.destroy()
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Pause() error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
return c.cgroupManager.Freeze(configs.Frozen)
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status != Running {
|
||||
return newGenericError(fmt.Errorf("container not running"), ContainerNotRunning)
|
||||
}
|
||||
if err := c.cgroupManager.Freeze(configs.Frozen); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.state.transition(&pausedState{
|
||||
c: c,
|
||||
})
|
||||
}
|
||||
|
||||
func (c *linuxContainer) Resume() error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
return c.cgroupManager.Freeze(configs.Thawed)
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status != Paused {
|
||||
return newGenericError(fmt.Errorf("container not paused"), ContainerNotPaused)
|
||||
}
|
||||
if err := c.cgroupManager.Freeze(configs.Thawed); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.state.transition(&runningState{
|
||||
c: c,
|
||||
})
|
||||
}
|
||||
|
||||
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
|
||||
return notifyOnOOM(c.cgroupManager.GetPaths())
|
||||
}
|
||||
|
||||
func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
|
||||
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
|
||||
}
|
||||
|
||||
// XXX debug support, remove when debugging done.
|
||||
func addArgsFromEnv(evar string, args *[]string) {
|
||||
if e := os.Getenv(evar); e != "" {
|
||||
|
@ -460,7 +476,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
}
|
||||
|
||||
if criuOpts.ImagesDirectory == "" {
|
||||
criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image")
|
||||
return fmt.Errorf("invalid directory to save checkpoint")
|
||||
}
|
||||
|
||||
// Since a container can be C/R'ed multiple times,
|
||||
|
@ -579,11 +595,9 @@ func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mo
|
|||
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
||||
c.m.Lock()
|
||||
defer c.m.Unlock()
|
||||
|
||||
if err := c.checkCriuVersion("1.5.2"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if criuOpts.WorkDirectory == "" {
|
||||
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
|
||||
}
|
||||
|
@ -592,22 +606,19 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
if err := os.Mkdir(criuOpts.WorkDirectory, 0655); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
workDir, err := os.Open(criuOpts.WorkDirectory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer workDir.Close()
|
||||
|
||||
if criuOpts.ImagesDirectory == "" {
|
||||
criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image")
|
||||
return fmt.Errorf("invalid directory to restore checkpoint")
|
||||
}
|
||||
imageDir, err := os.Open(criuOpts.ImagesDirectory)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer imageDir.Close()
|
||||
|
||||
// CRIU has a few requirements for a root directory:
|
||||
// * it must be a mount point
|
||||
// * its parent must not be overmounted
|
||||
|
@ -618,18 +629,15 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
return err
|
||||
}
|
||||
defer os.Remove(root)
|
||||
|
||||
root, err = filepath.EvalSymlinks(root)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = syscall.Mount(c.config.Rootfs, root, "", syscall.MS_BIND|syscall.MS_REC, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer syscall.Unmount(root, syscall.MNT_DETACH)
|
||||
|
||||
t := criurpc.CriuReqType_RESTORE
|
||||
req := &criurpc.CriuReq{
|
||||
Type: &t,
|
||||
|
@ -697,15 +705,13 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
fds []string
|
||||
fdJSON []byte
|
||||
)
|
||||
|
||||
if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = json.Unmarshal(fdJSON, &fds); err != nil {
|
||||
if err := json.Unmarshal(fdJSON, &fds); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for i := range fds {
|
||||
if s := fds[i]; strings.Contains(s, "pipe:") {
|
||||
inheritFd := new(criurpc.InheritFd)
|
||||
|
@ -714,12 +720,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
|
|||
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
|
||||
}
|
||||
}
|
||||
|
||||
err = c.criuSwrk(process, req, criuOpts, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return c.criuSwrk(process, req, criuOpts, true)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
|
||||
|
@ -914,46 +915,43 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc
|
|||
if notify == nil {
|
||||
return fmt.Errorf("invalid response: %s", resp.String())
|
||||
}
|
||||
|
||||
switch {
|
||||
case notify.GetScript() == "post-dump":
|
||||
if !opts.LeaveRunning {
|
||||
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.Close()
|
||||
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
break
|
||||
|
||||
f.Close()
|
||||
case notify.GetScript() == "network-unlock":
|
||||
if err := unlockNetwork(c.config); err != nil {
|
||||
return err
|
||||
}
|
||||
break
|
||||
|
||||
case notify.GetScript() == "network-lock":
|
||||
if err := lockNetwork(c.config); err != nil {
|
||||
return err
|
||||
}
|
||||
break
|
||||
|
||||
case notify.GetScript() == "post-restore":
|
||||
pid := notify.GetPid()
|
||||
r, err := newRestoredProcess(int(pid), fds)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: crosbymichael restore previous process information by saving the init process information in
|
||||
// the container's state file or separate process state files.
|
||||
process.ops = r
|
||||
if err := c.state.transition(&restoredState{
|
||||
imageDir: opts.ImagesDirectory,
|
||||
c: c,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.updateState(r); err != nil {
|
||||
return err
|
||||
}
|
||||
process.ops = r
|
||||
break
|
||||
if err := os.Remove(filepath.Join(c.root, "checkpoint")); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
logrus.Error(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -963,65 +961,108 @@ func (c *linuxContainer) updateState(process parentProcess) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return c.saveState(state)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) saveState(s *State) error {
|
||||
f, err := os.Create(filepath.Join(c.root, stateFilename))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
os.Remove(filepath.Join(c.root, "checkpoint"))
|
||||
return utils.WriteJSON(f, state)
|
||||
return utils.WriteJSON(f, s)
|
||||
}
|
||||
|
||||
func (c *linuxContainer) deleteState() error {
|
||||
return os.Remove(filepath.Join(c.root, stateFilename))
|
||||
}
|
||||
|
||||
func (c *linuxContainer) currentStatus() (Status, error) {
|
||||
if _, err := os.Stat(filepath.Join(c.root, "checkpoint")); err == nil {
|
||||
return Checkpointed, nil
|
||||
if err := c.refreshState(); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return c.state.status(), nil
|
||||
}
|
||||
|
||||
// refreshState needs to be called to verify that the current state on the
|
||||
// container is what is true. Because consumers of libcontainer can use it
|
||||
// out of process we need to verify the container's status based on runtime
|
||||
// information and not rely on our in process info.
|
||||
func (c *linuxContainer) refreshState() error {
|
||||
paused, err := c.isPaused()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if paused {
|
||||
return c.state.transition(&pausedState{c: c})
|
||||
}
|
||||
running, err := c.isRunning()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if running {
|
||||
return c.state.transition(&runningState{c: c})
|
||||
}
|
||||
return c.state.transition(&stoppedState{c: c})
|
||||
}
|
||||
|
||||
func (c *linuxContainer) isRunning() (bool, error) {
|
||||
if c.initProcess == nil {
|
||||
return Destroyed, nil
|
||||
return false, nil
|
||||
}
|
||||
// return Running if the init process is alive
|
||||
if err := syscall.Kill(c.initProcess.pid(), 0); err != nil {
|
||||
if err == syscall.ESRCH {
|
||||
return Destroyed, nil
|
||||
return false, nil
|
||||
}
|
||||
return 0, newSystemError(err)
|
||||
return false, newSystemError(err)
|
||||
}
|
||||
if c.config.Cgroups != nil && c.config.Cgroups.Resources != nil && c.config.Cgroups.Resources.Freezer == configs.Frozen {
|
||||
return Paused, nil
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) isPaused() (bool, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join(c.cgroupManager.GetPaths()["freezer"], "freezer.state"))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return false, nil
|
||||
}
|
||||
return false, newSystemError(err)
|
||||
}
|
||||
return Running, nil
|
||||
return bytes.Equal(bytes.TrimSpace(data), []byte("FROZEN")), nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) currentState() (*State, error) {
|
||||
status, err := c.currentStatus()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if status == Destroyed {
|
||||
return nil, newGenericError(fmt.Errorf("container destroyed"), ContainerNotExists)
|
||||
}
|
||||
startTime, err := c.initProcess.startTime()
|
||||
if err != nil {
|
||||
return nil, newSystemError(err)
|
||||
var (
|
||||
startTime string
|
||||
externalDescriptors []string
|
||||
pid = -1
|
||||
)
|
||||
if c.initProcess != nil {
|
||||
pid = c.initProcess.pid()
|
||||
startTime, _ = c.initProcess.startTime()
|
||||
externalDescriptors = c.initProcess.externalDescriptors()
|
||||
}
|
||||
state := &State{
|
||||
BaseState: BaseState{
|
||||
ID: c.ID(),
|
||||
Config: *c.config,
|
||||
InitProcessPid: c.initProcess.pid(),
|
||||
InitProcessPid: pid,
|
||||
InitProcessStartTime: startTime,
|
||||
Created: c.created,
|
||||
},
|
||||
CgroupPaths: c.cgroupManager.GetPaths(),
|
||||
NamespacePaths: make(map[configs.NamespaceType]string),
|
||||
ExternalDescriptors: c.initProcess.externalDescriptors(),
|
||||
ExternalDescriptors: externalDescriptors,
|
||||
}
|
||||
for _, ns := range c.config.Namespaces {
|
||||
state.NamespacePaths[ns.Type] = ns.GetPath(c.initProcess.pid())
|
||||
}
|
||||
for _, nsType := range configs.NamespaceTypes() {
|
||||
if _, ok := state.NamespacePaths[nsType]; !ok {
|
||||
ns := configs.Namespace{Type: nsType}
|
||||
state.NamespacePaths[ns.Type] = ns.GetPath(c.initProcess.pid())
|
||||
if pid > 0 {
|
||||
for _, ns := range c.config.Namespaces {
|
||||
state.NamespacePaths[ns.Type] = ns.GetPath(pid)
|
||||
}
|
||||
for _, nsType := range configs.NamespaceTypes() {
|
||||
if _, ok := state.NamespacePaths[nsType]; !ok {
|
||||
ns := configs.Namespace{Type: nsType}
|
||||
state.NamespacePaths[ns.Type] = ns.GetPath(pid)
|
||||
}
|
||||
}
|
||||
}
|
||||
return state, nil
|
||||
|
|
|
@ -16,9 +16,10 @@ const (
|
|||
ContainerPaused
|
||||
ContainerNotStopped
|
||||
ContainerNotRunning
|
||||
ContainerNotPaused
|
||||
|
||||
// Process errors
|
||||
ProcessNotExecuted
|
||||
NoProcessOps
|
||||
|
||||
// Common errors
|
||||
ConfigInvalid
|
||||
|
@ -46,6 +47,10 @@ func (c ErrorCode) String() string {
|
|||
return "Container is not running"
|
||||
case ConsoleExists:
|
||||
return "Console exists for process"
|
||||
case ContainerNotPaused:
|
||||
return "Container is not paused"
|
||||
case NoProcessOps:
|
||||
return "No process operations"
|
||||
default:
|
||||
return "Unknown error"
|
||||
}
|
||||
|
|
|
@ -166,7 +166,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
if err := os.MkdirAll(containerRoot, 0700); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
return &linuxContainer{
|
||||
c := &linuxContainer{
|
||||
id: id,
|
||||
root: containerRoot,
|
||||
config: config,
|
||||
|
@ -174,7 +174,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
initArgs: l.InitArgs,
|
||||
criuPath: l.CriuPath,
|
||||
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
|
||||
}, nil
|
||||
}
|
||||
c.state = &stoppedState{c: c}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Load(id string) (Container, error) {
|
||||
|
@ -191,7 +193,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
|||
processStartTime: state.InitProcessStartTime,
|
||||
fds: state.ExternalDescriptors,
|
||||
}
|
||||
return &linuxContainer{
|
||||
c := &linuxContainer{
|
||||
initProcess: r,
|
||||
id: id,
|
||||
config: &state.Config,
|
||||
|
@ -200,7 +202,13 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
|||
criuPath: l.CriuPath,
|
||||
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
|
||||
root: containerRoot,
|
||||
}, nil
|
||||
created: state.Created,
|
||||
}
|
||||
c.state = &createdState{c: c, s: Created}
|
||||
if err := c.refreshState(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Type() string {
|
||||
|
@ -222,18 +230,25 @@ func (l *LinuxFactory) StartInitialization() (err error) {
|
|||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
os.Clearenv()
|
||||
var i initer
|
||||
defer func() {
|
||||
// if we have an error during the initialization of the container's init then send it back to the
|
||||
// parent process in the form of an initError.
|
||||
if err != nil {
|
||||
if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil {
|
||||
// We have an error during the initialization of the container's init,
|
||||
// send it back to the parent process in the form of an initError.
|
||||
// If container's init successed, syscall.Exec will not return, hence
|
||||
// this defer function will never be called.
|
||||
if _, ok := i.(*linuxStandardInit); ok {
|
||||
// Synchronisation only necessary for standard init.
|
||||
if err := utils.WriteJSON(pipe, syncT{procError}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
// ensure that this pipe is always closed
|
||||
pipe.Close()
|
||||
}()
|
||||
i, err := newContainerInit(it, pipe)
|
||||
i, err = newContainerInit(it, pipe)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -9,6 +9,18 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/stacktrace"
|
||||
)
|
||||
|
||||
type syncType uint8
|
||||
|
||||
const (
|
||||
procReady syncType = iota
|
||||
procError
|
||||
procRun
|
||||
)
|
||||
|
||||
type syncT struct {
|
||||
Type syncType `json:"type"`
|
||||
}
|
||||
|
||||
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
|
||||
Code: {{.ECode}}
|
||||
{{if .Message }}
|
||||
|
|
|
@ -5,6 +5,7 @@ package libcontainer
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
|
@ -73,6 +74,7 @@ func newContainerInit(t initType, pipe *os.File) (initer, error) {
|
|||
}, nil
|
||||
case initStandard:
|
||||
return &linuxStandardInit{
|
||||
pipe: pipe,
|
||||
parentPid: syscall.Getppid(),
|
||||
config: config,
|
||||
}, nil
|
||||
|
@ -140,6 +142,27 @@ func finalizeNamespace(config *initConfig) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// syncParentReady sends to the given pipe a JSON payload which indicates that
|
||||
// the init is ready to Exec the child process. It then waits for the parent to
|
||||
// indicate that it is cleared to Exec.
|
||||
func syncParentReady(pipe io.ReadWriter) error {
|
||||
// Tell parent.
|
||||
if err := utils.WriteJSON(pipe, syncT{procReady}); err != nil {
|
||||
return err
|
||||
}
|
||||
// Wait for parent to give the all-clear.
|
||||
var procSync syncT
|
||||
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
|
||||
if err == io.EOF {
|
||||
return fmt.Errorf("parent closed synchronisation channel")
|
||||
}
|
||||
if procSync.Type != procRun {
|
||||
return fmt.Errorf("invalid synchronisation flag from parent")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// joinExistingNamespaces gets all the namespace paths specified for the container and
|
||||
// does a setns on the namespace fd so that the current process joins the namespace.
|
||||
func joinExistingNamespaces(namespaces []configs.Namespace) error {
|
||||
|
@ -309,7 +332,7 @@ func killCgroupProcesses(m cgroups.Manager) error {
|
|||
if err := m.Freeze(configs.Frozen); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
pids, err := m.GetPids()
|
||||
pids, err := m.GetAllPids()
|
||||
if err != nil {
|
||||
m.Freeze(configs.Thawed)
|
||||
return err
|
||||
|
|
67
vendor/src/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
vendored
Normal file
67
vendor/src/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
vendored
Normal file
|
@ -0,0 +1,67 @@
|
|||
// +build linux
|
||||
|
||||
package keyctl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"syscall"
|
||||
"strings"
|
||||
"strconv"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const KEYCTL_JOIN_SESSION_KEYRING = 1
|
||||
const KEYCTL_SETPERM = 5
|
||||
const KEYCTL_DESCRIBE = 6
|
||||
|
||||
type KeySerial uint32
|
||||
|
||||
func JoinSessionKeyring(name string) (KeySerial, error) {
|
||||
var _name *byte = nil
|
||||
var err error
|
||||
|
||||
if len(name) > 0 {
|
||||
_name, err = syscall.BytePtrFromString(name)
|
||||
if err != nil {
|
||||
return KeySerial(0), err
|
||||
}
|
||||
}
|
||||
|
||||
sessKeyId, _, errn := syscall.Syscall(syscall.SYS_KEYCTL, KEYCTL_JOIN_SESSION_KEYRING, uintptr(unsafe.Pointer(_name)), 0)
|
||||
if errn != 0 {
|
||||
return 0, fmt.Errorf("could not create session key: %v", errn)
|
||||
}
|
||||
return KeySerial(sessKeyId), nil
|
||||
}
|
||||
|
||||
// modify permissions on a keyring by reading the current permissions,
|
||||
// anding the bits with the given mask (clearing permissions) and setting
|
||||
// additional permission bits
|
||||
func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
|
||||
dest := make([]byte, 1024)
|
||||
destBytes := unsafe.Pointer(&dest[0])
|
||||
|
||||
if _, _, err := syscall.Syscall6(syscall.SYS_KEYCTL, uintptr(KEYCTL_DESCRIBE), uintptr(ringId), uintptr(destBytes), uintptr(len(dest)), 0, 0); err != 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
res := strings.Split(string(dest), ";")
|
||||
if len(res) < 5 {
|
||||
return fmt.Errorf("Destination buffer for key description is too small")
|
||||
}
|
||||
|
||||
// parse permissions
|
||||
perm64, err := strconv.ParseUint(res[3], 16, 32)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
perm := (uint32(perm64) & mask) | setbits
|
||||
|
||||
if _, _, err := syscall.Syscall(syscall.SYS_KEYCTL, uintptr(KEYCTL_SETPERM), uintptr(ringId), uintptr(perm)); err != 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
@ -12,31 +12,32 @@ import (
|
|||
|
||||
const oomCgroupName = "memory"
|
||||
|
||||
// notifyOnOOM returns channel on which you can expect event about OOM,
|
||||
// if process died without OOM this channel will be closed.
|
||||
// s is current *libcontainer.State for container.
|
||||
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("There is no path for %q in state", oomCgroupName)
|
||||
}
|
||||
oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control"))
|
||||
type PressureLevel uint
|
||||
|
||||
const (
|
||||
LowPressure PressureLevel = iota
|
||||
MediumPressure
|
||||
CriticalPressure
|
||||
)
|
||||
|
||||
func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) {
|
||||
evFile, err := os.Open(filepath.Join(cgDir, evName))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
|
||||
if syserr != 0 {
|
||||
oomControl.Close()
|
||||
evFile.Close()
|
||||
return nil, syserr
|
||||
}
|
||||
|
||||
eventfd := os.NewFile(fd, "eventfd")
|
||||
|
||||
eventControlPath := filepath.Join(dir, "cgroup.event_control")
|
||||
data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd())
|
||||
eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
|
||||
data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
|
||||
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
|
||||
eventfd.Close()
|
||||
oomControl.Close()
|
||||
evFile.Close()
|
||||
return nil, err
|
||||
}
|
||||
ch := make(chan struct{})
|
||||
|
@ -44,7 +45,7 @@ func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
|
|||
defer func() {
|
||||
close(ch)
|
||||
eventfd.Close()
|
||||
oomControl.Close()
|
||||
evFile.Close()
|
||||
}()
|
||||
buf := make([]byte, 8)
|
||||
for {
|
||||
|
@ -61,3 +62,28 @@ func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
|
|||
}()
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// notifyOnOOM returns channel on which you can expect event about OOM,
|
||||
// if process died without OOM this channel will be closed.
|
||||
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("path %q missing", oomCgroupName)
|
||||
}
|
||||
|
||||
return registerMemoryEvent(dir, "memory.oom_control", "")
|
||||
}
|
||||
|
||||
func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("path %q missing", oomCgroupName)
|
||||
}
|
||||
|
||||
if level > CriticalPressure {
|
||||
return nil, fmt.Errorf("invalid pressure level %d", level)
|
||||
}
|
||||
|
||||
levelStr := []string{"low", "medium", "critical"}[level]
|
||||
return registerMemoryEvent(dir, "memory.pressure_level", levelStr)
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include <bits/sockaddr.h>
|
||||
#include <linux/netlink.h>
|
||||
#include <linux/types.h>
|
||||
#include <stdint.h>
|
||||
|
|
|
@ -55,7 +55,7 @@ type Process struct {
|
|||
// Wait releases any resources associated with the Process
|
||||
func (p Process) Wait() (*os.ProcessState, error) {
|
||||
if p.ops == nil {
|
||||
return nil, newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted)
|
||||
return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.wait()
|
||||
}
|
||||
|
@ -65,7 +65,7 @@ func (p Process) Pid() (int, error) {
|
|||
// math.MinInt32 is returned here, because it's invalid value
|
||||
// for the kill() system call.
|
||||
if p.ops == nil {
|
||||
return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted)
|
||||
return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.pid(), nil
|
||||
}
|
||||
|
@ -73,7 +73,7 @@ func (p Process) Pid() (int, error) {
|
|||
// Signal sends a signal to the Process.
|
||||
func (p Process) Signal(sig os.Signal) error {
|
||||
if p.ops == nil {
|
||||
return newGenericError(fmt.Errorf("invalid process"), ProcessNotExecuted)
|
||||
return newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.signal(sig)
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ package libcontainer
|
|||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
@ -87,6 +88,7 @@ func (p *setnsProcess) start() (err error) {
|
|||
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
||||
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
@ -96,6 +98,7 @@ func (p *setnsProcess) start() (err error) {
|
|||
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
p.wait()
|
||||
return newSystemError(ierr)
|
||||
|
@ -199,7 +202,6 @@ func (p *initProcess) start() (err error) {
|
|||
return newSystemError(err)
|
||||
}
|
||||
p.setExternalDescriptors(fds)
|
||||
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
|
@ -230,13 +232,54 @@ func (p *initProcess) start() (err error) {
|
|||
if err := p.sendConfig(); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
var ierr *genericError
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
|
||||
var (
|
||||
procSync syncT
|
||||
sentRun bool
|
||||
ierr *genericError
|
||||
)
|
||||
|
||||
loop:
|
||||
for {
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&procSync); err != nil {
|
||||
if err == io.EOF {
|
||||
break loop
|
||||
}
|
||||
return newSystemError(err)
|
||||
}
|
||||
switch procSync.Type {
|
||||
case procReady:
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Sync with child.
|
||||
if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
sentRun = true
|
||||
case procError:
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
|
||||
return newSystemError(err)
|
||||
}
|
||||
if ierr != nil {
|
||||
break loop
|
||||
}
|
||||
// Programmer error.
|
||||
panic("No error following JSON procError payload.")
|
||||
default:
|
||||
return newSystemError(fmt.Errorf("invalid JSON synchronisation payload from child"))
|
||||
}
|
||||
}
|
||||
if !sentRun {
|
||||
return newSystemError(fmt.Errorf("could not synchronise with container process"))
|
||||
}
|
||||
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
p.wait()
|
||||
return newSystemError(ierr)
|
||||
}
|
||||
return nil
|
||||
|
@ -270,12 +313,10 @@ func (p *initProcess) startTime() (string, error) {
|
|||
}
|
||||
|
||||
func (p *initProcess) sendConfig() error {
|
||||
// send the state to the container's init process then shutdown writes for the parent
|
||||
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
|
||||
return err
|
||||
}
|
||||
// shutdown writes for the parent side of the pipe
|
||||
return syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR)
|
||||
// send the config to the container's init process, we don't use JSON Encode
|
||||
// here because there might be a problem in JSON decoder in some cases, see:
|
||||
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
|
||||
return utils.WriteJSON(p.parentPipe, p.config)
|
||||
}
|
||||
|
||||
func (p *initProcess) createNetworkInterfaces() error {
|
||||
|
|
|
@ -18,6 +18,8 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/label"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
|
@ -293,12 +295,31 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
|
|||
// checkMountDestination checks to ensure that the mount destination is not over the top of /proc.
|
||||
// dest is required to be an abs path and have any symlinks resolved before calling this function.
|
||||
func checkMountDestination(rootfs, dest string) error {
|
||||
if filepath.Clean(rootfs) == filepath.Clean(dest) {
|
||||
if libcontainerUtils.CleanPath(rootfs) == libcontainerUtils.CleanPath(dest) {
|
||||
return fmt.Errorf("mounting into / is prohibited")
|
||||
}
|
||||
invalidDestinations := []string{
|
||||
"/proc",
|
||||
}
|
||||
// White list, it should be sub directories of invalid destinations
|
||||
validDestinations := []string{
|
||||
// These entries can be bind mounted by files emulated by fuse,
|
||||
// so commands like top, free displays stats in container.
|
||||
"/proc/cpuinfo",
|
||||
"/proc/diskstats",
|
||||
"/proc/meminfo",
|
||||
"/proc/stat",
|
||||
"/proc/net/dev",
|
||||
}
|
||||
for _, valid := range validDestinations {
|
||||
path, err := filepath.Rel(filepath.Join(rootfs, valid), dest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if path == "." {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
for _, invalid := range invalidDestinations {
|
||||
path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest)
|
||||
if err != nil {
|
||||
|
@ -321,7 +342,7 @@ func setupDevSymlinks(rootfs string) error {
|
|||
// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
|
||||
// in /dev if it exists in /proc.
|
||||
if _, err := os.Stat("/proc/kcore"); err == nil {
|
||||
links = append(links, [2]string{"/proc/kcore", "/dev/kcore"})
|
||||
links = append(links, [2]string{"/proc/kcore", "/dev/core"})
|
||||
}
|
||||
for _, link := range links {
|
||||
var (
|
||||
|
@ -365,11 +386,12 @@ func reOpenDevNull() error {
|
|||
|
||||
// Create the device nodes in the container.
|
||||
func createDevices(config *configs.Config) error {
|
||||
useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
|
||||
oldMask := syscall.Umask(0000)
|
||||
for _, node := range config.Devices {
|
||||
// containers running in a user namespace are not allowed to mknod
|
||||
// devices so we can just bind mount it from the host.
|
||||
if err := createDeviceNode(config.Rootfs, node, config.Namespaces.Contains(configs.NEWUSER)); err != nil {
|
||||
if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
|
||||
syscall.Umask(oldMask)
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -231,10 +231,14 @@ func ReserveLabel(scon string) {
|
|||
}
|
||||
}
|
||||
|
||||
func selinuxEnforcePath() string {
|
||||
return fmt.Sprintf("%s/enforce", selinuxPath)
|
||||
}
|
||||
|
||||
func SelinuxGetEnforce() int {
|
||||
var enforce int
|
||||
|
||||
enforceS, err := readCon(fmt.Sprintf("%s/enforce", selinuxPath))
|
||||
enforceS, err := readCon(selinuxEnforcePath())
|
||||
if err != nil {
|
||||
return -1
|
||||
}
|
||||
|
@ -246,6 +250,10 @@ func SelinuxGetEnforce() int {
|
|||
return enforce
|
||||
}
|
||||
|
||||
func SelinuxSetEnforce(mode int) error {
|
||||
return writeCon(selinuxEnforcePath(), fmt.Sprintf("%d", mode))
|
||||
}
|
||||
|
||||
func SelinuxGetEnforceMode() int {
|
||||
switch readConfig(selinuxTag) {
|
||||
case "enforcing":
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/label"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
|
@ -18,12 +19,21 @@ type linuxSetnsInit struct {
|
|||
}
|
||||
|
||||
func (l *linuxSetnsInit) Init() error {
|
||||
// do not inherit the parent's session keyring
|
||||
if _, err := keyctl.JoinSessionKeyring("_ses"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRlimits(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.Config.NoNewPrivileges {
|
||||
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if l.config.Config.Seccomp != nil {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return err
|
||||
|
|
|
@ -3,22 +3,41 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"syscall"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/label"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
type linuxStandardInit struct {
|
||||
pipe io.ReadWriter
|
||||
parentPid int
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
// PR_SET_NO_NEW_PRIVS isn't exposed in Golang so we define it ourselves copying the value
|
||||
// the kernel
|
||||
const PR_SET_NO_NEW_PRIVS = 0x26
|
||||
|
||||
func (l *linuxStandardInit) Init() error {
|
||||
// do not inherit the parent's session keyring
|
||||
sessKeyId, err := keyctl.JoinSessionKeyring("")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// make session keyring searcheable
|
||||
// without user ns we need 'UID' search permissions
|
||||
// with user ns we need 'other' search permissions
|
||||
if err := keyctl.ModKeyringPerm(sessKeyId, 0xffffffff, 0x080008); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// join any namespaces via a path to the namespace fd if provided
|
||||
if err := joinExistingNamespaces(l.config.Config.Namespaces); err != nil {
|
||||
return err
|
||||
|
@ -50,7 +69,6 @@ func (l *linuxStandardInit) Init() error {
|
|||
if err := setOomScoreAdj(l.config.Config.OomScoreAdj); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
label.Init()
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
|
@ -75,7 +93,6 @@ func (l *linuxStandardInit) Init() error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, path := range l.config.Config.ReadonlyPaths {
|
||||
if err := remountReadonly(path); err != nil {
|
||||
return err
|
||||
|
@ -90,6 +107,17 @@ func (l *linuxStandardInit) Init() error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.Config.NoNewPrivileges {
|
||||
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// Tell our parent that we're ready to Execv. This must be done before the
|
||||
// Seccomp rules have been applied, because we need to be able to read and
|
||||
// write to a socket.
|
||||
if err := syncParentReady(l.pipe); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.Config.Seccomp != nil {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return err
|
||||
|
@ -109,5 +137,6 @@ func (l *linuxStandardInit) Init() error {
|
|||
if syscall.Getppid() != l.parentPid {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||||
}
|
||||
|
|
226
vendor/src/github.com/opencontainers/runc/libcontainer/state_linux.go
vendored
Normal file
226
vendor/src/github.com/opencontainers/runc/libcontainer/state_linux.go
vendored
Normal file
|
@ -0,0 +1,226 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func newStateTransitionError(from, to containerState) error {
|
||||
return &stateTransitionError{
|
||||
From: from.status().String(),
|
||||
To: to.status().String(),
|
||||
}
|
||||
}
|
||||
|
||||
// stateTransitionError is returned when an invalid state transition happens from one
|
||||
// state to another.
|
||||
type stateTransitionError struct {
|
||||
From string
|
||||
To string
|
||||
}
|
||||
|
||||
func (s *stateTransitionError) Error() string {
|
||||
return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To)
|
||||
}
|
||||
|
||||
type containerState interface {
|
||||
transition(containerState) error
|
||||
destroy() error
|
||||
status() Status
|
||||
}
|
||||
|
||||
func destroy(c *linuxContainer) error {
|
||||
if !c.config.Namespaces.Contains(configs.NEWPID) {
|
||||
if err := killCgroupProcesses(c.cgroupManager); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
}
|
||||
err := c.cgroupManager.Destroy()
|
||||
if rerr := os.RemoveAll(c.root); err == nil {
|
||||
err = rerr
|
||||
}
|
||||
c.initProcess = nil
|
||||
if herr := runPoststopHooks(c); err == nil {
|
||||
err = herr
|
||||
}
|
||||
c.state = &stoppedState{c: c}
|
||||
return err
|
||||
}
|
||||
|
||||
func runPoststopHooks(c *linuxContainer) error {
|
||||
if c.config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Root: c.config.Rootfs,
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Poststop {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// stoppedState represents a container is a stopped/destroyed state.
|
||||
type stoppedState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (b *stoppedState) status() Status {
|
||||
return Destroyed
|
||||
}
|
||||
|
||||
func (b *stoppedState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *runningState:
|
||||
b.c.state = s
|
||||
return nil
|
||||
case *restoredState:
|
||||
b.c.state = s
|
||||
return nil
|
||||
case *stoppedState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(b, s)
|
||||
}
|
||||
|
||||
func (b *stoppedState) destroy() error {
|
||||
return destroy(b.c)
|
||||
}
|
||||
|
||||
// runningState represents a container that is currently running.
|
||||
type runningState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (r *runningState) status() Status {
|
||||
return Running
|
||||
}
|
||||
|
||||
func (r *runningState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *stoppedState:
|
||||
running, err := r.c.isRunning()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if running {
|
||||
return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
|
||||
}
|
||||
r.c.state = s
|
||||
return nil
|
||||
case *pausedState:
|
||||
r.c.state = s
|
||||
return nil
|
||||
case *runningState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(r, s)
|
||||
}
|
||||
|
||||
func (r *runningState) destroy() error {
|
||||
running, err := r.c.isRunning()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if running {
|
||||
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
|
||||
}
|
||||
return destroy(r.c)
|
||||
}
|
||||
|
||||
// pausedState represents a container that is currently pause. It cannot be destroyed in a
|
||||
// paused state and must transition back to running first.
|
||||
type pausedState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (p *pausedState) status() Status {
|
||||
return Paused
|
||||
}
|
||||
|
||||
func (p *pausedState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *runningState, *stoppedState:
|
||||
p.c.state = s
|
||||
return nil
|
||||
case *pausedState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(p, s)
|
||||
}
|
||||
|
||||
func (p *pausedState) destroy() error {
|
||||
isRunning, err := p.c.isRunning()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !isRunning {
|
||||
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
|
||||
return err
|
||||
}
|
||||
return destroy(p.c)
|
||||
}
|
||||
return newGenericError(fmt.Errorf("container is paused"), ContainerPaused)
|
||||
}
|
||||
|
||||
// restoredState is the same as the running state but also has accociated checkpoint
|
||||
// information that maybe need destroyed when the container is stopped and destory is called.
|
||||
type restoredState struct {
|
||||
imageDir string
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (r *restoredState) status() Status {
|
||||
return Running
|
||||
}
|
||||
|
||||
func (r *restoredState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *stoppedState:
|
||||
return nil
|
||||
case *runningState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(r, s)
|
||||
}
|
||||
|
||||
func (r *restoredState) destroy() error {
|
||||
if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return destroy(r.c)
|
||||
}
|
||||
|
||||
// createdState is used whenever a container is restored, loaded, or setting additional
|
||||
// processes inside and it should not be destroyed when it is exiting.
|
||||
type createdState struct {
|
||||
c *linuxContainer
|
||||
s Status
|
||||
}
|
||||
|
||||
func (n *createdState) status() Status {
|
||||
return n.s
|
||||
}
|
||||
|
||||
func (n *createdState) transition(s containerState) error {
|
||||
n.c.state = s
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *createdState) destroy() error {
|
||||
if err := n.c.refreshState(); err != nil {
|
||||
return err
|
||||
}
|
||||
return n.c.state.destroy()
|
||||
}
|
|
@ -3,6 +3,9 @@
|
|||
package system
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
@ -75,3 +78,45 @@ func Setctty() error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
/*
|
||||
* Detect whether we are currently running in a user namespace.
|
||||
* Copied from github.com/lxc/lxd/shared/util.go
|
||||
*/
|
||||
func RunningInUserNS() bool {
|
||||
file, err := os.Open("/proc/self/uid_map")
|
||||
if err != nil {
|
||||
/*
|
||||
* This kernel-provided file only exists if user namespaces are
|
||||
* supported
|
||||
*/
|
||||
return false
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
buf := bufio.NewReader(file)
|
||||
l, _, err := buf.ReadLine()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
line := string(l)
|
||||
var a, b, c int64
|
||||
fmt.Sscanf(line, "%d %d %d", &a, &b, &c)
|
||||
/*
|
||||
* We assume we are in the initial user namespace if we have a full
|
||||
* range - 4294967295 uids starting at uid 0.
|
||||
*/
|
||||
if a == 0 && b == 0 && c == 4294967295 {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func Prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) {
|
||||
_, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
|
||||
if e1 != 0 {
|
||||
err = e1
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
@ -54,3 +55,32 @@ func WriteJSON(w io.Writer, v interface{}) error {
|
|||
_, err = w.Write(data)
|
||||
return err
|
||||
}
|
||||
|
||||
// CleanPath makes a path safe for use with filepath.Join. This is done by not
|
||||
// only cleaning the path, but also (if the path is relative) adding a leading
|
||||
// '/' and cleaning it (then removing the leading '/'). This ensures that a
|
||||
// path resulting from prepending another path will always resolve to lexically
|
||||
// be a subdirectory of the prefixed path. This is all done lexically, so paths
|
||||
// that include symlinks won't be safe as a result of using CleanPath.
|
||||
func CleanPath(path string) string {
|
||||
// Deal with empty strings nicely.
|
||||
if path == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Ensure that all paths are cleaned (especially problematic ones like
|
||||
// "/../../../../../" which can cause lots of issues).
|
||||
path = filepath.Clean(path)
|
||||
|
||||
// If the path isn't absolute, we need to do more processing to fix paths
|
||||
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
|
||||
// paths to relative ones.
|
||||
if !filepath.IsAbs(path) {
|
||||
path = filepath.Clean(string(os.PathSeparator) + path)
|
||||
// This can't fail, as (by definition) all paths are relative to root.
|
||||
path, _ = filepath.Rel(string(os.PathSeparator), path)
|
||||
}
|
||||
|
||||
// Clean the path again for good measure.
|
||||
return filepath.Clean(path)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue