Merge pull request #21342 from tonistiigi/cleanup-libcontainer
Convert libnetwork stats directly to api types
This commit is contained in:
commit
2a4c970aeb
73 changed files with 19 additions and 10645 deletions
|
@ -70,9 +70,7 @@ import (
|
|||
"github.com/docker/go-connections/nat"
|
||||
"github.com/docker/libnetwork"
|
||||
nwconfig "github.com/docker/libnetwork/config"
|
||||
lntypes "github.com/docker/libnetwork/types"
|
||||
"github.com/docker/libtrust"
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
|
@ -1528,50 +1526,40 @@ func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// Retrieve the nw statistics from libnetwork and inject them in the Stats
|
||||
var nwStats []*libcontainer.NetworkInterface
|
||||
if nwStats, err = daemon.getNetworkStats(container); err != nil {
|
||||
if stats.Networks, err = daemon.getNetworkStats(container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stats.Networks = make(map[string]types.NetworkStats)
|
||||
for _, iface := range nwStats {
|
||||
// For API Version >= 1.21, the original data of network will
|
||||
// be returned.
|
||||
stats.Networks[iface.Name] = types.NetworkStats{
|
||||
RxBytes: iface.RxBytes,
|
||||
RxPackets: iface.RxPackets,
|
||||
RxErrors: iface.RxErrors,
|
||||
RxDropped: iface.RxDropped,
|
||||
TxBytes: iface.TxBytes,
|
||||
TxPackets: iface.TxPackets,
|
||||
TxErrors: iface.TxErrors,
|
||||
TxDropped: iface.TxDropped,
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (daemon *Daemon) getNetworkStats(c *container.Container) ([]*libcontainer.NetworkInterface, error) {
|
||||
var list []*libcontainer.NetworkInterface
|
||||
|
||||
func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.NetworkStats, error) {
|
||||
sb, err := daemon.netController.SandboxByID(c.NetworkSettings.SandboxID)
|
||||
if err != nil {
|
||||
return list, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stats, err := sb.Statistics()
|
||||
lnstats, err := sb.Statistics()
|
||||
if err != nil {
|
||||
return list, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Convert libnetwork nw stats into libcontainer nw stats
|
||||
for ifName, ifStats := range stats {
|
||||
list = append(list, convertLnNetworkStats(ifName, ifStats))
|
||||
stats := make(map[string]types.NetworkStats)
|
||||
// Convert libnetwork nw stats into engine-api stats
|
||||
for ifName, ifStats := range lnstats {
|
||||
stats[ifName] = types.NetworkStats{
|
||||
RxBytes: ifStats.RxBytes,
|
||||
RxPackets: ifStats.RxPackets,
|
||||
RxErrors: ifStats.RxErrors,
|
||||
RxDropped: ifStats.RxDropped,
|
||||
TxBytes: ifStats.TxBytes,
|
||||
TxPackets: ifStats.TxPackets,
|
||||
TxErrors: ifStats.TxErrors,
|
||||
TxDropped: ifStats.TxDropped,
|
||||
}
|
||||
}
|
||||
|
||||
return list, nil
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// newBaseContainer creates a new container with its initial
|
||||
|
@ -1675,19 +1663,6 @@ func (daemon *Daemon) reloadClusterDiscovery(config *Config) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func convertLnNetworkStats(name string, stats *lntypes.InterfaceStatistics) *libcontainer.NetworkInterface {
|
||||
n := &libcontainer.NetworkInterface{Name: name}
|
||||
n.RxBytes = stats.RxBytes
|
||||
n.RxPackets = stats.RxPackets
|
||||
n.RxErrors = stats.RxErrors
|
||||
n.RxDropped = stats.RxDropped
|
||||
n.TxBytes = stats.TxBytes
|
||||
n.TxPackets = stats.TxPackets
|
||||
n.TxErrors = stats.TxErrors
|
||||
n.TxDropped = stats.TxDropped
|
||||
return n
|
||||
}
|
||||
|
||||
func validateID(id string) error {
|
||||
if id == "" {
|
||||
return fmt.Errorf("Invalid empty id")
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"github.com/docker/engine-api/types"
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
)
|
||||
|
||||
// convertStatsToAPITypes converts the libcontainer.Stats to the api specific
|
||||
// structs. This is done to preserve API compatibility and versioning.
|
||||
func convertStatsToAPITypes(ls *libcontainer.Stats) *types.StatsJSON {
|
||||
// TODO FreeBSD. Refactor accordingly to fill in stats.
|
||||
s := &types.StatsJSON{}
|
||||
return s
|
||||
}
|
187
vendor/src/github.com/coreos/go-systemd/dbus/dbus.go
vendored
187
vendor/src/github.com/coreos/go-systemd/dbus/dbus.go
vendored
|
@ -1,187 +0,0 @@
|
|||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Integration with the systemd D-Bus API. See http://www.freedesktop.org/wiki/Software/systemd/dbus/
|
||||
package dbus
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/godbus/dbus"
|
||||
)
|
||||
|
||||
const (
|
||||
alpha = `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ`
|
||||
num = `0123456789`
|
||||
alphanum = alpha + num
|
||||
signalBuffer = 100
|
||||
)
|
||||
|
||||
// needsEscape checks whether a byte in a potential dbus ObjectPath needs to be escaped
|
||||
func needsEscape(i int, b byte) bool {
|
||||
// Escape everything that is not a-z-A-Z-0-9
|
||||
// Also escape 0-9 if it's the first character
|
||||
return strings.IndexByte(alphanum, b) == -1 ||
|
||||
(i == 0 && strings.IndexByte(num, b) != -1)
|
||||
}
|
||||
|
||||
// PathBusEscape sanitizes a constituent string of a dbus ObjectPath using the
|
||||
// rules that systemd uses for serializing special characters.
|
||||
func PathBusEscape(path string) string {
|
||||
// Special case the empty string
|
||||
if len(path) == 0 {
|
||||
return "_"
|
||||
}
|
||||
n := []byte{}
|
||||
for i := 0; i < len(path); i++ {
|
||||
c := path[i]
|
||||
if needsEscape(i, c) {
|
||||
e := fmt.Sprintf("_%x", c)
|
||||
n = append(n, []byte(e)...)
|
||||
} else {
|
||||
n = append(n, c)
|
||||
}
|
||||
}
|
||||
return string(n)
|
||||
}
|
||||
|
||||
// Conn is a connection to systemd's dbus endpoint.
|
||||
type Conn struct {
|
||||
// sysconn/sysobj are only used to call dbus methods
|
||||
sysconn *dbus.Conn
|
||||
sysobj dbus.BusObject
|
||||
|
||||
// sigconn/sigobj are only used to receive dbus signals
|
||||
sigconn *dbus.Conn
|
||||
sigobj dbus.BusObject
|
||||
|
||||
jobListener struct {
|
||||
jobs map[dbus.ObjectPath]chan<- string
|
||||
sync.Mutex
|
||||
}
|
||||
subscriber struct {
|
||||
updateCh chan<- *SubStateUpdate
|
||||
errCh chan<- error
|
||||
sync.Mutex
|
||||
ignore map[dbus.ObjectPath]int64
|
||||
cleanIgnore int64
|
||||
}
|
||||
}
|
||||
|
||||
// New establishes a connection to the system bus and authenticates.
|
||||
// Callers should call Close() when done with the connection.
|
||||
func New() (*Conn, error) {
|
||||
return newConnection(func() (*dbus.Conn, error) {
|
||||
return dbusAuthHelloConnection(dbus.SystemBusPrivate)
|
||||
})
|
||||
}
|
||||
|
||||
// NewUserConnection establishes a connection to the session bus and
|
||||
// authenticates. This can be used to connect to systemd user instances.
|
||||
// Callers should call Close() when done with the connection.
|
||||
func NewUserConnection() (*Conn, error) {
|
||||
return newConnection(func() (*dbus.Conn, error) {
|
||||
return dbusAuthHelloConnection(dbus.SessionBusPrivate)
|
||||
})
|
||||
}
|
||||
|
||||
// NewSystemdConnection establishes a private, direct connection to systemd.
|
||||
// This can be used for communicating with systemd without a dbus daemon.
|
||||
// Callers should call Close() when done with the connection.
|
||||
func NewSystemdConnection() (*Conn, error) {
|
||||
return newConnection(func() (*dbus.Conn, error) {
|
||||
// We skip Hello when talking directly to systemd.
|
||||
return dbusAuthConnection(func() (*dbus.Conn, error) {
|
||||
return dbus.Dial("unix:path=/run/systemd/private")
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// Close closes an established connection
|
||||
func (c *Conn) Close() {
|
||||
c.sysconn.Close()
|
||||
c.sigconn.Close()
|
||||
}
|
||||
|
||||
func newConnection(createBus func() (*dbus.Conn, error)) (*Conn, error) {
|
||||
sysconn, err := createBus()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
sigconn, err := createBus()
|
||||
if err != nil {
|
||||
sysconn.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
c := &Conn{
|
||||
sysconn: sysconn,
|
||||
sysobj: systemdObject(sysconn),
|
||||
sigconn: sigconn,
|
||||
sigobj: systemdObject(sigconn),
|
||||
}
|
||||
|
||||
c.subscriber.ignore = make(map[dbus.ObjectPath]int64)
|
||||
c.jobListener.jobs = make(map[dbus.ObjectPath]chan<- string)
|
||||
|
||||
// Setup the listeners on jobs so that we can get completions
|
||||
c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0,
|
||||
"type='signal', interface='org.freedesktop.systemd1.Manager', member='JobRemoved'")
|
||||
|
||||
c.dispatch()
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func dbusAuthConnection(createBus func() (*dbus.Conn, error)) (*dbus.Conn, error) {
|
||||
conn, err := createBus()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Only use EXTERNAL method, and hardcode the uid (not username)
|
||||
// to avoid a username lookup (which requires a dynamically linked
|
||||
// libc)
|
||||
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(os.Getuid()))}
|
||||
|
||||
err = conn.Auth(methods)
|
||||
if err != nil {
|
||||
conn.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func dbusAuthHelloConnection(createBus func() (*dbus.Conn, error)) (*dbus.Conn, error) {
|
||||
conn, err := dbusAuthConnection(createBus)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err = conn.Hello(); err != nil {
|
||||
conn.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func systemdObject(conn *dbus.Conn) dbus.BusObject {
|
||||
return conn.Object("org.freedesktop.systemd1", dbus.ObjectPath("/org/freedesktop/systemd1"))
|
||||
}
|
|
@ -1,410 +0,0 @@
|
|||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package dbus
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"path"
|
||||
"strconv"
|
||||
|
||||
"github.com/godbus/dbus"
|
||||
)
|
||||
|
||||
func (c *Conn) jobComplete(signal *dbus.Signal) {
|
||||
var id uint32
|
||||
var job dbus.ObjectPath
|
||||
var unit string
|
||||
var result string
|
||||
dbus.Store(signal.Body, &id, &job, &unit, &result)
|
||||
c.jobListener.Lock()
|
||||
out, ok := c.jobListener.jobs[job]
|
||||
if ok {
|
||||
out <- result
|
||||
delete(c.jobListener.jobs, job)
|
||||
}
|
||||
c.jobListener.Unlock()
|
||||
}
|
||||
|
||||
func (c *Conn) startJob(ch chan<- string, job string, args ...interface{}) (int, error) {
|
||||
if ch != nil {
|
||||
c.jobListener.Lock()
|
||||
defer c.jobListener.Unlock()
|
||||
}
|
||||
|
||||
var p dbus.ObjectPath
|
||||
err := c.sysobj.Call(job, 0, args...).Store(&p)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if ch != nil {
|
||||
c.jobListener.jobs[p] = ch
|
||||
}
|
||||
|
||||
// ignore error since 0 is fine if conversion fails
|
||||
jobID, _ := strconv.Atoi(path.Base(string(p)))
|
||||
|
||||
return jobID, nil
|
||||
}
|
||||
|
||||
// StartUnit enqueues a start job and depending jobs, if any (unless otherwise
|
||||
// specified by the mode string).
|
||||
//
|
||||
// Takes the unit to activate, plus a mode string. The mode needs to be one of
|
||||
// replace, fail, isolate, ignore-dependencies, ignore-requirements. If
|
||||
// "replace" the call will start the unit and its dependencies, possibly
|
||||
// replacing already queued jobs that conflict with this. If "fail" the call
|
||||
// will start the unit and its dependencies, but will fail if this would change
|
||||
// an already queued job. If "isolate" the call will start the unit in question
|
||||
// and terminate all units that aren't dependencies of it. If
|
||||
// "ignore-dependencies" it will start a unit but ignore all its dependencies.
|
||||
// If "ignore-requirements" it will start a unit but only ignore the
|
||||
// requirement dependencies. It is not recommended to make use of the latter
|
||||
// two options.
|
||||
//
|
||||
// If the provided channel is non-nil, a result string will be sent to it upon
|
||||
// job completion: one of done, canceled, timeout, failed, dependency, skipped.
|
||||
// done indicates successful execution of a job. canceled indicates that a job
|
||||
// has been canceled before it finished execution. timeout indicates that the
|
||||
// job timeout was reached. failed indicates that the job failed. dependency
|
||||
// indicates that a job this job has been depending on failed and the job hence
|
||||
// has been removed too. skipped indicates that a job was skipped because it
|
||||
// didn't apply to the units current state.
|
||||
//
|
||||
// If no error occurs, the ID of the underlying systemd job will be returned. There
|
||||
// does exist the possibility for no error to be returned, but for the returned job
|
||||
// ID to be 0. In this case, the actual underlying ID is not 0 and this datapoint
|
||||
// should not be considered authoritative.
|
||||
//
|
||||
// If an error does occur, it will be returned to the user alongside a job ID of 0.
|
||||
func (c *Conn) StartUnit(name string, mode string, ch chan<- string) (int, error) {
|
||||
return c.startJob(ch, "org.freedesktop.systemd1.Manager.StartUnit", name, mode)
|
||||
}
|
||||
|
||||
// StopUnit is similar to StartUnit but stops the specified unit rather
|
||||
// than starting it.
|
||||
func (c *Conn) StopUnit(name string, mode string, ch chan<- string) (int, error) {
|
||||
return c.startJob(ch, "org.freedesktop.systemd1.Manager.StopUnit", name, mode)
|
||||
}
|
||||
|
||||
// ReloadUnit reloads a unit. Reloading is done only if the unit is already running and fails otherwise.
|
||||
func (c *Conn) ReloadUnit(name string, mode string, ch chan<- string) (int, error) {
|
||||
return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadUnit", name, mode)
|
||||
}
|
||||
|
||||
// RestartUnit restarts a service. If a service is restarted that isn't
|
||||
// running it will be started.
|
||||
func (c *Conn) RestartUnit(name string, mode string, ch chan<- string) (int, error) {
|
||||
return c.startJob(ch, "org.freedesktop.systemd1.Manager.RestartUnit", name, mode)
|
||||
}
|
||||
|
||||
// TryRestartUnit is like RestartUnit, except that a service that isn't running
|
||||
// is not affected by the restart.
|
||||
func (c *Conn) TryRestartUnit(name string, mode string, ch chan<- string) (int, error) {
|
||||
return c.startJob(ch, "org.freedesktop.systemd1.Manager.TryRestartUnit", name, mode)
|
||||
}
|
||||
|
||||
// ReloadOrRestart attempts a reload if the unit supports it and use a restart
|
||||
// otherwise.
|
||||
func (c *Conn) ReloadOrRestartUnit(name string, mode string, ch chan<- string) (int, error) {
|
||||
return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadOrRestartUnit", name, mode)
|
||||
}
|
||||
|
||||
// ReloadOrTryRestart attempts a reload if the unit supports it and use a "Try"
|
||||
// flavored restart otherwise.
|
||||
func (c *Conn) ReloadOrTryRestartUnit(name string, mode string, ch chan<- string) (int, error) {
|
||||
return c.startJob(ch, "org.freedesktop.systemd1.Manager.ReloadOrTryRestartUnit", name, mode)
|
||||
}
|
||||
|
||||
// StartTransientUnit() may be used to create and start a transient unit, which
|
||||
// will be released as soon as it is not running or referenced anymore or the
|
||||
// system is rebooted. name is the unit name including suffix, and must be
|
||||
// unique. mode is the same as in StartUnit(), properties contains properties
|
||||
// of the unit.
|
||||
func (c *Conn) StartTransientUnit(name string, mode string, properties []Property, ch chan<- string) (int, error) {
|
||||
return c.startJob(ch, "org.freedesktop.systemd1.Manager.StartTransientUnit", name, mode, properties, make([]PropertyCollection, 0))
|
||||
}
|
||||
|
||||
// KillUnit takes the unit name and a UNIX signal number to send. All of the unit's
|
||||
// processes are killed.
|
||||
func (c *Conn) KillUnit(name string, signal int32) {
|
||||
c.sysobj.Call("org.freedesktop.systemd1.Manager.KillUnit", 0, name, "all", signal).Store()
|
||||
}
|
||||
|
||||
// ResetFailedUnit resets the "failed" state of a specific unit.
|
||||
func (c *Conn) ResetFailedUnit(name string) error {
|
||||
return c.sysobj.Call("org.freedesktop.systemd1.Manager.ResetFailedUnit", 0, name).Store()
|
||||
}
|
||||
|
||||
// getProperties takes the unit name and returns all of its dbus object properties, for the given dbus interface
|
||||
func (c *Conn) getProperties(unit string, dbusInterface string) (map[string]interface{}, error) {
|
||||
var err error
|
||||
var props map[string]dbus.Variant
|
||||
|
||||
path := unitPath(unit)
|
||||
if !path.IsValid() {
|
||||
return nil, errors.New("invalid unit name: " + unit)
|
||||
}
|
||||
|
||||
obj := c.sysconn.Object("org.freedesktop.systemd1", path)
|
||||
err = obj.Call("org.freedesktop.DBus.Properties.GetAll", 0, dbusInterface).Store(&props)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
out := make(map[string]interface{}, len(props))
|
||||
for k, v := range props {
|
||||
out[k] = v.Value()
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// GetUnitProperties takes the unit name and returns all of its dbus object properties.
|
||||
func (c *Conn) GetUnitProperties(unit string) (map[string]interface{}, error) {
|
||||
return c.getProperties(unit, "org.freedesktop.systemd1.Unit")
|
||||
}
|
||||
|
||||
func (c *Conn) getProperty(unit string, dbusInterface string, propertyName string) (*Property, error) {
|
||||
var err error
|
||||
var prop dbus.Variant
|
||||
|
||||
path := unitPath(unit)
|
||||
if !path.IsValid() {
|
||||
return nil, errors.New("invalid unit name: " + unit)
|
||||
}
|
||||
|
||||
obj := c.sysconn.Object("org.freedesktop.systemd1", path)
|
||||
err = obj.Call("org.freedesktop.DBus.Properties.Get", 0, dbusInterface, propertyName).Store(&prop)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Property{Name: propertyName, Value: prop}, nil
|
||||
}
|
||||
|
||||
func (c *Conn) GetUnitProperty(unit string, propertyName string) (*Property, error) {
|
||||
return c.getProperty(unit, "org.freedesktop.systemd1.Unit", propertyName)
|
||||
}
|
||||
|
||||
// GetUnitTypeProperties returns the extra properties for a unit, specific to the unit type.
|
||||
// Valid values for unitType: Service, Socket, Target, Device, Mount, Automount, Snapshot, Timer, Swap, Path, Slice, Scope
|
||||
// return "dbus.Error: Unknown interface" if the unitType is not the correct type of the unit
|
||||
func (c *Conn) GetUnitTypeProperties(unit string, unitType string) (map[string]interface{}, error) {
|
||||
return c.getProperties(unit, "org.freedesktop.systemd1."+unitType)
|
||||
}
|
||||
|
||||
// SetUnitProperties() may be used to modify certain unit properties at runtime.
|
||||
// Not all properties may be changed at runtime, but many resource management
|
||||
// settings (primarily those in systemd.cgroup(5)) may. The changes are applied
|
||||
// instantly, and stored on disk for future boots, unless runtime is true, in which
|
||||
// case the settings only apply until the next reboot. name is the name of the unit
|
||||
// to modify. properties are the settings to set, encoded as an array of property
|
||||
// name and value pairs.
|
||||
func (c *Conn) SetUnitProperties(name string, runtime bool, properties ...Property) error {
|
||||
return c.sysobj.Call("org.freedesktop.systemd1.Manager.SetUnitProperties", 0, name, runtime, properties).Store()
|
||||
}
|
||||
|
||||
func (c *Conn) GetUnitTypeProperty(unit string, unitType string, propertyName string) (*Property, error) {
|
||||
return c.getProperty(unit, "org.freedesktop.systemd1."+unitType, propertyName)
|
||||
}
|
||||
|
||||
// ListUnits returns an array with all currently loaded units. Note that
|
||||
// units may be known by multiple names at the same time, and hence there might
|
||||
// be more unit names loaded than actual units behind them.
|
||||
func (c *Conn) ListUnits() ([]UnitStatus, error) {
|
||||
result := make([][]interface{}, 0)
|
||||
err := c.sysobj.Call("org.freedesktop.systemd1.Manager.ListUnits", 0).Store(&result)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resultInterface := make([]interface{}, len(result))
|
||||
for i := range result {
|
||||
resultInterface[i] = result[i]
|
||||
}
|
||||
|
||||
status := make([]UnitStatus, len(result))
|
||||
statusInterface := make([]interface{}, len(status))
|
||||
for i := range status {
|
||||
statusInterface[i] = &status[i]
|
||||
}
|
||||
|
||||
err = dbus.Store(resultInterface, statusInterface...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return status, nil
|
||||
}
|
||||
|
||||
type UnitStatus struct {
|
||||
Name string // The primary unit name as string
|
||||
Description string // The human readable description string
|
||||
LoadState string // The load state (i.e. whether the unit file has been loaded successfully)
|
||||
ActiveState string // The active state (i.e. whether the unit is currently started or not)
|
||||
SubState string // The sub state (a more fine-grained version of the active state that is specific to the unit type, which the active state is not)
|
||||
Followed string // A unit that is being followed in its state by this unit, if there is any, otherwise the empty string.
|
||||
Path dbus.ObjectPath // The unit object path
|
||||
JobId uint32 // If there is a job queued for the job unit the numeric job id, 0 otherwise
|
||||
JobType string // The job type as string
|
||||
JobPath dbus.ObjectPath // The job object path
|
||||
}
|
||||
|
||||
type LinkUnitFileChange EnableUnitFileChange
|
||||
|
||||
// LinkUnitFiles() links unit files (that are located outside of the
|
||||
// usual unit search paths) into the unit search path.
|
||||
//
|
||||
// It takes a list of absolute paths to unit files to link and two
|
||||
// booleans. The first boolean controls whether the unit shall be
|
||||
// enabled for runtime only (true, /run), or persistently (false,
|
||||
// /etc).
|
||||
// The second controls whether symlinks pointing to other units shall
|
||||
// be replaced if necessary.
|
||||
//
|
||||
// This call returns a list of the changes made. The list consists of
|
||||
// structures with three strings: the type of the change (one of symlink
|
||||
// or unlink), the file name of the symlink and the destination of the
|
||||
// symlink.
|
||||
func (c *Conn) LinkUnitFiles(files []string, runtime bool, force bool) ([]LinkUnitFileChange, error) {
|
||||
result := make([][]interface{}, 0)
|
||||
err := c.sysobj.Call("org.freedesktop.systemd1.Manager.LinkUnitFiles", 0, files, runtime, force).Store(&result)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resultInterface := make([]interface{}, len(result))
|
||||
for i := range result {
|
||||
resultInterface[i] = result[i]
|
||||
}
|
||||
|
||||
changes := make([]LinkUnitFileChange, len(result))
|
||||
changesInterface := make([]interface{}, len(changes))
|
||||
for i := range changes {
|
||||
changesInterface[i] = &changes[i]
|
||||
}
|
||||
|
||||
err = dbus.Store(resultInterface, changesInterface...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return changes, nil
|
||||
}
|
||||
|
||||
// EnableUnitFiles() may be used to enable one or more units in the system (by
|
||||
// creating symlinks to them in /etc or /run).
|
||||
//
|
||||
// It takes a list of unit files to enable (either just file names or full
|
||||
// absolute paths if the unit files are residing outside the usual unit
|
||||
// search paths), and two booleans: the first controls whether the unit shall
|
||||
// be enabled for runtime only (true, /run), or persistently (false, /etc).
|
||||
// The second one controls whether symlinks pointing to other units shall
|
||||
// be replaced if necessary.
|
||||
//
|
||||
// This call returns one boolean and an array with the changes made. The
|
||||
// boolean signals whether the unit files contained any enablement
|
||||
// information (i.e. an [Install]) section. The changes list consists of
|
||||
// structures with three strings: the type of the change (one of symlink
|
||||
// or unlink), the file name of the symlink and the destination of the
|
||||
// symlink.
|
||||
func (c *Conn) EnableUnitFiles(files []string, runtime bool, force bool) (bool, []EnableUnitFileChange, error) {
|
||||
var carries_install_info bool
|
||||
|
||||
result := make([][]interface{}, 0)
|
||||
err := c.sysobj.Call("org.freedesktop.systemd1.Manager.EnableUnitFiles", 0, files, runtime, force).Store(&carries_install_info, &result)
|
||||
if err != nil {
|
||||
return false, nil, err
|
||||
}
|
||||
|
||||
resultInterface := make([]interface{}, len(result))
|
||||
for i := range result {
|
||||
resultInterface[i] = result[i]
|
||||
}
|
||||
|
||||
changes := make([]EnableUnitFileChange, len(result))
|
||||
changesInterface := make([]interface{}, len(changes))
|
||||
for i := range changes {
|
||||
changesInterface[i] = &changes[i]
|
||||
}
|
||||
|
||||
err = dbus.Store(resultInterface, changesInterface...)
|
||||
if err != nil {
|
||||
return false, nil, err
|
||||
}
|
||||
|
||||
return carries_install_info, changes, nil
|
||||
}
|
||||
|
||||
type EnableUnitFileChange struct {
|
||||
Type string // Type of the change (one of symlink or unlink)
|
||||
Filename string // File name of the symlink
|
||||
Destination string // Destination of the symlink
|
||||
}
|
||||
|
||||
// DisableUnitFiles() may be used to disable one or more units in the system (by
|
||||
// removing symlinks to them from /etc or /run).
|
||||
//
|
||||
// It takes a list of unit files to disable (either just file names or full
|
||||
// absolute paths if the unit files are residing outside the usual unit
|
||||
// search paths), and one boolean: whether the unit was enabled for runtime
|
||||
// only (true, /run), or persistently (false, /etc).
|
||||
//
|
||||
// This call returns an array with the changes made. The changes list
|
||||
// consists of structures with three strings: the type of the change (one of
|
||||
// symlink or unlink), the file name of the symlink and the destination of the
|
||||
// symlink.
|
||||
func (c *Conn) DisableUnitFiles(files []string, runtime bool) ([]DisableUnitFileChange, error) {
|
||||
result := make([][]interface{}, 0)
|
||||
err := c.sysobj.Call("org.freedesktop.systemd1.Manager.DisableUnitFiles", 0, files, runtime).Store(&result)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resultInterface := make([]interface{}, len(result))
|
||||
for i := range result {
|
||||
resultInterface[i] = result[i]
|
||||
}
|
||||
|
||||
changes := make([]DisableUnitFileChange, len(result))
|
||||
changesInterface := make([]interface{}, len(changes))
|
||||
for i := range changes {
|
||||
changesInterface[i] = &changes[i]
|
||||
}
|
||||
|
||||
err = dbus.Store(resultInterface, changesInterface...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return changes, nil
|
||||
}
|
||||
|
||||
type DisableUnitFileChange struct {
|
||||
Type string // Type of the change (one of symlink or unlink)
|
||||
Filename string // File name of the symlink
|
||||
Destination string // Destination of the symlink
|
||||
}
|
||||
|
||||
// Reload instructs systemd to scan for and reload unit files. This is
|
||||
// equivalent to a 'systemctl daemon-reload'.
|
||||
func (c *Conn) Reload() error {
|
||||
return c.sysobj.Call("org.freedesktop.systemd1.Manager.Reload", 0).Store()
|
||||
}
|
||||
|
||||
func unitPath(name string) dbus.ObjectPath {
|
||||
return dbus.ObjectPath("/org/freedesktop/systemd1/unit/" + PathBusEscape(name))
|
||||
}
|
|
@ -1,218 +0,0 @@
|
|||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package dbus
|
||||
|
||||
import (
|
||||
"github.com/godbus/dbus"
|
||||
)
|
||||
|
||||
// From the systemd docs:
|
||||
//
|
||||
// The properties array of StartTransientUnit() may take many of the settings
|
||||
// that may also be configured in unit files. Not all parameters are currently
|
||||
// accepted though, but we plan to cover more properties with future release.
|
||||
// Currently you may set the Description, Slice and all dependency types of
|
||||
// units, as well as RemainAfterExit, ExecStart for service units,
|
||||
// TimeoutStopUSec and PIDs for scope units, and CPUAccounting, CPUShares,
|
||||
// BlockIOAccounting, BlockIOWeight, BlockIOReadBandwidth,
|
||||
// BlockIOWriteBandwidth, BlockIODeviceWeight, MemoryAccounting, MemoryLimit,
|
||||
// DevicePolicy, DeviceAllow for services/scopes/slices. These fields map
|
||||
// directly to their counterparts in unit files and as normal D-Bus object
|
||||
// properties. The exception here is the PIDs field of scope units which is
|
||||
// used for construction of the scope only and specifies the initial PIDs to
|
||||
// add to the scope object.
|
||||
|
||||
type Property struct {
|
||||
Name string
|
||||
Value dbus.Variant
|
||||
}
|
||||
|
||||
type PropertyCollection struct {
|
||||
Name string
|
||||
Properties []Property
|
||||
}
|
||||
|
||||
type execStart struct {
|
||||
Path string // the binary path to execute
|
||||
Args []string // an array with all arguments to pass to the executed command, starting with argument 0
|
||||
UncleanIsFailure bool // a boolean whether it should be considered a failure if the process exits uncleanly
|
||||
}
|
||||
|
||||
// PropExecStart sets the ExecStart service property. The first argument is a
|
||||
// slice with the binary path to execute followed by the arguments to pass to
|
||||
// the executed command. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecStart=
|
||||
func PropExecStart(command []string, uncleanIsFailure bool) Property {
|
||||
execStarts := []execStart{
|
||||
execStart{
|
||||
Path: command[0],
|
||||
Args: command,
|
||||
UncleanIsFailure: uncleanIsFailure,
|
||||
},
|
||||
}
|
||||
|
||||
return Property{
|
||||
Name: "ExecStart",
|
||||
Value: dbus.MakeVariant(execStarts),
|
||||
}
|
||||
}
|
||||
|
||||
// PropRemainAfterExit sets the RemainAfterExit service property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.service.html#RemainAfterExit=
|
||||
func PropRemainAfterExit(b bool) Property {
|
||||
return Property{
|
||||
Name: "RemainAfterExit",
|
||||
Value: dbus.MakeVariant(b),
|
||||
}
|
||||
}
|
||||
|
||||
// PropDescription sets the Description unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit#Description=
|
||||
func PropDescription(desc string) Property {
|
||||
return Property{
|
||||
Name: "Description",
|
||||
Value: dbus.MakeVariant(desc),
|
||||
}
|
||||
}
|
||||
|
||||
func propDependency(name string, units []string) Property {
|
||||
return Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
|
||||
// PropRequires sets the Requires unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Requires=
|
||||
func PropRequires(units ...string) Property {
|
||||
return propDependency("Requires", units)
|
||||
}
|
||||
|
||||
// PropRequiresOverridable sets the RequiresOverridable unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiresOverridable=
|
||||
func PropRequiresOverridable(units ...string) Property {
|
||||
return propDependency("RequiresOverridable", units)
|
||||
}
|
||||
|
||||
// PropRequisite sets the Requisite unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Requisite=
|
||||
func PropRequisite(units ...string) Property {
|
||||
return propDependency("Requisite", units)
|
||||
}
|
||||
|
||||
// PropRequisiteOverridable sets the RequisiteOverridable unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequisiteOverridable=
|
||||
func PropRequisiteOverridable(units ...string) Property {
|
||||
return propDependency("RequisiteOverridable", units)
|
||||
}
|
||||
|
||||
// PropWants sets the Wants unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Wants=
|
||||
func PropWants(units ...string) Property {
|
||||
return propDependency("Wants", units)
|
||||
}
|
||||
|
||||
// PropBindsTo sets the BindsTo unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#BindsTo=
|
||||
func PropBindsTo(units ...string) Property {
|
||||
return propDependency("BindsTo", units)
|
||||
}
|
||||
|
||||
// PropRequiredBy sets the RequiredBy unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiredBy=
|
||||
func PropRequiredBy(units ...string) Property {
|
||||
return propDependency("RequiredBy", units)
|
||||
}
|
||||
|
||||
// PropRequiredByOverridable sets the RequiredByOverridable unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiredByOverridable=
|
||||
func PropRequiredByOverridable(units ...string) Property {
|
||||
return propDependency("RequiredByOverridable", units)
|
||||
}
|
||||
|
||||
// PropWantedBy sets the WantedBy unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#WantedBy=
|
||||
func PropWantedBy(units ...string) Property {
|
||||
return propDependency("WantedBy", units)
|
||||
}
|
||||
|
||||
// PropBoundBy sets the BoundBy unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/main/systemd.unit.html#BoundBy=
|
||||
func PropBoundBy(units ...string) Property {
|
||||
return propDependency("BoundBy", units)
|
||||
}
|
||||
|
||||
// PropConflicts sets the Conflicts unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Conflicts=
|
||||
func PropConflicts(units ...string) Property {
|
||||
return propDependency("Conflicts", units)
|
||||
}
|
||||
|
||||
// PropConflictedBy sets the ConflictedBy unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#ConflictedBy=
|
||||
func PropConflictedBy(units ...string) Property {
|
||||
return propDependency("ConflictedBy", units)
|
||||
}
|
||||
|
||||
// PropBefore sets the Before unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Before=
|
||||
func PropBefore(units ...string) Property {
|
||||
return propDependency("Before", units)
|
||||
}
|
||||
|
||||
// PropAfter sets the After unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#After=
|
||||
func PropAfter(units ...string) Property {
|
||||
return propDependency("After", units)
|
||||
}
|
||||
|
||||
// PropOnFailure sets the OnFailure unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#OnFailure=
|
||||
func PropOnFailure(units ...string) Property {
|
||||
return propDependency("OnFailure", units)
|
||||
}
|
||||
|
||||
// PropTriggers sets the Triggers unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#Triggers=
|
||||
func PropTriggers(units ...string) Property {
|
||||
return propDependency("Triggers", units)
|
||||
}
|
||||
|
||||
// PropTriggeredBy sets the TriggeredBy unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#TriggeredBy=
|
||||
func PropTriggeredBy(units ...string) Property {
|
||||
return propDependency("TriggeredBy", units)
|
||||
}
|
||||
|
||||
// PropPropagatesReloadTo sets the PropagatesReloadTo unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#PropagatesReloadTo=
|
||||
func PropPropagatesReloadTo(units ...string) Property {
|
||||
return propDependency("PropagatesReloadTo", units)
|
||||
}
|
||||
|
||||
// PropRequiresMountsFor sets the RequiresMountsFor unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.unit.html#RequiresMountsFor=
|
||||
func PropRequiresMountsFor(units ...string) Property {
|
||||
return propDependency("RequiresMountsFor", units)
|
||||
}
|
||||
|
||||
// PropSlice sets the Slice unit property. See
|
||||
// http://www.freedesktop.org/software/systemd/man/systemd.resource-control.html#Slice=
|
||||
func PropSlice(slice string) Property {
|
||||
return Property{
|
||||
Name: "Slice",
|
||||
Value: dbus.MakeVariant(slice),
|
||||
}
|
||||
}
|
|
@ -1,47 +0,0 @@
|
|||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package dbus
|
||||
|
||||
type set struct {
|
||||
data map[string]bool
|
||||
}
|
||||
|
||||
func (s *set) Add(value string) {
|
||||
s.data[value] = true
|
||||
}
|
||||
|
||||
func (s *set) Remove(value string) {
|
||||
delete(s.data, value)
|
||||
}
|
||||
|
||||
func (s *set) Contains(value string) (exists bool) {
|
||||
_, exists = s.data[value]
|
||||
return
|
||||
}
|
||||
|
||||
func (s *set) Length() int {
|
||||
return len(s.data)
|
||||
}
|
||||
|
||||
func (s *set) Values() (values []string) {
|
||||
for val, _ := range s.data {
|
||||
values = append(values, val)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func newSet() *set {
|
||||
return &set{make(map[string]bool)}
|
||||
}
|
|
@ -1,250 +0,0 @@
|
|||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package dbus
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"time"
|
||||
|
||||
"github.com/godbus/dbus"
|
||||
)
|
||||
|
||||
const (
|
||||
cleanIgnoreInterval = int64(10 * time.Second)
|
||||
ignoreInterval = int64(30 * time.Millisecond)
|
||||
)
|
||||
|
||||
// Subscribe sets up this connection to subscribe to all systemd dbus events.
|
||||
// This is required before calling SubscribeUnits. When the connection closes
|
||||
// systemd will automatically stop sending signals so there is no need to
|
||||
// explicitly call Unsubscribe().
|
||||
func (c *Conn) Subscribe() error {
|
||||
c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0,
|
||||
"type='signal',interface='org.freedesktop.systemd1.Manager',member='UnitNew'")
|
||||
c.sigconn.BusObject().Call("org.freedesktop.DBus.AddMatch", 0,
|
||||
"type='signal',interface='org.freedesktop.DBus.Properties',member='PropertiesChanged'")
|
||||
|
||||
err := c.sigobj.Call("org.freedesktop.systemd1.Manager.Subscribe", 0).Store()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Unsubscribe this connection from systemd dbus events.
|
||||
func (c *Conn) Unsubscribe() error {
|
||||
err := c.sigobj.Call("org.freedesktop.systemd1.Manager.Unsubscribe", 0).Store()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Conn) dispatch() {
|
||||
ch := make(chan *dbus.Signal, signalBuffer)
|
||||
|
||||
c.sigconn.Signal(ch)
|
||||
|
||||
go func() {
|
||||
for {
|
||||
signal, ok := <-ch
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
if signal.Name == "org.freedesktop.systemd1.Manager.JobRemoved" {
|
||||
c.jobComplete(signal)
|
||||
}
|
||||
|
||||
if c.subscriber.updateCh == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
var unitPath dbus.ObjectPath
|
||||
switch signal.Name {
|
||||
case "org.freedesktop.systemd1.Manager.JobRemoved":
|
||||
unitName := signal.Body[2].(string)
|
||||
c.sysobj.Call("org.freedesktop.systemd1.Manager.GetUnit", 0, unitName).Store(&unitPath)
|
||||
case "org.freedesktop.systemd1.Manager.UnitNew":
|
||||
unitPath = signal.Body[1].(dbus.ObjectPath)
|
||||
case "org.freedesktop.DBus.Properties.PropertiesChanged":
|
||||
if signal.Body[0].(string) == "org.freedesktop.systemd1.Unit" {
|
||||
unitPath = signal.Path
|
||||
}
|
||||
}
|
||||
|
||||
if unitPath == dbus.ObjectPath("") {
|
||||
continue
|
||||
}
|
||||
|
||||
c.sendSubStateUpdate(unitPath)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Returns two unbuffered channels which will receive all changed units every
|
||||
// interval. Deleted units are sent as nil.
|
||||
func (c *Conn) SubscribeUnits(interval time.Duration) (<-chan map[string]*UnitStatus, <-chan error) {
|
||||
return c.SubscribeUnitsCustom(interval, 0, func(u1, u2 *UnitStatus) bool { return *u1 != *u2 }, nil)
|
||||
}
|
||||
|
||||
// SubscribeUnitsCustom is like SubscribeUnits but lets you specify the buffer
|
||||
// size of the channels, the comparison function for detecting changes and a filter
|
||||
// function for cutting down on the noise that your channel receives.
|
||||
func (c *Conn) SubscribeUnitsCustom(interval time.Duration, buffer int, isChanged func(*UnitStatus, *UnitStatus) bool, filterUnit func(string) bool) (<-chan map[string]*UnitStatus, <-chan error) {
|
||||
old := make(map[string]*UnitStatus)
|
||||
statusChan := make(chan map[string]*UnitStatus, buffer)
|
||||
errChan := make(chan error, buffer)
|
||||
|
||||
go func() {
|
||||
for {
|
||||
timerChan := time.After(interval)
|
||||
|
||||
units, err := c.ListUnits()
|
||||
if err == nil {
|
||||
cur := make(map[string]*UnitStatus)
|
||||
for i := range units {
|
||||
if filterUnit != nil && filterUnit(units[i].Name) {
|
||||
continue
|
||||
}
|
||||
cur[units[i].Name] = &units[i]
|
||||
}
|
||||
|
||||
// add all new or changed units
|
||||
changed := make(map[string]*UnitStatus)
|
||||
for n, u := range cur {
|
||||
if oldU, ok := old[n]; !ok || isChanged(oldU, u) {
|
||||
changed[n] = u
|
||||
}
|
||||
delete(old, n)
|
||||
}
|
||||
|
||||
// add all deleted units
|
||||
for oldN := range old {
|
||||
changed[oldN] = nil
|
||||
}
|
||||
|
||||
old = cur
|
||||
|
||||
if len(changed) != 0 {
|
||||
statusChan <- changed
|
||||
}
|
||||
} else {
|
||||
errChan <- err
|
||||
}
|
||||
|
||||
<-timerChan
|
||||
}
|
||||
}()
|
||||
|
||||
return statusChan, errChan
|
||||
}
|
||||
|
||||
type SubStateUpdate struct {
|
||||
UnitName string
|
||||
SubState string
|
||||
}
|
||||
|
||||
// SetSubStateSubscriber writes to updateCh when any unit's substate changes.
|
||||
// Although this writes to updateCh on every state change, the reported state
|
||||
// may be more recent than the change that generated it (due to an unavoidable
|
||||
// race in the systemd dbus interface). That is, this method provides a good
|
||||
// way to keep a current view of all units' states, but is not guaranteed to
|
||||
// show every state transition they go through. Furthermore, state changes
|
||||
// will only be written to the channel with non-blocking writes. If updateCh
|
||||
// is full, it attempts to write an error to errCh; if errCh is full, the error
|
||||
// passes silently.
|
||||
func (c *Conn) SetSubStateSubscriber(updateCh chan<- *SubStateUpdate, errCh chan<- error) {
|
||||
c.subscriber.Lock()
|
||||
defer c.subscriber.Unlock()
|
||||
c.subscriber.updateCh = updateCh
|
||||
c.subscriber.errCh = errCh
|
||||
}
|
||||
|
||||
func (c *Conn) sendSubStateUpdate(path dbus.ObjectPath) {
|
||||
c.subscriber.Lock()
|
||||
defer c.subscriber.Unlock()
|
||||
|
||||
if c.shouldIgnore(path) {
|
||||
return
|
||||
}
|
||||
|
||||
info, err := c.GetUnitProperties(string(path))
|
||||
if err != nil {
|
||||
select {
|
||||
case c.subscriber.errCh <- err:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
name := info["Id"].(string)
|
||||
substate := info["SubState"].(string)
|
||||
|
||||
update := &SubStateUpdate{name, substate}
|
||||
select {
|
||||
case c.subscriber.updateCh <- update:
|
||||
default:
|
||||
select {
|
||||
case c.subscriber.errCh <- errors.New("update channel full!"):
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
c.updateIgnore(path, info)
|
||||
}
|
||||
|
||||
// The ignore functions work around a wart in the systemd dbus interface.
|
||||
// Requesting the properties of an unloaded unit will cause systemd to send a
|
||||
// pair of UnitNew/UnitRemoved signals. Because we need to get a unit's
|
||||
// properties on UnitNew (as that's the only indication of a new unit coming up
|
||||
// for the first time), we would enter an infinite loop if we did not attempt
|
||||
// to detect and ignore these spurious signals. The signal themselves are
|
||||
// indistinguishable from relevant ones, so we (somewhat hackishly) ignore an
|
||||
// unloaded unit's signals for a short time after requesting its properties.
|
||||
// This means that we will miss e.g. a transient unit being restarted
|
||||
// *immediately* upon failure and also a transient unit being started
|
||||
// immediately after requesting its status (with systemctl status, for example,
|
||||
// because this causes a UnitNew signal to be sent which then causes us to fetch
|
||||
// the properties).
|
||||
|
||||
func (c *Conn) shouldIgnore(path dbus.ObjectPath) bool {
|
||||
t, ok := c.subscriber.ignore[path]
|
||||
return ok && t >= time.Now().UnixNano()
|
||||
}
|
||||
|
||||
func (c *Conn) updateIgnore(path dbus.ObjectPath, info map[string]interface{}) {
|
||||
c.cleanIgnore()
|
||||
|
||||
// unit is unloaded - it will trigger bad systemd dbus behavior
|
||||
if info["LoadState"].(string) == "not-found" {
|
||||
c.subscriber.ignore[path] = time.Now().UnixNano() + ignoreInterval
|
||||
}
|
||||
}
|
||||
|
||||
// without this, ignore would grow unboundedly over time
|
||||
func (c *Conn) cleanIgnore() {
|
||||
now := time.Now().UnixNano()
|
||||
if c.subscriber.cleanIgnore < now {
|
||||
c.subscriber.cleanIgnore = now + cleanIgnoreInterval
|
||||
|
||||
for p, t := range c.subscriber.ignore {
|
||||
if t < now {
|
||||
delete(c.subscriber.ignore, p)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,57 +0,0 @@
|
|||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package dbus
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// SubscriptionSet returns a subscription set which is like conn.Subscribe but
|
||||
// can filter to only return events for a set of units.
|
||||
type SubscriptionSet struct {
|
||||
*set
|
||||
conn *Conn
|
||||
}
|
||||
|
||||
func (s *SubscriptionSet) filter(unit string) bool {
|
||||
return !s.Contains(unit)
|
||||
}
|
||||
|
||||
// Subscribe starts listening for dbus events for all of the units in the set.
|
||||
// Returns channels identical to conn.SubscribeUnits.
|
||||
func (s *SubscriptionSet) Subscribe() (<-chan map[string]*UnitStatus, <-chan error) {
|
||||
// TODO: Make fully evented by using systemd 209 with properties changed values
|
||||
return s.conn.SubscribeUnitsCustom(time.Second, 0,
|
||||
mismatchUnitStatus,
|
||||
func(unit string) bool { return s.filter(unit) },
|
||||
)
|
||||
}
|
||||
|
||||
// NewSubscriptionSet returns a new subscription set.
|
||||
func (conn *Conn) NewSubscriptionSet() *SubscriptionSet {
|
||||
return &SubscriptionSet{newSet(), conn}
|
||||
}
|
||||
|
||||
// mismatchUnitStatus returns true if the provided UnitStatus objects
|
||||
// are not equivalent. false is returned if the objects are equivalent.
|
||||
// Only the Name, Description and state-related fields are used in
|
||||
// the comparison.
|
||||
func mismatchUnitStatus(u1, u2 *UnitStatus) bool {
|
||||
return u1.Name != u2.Name ||
|
||||
u1.Description != u2.Description ||
|
||||
u1.LoadState != u2.LoadState ||
|
||||
u1.ActiveState != u2.ActiveState ||
|
||||
u1.SubState != u2.SubState
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package util contains utility functions related to systemd that applications
|
||||
// can use to check things like whether systemd is running.
|
||||
package util
|
||||
|
||||
import (
|
||||
"os"
|
||||
)
|
||||
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similar to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||
func IsRunningSystemd() bool {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return fi.IsDir()
|
||||
}
|
|
@ -1,238 +0,0 @@
|
|||
Libcontainer provides a native Go implementation for creating containers
|
||||
with namespaces, cgroups, capabilities, and filesystem access controls.
|
||||
It allows you to manage the lifecycle of the container performing additional operations
|
||||
after the container is created.
|
||||
|
||||
|
||||
#### Container
|
||||
A container is a self contained execution environment that shares the kernel of the
|
||||
host system and which is (optionally) isolated from other containers in the system.
|
||||
|
||||
#### Using libcontainer
|
||||
|
||||
Because containers are spawned in a two step process you will need a binary that
|
||||
will be executed as the init process for the container. In libcontainer, we use
|
||||
the current binary (/proc/self/exe) to be executed as the init process, and use
|
||||
arg "init", we call the first step process "bootstrap", so you always need a "init"
|
||||
function as the entry of "bootstrap".
|
||||
|
||||
```go
|
||||
func init() {
|
||||
if len(os.Args) > 1 && os.Args[1] == "init" {
|
||||
runtime.GOMAXPROCS(1)
|
||||
runtime.LockOSThread()
|
||||
factory, _ := libcontainer.New("")
|
||||
if err := factory.StartInitialization(); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
panic("--this line should have never been executed, congratulations--")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Then to create a container you first have to initialize an instance of a factory
|
||||
that will handle the creation and initialization for a container.
|
||||
|
||||
```go
|
||||
factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init"))
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
return
|
||||
}
|
||||
```
|
||||
|
||||
Once you have an instance of the factory created we can create a configuration
|
||||
struct describing how the container is to be created. A sample would look similar to this:
|
||||
|
||||
```go
|
||||
defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
config := &configs.Config{
|
||||
Rootfs: "/your/path/to/rootfs",
|
||||
Capabilities: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Namespaces: configs.Namespaces([]configs.Namespace{
|
||||
{Type: configs.NEWNS},
|
||||
{Type: configs.NEWUTS},
|
||||
{Type: configs.NEWIPC},
|
||||
{Type: configs.NEWPID},
|
||||
{Type: configs.NEWUSER},
|
||||
{Type: configs.NEWNET},
|
||||
}),
|
||||
Cgroups: &configs.Cgroup{
|
||||
Name: "test-container",
|
||||
Parent: "system",
|
||||
Resources: &configs.Resources{
|
||||
MemorySwappiness: nil,
|
||||
AllowAllDevices: false,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
},
|
||||
},
|
||||
MaskPaths: []string{
|
||||
"/proc/kcore",
|
||||
},
|
||||
ReadonlyPaths: []string{
|
||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
},
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Hostname: "testing",
|
||||
Mounts: []*configs.Mount{
|
||||
{
|
||||
Source: "proc",
|
||||
Destination: "/proc",
|
||||
Device: "proc",
|
||||
Flags: defaultMountFlags,
|
||||
},
|
||||
{
|
||||
Source: "tmpfs",
|
||||
Destination: "/dev",
|
||||
Device: "tmpfs",
|
||||
Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME,
|
||||
Data: "mode=755",
|
||||
},
|
||||
{
|
||||
Source: "devpts",
|
||||
Destination: "/dev/pts",
|
||||
Device: "devpts",
|
||||
Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC,
|
||||
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
|
||||
},
|
||||
{
|
||||
Device: "tmpfs",
|
||||
Source: "shm",
|
||||
Destination: "/dev/shm",
|
||||
Data: "mode=1777,size=65536k",
|
||||
Flags: defaultMountFlags,
|
||||
},
|
||||
{
|
||||
Source: "mqueue",
|
||||
Destination: "/dev/mqueue",
|
||||
Device: "mqueue",
|
||||
Flags: defaultMountFlags,
|
||||
},
|
||||
{
|
||||
Source: "sysfs",
|
||||
Destination: "/sys",
|
||||
Device: "sysfs",
|
||||
Flags: defaultMountFlags | syscall.MS_RDONLY,
|
||||
},
|
||||
},
|
||||
UidMappings: []configs.IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
Host: 1000,
|
||||
size: 65536,
|
||||
},
|
||||
},
|
||||
GidMappings: []configs.IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
Host: 1000,
|
||||
size: 65536,
|
||||
},
|
||||
},
|
||||
Networks: []*configs.Network{
|
||||
{
|
||||
Type: "loopback",
|
||||
Address: "127.0.0.1/0",
|
||||
Gateway: "localhost",
|
||||
},
|
||||
},
|
||||
Rlimits: []configs.Rlimit{
|
||||
{
|
||||
Type: syscall.RLIMIT_NOFILE,
|
||||
Hard: uint64(1025),
|
||||
Soft: uint64(1025),
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Once you have the configuration populated you can create a container:
|
||||
|
||||
```go
|
||||
container, err := factory.Create("container-id", config)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
return
|
||||
}
|
||||
```
|
||||
|
||||
To spawn bash as the initial process inside the container and have the
|
||||
processes pid returned in order to wait, signal, or kill the process:
|
||||
|
||||
```go
|
||||
process := &libcontainer.Process{
|
||||
Args: []string{"/bin/bash"},
|
||||
Env: []string{"PATH=/bin"},
|
||||
User: "daemon",
|
||||
Stdin: os.Stdin,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
}
|
||||
|
||||
err := container.Start(process)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
container.Destroy()
|
||||
return
|
||||
}
|
||||
|
||||
// wait for the process to finish.
|
||||
_, err := process.Wait()
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
|
||||
// destroy the container.
|
||||
container.Destroy()
|
||||
```
|
||||
|
||||
Additional ways to interact with a running container are:
|
||||
|
||||
```go
|
||||
// return all the pids for all processes running inside the container.
|
||||
processes, err := container.Processes()
|
||||
|
||||
// get detailed cpu, memory, io, and network statistics for the container and
|
||||
// it's processes.
|
||||
stats, err := container.Stats()
|
||||
|
||||
// pause all processes inside the container.
|
||||
container.Pause()
|
||||
|
||||
// resume all paused processes.
|
||||
container.Resume()
|
||||
```
|
||||
|
||||
|
||||
#### Checkpoint & Restore
|
||||
|
||||
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
|
||||
This let's you save the state of a process running inside a container to disk, and then restore
|
||||
that state into a new process, on the same machine or on another machine.
|
||||
|
||||
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
|
||||
If you don't already have `criu` installed, you can build it from source, following the
|
||||
[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image
|
||||
generated when building libcontainer with docker.
|
||||
|
||||
|
||||
## Copyright and license
|
||||
|
||||
Code and documentation copyright 2014 Docker, inc. Code released under the Apache 2.0 license.
|
||||
Docs released under Creative commons.
|
||||
|
|
@ -1,335 +0,0 @@
|
|||
## Container Specification - v1
|
||||
|
||||
This is the standard configuration for version 1 containers. It includes
|
||||
namespaces, standard filesystem setup, a default Linux capability set, and
|
||||
information about resource reservations. It also has information about any
|
||||
populated environment settings for the processes running inside a container.
|
||||
|
||||
Along with the configuration of how a container is created the standard also
|
||||
discusses actions that can be performed on a container to manage and inspect
|
||||
information about the processes running inside.
|
||||
|
||||
The v1 profile is meant to be able to accommodate the majority of applications
|
||||
with a strong security configuration.
|
||||
|
||||
### System Requirements and Compatibility
|
||||
|
||||
Minimum requirements:
|
||||
* Kernel version - 3.10 recommended 2.6.2x minimum(with backported patches)
|
||||
* Mounted cgroups with each subsystem in its own hierarchy
|
||||
|
||||
|
||||
### Namespaces
|
||||
|
||||
| Flag | Enabled |
|
||||
| ------------ | ------- |
|
||||
| CLONE_NEWPID | 1 |
|
||||
| CLONE_NEWUTS | 1 |
|
||||
| CLONE_NEWIPC | 1 |
|
||||
| CLONE_NEWNET | 1 |
|
||||
| CLONE_NEWNS | 1 |
|
||||
| CLONE_NEWUSER | 1 |
|
||||
|
||||
Namespaces are created for the container via the `clone` syscall.
|
||||
|
||||
|
||||
### Filesystem
|
||||
|
||||
A root filesystem must be provided to a container for execution. The container
|
||||
will use this root filesystem (rootfs) to jail and spawn processes inside where
|
||||
the binaries and system libraries are local to that directory. Any binaries
|
||||
to be executed must be contained within this rootfs.
|
||||
|
||||
Mounts that happen inside the container are automatically cleaned up when the
|
||||
container exits as the mount namespace is destroyed and the kernel will
|
||||
unmount all the mounts that were setup within that namespace.
|
||||
|
||||
For a container to execute properly there are certain filesystems that
|
||||
are required to be mounted within the rootfs that the runtime will setup.
|
||||
|
||||
| Path | Type | Flags | Data |
|
||||
| ----------- | ------ | -------------------------------------- | ---------------------------------------- |
|
||||
| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
|
||||
| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 |
|
||||
| /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k |
|
||||
| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
|
||||
| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 |
|
||||
| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | |
|
||||
|
||||
|
||||
After a container's filesystems are mounted within the newly created
|
||||
mount namespace `/dev` will need to be populated with a set of device nodes.
|
||||
It is expected that a rootfs does not need to have any device nodes specified
|
||||
for `/dev` within the rootfs as the container will setup the correct devices
|
||||
that are required for executing a container's process.
|
||||
|
||||
| Path | Mode | Access |
|
||||
| ------------ | ---- | ---------- |
|
||||
| /dev/null | 0666 | rwm |
|
||||
| /dev/zero | 0666 | rwm |
|
||||
| /dev/full | 0666 | rwm |
|
||||
| /dev/tty | 0666 | rwm |
|
||||
| /dev/random | 0666 | rwm |
|
||||
| /dev/urandom | 0666 | rwm |
|
||||
| /dev/fuse | 0666 | rwm |
|
||||
|
||||
|
||||
**ptmx**
|
||||
`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within
|
||||
the container.
|
||||
|
||||
The use of a pseudo TTY is optional within a container and it should support both.
|
||||
If a pseudo is provided to the container `/dev/console` will need to be
|
||||
setup by binding the console in `/dev/` after it has been populated and mounted
|
||||
in tmpfs.
|
||||
|
||||
| Source | Destination | UID GID | Mode | Type |
|
||||
| --------------- | ------------ | ------- | ---- | ---- |
|
||||
| *pty host path* | /dev/console | 0 0 | 0600 | bind |
|
||||
|
||||
|
||||
After `/dev/null` has been setup we check for any external links between
|
||||
the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing
|
||||
to `/dev/null` outside the container we close and `dup2` the the `/dev/null`
|
||||
that is local to the container's rootfs.
|
||||
|
||||
|
||||
After the container has `/proc` mounted a few standard symlinks are setup
|
||||
within `/dev/` for the io.
|
||||
|
||||
| Source | Destination |
|
||||
| --------------- | ----------- |
|
||||
| /proc/self/fd | /dev/fd |
|
||||
| /proc/self/fd/0 | /dev/stdin |
|
||||
| /proc/self/fd/1 | /dev/stdout |
|
||||
| /proc/self/fd/2 | /dev/stderr |
|
||||
|
||||
A `pivot_root` is used to change the root for the process, effectively
|
||||
jailing the process inside the rootfs.
|
||||
|
||||
```c
|
||||
put_old = mkdir(...);
|
||||
pivot_root(rootfs, put_old);
|
||||
chdir("/");
|
||||
unmount(put_old, MS_DETACH);
|
||||
rmdir(put_old);
|
||||
```
|
||||
|
||||
For container's running with a rootfs inside `ramfs` a `MS_MOVE` combined
|
||||
with a `chroot` is required as `pivot_root` is not supported in `ramfs`.
|
||||
|
||||
```c
|
||||
mount(rootfs, "/", NULL, MS_MOVE, NULL);
|
||||
chroot(".");
|
||||
chdir("/");
|
||||
```
|
||||
|
||||
The `umask` is set back to `0022` after the filesystem setup has been completed.
|
||||
|
||||
### Resources
|
||||
|
||||
Cgroups are used to handle resource allocation for containers. This includes
|
||||
system resources like cpu, memory, and device access.
|
||||
|
||||
| Subsystem | Enabled |
|
||||
| ---------- | ------- |
|
||||
| devices | 1 |
|
||||
| memory | 1 |
|
||||
| cpu | 1 |
|
||||
| cpuacct | 1 |
|
||||
| cpuset | 1 |
|
||||
| blkio | 1 |
|
||||
| perf_event | 1 |
|
||||
| freezer | 1 |
|
||||
| hugetlb | 1 |
|
||||
| pids | 1 |
|
||||
|
||||
|
||||
All cgroup subsystem are joined so that statistics can be collected from
|
||||
each of the subsystems. Freezer does not expose any stats but is joined
|
||||
so that containers can be paused and resumed.
|
||||
|
||||
The parent process of the container's init must place the init pid inside
|
||||
the correct cgroups before the initialization begins. This is done so
|
||||
that no processes or threads escape the cgroups. This sync is
|
||||
done via a pipe ( specified in the runtime section below ) that the container's
|
||||
init process will block waiting for the parent to finish setup.
|
||||
|
||||
### Security
|
||||
|
||||
The standard set of Linux capabilities that are set in a container
|
||||
provide a good default for security and flexibility for the applications.
|
||||
|
||||
|
||||
| Capability | Enabled |
|
||||
| -------------------- | ------- |
|
||||
| CAP_NET_RAW | 1 |
|
||||
| CAP_NET_BIND_SERVICE | 1 |
|
||||
| CAP_AUDIT_READ | 1 |
|
||||
| CAP_AUDIT_WRITE | 1 |
|
||||
| CAP_DAC_OVERRIDE | 1 |
|
||||
| CAP_SETFCAP | 1 |
|
||||
| CAP_SETPCAP | 1 |
|
||||
| CAP_SETGID | 1 |
|
||||
| CAP_SETUID | 1 |
|
||||
| CAP_MKNOD | 1 |
|
||||
| CAP_CHOWN | 1 |
|
||||
| CAP_FOWNER | 1 |
|
||||
| CAP_FSETID | 1 |
|
||||
| CAP_KILL | 1 |
|
||||
| CAP_SYS_CHROOT | 1 |
|
||||
| CAP_NET_BROADCAST | 0 |
|
||||
| CAP_SYS_MODULE | 0 |
|
||||
| CAP_SYS_RAWIO | 0 |
|
||||
| CAP_SYS_PACCT | 0 |
|
||||
| CAP_SYS_ADMIN | 0 |
|
||||
| CAP_SYS_NICE | 0 |
|
||||
| CAP_SYS_RESOURCE | 0 |
|
||||
| CAP_SYS_TIME | 0 |
|
||||
| CAP_SYS_TTY_CONFIG | 0 |
|
||||
| CAP_AUDIT_CONTROL | 0 |
|
||||
| CAP_MAC_OVERRIDE | 0 |
|
||||
| CAP_MAC_ADMIN | 0 |
|
||||
| CAP_NET_ADMIN | 0 |
|
||||
| CAP_SYSLOG | 0 |
|
||||
| CAP_DAC_READ_SEARCH | 0 |
|
||||
| CAP_LINUX_IMMUTABLE | 0 |
|
||||
| CAP_IPC_LOCK | 0 |
|
||||
| CAP_IPC_OWNER | 0 |
|
||||
| CAP_SYS_PTRACE | 0 |
|
||||
| CAP_SYS_BOOT | 0 |
|
||||
| CAP_LEASE | 0 |
|
||||
| CAP_WAKE_ALARM | 0 |
|
||||
| CAP_BLOCK_SUSPEND | 0 |
|
||||
|
||||
|
||||
Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor)
|
||||
and [selinux](http://selinuxproject.org/page/Main_Page) can be used with
|
||||
the containers. A container should support setting an apparmor profile or
|
||||
selinux process and mount labels if provided in the configuration.
|
||||
|
||||
Standard apparmor profile:
|
||||
```c
|
||||
#include <tunables/global>
|
||||
profile <profile_name> flags=(attach_disconnected,mediate_deleted) {
|
||||
#include <abstractions/base>
|
||||
network,
|
||||
capability,
|
||||
file,
|
||||
umount,
|
||||
|
||||
deny @{PROC}/sys/fs/** wklx,
|
||||
deny @{PROC}/sysrq-trigger rwklx,
|
||||
deny @{PROC}/mem rwklx,
|
||||
deny @{PROC}/kmem rwklx,
|
||||
deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
|
||||
deny @{PROC}/sys/kernel/*/** wklx,
|
||||
|
||||
deny mount,
|
||||
|
||||
deny /sys/[^f]*/** wklx,
|
||||
deny /sys/f[^s]*/** wklx,
|
||||
deny /sys/fs/[^c]*/** wklx,
|
||||
deny /sys/fs/c[^g]*/** wklx,
|
||||
deny /sys/fs/cg[^r]*/** wklx,
|
||||
deny /sys/firmware/efi/efivars/** rwklx,
|
||||
deny /sys/kernel/security/** rwklx,
|
||||
}
|
||||
```
|
||||
|
||||
*TODO: seccomp work is being done to find a good default config*
|
||||
|
||||
### Runtime and Init Process
|
||||
|
||||
During container creation the parent process needs to talk to the container's init
|
||||
process and have a form of synchronization. This is accomplished by creating
|
||||
a pipe that is passed to the container's init. When the init process first spawns
|
||||
it will block on its side of the pipe until the parent closes its side. This
|
||||
allows the parent to have time to set the new process inside a cgroup hierarchy
|
||||
and/or write any uid/gid mappings required for user namespaces.
|
||||
The pipe is passed to the init process via FD 3.
|
||||
|
||||
The application consuming libcontainer should be compiled statically. libcontainer
|
||||
does not define any init process and the arguments provided are used to `exec` the
|
||||
process inside the application. There should be no long running init within the
|
||||
container spec.
|
||||
|
||||
If a pseudo tty is provided to a container it will open and `dup2` the console
|
||||
as the container's STDIN, STDOUT, STDERR as well as mounting the console
|
||||
as `/dev/console`.
|
||||
|
||||
An extra set of mounts are provided to a container and setup for use. A container's
|
||||
rootfs can contain some non portable files inside that can cause side effects during
|
||||
execution of a process. These files are usually created and populated with the container
|
||||
specific information via the runtime.
|
||||
|
||||
**Extra runtime files:**
|
||||
* /etc/hosts
|
||||
* /etc/resolv.conf
|
||||
* /etc/hostname
|
||||
* /etc/localtime
|
||||
|
||||
|
||||
#### Defaults
|
||||
|
||||
There are a few defaults that can be overridden by users, but in their omission
|
||||
these apply to processes within a container.
|
||||
|
||||
| Type | Value |
|
||||
| ------------------- | ------------------------------ |
|
||||
| Parent Death Signal | SIGKILL |
|
||||
| UID | 0 |
|
||||
| GID | 0 |
|
||||
| GROUPS | 0, NULL |
|
||||
| CWD | "/" |
|
||||
| $HOME | Current user's home dir or "/" |
|
||||
| Readonly rootfs | false |
|
||||
| Pseudo TTY | false |
|
||||
|
||||
|
||||
## Actions
|
||||
|
||||
After a container is created there is a standard set of actions that can
|
||||
be done to the container. These actions are part of the public API for
|
||||
a container.
|
||||
|
||||
| Action | Description |
|
||||
| -------------- | ------------------------------------------------------------------ |
|
||||
| Get processes | Return all the pids for processes running inside a container |
|
||||
| Get Stats | Return resource statistics for the container as a whole |
|
||||
| Wait | Wait waits on the container's init process ( pid 1 ) |
|
||||
| Wait Process | Wait on any of the container's processes returning the exit status |
|
||||
| Destroy | Kill the container's init process and remove any filesystem state |
|
||||
| Signal | Send a signal to the container's init process |
|
||||
| Signal Process | Send a signal to any of the container's processes |
|
||||
| Pause | Pause all processes inside the container |
|
||||
| Resume | Resume all processes inside the container if paused |
|
||||
| Exec | Execute a new process inside of the container ( requires setns ) |
|
||||
| Set | Setup configs of the container after it's created |
|
||||
|
||||
### Execute a new process inside of a running container.
|
||||
|
||||
User can execute a new process inside of a running container. Any binaries to be
|
||||
executed must be accessible within the container's rootfs.
|
||||
|
||||
The started process will run inside the container's rootfs. Any changes
|
||||
made by the process to the container's filesystem will persist after the
|
||||
process finished executing.
|
||||
|
||||
The started process will join all the container's existing namespaces. When the
|
||||
container is paused, the process will also be paused and will resume when
|
||||
the container is unpaused. The started process will only run when the container's
|
||||
primary process (PID 1) is running, and will not be restarted when the container
|
||||
is restarted.
|
||||
|
||||
#### Planned additions
|
||||
|
||||
The started process will have its own cgroups nested inside the container's
|
||||
cgroups. This is used for process tracking and optionally resource allocation
|
||||
handling for the new process. Freezer cgroup is required, the rest of the cgroups
|
||||
are optional. The process executor must place its pid inside the correct
|
||||
cgroups before starting the process. This is done so that no child processes or
|
||||
threads can escape the cgroups.
|
||||
|
||||
When the process is stopped, the process executor will try (in a best-effort way)
|
||||
to stop all its children and remove the sub-cgroups.
|
|
@ -1,69 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
const allCapabilityTypes = capability.CAPS | capability.BOUNDS
|
||||
|
||||
var capabilityMap map[string]capability.Cap
|
||||
|
||||
func init() {
|
||||
capabilityMap = make(map[string]capability.Cap)
|
||||
last := capability.CAP_LAST_CAP
|
||||
// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
|
||||
if last == capability.Cap(63) {
|
||||
last = capability.CAP_BLOCK_SUSPEND
|
||||
}
|
||||
for _, cap := range capability.List() {
|
||||
if cap > last {
|
||||
continue
|
||||
}
|
||||
capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))
|
||||
capabilityMap[capKey] = cap
|
||||
}
|
||||
}
|
||||
|
||||
func newCapWhitelist(caps []string) (*whitelist, error) {
|
||||
l := []capability.Cap{}
|
||||
for _, c := range caps {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
l = append(l, v)
|
||||
}
|
||||
pid, err := capability.NewPid(os.Getpid())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &whitelist{
|
||||
keep: l,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type whitelist struct {
|
||||
pid capability.Capabilities
|
||||
keep []capability.Cap
|
||||
}
|
||||
|
||||
// dropBoundingSet drops the capability bounding set to those specified in the whitelist.
|
||||
func (w *whitelist) dropBoundingSet() error {
|
||||
w.pid.Clear(capability.BOUNDS)
|
||||
w.pid.Set(capability.BOUNDS, w.keep...)
|
||||
return w.pid.Apply(capability.BOUNDS)
|
||||
}
|
||||
|
||||
// drop drops all capabilities for the current process except those specified in the whitelist.
|
||||
func (w *whitelist) drop() error {
|
||||
w.pid.Clear(allCapabilityTypes)
|
||||
w.pid.Set(allCapabilityTypes, w.keep...)
|
||||
return w.pid.Apply(allCapabilityTypes)
|
||||
}
|
|
@ -1,397 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
subsystems = subsystemSet{
|
||||
&CpusetGroup{},
|
||||
&DevicesGroup{},
|
||||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
&NetPrioGroup{},
|
||||
&PerfEventGroup{},
|
||||
&FreezerGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
}
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
||||
)
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
type subsystemSet []subsystem
|
||||
|
||||
func (s subsystemSet) Get(name string) (subsystem, error) {
|
||||
for _, ss := range s {
|
||||
if ss.Name() == name {
|
||||
return ss, nil
|
||||
}
|
||||
}
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Removes the cgroup represented by 'cgroupData'.
|
||||
Remove(*cgroupData) error
|
||||
// Creates and joins the cgroup represented by 'cgroupData'.
|
||||
Apply(*cgroupData) error
|
||||
// Set the cgroup represented by cgroup.
|
||||
Set(path string, cgroup *configs.Cgroup) error
|
||||
}
|
||||
|
||||
type Manager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
var cgroupRootLock sync.Mutex
|
||||
var cgroupRoot string
|
||||
|
||||
// Gets the cgroupRoot.
|
||||
func getCgroupRoot() (string, error) {
|
||||
cgroupRootLock.Lock()
|
||||
defer cgroupRootLock.Unlock()
|
||||
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
root, err := cgroups.FindCgroupMountpointDir()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
cgroupRoot = root
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
type cgroupData struct {
|
||||
root string
|
||||
innerPath string
|
||||
config *configs.Cgroup
|
||||
pid int
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) (err error) {
|
||||
if m.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var c = m.Cgroups
|
||||
|
||||
d, err := getCgroupData(m.Cgroups, pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := d.path(name)
|
||||
if err != nil {
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
paths := make(map[string]string)
|
||||
for _, sys := range subsystems {
|
||||
if err := sys.Apply(d); err != nil {
|
||||
return err
|
||||
}
|
||||
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
|
||||
// create and join phase so that the cgroup hierarchy for a container can be
|
||||
// created then join consists of writing the process pids to cgroup.procs
|
||||
p, err := d.path(sys.Name())
|
||||
if err != nil {
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[sys.Name()] = p
|
||||
}
|
||||
m.Paths = paths
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
sys, err := subsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
for _, sys := range subsystems {
|
||||
// Generate fake cgroup data.
|
||||
d, err := getCgroupData(container.Cgroups, -1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Get the path, but don't error out if the cgroup wasn't found.
|
||||
path, err := d.path(sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
d, err := getCgroupData(m.Cgroups, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dir, err := d.path("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
freezer, err := subsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = freezer.Set(dir, m.Cgroups)
|
||||
if err != nil {
|
||||
m.Cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
dir, err := getCgroupPath(m.Cgroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(dir)
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
dir, err := getCgroupPath(m.Cgroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(dir)
|
||||
}
|
||||
|
||||
func getCgroupPath(c *configs.Cgroup) (string, error) {
|
||||
d, err := getCgroupData(c, 0)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return d.path("devices")
|
||||
}
|
||||
|
||||
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
innerPath := cgPath
|
||||
if innerPath == "" {
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return &cgroupData{
|
||||
root: root,
|
||||
innerPath: innerPath,
|
||||
config: c,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
|
||||
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
initPath, err := cgroups.GetThisCgroupDir(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see pathes from host, which don't exist in container.
|
||||
relDir, err := filepath.Rel(root, initPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return filepath.Join(mountpoint, relDir), nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) path(subsystem string) (string, error) {
|
||||
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(raw.innerPath) {
|
||||
// Sometimes subsystems can be mounted togethger as 'cpu,cpuacct'.
|
||||
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
|
||||
}
|
||||
|
||||
parentPath, err := raw.parentPath(subsystem, mnt, root)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(parentPath, raw.innerPath), nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) join(subsystem string) (string, error) {
|
||||
path, err := raw.path(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s.", file)
|
||||
}
|
||||
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
|
||||
}
|
||||
|
||||
func readFile(dir, file string) (string, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join(dir, file))
|
||||
return string(data), err
|
||||
}
|
||||
|
||||
func removePath(p string, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if p != "" {
|
||||
return os.RemoveAll(p)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func CheckCpushares(path string, c int64) error {
|
||||
var cpuShares int64
|
||||
|
||||
if c == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
fd, err := os.Open(filepath.Join(path, "cpu.shares"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
_, err = fmt.Fscanf(fd, "%d", &cpuShares)
|
||||
if err != nil && err != io.EOF {
|
||||
return err
|
||||
}
|
||||
|
||||
if c > cpuShares {
|
||||
return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares)
|
||||
} else if c < cpuShares {
|
||||
return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -1,237 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type BlkioGroup struct {
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Name() string {
|
||||
return "blkio"
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("blkio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
if err := writeFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.BlkioLeafWeight != 0 {
|
||||
if err := writeFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range cgroup.Resources.BlkioWeightDevice {
|
||||
if err := writeFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := writeFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if err := writeFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if err := writeFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if err := writeFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("blkio"))
|
||||
}
|
||||
|
||||
/*
|
||||
examples:
|
||||
|
||||
blkio.sectors
|
||||
8:0 6792
|
||||
|
||||
blkio.io_service_bytes
|
||||
8:0 Read 1282048
|
||||
8:0 Write 2195456
|
||||
8:0 Sync 2195456
|
||||
8:0 Async 1282048
|
||||
8:0 Total 3477504
|
||||
Total 3477504
|
||||
|
||||
blkio.io_serviced
|
||||
8:0 Read 124
|
||||
8:0 Write 104
|
||||
8:0 Sync 104
|
||||
8:0 Async 124
|
||||
8:0 Total 228
|
||||
Total 228
|
||||
|
||||
blkio.io_queued
|
||||
8:0 Read 0
|
||||
8:0 Write 0
|
||||
8:0 Sync 0
|
||||
8:0 Async 0
|
||||
8:0 Total 0
|
||||
Total 0
|
||||
*/
|
||||
|
||||
func splitBlkioStatLine(r rune) bool {
|
||||
return r == ' ' || r == ':'
|
||||
}
|
||||
|
||||
func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return blkioStats, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
// format: dev type amount
|
||||
fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine)
|
||||
if len(fields) < 3 {
|
||||
if len(fields) == 2 && fields[0] == "Total" {
|
||||
// skip total line
|
||||
continue
|
||||
} else {
|
||||
return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text())
|
||||
}
|
||||
}
|
||||
|
||||
v, err := strconv.ParseUint(fields[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
major := v
|
||||
|
||||
v, err = strconv.ParseUint(fields[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
minor := v
|
||||
|
||||
op := ""
|
||||
valueField := 2
|
||||
if len(fields) == 4 {
|
||||
op = fields[2]
|
||||
valueField = 3
|
||||
}
|
||||
v, err = strconv.ParseUint(fields[valueField], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
|
||||
}
|
||||
|
||||
return blkioStats, nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
// Try to read CFQ stats available on all CFQ enabled kernels first
|
||||
if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil {
|
||||
return getCFQStats(path, stats)
|
||||
}
|
||||
return getStats(path, stats) // Use generic stats as fallback
|
||||
}
|
||||
|
||||
func getCFQStats(path string, stats *cgroups.Stats) error {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
var err error
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.SectorsRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServicedRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoQueuedRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServiceTimeRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoWaitTimeRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoMergedRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoTimeRecursive = blkioStats
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getStats(path string, stats *cgroups.Stats) error {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
var err error
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServicedRecursive = blkioStats
|
||||
|
||||
return nil
|
||||
}
|
|
@ -1,94 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type CpuGroup struct {
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Name() string {
|
||||
return "cpu"
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Apply(d *cgroupData) error {
|
||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||
// on a container basis
|
||||
_, err := d.join("cpu")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuShares != 0 {
|
||||
if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuPeriod != 0 {
|
||||
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuQuota != 0 {
|
||||
if err := writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuRtPeriod != 0 {
|
||||
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuRtRuntime != 0 {
|
||||
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpu"))
|
||||
}
|
||||
|
||||
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
f, err := os.Open(filepath.Join(path, "cpu.stat"))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := getCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch t {
|
||||
case "nr_periods":
|
||||
stats.CpuStats.ThrottlingData.Periods = v
|
||||
|
||||
case "nr_throttled":
|
||||
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||
|
||||
case "throttled_time":
|
||||
stats.CpuStats.ThrottlingData.ThrottledTime = v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,121 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupCpuacctStat = "cpuacct.stat"
|
||||
nanosecondsInSecond = 1000000000
|
||||
)
|
||||
|
||||
var clockTicks = uint64(system.GetClockTicks())
|
||||
|
||||
type CpuacctGroup struct {
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Name() string {
|
||||
return "cpuacct"
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Apply(d *cgroupData) error {
|
||||
// we just want to join this group even though we don't set anything
|
||||
if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpuacct"))
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
totalUsage, err := getCgroupParamUint(path, "cpuacct.usage")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
percpuUsage, err := getPercpuUsage(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
|
||||
return nil
|
||||
}
|
||||
|
||||
// Returns user and kernel usage breakdown in nanoseconds.
|
||||
func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
||||
userModeUsage := uint64(0)
|
||||
kernelModeUsage := uint64(0)
|
||||
const (
|
||||
userField = "user"
|
||||
systemField = "system"
|
||||
)
|
||||
|
||||
// Expected format:
|
||||
// user <usage in ticks>
|
||||
// system <usage in ticks>
|
||||
data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat))
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
fields := strings.Fields(string(data))
|
||||
if len(fields) != 4 {
|
||||
return 0, 0, fmt.Errorf("failure - %s is expected to have 4 fields", filepath.Join(path, cgroupCpuacctStat))
|
||||
}
|
||||
if fields[0] != userField {
|
||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
|
||||
}
|
||||
if fields[2] != systemField {
|
||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField)
|
||||
}
|
||||
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
|
||||
}
|
||||
|
||||
func getPercpuUsage(path string) ([]uint64, error) {
|
||||
percpuUsage := []uint64{}
|
||||
data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu"))
|
||||
if err != nil {
|
||||
return percpuUsage, err
|
||||
}
|
||||
for _, value := range strings.Fields(string(data)) {
|
||||
value, err := strconv.ParseUint(value, 10, 64)
|
||||
if err != nil {
|
||||
return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
|
||||
}
|
||||
percpuUsage = append(percpuUsage, value)
|
||||
}
|
||||
return percpuUsage, nil
|
||||
}
|
|
@ -1,139 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
type CpusetGroup struct {
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Name() string {
|
||||
return "cpuset"
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.path("cpuset")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return s.ApplyDir(dir, d.config, d.pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := writeFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := writeFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpuset"))
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
|
||||
// This might happen if we have no cpuset cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.ensureParent(dir, root); err != nil {
|
||||
return err
|
||||
}
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
|
||||
if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil {
|
||||
return
|
||||
}
|
||||
if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil {
|
||||
return
|
||||
}
|
||||
return cpus, mems, nil
|
||||
}
|
||||
|
||||
// ensureParent makes sure that the parent directory of current is created
|
||||
// and populated with the proper cpus and mems files copied from
|
||||
// it's parent.
|
||||
func (s *CpusetGroup) ensureParent(current, root string) error {
|
||||
parent := filepath.Dir(current)
|
||||
if libcontainerUtils.CleanPath(parent) == root {
|
||||
return nil
|
||||
}
|
||||
// Avoid infinite recursion.
|
||||
if parent == current {
|
||||
return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
|
||||
}
|
||||
if err := s.ensureParent(parent, root); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(current, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.copyIfNeeded(current, parent)
|
||||
}
|
||||
|
||||
// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
||||
// directory to the current directory if the file's contents are 0
|
||||
func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
|
||||
var (
|
||||
err error
|
||||
currentCpus, currentMems []byte
|
||||
parentCpus, parentMems []byte
|
||||
)
|
||||
|
||||
if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil {
|
||||
return err
|
||||
}
|
||||
if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.isEmpty(currentCpus) {
|
||||
if err := writeFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if s.isEmpty(currentMems) {
|
||||
if err := writeFile(current, "cpuset.mems", string(parentMems)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) isEmpty(b []byte) bool {
|
||||
return len(bytes.Trim(b, "\n")) == 0
|
||||
}
|
|
@ -1,78 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
type DevicesGroup struct {
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Name() string {
|
||||
return "devices"
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("devices")
|
||||
if err != nil {
|
||||
// We will return error even it's `not found` error, devices
|
||||
// cgroup is hard requirement for container's security.
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if system.RunningInUserNS() {
|
||||
return nil
|
||||
}
|
||||
|
||||
devices := cgroup.Resources.Devices
|
||||
if len(devices) > 0 {
|
||||
for _, dev := range devices {
|
||||
file := "devices.deny"
|
||||
if dev.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
if err := writeFile(path, file, dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if !cgroup.Resources.AllowAllDevices {
|
||||
if err := writeFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
||||
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := writeFile(path, "devices.allow", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.DeniedDevices {
|
||||
if err := writeFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("devices"))
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -1,61 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type FreezerGroup struct {
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Name() string {
|
||||
return "freezer"
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("freezer")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
switch cgroup.Resources.Freezer {
|
||||
case configs.Frozen, configs.Thawed:
|
||||
if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for {
|
||||
state, err := readFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
|
||||
break
|
||||
}
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
}
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("freezer"))
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -1,3 +0,0 @@
|
|||
// +build !linux
|
||||
|
||||
package fs
|
|
@ -1,71 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type HugetlbGroup struct {
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Name() string {
|
||||
return "hugetlb"
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("hugetlb")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
|
||||
if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("hugetlb"))
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
for _, pageSize := range HugePageSizes {
|
||||
usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
|
||||
value, err := getCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
|
||||
value, err = getCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
}
|
||||
hugetlbStats.MaxUsage = value
|
||||
|
||||
failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
|
||||
value, err = getCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
stats.HugetlbStats[pageSize] = hugetlbStats
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -1,201 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type MemoryGroup struct {
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Name() string {
|
||||
return "memory"
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
||||
path, err := d.path("memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
if memoryAssigned(d.config) {
|
||||
if path != "" {
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached.
|
||||
if err := s.SetKernelMemory(path, d.config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(path)
|
||||
}
|
||||
}()
|
||||
|
||||
// We need to join memory cgroup after set memory limits, because
|
||||
// kmem.limit_in_bytes can only be set when the cgroup is empty.
|
||||
_, err = d.join("memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
|
||||
// This has to be done separately because it has special constraints (it
|
||||
// can't be done after there are processes attached to the cgroup).
|
||||
if cgroup.Resources.KernelMemory > 0 {
|
||||
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwap > 0 {
|
||||
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.OomKillDisable {
|
||||
if err := writeFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
|
||||
return nil
|
||||
} else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 {
|
||||
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("memory"))
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
// Set stats from memory.stat.
|
||||
statsFile, err := os.Open(filepath.Join(path, "memory.stat"))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := getCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||
|
||||
memoryUsage, err := getMemoryData(path, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapUsage, err := getMemoryData(path, "memsw")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
kernelUsage, err := getMemoryData(path, "kmem")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelUsage = kernelUsage
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func memoryAssigned(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.Memory != 0 ||
|
||||
cgroup.Resources.MemoryReservation != 0 ||
|
||||
cgroup.Resources.MemorySwap > 0 ||
|
||||
cgroup.Resources.KernelMemory > 0 ||
|
||||
cgroup.Resources.OomKillDisable ||
|
||||
(cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1)
|
||||
}
|
||||
|
||||
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = strings.Join([]string{"memory", name}, ".")
|
||||
}
|
||||
usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
|
||||
maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
|
||||
failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
|
||||
limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".")
|
||||
|
||||
value, err := getCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
}
|
||||
memoryData.Usage = value
|
||||
value, err = getCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
value, err = getCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = getCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
|
@ -1,40 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NameGroup struct {
|
||||
GroupName string
|
||||
Join bool
|
||||
}
|
||||
|
||||
func (s *NameGroup) Name() string {
|
||||
return s.GroupName
|
||||
}
|
||||
|
||||
func (s *NameGroup) Apply(d *cgroupData) error {
|
||||
if s.Join {
|
||||
// ignore errors if the named cgroup does not exist
|
||||
d.join(s.GroupName)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) Remove(d *cgroupData) error {
|
||||
if s.Join {
|
||||
removePath(d.path(s.GroupName))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetClsGroup struct {
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Name() string {
|
||||
return "net_cls"
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("net_cls")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.NetClsClassid != "" {
|
||||
if err := writeFile(path, "net_cls.classid", cgroup.Resources.NetClsClassid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("net_cls"))
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetPrioGroup struct {
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Name() string {
|
||||
return "net_prio"
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("net_prio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, prioMap := range cgroup.Resources.NetPrioIfpriomap {
|
||||
if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("net_prio"))
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -1,35 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PerfEventGroup struct {
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Name() string {
|
||||
return "perf_event"
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Apply(d *cgroupData) error {
|
||||
// we just want to join this group even though we don't set anything
|
||||
if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("perf_event"))
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -1,57 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PidsGroup struct {
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Name() string {
|
||||
return "pids"
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("pids")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := writeFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("pids"))
|
||||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
value, err := getCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = value
|
||||
return nil
|
||||
}
|
|
@ -1,79 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNotSupportStat = errors.New("stats are not supported for subsystem")
|
||||
ErrNotValidFormat = errors.New("line is not a valid key value format")
|
||||
)
|
||||
|
||||
// Saturates negative values at zero and returns a uint64.
|
||||
// Due to kernel bugs, some of the memory cgroup stats can be negative.
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
value, err := strconv.ParseUint(s, base, bitSize)
|
||||
if err != nil {
|
||||
intValue, intErr := strconv.ParseInt(s, base, bitSize)
|
||||
// 1. Handle negative values greater than MinInt64 (and)
|
||||
// 2. Handle negative values lesser than MinInt64
|
||||
if intErr == nil && intValue < 0 {
|
||||
return 0, nil
|
||||
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return value, err
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
// Parses a cgroup param and returns as name, value
|
||||
// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234
|
||||
func getCgroupParamKeyValue(t string) (string, uint64, error) {
|
||||
parts := strings.Fields(t)
|
||||
switch len(parts) {
|
||||
case 2:
|
||||
value, err := parseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("unable to convert param value (%q) to uint64: %v", parts[1], err)
|
||||
}
|
||||
|
||||
return parts[0], value, nil
|
||||
default:
|
||||
return "", 0, ErrNotValidFormat
|
||||
}
|
||||
}
|
||||
|
||||
// Gets a single uint64 value from the specified cgroup file.
|
||||
func getCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
|
||||
fileName := filepath.Join(cgroupPath, cgroupFile)
|
||||
contents, err := ioutil.ReadFile(fileName)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Gets a string value from the specified cgroup file
|
||||
func getCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
|
||||
contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return strings.TrimSpace(string(contents)), nil
|
||||
}
|
|
@ -1,55 +0,0 @@
|
|||
// +build !linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Manager struct {
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func UseSystemd() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
|
@ -1,605 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/dbus"
|
||||
systemdUtil "github.com/coreos/go-systemd/util"
|
||||
"github.com/godbus/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Manager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Set the cgroup represented by cgroup.
|
||||
Set(path string, cgroup *configs.Cgroup) error
|
||||
}
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
type subsystemSet []subsystem
|
||||
|
||||
func (s subsystemSet) Get(name string) (subsystem, error) {
|
||||
for _, ss := range s {
|
||||
if ss.Name() == name {
|
||||
return ss, nil
|
||||
}
|
||||
}
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
|
||||
var subsystems = subsystemSet{
|
||||
&fs.CpusetGroup{},
|
||||
&fs.DevicesGroup{},
|
||||
&fs.MemoryGroup{},
|
||||
&fs.CpuGroup{},
|
||||
&fs.CpuacctGroup{},
|
||||
&fs.PidsGroup{},
|
||||
&fs.BlkioGroup{},
|
||||
&fs.HugetlbGroup{},
|
||||
&fs.PerfEventGroup{},
|
||||
&fs.FreezerGroup{},
|
||||
&fs.NetPrioGroup{},
|
||||
&fs.NetClsGroup{},
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
}
|
||||
|
||||
const (
|
||||
testScopeWait = 4
|
||||
)
|
||||
|
||||
var (
|
||||
connLock sync.Mutex
|
||||
theConn *systemdDbus.Conn
|
||||
hasStartTransientUnit bool
|
||||
hasTransientDefaultDependencies bool
|
||||
)
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
|
||||
func UseSystemd() bool {
|
||||
if !systemdUtil.IsRunningSystemd() {
|
||||
return false
|
||||
}
|
||||
|
||||
connLock.Lock()
|
||||
defer connLock.Unlock()
|
||||
|
||||
if theConn == nil {
|
||||
var err error
|
||||
theConn, err = systemdDbus.New()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Assume we have StartTransientUnit
|
||||
hasStartTransientUnit = true
|
||||
|
||||
// But if we get UnknownMethod error we don't
|
||||
if _, err := theConn.StartTransientUnit("test.scope", "invalid", nil, nil); err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
if dbusError.Name == "org.freedesktop.DBus.Error.UnknownMethod" {
|
||||
hasStartTransientUnit = false
|
||||
return hasStartTransientUnit
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure the scope name we use doesn't exist. Use the Pid to
|
||||
// avoid collisions between multiple libcontainer users on a
|
||||
// single host.
|
||||
scope := fmt.Sprintf("libcontainer-%d-systemd-test-default-dependencies.scope", os.Getpid())
|
||||
testScopeExists := true
|
||||
for i := 0; i <= testScopeWait; i++ {
|
||||
if _, err := theConn.StopUnit(scope, "replace", nil); err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
|
||||
testScopeExists = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
|
||||
// Bail out if we can't kill this scope without testing for DefaultDependencies
|
||||
if testScopeExists {
|
||||
return hasStartTransientUnit
|
||||
}
|
||||
|
||||
// Assume StartTransientUnit on a scope allows DefaultDependencies
|
||||
hasTransientDefaultDependencies = true
|
||||
ddf := newProp("DefaultDependencies", false)
|
||||
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{ddf}, nil); err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
|
||||
hasTransientDefaultDependencies = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Not critical because of the stop unit logic above.
|
||||
theConn.StopUnit(scope, "replace", nil)
|
||||
}
|
||||
return hasStartTransientUnit
|
||||
}
|
||||
|
||||
func getIfaceForUnit(unitName string) string {
|
||||
if strings.HasSuffix(unitName, ".scope") {
|
||||
return "Scope"
|
||||
}
|
||||
if strings.HasSuffix(unitName, ".service") {
|
||||
return "Service"
|
||||
}
|
||||
return "Unit"
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := getSubsystemPath(m.Cgroups, name)
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties,
|
||||
systemdDbus.PropSlice(slice),
|
||||
systemdDbus.PropDescription("docker container "+c.Name),
|
||||
newProp("PIDs", []uint32{uint32(pid)}),
|
||||
)
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("BlockIOAccounting", true))
|
||||
|
||||
if hasTransientDefaultDependencies {
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
}
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(c.Resources.Memory)))
|
||||
}
|
||||
|
||||
if c.Resources.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", uint64(c.Resources.CpuShares)))
|
||||
}
|
||||
|
||||
if c.Resources.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
|
||||
}
|
||||
|
||||
// We need to set kernel memory before processes join cgroup because
|
||||
// kmem.limit_in_bytes can only be set when the cgroup is empty.
|
||||
// And swap memory limit needs to be set after memory limit, only
|
||||
// memory limit is handled by systemd, so it's kind of ugly here.
|
||||
if c.Resources.KernelMemory > 0 {
|
||||
if err := setKernelMemory(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinDevices(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: CpuQuota and CpuPeriod not available in systemd
|
||||
// we need to manually join the cpu.cfs_quota_us and cpu.cfs_period_us
|
||||
if err := joinCpu(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: MemoryReservation and MemorySwap not available in systemd
|
||||
if err := joinMemory(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// we need to manually join the freezer, net_cls, net_prio, pids and cpuset cgroup in systemd
|
||||
// because it does not currently support it via the dbus api.
|
||||
if err := joinFreezer(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinNetPrio(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := joinNetCls(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinPids(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinCpuset(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinHugetlb(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinPerfEvent(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
// FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem
|
||||
// using that (at least on systemd 208, see https://github.com/opencontainers/runc/libcontainer/pull/354),
|
||||
// so use fs work around for now.
|
||||
if err := joinBlkio(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range subsystems {
|
||||
subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name())
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
m.Paths = paths
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s.", file)
|
||||
}
|
||||
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
|
||||
}
|
||||
|
||||
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
||||
path, err := getSubsystemPath(c, subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func joinCpu(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "cpu", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinFreezer(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "freezer", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinNetPrio(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "net_prio", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinNetCls(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "net_cls", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinPids(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "pids", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd represents slice heirarchy using `-`, so we need to follow suit when
|
||||
// generating the path of slice. Essentially, test-a-b.slice becomes
|
||||
// test.slice/test-a.slice/test-a-b.slice.
|
||||
func expandSlice(slice string) (string, error) {
|
||||
suffix := ".slice"
|
||||
// Name has to end with ".slice", but can't be just ".slice".
|
||||
if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Path-separators are not allowed.
|
||||
if strings.Contains(slice, "/") {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
var path, prefix string
|
||||
sliceName := strings.TrimSuffix(slice, suffix)
|
||||
for _, component := range strings.Split(sliceName, "-") {
|
||||
// test--a.slice isn't permitted, nor is -test.slice.
|
||||
if component == "" {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Append the component to the path and to the prefix.
|
||||
path += prefix + component + suffix + "/"
|
||||
prefix += component + "-"
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
mountpoint, err := cgroups.FindCgroupMountpoint(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
initPath, err := cgroups.GetInitCgroupDir(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
slice := "system.slice"
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
slice, err = expandSlice(slice)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
path, err := getSubsystemPath(m.Cgroups, "freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
freezer, err := subsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = freezer.Set(path, m.Cgroups)
|
||||
if err != nil {
|
||||
m.Cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
sys, err := subsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
for _, sys := range subsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, err := getSubsystemPath(container.Cgroups, sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getUnitName(c *configs.Cgroup) string {
|
||||
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
|
||||
}
|
||||
|
||||
// Atm we can't use the systemd device support because of two missing things:
|
||||
// * Support for wildcards to allow mknod on any device
|
||||
// * Support for wildcards to allow /dev/pts support
|
||||
//
|
||||
// The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is
|
||||
// in wide use. When both these are available we will be able to switch, but need to keep the old
|
||||
// implementation for backwards compat.
|
||||
//
|
||||
// Note: we can't use systemd to set up the initial limits, and then change the cgroup
|
||||
// because systemd will re-write the device settings if it needs to re-apply the cgroup context.
|
||||
// This happens at least for v208 when any sibling unit is started.
|
||||
func joinDevices(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "devices", pid)
|
||||
// Even if it's `not found` error, we'll return err because devices cgroup
|
||||
// is hard requirement for container security.
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(c *configs.Cgroup) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// This doesn't get called by manager.Set, so we need to do it here.
|
||||
s := &fs.MemoryGroup{}
|
||||
return s.SetKernelMemory(path, c)
|
||||
}
|
||||
|
||||
func joinMemory(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "memory", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd does not atm set up the cpuset controller, so we must manually
|
||||
// join it. Additionally that is a very finicky controller where each
|
||||
// level must have a full setup as the default for a new directory is "no cpus"
|
||||
func joinCpuset(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "cpuset")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
s := &fs.CpusetGroup{}
|
||||
|
||||
return s.ApplyDir(path, c, pid)
|
||||
}
|
||||
|
||||
// `BlockIODeviceWeight` property of systemd does not work properly, and systemd
|
||||
// expects device path instead of major minor numbers, which is also confusing
|
||||
// for users. So we use fs work around for now.
|
||||
func joinBlkio(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "blkio", pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinHugetlb(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "hugetlb", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinPerfEvent(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "perf_event", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,10 +0,0 @@
|
|||
// +build linux,!go1.5
|
||||
|
||||
package libcontainer
|
||||
|
||||
import "syscall"
|
||||
|
||||
// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building
|
||||
// with earlier versions
|
||||
func enableSetgroups(sys *syscall.SysProcAttr) {
|
||||
}
|
|
@ -1,138 +0,0 @@
|
|||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Validator interface {
|
||||
Validate(*configs.Config) error
|
||||
}
|
||||
|
||||
func New() Validator {
|
||||
return &ConfigValidator{}
|
||||
}
|
||||
|
||||
type ConfigValidator struct {
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||
if err := v.rootfs(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.network(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.hostname(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.security(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.usernamespace(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.sysctl(config); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// rootfs validates the the rootfs is an absolute path and is not a symlink
|
||||
// to the container's root filesystem.
|
||||
func (v *ConfigValidator) rootfs(config *configs.Config) error {
|
||||
cleaned, err := filepath.Abs(config.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Rootfs != cleaned {
|
||||
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) network(config *configs.Config) error {
|
||||
if !config.Namespaces.Contains(configs.NEWNET) {
|
||||
if len(config.Networks) > 0 || len(config.Routes) > 0 {
|
||||
return fmt.Errorf("unable to apply network settings without a private NET namespace")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) hostname(config *configs.Config) error {
|
||||
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
|
||||
return fmt.Errorf("unable to set hostname without a private UTS namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) security(config *configs.Config) error {
|
||||
// restrict sys without mount namespace
|
||||
if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
|
||||
!config.Namespaces.Contains(configs.NEWNS) {
|
||||
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) usernamespace(config *configs.Config) error {
|
||||
if config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
|
||||
return fmt.Errorf("USER namespaces aren't enabled in the kernel")
|
||||
}
|
||||
} else {
|
||||
if config.UidMappings != nil || config.GidMappings != nil {
|
||||
return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sysctl validates that the specified sysctl keys are valid or not.
|
||||
// /proc/sys isn't completely namespaced and depending on which namespaces
|
||||
// are specified, a subset of sysctls are permitted.
|
||||
func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
||||
validSysctlPrefixes := []string{}
|
||||
validSysctlMap := make(map[string]bool)
|
||||
if config.Namespaces.Contains(configs.NEWNET) {
|
||||
validSysctlPrefixes = append(validSysctlPrefixes, "net.")
|
||||
}
|
||||
if config.Namespaces.Contains(configs.NEWIPC) {
|
||||
validSysctlPrefixes = append(validSysctlPrefixes, "fs.mqueue.")
|
||||
validSysctlMap = map[string]bool{
|
||||
"kernel.msgmax": true,
|
||||
"kernel.msgmnb": true,
|
||||
"kernel.msgmni": true,
|
||||
"kernel.sem": true,
|
||||
"kernel.shmall": true,
|
||||
"kernel.shmmax": true,
|
||||
"kernel.shmmni": true,
|
||||
"kernel.shm_rmid_forced": true,
|
||||
}
|
||||
}
|
||||
for s := range config.Sysctl {
|
||||
if validSysctlMap[s] {
|
||||
continue
|
||||
}
|
||||
valid := false
|
||||
for _, vp := range validSysctlPrefixes {
|
||||
if strings.HasPrefix(s, vp) {
|
||||
valid = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !valid {
|
||||
return fmt.Errorf("sysctl %q is not permitted in the config", s)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
import "io"
|
||||
|
||||
// Console represents a pseudo TTY.
|
||||
type Console interface {
|
||||
io.ReadWriter
|
||||
io.Closer
|
||||
|
||||
// Path returns the filesystem path to the slave side of the pty.
|
||||
Path() string
|
||||
|
||||
// Fd returns the fd for the master of the pty.
|
||||
Fd() uintptr
|
||||
}
|
|
@ -1,13 +0,0 @@
|
|||
// +build freebsd
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
// NewConsole returns an initalized console that can be used within a container by copying bytes
|
||||
// from the master side to the slave that is attached as the tty for the container's init process.
|
||||
func NewConsole(uid, gid int) (Console, error) {
|
||||
return nil, errors.New("libcontainer console is not supported on FreeBSD")
|
||||
}
|
|
@ -1,145 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/label"
|
||||
)
|
||||
|
||||
// NewConsole returns an initalized console that can be used within a container by copying bytes
|
||||
// from the master side to the slave that is attached as the tty for the container's init process.
|
||||
func NewConsole(uid, gid int) (Console, error) {
|
||||
master, err := os.OpenFile("/dev/ptmx", syscall.O_RDWR|syscall.O_NOCTTY|syscall.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
console, err := ptsname(master)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := unlockpt(master); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := os.Chmod(console, 0600); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := os.Chown(console, uid, gid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &linuxConsole{
|
||||
slavePath: console,
|
||||
master: master,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// newConsoleFromPath is an internal function returning an initialized console for use inside
|
||||
// a container's MNT namespace.
|
||||
func newConsoleFromPath(slavePath string) *linuxConsole {
|
||||
return &linuxConsole{
|
||||
slavePath: slavePath,
|
||||
}
|
||||
}
|
||||
|
||||
// linuxConsole is a linux psuedo TTY for use within a container.
|
||||
type linuxConsole struct {
|
||||
master *os.File
|
||||
slavePath string
|
||||
}
|
||||
|
||||
func (c *linuxConsole) Fd() uintptr {
|
||||
return c.master.Fd()
|
||||
}
|
||||
|
||||
func (c *linuxConsole) Path() string {
|
||||
return c.slavePath
|
||||
}
|
||||
|
||||
func (c *linuxConsole) Read(b []byte) (int, error) {
|
||||
return c.master.Read(b)
|
||||
}
|
||||
|
||||
func (c *linuxConsole) Write(b []byte) (int, error) {
|
||||
return c.master.Write(b)
|
||||
}
|
||||
|
||||
func (c *linuxConsole) Close() error {
|
||||
if m := c.master; m != nil {
|
||||
return m.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// mount initializes the console inside the rootfs mounting with the specified mount label
|
||||
// and applying the correct ownership of the console.
|
||||
func (c *linuxConsole) mount(rootfs, mountLabel string) error {
|
||||
oldMask := syscall.Umask(0000)
|
||||
defer syscall.Umask(oldMask)
|
||||
if err := label.SetFileLabel(c.slavePath, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
dest := filepath.Join(rootfs, "/dev/console")
|
||||
f, err := os.Create(dest)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
|
||||
}
|
||||
|
||||
// dupStdio opens the slavePath for the console and dups the fds to the current
|
||||
// processes stdio, fd 0,1,2.
|
||||
func (c *linuxConsole) dupStdio() error {
|
||||
slave, err := c.open(syscall.O_RDWR)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fd := int(slave.Fd())
|
||||
for _, i := range []int{0, 1, 2} {
|
||||
if err := syscall.Dup3(fd, i, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave.
|
||||
func (c *linuxConsole) open(flag int) (*os.File, error) {
|
||||
r, e := syscall.Open(c.slavePath, flag, 0)
|
||||
if e != nil {
|
||||
return nil, &os.PathError{
|
||||
Op: "open",
|
||||
Path: c.slavePath,
|
||||
Err: e,
|
||||
}
|
||||
}
|
||||
return os.NewFile(uintptr(r), c.slavePath), nil
|
||||
}
|
||||
|
||||
func ioctl(fd uintptr, flag, data uintptr) error {
|
||||
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, flag, data); err != 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f.
|
||||
// unlockpt should be called before opening the slave side of a pty.
|
||||
func unlockpt(f *os.File) error {
|
||||
var u int32
|
||||
return ioctl(f.Fd(), syscall.TIOCSPTLCK, uintptr(unsafe.Pointer(&u)))
|
||||
}
|
||||
|
||||
// ptsname retrieves the name of the first available pts for the given master.
|
||||
func ptsname(f *os.File) (string, error) {
|
||||
var n int32
|
||||
if err := ioctl(f.Fd(), syscall.TIOCGPTN, uintptr(unsafe.Pointer(&n))); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return fmt.Sprintf("/dev/pts/%d", n), nil
|
||||
}
|
|
@ -1,30 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
// NewConsole returns an initalized console that can be used within a container
|
||||
func NewConsole(uid, gid int) (Console, error) {
|
||||
return &windowsConsole{}, nil
|
||||
}
|
||||
|
||||
// windowsConsole is a Windows psuedo TTY for use within a container.
|
||||
type windowsConsole struct {
|
||||
}
|
||||
|
||||
func (c *windowsConsole) Fd() uintptr {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (c *windowsConsole) Path() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (c *windowsConsole) Read(b []byte) (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (c *windowsConsole) Write(b []byte) (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (c *windowsConsole) Close() error {
|
||||
return nil
|
||||
}
|
|
@ -1,144 +0,0 @@
|
|||
// Libcontainer provides a native Go implementation for creating containers
|
||||
// with namespaces, cgroups, capabilities, and filesystem access controls.
|
||||
// It allows you to manage the lifecycle of the container performing additional operations
|
||||
// after the container is created.
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// The status of a container.
|
||||
type Status int
|
||||
|
||||
const (
|
||||
// The container exists but has not been run yet
|
||||
Created Status = iota
|
||||
|
||||
// The container exists and is running.
|
||||
Running
|
||||
|
||||
// The container exists, it is in the process of being paused.
|
||||
Pausing
|
||||
|
||||
// The container exists, but all its processes are paused.
|
||||
Paused
|
||||
|
||||
// The container does not exist.
|
||||
Destroyed
|
||||
)
|
||||
|
||||
func (s Status) String() string {
|
||||
switch s {
|
||||
case Created:
|
||||
return "created"
|
||||
case Running:
|
||||
return "running"
|
||||
case Pausing:
|
||||
return "pausing"
|
||||
case Paused:
|
||||
return "paused"
|
||||
case Destroyed:
|
||||
return "destroyed"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// BaseState represents the platform agnostic pieces relating to a
|
||||
// running container's state
|
||||
type BaseState struct {
|
||||
// ID is the container ID.
|
||||
ID string `json:"id"`
|
||||
|
||||
// InitProcessPid is the init process id in the parent namespace.
|
||||
InitProcessPid int `json:"init_process_pid"`
|
||||
|
||||
// InitProcessStartTime is the init process start time in clock cycles since boot time.
|
||||
InitProcessStartTime string `json:"init_process_start"`
|
||||
|
||||
// Created is the unix timestamp for the creation time of the container in UTC
|
||||
Created time.Time `json:"created"`
|
||||
|
||||
// Config is the container's configuration.
|
||||
Config configs.Config `json:"config"`
|
||||
}
|
||||
|
||||
// A libcontainer container object.
|
||||
//
|
||||
// Each container is thread-safe within the same process. Since a container can
|
||||
// be destroyed by a separate process, any function may return that the container
|
||||
// was not found. BaseContainer includes methods that are platform agnostic.
|
||||
type BaseContainer interface {
|
||||
// Returns the ID of the container
|
||||
ID() string
|
||||
|
||||
// Returns the current status of the container.
|
||||
//
|
||||
// errors:
|
||||
// ContainerDestroyed - Container no longer exists,
|
||||
// Systemerror - System error.
|
||||
Status() (Status, error)
|
||||
|
||||
// State returns the current container's state information.
|
||||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
State() (*State, error)
|
||||
|
||||
// Returns the current config of the container.
|
||||
Config() configs.Config
|
||||
|
||||
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
|
||||
//
|
||||
// errors:
|
||||
// ContainerDestroyed - Container no longer exists,
|
||||
// Systemerror - System error.
|
||||
//
|
||||
// Some of the returned PIDs may no longer refer to processes in the Container, unless
|
||||
// the Container state is PAUSED in which case every PID in the slice is valid.
|
||||
Processes() ([]int, error)
|
||||
|
||||
// Returns statistics for the container.
|
||||
//
|
||||
// errors:
|
||||
// ContainerDestroyed - Container no longer exists,
|
||||
// Systemerror - System error.
|
||||
Stats() (*Stats, error)
|
||||
|
||||
// Set resources of container as configured
|
||||
//
|
||||
// We can use this to change resources when containers are running.
|
||||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
Set(config configs.Config) error
|
||||
|
||||
// Start a process inside the container. Returns error if process fails to
|
||||
// start. You can track process lifecycle with passed Process structure.
|
||||
//
|
||||
// errors:
|
||||
// ContainerDestroyed - Container no longer exists,
|
||||
// ConfigInvalid - config is invalid,
|
||||
// ContainerPaused - Container is paused,
|
||||
// Systemerror - System error.
|
||||
Start(process *Process) (err error)
|
||||
|
||||
// Destroys the container after killing all running processes.
|
||||
//
|
||||
// Any event registrations are removed before the container is destroyed.
|
||||
// No error is returned if the container is already destroyed.
|
||||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
Destroy() error
|
||||
|
||||
// Signal sends the provided signal code to the container's initial process.
|
||||
//
|
||||
// errors:
|
||||
// Systemerror - System error.
|
||||
Signal(s os.Signal) error
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -1,20 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
// State represents a running container's state
|
||||
type State struct {
|
||||
BaseState
|
||||
|
||||
// Platform specific fields below here
|
||||
}
|
||||
|
||||
// A libcontainer container object.
|
||||
//
|
||||
// Each container is thread-safe within the same process. Since a container can
|
||||
// be destroyed by a separate process, any function may return that the container
|
||||
// was not found.
|
||||
type Container interface {
|
||||
BaseContainer
|
||||
|
||||
// Methods below here are platform specific
|
||||
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
// +build linux freebsd
|
||||
|
||||
package libcontainer
|
||||
|
||||
// cgroup restoring strategy provided by criu
|
||||
type cg_mode uint32
|
||||
|
||||
const (
|
||||
CRIU_CG_MODE_SOFT cg_mode = 3 + iota // restore cgroup properties if only dir created by criu
|
||||
CRIU_CG_MODE_FULL // always restore all cgroups and their properties
|
||||
CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system
|
||||
CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT
|
||||
)
|
||||
|
||||
type CriuPageServerInfo struct {
|
||||
Address string // IP address of CRIU page server
|
||||
Port int32 // port number of CRIU page server
|
||||
}
|
||||
|
||||
type VethPairName struct {
|
||||
ContainerInterfaceName string
|
||||
HostInterfaceName string
|
||||
}
|
||||
|
||||
type CriuOpts struct {
|
||||
ImagesDirectory string // directory for storing image files
|
||||
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
|
||||
LeaveRunning bool // leave container in running state after checkpoint
|
||||
TcpEstablished bool // checkpoint/restore established TCP connections
|
||||
ExternalUnixConnections bool // allow external unix connections
|
||||
ShellJob bool // allow to dump and restore shell jobs
|
||||
FileLocks bool // handle file locks, for safety
|
||||
PageServer CriuPageServerInfo // allow to dump to criu page server
|
||||
VethPairs []VethPairName // pass the veth to criu when restore
|
||||
ManageCgroupsMode cg_mode // dump or restore cgroup mode
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
// TODO Windows: This can ultimately be entirely factored out as criu is
|
||||
// a Unix concept not relevant on Windows.
|
||||
type CriuOpts struct {
|
||||
}
|
|
@ -1,2 +0,0 @@
|
|||
gen: criurpc.proto
|
||||
protoc --go_out=. criurpc.proto
|
|
@ -1,689 +0,0 @@
|
|||
// Code generated by protoc-gen-go.
|
||||
// source: criurpc.proto
|
||||
// DO NOT EDIT!
|
||||
|
||||
/*
|
||||
Package criurpc is a generated protocol buffer package.
|
||||
|
||||
It is generated from these files:
|
||||
criurpc.proto
|
||||
|
||||
It has these top-level messages:
|
||||
CriuPageServerInfo
|
||||
CriuVethPair
|
||||
ExtMountMap
|
||||
InheritFd
|
||||
CgroupRoot
|
||||
UnixSk
|
||||
CriuOpts
|
||||
CriuDumpResp
|
||||
CriuRestoreResp
|
||||
CriuNotify
|
||||
CriuReq
|
||||
CriuResp
|
||||
*/
|
||||
package criurpc
|
||||
|
||||
import proto "github.com/golang/protobuf/proto"
|
||||
import math "math"
|
||||
|
||||
// Reference imports to suppress errors if they are not otherwise used.
|
||||
var _ = proto.Marshal
|
||||
var _ = math.Inf
|
||||
|
||||
type CriuReqType int32
|
||||
|
||||
const (
|
||||
CriuReqType_EMPTY CriuReqType = 0
|
||||
CriuReqType_DUMP CriuReqType = 1
|
||||
CriuReqType_RESTORE CriuReqType = 2
|
||||
CriuReqType_CHECK CriuReqType = 3
|
||||
CriuReqType_PRE_DUMP CriuReqType = 4
|
||||
CriuReqType_PAGE_SERVER CriuReqType = 5
|
||||
CriuReqType_NOTIFY CriuReqType = 6
|
||||
CriuReqType_CPUINFO_DUMP CriuReqType = 7
|
||||
CriuReqType_CPUINFO_CHECK CriuReqType = 8
|
||||
)
|
||||
|
||||
var CriuReqType_name = map[int32]string{
|
||||
0: "EMPTY",
|
||||
1: "DUMP",
|
||||
2: "RESTORE",
|
||||
3: "CHECK",
|
||||
4: "PRE_DUMP",
|
||||
5: "PAGE_SERVER",
|
||||
6: "NOTIFY",
|
||||
7: "CPUINFO_DUMP",
|
||||
8: "CPUINFO_CHECK",
|
||||
}
|
||||
var CriuReqType_value = map[string]int32{
|
||||
"EMPTY": 0,
|
||||
"DUMP": 1,
|
||||
"RESTORE": 2,
|
||||
"CHECK": 3,
|
||||
"PRE_DUMP": 4,
|
||||
"PAGE_SERVER": 5,
|
||||
"NOTIFY": 6,
|
||||
"CPUINFO_DUMP": 7,
|
||||
"CPUINFO_CHECK": 8,
|
||||
}
|
||||
|
||||
func (x CriuReqType) Enum() *CriuReqType {
|
||||
p := new(CriuReqType)
|
||||
*p = x
|
||||
return p
|
||||
}
|
||||
func (x CriuReqType) String() string {
|
||||
return proto.EnumName(CriuReqType_name, int32(x))
|
||||
}
|
||||
func (x *CriuReqType) UnmarshalJSON(data []byte) error {
|
||||
value, err := proto.UnmarshalJSONEnum(CriuReqType_value, data, "CriuReqType")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*x = CriuReqType(value)
|
||||
return nil
|
||||
}
|
||||
|
||||
type CriuPageServerInfo struct {
|
||||
Address *string `protobuf:"bytes,1,opt,name=address" json:"address,omitempty"`
|
||||
Port *int32 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"`
|
||||
Pid *int32 `protobuf:"varint,3,opt,name=pid" json:"pid,omitempty"`
|
||||
Fd *int32 `protobuf:"varint,4,opt,name=fd" json:"fd,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} }
|
||||
func (m *CriuPageServerInfo) String() string { return proto.CompactTextString(m) }
|
||||
func (*CriuPageServerInfo) ProtoMessage() {}
|
||||
|
||||
func (m *CriuPageServerInfo) GetAddress() string {
|
||||
if m != nil && m.Address != nil {
|
||||
return *m.Address
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *CriuPageServerInfo) GetPort() int32 {
|
||||
if m != nil && m.Port != nil {
|
||||
return *m.Port
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *CriuPageServerInfo) GetPid() int32 {
|
||||
if m != nil && m.Pid != nil {
|
||||
return *m.Pid
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *CriuPageServerInfo) GetFd() int32 {
|
||||
if m != nil && m.Fd != nil {
|
||||
return *m.Fd
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type CriuVethPair struct {
|
||||
IfIn *string `protobuf:"bytes,1,req,name=if_in" json:"if_in,omitempty"`
|
||||
IfOut *string `protobuf:"bytes,2,req,name=if_out" json:"if_out,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *CriuVethPair) Reset() { *m = CriuVethPair{} }
|
||||
func (m *CriuVethPair) String() string { return proto.CompactTextString(m) }
|
||||
func (*CriuVethPair) ProtoMessage() {}
|
||||
|
||||
func (m *CriuVethPair) GetIfIn() string {
|
||||
if m != nil && m.IfIn != nil {
|
||||
return *m.IfIn
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *CriuVethPair) GetIfOut() string {
|
||||
if m != nil && m.IfOut != nil {
|
||||
return *m.IfOut
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type ExtMountMap struct {
|
||||
Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"`
|
||||
Val *string `protobuf:"bytes,2,req,name=val" json:"val,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *ExtMountMap) Reset() { *m = ExtMountMap{} }
|
||||
func (m *ExtMountMap) String() string { return proto.CompactTextString(m) }
|
||||
func (*ExtMountMap) ProtoMessage() {}
|
||||
|
||||
func (m *ExtMountMap) GetKey() string {
|
||||
if m != nil && m.Key != nil {
|
||||
return *m.Key
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *ExtMountMap) GetVal() string {
|
||||
if m != nil && m.Val != nil {
|
||||
return *m.Val
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type InheritFd struct {
|
||||
Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"`
|
||||
Fd *int32 `protobuf:"varint,2,req,name=fd" json:"fd,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *InheritFd) Reset() { *m = InheritFd{} }
|
||||
func (m *InheritFd) String() string { return proto.CompactTextString(m) }
|
||||
func (*InheritFd) ProtoMessage() {}
|
||||
|
||||
func (m *InheritFd) GetKey() string {
|
||||
if m != nil && m.Key != nil {
|
||||
return *m.Key
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *InheritFd) GetFd() int32 {
|
||||
if m != nil && m.Fd != nil {
|
||||
return *m.Fd
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type CgroupRoot struct {
|
||||
Ctrl *string `protobuf:"bytes,1,opt,name=ctrl" json:"ctrl,omitempty"`
|
||||
Path *string `protobuf:"bytes,2,req,name=path" json:"path,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *CgroupRoot) Reset() { *m = CgroupRoot{} }
|
||||
func (m *CgroupRoot) String() string { return proto.CompactTextString(m) }
|
||||
func (*CgroupRoot) ProtoMessage() {}
|
||||
|
||||
func (m *CgroupRoot) GetCtrl() string {
|
||||
if m != nil && m.Ctrl != nil {
|
||||
return *m.Ctrl
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *CgroupRoot) GetPath() string {
|
||||
if m != nil && m.Path != nil {
|
||||
return *m.Path
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type UnixSk struct {
|
||||
Inode *uint32 `protobuf:"varint,1,req,name=inode" json:"inode,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *UnixSk) Reset() { *m = UnixSk{} }
|
||||
func (m *UnixSk) String() string { return proto.CompactTextString(m) }
|
||||
func (*UnixSk) ProtoMessage() {}
|
||||
|
||||
func (m *UnixSk) GetInode() uint32 {
|
||||
if m != nil && m.Inode != nil {
|
||||
return *m.Inode
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type CriuOpts struct {
|
||||
ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd" json:"images_dir_fd,omitempty"`
|
||||
Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"`
|
||||
LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running" json:"leave_running,omitempty"`
|
||||
ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk" json:"ext_unix_sk,omitempty"`
|
||||
TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established" json:"tcp_established,omitempty"`
|
||||
EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices" json:"evasive_devices,omitempty"`
|
||||
ShellJob *bool `protobuf:"varint,7,opt,name=shell_job" json:"shell_job,omitempty"`
|
||||
FileLocks *bool `protobuf:"varint,8,opt,name=file_locks" json:"file_locks,omitempty"`
|
||||
LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,def=2" json:"log_level,omitempty"`
|
||||
LogFile *string `protobuf:"bytes,10,opt,name=log_file" json:"log_file,omitempty"`
|
||||
Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"`
|
||||
NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts" json:"notify_scripts,omitempty"`
|
||||
Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"`
|
||||
ParentImg *string `protobuf:"bytes,14,opt,name=parent_img" json:"parent_img,omitempty"`
|
||||
TrackMem *bool `protobuf:"varint,15,opt,name=track_mem" json:"track_mem,omitempty"`
|
||||
AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup" json:"auto_dedup,omitempty"`
|
||||
WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd" json:"work_dir_fd,omitempty"`
|
||||
LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap" json:"link_remap,omitempty"`
|
||||
Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"`
|
||||
CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,def=4294967295" json:"cpu_cap,omitempty"`
|
||||
ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap" json:"force_irmap,omitempty"`
|
||||
ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd" json:"exec_cmd,omitempty"`
|
||||
ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt" json:"ext_mnt,omitempty"`
|
||||
ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups" json:"manage_cgroups,omitempty"`
|
||||
CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root" json:"cg_root,omitempty"`
|
||||
RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling" json:"rst_sibling,omitempty"`
|
||||
InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd" json:"inherit_fd,omitempty"`
|
||||
AutoExtMnt *bool `protobuf:"varint,28,opt,name=auto_ext_mnt" json:"auto_ext_mnt,omitempty"`
|
||||
ExtSharing *bool `protobuf:"varint,29,opt,name=ext_sharing" json:"ext_sharing,omitempty"`
|
||||
ExtMasters *bool `protobuf:"varint,30,opt,name=ext_masters" json:"ext_masters,omitempty"`
|
||||
SkipMnt []string `protobuf:"bytes,31,rep,name=skip_mnt" json:"skip_mnt,omitempty"`
|
||||
EnableFs []string `protobuf:"bytes,32,rep,name=enable_fs" json:"enable_fs,omitempty"`
|
||||
UnixSkIno []*UnixSk `protobuf:"bytes,33,rep,name=unix_sk_ino" json:"unix_sk_ino,omitempty"`
|
||||
ManageCgroupsMode *uint32 `protobuf:"varint,34,opt,name=manage_cgroups_mode" json:"manage_cgroups_mode,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *CriuOpts) Reset() { *m = CriuOpts{} }
|
||||
func (m *CriuOpts) String() string { return proto.CompactTextString(m) }
|
||||
func (*CriuOpts) ProtoMessage() {}
|
||||
|
||||
const Default_CriuOpts_LogLevel int32 = 2
|
||||
const Default_CriuOpts_CpuCap uint32 = 4294967295
|
||||
|
||||
func (m *CriuOpts) GetImagesDirFd() int32 {
|
||||
if m != nil && m.ImagesDirFd != nil {
|
||||
return *m.ImagesDirFd
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetPid() int32 {
|
||||
if m != nil && m.Pid != nil {
|
||||
return *m.Pid
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetLeaveRunning() bool {
|
||||
if m != nil && m.LeaveRunning != nil {
|
||||
return *m.LeaveRunning
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetExtUnixSk() bool {
|
||||
if m != nil && m.ExtUnixSk != nil {
|
||||
return *m.ExtUnixSk
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetTcpEstablished() bool {
|
||||
if m != nil && m.TcpEstablished != nil {
|
||||
return *m.TcpEstablished
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetEvasiveDevices() bool {
|
||||
if m != nil && m.EvasiveDevices != nil {
|
||||
return *m.EvasiveDevices
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetShellJob() bool {
|
||||
if m != nil && m.ShellJob != nil {
|
||||
return *m.ShellJob
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetFileLocks() bool {
|
||||
if m != nil && m.FileLocks != nil {
|
||||
return *m.FileLocks
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetLogLevel() int32 {
|
||||
if m != nil && m.LogLevel != nil {
|
||||
return *m.LogLevel
|
||||
}
|
||||
return Default_CriuOpts_LogLevel
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetLogFile() string {
|
||||
if m != nil && m.LogFile != nil {
|
||||
return *m.LogFile
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetPs() *CriuPageServerInfo {
|
||||
if m != nil {
|
||||
return m.Ps
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetNotifyScripts() bool {
|
||||
if m != nil && m.NotifyScripts != nil {
|
||||
return *m.NotifyScripts
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetRoot() string {
|
||||
if m != nil && m.Root != nil {
|
||||
return *m.Root
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetParentImg() string {
|
||||
if m != nil && m.ParentImg != nil {
|
||||
return *m.ParentImg
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetTrackMem() bool {
|
||||
if m != nil && m.TrackMem != nil {
|
||||
return *m.TrackMem
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetAutoDedup() bool {
|
||||
if m != nil && m.AutoDedup != nil {
|
||||
return *m.AutoDedup
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetWorkDirFd() int32 {
|
||||
if m != nil && m.WorkDirFd != nil {
|
||||
return *m.WorkDirFd
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetLinkRemap() bool {
|
||||
if m != nil && m.LinkRemap != nil {
|
||||
return *m.LinkRemap
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetVeths() []*CriuVethPair {
|
||||
if m != nil {
|
||||
return m.Veths
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetCpuCap() uint32 {
|
||||
if m != nil && m.CpuCap != nil {
|
||||
return *m.CpuCap
|
||||
}
|
||||
return Default_CriuOpts_CpuCap
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetForceIrmap() bool {
|
||||
if m != nil && m.ForceIrmap != nil {
|
||||
return *m.ForceIrmap
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetExecCmd() []string {
|
||||
if m != nil {
|
||||
return m.ExecCmd
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetExtMnt() []*ExtMountMap {
|
||||
if m != nil {
|
||||
return m.ExtMnt
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetManageCgroups() bool {
|
||||
if m != nil && m.ManageCgroups != nil {
|
||||
return *m.ManageCgroups
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetCgRoot() []*CgroupRoot {
|
||||
if m != nil {
|
||||
return m.CgRoot
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetRstSibling() bool {
|
||||
if m != nil && m.RstSibling != nil {
|
||||
return *m.RstSibling
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetInheritFd() []*InheritFd {
|
||||
if m != nil {
|
||||
return m.InheritFd
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetAutoExtMnt() bool {
|
||||
if m != nil && m.AutoExtMnt != nil {
|
||||
return *m.AutoExtMnt
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetExtSharing() bool {
|
||||
if m != nil && m.ExtSharing != nil {
|
||||
return *m.ExtSharing
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetExtMasters() bool {
|
||||
if m != nil && m.ExtMasters != nil {
|
||||
return *m.ExtMasters
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetSkipMnt() []string {
|
||||
if m != nil {
|
||||
return m.SkipMnt
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetEnableFs() []string {
|
||||
if m != nil {
|
||||
return m.EnableFs
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetUnixSkIno() []*UnixSk {
|
||||
if m != nil {
|
||||
return m.UnixSkIno
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuOpts) GetManageCgroupsMode() uint32 {
|
||||
if m != nil && m.ManageCgroupsMode != nil {
|
||||
return *m.ManageCgroupsMode
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type CriuDumpResp struct {
|
||||
Restored *bool `protobuf:"varint,1,opt,name=restored" json:"restored,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} }
|
||||
func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) }
|
||||
func (*CriuDumpResp) ProtoMessage() {}
|
||||
|
||||
func (m *CriuDumpResp) GetRestored() bool {
|
||||
if m != nil && m.Restored != nil {
|
||||
return *m.Restored
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type CriuRestoreResp struct {
|
||||
Pid *int32 `protobuf:"varint,1,req,name=pid" json:"pid,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} }
|
||||
func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) }
|
||||
func (*CriuRestoreResp) ProtoMessage() {}
|
||||
|
||||
func (m *CriuRestoreResp) GetPid() int32 {
|
||||
if m != nil && m.Pid != nil {
|
||||
return *m.Pid
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type CriuNotify struct {
|
||||
Script *string `protobuf:"bytes,1,opt,name=script" json:"script,omitempty"`
|
||||
Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *CriuNotify) Reset() { *m = CriuNotify{} }
|
||||
func (m *CriuNotify) String() string { return proto.CompactTextString(m) }
|
||||
func (*CriuNotify) ProtoMessage() {}
|
||||
|
||||
func (m *CriuNotify) GetScript() string {
|
||||
if m != nil && m.Script != nil {
|
||||
return *m.Script
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *CriuNotify) GetPid() int32 {
|
||||
if m != nil && m.Pid != nil {
|
||||
return *m.Pid
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type CriuReq struct {
|
||||
Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"`
|
||||
Opts *CriuOpts `protobuf:"bytes,2,opt,name=opts" json:"opts,omitempty"`
|
||||
NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success" json:"notify_success,omitempty"`
|
||||
//
|
||||
// When set service won't close the connection but
|
||||
// will wait for more req-s to appear. Works not
|
||||
// for all request types.
|
||||
KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open" json:"keep_open,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *CriuReq) Reset() { *m = CriuReq{} }
|
||||
func (m *CriuReq) String() string { return proto.CompactTextString(m) }
|
||||
func (*CriuReq) ProtoMessage() {}
|
||||
|
||||
func (m *CriuReq) GetType() CriuReqType {
|
||||
if m != nil && m.Type != nil {
|
||||
return *m.Type
|
||||
}
|
||||
return CriuReqType_EMPTY
|
||||
}
|
||||
|
||||
func (m *CriuReq) GetOpts() *CriuOpts {
|
||||
if m != nil {
|
||||
return m.Opts
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuReq) GetNotifySuccess() bool {
|
||||
if m != nil && m.NotifySuccess != nil {
|
||||
return *m.NotifySuccess
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuReq) GetKeepOpen() bool {
|
||||
if m != nil && m.KeepOpen != nil {
|
||||
return *m.KeepOpen
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type CriuResp struct {
|
||||
Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"`
|
||||
Success *bool `protobuf:"varint,2,req,name=success" json:"success,omitempty"`
|
||||
Dump *CriuDumpResp `protobuf:"bytes,3,opt,name=dump" json:"dump,omitempty"`
|
||||
Restore *CriuRestoreResp `protobuf:"bytes,4,opt,name=restore" json:"restore,omitempty"`
|
||||
Notify *CriuNotify `protobuf:"bytes,5,opt,name=notify" json:"notify,omitempty"`
|
||||
Ps *CriuPageServerInfo `protobuf:"bytes,6,opt,name=ps" json:"ps,omitempty"`
|
||||
CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno" json:"cr_errno,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *CriuResp) Reset() { *m = CriuResp{} }
|
||||
func (m *CriuResp) String() string { return proto.CompactTextString(m) }
|
||||
func (*CriuResp) ProtoMessage() {}
|
||||
|
||||
func (m *CriuResp) GetType() CriuReqType {
|
||||
if m != nil && m.Type != nil {
|
||||
return *m.Type
|
||||
}
|
||||
return CriuReqType_EMPTY
|
||||
}
|
||||
|
||||
func (m *CriuResp) GetSuccess() bool {
|
||||
if m != nil && m.Success != nil {
|
||||
return *m.Success
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *CriuResp) GetDump() *CriuDumpResp {
|
||||
if m != nil {
|
||||
return m.Dump
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuResp) GetRestore() *CriuRestoreResp {
|
||||
if m != nil {
|
||||
return m.Restore
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuResp) GetNotify() *CriuNotify {
|
||||
if m != nil {
|
||||
return m.Notify
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuResp) GetPs() *CriuPageServerInfo {
|
||||
if m != nil {
|
||||
return m.Ps
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *CriuResp) GetCrErrno() int32 {
|
||||
if m != nil && m.CrErrno != nil {
|
||||
return *m.CrErrno
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func init() {
|
||||
proto.RegisterEnum("CriuReqType", CriuReqType_name, CriuReqType_value)
|
||||
}
|
|
@ -1,142 +0,0 @@
|
|||
message criu_page_server_info {
|
||||
optional string address = 1;
|
||||
optional int32 port = 2;
|
||||
optional int32 pid = 3;
|
||||
optional int32 fd = 4;
|
||||
}
|
||||
|
||||
message criu_veth_pair {
|
||||
required string if_in = 1;
|
||||
required string if_out = 2;
|
||||
};
|
||||
|
||||
message ext_mount_map {
|
||||
required string key = 1;
|
||||
required string val = 2;
|
||||
};
|
||||
|
||||
message inherit_fd {
|
||||
required string key = 1;
|
||||
required int32 fd = 2;
|
||||
};
|
||||
|
||||
message cgroup_root {
|
||||
optional string ctrl = 1;
|
||||
required string path = 2;
|
||||
};
|
||||
|
||||
message unix_sk {
|
||||
required uint32 inode = 1;
|
||||
};
|
||||
|
||||
message criu_opts {
|
||||
required int32 images_dir_fd = 1;
|
||||
optional int32 pid = 2; /* if not set on dump, will dump requesting process */
|
||||
|
||||
optional bool leave_running = 3;
|
||||
optional bool ext_unix_sk = 4;
|
||||
optional bool tcp_established = 5;
|
||||
optional bool evasive_devices = 6;
|
||||
optional bool shell_job = 7;
|
||||
optional bool file_locks = 8;
|
||||
optional int32 log_level = 9 [default = 2];
|
||||
optional string log_file = 10; /* No subdirs are allowed. Consider using work-dir */
|
||||
|
||||
optional criu_page_server_info ps = 11;
|
||||
|
||||
optional bool notify_scripts = 12;
|
||||
|
||||
optional string root = 13;
|
||||
optional string parent_img = 14;
|
||||
optional bool track_mem = 15;
|
||||
optional bool auto_dedup = 16;
|
||||
|
||||
optional int32 work_dir_fd = 17;
|
||||
optional bool link_remap = 18;
|
||||
repeated criu_veth_pair veths = 19;
|
||||
|
||||
optional uint32 cpu_cap = 20 [default = 0xffffffff];
|
||||
optional bool force_irmap = 21;
|
||||
repeated string exec_cmd = 22;
|
||||
|
||||
repeated ext_mount_map ext_mnt = 23;
|
||||
optional bool manage_cgroups = 24; /* backward compatibility */
|
||||
repeated cgroup_root cg_root = 25;
|
||||
|
||||
optional bool rst_sibling = 26; /* swrk only */
|
||||
repeated inherit_fd inherit_fd = 27; /* swrk only */
|
||||
|
||||
optional bool auto_ext_mnt = 28;
|
||||
optional bool ext_sharing = 29;
|
||||
optional bool ext_masters = 30;
|
||||
|
||||
repeated string skip_mnt = 31;
|
||||
repeated string enable_fs = 32;
|
||||
|
||||
repeated unix_sk unix_sk_ino = 33;
|
||||
|
||||
optional uint32 manage_cgroups_mode = 34;
|
||||
}
|
||||
|
||||
message criu_dump_resp {
|
||||
optional bool restored = 1;
|
||||
}
|
||||
|
||||
message criu_restore_resp {
|
||||
required int32 pid = 1;
|
||||
}
|
||||
|
||||
message criu_notify {
|
||||
optional string script = 1;
|
||||
optional int32 pid = 2;
|
||||
}
|
||||
|
||||
enum criu_req_type {
|
||||
EMPTY = 0;
|
||||
DUMP = 1;
|
||||
RESTORE = 2;
|
||||
CHECK = 3;
|
||||
PRE_DUMP = 4;
|
||||
PAGE_SERVER = 5;
|
||||
|
||||
NOTIFY = 6;
|
||||
|
||||
CPUINFO_DUMP = 7;
|
||||
CPUINFO_CHECK = 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* Request -- each type corresponds to must-be-there
|
||||
* request arguments of respective type
|
||||
*/
|
||||
|
||||
message criu_req {
|
||||
required criu_req_type type = 1;
|
||||
|
||||
optional criu_opts opts = 2;
|
||||
optional bool notify_success = 3;
|
||||
|
||||
/*
|
||||
* When set service won't close the connection but
|
||||
* will wait for more req-s to appear. Works not
|
||||
* for all request types.
|
||||
*/
|
||||
optional bool keep_open = 4;
|
||||
}
|
||||
|
||||
/*
|
||||
* Responce -- it states whether the request was served
|
||||
* and additional request-specific informarion
|
||||
*/
|
||||
|
||||
message criu_resp {
|
||||
required criu_req_type type = 1;
|
||||
required bool success = 2;
|
||||
|
||||
optional criu_dump_resp dump = 3;
|
||||
optional criu_restore_resp restore = 4;
|
||||
optional criu_notify notify = 5;
|
||||
optional criu_page_server_info ps = 6;
|
||||
|
||||
optional int32 cr_errno = 7;
|
||||
}
|
|
@ -1,70 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
import "io"
|
||||
|
||||
// API error code type.
|
||||
type ErrorCode int
|
||||
|
||||
// API error codes.
|
||||
const (
|
||||
// Factory errors
|
||||
IdInUse ErrorCode = iota
|
||||
InvalidIdFormat
|
||||
|
||||
// Container errors
|
||||
ContainerNotExists
|
||||
ContainerPaused
|
||||
ContainerNotStopped
|
||||
ContainerNotRunning
|
||||
ContainerNotPaused
|
||||
|
||||
// Process errors
|
||||
NoProcessOps
|
||||
|
||||
// Common errors
|
||||
ConfigInvalid
|
||||
ConsoleExists
|
||||
SystemError
|
||||
)
|
||||
|
||||
func (c ErrorCode) String() string {
|
||||
switch c {
|
||||
case IdInUse:
|
||||
return "Id already in use"
|
||||
case InvalidIdFormat:
|
||||
return "Invalid format"
|
||||
case ContainerPaused:
|
||||
return "Container paused"
|
||||
case ConfigInvalid:
|
||||
return "Invalid configuration"
|
||||
case SystemError:
|
||||
return "System error"
|
||||
case ContainerNotExists:
|
||||
return "Container does not exist"
|
||||
case ContainerNotStopped:
|
||||
return "Container is not stopped"
|
||||
case ContainerNotRunning:
|
||||
return "Container is not running"
|
||||
case ConsoleExists:
|
||||
return "Console exists for process"
|
||||
case ContainerNotPaused:
|
||||
return "Container is not paused"
|
||||
case NoProcessOps:
|
||||
return "No process operations"
|
||||
default:
|
||||
return "Unknown error"
|
||||
}
|
||||
}
|
||||
|
||||
// API Error type.
|
||||
type Error interface {
|
||||
error
|
||||
|
||||
// Returns a verbose string including the error message
|
||||
// and a representation of the stack trace suitable for
|
||||
// printing.
|
||||
Detail(w io.Writer) error
|
||||
|
||||
// Returns the error code for this error.
|
||||
Code() ErrorCode
|
||||
}
|
|
@ -1,45 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Factory interface {
|
||||
// Creates a new container with the given id and starts the initial process inside it.
|
||||
// id must be a string containing only letters, digits and underscores and must contain
|
||||
// between 1 and 1024 characters, inclusive.
|
||||
//
|
||||
// The id must not already be in use by an existing container. Containers created using
|
||||
// a factory with the same path (and file system) must have distinct ids.
|
||||
//
|
||||
// Returns the new container with a running process.
|
||||
//
|
||||
// errors:
|
||||
// IdInUse - id is already in use by a container
|
||||
// InvalidIdFormat - id has incorrect format
|
||||
// ConfigInvalid - config is invalid
|
||||
// Systemerror - System error
|
||||
//
|
||||
// On error, any partially created container parts are cleaned up (the operation is atomic).
|
||||
Create(id string, config *configs.Config) (Container, error)
|
||||
|
||||
// Load takes an ID for an existing container and returns the container information
|
||||
// from the state. This presents a read only view of the container.
|
||||
//
|
||||
// errors:
|
||||
// Path does not exist
|
||||
// Container is stopped
|
||||
// System error
|
||||
Load(id string) (Container, error)
|
||||
|
||||
// StartInitialization is an internal API to libcontainer used during the reexec of the
|
||||
// container.
|
||||
//
|
||||
// Errors:
|
||||
// Pipe connection error
|
||||
// System error
|
||||
StartInitialization() error
|
||||
|
||||
// Type returns info string about factory type (e.g. lxc, libcontainer...)
|
||||
Type() string
|
||||
}
|
|
@ -1,295 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
const (
|
||||
stateFilename = "state.json"
|
||||
)
|
||||
|
||||
var (
|
||||
idRegex = regexp.MustCompile(`^[\w_-]+$`)
|
||||
maxIdLen = 1024
|
||||
)
|
||||
|
||||
// InitArgs returns an options func to configure a LinuxFactory with the
|
||||
// provided init arguments.
|
||||
func InitArgs(args ...string) func(*LinuxFactory) error {
|
||||
return func(l *LinuxFactory) error {
|
||||
name := args[0]
|
||||
if filepath.Base(name) == name {
|
||||
if lp, err := exec.LookPath(name); err == nil {
|
||||
name = lp
|
||||
}
|
||||
} else {
|
||||
abs, err := filepath.Abs(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
name = abs
|
||||
}
|
||||
l.InitPath = "/proc/self/exe"
|
||||
l.InitArgs = append([]string{name}, args[1:]...)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// InitPath returns an options func to configure a LinuxFactory with the
|
||||
// provided absolute path to the init binary and arguements.
|
||||
func InitPath(path string, args ...string) func(*LinuxFactory) error {
|
||||
return func(l *LinuxFactory) error {
|
||||
l.InitPath = path
|
||||
l.InitArgs = args
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// SystemdCgroups is an options func to configure a LinuxFactory to return
|
||||
// containers that use systemd to create and manage cgroups.
|
||||
func SystemdCgroups(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &systemd.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cgroupfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the native cgroups filesystem implementation to
|
||||
// create and manage cgroups.
|
||||
func Cgroupfs(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
|
||||
func TmpfsRoot(l *LinuxFactory) error {
|
||||
mounted, err := mount.Mounted(l.Root)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !mounted {
|
||||
if err := syscall.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// New returns a linux based container factory based in the root directory and
|
||||
// configures the factory with the provided option funcs.
|
||||
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
|
||||
if root != "" {
|
||||
if err := os.MkdirAll(root, 0700); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
}
|
||||
l := &LinuxFactory{
|
||||
Root: root,
|
||||
Validator: validate.New(),
|
||||
CriuPath: "criu",
|
||||
}
|
||||
InitArgs(os.Args[0], "init")(l)
|
||||
Cgroupfs(l)
|
||||
for _, opt := range options {
|
||||
if err := opt(l); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return l, nil
|
||||
}
|
||||
|
||||
// LinuxFactory implements the default factory interface for linux based systems.
|
||||
type LinuxFactory struct {
|
||||
// Root directory for the factory to store state.
|
||||
Root string
|
||||
|
||||
// InitPath is the absolute path to the init binary.
|
||||
InitPath string
|
||||
|
||||
// InitArgs are arguments for calling the init responsibilities for spawning
|
||||
// a container.
|
||||
InitArgs []string
|
||||
|
||||
// CriuPath is the path to the criu binary used for checkpoint and restore of
|
||||
// containers.
|
||||
CriuPath string
|
||||
|
||||
// Validator provides validation to container configurations.
|
||||
Validator validate.Validator
|
||||
|
||||
// NewCgroupsManager returns an initialized cgroups manager for a single container.
|
||||
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
|
||||
if l.Root == "" {
|
||||
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
|
||||
}
|
||||
if err := l.validateID(id); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := l.Validator.Validate(config); err != nil {
|
||||
return nil, newGenericError(err, ConfigInvalid)
|
||||
}
|
||||
containerRoot := filepath.Join(l.Root, id)
|
||||
if _, err := os.Stat(containerRoot); err == nil {
|
||||
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
|
||||
} else if !os.IsNotExist(err) {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if err := os.MkdirAll(containerRoot, 0700); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
c := &linuxContainer{
|
||||
id: id,
|
||||
root: containerRoot,
|
||||
config: config,
|
||||
initPath: l.InitPath,
|
||||
initArgs: l.InitArgs,
|
||||
criuPath: l.CriuPath,
|
||||
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
|
||||
}
|
||||
c.state = &stoppedState{c: c}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Load(id string) (Container, error) {
|
||||
if l.Root == "" {
|
||||
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
|
||||
}
|
||||
containerRoot := filepath.Join(l.Root, id)
|
||||
state, err := l.loadState(containerRoot)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r := &nonChildProcess{
|
||||
processPid: state.InitProcessPid,
|
||||
processStartTime: state.InitProcessStartTime,
|
||||
fds: state.ExternalDescriptors,
|
||||
}
|
||||
c := &linuxContainer{
|
||||
initProcess: r,
|
||||
id: id,
|
||||
config: &state.Config,
|
||||
initPath: l.InitPath,
|
||||
initArgs: l.InitArgs,
|
||||
criuPath: l.CriuPath,
|
||||
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
|
||||
root: containerRoot,
|
||||
created: state.Created,
|
||||
}
|
||||
c.state = &createdState{c: c, s: Created}
|
||||
if err := c.refreshState(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Type() string {
|
||||
return "libcontainer"
|
||||
}
|
||||
|
||||
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
|
||||
// This is a low level implementation detail of the reexec and should not be consumed externally
|
||||
func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
fdStr := os.Getenv("_LIBCONTAINER_INITPIPE")
|
||||
pipefd, err := strconv.Atoi(fdStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error converting env var _LIBCONTAINER_INITPIPE(%q) to an int: %s", fdStr, err)
|
||||
}
|
||||
var (
|
||||
pipe = os.NewFile(uintptr(pipefd), "pipe")
|
||||
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
|
||||
)
|
||||
defer pipe.Close()
|
||||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
os.Clearenv()
|
||||
i, err := newContainerInit(it, pipe)
|
||||
if err != nil {
|
||||
l.sendError(nil, pipe, err)
|
||||
return err
|
||||
}
|
||||
if err := i.Init(); err != nil {
|
||||
if !isExecError(err) {
|
||||
l.sendError(i, pipe, err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) sendError(i initer, pipe *os.File, err error) {
|
||||
// We have an error during the initialization of the container's init,
|
||||
// send it back to the parent process in the form of an initError.
|
||||
// If container's init successed, syscall.Exec will not return, hence
|
||||
// this defer function will never be called.
|
||||
if i != nil {
|
||||
if _, ok := i.(*linuxStandardInit); ok {
|
||||
// Synchronisation only necessary for standard init.
|
||||
if err := utils.WriteJSON(pipe, syncT{procError}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) loadState(root string) (*State, error) {
|
||||
f, err := os.Open(filepath.Join(root, stateFilename))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, newGenericError(err, ContainerNotExists)
|
||||
}
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
defer f.Close()
|
||||
var state *State
|
||||
if err := json.NewDecoder(f).Decode(&state); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) validateID(id string) error {
|
||||
if !idRegex.MatchString(id) {
|
||||
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
|
||||
}
|
||||
if len(id) > maxIdLen {
|
||||
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func isExecError(err error) bool {
|
||||
_, ok := err.(*exec.Error)
|
||||
return ok
|
||||
}
|
|
@ -1,88 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/stacktrace"
|
||||
)
|
||||
|
||||
type syncType uint8
|
||||
|
||||
const (
|
||||
procReady syncType = iota
|
||||
procError
|
||||
procRun
|
||||
procHooks
|
||||
procResume
|
||||
)
|
||||
|
||||
type syncT struct {
|
||||
Type syncType `json:"type"`
|
||||
}
|
||||
|
||||
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
|
||||
Code: {{.ECode}}
|
||||
{{if .Message }}
|
||||
Message: {{.Message}}
|
||||
{{end}}
|
||||
Frames:{{range $i, $frame := .Stack.Frames}}
|
||||
---
|
||||
{{$i}}: {{$frame.Function}}
|
||||
Package: {{$frame.Package}}
|
||||
File: {{$frame.File}}@{{$frame.Line}}{{end}}
|
||||
`))
|
||||
|
||||
func newGenericError(err error, c ErrorCode) Error {
|
||||
if le, ok := err.(Error); ok {
|
||||
return le
|
||||
}
|
||||
gerr := &genericError{
|
||||
Timestamp: time.Now(),
|
||||
Err: err,
|
||||
ECode: c,
|
||||
Stack: stacktrace.Capture(1),
|
||||
}
|
||||
if err != nil {
|
||||
gerr.Message = err.Error()
|
||||
}
|
||||
return gerr
|
||||
}
|
||||
|
||||
func newSystemError(err error) Error {
|
||||
if le, ok := err.(Error); ok {
|
||||
return le
|
||||
}
|
||||
gerr := &genericError{
|
||||
Timestamp: time.Now(),
|
||||
Err: err,
|
||||
ECode: SystemError,
|
||||
Stack: stacktrace.Capture(1),
|
||||
}
|
||||
if err != nil {
|
||||
gerr.Message = err.Error()
|
||||
}
|
||||
return gerr
|
||||
}
|
||||
|
||||
type genericError struct {
|
||||
Timestamp time.Time
|
||||
ECode ErrorCode
|
||||
Err error `json:"-"`
|
||||
Message string
|
||||
Stack stacktrace.Stacktrace
|
||||
}
|
||||
|
||||
func (e *genericError) Error() string {
|
||||
return fmt.Sprintf("[%d] %s: %s", e.ECode, e.ECode, e.Message)
|
||||
}
|
||||
|
||||
func (e *genericError) Code() ErrorCode {
|
||||
return e.ECode
|
||||
}
|
||||
|
||||
func (e *genericError) Detail(w io.Writer) error {
|
||||
return errorTemplate.Execute(w, e)
|
||||
}
|
|
@ -1,364 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
type initType string
|
||||
|
||||
const (
|
||||
initSetns initType = "setns"
|
||||
initStandard initType = "standard"
|
||||
)
|
||||
|
||||
type pid struct {
|
||||
Pid int `json:"pid"`
|
||||
}
|
||||
|
||||
// network is an internal struct used to setup container networks.
|
||||
type network struct {
|
||||
configs.Network
|
||||
|
||||
// TempVethPeerName is a unique temporary veth peer name that was placed into
|
||||
// the container's namespace.
|
||||
TempVethPeerName string `json:"temp_veth_peer_name"`
|
||||
}
|
||||
|
||||
// initConfig is used for transferring parameters from Exec() to Init()
|
||||
type initConfig struct {
|
||||
Args []string `json:"args"`
|
||||
Env []string `json:"env"`
|
||||
Cwd string `json:"cwd"`
|
||||
Capabilities []string `json:"capabilities"`
|
||||
ProcessLabel string `json:"process_label"`
|
||||
AppArmorProfile string `json:"apparmor_profile"`
|
||||
NoNewPrivileges bool `json:"no_new_privileges"`
|
||||
User string `json:"user"`
|
||||
Config *configs.Config `json:"config"`
|
||||
Console string `json:"console"`
|
||||
Networks []*network `json:"network"`
|
||||
PassedFilesCount int `json:"passed_files_count"`
|
||||
ContainerId string `json:"containerid"`
|
||||
}
|
||||
|
||||
type initer interface {
|
||||
Init() error
|
||||
}
|
||||
|
||||
func newContainerInit(t initType, pipe *os.File) (initer, error) {
|
||||
var config *initConfig
|
||||
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := populateProcessEnvironment(config.Env); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
switch t {
|
||||
case initSetns:
|
||||
return &linuxSetnsInit{
|
||||
config: config,
|
||||
}, nil
|
||||
case initStandard:
|
||||
return &linuxStandardInit{
|
||||
pipe: pipe,
|
||||
parentPid: syscall.Getppid(),
|
||||
config: config,
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unknown init type %q", t)
|
||||
}
|
||||
|
||||
// populateProcessEnvironment loads the provided environment variables into the
|
||||
// current processes's environment.
|
||||
func populateProcessEnvironment(env []string) error {
|
||||
for _, pair := range env {
|
||||
p := strings.SplitN(pair, "=", 2)
|
||||
if len(p) < 2 {
|
||||
return fmt.Errorf("invalid environment '%v'", pair)
|
||||
}
|
||||
if err := os.Setenv(p[0], p[1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalizeNamespace drops the caps, sets the correct user
|
||||
// and working dir, and closes any leaked file descriptors
|
||||
// before executing the command inside the namespace
|
||||
func finalizeNamespace(config *initConfig) error {
|
||||
// Ensure that all unwanted fds we may have accidentally
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
capabilities := config.Config.Capabilities
|
||||
if config.Capabilities != nil {
|
||||
capabilities = config.Capabilities
|
||||
}
|
||||
w, err := newCapWhitelist(capabilities)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := w.dropBoundingSet(); err != nil {
|
||||
return err
|
||||
}
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
// drop all other capabilities
|
||||
if err := w.drop(); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Cwd != "" {
|
||||
if err := syscall.Chdir(config.Cwd); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// syncParentReady sends to the given pipe a JSON payload which indicates that
|
||||
// the init is ready to Exec the child process. It then waits for the parent to
|
||||
// indicate that it is cleared to Exec.
|
||||
func syncParentReady(pipe io.ReadWriter) error {
|
||||
// Tell parent.
|
||||
if err := utils.WriteJSON(pipe, syncT{procReady}); err != nil {
|
||||
return err
|
||||
}
|
||||
// Wait for parent to give the all-clear.
|
||||
var procSync syncT
|
||||
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
|
||||
if err == io.EOF {
|
||||
return fmt.Errorf("parent closed synchronisation channel")
|
||||
}
|
||||
if procSync.Type != procRun {
|
||||
return fmt.Errorf("invalid synchronisation flag from parent")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// syncParentHooks sends to the given pipe a JSON payload which indicates that
|
||||
// the parent should execute pre-start hooks. It then waits for the parent to
|
||||
// indicate that it is cleared to resume.
|
||||
func syncParentHooks(pipe io.ReadWriter) error {
|
||||
// Tell parent.
|
||||
if err := utils.WriteJSON(pipe, syncT{procHooks}); err != nil {
|
||||
return err
|
||||
}
|
||||
// Wait for parent to give the all-clear.
|
||||
var procSync syncT
|
||||
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
|
||||
if err == io.EOF {
|
||||
return fmt.Errorf("parent closed synchronisation channel")
|
||||
}
|
||||
if procSync.Type != procResume {
|
||||
return fmt.Errorf("invalid synchronisation flag from parent")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
func setupUser(config *initConfig) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: syscall.Getuid(),
|
||||
Gid: syscall.Getgid(),
|
||||
Home: "/",
|
||||
}
|
||||
passwdPath, err := user.GetPasswdPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
groupPath, err := user.GetGroupPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var addGroups []int
|
||||
if len(config.Config.AdditionalGroups) > 0 {
|
||||
addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// before we change to the container's user make sure that the processes STDIO
|
||||
// is correctly owned by the user that we are switching to.
|
||||
if err := fixStdioPermissions(execUser); err != nil {
|
||||
return err
|
||||
}
|
||||
suppGroups := append(execUser.Sgids, addGroups...)
|
||||
if err := syscall.Setgroups(suppGroups); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := system.Setgid(execUser.Gid); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Setuid(execUser.Uid); err != nil {
|
||||
return err
|
||||
}
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
|
||||
// The ownership needs to match because it is created outside of the container and needs to be
|
||||
// localized.
|
||||
func fixStdioPermissions(u *user.ExecUser) error {
|
||||
var null syscall.Stat_t
|
||||
if err := syscall.Stat("/dev/null", &null); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, fd := range []uintptr{
|
||||
os.Stdin.Fd(),
|
||||
os.Stderr.Fd(),
|
||||
os.Stdout.Fd(),
|
||||
} {
|
||||
var s syscall.Stat_t
|
||||
if err := syscall.Fstat(int(fd), &s); err != nil {
|
||||
return err
|
||||
}
|
||||
// skip chown of /dev/null if it was used as one of the STDIO fds.
|
||||
if s.Rdev == null.Rdev {
|
||||
continue
|
||||
}
|
||||
if err := syscall.Fchown(int(fd), u.Uid, u.Gid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupNetwork sets up and initializes any network interface inside the container.
|
||||
func setupNetwork(config *initConfig) error {
|
||||
for _, config := range config.Networks {
|
||||
strategy, err := getStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := strategy.initialize(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRoute(config *configs.Config) error {
|
||||
for _, config := range config.Routes {
|
||||
_, dst, err := net.ParseCIDR(config.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
src := net.ParseIP(config.Source)
|
||||
if src == nil {
|
||||
return fmt.Errorf("Invalid source for route: %s", config.Source)
|
||||
}
|
||||
gw := net.ParseIP(config.Gateway)
|
||||
if gw == nil {
|
||||
return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
|
||||
}
|
||||
l, err := netlink.LinkByName(config.InterfaceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
route := &netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Gw: gw,
|
||||
LinkIndex: l.Attrs().Index,
|
||||
}
|
||||
if err := netlink.RouteAdd(route); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRlimits(config *configs.Config) error {
|
||||
for _, rlimit := range config.Rlimits {
|
||||
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
|
||||
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setOomScoreAdj(oomScoreAdj int, pid int) error {
|
||||
path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
|
||||
|
||||
return ioutil.WriteFile(path, []byte(strconv.Itoa(oomScoreAdj)), 0600)
|
||||
}
|
||||
|
||||
// killCgroupProcesses freezes then iterates over all the processes inside the
|
||||
// manager's cgroups sending a SIGKILL to each process then waiting for them to
|
||||
// exit.
|
||||
func killCgroupProcesses(m cgroups.Manager) error {
|
||||
var procs []*os.Process
|
||||
if err := m.Freeze(configs.Frozen); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
pids, err := m.GetAllPids()
|
||||
if err != nil {
|
||||
m.Freeze(configs.Thawed)
|
||||
return err
|
||||
}
|
||||
for _, pid := range pids {
|
||||
if p, err := os.FindProcess(pid); err == nil {
|
||||
procs = append(procs, p)
|
||||
if err := p.Kill(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := m.Freeze(configs.Thawed); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
for _, p := range procs {
|
||||
if _, err := p.Wait(); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,67 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package keyctl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"syscall"
|
||||
"strings"
|
||||
"strconv"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const KEYCTL_JOIN_SESSION_KEYRING = 1
|
||||
const KEYCTL_SETPERM = 5
|
||||
const KEYCTL_DESCRIBE = 6
|
||||
|
||||
type KeySerial uint32
|
||||
|
||||
func JoinSessionKeyring(name string) (KeySerial, error) {
|
||||
var _name *byte = nil
|
||||
var err error
|
||||
|
||||
if len(name) > 0 {
|
||||
_name, err = syscall.BytePtrFromString(name)
|
||||
if err != nil {
|
||||
return KeySerial(0), err
|
||||
}
|
||||
}
|
||||
|
||||
sessKeyId, _, errn := syscall.Syscall(syscall.SYS_KEYCTL, KEYCTL_JOIN_SESSION_KEYRING, uintptr(unsafe.Pointer(_name)), 0)
|
||||
if errn != 0 {
|
||||
return 0, fmt.Errorf("could not create session key: %v", errn)
|
||||
}
|
||||
return KeySerial(sessKeyId), nil
|
||||
}
|
||||
|
||||
// modify permissions on a keyring by reading the current permissions,
|
||||
// anding the bits with the given mask (clearing permissions) and setting
|
||||
// additional permission bits
|
||||
func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
|
||||
dest := make([]byte, 1024)
|
||||
destBytes := unsafe.Pointer(&dest[0])
|
||||
|
||||
if _, _, err := syscall.Syscall6(syscall.SYS_KEYCTL, uintptr(KEYCTL_DESCRIBE), uintptr(ringId), uintptr(destBytes), uintptr(len(dest)), 0, 0); err != 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
res := strings.Split(string(dest), ";")
|
||||
if len(res) < 5 {
|
||||
return fmt.Errorf("Destination buffer for key description is too small")
|
||||
}
|
||||
|
||||
// parse permissions
|
||||
perm64, err := strconv.ParseUint(res[3], 16, 32)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
perm := (uint32(perm64) & mask) | setbits
|
||||
|
||||
if _, _, err := syscall.Syscall(syscall.SYS_KEYCTL, uintptr(KEYCTL_SETPERM), uintptr(ringId), uintptr(perm)); err != 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
)
|
||||
|
||||
// list of known message types we want to send to bootstrap program
|
||||
// The number is randomly chosen to not conflict with known netlink types
|
||||
const (
|
||||
InitMsg uint16 = 62000
|
||||
CloneFlagsAttr uint16 = 27281
|
||||
ConsolePathAttr uint16 = 27282
|
||||
NsPathsAttr uint16 = 27283
|
||||
UidmapAttr uint16 = 27284
|
||||
GidmapAttr uint16 = 27285
|
||||
SetgroupAttr uint16 = 27286
|
||||
// When syscall.NLA_HDRLEN is in gccgo, take this out.
|
||||
syscall_NLA_HDRLEN = (syscall.SizeofNlAttr + syscall.NLA_ALIGNTO - 1) & ^(syscall.NLA_ALIGNTO - 1)
|
||||
)
|
||||
|
||||
type Int32msg struct {
|
||||
Type uint16
|
||||
Value uint32
|
||||
}
|
||||
|
||||
// int32msg has the following representation
|
||||
// | nlattr len | nlattr type |
|
||||
// | uint32 value |
|
||||
func (msg *Int32msg) Serialize() []byte {
|
||||
buf := make([]byte, msg.Len())
|
||||
native := nl.NativeEndian()
|
||||
native.PutUint16(buf[0:2], uint16(msg.Len()))
|
||||
native.PutUint16(buf[2:4], msg.Type)
|
||||
native.PutUint32(buf[4:8], msg.Value)
|
||||
return buf
|
||||
}
|
||||
|
||||
func (msg *Int32msg) Len() int {
|
||||
return syscall_NLA_HDRLEN + 4
|
||||
}
|
||||
|
||||
// bytemsg has the following representation
|
||||
// | nlattr len | nlattr type |
|
||||
// | value | pad |
|
||||
type Bytemsg struct {
|
||||
Type uint16
|
||||
Value []byte
|
||||
}
|
||||
|
||||
func (msg *Bytemsg) Serialize() []byte {
|
||||
l := msg.Len()
|
||||
buf := make([]byte, (l+syscall.NLA_ALIGNTO-1) & ^(syscall.NLA_ALIGNTO-1))
|
||||
native := nl.NativeEndian()
|
||||
native.PutUint16(buf[0:2], uint16(l))
|
||||
native.PutUint16(buf[2:4], msg.Type)
|
||||
copy(buf[4:], msg.Value)
|
||||
return buf
|
||||
}
|
||||
|
||||
func (msg *Bytemsg) Len() int {
|
||||
return syscall_NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
|
||||
}
|
||||
|
||||
type Boolmsg struct {
|
||||
Type uint16
|
||||
Value bool
|
||||
}
|
||||
|
||||
func (msg *Boolmsg) Serialize() []byte {
|
||||
buf := make([]byte, msg.Len())
|
||||
native := nl.NativeEndian()
|
||||
native.PutUint16(buf[0:2], uint16(msg.Len()))
|
||||
native.PutUint16(buf[2:4], msg.Type)
|
||||
if msg.Value {
|
||||
buf[4] = 1
|
||||
} else {
|
||||
buf[4] = 0
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (msg *Boolmsg) Len() int {
|
||||
return syscall_NLA_HDRLEN + 1
|
||||
}
|
|
@ -1,259 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
var strategies = map[string]networkStrategy{
|
||||
"veth": &veth{},
|
||||
"loopback": &loopback{},
|
||||
}
|
||||
|
||||
// networkStrategy represents a specific network configuration for
|
||||
// a container's networking stack
|
||||
type networkStrategy interface {
|
||||
create(*network, int) error
|
||||
initialize(*network) error
|
||||
detach(*configs.Network) error
|
||||
attach(*configs.Network) error
|
||||
}
|
||||
|
||||
// getStrategy returns the specific network strategy for the
|
||||
// provided type.
|
||||
func getStrategy(tpe string) (networkStrategy, error) {
|
||||
s, exists := strategies[tpe]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("unknown strategy type %q", tpe)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
|
||||
func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
|
||||
out := &NetworkInterface{Name: interfaceName}
|
||||
// This can happen if the network runtime information is missing - possible if the
|
||||
// container was created by an old version of libcontainer.
|
||||
if interfaceName == "" {
|
||||
return out, nil
|
||||
}
|
||||
type netStatsPair struct {
|
||||
// Where to write the output.
|
||||
Out *uint64
|
||||
// The network stats file to read.
|
||||
File string
|
||||
}
|
||||
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
|
||||
netStats := []netStatsPair{
|
||||
{Out: &out.RxBytes, File: "tx_bytes"},
|
||||
{Out: &out.RxPackets, File: "tx_packets"},
|
||||
{Out: &out.RxErrors, File: "tx_errors"},
|
||||
{Out: &out.RxDropped, File: "tx_dropped"},
|
||||
|
||||
{Out: &out.TxBytes, File: "rx_bytes"},
|
||||
{Out: &out.TxPackets, File: "rx_packets"},
|
||||
{Out: &out.TxErrors, File: "rx_errors"},
|
||||
{Out: &out.TxDropped, File: "rx_dropped"},
|
||||
}
|
||||
for _, netStat := range netStats {
|
||||
data, err := readSysfsNetworkStats(interfaceName, netStat.File)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
*(netStat.Out) = data
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
|
||||
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
|
||||
}
|
||||
|
||||
// loopback is a network strategy that provides a basic loopback device
|
||||
type loopback struct {
|
||||
}
|
||||
|
||||
func (l *loopback) create(n *network, nspid int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *loopback) initialize(config *network) error {
|
||||
return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}})
|
||||
}
|
||||
|
||||
func (l *loopback) attach(n *configs.Network) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *loopback) detach(n *configs.Network) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// veth is a network strategy that uses a bridge and creates
|
||||
// a veth pair, one that is attached to the bridge on the host and the other
|
||||
// is placed inside the container's namespace
|
||||
type veth struct {
|
||||
}
|
||||
|
||||
func (v *veth) detach(n *configs.Network) (err error) {
|
||||
return netlink.LinkSetMaster(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil)
|
||||
}
|
||||
|
||||
// attach a container network interface to an external network
|
||||
func (v *veth) attach(n *configs.Network) (err error) {
|
||||
brl, err := netlink.LinkByName(n.Bridge)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
br, ok := brl.(*netlink.Bridge)
|
||||
if !ok {
|
||||
return fmt.Errorf("Wrong device type %T", brl)
|
||||
}
|
||||
host, err := netlink.LinkByName(n.HostInterfaceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := netlink.LinkSetMaster(host, br); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetMTU(host, n.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if n.HairpinMode {
|
||||
if err := netlink.LinkSetHairpin(host, true); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := netlink.LinkSetUp(host); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *veth) create(n *network, nspid int) (err error) {
|
||||
tmpName, err := v.generateTempPeerName()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.TempVethPeerName = tmpName
|
||||
if n.Bridge == "" {
|
||||
return fmt.Errorf("bridge is not specified")
|
||||
}
|
||||
veth := &netlink.Veth{
|
||||
LinkAttrs: netlink.LinkAttrs{
|
||||
Name: n.HostInterfaceName,
|
||||
TxQLen: n.TxQueueLen,
|
||||
},
|
||||
PeerName: n.TempVethPeerName,
|
||||
}
|
||||
if err := netlink.LinkAdd(veth); err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
netlink.LinkDel(veth)
|
||||
}
|
||||
}()
|
||||
if err := v.attach(&n.Network); err != nil {
|
||||
return err
|
||||
}
|
||||
child, err := netlink.LinkByName(n.TempVethPeerName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.LinkSetNsPid(child, nspid)
|
||||
}
|
||||
|
||||
func (v *veth) generateTempPeerName() (string, error) {
|
||||
return utils.GenerateRandomName("veth", 7)
|
||||
}
|
||||
|
||||
func (v *veth) initialize(config *network) error {
|
||||
peer := config.TempVethPeerName
|
||||
if peer == "" {
|
||||
return fmt.Errorf("peer is not specified")
|
||||
}
|
||||
child, err := netlink.LinkByName(peer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetDown(child); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetName(child, config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
// get the interface again after we changed the name as the index also changes.
|
||||
if child, err = netlink.LinkByName(config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.MacAddress != "" {
|
||||
mac, err := net.ParseMAC(config.MacAddress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
ip, err := netlink.ParseAddr(config.Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.AddrAdd(child, ip); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.IPv6Address != "" {
|
||||
ip6, err := netlink.ParseAddr(config.IPv6Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.AddrAdd(child, ip6); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetUp(child); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Gateway != "" {
|
||||
gw := net.ParseIP(config.Gateway)
|
||||
if err := netlink.RouteAdd(&netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
LinkIndex: child.Attrs().Index,
|
||||
Gw: gw,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if config.IPv6Gateway != "" {
|
||||
gw := net.ParseIP(config.IPv6Gateway)
|
||||
if err := netlink.RouteAdd(&netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
LinkIndex: child.Attrs().Index,
|
||||
Gw: gw,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,89 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
const oomCgroupName = "memory"
|
||||
|
||||
type PressureLevel uint
|
||||
|
||||
const (
|
||||
LowPressure PressureLevel = iota
|
||||
MediumPressure
|
||||
CriticalPressure
|
||||
)
|
||||
|
||||
func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) {
|
||||
evFile, err := os.Open(filepath.Join(cgDir, evName))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0)
|
||||
if syserr != 0 {
|
||||
evFile.Close()
|
||||
return nil, syserr
|
||||
}
|
||||
|
||||
eventfd := os.NewFile(fd, "eventfd")
|
||||
|
||||
eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
|
||||
data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
|
||||
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
|
||||
eventfd.Close()
|
||||
evFile.Close()
|
||||
return nil, err
|
||||
}
|
||||
ch := make(chan struct{})
|
||||
go func() {
|
||||
defer func() {
|
||||
close(ch)
|
||||
eventfd.Close()
|
||||
evFile.Close()
|
||||
}()
|
||||
buf := make([]byte, 8)
|
||||
for {
|
||||
if _, err := eventfd.Read(buf); err != nil {
|
||||
return
|
||||
}
|
||||
// When a cgroup is destroyed, an event is sent to eventfd.
|
||||
// So if the control path is gone, return instead of notifying.
|
||||
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
|
||||
return
|
||||
}
|
||||
ch <- struct{}{}
|
||||
}
|
||||
}()
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// notifyOnOOM returns channel on which you can expect event about OOM,
|
||||
// if process died without OOM this channel will be closed.
|
||||
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("path %q missing", oomCgroupName)
|
||||
}
|
||||
|
||||
return registerMemoryEvent(dir, "memory.oom_control", "")
|
||||
}
|
||||
|
||||
func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("path %q missing", oomCgroupName)
|
||||
}
|
||||
|
||||
if level > CriticalPressure {
|
||||
return nil, fmt.Errorf("invalid pressure level %d", level)
|
||||
}
|
||||
|
||||
levelStr := []string{"low", "medium", "critical"}[level]
|
||||
return registerMemoryEvent(dir, "memory.pressure_level", levelStr)
|
||||
}
|
|
@ -1,115 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
)
|
||||
|
||||
type processOperations interface {
|
||||
wait() (*os.ProcessState, error)
|
||||
signal(sig os.Signal) error
|
||||
pid() int
|
||||
}
|
||||
|
||||
// Process specifies the configuration and IO for a process inside
|
||||
// a container.
|
||||
type Process struct {
|
||||
// The command to be run followed by any arguments.
|
||||
Args []string
|
||||
|
||||
// Env specifies the environment variables for the process.
|
||||
Env []string
|
||||
|
||||
// User will set the uid and gid of the executing process running inside the container
|
||||
// local to the container's user and group configuration.
|
||||
User string
|
||||
|
||||
// Cwd will change the processes current working directory inside the container's rootfs.
|
||||
Cwd string
|
||||
|
||||
// Stdin is a pointer to a reader which provides the standard input stream.
|
||||
Stdin io.Reader
|
||||
|
||||
// Stdout is a pointer to a writer which receives the standard output stream.
|
||||
Stdout io.Writer
|
||||
|
||||
// Stderr is a pointer to a writer which receives the standard error stream.
|
||||
Stderr io.Writer
|
||||
|
||||
// ExtraFiles specifies additional open files to be inherited by the container
|
||||
ExtraFiles []*os.File
|
||||
|
||||
// consolePath is the path to the console allocated to the container.
|
||||
consolePath string
|
||||
|
||||
// Capabilities specify the capabilities to keep when executing the process inside the container
|
||||
// All capabilities not specified will be dropped from the processes capability mask
|
||||
Capabilities []string
|
||||
|
||||
// AppArmorProfile specifies the profile to apply to the process and is
|
||||
// changed at the time the process is execed
|
||||
AppArmorProfile string
|
||||
|
||||
// Label specifies the label to apply to the process. It is commonly used by selinux
|
||||
Label string
|
||||
|
||||
// NoNewPrivileges controls whether processes can gain additional privileges.
|
||||
NoNewPrivileges *bool
|
||||
|
||||
ops processOperations
|
||||
}
|
||||
|
||||
// Wait waits for the process to exit.
|
||||
// Wait releases any resources associated with the Process
|
||||
func (p Process) Wait() (*os.ProcessState, error) {
|
||||
if p.ops == nil {
|
||||
return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.wait()
|
||||
}
|
||||
|
||||
// Pid returns the process ID
|
||||
func (p Process) Pid() (int, error) {
|
||||
// math.MinInt32 is returned here, because it's invalid value
|
||||
// for the kill() system call.
|
||||
if p.ops == nil {
|
||||
return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.pid(), nil
|
||||
}
|
||||
|
||||
// Signal sends a signal to the Process.
|
||||
func (p Process) Signal(sig os.Signal) error {
|
||||
if p.ops == nil {
|
||||
return newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.signal(sig)
|
||||
}
|
||||
|
||||
// IO holds the process's STDIO
|
||||
type IO struct {
|
||||
Stdin io.WriteCloser
|
||||
Stdout io.ReadCloser
|
||||
Stderr io.ReadCloser
|
||||
}
|
||||
|
||||
// NewConsole creates new console for process and returns it
|
||||
func (p *Process) NewConsole(rootuid int) (Console, error) {
|
||||
console, err := NewConsole(rootuid, rootuid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
p.consolePath = console.Path()
|
||||
return console, nil
|
||||
}
|
||||
|
||||
// ConsoleFromPath sets the process's console with the path provided
|
||||
func (p *Process) ConsoleFromPath(path string) error {
|
||||
if p.consolePath != "" {
|
||||
return newGenericError(fmt.Errorf("console path already exists for process"), ConsoleExists)
|
||||
}
|
||||
p.consolePath = path
|
||||
return nil
|
||||
}
|
|
@ -1,476 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
type parentProcess interface {
|
||||
// pid returns the pid for the running process.
|
||||
pid() int
|
||||
|
||||
// start starts the process execution.
|
||||
start() error
|
||||
|
||||
// send a SIGKILL to the process and wait for the exit.
|
||||
terminate() error
|
||||
|
||||
// wait waits on the process returning the process state.
|
||||
wait() (*os.ProcessState, error)
|
||||
|
||||
// startTime return's the process start time.
|
||||
startTime() (string, error)
|
||||
|
||||
signal(os.Signal) error
|
||||
|
||||
externalDescriptors() []string
|
||||
|
||||
setExternalDescriptors(fds []string)
|
||||
}
|
||||
|
||||
type setnsProcess struct {
|
||||
cmd *exec.Cmd
|
||||
parentPipe *os.File
|
||||
childPipe *os.File
|
||||
cgroupPaths map[string]string
|
||||
config *initConfig
|
||||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
}
|
||||
|
||||
func (p *setnsProcess) startTime() (string, error) {
|
||||
return system.GetProcessStartTime(p.pid())
|
||||
}
|
||||
|
||||
func (p *setnsProcess) signal(sig os.Signal) error {
|
||||
s, ok := sig.(syscall.Signal)
|
||||
if !ok {
|
||||
return errors.New("os: unsupported signal type")
|
||||
}
|
||||
return syscall.Kill(p.pid(), s)
|
||||
}
|
||||
|
||||
func (p *setnsProcess) start() (err error) {
|
||||
defer p.parentPipe.Close()
|
||||
err = p.cmd.Start()
|
||||
p.childPipe.Close()
|
||||
if err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
if p.bootstrapData != nil {
|
||||
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
if err = p.execSetns(); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
if len(p.cgroupPaths) > 0 {
|
||||
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// set oom_score_adj
|
||||
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
||||
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
var ierr *genericError
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&ierr); err != nil && err != io.EOF {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
p.wait()
|
||||
return newSystemError(ierr)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// execSetns runs the process that executes C code to perform the setns calls
|
||||
// because setns support requires the C process to fork off a child and perform the setns
|
||||
// before the go runtime boots, we wait on the process to die and receive the child's pid
|
||||
// over the provided pipe.
|
||||
func (p *setnsProcess) execSetns() error {
|
||||
status, err := p.cmd.Process.Wait()
|
||||
if err != nil {
|
||||
p.cmd.Wait()
|
||||
return newSystemError(err)
|
||||
}
|
||||
if !status.Success() {
|
||||
p.cmd.Wait()
|
||||
return newSystemError(&exec.ExitError{ProcessState: status})
|
||||
}
|
||||
var pid *pid
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
||||
p.cmd.Wait()
|
||||
return newSystemError(err)
|
||||
}
|
||||
process, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.cmd.Process = process
|
||||
p.process.ops = p
|
||||
return nil
|
||||
}
|
||||
|
||||
// terminate sends a SIGKILL to the forked process for the setns routine then waits to
|
||||
// avoid the process becomming a zombie.
|
||||
func (p *setnsProcess) terminate() error {
|
||||
if p.cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
err := p.cmd.Process.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *setnsProcess) wait() (*os.ProcessState, error) {
|
||||
err := p.cmd.Wait()
|
||||
|
||||
// Return actual ProcessState even on Wait error
|
||||
return p.cmd.ProcessState, err
|
||||
}
|
||||
|
||||
func (p *setnsProcess) pid() int {
|
||||
return p.cmd.Process.Pid
|
||||
}
|
||||
|
||||
func (p *setnsProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
func (p *setnsProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
type initProcess struct {
|
||||
cmd *exec.Cmd
|
||||
parentPipe *os.File
|
||||
childPipe *os.File
|
||||
config *initConfig
|
||||
manager cgroups.Manager
|
||||
container *linuxContainer
|
||||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
sharePidns bool
|
||||
}
|
||||
|
||||
func (p *initProcess) pid() int {
|
||||
return p.cmd.Process.Pid
|
||||
}
|
||||
|
||||
func (p *initProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
// execSetns runs the process that executes C code to perform the setns calls
|
||||
// because setns support requires the C process to fork off a child and perform the setns
|
||||
// before the go runtime boots, we wait on the process to die and receive the child's pid
|
||||
// over the provided pipe.
|
||||
// This is called by initProcess.start function
|
||||
func (p *initProcess) execSetns() error {
|
||||
status, err := p.cmd.Process.Wait()
|
||||
if err != nil {
|
||||
p.cmd.Wait()
|
||||
return err
|
||||
}
|
||||
if !status.Success() {
|
||||
p.cmd.Wait()
|
||||
return &exec.ExitError{ProcessState: status}
|
||||
}
|
||||
var pid *pid
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
||||
p.cmd.Wait()
|
||||
return err
|
||||
}
|
||||
process, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.cmd.Process = process
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) start() error {
|
||||
defer p.parentPipe.Close()
|
||||
err := p.cmd.Start()
|
||||
p.process.ops = p
|
||||
p.childPipe.Close()
|
||||
if err != nil {
|
||||
p.process.ops = nil
|
||||
return newSystemError(err)
|
||||
}
|
||||
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := p.execSetns(); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Save the standard descriptor names before the container process
|
||||
// can potentially move them (e.g., via dup2()). If we don't do this now,
|
||||
// we won't know at checkpoint time which file descriptor to look up.
|
||||
fds, err := getPipeFds(p.pid())
|
||||
if err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
p.setExternalDescriptors(fds)
|
||||
// Do this before syncing with child so that no children
|
||||
// can escape the cgroup
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// TODO: should not be the responsibility to call here
|
||||
p.manager.Destroy()
|
||||
}
|
||||
}()
|
||||
if err := p.createNetworkInterfaces(); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
if err := p.sendConfig(); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
var (
|
||||
procSync syncT
|
||||
sentRun bool
|
||||
sentResume bool
|
||||
ierr *genericError
|
||||
)
|
||||
|
||||
dec := json.NewDecoder(p.parentPipe)
|
||||
loop:
|
||||
for {
|
||||
if err := dec.Decode(&procSync); err != nil {
|
||||
if err == io.EOF {
|
||||
break loop
|
||||
}
|
||||
return newSystemError(err)
|
||||
}
|
||||
switch procSync.Type {
|
||||
case procReady:
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// set oom_score_adj
|
||||
if err := setOomScoreAdj(p.config.Config.OomScoreAdj, p.pid()); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// call prestart hooks
|
||||
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
if p.config.Config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: p.container.config.Version,
|
||||
ID: p.container.id,
|
||||
Pid: p.pid(),
|
||||
Root: p.config.Config.Rootfs,
|
||||
}
|
||||
for _, hook := range p.config.Config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sync with child.
|
||||
if err := utils.WriteJSON(p.parentPipe, syncT{procRun}); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
sentRun = true
|
||||
case procHooks:
|
||||
if p.config.Config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: p.container.config.Version,
|
||||
ID: p.container.id,
|
||||
Pid: p.pid(),
|
||||
Root: p.config.Config.Rootfs,
|
||||
}
|
||||
for _, hook := range p.config.Config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sync with child.
|
||||
if err := utils.WriteJSON(p.parentPipe, syncT{procResume}); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
sentResume = true
|
||||
case procError:
|
||||
// wait for the child process to fully complete and receive an error message
|
||||
// if one was encoutered
|
||||
if err := dec.Decode(&ierr); err != nil && err != io.EOF {
|
||||
return newSystemError(err)
|
||||
}
|
||||
if ierr != nil {
|
||||
break loop
|
||||
}
|
||||
// Programmer error.
|
||||
panic("No error following JSON procError payload.")
|
||||
default:
|
||||
return newSystemError(fmt.Errorf("invalid JSON synchronisation payload from child"))
|
||||
}
|
||||
}
|
||||
if !sentRun {
|
||||
return newSystemError(fmt.Errorf("could not synchronise with container process"))
|
||||
}
|
||||
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
|
||||
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
|
||||
}
|
||||
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
p.wait()
|
||||
return newSystemError(ierr)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) wait() (*os.ProcessState, error) {
|
||||
err := p.cmd.Wait()
|
||||
if err != nil {
|
||||
return p.cmd.ProcessState, err
|
||||
}
|
||||
// we should kill all processes in cgroup when init is died if we use host PID namespace
|
||||
if p.sharePidns {
|
||||
killCgroupProcesses(p.manager)
|
||||
}
|
||||
return p.cmd.ProcessState, nil
|
||||
}
|
||||
|
||||
func (p *initProcess) terminate() error {
|
||||
if p.cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
err := p.cmd.Process.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *initProcess) startTime() (string, error) {
|
||||
return system.GetProcessStartTime(p.pid())
|
||||
}
|
||||
|
||||
func (p *initProcess) sendConfig() error {
|
||||
// send the config to the container's init process, we don't use JSON Encode
|
||||
// here because there might be a problem in JSON decoder in some cases, see:
|
||||
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
|
||||
return utils.WriteJSON(p.parentPipe, p.config)
|
||||
}
|
||||
|
||||
func (p *initProcess) createNetworkInterfaces() error {
|
||||
for _, config := range p.config.Config.Networks {
|
||||
strategy, err := getStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n := &network{
|
||||
Network: *config,
|
||||
}
|
||||
if err := strategy.create(n, p.pid()); err != nil {
|
||||
return err
|
||||
}
|
||||
p.config.Networks = append(p.config.Networks, n)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) signal(sig os.Signal) error {
|
||||
s, ok := sig.(syscall.Signal)
|
||||
if !ok {
|
||||
return errors.New("os: unsupported signal type")
|
||||
}
|
||||
return syscall.Kill(p.pid(), s)
|
||||
}
|
||||
|
||||
func (p *initProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
func getPipeFds(pid int) ([]string, error) {
|
||||
fds := make([]string, 3)
|
||||
|
||||
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
|
||||
for i := 0; i < 3; i++ {
|
||||
f := filepath.Join(dirPath, strconv.Itoa(i))
|
||||
target, err := os.Readlink(f)
|
||||
if err != nil {
|
||||
return fds, err
|
||||
}
|
||||
fds[i] = target
|
||||
}
|
||||
return fds, nil
|
||||
}
|
||||
|
||||
// InitializeIO creates pipes for use with the process's STDIO
|
||||
// and returns the opposite side for each
|
||||
func (p *Process) InitializeIO(rootuid int) (i *IO, err error) {
|
||||
var fds []uintptr
|
||||
i = &IO{}
|
||||
// cleanup in case of an error
|
||||
defer func() {
|
||||
if err != nil {
|
||||
for _, fd := range fds {
|
||||
syscall.Close(int(fd))
|
||||
}
|
||||
}
|
||||
}()
|
||||
// STDIN
|
||||
r, w, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.Stdin, i.Stdin = r, w
|
||||
// STDOUT
|
||||
if r, w, err = os.Pipe(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.Stdout, i.Stdout = w, r
|
||||
// STDERR
|
||||
if r, w, err = os.Pipe(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.Stderr, i.Stderr = w, r
|
||||
// change ownership of the pipes incase we are in a user namespace
|
||||
for _, fd := range fds {
|
||||
if err := syscall.Fchown(int(fd), rootuid, rootuid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return i, nil
|
||||
}
|
|
@ -1,122 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
|
||||
var (
|
||||
err error
|
||||
)
|
||||
proc, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
started, err := system.GetProcessStartTime(pid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &restoredProcess{
|
||||
proc: proc,
|
||||
processStartTime: started,
|
||||
fds: fds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type restoredProcess struct {
|
||||
proc *os.Process
|
||||
processStartTime string
|
||||
fds []string
|
||||
}
|
||||
|
||||
func (p *restoredProcess) start() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
|
||||
}
|
||||
|
||||
func (p *restoredProcess) pid() int {
|
||||
return p.proc.Pid
|
||||
}
|
||||
|
||||
func (p *restoredProcess) terminate() error {
|
||||
err := p.proc.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *restoredProcess) wait() (*os.ProcessState, error) {
|
||||
// TODO: how do we wait on the actual process?
|
||||
// maybe use --exec-cmd in criu
|
||||
st, err := p.proc.Wait()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return st, nil
|
||||
}
|
||||
|
||||
func (p *restoredProcess) startTime() (string, error) {
|
||||
return p.processStartTime, nil
|
||||
}
|
||||
|
||||
func (p *restoredProcess) signal(s os.Signal) error {
|
||||
return p.proc.Signal(s)
|
||||
}
|
||||
|
||||
func (p *restoredProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
func (p *restoredProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
// nonChildProcess represents a process where the calling process is not
|
||||
// the parent process. This process is created when a factory loads a container from
|
||||
// a persisted state.
|
||||
type nonChildProcess struct {
|
||||
processPid int
|
||||
processStartTime string
|
||||
fds []string
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) start() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) pid() int {
|
||||
return p.processPid
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) terminate() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) wait() (*os.ProcessState, error) {
|
||||
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) startTime() (string, error) {
|
||||
return p.processStartTime, nil
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) signal(s os.Signal) error {
|
||||
proc, err := os.FindProcess(p.processPid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return proc.Signal(s)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
|
@ -1,714 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/docker/docker/pkg/symlink"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/label"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
|
||||
// setupRootfs sets up the devices, mount points, and filesystems for use inside a
|
||||
// new mount namespace.
|
||||
func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) {
|
||||
if err := prepareRoot(config); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
||||
setupDev := len(config.Devices) != 0
|
||||
for _, m := range config.Mounts {
|
||||
for _, precmd := range m.PremountCmds {
|
||||
if err := mountCmd(precmd); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
|
||||
for _, postcmd := range m.PostmountCmds {
|
||||
if err := mountCmd(postcmd); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if setupDev {
|
||||
if err := createDevices(config); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
if err := setupPtmx(config, console); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
if err := setupDevSymlinks(config.Rootfs); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
// Signal the parent to run the pre-start hooks.
|
||||
// The hooks are run after the mounts are setup, but before we switch to the new
|
||||
// root, so that the old root is still available in the hooks for any mount
|
||||
// manipulations.
|
||||
if err := syncParentHooks(pipe); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := syscall.Chdir(config.Rootfs); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
if config.NoPivotRoot {
|
||||
err = msMoveRoot(config.Rootfs)
|
||||
} else {
|
||||
err = pivotRoot(config.Rootfs, config.PivotDir)
|
||||
}
|
||||
if err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
if setupDev {
|
||||
if err := reOpenDevNull(); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
// remount dev as ro if specifed
|
||||
for _, m := range config.Mounts {
|
||||
if m.Destination == "/dev" {
|
||||
if m.Flags&syscall.MS_RDONLY != 0 {
|
||||
if err := remountReadonly(m.Destination); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
// set rootfs ( / ) as readonly
|
||||
if config.Readonlyfs {
|
||||
if err := setReadonly(); err != nil {
|
||||
return newSystemError(err)
|
||||
}
|
||||
}
|
||||
syscall.Umask(0022)
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountCmd(cmd configs.Command) error {
|
||||
|
||||
command := exec.Command(cmd.Path, cmd.Args[:]...)
|
||||
command.Env = cmd.Env
|
||||
command.Dir = cmd.Dir
|
||||
if out, err := command.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
)
|
||||
if !strings.HasPrefix(dest, rootfs) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
|
||||
switch m.Device {
|
||||
case "proc", "sysfs":
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// Selinux kernels do not support labeling of /proc or /sys
|
||||
return mountPropagate(m, rootfs, "")
|
||||
case "mqueue":
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
// older kernels do not support labeling of /dev/mqueue
|
||||
if err := mountPropagate(m, rootfs, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return label.SetFileLabel(dest, mountLabel)
|
||||
case "tmpfs":
|
||||
stat, err := os.Stat(dest)
|
||||
if err != nil {
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
if stat != nil {
|
||||
if err = os.Chmod(dest, stat.Mode()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
case "bind":
|
||||
stat, err := os.Stat(m.Source)
|
||||
if err != nil {
|
||||
// error out if the source of a bind mount does not exist as we will be
|
||||
// unable to bind anything to it.
|
||||
return err
|
||||
}
|
||||
// ensure that the destination of the bind mount is resolved of symlinks at mount time because
|
||||
// any previous mounts can invalidate the next mount's destination.
|
||||
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
|
||||
// evil stuff to try to escape the container's rootfs.
|
||||
if dest, err = symlink.FollowSymlinkInScope(filepath.Join(rootfs, m.Destination), rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkMountDestination(rootfs, dest); err != nil {
|
||||
return err
|
||||
}
|
||||
// update the mount with the correct dest after symlinks are resolved.
|
||||
m.Destination = dest
|
||||
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
// bind mount won't change mount options, we need remount to make mount options effective.
|
||||
// first check that we have non-default options required before attempting a remount
|
||||
if m.Flags&^(syscall.MS_REC|syscall.MS_REMOUNT|syscall.MS_BIND) != 0 {
|
||||
// only remount if unique mount options are set
|
||||
if err := remount(m, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Relabel != "" {
|
||||
if err := label.Validate(m.Relabel); err != nil {
|
||||
return err
|
||||
}
|
||||
shared := label.IsShared(m.Relabel)
|
||||
if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case "cgroup":
|
||||
binds, err := getCgroupMounts(m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var merged []string
|
||||
for _, b := range binds {
|
||||
ss := filepath.Base(b.Destination)
|
||||
if strings.Contains(ss, ",") {
|
||||
merged = append(merged, ss)
|
||||
}
|
||||
}
|
||||
tmpfs := &configs.Mount{
|
||||
Source: "tmpfs",
|
||||
Device: "tmpfs",
|
||||
Destination: m.Destination,
|
||||
Flags: defaultMountFlags,
|
||||
Data: "mode=755",
|
||||
PropagationFlags: m.PropagationFlags,
|
||||
}
|
||||
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, b := range binds {
|
||||
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// create symlinks for merged cgroups
|
||||
cwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Chdir(filepath.Join(rootfs, m.Destination)); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, mc := range merged {
|
||||
for _, ss := range strings.Split(mc, ",") {
|
||||
if err := os.Symlink(mc, ss); err != nil {
|
||||
// if cgroup already exists, then okay(it could have been created before)
|
||||
if os.IsExist(err) {
|
||||
continue
|
||||
}
|
||||
os.Chdir(cwd)
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := os.Chdir(cwd); err != nil {
|
||||
return err
|
||||
}
|
||||
if m.Flags&syscall.MS_RDONLY != 0 {
|
||||
// remount cgroup root as readonly
|
||||
mcgrouproot := &configs.Mount{
|
||||
Destination: m.Destination,
|
||||
Flags: defaultMountFlags | syscall.MS_RDONLY,
|
||||
}
|
||||
if err := remount(mcgrouproot, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
default:
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return mountPropagate(m, rootfs, mountLabel)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
|
||||
mounts, err := cgroups.GetCgroupMounts()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var binds []*configs.Mount
|
||||
|
||||
for _, mm := range mounts {
|
||||
dir, err := mm.GetThisCgroupDir(cgroupPaths)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
relDir, err := filepath.Rel(mm.Root, dir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
binds = append(binds, &configs.Mount{
|
||||
Device: "bind",
|
||||
Source: filepath.Join(mm.Mountpoint, relDir),
|
||||
Destination: filepath.Join(m.Destination, strings.Join(mm.Subsystems, ",")),
|
||||
Flags: syscall.MS_BIND | syscall.MS_REC | m.Flags,
|
||||
PropagationFlags: m.PropagationFlags,
|
||||
})
|
||||
}
|
||||
|
||||
return binds, nil
|
||||
}
|
||||
|
||||
// checkMountDestination checks to ensure that the mount destination is not over the top of /proc.
|
||||
// dest is required to be an abs path and have any symlinks resolved before calling this function.
|
||||
func checkMountDestination(rootfs, dest string) error {
|
||||
if libcontainerUtils.CleanPath(rootfs) == libcontainerUtils.CleanPath(dest) {
|
||||
return fmt.Errorf("mounting into / is prohibited")
|
||||
}
|
||||
invalidDestinations := []string{
|
||||
"/proc",
|
||||
}
|
||||
// White list, it should be sub directories of invalid destinations
|
||||
validDestinations := []string{
|
||||
// These entries can be bind mounted by files emulated by fuse,
|
||||
// so commands like top, free displays stats in container.
|
||||
"/proc/cpuinfo",
|
||||
"/proc/diskstats",
|
||||
"/proc/meminfo",
|
||||
"/proc/stat",
|
||||
"/proc/net/dev",
|
||||
}
|
||||
for _, valid := range validDestinations {
|
||||
path, err := filepath.Rel(filepath.Join(rootfs, valid), dest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if path == "." {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
for _, invalid := range invalidDestinations {
|
||||
path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if path == "." || !strings.HasPrefix(path, "..") {
|
||||
return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupDevSymlinks(rootfs string) error {
|
||||
var links = [][2]string{
|
||||
{"/proc/self/fd", "/dev/fd"},
|
||||
{"/proc/self/fd/0", "/dev/stdin"},
|
||||
{"/proc/self/fd/1", "/dev/stdout"},
|
||||
{"/proc/self/fd/2", "/dev/stderr"},
|
||||
}
|
||||
// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
|
||||
// in /dev if it exists in /proc.
|
||||
if _, err := os.Stat("/proc/kcore"); err == nil {
|
||||
links = append(links, [2]string{"/proc/kcore", "/dev/core"})
|
||||
}
|
||||
for _, link := range links {
|
||||
var (
|
||||
src = link[0]
|
||||
dst = filepath.Join(rootfs, link[1])
|
||||
)
|
||||
if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("symlink %s %s %s", src, dst, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs
|
||||
// this method will make them point to `/dev/null` in this container's rootfs. This
|
||||
// needs to be called after we chroot/pivot into the container's rootfs so that any
|
||||
// symlinks are resolved locally.
|
||||
func reOpenDevNull() error {
|
||||
var stat, devNullStat syscall.Stat_t
|
||||
file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to open /dev/null - %s", err)
|
||||
}
|
||||
defer file.Close()
|
||||
if err := syscall.Fstat(int(file.Fd()), &devNullStat); err != nil {
|
||||
return err
|
||||
}
|
||||
for fd := 0; fd < 3; fd++ {
|
||||
if err := syscall.Fstat(fd, &stat); err != nil {
|
||||
return err
|
||||
}
|
||||
if stat.Rdev == devNullStat.Rdev {
|
||||
// Close and re-open the fd.
|
||||
if err := syscall.Dup3(int(file.Fd()), fd, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create the device nodes in the container.
|
||||
func createDevices(config *configs.Config) error {
|
||||
useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
|
||||
oldMask := syscall.Umask(0000)
|
||||
for _, node := range config.Devices {
|
||||
// containers running in a user namespace are not allowed to mknod
|
||||
// devices so we can just bind mount it from the host.
|
||||
if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
|
||||
syscall.Umask(oldMask)
|
||||
return err
|
||||
}
|
||||
}
|
||||
syscall.Umask(oldMask)
|
||||
return nil
|
||||
}
|
||||
|
||||
func bindMountDeviceNode(dest string, node *configs.Device) error {
|
||||
f, err := os.Create(dest)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
return syscall.Mount(node.Path, dest, "bind", syscall.MS_BIND, "")
|
||||
}
|
||||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
||||
dest := filepath.Join(rootfs, node.Path)
|
||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if bind {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
}
|
||||
if err := mknodDevice(dest, node); err != nil {
|
||||
if os.IsExist(err) {
|
||||
return nil
|
||||
} else if os.IsPermission(err) {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func mknodDevice(dest string, node *configs.Device) error {
|
||||
fileMode := node.FileMode
|
||||
switch node.Type {
|
||||
case 'c':
|
||||
fileMode |= syscall.S_IFCHR
|
||||
case 'b':
|
||||
fileMode |= syscall.S_IFBLK
|
||||
default:
|
||||
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
|
||||
}
|
||||
if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
|
||||
return err
|
||||
}
|
||||
return syscall.Chown(dest, int(node.Uid), int(node.Gid))
|
||||
}
|
||||
|
||||
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
|
||||
for _, m := range mountinfo {
|
||||
if m.Mountpoint == dir {
|
||||
return m
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get the parent mount point of directory passed in as argument. Also return
|
||||
// optional fields.
|
||||
func getParentMount(rootfs string) (string, string, error) {
|
||||
var path string
|
||||
|
||||
mountinfos, err := mount.GetMounts()
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
mountinfo := getMountInfo(mountinfos, rootfs)
|
||||
if mountinfo != nil {
|
||||
return rootfs, mountinfo.Optional, nil
|
||||
}
|
||||
|
||||
path = rootfs
|
||||
for {
|
||||
path = filepath.Dir(path)
|
||||
|
||||
mountinfo = getMountInfo(mountinfos, path)
|
||||
if mountinfo != nil {
|
||||
return path, mountinfo.Optional, nil
|
||||
}
|
||||
|
||||
if path == "/" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If we are here, we did not find parent mount. Something is wrong.
|
||||
return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
|
||||
}
|
||||
|
||||
// Make parent mount private if it was shared
|
||||
func rootfsParentMountPrivate(config *configs.Config) error {
|
||||
sharedMount := false
|
||||
|
||||
parentMount, optionalOpts, err := getParentMount(config.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
optsSplit := strings.Split(optionalOpts, " ")
|
||||
for _, opt := range optsSplit {
|
||||
if strings.HasPrefix(opt, "shared:") {
|
||||
sharedMount = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Make parent mount PRIVATE if it was shared. It is needed for two
|
||||
// reasons. First of all pivot_root() will fail if parent mount is
|
||||
// shared. Secondly when we bind mount rootfs it will propagate to
|
||||
// parent namespace and we don't want that to happen.
|
||||
if sharedMount {
|
||||
return syscall.Mount("", parentMount, "", syscall.MS_PRIVATE, "")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func prepareRoot(config *configs.Config) error {
|
||||
flag := syscall.MS_SLAVE | syscall.MS_REC
|
||||
if config.RootPropagation != 0 {
|
||||
flag = config.RootPropagation
|
||||
}
|
||||
if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := rootfsParentMountPrivate(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return syscall.Mount(config.Rootfs, config.Rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, "")
|
||||
}
|
||||
|
||||
func setReadonly() error {
|
||||
return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "")
|
||||
}
|
||||
|
||||
func setupPtmx(config *configs.Config, console *linuxConsole) error {
|
||||
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
|
||||
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
|
||||
return fmt.Errorf("symlink dev ptmx %s", err)
|
||||
}
|
||||
if console != nil {
|
||||
return console.mount(config.Rootfs, config.MountLabel)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func pivotRoot(rootfs, pivotBaseDir string) (err error) {
|
||||
if pivotBaseDir == "" {
|
||||
pivotBaseDir = "/"
|
||||
}
|
||||
tmpDir := filepath.Join(rootfs, pivotBaseDir)
|
||||
if err := os.MkdirAll(tmpDir, 0755); err != nil {
|
||||
return fmt.Errorf("can't create tmp dir %s, error %v", tmpDir, err)
|
||||
}
|
||||
pivotDir, err := ioutil.TempDir(tmpDir, ".pivot_root")
|
||||
if err != nil {
|
||||
return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err)
|
||||
}
|
||||
defer func() {
|
||||
errVal := os.Remove(pivotDir)
|
||||
if err == nil {
|
||||
err = errVal
|
||||
}
|
||||
}()
|
||||
if err := syscall.PivotRoot(rootfs, pivotDir); err != nil {
|
||||
return fmt.Errorf("pivot_root %s", err)
|
||||
}
|
||||
if err := syscall.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
}
|
||||
// path to pivot dir now changed, update
|
||||
pivotDir = filepath.Join(pivotBaseDir, filepath.Base(pivotDir))
|
||||
|
||||
// Make pivotDir rprivate to make sure any of the unmounts don't
|
||||
// propagate to parent.
|
||||
if err := syscall.Mount("", pivotDir, "", syscall.MS_PRIVATE|syscall.MS_REC, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil {
|
||||
return fmt.Errorf("unmount pivot_root dir %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func msMoveRoot(rootfs string) error {
|
||||
if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := syscall.Chroot("."); err != nil {
|
||||
return err
|
||||
}
|
||||
return syscall.Chdir("/")
|
||||
}
|
||||
|
||||
// createIfNotExists creates a file or a directory only if it does not already exist.
|
||||
func createIfNotExists(path string, isDir bool) error {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if isDir {
|
||||
return os.MkdirAll(path, 0755)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
f, err := os.OpenFile(path, os.O_CREATE, 0755)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.Close()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// remountReadonly will bind over the top of an existing path and ensure that it is read-only.
|
||||
func remountReadonly(path string) error {
|
||||
for i := 0; i < 5; i++ {
|
||||
if err := syscall.Mount("", path, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil && !os.IsNotExist(err) {
|
||||
switch err {
|
||||
case syscall.EINVAL:
|
||||
// Probably not a mountpoint, use bind-mount
|
||||
if err := syscall.Mount(path, path, "", syscall.MS_BIND, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return syscall.Mount(path, path, "", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC|defaultMountFlags, "")
|
||||
case syscall.EBUSY:
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("unable to mount %s as readonly max retries reached", path)
|
||||
}
|
||||
|
||||
// maskFile bind mounts /dev/null over the top of the specified path inside a container
|
||||
// to avoid security issues from processes reading information from non-namespace aware mounts ( proc/kcore ).
|
||||
func maskFile(path string) error {
|
||||
if err := syscall.Mount("/dev/null", path, "", syscall.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeSystemProperty writes the value to a path under /proc/sys as determined from the key.
|
||||
// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
|
||||
func writeSystemProperty(key, value string) error {
|
||||
keyPath := strings.Replace(key, ".", "/", -1)
|
||||
return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
|
||||
}
|
||||
|
||||
func remount(m *configs.Mount, rootfs string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
)
|
||||
if !strings.HasPrefix(dest, rootfs) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(m.Flags|syscall.MS_REMOUNT), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Do the mount operation followed by additional mounts required to take care
|
||||
// of propagation flags.
|
||||
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
data = label.FormatMountLabel(m.Data, mountLabel)
|
||||
flags = m.Flags
|
||||
)
|
||||
if dest == "/dev" {
|
||||
flags &= ^syscall.MS_RDONLY
|
||||
}
|
||||
if !strings.HasPrefix(dest, rootfs) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
|
||||
if err := syscall.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, pflag := range m.PropagationFlags {
|
||||
if err := syscall.Mount("", dest, "", uintptr(pflag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,71 +0,0 @@
|
|||
package seccomp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var operators = map[string]configs.Operator{
|
||||
"SCMP_CMP_NE": configs.NotEqualTo,
|
||||
"SCMP_CMP_LT": configs.LessThan,
|
||||
"SCMP_CMP_LE": configs.LessThanOrEqualTo,
|
||||
"SCMP_CMP_EQ": configs.EqualTo,
|
||||
"SCMP_CMP_GE": configs.GreaterThanOrEqualTo,
|
||||
"SCMP_CMP_GT": configs.GreaterThan,
|
||||
"SCMP_CMP_MASKED_EQ": configs.MaskEqualTo,
|
||||
}
|
||||
|
||||
var actions = map[string]configs.Action{
|
||||
"SCMP_ACT_KILL": configs.Kill,
|
||||
"SCMP_ACT_ERRNO": configs.Errno,
|
||||
"SCMP_ACT_TRAP": configs.Trap,
|
||||
"SCMP_ACT_ALLOW": configs.Allow,
|
||||
"SCMP_ACT_TRACE": configs.Trace,
|
||||
}
|
||||
|
||||
var archs = map[string]string{
|
||||
"SCMP_ARCH_X86": "x86",
|
||||
"SCMP_ARCH_X86_64": "amd64",
|
||||
"SCMP_ARCH_X32": "x32",
|
||||
"SCMP_ARCH_ARM": "arm",
|
||||
"SCMP_ARCH_AARCH64": "arm64",
|
||||
"SCMP_ARCH_MIPS": "mips",
|
||||
"SCMP_ARCH_MIPS64": "mips64",
|
||||
"SCMP_ARCH_MIPS64N32": "mips64n32",
|
||||
"SCMP_ARCH_MIPSEL": "mipsel",
|
||||
"SCMP_ARCH_MIPSEL64": "mipsel64",
|
||||
"SCMP_ARCH_MIPSEL64N32": "mipsel64n32",
|
||||
}
|
||||
|
||||
// ConvertStringToOperator converts a string into a Seccomp comparison operator.
|
||||
// Comparison operators use the names they are assigned by Libseccomp's header.
|
||||
// Attempting to convert a string that is not a valid operator results in an
|
||||
// error.
|
||||
func ConvertStringToOperator(in string) (configs.Operator, error) {
|
||||
if op, ok := operators[in]; ok == true {
|
||||
return op, nil
|
||||
}
|
||||
return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in)
|
||||
}
|
||||
|
||||
// ConvertStringToAction converts a string into a Seccomp rule match action.
|
||||
// Actions use the names they are assigned in Libseccomp's header, though some
|
||||
// (notable, SCMP_ACT_TRACE) are not available in this implementation and will
|
||||
// return errors.
|
||||
// Attempting to convert a string that is not a valid action results in an
|
||||
// error.
|
||||
func ConvertStringToAction(in string) (configs.Action, error) {
|
||||
if act, ok := actions[in]; ok == true {
|
||||
return act, nil
|
||||
}
|
||||
return 0, fmt.Errorf("string %s is not a valid action for seccomp", in)
|
||||
}
|
||||
|
||||
// ConvertStringToArch converts a string into a Seccomp comparison arch.
|
||||
func ConvertStringToArch(in string) (string, error) {
|
||||
if arch, ok := archs[in]; ok == true {
|
||||
return arch, nil
|
||||
}
|
||||
return "", fmt.Errorf("string %s is not a valid arch for seccomp", in)
|
||||
}
|
|
@ -1,231 +0,0 @@
|
|||
// +build linux,cgo,seccomp
|
||||
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||
)
|
||||
|
||||
var (
|
||||
actAllow = libseccomp.ActAllow
|
||||
actTrap = libseccomp.ActTrap
|
||||
actKill = libseccomp.ActKill
|
||||
actTrace = libseccomp.ActTrace.SetReturnCode(int16(syscall.EPERM))
|
||||
actErrno = libseccomp.ActErrno.SetReturnCode(int16(syscall.EPERM))
|
||||
|
||||
// SeccompModeFilter refers to the syscall argument SECCOMP_MODE_FILTER.
|
||||
SeccompModeFilter = uintptr(2)
|
||||
)
|
||||
|
||||
// Filters given syscalls in a container, preventing them from being used
|
||||
// Started in the container init process, and carried over to all child processes
|
||||
// Setns calls, however, require a separate invocation, as they are not children
|
||||
// of the init until they join the namespace
|
||||
func InitSeccomp(config *configs.Seccomp) error {
|
||||
if config == nil {
|
||||
return fmt.Errorf("cannot initialize Seccomp - nil config passed")
|
||||
}
|
||||
|
||||
defaultAction, err := getAction(config.DefaultAction)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error initializing seccomp - invalid default action")
|
||||
}
|
||||
|
||||
filter, err := libseccomp.NewFilter(defaultAction)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating filter: %s", err)
|
||||
}
|
||||
|
||||
// Add extra architectures
|
||||
for _, arch := range config.Architectures {
|
||||
scmpArch, err := libseccomp.GetArchFromString(arch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := filter.AddArch(scmpArch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Unset no new privs bit
|
||||
if err := filter.SetNoNewPrivsBit(false); err != nil {
|
||||
return fmt.Errorf("error setting no new privileges: %s", err)
|
||||
}
|
||||
|
||||
// Add a rule for each syscall
|
||||
for _, call := range config.Syscalls {
|
||||
if call == nil {
|
||||
return fmt.Errorf("encountered nil syscall while initializing Seccomp")
|
||||
}
|
||||
|
||||
if err = matchCall(filter, call); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err = filter.Load(); err != nil {
|
||||
return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsEnabled returns if the kernel has been configured to support seccomp.
|
||||
func IsEnabled() bool {
|
||||
// Try to read from /proc/self/status for kernels > 3.8
|
||||
s, err := parseStatusFile("/proc/self/status")
|
||||
if err != nil {
|
||||
// Check if Seccomp is supported, via CONFIG_SECCOMP.
|
||||
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_SECCOMP, 0, 0); err != syscall.EINVAL {
|
||||
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
|
||||
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_SECCOMP, SeccompModeFilter, 0); err != syscall.EINVAL {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
_, ok := s["Seccomp"]
|
||||
return ok
|
||||
}
|
||||
|
||||
// Convert Libcontainer Action to Libseccomp ScmpAction
|
||||
func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
|
||||
switch act {
|
||||
case configs.Kill:
|
||||
return actKill, nil
|
||||
case configs.Errno:
|
||||
return actErrno, nil
|
||||
case configs.Trap:
|
||||
return actTrap, nil
|
||||
case configs.Allow:
|
||||
return actAllow, nil
|
||||
case configs.Trace:
|
||||
return actTrace, nil
|
||||
default:
|
||||
return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
|
||||
}
|
||||
}
|
||||
|
||||
// Convert Libcontainer Operator to Libseccomp ScmpCompareOp
|
||||
func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
|
||||
switch op {
|
||||
case configs.EqualTo:
|
||||
return libseccomp.CompareEqual, nil
|
||||
case configs.NotEqualTo:
|
||||
return libseccomp.CompareNotEqual, nil
|
||||
case configs.GreaterThan:
|
||||
return libseccomp.CompareGreater, nil
|
||||
case configs.GreaterThanOrEqualTo:
|
||||
return libseccomp.CompareGreaterEqual, nil
|
||||
case configs.LessThan:
|
||||
return libseccomp.CompareLess, nil
|
||||
case configs.LessThanOrEqualTo:
|
||||
return libseccomp.CompareLessOrEqual, nil
|
||||
case configs.MaskEqualTo:
|
||||
return libseccomp.CompareMaskedEqual, nil
|
||||
default:
|
||||
return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
|
||||
}
|
||||
}
|
||||
|
||||
// Convert Libcontainer Arg to Libseccomp ScmpCondition
|
||||
func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
|
||||
cond := libseccomp.ScmpCondition{}
|
||||
|
||||
if arg == nil {
|
||||
return cond, fmt.Errorf("cannot convert nil to syscall condition")
|
||||
}
|
||||
|
||||
op, err := getOperator(arg.Op)
|
||||
if err != nil {
|
||||
return cond, err
|
||||
}
|
||||
|
||||
return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
|
||||
}
|
||||
|
||||
// Add a rule to match a single syscall
|
||||
func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
|
||||
if call == nil || filter == nil {
|
||||
return fmt.Errorf("cannot use nil as syscall to block")
|
||||
}
|
||||
|
||||
if len(call.Name) == 0 {
|
||||
return fmt.Errorf("empty string is not a valid syscall")
|
||||
}
|
||||
|
||||
// If we can't resolve the syscall, assume it's not supported on this kernel
|
||||
// Ignore it, don't error out
|
||||
callNum, err := libseccomp.GetSyscallFromName(call.Name)
|
||||
if err != nil {
|
||||
log.Printf("Error resolving syscall name %s: %s - ignoring syscall.", call.Name, err)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert the call's action to the libseccomp equivalent
|
||||
callAct, err := getAction(call.Action)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Unconditional match - just add the rule
|
||||
if len(call.Args) == 0 {
|
||||
if err = filter.AddRule(callNum, callAct); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// Conditional match - convert the per-arg rules into library format
|
||||
conditions := []libseccomp.ScmpCondition{}
|
||||
|
||||
for _, cond := range call.Args {
|
||||
newCond, err := getCondition(cond)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
conditions = append(conditions, newCond)
|
||||
}
|
||||
|
||||
if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseStatusFile(path string) (map[string]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
status := make(map[string]string)
|
||||
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
text := s.Text()
|
||||
parts := strings.Split(text, ":")
|
||||
|
||||
if len(parts) <= 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
status[parts[0]] = parts[1]
|
||||
}
|
||||
return status, nil
|
||||
}
|
|
@ -1,24 +0,0 @@
|
|||
// +build !linux !cgo !seccomp
|
||||
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")
|
||||
|
||||
// Seccomp not supported, do nothing
|
||||
func InitSeccomp(config *configs.Seccomp) error {
|
||||
if config != nil {
|
||||
return ErrSeccompNotEnabled
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsEnabled returns false, because it is not supported.
|
||||
func IsEnabled() bool {
|
||||
return false
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
// +build linux,go1.5
|
||||
|
||||
package libcontainer
|
||||
|
||||
import "syscall"
|
||||
|
||||
// Set the GidMappingsEnableSetgroups member to true, so the process's
|
||||
// setgroups proc entry wont be set to 'deny' if GidMappings are set
|
||||
func enableSetgroups(sys *syscall.SysProcAttr) {
|
||||
sys.GidMappingsEnableSetgroups = true
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/label"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
// linuxSetnsInit performs the container's initialization for running a new process
|
||||
// inside an existing container.
|
||||
type linuxSetnsInit struct {
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
func (l *linuxSetnsInit) getSessionRingName() string {
|
||||
return fmt.Sprintf("_ses.%s", l.config.ContainerId)
|
||||
}
|
||||
|
||||
func (l *linuxSetnsInit) Init() error {
|
||||
// do not inherit the parent's session keyring
|
||||
if _, err := keyctl.JoinSessionKeyring(l.getSessionRingName()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRlimits(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.NoNewPrivileges {
|
||||
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if l.config.Config.Seccomp != nil {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.ProcessLabel != "" {
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||||
}
|
|
@ -1,27 +0,0 @@
|
|||
package stacktrace
|
||||
|
||||
import "runtime"
|
||||
|
||||
// Caputure captures a stacktrace for the current calling go program
|
||||
//
|
||||
// skip is the number of frames to skip
|
||||
func Capture(userSkip int) Stacktrace {
|
||||
var (
|
||||
skip = userSkip + 1 // add one for our own function
|
||||
frames []Frame
|
||||
prevPc uintptr = 0
|
||||
)
|
||||
for i := skip; ; i++ {
|
||||
pc, file, line, ok := runtime.Caller(i)
|
||||
//detect if caller is repeated to avoid loop, gccgo
|
||||
//currently runs into a loop without this check
|
||||
if !ok || pc == prevPc {
|
||||
break
|
||||
}
|
||||
frames = append(frames, NewFrame(pc, file, line))
|
||||
prevPc = pc
|
||||
}
|
||||
return Stacktrace{
|
||||
Frames: frames,
|
||||
}
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
package stacktrace
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// NewFrame returns a new stack frame for the provided information
|
||||
func NewFrame(pc uintptr, file string, line int) Frame {
|
||||
fn := runtime.FuncForPC(pc)
|
||||
if fn == nil {
|
||||
return Frame{}
|
||||
}
|
||||
pack, name := parseFunctionName(fn.Name())
|
||||
return Frame{
|
||||
Line: line,
|
||||
File: filepath.Base(file),
|
||||
Package: pack,
|
||||
Function: name,
|
||||
}
|
||||
}
|
||||
|
||||
func parseFunctionName(name string) (string, string) {
|
||||
i := strings.LastIndex(name, ".")
|
||||
if i == -1 {
|
||||
return "", name
|
||||
}
|
||||
return name[:i], name[i+1:]
|
||||
}
|
||||
|
||||
// Frame contains all the information for a stack frame within a go program
|
||||
type Frame struct {
|
||||
File string
|
||||
Function string
|
||||
Package string
|
||||
Line int
|
||||
}
|
|
@ -1,5 +0,0 @@
|
|||
package stacktrace
|
||||
|
||||
type Stacktrace struct {
|
||||
Frames []Frame
|
||||
}
|
|
@ -1,150 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"syscall"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/label"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
type linuxStandardInit struct {
|
||||
pipe io.ReadWriter
|
||||
parentPid int
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
|
||||
var newperms uint32
|
||||
|
||||
if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
|
||||
// with user ns we need 'other' search permissions
|
||||
newperms = 0x8
|
||||
} else {
|
||||
// without user ns we need 'UID' search permissions
|
||||
newperms = 0x80000
|
||||
}
|
||||
|
||||
// create a unique per session container name that we can
|
||||
// join in setns; however, other containers can also join it
|
||||
return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
|
||||
}
|
||||
|
||||
// PR_SET_NO_NEW_PRIVS isn't exposed in Golang so we define it ourselves copying the value
|
||||
// the kernel
|
||||
const PR_SET_NO_NEW_PRIVS = 0x26
|
||||
|
||||
func (l *linuxStandardInit) Init() error {
|
||||
ringname, keepperms, newperms := l.getSessionRingParams()
|
||||
|
||||
// do not inherit the parent's session keyring
|
||||
sessKeyId, err := keyctl.JoinSessionKeyring(ringname)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// make session keyring searcheable
|
||||
if err := keyctl.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var console *linuxConsole
|
||||
if l.config.Console != "" {
|
||||
console = newConsoleFromPath(l.config.Console)
|
||||
if err := console.dupStdio(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if console != nil {
|
||||
if err := system.Setctty(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := setupNetwork(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRoute(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRlimits(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
label.Init()
|
||||
// InitializeMountNamespace() can be executed only for a new mount namespace
|
||||
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
if err := setupRootfs(l.config.Config, console, l.pipe); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if hostname := l.config.Config.Hostname; hostname != "" {
|
||||
if err := syscall.Sethostname([]byte(hostname)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for key, value := range l.config.Config.Sysctl {
|
||||
if err := writeSystemProperty(key, value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, path := range l.config.Config.ReadonlyPaths {
|
||||
if err := remountReadonly(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, path := range l.config.Config.MaskPaths {
|
||||
if err := maskFile(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeath, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.NoNewPrivileges {
|
||||
if err := system.Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// Tell our parent that we're ready to Execv. This must be done before the
|
||||
// Seccomp rules have been applied, because we need to be able to read and
|
||||
// write to a socket.
|
||||
if err := syncParentReady(l.pipe); err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.Config.Seccomp != nil {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := pdeath.Restore(); err != nil {
|
||||
return err
|
||||
}
|
||||
// compare the parent from the inital start of the init process and make sure that it did not change.
|
||||
// if the parent changes that means it died and we were reparened to something else so we should
|
||||
// just kill ourself and not cause problems for someone else.
|
||||
if syscall.Getppid() != l.parentPid {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||||
}
|
|
@ -1,226 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func newStateTransitionError(from, to containerState) error {
|
||||
return &stateTransitionError{
|
||||
From: from.status().String(),
|
||||
To: to.status().String(),
|
||||
}
|
||||
}
|
||||
|
||||
// stateTransitionError is returned when an invalid state transition happens from one
|
||||
// state to another.
|
||||
type stateTransitionError struct {
|
||||
From string
|
||||
To string
|
||||
}
|
||||
|
||||
func (s *stateTransitionError) Error() string {
|
||||
return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To)
|
||||
}
|
||||
|
||||
type containerState interface {
|
||||
transition(containerState) error
|
||||
destroy() error
|
||||
status() Status
|
||||
}
|
||||
|
||||
func destroy(c *linuxContainer) error {
|
||||
if !c.config.Namespaces.Contains(configs.NEWPID) {
|
||||
if err := killCgroupProcesses(c.cgroupManager); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
}
|
||||
err := c.cgroupManager.Destroy()
|
||||
if rerr := os.RemoveAll(c.root); err == nil {
|
||||
err = rerr
|
||||
}
|
||||
c.initProcess = nil
|
||||
if herr := runPoststopHooks(c); err == nil {
|
||||
err = herr
|
||||
}
|
||||
c.state = &stoppedState{c: c}
|
||||
return err
|
||||
}
|
||||
|
||||
func runPoststopHooks(c *linuxContainer) error {
|
||||
if c.config.Hooks != nil {
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Root: c.config.Rootfs,
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Poststop {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// stoppedState represents a container is a stopped/destroyed state.
|
||||
type stoppedState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (b *stoppedState) status() Status {
|
||||
return Destroyed
|
||||
}
|
||||
|
||||
func (b *stoppedState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *runningState:
|
||||
b.c.state = s
|
||||
return nil
|
||||
case *restoredState:
|
||||
b.c.state = s
|
||||
return nil
|
||||
case *stoppedState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(b, s)
|
||||
}
|
||||
|
||||
func (b *stoppedState) destroy() error {
|
||||
return destroy(b.c)
|
||||
}
|
||||
|
||||
// runningState represents a container that is currently running.
|
||||
type runningState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (r *runningState) status() Status {
|
||||
return Running
|
||||
}
|
||||
|
||||
func (r *runningState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *stoppedState:
|
||||
running, err := r.c.isRunning()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if running {
|
||||
return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
|
||||
}
|
||||
r.c.state = s
|
||||
return nil
|
||||
case *pausedState:
|
||||
r.c.state = s
|
||||
return nil
|
||||
case *runningState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(r, s)
|
||||
}
|
||||
|
||||
func (r *runningState) destroy() error {
|
||||
running, err := r.c.isRunning()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if running {
|
||||
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
|
||||
}
|
||||
return destroy(r.c)
|
||||
}
|
||||
|
||||
// pausedState represents a container that is currently pause. It cannot be destroyed in a
|
||||
// paused state and must transition back to running first.
|
||||
type pausedState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (p *pausedState) status() Status {
|
||||
return Paused
|
||||
}
|
||||
|
||||
func (p *pausedState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *runningState, *stoppedState:
|
||||
p.c.state = s
|
||||
return nil
|
||||
case *pausedState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(p, s)
|
||||
}
|
||||
|
||||
func (p *pausedState) destroy() error {
|
||||
isRunning, err := p.c.isRunning()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !isRunning {
|
||||
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
|
||||
return err
|
||||
}
|
||||
return destroy(p.c)
|
||||
}
|
||||
return newGenericError(fmt.Errorf("container is paused"), ContainerPaused)
|
||||
}
|
||||
|
||||
// restoredState is the same as the running state but also has accociated checkpoint
|
||||
// information that maybe need destroyed when the container is stopped and destory is called.
|
||||
type restoredState struct {
|
||||
imageDir string
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (r *restoredState) status() Status {
|
||||
return Running
|
||||
}
|
||||
|
||||
func (r *restoredState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *stoppedState:
|
||||
return nil
|
||||
case *runningState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(r, s)
|
||||
}
|
||||
|
||||
func (r *restoredState) destroy() error {
|
||||
if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return destroy(r.c)
|
||||
}
|
||||
|
||||
// createdState is used whenever a container is restored, loaded, or setting additional
|
||||
// processes inside and it should not be destroyed when it is exiting.
|
||||
type createdState struct {
|
||||
c *linuxContainer
|
||||
s Status
|
||||
}
|
||||
|
||||
func (n *createdState) status() Status {
|
||||
return n.s
|
||||
}
|
||||
|
||||
func (n *createdState) transition(s containerState) error {
|
||||
n.c.state = s
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *createdState) destroy() error {
|
||||
if err := n.c.refreshState(); err != nil {
|
||||
return err
|
||||
}
|
||||
return n.c.state.destroy()
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
type NetworkInterface struct {
|
||||
// Name is the name of the network interface.
|
||||
Name string
|
||||
|
||||
RxBytes uint64
|
||||
RxPackets uint64
|
||||
RxErrors uint64
|
||||
RxDropped uint64
|
||||
TxBytes uint64
|
||||
TxPackets uint64
|
||||
TxErrors uint64
|
||||
TxDropped uint64
|
||||
}
|
|
@ -1,5 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
type Stats struct {
|
||||
Interfaces []*NetworkInterface
|
||||
}
|
|
@ -1,8 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
import "github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
|
||||
type Stats struct {
|
||||
Interfaces []*NetworkInterface
|
||||
CgroupStats *cgroups.Stats
|
||||
}
|
|
@ -1,5 +0,0 @@
|
|||
package libcontainer
|
||||
|
||||
type Stats struct {
|
||||
Interfaces []*NetworkInterface
|
||||
}
|
|
@ -1,86 +0,0 @@
|
|||
package utils
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
const (
|
||||
exitSignalOffset = 128
|
||||
)
|
||||
|
||||
// GenerateRandomName returns a new name joined with a prefix. This size
|
||||
// specified is used to truncate the randomly generated value
|
||||
func GenerateRandomName(prefix string, size int) (string, error) {
|
||||
id := make([]byte, 32)
|
||||
if _, err := io.ReadFull(rand.Reader, id); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if size > 64 {
|
||||
size = 64
|
||||
}
|
||||
return prefix + hex.EncodeToString(id)[:size], nil
|
||||
}
|
||||
|
||||
// ResolveRootfs ensures that the current working directory is
|
||||
// not a symlink and returns the absolute path to the rootfs
|
||||
func ResolveRootfs(uncleanRootfs string) (string, error) {
|
||||
rootfs, err := filepath.Abs(uncleanRootfs)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return filepath.EvalSymlinks(rootfs)
|
||||
}
|
||||
|
||||
// ExitStatus returns the correct exit status for a process based on if it
|
||||
// was signaled or exited cleanly
|
||||
func ExitStatus(status syscall.WaitStatus) int {
|
||||
if status.Signaled() {
|
||||
return exitSignalOffset + int(status.Signal())
|
||||
}
|
||||
return status.ExitStatus()
|
||||
}
|
||||
|
||||
// WriteJSON writes the provided struct v to w using standard json marshaling
|
||||
func WriteJSON(w io.Writer, v interface{}) error {
|
||||
data, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = w.Write(data)
|
||||
return err
|
||||
}
|
||||
|
||||
// CleanPath makes a path safe for use with filepath.Join. This is done by not
|
||||
// only cleaning the path, but also (if the path is relative) adding a leading
|
||||
// '/' and cleaning it (then removing the leading '/'). This ensures that a
|
||||
// path resulting from prepending another path will always resolve to lexically
|
||||
// be a subdirectory of the prefixed path. This is all done lexically, so paths
|
||||
// that include symlinks won't be safe as a result of using CleanPath.
|
||||
func CleanPath(path string) string {
|
||||
// Deal with empty strings nicely.
|
||||
if path == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Ensure that all paths are cleaned (especially problematic ones like
|
||||
// "/../../../../../" which can cause lots of issues).
|
||||
path = filepath.Clean(path)
|
||||
|
||||
// If the path isn't absolute, we need to do more processing to fix paths
|
||||
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
|
||||
// paths to relative ones.
|
||||
if !filepath.IsAbs(path) {
|
||||
path = filepath.Clean(string(os.PathSeparator) + path)
|
||||
// This can't fail, as (by definition) all paths are relative to root.
|
||||
path, _ = filepath.Rel(string(os.PathSeparator), path)
|
||||
}
|
||||
|
||||
// Clean the path again for good measure.
|
||||
return filepath.Clean(path)
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
// +build !windows
|
||||
|
||||
package utils
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"strconv"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
func CloseExecFrom(minFd int) error {
|
||||
fdList, err := ioutil.ReadDir("/proc/self/fd")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, fi := range fdList {
|
||||
fd, err := strconv.Atoi(fi.Name())
|
||||
if err != nil {
|
||||
// ignore non-numeric file names
|
||||
continue
|
||||
}
|
||||
|
||||
if fd < minFd {
|
||||
// ignore descriptors lower than our specified minimum
|
||||
continue
|
||||
}
|
||||
|
||||
// intentionally ignore errors from syscall.CloseOnExec
|
||||
syscall.CloseOnExec(fd)
|
||||
// the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall)
|
||||
}
|
||||
return nil
|
||||
}
|
Loading…
Reference in a new issue