driver.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. package bridge
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. "net"
  6. "os"
  7. "strconv"
  8. "sync"
  9. log "github.com/Sirupsen/logrus"
  10. "github.com/docker/docker/daemon/networkdriver"
  11. "github.com/docker/docker/daemon/networkdriver/ipallocator"
  12. "github.com/docker/docker/daemon/networkdriver/portmapper"
  13. "github.com/docker/docker/engine"
  14. "github.com/docker/docker/nat"
  15. "github.com/docker/docker/pkg/iptables"
  16. "github.com/docker/docker/pkg/networkfs/resolvconf"
  17. "github.com/docker/docker/pkg/parsers/kernel"
  18. "github.com/docker/libcontainer/netlink"
  19. )
  // DefaultNetworkBridge is the bridge interface name InitDriver falls back to
  // when the job does not provide a "BridgeIface" value.
  const DefaultNetworkBridge = "docker0"

  // MaxAllocatedPortAttempts bounds the number of portmapper.Map attempts made
  // by AllocatePort for a single request.
  const MaxAllocatedPortAttempts = 10
  // networkInterface records the networking state the driver tracks for one
  // container.
  type networkInterface struct {
  	// IP is the address handed to the container by Allocate.
  	IP net.IP

  	// PortMappings holds the host-side addresses returned by portmapper.Map
  	// for this container; Release unmaps each of them.
  	PortMappings []net.Addr
  }
  29. type ifaces struct {
  30. c map[string]*networkInterface
  31. sync.Mutex
  32. }
  33. func (i *ifaces) Set(key string, n *networkInterface) {
  34. i.Lock()
  35. i.c[key] = n
  36. i.Unlock()
  37. }
  38. func (i *ifaces) Get(key string) *networkInterface {
  39. i.Lock()
  40. res := i.c[key]
  41. i.Unlock()
  42. return res
  43. }
  44. var (
  45. addrs = []string{
  46. // Here we don't follow the convention of using the 1st IP of the range for the gateway.
  47. // This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
  48. // In theory this shouldn't matter - in practice there's bound to be a few scripts relying
  49. // on the internal addressing or other stupid things like that.
  50. // They shouldn't, but hey, let's not break them unless we really have to.
  51. "172.17.42.1/16", // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
  52. "10.0.42.1/16", // Don't even try using the entire /8, that's too intrusive
  53. "10.1.42.1/16",
  54. "10.42.42.1/16",
  55. "172.16.42.1/24",
  56. "172.16.43.1/24",
  57. "172.16.44.1/24",
  58. "10.0.42.1/24",
  59. "10.0.43.1/24",
  60. "192.168.42.1/24",
  61. "192.168.43.1/24",
  62. "192.168.44.1/24",
  63. }
  64. bridgeIface string
  65. bridgeNetwork *net.IPNet
  66. defaultBindingIP = net.ParseIP("0.0.0.0")
  67. currentInterfaces = ifaces{c: make(map[string]*networkInterface)}
  68. )
  69. func InitDriver(job *engine.Job) engine.Status {
  70. var (
  71. network *net.IPNet
  72. enableIPTables = job.GetenvBool("EnableIptables")
  73. icc = job.GetenvBool("InterContainerCommunication")
  74. ipMasq = job.GetenvBool("EnableIpMasq")
  75. ipForward = job.GetenvBool("EnableIpForward")
  76. bridgeIP = job.Getenv("BridgeIP")
  77. fixedCIDR = job.Getenv("FixedCIDR")
  78. )
  79. if defaultIP := job.Getenv("DefaultBindingIP"); defaultIP != "" {
  80. defaultBindingIP = net.ParseIP(defaultIP)
  81. }
  82. bridgeIface = job.Getenv("BridgeIface")
  83. usingDefaultBridge := false
  84. if bridgeIface == "" {
  85. usingDefaultBridge = true
  86. bridgeIface = DefaultNetworkBridge
  87. }
  88. addr, err := networkdriver.GetIfaceAddr(bridgeIface)
  89. if err != nil {
  90. // If we're not using the default bridge, fail without trying to create it
  91. if !usingDefaultBridge {
  92. return job.Error(err)
  93. }
  94. // If the bridge interface is not found (or has no address), try to create it and/or add an address
  95. if err := configureBridge(bridgeIP); err != nil {
  96. return job.Error(err)
  97. }
  98. addr, err = networkdriver.GetIfaceAddr(bridgeIface)
  99. if err != nil {
  100. return job.Error(err)
  101. }
  102. network = addr.(*net.IPNet)
  103. } else {
  104. network = addr.(*net.IPNet)
  105. // validate that the bridge ip matches the ip specified by BridgeIP
  106. if bridgeIP != "" {
  107. bip, _, err := net.ParseCIDR(bridgeIP)
  108. if err != nil {
  109. return job.Error(err)
  110. }
  111. if !network.IP.Equal(bip) {
  112. return job.Errorf("bridge ip (%s) does not match existing bridge configuration %s", network.IP, bip)
  113. }
  114. }
  115. }
  116. // Configure iptables for link support
  117. if enableIPTables {
  118. if err := setupIPTables(addr, icc, ipMasq); err != nil {
  119. return job.Error(err)
  120. }
  121. }
  122. if ipForward {
  123. // Enable IPv4 forwarding
  124. if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte{'1', '\n'}, 0644); err != nil {
  125. job.Logf("WARNING: unable to enable IPv4 forwarding: %s\n", err)
  126. }
  127. }
  128. // We can always try removing the iptables
  129. if err := iptables.RemoveExistingChain("DOCKER"); err != nil {
  130. return job.Error(err)
  131. }
  132. if enableIPTables {
  133. chain, err := iptables.NewChain("DOCKER", bridgeIface)
  134. if err != nil {
  135. return job.Error(err)
  136. }
  137. portmapper.SetIptablesChain(chain)
  138. }
  139. bridgeNetwork = network
  140. if fixedCIDR != "" {
  141. _, subnet, err := net.ParseCIDR(fixedCIDR)
  142. if err != nil {
  143. return job.Error(err)
  144. }
  145. log.Debugf("Subnet: %v", subnet)
  146. if err := ipallocator.RegisterSubnet(bridgeNetwork, subnet); err != nil {
  147. return job.Error(err)
  148. }
  149. }
  150. // https://github.com/docker/docker/issues/2768
  151. job.Eng.Hack_SetGlobalVar("httpapi.bridgeIP", bridgeNetwork.IP)
  152. for name, f := range map[string]engine.Handler{
  153. "allocate_interface": Allocate,
  154. "release_interface": Release,
  155. "allocate_port": AllocatePort,
  156. "link": LinkContainers,
  157. } {
  158. if err := job.Eng.Register(name, f); err != nil {
  159. return job.Error(err)
  160. }
  161. }
  162. return engine.StatusOK
  163. }
  164. func setupIPTables(addr net.Addr, icc, ipmasq bool) error {
  165. // Enable NAT
  166. if ipmasq {
  167. natArgs := []string{"POSTROUTING", "-t", "nat", "-s", addr.String(), "!", "-o", bridgeIface, "-j", "MASQUERADE"}
  168. if !iptables.Exists(natArgs...) {
  169. if output, err := iptables.Raw(append([]string{"-I"}, natArgs...)...); err != nil {
  170. return fmt.Errorf("Unable to enable network bridge NAT: %s", err)
  171. } else if len(output) != 0 {
  172. return &iptables.ChainError{Chain: "POSTROUTING", Output: output}
  173. }
  174. }
  175. }
  176. var (
  177. args = []string{"FORWARD", "-i", bridgeIface, "-o", bridgeIface, "-j"}
  178. acceptArgs = append(args, "ACCEPT")
  179. dropArgs = append(args, "DROP")
  180. )
  181. if !icc {
  182. iptables.Raw(append([]string{"-D"}, acceptArgs...)...)
  183. if !iptables.Exists(dropArgs...) {
  184. log.Debugf("Disable inter-container communication")
  185. if output, err := iptables.Raw(append([]string{"-I"}, dropArgs...)...); err != nil {
  186. return fmt.Errorf("Unable to prevent intercontainer communication: %s", err)
  187. } else if len(output) != 0 {
  188. return fmt.Errorf("Error disabling intercontainer communication: %s", output)
  189. }
  190. }
  191. } else {
  192. iptables.Raw(append([]string{"-D"}, dropArgs...)...)
  193. if !iptables.Exists(acceptArgs...) {
  194. log.Debugf("Enable inter-container communication")
  195. if output, err := iptables.Raw(append([]string{"-I"}, acceptArgs...)...); err != nil {
  196. return fmt.Errorf("Unable to allow intercontainer communication: %s", err)
  197. } else if len(output) != 0 {
  198. return fmt.Errorf("Error enabling intercontainer communication: %s", output)
  199. }
  200. }
  201. }
  202. // Accept all non-intercontainer outgoing packets
  203. outgoingArgs := []string{"FORWARD", "-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"}
  204. if !iptables.Exists(outgoingArgs...) {
  205. if output, err := iptables.Raw(append([]string{"-I"}, outgoingArgs...)...); err != nil {
  206. return fmt.Errorf("Unable to allow outgoing packets: %s", err)
  207. } else if len(output) != 0 {
  208. return &iptables.ChainError{Chain: "FORWARD outgoing", Output: output}
  209. }
  210. }
  211. // Accept incoming packets for existing connections
  212. existingArgs := []string{"FORWARD", "-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"}
  213. if !iptables.Exists(existingArgs...) {
  214. if output, err := iptables.Raw(append([]string{"-I"}, existingArgs...)...); err != nil {
  215. return fmt.Errorf("Unable to allow incoming packets: %s", err)
  216. } else if len(output) != 0 {
  217. return &iptables.ChainError{Chain: "FORWARD incoming", Output: output}
  218. }
  219. }
  220. return nil
  221. }
  222. // configureBridge attempts to create and configure a network bridge interface named `bridgeIface` on the host
  223. // If bridgeIP is empty, it will try to find a non-conflicting IP from the Docker-specified private ranges
  224. // If the bridge `bridgeIface` already exists, it will only perform the IP address association with the existing
  225. // bridge (fixes issue #8444)
  226. // If an address which doesn't conflict with existing interfaces can't be found, an error is returned.
  227. func configureBridge(bridgeIP string) error {
  228. nameservers := []string{}
  229. resolvConf, _ := resolvconf.Get()
  230. // we don't check for an error here, because we don't really care
  231. // if we can't read /etc/resolv.conf. So instead we skip the append
  232. // if resolvConf is nil. It either doesn't exist, or we can't read it
  233. // for some reason.
  234. if resolvConf != nil {
  235. nameservers = append(nameservers, resolvconf.GetNameserversAsCIDR(resolvConf)...)
  236. }
  237. var ifaceAddr string
  238. if len(bridgeIP) != 0 {
  239. _, _, err := net.ParseCIDR(bridgeIP)
  240. if err != nil {
  241. return err
  242. }
  243. ifaceAddr = bridgeIP
  244. } else {
  245. for _, addr := range addrs {
  246. _, dockerNetwork, err := net.ParseCIDR(addr)
  247. if err != nil {
  248. return err
  249. }
  250. if err := networkdriver.CheckNameserverOverlaps(nameservers, dockerNetwork); err == nil {
  251. if err := networkdriver.CheckRouteOverlaps(dockerNetwork); err == nil {
  252. ifaceAddr = addr
  253. break
  254. } else {
  255. log.Debugf("%s %s", addr, err)
  256. }
  257. }
  258. }
  259. }
  260. if ifaceAddr == "" {
  261. return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", bridgeIface, bridgeIface)
  262. }
  263. log.Debugf("Creating bridge %s with network %s", bridgeIface, ifaceAddr)
  264. if err := createBridgeIface(bridgeIface); err != nil {
  265. // the bridge may already exist, therefore we can ignore an "exists" error
  266. if !os.IsExist(err) {
  267. return err
  268. }
  269. }
  270. iface, err := net.InterfaceByName(bridgeIface)
  271. if err != nil {
  272. return err
  273. }
  274. ipAddr, ipNet, err := net.ParseCIDR(ifaceAddr)
  275. if err != nil {
  276. return err
  277. }
  278. if netlink.NetworkLinkAddIp(iface, ipAddr, ipNet); err != nil {
  279. return fmt.Errorf("Unable to add private network: %s", err)
  280. }
  281. if err := netlink.NetworkLinkUp(iface); err != nil {
  282. return fmt.Errorf("Unable to start network bridge: %s", err)
  283. }
  284. return nil
  285. }
  286. func createBridgeIface(name string) error {
  287. kv, err := kernel.GetKernelVersion()
  288. // only set the bridge's mac address if the kernel version is > 3.3
  289. // before that it was not supported
  290. setBridgeMacAddr := err == nil && (kv.Kernel >= 3 && kv.Major >= 3)
  291. log.Debugf("setting bridge mac address = %v", setBridgeMacAddr)
  292. return netlink.CreateBridge(name, setBridgeMacAddr)
  293. }
  // generateMacAddr derives an IEEE 802 MAC address from the given IP address.
  //
  // The mapping is deterministic: the same IP always yields the same MAC
  // address, which avoids ARP cache issues. The result has the form
  // 02:42:<the four bytes of the IPv4 address>. When ip has no 4-byte form the
  // last four bytes stay zero.
  func generateMacAddr(ip net.IP) net.HardwareAddr {
  	mac := net.HardwareAddr{
  		// First byte: least-significant bit 0 (unicast), second
  		// least-significant bit 1 (locally administered), and otherwise as
  		// "small" as possible because the veth address has to be "smaller"
  		// than the bridge address.
  		0x02,
  		// Remainder of the prefix: since the address is locally administered
  		// any value works as long as it does not conflict with other
  		// addresses.
  		0x42,
  		// Placeholder for the IPv4 address, filled in below.
  		0x00, 0x00, 0x00, 0x00,
  	}

  	// The IP address occupies the last 32 bits, which keeps the MAC consistent
  	// and unique per IP.
  	copy(mac[2:], ip.To4())
  	return mac
  }
  314. // Allocate a network interface
  315. func Allocate(job *engine.Job) engine.Status {
  316. var (
  317. ip net.IP
  318. mac net.HardwareAddr
  319. err error
  320. id = job.Args[0]
  321. requestedIP = net.ParseIP(job.Getenv("RequestedIP"))
  322. )
  323. if requestedIP != nil {
  324. ip, err = ipallocator.RequestIP(bridgeNetwork, requestedIP)
  325. } else {
  326. ip, err = ipallocator.RequestIP(bridgeNetwork, nil)
  327. }
  328. if err != nil {
  329. return job.Error(err)
  330. }
  331. // If no explicit mac address was given, generate a random one.
  332. if mac, err = net.ParseMAC(job.Getenv("RequestedMac")); err != nil {
  333. mac = generateMacAddr(ip)
  334. }
  335. out := engine.Env{}
  336. out.Set("IP", ip.String())
  337. out.Set("Mask", bridgeNetwork.Mask.String())
  338. out.Set("Gateway", bridgeNetwork.IP.String())
  339. out.Set("MacAddress", mac.String())
  340. out.Set("Bridge", bridgeIface)
  341. size, _ := bridgeNetwork.Mask.Size()
  342. out.SetInt("IPPrefixLen", size)
  343. currentInterfaces.Set(id, &networkInterface{
  344. IP: ip,
  345. })
  346. out.WriteTo(job.Stdout)
  347. return engine.StatusOK
  348. }
  349. // release an interface for a select ip
  350. func Release(job *engine.Job) engine.Status {
  351. var (
  352. id = job.Args[0]
  353. containerInterface = currentInterfaces.Get(id)
  354. )
  355. if containerInterface == nil {
  356. return job.Errorf("No network information to release for %s", id)
  357. }
  358. for _, nat := range containerInterface.PortMappings {
  359. if err := portmapper.Unmap(nat); err != nil {
  360. log.Infof("Unable to unmap port %s: %s", nat, err)
  361. }
  362. }
  363. if err := ipallocator.ReleaseIP(bridgeNetwork, containerInterface.IP); err != nil {
  364. log.Infof("Unable to release ip %s", err)
  365. }
  366. return engine.StatusOK
  367. }
  368. // Allocate an external port and map it to the interface
  369. func AllocatePort(job *engine.Job) engine.Status {
  370. var (
  371. err error
  372. ip = defaultBindingIP
  373. id = job.Args[0]
  374. hostIP = job.Getenv("HostIP")
  375. hostPort = job.GetenvInt("HostPort")
  376. containerPort = job.GetenvInt("ContainerPort")
  377. proto = job.Getenv("Proto")
  378. network = currentInterfaces.Get(id)
  379. )
  380. if hostIP != "" {
  381. ip = net.ParseIP(hostIP)
  382. if ip == nil {
  383. return job.Errorf("Bad parameter: invalid host ip %s", hostIP)
  384. }
  385. }
  386. // host ip, proto, and host port
  387. var container net.Addr
  388. switch proto {
  389. case "tcp":
  390. container = &net.TCPAddr{IP: network.IP, Port: containerPort}
  391. case "udp":
  392. container = &net.UDPAddr{IP: network.IP, Port: containerPort}
  393. default:
  394. return job.Errorf("unsupported address type %s", proto)
  395. }
  396. //
  397. // Try up to 10 times to get a port that's not already allocated.
  398. //
  399. // In the event of failure to bind, return the error that portmapper.Map
  400. // yields.
  401. //
  402. var host net.Addr
  403. for i := 0; i < MaxAllocatedPortAttempts; i++ {
  404. if host, err = portmapper.Map(container, ip, hostPort); err == nil {
  405. break
  406. }
  407. // There is no point in immediately retrying to map an explicitly
  408. // chosen port.
  409. if hostPort != 0 {
  410. job.Logf("Failed to allocate and map port %d: %s", hostPort, err)
  411. break
  412. }
  413. job.Logf("Failed to allocate and map port: %s, retry: %d", err, i+1)
  414. }
  415. if err != nil {
  416. return job.Error(err)
  417. }
  418. network.PortMappings = append(network.PortMappings, host)
  419. out := engine.Env{}
  420. switch netAddr := host.(type) {
  421. case *net.TCPAddr:
  422. out.Set("HostIP", netAddr.IP.String())
  423. out.SetInt("HostPort", netAddr.Port)
  424. case *net.UDPAddr:
  425. out.Set("HostIP", netAddr.IP.String())
  426. out.SetInt("HostPort", netAddr.Port)
  427. }
  428. if _, err := out.WriteTo(job.Stdout); err != nil {
  429. return job.Error(err)
  430. }
  431. return engine.StatusOK
  432. }
  433. func LinkContainers(job *engine.Job) engine.Status {
  434. var (
  435. action = job.Args[0]
  436. childIP = job.Getenv("ChildIP")
  437. parentIP = job.Getenv("ParentIP")
  438. ignoreErrors = job.GetenvBool("IgnoreErrors")
  439. ports = job.GetenvList("Ports")
  440. )
  441. for _, value := range ports {
  442. port := nat.Port(value)
  443. if output, err := iptables.Raw(action, "FORWARD",
  444. "-i", bridgeIface, "-o", bridgeIface,
  445. "-p", port.Proto(),
  446. "-s", parentIP,
  447. "--dport", strconv.Itoa(port.Int()),
  448. "-d", childIP,
  449. "-j", "ACCEPT"); !ignoreErrors && err != nil {
  450. return job.Error(err)
  451. } else if len(output) != 0 {
  452. return job.Errorf("Error toggle iptables forward: %s", output)
  453. }
  454. if output, err := iptables.Raw(action, "FORWARD",
  455. "-i", bridgeIface, "-o", bridgeIface,
  456. "-p", port.Proto(),
  457. "-s", childIP,
  458. "--sport", strconv.Itoa(port.Int()),
  459. "-d", parentIP,
  460. "-j", "ACCEPT"); !ignoreErrors && err != nil {
  461. return job.Error(err)
  462. } else if len(output) != 0 {
  463. return job.Errorf("Error toggle iptables forward: %s", output)
  464. }
  465. }
  466. return engine.StatusOK
  467. }