network.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. package docker
  2. import (
  3. "encoding/binary"
  4. "errors"
  5. "fmt"
  6. "log"
  7. "net"
  8. "os/exec"
  9. "strconv"
  10. "strings"
  11. "sync"
  12. )
// NetworkBridgeIface holds the name of a network bridge interface.
// NOTE(review): nothing in the visible part of this file reads or writes it;
// presumably it is set by start-up/flag handling elsewhere — confirm.
var NetworkBridgeIface string
const (
	// DefaultNetworkBridge is the bridge interface name "docker0".
	// NOTE(review): presumably used when no bridge is configured — confirm.
	DefaultNetworkBridge = "docker0"
	// portRangeStart is the first port emitted by PortAllocator.runFountain.
	portRangeStart = 49153
	// portRangeEnd bounds the fountain's port range; runFountain iterates
	// while port < portRangeEnd, so this value itself is never emitted.
	portRangeEnd = 65535
)
  19. // Calculates the first and last IP addresses in an IPNet
  20. func networkRange(network *net.IPNet) (net.IP, net.IP) {
  21. netIP := network.IP.To4()
  22. firstIP := netIP.Mask(network.Mask)
  23. lastIP := net.IPv4(0, 0, 0, 0).To4()
  24. for i := 0; i < len(lastIP); i++ {
  25. lastIP[i] = netIP[i] | ^network.Mask[i]
  26. }
  27. return firstIP, lastIP
  28. }
  29. // Detects overlap between one IPNet and another
  30. func networkOverlaps(netX *net.IPNet, netY *net.IPNet) bool {
  31. firstIP, _ := networkRange(netX)
  32. if netY.Contains(firstIP) {
  33. return true
  34. }
  35. firstIP, _ = networkRange(netY)
  36. if netX.Contains(firstIP) {
  37. return true
  38. }
  39. return false
  40. }
  41. // Converts a 4 bytes IP into a 32 bit integer
  42. func ipToInt(ip net.IP) int32 {
  43. return int32(binary.BigEndian.Uint32(ip.To4()))
  44. }
  45. // Converts 32 bit integer into a 4 bytes IP address
  46. func intToIp(n int32) net.IP {
  47. b := make([]byte, 4)
  48. binary.BigEndian.PutUint32(b, uint32(n))
  49. return net.IP(b)
  50. }
  51. // Given a netmask, calculates the number of available hosts
  52. func networkSize(mask net.IPMask) int32 {
  53. m := net.IPv4Mask(0, 0, 0, 0)
  54. for i := 0; i < net.IPv4len; i++ {
  55. m[i] = ^mask[i]
  56. }
  57. return int32(binary.BigEndian.Uint32(m)) + 1
  58. }
  59. //Wrapper around the ip command
  60. func ip(args ...string) (string, error) {
  61. path, err := exec.LookPath("ip")
  62. if err != nil {
  63. return "", fmt.Errorf("command not found: ip")
  64. }
  65. output, err := exec.Command(path, args...).CombinedOutput()
  66. if err != nil {
  67. return "", fmt.Errorf("ip failed: ip %v", strings.Join(args, " "))
  68. }
  69. return string(output), nil
  70. }
  71. // Wrapper around the iptables command
  72. func iptables(args ...string) error {
  73. path, err := exec.LookPath("iptables")
  74. if err != nil {
  75. return fmt.Errorf("command not found: iptables")
  76. }
  77. if err := exec.Command(path, args...).Run(); err != nil {
  78. return fmt.Errorf("iptables failed: iptables %v", strings.Join(args, " "))
  79. }
  80. return nil
  81. }
  82. func checkRouteOverlaps(dockerNetwork *net.IPNet) error {
  83. output, err := ip("route")
  84. if err != nil {
  85. return err
  86. }
  87. Debugf("Routes:\n\n%s", output)
  88. for _, line := range strings.Split(output, "\n") {
  89. if strings.Trim(line, "\r\n\t ") == "" || strings.Contains(line, "default") {
  90. continue
  91. }
  92. if _, network, err := net.ParseCIDR(strings.Split(line, " ")[0]); err != nil {
  93. return fmt.Errorf("Unexpected ip route output: %s (%s)", err, line)
  94. } else if networkOverlaps(dockerNetwork, network) {
  95. return fmt.Errorf("Network %s is already routed: '%s'", dockerNetwork.String(), line)
  96. }
  97. }
  98. return nil
  99. }
  100. func CreateBridgeIface(ifaceName string) error {
  101. // FIXME: try more IP ranges
  102. // FIXME: try bigger ranges! /24 is too small.
  103. addrs := []string{"172.16.42.1/24", "10.0.42.1/24", "192.168.42.1/24"}
  104. var ifaceAddr string
  105. for _, addr := range addrs {
  106. _, dockerNetwork, err := net.ParseCIDR(addr)
  107. if err != nil {
  108. return err
  109. }
  110. if err := checkRouteOverlaps(dockerNetwork); err == nil {
  111. ifaceAddr = addr
  112. break
  113. } else {
  114. Debugf("%s: %s", addr, err)
  115. }
  116. }
  117. if ifaceAddr == "" {
  118. return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", ifaceName, ifaceName)
  119. } else {
  120. Debugf("Creating bridge %s with network %s", ifaceName, ifaceAddr)
  121. }
  122. if output, err := ip("link", "add", ifaceName, "type", "bridge"); err != nil {
  123. return fmt.Errorf("Error creating bridge: %s (output: %s)", err, output)
  124. }
  125. if output, err := ip("addr", "add", ifaceAddr, "dev", ifaceName); err != nil {
  126. return fmt.Errorf("Unable to add private network: %s (%s)", err, output)
  127. }
  128. if output, err := ip("link", "set", ifaceName, "up"); err != nil {
  129. return fmt.Errorf("Unable to start network bridge: %s (%s)", err, output)
  130. }
  131. if err := iptables("-t", "nat", "-A", "POSTROUTING", "-s", ifaceAddr,
  132. "!", "-d", ifaceAddr, "-j", "MASQUERADE"); err != nil {
  133. return fmt.Errorf("Unable to enable network bridge NAT: %s", err)
  134. }
  135. return nil
  136. }
  137. // Return the IPv4 address of a network interface
  138. func getIfaceAddr(name string) (net.Addr, error) {
  139. iface, err := net.InterfaceByName(name)
  140. if err != nil {
  141. return nil, err
  142. }
  143. addrs, err := iface.Addrs()
  144. if err != nil {
  145. return nil, err
  146. }
  147. var addrs4 []net.Addr
  148. for _, addr := range addrs {
  149. ip := (addr.(*net.IPNet)).IP
  150. if ip4 := ip.To4(); len(ip4) == net.IPv4len {
  151. addrs4 = append(addrs4, addr)
  152. }
  153. }
  154. switch {
  155. case len(addrs4) == 0:
  156. return nil, fmt.Errorf("Interface %v has no IP addresses", name)
  157. case len(addrs4) > 1:
  158. fmt.Printf("Interface %v has more than 1 IPv4 address. Defaulting to using %v\n",
  159. name, (addrs4[0].(*net.IPNet)).IP)
  160. }
  161. return addrs4[0], nil
  162. }
// PortMapper takes care of mapping external ports to containers by setting
// up iptables rules.
// It keeps track of all mappings and is able to unmap at will.
type PortMapper struct {
	// mapping records, for every mapped external port, the destination
	// address the port is forwarded to (written by Map, removed by Unmap).
	mapping map[int]net.TCPAddr
}
  169. func (mapper *PortMapper) cleanup() error {
  170. // Ignore errors - This could mean the chains were never set up
  171. iptables("-t", "nat", "-D", "PREROUTING", "-m", "addrtype", "--dst-type", "LOCAL", "-j", "DOCKER")
  172. iptables("-t", "nat", "-D", "OUTPUT", "-m", "addrtype", "--dst-type", "LOCAL", "-j", "DOCKER")
  173. // Also cleanup rules created by older versions, or -X might fail.
  174. iptables("-t", "nat", "-D", "PREROUTING", "-j", "DOCKER")
  175. iptables("-t", "nat", "-D", "OUTPUT", "-j", "DOCKER")
  176. iptables("-t", "nat", "-F", "DOCKER")
  177. iptables("-t", "nat", "-X", "DOCKER")
  178. mapper.mapping = make(map[int]net.TCPAddr)
  179. return nil
  180. }
  181. func (mapper *PortMapper) setup() error {
  182. if err := iptables("-t", "nat", "-N", "DOCKER"); err != nil {
  183. return fmt.Errorf("Failed to create DOCKER chain: %s", err)
  184. }
  185. if err := iptables("-t", "nat", "-A", "PREROUTING", "-m", "addrtype", "--dst-type", "LOCAL", "-j", "DOCKER"); err != nil {
  186. return fmt.Errorf("Failed to inject docker in PREROUTING chain: %s", err)
  187. }
  188. if err := iptables("-t", "nat", "-A", "OUTPUT", "-m", "addrtype", "--dst-type", "LOCAL", "-j", "DOCKER"); err != nil {
  189. return fmt.Errorf("Failed to inject docker in OUTPUT chain: %s", err)
  190. }
  191. return nil
  192. }
  193. func (mapper *PortMapper) iptablesForward(rule string, port int, dest net.TCPAddr) error {
  194. return iptables("-t", "nat", rule, "DOCKER", "-p", "tcp", "--dport", strconv.Itoa(port),
  195. "-j", "DNAT", "--to-destination", net.JoinHostPort(dest.IP.String(), strconv.Itoa(dest.Port)))
  196. }
  197. func (mapper *PortMapper) Map(port int, dest net.TCPAddr) error {
  198. if err := mapper.iptablesForward("-A", port, dest); err != nil {
  199. return err
  200. }
  201. mapper.mapping[port] = dest
  202. return nil
  203. }
  204. func (mapper *PortMapper) Unmap(port int) error {
  205. dest, ok := mapper.mapping[port]
  206. if !ok {
  207. return errors.New("Port is not mapped")
  208. }
  209. if err := mapper.iptablesForward("-D", port, dest); err != nil {
  210. return err
  211. }
  212. delete(mapper.mapping, port)
  213. return nil
  214. }
  215. func newPortMapper() (*PortMapper, error) {
  216. mapper := &PortMapper{}
  217. if err := mapper.cleanup(); err != nil {
  218. return nil, err
  219. }
  220. if err := mapper.setup(); err != nil {
  221. return nil, err
  222. }
  223. return mapper, nil
  224. }
  225. // Port allocator: Atomatically allocate and release networking ports
  226. type PortAllocator struct {
  227. inUse map[int]struct{}
  228. fountain chan (int)
  229. lock sync.Mutex
  230. }
  231. func (alloc *PortAllocator) runFountain() {
  232. for {
  233. for port := portRangeStart; port < portRangeEnd; port++ {
  234. alloc.fountain <- port
  235. }
  236. }
  237. }
  238. // FIXME: Release can no longer fail, change its prototype to reflect that.
  239. func (alloc *PortAllocator) Release(port int) error {
  240. Debugf("Releasing %d", port)
  241. alloc.lock.Lock()
  242. delete(alloc.inUse, port)
  243. alloc.lock.Unlock()
  244. return nil
  245. }
  246. func (alloc *PortAllocator) Acquire(port int) (int, error) {
  247. Debugf("Acquiring %d", port)
  248. if port == 0 {
  249. // Allocate a port from the fountain
  250. for port := range alloc.fountain {
  251. if _, err := alloc.Acquire(port); err == nil {
  252. return port, nil
  253. }
  254. }
  255. return -1, fmt.Errorf("Port generator ended unexpectedly")
  256. }
  257. alloc.lock.Lock()
  258. defer alloc.lock.Unlock()
  259. if _, inUse := alloc.inUse[port]; inUse {
  260. return -1, fmt.Errorf("Port already in use: %d", port)
  261. }
  262. alloc.inUse[port] = struct{}{}
  263. return port, nil
  264. }
  265. func newPortAllocator() (*PortAllocator, error) {
  266. allocator := &PortAllocator{
  267. inUse: make(map[int]struct{}),
  268. fountain: make(chan int),
  269. }
  270. go allocator.runFountain()
  271. return allocator, nil
  272. }
// IPAllocator automatically allocates and releases IP addresses within a
// network. All state is owned by the goroutine running run; Acquire and
// Release talk to it through the two channels.
type IPAllocator struct {
	// network is the network addresses are allocated from.
	network *net.IPNet
	// queueAlloc carries the next allocation result from run to Acquire.
	queueAlloc chan allocatedIP
	// queueReleased carries addresses handed back through Release to run.
	queueReleased chan net.IP
	// inUse is the set of allocated addresses, keyed by their ipToInt value.
	inUse map[int32]struct{}
}
// allocatedIP is the result of one allocation attempt, as sent over
// IPAllocator.queueAlloc.
type allocatedIP struct {
	// ip is the address offered by the allocator.
	ip net.IP
	// err is set when no unallocated address was available.
	err error
}
  284. func (alloc *IPAllocator) run() {
  285. firstIP, _ := networkRange(alloc.network)
  286. ipNum := ipToInt(firstIP)
  287. ownIP := ipToInt(alloc.network.IP)
  288. size := networkSize(alloc.network.Mask)
  289. pos := int32(1)
  290. max := size - 2 // -1 for the broadcast address, -1 for the gateway address
  291. for {
  292. var (
  293. newNum int32
  294. inUse bool
  295. )
  296. // Find first unused IP, give up after one whole round
  297. for attempt := int32(0); attempt < max; attempt++ {
  298. newNum = ipNum + pos
  299. pos = pos%max + 1
  300. // The network's IP is never okay to use
  301. if newNum == ownIP {
  302. continue
  303. }
  304. if _, inUse = alloc.inUse[newNum]; !inUse {
  305. // We found an unused IP
  306. break
  307. }
  308. }
  309. ip := allocatedIP{ip: intToIp(newNum)}
  310. if inUse {
  311. ip.err = errors.New("No unallocated IP available")
  312. }
  313. select {
  314. case alloc.queueAlloc <- ip:
  315. alloc.inUse[newNum] = struct{}{}
  316. case released := <-alloc.queueReleased:
  317. r := ipToInt(released)
  318. delete(alloc.inUse, r)
  319. if inUse {
  320. // If we couldn't allocate a new IP, the released one
  321. // will be the only free one now, so instantly use it
  322. // next time
  323. pos = r - ipNum
  324. } else {
  325. // Use same IP as last time
  326. if pos == 1 {
  327. pos = max
  328. } else {
  329. pos--
  330. }
  331. }
  332. }
  333. }
  334. }
  335. func (alloc *IPAllocator) Acquire() (net.IP, error) {
  336. ip := <-alloc.queueAlloc
  337. return ip.ip, ip.err
  338. }
// Release hands ip back to the allocator by sending it on queueReleased.
// The channel is created unbuffered by newIPAllocator, so the call blocks
// until the allocator loop receives the address.
func (alloc *IPAllocator) Release(ip net.IP) {
	alloc.queueReleased <- ip
}
  342. func newIPAllocator(network *net.IPNet) *IPAllocator {
  343. alloc := &IPAllocator{
  344. network: network,
  345. queueAlloc: make(chan allocatedIP),
  346. queueReleased: make(chan net.IP),
  347. inUse: make(map[int32]struct{}),
  348. }
  349. go alloc.run()
  350. return alloc
  351. }
// NetworkInterface represents the networking stack of a container.
type NetworkInterface struct {
	// IPNet is the interface's address together with the bridge's netmask.
	IPNet net.IPNet
	// Gateway is the bridge network's IP.
	Gateway net.IP
	// manager is the NetworkManager that allocated this interface.
	manager *NetworkManager
	// extPorts lists the external ports allocated through AllocatePort;
	// Release unmaps and frees each of them.
	extPorts []int
}
  359. // Allocate an external TCP port and map it to the interface
  360. func (iface *NetworkInterface) AllocatePort(spec string) (*Nat, error) {
  361. nat, err := parseNat(spec)
  362. if err != nil {
  363. return nil, err
  364. }
  365. // Allocate a random port if Frontend==0
  366. if extPort, err := iface.manager.portAllocator.Acquire(nat.Frontend); err != nil {
  367. return nil, err
  368. } else {
  369. nat.Frontend = extPort
  370. }
  371. if err := iface.manager.portMapper.Map(nat.Frontend, net.TCPAddr{IP: iface.IPNet.IP, Port: nat.Backend}); err != nil {
  372. iface.manager.portAllocator.Release(nat.Frontend)
  373. return nil, err
  374. }
  375. iface.extPorts = append(iface.extPorts, nat.Frontend)
  376. return nat, nil
  377. }
  378. type Nat struct {
  379. Proto string
  380. Frontend int
  381. Backend int
  382. }
  383. func parseNat(spec string) (*Nat, error) {
  384. var nat Nat
  385. // If spec starts with ':', external and internal ports must be the same.
  386. // This might fail if the requested external port is not available.
  387. var sameFrontend bool
  388. if spec[0] == ':' {
  389. sameFrontend = true
  390. spec = spec[1:]
  391. }
  392. port, err := strconv.ParseUint(spec, 10, 16)
  393. if err != nil {
  394. return nil, err
  395. }
  396. nat.Backend = int(port)
  397. if sameFrontend {
  398. nat.Frontend = nat.Backend
  399. }
  400. nat.Proto = "tcp"
  401. return &nat, nil
  402. }
  403. // Release: Network cleanup - release all resources
  404. func (iface *NetworkInterface) Release() {
  405. for _, port := range iface.extPorts {
  406. if err := iface.manager.portMapper.Unmap(port); err != nil {
  407. log.Printf("Unable to unmap port %v: %v", port, err)
  408. }
  409. if err := iface.manager.portAllocator.Release(port); err != nil {
  410. log.Printf("Unable to release port %v: %v", port, err)
  411. }
  412. }
  413. iface.manager.ipAllocator.Release(iface.IPNet.IP)
  414. }
// NetworkManager manages a set of network interfaces.
// Only *one* manager per host machine should be used.
type NetworkManager struct {
	// bridgeIface is the name of the bridge interface the manager was
	// created for.
	bridgeIface string
	// bridgeNetwork is the bridge's IPv4 address and netmask.
	bridgeNetwork *net.IPNet

	// ipAllocator hands out addresses inside bridgeNetwork.
	ipAllocator *IPAllocator
	// portAllocator reserves external ports.
	portAllocator *PortAllocator
	// portMapper installs the iptables forwarding rules for reserved ports.
	portMapper *PortMapper
}
  424. // Allocate a network interface
  425. func (manager *NetworkManager) Allocate() (*NetworkInterface, error) {
  426. ip, err := manager.ipAllocator.Acquire()
  427. if err != nil {
  428. return nil, err
  429. }
  430. iface := &NetworkInterface{
  431. IPNet: net.IPNet{IP: ip, Mask: manager.bridgeNetwork.Mask},
  432. Gateway: manager.bridgeNetwork.IP,
  433. manager: manager,
  434. }
  435. return iface, nil
  436. }
  437. func newNetworkManager(bridgeIface string) (*NetworkManager, error) {
  438. addr, err := getIfaceAddr(bridgeIface)
  439. if err != nil {
  440. // If the iface is not found, try to create it
  441. if err := CreateBridgeIface(bridgeIface); err != nil {
  442. return nil, err
  443. }
  444. addr, err = getIfaceAddr(bridgeIface)
  445. if err != nil {
  446. return nil, err
  447. }
  448. }
  449. network := addr.(*net.IPNet)
  450. ipAllocator := newIPAllocator(network)
  451. portAllocator, err := newPortAllocator()
  452. if err != nil {
  453. return nil, err
  454. }
  455. portMapper, err := newPortMapper()
  456. if err != nil {
  457. return nil, err
  458. }
  459. manager := &NetworkManager{
  460. bridgeIface: bridgeIface,
  461. bridgeNetwork: network,
  462. ipAllocator: ipAllocator,
  463. portAllocator: portAllocator,
  464. portMapper: portMapper,
  465. }
  466. return manager, nil
  467. }