network.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598
  1. package docker
  2. import (
  3. "encoding/binary"
  4. "errors"
  5. "fmt"
  6. "github.com/dotcloud/docker/utils"
  7. "io"
  8. "log"
  9. "net"
  10. "os/exec"
  11. "strconv"
  12. "strings"
  13. "sync"
  14. )
  // NetworkBridgeIface holds a bridge interface name.
  // NOTE(review): nothing in the visible part of this file assigns or reads
  // it; presumably it is set by the daemon's startup code - confirm.
  var NetworkBridgeIface string

  const (
  	// DefaultNetworkBridge is the bridge interface name "docker0".
  	DefaultNetworkBridge = "docker0"

  	// portRangeStart is the first port produced by PortAllocator.runFountain.
  	portRangeStart = 49153

  	// portRangeEnd is where runFountain wraps around; the loop there stops
  	// before reaching it, so this value itself is never produced.
  	portRangeEnd = 65535
  )
  // networkRange returns the first and last IPv4 addresses covered by network:
  // the address with every host bit cleared, and the one with every host bit
  // set. Both results are 4-byte IPs. The network must be IPv4 with a 4-byte
  // mask; other inputs are not handled.
  func networkRange(network *net.IPNet) (net.IP, net.IP) {
  	base := network.IP.To4()
  	first := base.Mask(network.Mask)

  	last := net.IPv4(0, 0, 0, 0).To4()
  	for i := range last {
  		// Keep the network bits, force every host bit to 1.
  		last[i] = base[i] | ^network.Mask[i]
  	}
  	return first, last
  }
  31. // Detects overlap between one IPNet and another
  32. func networkOverlaps(netX *net.IPNet, netY *net.IPNet) bool {
  33. firstIP, _ := networkRange(netX)
  34. if netY.Contains(firstIP) {
  35. return true
  36. }
  37. firstIP, _ = networkRange(netY)
  38. if netX.Contains(firstIP) {
  39. return true
  40. }
  41. return false
  42. }
  // ipToInt packs an IPv4 address into a 32-bit integer, most significant
  // octet first. The result is signed, so addresses from 128.0.0.0 upwards
  // come out negative.
  func ipToInt(ip net.IP) int32 {
  	octets := ip.To4()
  	packed := binary.BigEndian.Uint32(octets)
  	return int32(packed)
  }
  // intToIP is the inverse of ipToInt: it unpacks a 32-bit integer into a
  // 4-byte IPv4 address, most significant octet first.
  func intToIP(n int32) net.IP {
  	var octets [4]byte
  	binary.BigEndian.PutUint32(octets[:], uint32(n))
  	return net.IP(octets[:])
  }
  // networkSize returns how many addresses a netmask spans, counting the
  // network and broadcast addresses (a /24 yields 256). Only the first four
  // bytes of mask are read.
  func networkSize(mask net.IPMask) int32 {
  	hostBits := make([]byte, net.IPv4len)
  	for i := range hostBits {
  		hostBits[i] = ^mask[i]
  	}
  	// The inverted mask is the highest host offset; +1 turns it into a count.
  	return int32(binary.BigEndian.Uint32(hostBits)) + 1
  }
  // ip runs the `ip` command with the given arguments and returns its combined
  // stdout and stderr.
  //
  // When the command fails, the captured output is still returned alongside
  // the error, and the error includes the underlying exec error. Callers such
  // as CreateBridgeIface put that output into their own error messages, so
  // returning an empty string on failure would hide the diagnostics.
  func ip(args ...string) (string, error) {
  	path, err := exec.LookPath("ip")
  	if err != nil {
  		return "", fmt.Errorf("command not found: ip")
  	}
  	output, err := exec.Command(path, args...).CombinedOutput()
  	if err != nil {
  		return string(output), fmt.Errorf("ip failed: ip %v: %v", strings.Join(args, " "), err)
  	}
  	return string(output), nil
  }
  // iptables runs the `iptables` command with the given arguments.
  //
  // On failure the returned error carries the full command line, whatever the
  // command printed on stdout/stderr, and the underlying exec error, so the
  // caller can see why the rule was rejected.
  func iptables(args ...string) error {
  	path, err := exec.LookPath("iptables")
  	if err != nil {
  		return fmt.Errorf("command not found: iptables")
  	}
  	if output, err := exec.Command(path, args...).CombinedOutput(); err != nil {
  		return fmt.Errorf("iptables failed: iptables %v: %s (%s)", strings.Join(args, " "), output, err)
  	}
  	return nil
  }
  84. func checkRouteOverlaps(dockerNetwork *net.IPNet) error {
  85. output, err := ip("route")
  86. if err != nil {
  87. return err
  88. }
  89. utils.Debugf("Routes:\n\n%s", output)
  90. for _, line := range strings.Split(output, "\n") {
  91. if strings.Trim(line, "\r\n\t ") == "" || strings.Contains(line, "default") {
  92. continue
  93. }
  94. if _, network, err := net.ParseCIDR(strings.Split(line, " ")[0]); err != nil {
  95. return fmt.Errorf("Unexpected ip route output: %s (%s)", err, line)
  96. } else if networkOverlaps(dockerNetwork, network) {
  97. return fmt.Errorf("Network %s is already routed: '%s'", dockerNetwork.String(), line)
  98. }
  99. }
  100. return nil
  101. }
  102. func CreateBridgeIface(ifaceName string) error {
  103. // FIXME: try more IP ranges
  104. // FIXME: try bigger ranges! /24 is too small.
  105. addrs := []string{"172.16.42.1/24", "10.0.42.1/24", "192.168.42.1/24"}
  106. var ifaceAddr string
  107. for _, addr := range addrs {
  108. _, dockerNetwork, err := net.ParseCIDR(addr)
  109. if err != nil {
  110. return err
  111. }
  112. if err := checkRouteOverlaps(dockerNetwork); err == nil {
  113. ifaceAddr = addr
  114. break
  115. } else {
  116. utils.Debugf("%s: %s", addr, err)
  117. }
  118. }
  119. if ifaceAddr == "" {
  120. return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", ifaceName, ifaceName)
  121. }
  122. utils.Debugf("Creating bridge %s with network %s", ifaceName, ifaceAddr)
  123. if output, err := ip("link", "add", ifaceName, "type", "bridge"); err != nil {
  124. return fmt.Errorf("Error creating bridge: %s (output: %s)", err, output)
  125. }
  126. if output, err := ip("addr", "add", ifaceAddr, "dev", ifaceName); err != nil {
  127. return fmt.Errorf("Unable to add private network: %s (%s)", err, output)
  128. }
  129. if output, err := ip("link", "set", ifaceName, "up"); err != nil {
  130. return fmt.Errorf("Unable to start network bridge: %s (%s)", err, output)
  131. }
  132. if err := iptables("-t", "nat", "-A", "POSTROUTING", "-s", ifaceAddr,
  133. "!", "-d", ifaceAddr, "-j", "MASQUERADE"); err != nil {
  134. return fmt.Errorf("Unable to enable network bridge NAT: %s", err)
  135. }
  136. return nil
  137. }
  // getIfaceAddr returns the first IPv4 address, with its netmask, configured
  // on the network interface called name.
  //
  // Only addresses reported as *net.IPNet are considered, so the returned
  // net.Addr can always be asserted to *net.IPNet (newNetworkManager does
  // so). Addresses of any other concrete type are skipped instead of causing
  // a panic. An error is returned if the interface cannot be looked up or has
  // no IPv4 address. If it has several, the first is used and a notice is
  // printed to stdout.
  func getIfaceAddr(name string) (net.Addr, error) {
  	iface, err := net.InterfaceByName(name)
  	if err != nil {
  		return nil, err
  	}
  	addrs, err := iface.Addrs()
  	if err != nil {
  		return nil, err
  	}
  	var addrs4 []net.Addr
  	for _, addr := range addrs {
  		ipNet, ok := addr.(*net.IPNet)
  		if !ok {
  			// Not an address-with-mask; it cannot describe the bridge network.
  			continue
  		}
  		if ipNet.IP.To4() != nil {
  			addrs4 = append(addrs4, addr)
  		}
  	}
  	switch {
  	case len(addrs4) == 0:
  		return nil, fmt.Errorf("Interface %v has no IP addresses", name)
  	case len(addrs4) > 1:
  		fmt.Printf("Interface %v has more than 1 IPv4 address. Defaulting to using %v\n",
  			name, (addrs4[0].(*net.IPNet)).IP)
  	}
  	return addrs4[0], nil
  }
  // PortMapper maps external ports to containers by installing iptables rules.
  // It remembers every mapping it created so that each one can be removed
  // again with Unmap.
  type PortMapper struct {
  	// mapping records, per external port, the TCP address it forwards to.
  	mapping map[int]net.TCPAddr

  	// proxies holds, per external port, the 127.0.0.1 listener opened by Map.
  	proxies map[int]net.Listener
  }
  171. func (mapper *PortMapper) cleanup() error {
  172. // Ignore errors - This could mean the chains were never set up
  173. iptables("-t", "nat", "-D", "PREROUTING", "-m", "addrtype", "--dst-type", "LOCAL", "-j", "DOCKER")
  174. iptables("-t", "nat", "-D", "OUTPUT", "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", "127.0.0.0/8", "-j", "DOCKER")
  175. iptables("-t", "nat", "-D", "OUTPUT", "-m", "addrtype", "--dst-type", "LOCAL", "-j", "DOCKER") // Created in versions <= 0.1.6
  176. // Also cleanup rules created by older versions, or -X might fail.
  177. iptables("-t", "nat", "-D", "PREROUTING", "-j", "DOCKER")
  178. iptables("-t", "nat", "-D", "OUTPUT", "-j", "DOCKER")
  179. iptables("-t", "nat", "-F", "DOCKER")
  180. iptables("-t", "nat", "-X", "DOCKER")
  181. mapper.mapping = make(map[int]net.TCPAddr)
  182. mapper.proxies = make(map[int]net.Listener)
  183. return nil
  184. }
  185. func (mapper *PortMapper) setup() error {
  186. if err := iptables("-t", "nat", "-N", "DOCKER"); err != nil {
  187. return fmt.Errorf("Failed to create DOCKER chain: %s", err)
  188. }
  189. if err := iptables("-t", "nat", "-A", "PREROUTING", "-m", "addrtype", "--dst-type", "LOCAL", "-j", "DOCKER"); err != nil {
  190. return fmt.Errorf("Failed to inject docker in PREROUTING chain: %s", err)
  191. }
  192. if err := iptables("-t", "nat", "-A", "OUTPUT", "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", "127.0.0.0/8", "-j", "DOCKER"); err != nil {
  193. return fmt.Errorf("Failed to inject docker in OUTPUT chain: %s", err)
  194. }
  195. return nil
  196. }
  197. func (mapper *PortMapper) iptablesForward(rule string, port int, dest net.TCPAddr) error {
  198. return iptables("-t", "nat", rule, "DOCKER", "-p", "tcp", "--dport", strconv.Itoa(port),
  199. "-j", "DNAT", "--to-destination", net.JoinHostPort(dest.IP.String(), strconv.Itoa(dest.Port)))
  200. }
  201. func (mapper *PortMapper) Map(port int, dest net.TCPAddr) error {
  202. if err := mapper.iptablesForward("-A", port, dest); err != nil {
  203. return err
  204. }
  205. mapper.mapping[port] = dest
  206. listener, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", port))
  207. if err != nil {
  208. mapper.Unmap(port)
  209. return err
  210. }
  211. mapper.proxies[port] = listener
  212. go proxy(listener, "tcp", dest.String())
  213. return nil
  214. }
  215. // proxy listens for socket connections on `listener`, and forwards them unmodified
  216. // to `proto:address`
  217. func proxy(listener net.Listener, proto, address string) error {
  218. utils.Debugf("proxying to %s:%s", proto, address)
  219. defer utils.Debugf("Done proxying to %s:%s", proto, address)
  220. for {
  221. utils.Debugf("Listening on %s", listener)
  222. src, err := listener.Accept()
  223. if err != nil {
  224. return err
  225. }
  226. utils.Debugf("Connecting to %s:%s", proto, address)
  227. dst, err := net.Dial(proto, address)
  228. if err != nil {
  229. log.Printf("Error connecting to %s:%s: %s", proto, address, err)
  230. src.Close()
  231. continue
  232. }
  233. utils.Debugf("Connected to backend, splicing")
  234. splice(src, dst)
  235. }
  236. }
  // halfSplice copies everything readable from src into dst, then closes both
  // connections (dst first) and returns the copy error, if any.
  func halfSplice(dst, src net.Conn) error {
  	// FIXME: on EOF from a tcp connection, pass WriteClose()
  	// Deferred calls run last-in first-out: dst is closed before src.
  	defer src.Close()
  	defer dst.Close()

  	_, copyErr := io.Copy(dst, src)
  	return copyErr
  }
  244. func splice(a, b net.Conn) {
  245. go halfSplice(a, b)
  246. go halfSplice(b, a)
  247. }
  248. func (mapper *PortMapper) Unmap(port int) error {
  249. dest, ok := mapper.mapping[port]
  250. if !ok {
  251. return errors.New("Port is not mapped")
  252. }
  253. if proxy, exists := mapper.proxies[port]; exists {
  254. proxy.Close()
  255. delete(mapper.proxies, port)
  256. }
  257. if err := mapper.iptablesForward("-D", port, dest); err != nil {
  258. return err
  259. }
  260. delete(mapper.mapping, port)
  261. return nil
  262. }
  263. func newPortMapper() (*PortMapper, error) {
  264. mapper := &PortMapper{}
  265. if err := mapper.cleanup(); err != nil {
  266. return nil, err
  267. }
  268. if err := mapper.setup(); err != nil {
  269. return nil, err
  270. }
  271. return mapper, nil
  272. }
  // PortAllocator hands out and takes back networking ports. The embedded
  // mutex guards inUse.
  type PortAllocator struct {
  	sync.Mutex

  	// inUse is the set of ports currently allocated.
  	inUse map[int]struct{}

  	// fountain delivers candidate ports for automatic allocation; it is fed
  	// by runFountain.
  	fountain chan int
  }
  279. func (alloc *PortAllocator) runFountain() {
  280. for {
  281. for port := portRangeStart; port < portRangeEnd; port++ {
  282. alloc.fountain <- port
  283. }
  284. }
  285. }
  286. // FIXME: Release can no longer fail, change its prototype to reflect that.
  287. func (alloc *PortAllocator) Release(port int) error {
  288. utils.Debugf("Releasing %d", port)
  289. alloc.Lock()
  290. delete(alloc.inUse, port)
  291. alloc.Unlock()
  292. return nil
  293. }
  294. func (alloc *PortAllocator) Acquire(port int) (int, error) {
  295. utils.Debugf("Acquiring %d", port)
  296. if port == 0 {
  297. // Allocate a port from the fountain
  298. for port := range alloc.fountain {
  299. if _, err := alloc.Acquire(port); err == nil {
  300. return port, nil
  301. }
  302. }
  303. return -1, fmt.Errorf("Port generator ended unexpectedly")
  304. }
  305. alloc.Lock()
  306. defer alloc.Unlock()
  307. if _, inUse := alloc.inUse[port]; inUse {
  308. return -1, fmt.Errorf("Port already in use: %d", port)
  309. }
  310. alloc.inUse[port] = struct{}{}
  311. return port, nil
  312. }
  313. func newPortAllocator() (*PortAllocator, error) {
  314. allocator := &PortAllocator{
  315. inUse: make(map[int]struct{}),
  316. fountain: make(chan int),
  317. }
  318. go allocator.runFountain()
  319. return allocator, nil
  320. }
  321. // IP allocator: Atomatically allocate and release networking ports
  322. type IPAllocator struct {
  323. network *net.IPNet
  324. queueAlloc chan allocatedIP
  325. queueReleased chan net.IP
  326. inUse map[int32]struct{}
  327. }
  // allocatedIP is the result of one allocation attempt, as sent from
  // IPAllocator.run to IPAllocator.Acquire.
  type allocatedIP struct {
  	// ip is the address the attempt ended on.
  	ip net.IP
  	// err is non-nil when no free address was found.
  	err error
  }
  332. func (alloc *IPAllocator) run() {
  333. firstIP, _ := networkRange(alloc.network)
  334. ipNum := ipToInt(firstIP)
  335. ownIP := ipToInt(alloc.network.IP)
  336. size := networkSize(alloc.network.Mask)
  337. pos := int32(1)
  338. max := size - 2 // -1 for the broadcast address, -1 for the gateway address
  339. for {
  340. var (
  341. newNum int32
  342. inUse bool
  343. )
  344. // Find first unused IP, give up after one whole round
  345. for attempt := int32(0); attempt < max; attempt++ {
  346. newNum = ipNum + pos
  347. pos = pos%max + 1
  348. // The network's IP is never okay to use
  349. if newNum == ownIP {
  350. continue
  351. }
  352. if _, inUse = alloc.inUse[newNum]; !inUse {
  353. // We found an unused IP
  354. break
  355. }
  356. }
  357. ip := allocatedIP{ip: intToIP(newNum)}
  358. if inUse {
  359. ip.err = errors.New("No unallocated IP available")
  360. }
  361. select {
  362. case alloc.queueAlloc <- ip:
  363. alloc.inUse[newNum] = struct{}{}
  364. case released := <-alloc.queueReleased:
  365. r := ipToInt(released)
  366. delete(alloc.inUse, r)
  367. if inUse {
  368. // If we couldn't allocate a new IP, the released one
  369. // will be the only free one now, so instantly use it
  370. // next time
  371. pos = r - ipNum
  372. } else {
  373. // Use same IP as last time
  374. if pos == 1 {
  375. pos = max
  376. } else {
  377. pos--
  378. }
  379. }
  380. }
  381. }
  382. }
  383. func (alloc *IPAllocator) Acquire() (net.IP, error) {
  384. ip := <-alloc.queueAlloc
  385. return ip.ip, ip.err
  386. }
  387. func (alloc *IPAllocator) Release(ip net.IP) {
  388. alloc.queueReleased <- ip
  389. }
  390. func newIPAllocator(network *net.IPNet) *IPAllocator {
  391. alloc := &IPAllocator{
  392. network: network,
  393. queueAlloc: make(chan allocatedIP),
  394. queueReleased: make(chan net.IP),
  395. inUse: make(map[int32]struct{}),
  396. }
  397. go alloc.run()
  398. return alloc
  399. }
  400. // Network interface represents the networking stack of a container
  401. type NetworkInterface struct {
  402. IPNet net.IPNet
  403. Gateway net.IP
  404. manager *NetworkManager
  405. extPorts []int
  406. }
  407. // Allocate an external TCP port and map it to the interface
  408. func (iface *NetworkInterface) AllocatePort(spec string) (*Nat, error) {
  409. nat, err := parseNat(spec)
  410. if err != nil {
  411. return nil, err
  412. }
  413. // Allocate a random port if Frontend==0
  414. extPort, err := iface.manager.portAllocator.Acquire(nat.Frontend)
  415. if err != nil {
  416. return nil, err
  417. }
  418. nat.Frontend = extPort
  419. if err := iface.manager.portMapper.Map(nat.Frontend, net.TCPAddr{IP: iface.IPNet.IP, Port: nat.Backend}); err != nil {
  420. iface.manager.portAllocator.Release(nat.Frontend)
  421. return nil, err
  422. }
  423. iface.extPorts = append(iface.extPorts, nat.Frontend)
  424. return nat, nil
  425. }
  // Nat describes one port mapping, as produced by parseNat.
  type Nat struct {
  	// Proto is the protocol name; parseNat always sets "tcp".
  	Proto string
  	// Frontend is the external port; parseNat leaves it at 0 when the spec
  	// names only a backend port.
  	Frontend int
  	// Backend is the port mapped to.
  	Backend int
  }
  431. func parseNat(spec string) (*Nat, error) {
  432. var nat Nat
  433. if strings.Contains(spec, ":") {
  434. specParts := strings.Split(spec, ":")
  435. if len(specParts) != 2 {
  436. return nil, fmt.Errorf("Invalid port format.")
  437. }
  438. // If spec starts with ':', external and internal ports must be the same.
  439. // This might fail if the requested external port is not available.
  440. var sameFrontend bool
  441. if len(specParts[0]) == 0 {
  442. sameFrontend = true
  443. } else {
  444. front, err := strconv.ParseUint(specParts[0], 10, 16)
  445. if err != nil {
  446. return nil, err
  447. }
  448. nat.Frontend = int(front)
  449. }
  450. back, err := strconv.ParseUint(specParts[1], 10, 16)
  451. if err != nil {
  452. return nil, err
  453. }
  454. nat.Backend = int(back)
  455. if sameFrontend {
  456. nat.Frontend = nat.Backend
  457. }
  458. } else {
  459. port, err := strconv.ParseUint(spec, 10, 16)
  460. if err != nil {
  461. return nil, err
  462. }
  463. nat.Backend = int(port)
  464. }
  465. nat.Proto = "tcp"
  466. return &nat, nil
  467. }
  468. // Release: Network cleanup - release all resources
  469. func (iface *NetworkInterface) Release() {
  470. for _, port := range iface.extPorts {
  471. if err := iface.manager.portMapper.Unmap(port); err != nil {
  472. log.Printf("Unable to unmap port %v: %v", port, err)
  473. }
  474. if err := iface.manager.portAllocator.Release(port); err != nil {
  475. log.Printf("Unable to release port %v: %v", port, err)
  476. }
  477. }
  478. iface.manager.ipAllocator.Release(iface.IPNet.IP)
  479. }
  480. // Network Manager manages a set of network interfaces
  481. // Only *one* manager per host machine should be used
  482. type NetworkManager struct {
  483. bridgeIface string
  484. bridgeNetwork *net.IPNet
  485. ipAllocator *IPAllocator
  486. portAllocator *PortAllocator
  487. portMapper *PortMapper
  488. }
  489. // Allocate a network interface
  490. func (manager *NetworkManager) Allocate() (*NetworkInterface, error) {
  491. ip, err := manager.ipAllocator.Acquire()
  492. if err != nil {
  493. return nil, err
  494. }
  495. iface := &NetworkInterface{
  496. IPNet: net.IPNet{IP: ip, Mask: manager.bridgeNetwork.Mask},
  497. Gateway: manager.bridgeNetwork.IP,
  498. manager: manager,
  499. }
  500. return iface, nil
  501. }
  502. func newNetworkManager(bridgeIface string) (*NetworkManager, error) {
  503. addr, err := getIfaceAddr(bridgeIface)
  504. if err != nil {
  505. // If the iface is not found, try to create it
  506. if err := CreateBridgeIface(bridgeIface); err != nil {
  507. return nil, err
  508. }
  509. addr, err = getIfaceAddr(bridgeIface)
  510. if err != nil {
  511. return nil, err
  512. }
  513. }
  514. network := addr.(*net.IPNet)
  515. ipAllocator := newIPAllocator(network)
  516. portAllocator, err := newPortAllocator()
  517. if err != nil {
  518. return nil, err
  519. }
  520. portMapper, err := newPortMapper()
  521. if err != nil {
  522. return nil, err
  523. }
  524. manager := &NetworkManager{
  525. bridgeIface: bridgeIface,
  526. bridgeNetwork: network,
  527. ipAllocator: ipAllocator,
  528. portAllocator: portAllocator,
  529. portMapper: portMapper,
  530. }
  531. return manager, nil
  532. }