service_linux.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787
  1. package libnetwork
  2. import (
  3. "fmt"
  4. "io"
  5. "io/ioutil"
  6. "net"
  7. "os"
  8. "os/exec"
  9. "path/filepath"
  10. "runtime"
  11. "strconv"
  12. "strings"
  13. "sync"
  14. "syscall"
  15. "github.com/Sirupsen/logrus"
  16. "github.com/docker/docker/pkg/reexec"
  17. "github.com/docker/libnetwork/iptables"
  18. "github.com/docker/libnetwork/ipvs"
  19. "github.com/docker/libnetwork/ns"
  20. "github.com/gogo/protobuf/proto"
  21. "github.com/vishvananda/netlink/nl"
  22. "github.com/vishvananda/netns"
  23. )
  24. func init() {
  25. reexec.Register("fwmarker", fwMarker)
  26. }
  27. func newService(name string, id string, ingressPorts []*PortConfig, aliases []string) *service {
  28. return &service{
  29. name: name,
  30. id: id,
  31. ingressPorts: ingressPorts,
  32. loadBalancers: make(map[string]*loadBalancer),
  33. aliases: aliases,
  34. }
  35. }
  36. func (c *controller) cleanupServiceBindings(cleanupNID string) {
  37. var cleanupFuncs []func()
  38. c.Lock()
  39. for _, s := range c.serviceBindings {
  40. s.Lock()
  41. for nid, lb := range s.loadBalancers {
  42. if cleanupNID != "" && nid != cleanupNID {
  43. continue
  44. }
  45. for eid, ip := range lb.backEnds {
  46. service := s
  47. loadBalancer := lb
  48. networkID := nid
  49. epID := eid
  50. epIP := ip
  51. cleanupFuncs = append(cleanupFuncs, func() {
  52. if err := c.rmServiceBinding(service.name, service.id, networkID, epID, loadBalancer.vip,
  53. service.ingressPorts, service.aliases, epIP); err != nil {
  54. logrus.Errorf("Failed to remove service bindings for service %s network %s endpoint %s while cleanup: %v",
  55. service.id, networkID, epID, err)
  56. }
  57. })
  58. }
  59. }
  60. s.Unlock()
  61. }
  62. c.Unlock()
  63. for _, f := range cleanupFuncs {
  64. f()
  65. }
  66. }
  67. func (c *controller) addServiceBinding(name, sid, nid, eid string, vip net.IP, ingressPorts []*PortConfig, aliases []string, ip net.IP) error {
  68. var (
  69. s *service
  70. addService bool
  71. )
  72. n, err := c.NetworkByID(nid)
  73. if err != nil {
  74. return err
  75. }
  76. skey := serviceKey{
  77. id: sid,
  78. ports: portConfigs(ingressPorts).String(),
  79. }
  80. c.Lock()
  81. s, ok := c.serviceBindings[skey]
  82. if !ok {
  83. // Create a new service if we are seeing this service
  84. // for the first time.
  85. s = newService(name, sid, ingressPorts, aliases)
  86. c.serviceBindings[skey] = s
  87. }
  88. c.Unlock()
  89. // Add endpoint IP to special "tasks.svc_name" so that the
  90. // applications have access to DNS RR.
  91. n.(*network).addSvcRecords("tasks."+name, ip, nil, false)
  92. for _, alias := range aliases {
  93. n.(*network).addSvcRecords("tasks."+alias, ip, nil, false)
  94. }
  95. // Add service name to vip in DNS, if vip is valid. Otherwise resort to DNS RR
  96. svcIP := vip
  97. if len(svcIP) == 0 {
  98. svcIP = ip
  99. }
  100. n.(*network).addSvcRecords(name, svcIP, nil, false)
  101. for _, alias := range aliases {
  102. n.(*network).addSvcRecords(alias, svcIP, nil, false)
  103. }
  104. s.Lock()
  105. defer s.Unlock()
  106. lb, ok := s.loadBalancers[nid]
  107. if !ok {
  108. // Create a new load balancer if we are seeing this
  109. // network attachment on the service for the first
  110. // time.
  111. lb = &loadBalancer{
  112. vip: vip,
  113. fwMark: fwMarkCtr,
  114. backEnds: make(map[string]net.IP),
  115. service: s,
  116. }
  117. fwMarkCtrMu.Lock()
  118. fwMarkCtr++
  119. fwMarkCtrMu.Unlock()
  120. s.loadBalancers[nid] = lb
  121. // Since we just created this load balancer make sure
  122. // we add a new service service in IPVS rules.
  123. addService = true
  124. }
  125. lb.backEnds[eid] = ip
  126. // Add loadbalancer service and backend in all sandboxes in
  127. // the network only if vip is valid.
  128. if len(vip) != 0 {
  129. n.(*network).addLBBackend(ip, vip, lb.fwMark, ingressPorts, addService)
  130. }
  131. return nil
  132. }
  133. func (c *controller) rmServiceBinding(name, sid, nid, eid string, vip net.IP, ingressPorts []*PortConfig, aliases []string, ip net.IP) error {
  134. var rmService bool
  135. n, err := c.NetworkByID(nid)
  136. if err != nil {
  137. return err
  138. }
  139. skey := serviceKey{
  140. id: sid,
  141. ports: portConfigs(ingressPorts).String(),
  142. }
  143. c.Lock()
  144. s, ok := c.serviceBindings[skey]
  145. if !ok {
  146. c.Unlock()
  147. return nil
  148. }
  149. c.Unlock()
  150. s.Lock()
  151. lb, ok := s.loadBalancers[nid]
  152. if !ok {
  153. s.Unlock()
  154. return nil
  155. }
  156. _, ok = lb.backEnds[eid]
  157. if !ok {
  158. s.Unlock()
  159. return nil
  160. }
  161. delete(lb.backEnds, eid)
  162. if len(lb.backEnds) == 0 {
  163. // All the backends for this service have been
  164. // removed. Time to remove the load balancer and also
  165. // remove the service entry in IPVS.
  166. rmService = true
  167. delete(s.loadBalancers, nid)
  168. }
  169. if len(s.loadBalancers) == 0 {
  170. // All loadbalancers for the service removed. Time to
  171. // remove the service itself.
  172. delete(c.serviceBindings, skey)
  173. }
  174. // Remove loadbalancer service(if needed) and backend in all
  175. // sandboxes in the network only if the vip is valid.
  176. if len(vip) != 0 {
  177. n.(*network).rmLBBackend(ip, vip, lb.fwMark, ingressPorts, rmService)
  178. }
  179. s.Unlock()
  180. // Delete the special "tasks.svc_name" backend record.
  181. n.(*network).deleteSvcRecords("tasks."+name, ip, nil, false)
  182. for _, alias := range aliases {
  183. n.(*network).deleteSvcRecords("tasks."+alias, ip, nil, false)
  184. }
  185. // If we are doing DNS RR add the endpoint IP to DNS record
  186. // right away.
  187. if len(vip) == 0 {
  188. n.(*network).deleteSvcRecords(name, ip, nil, false)
  189. for _, alias := range aliases {
  190. n.(*network).deleteSvcRecords(alias, ip, nil, false)
  191. }
  192. }
  193. // Remove the DNS record for VIP only if we are removing the service
  194. if rmService && len(vip) != 0 {
  195. n.(*network).deleteSvcRecords(name, vip, nil, false)
  196. for _, alias := range aliases {
  197. n.(*network).deleteSvcRecords(alias, vip, nil, false)
  198. }
  199. }
  200. return nil
  201. }
  202. // Get all loadbalancers on this network that is currently discovered
  203. // on this node.
  204. func (n *network) connectedLoadbalancers() []*loadBalancer {
  205. c := n.getController()
  206. serviceBindings := make([]*service, 0, len(c.serviceBindings))
  207. c.Lock()
  208. for _, s := range c.serviceBindings {
  209. serviceBindings = append(serviceBindings, s)
  210. }
  211. c.Unlock()
  212. var lbs []*loadBalancer
  213. for _, s := range serviceBindings {
  214. s.Lock()
  215. if lb, ok := s.loadBalancers[n.ID()]; ok {
  216. lbs = append(lbs, lb)
  217. }
  218. s.Unlock()
  219. }
  220. return lbs
  221. }
  222. // Populate all loadbalancers on the network that the passed endpoint
  223. // belongs to, into this sandbox.
  224. func (sb *sandbox) populateLoadbalancers(ep *endpoint) {
  225. var gwIP net.IP
  226. n := ep.getNetwork()
  227. eIP := ep.Iface().Address()
  228. if sb.ingress {
  229. // For the ingress sandbox if this is not gateway
  230. // endpoint do nothing.
  231. if ep != sb.getGatewayEndpoint() {
  232. return
  233. }
  234. // This is the gateway endpoint. Now get the ingress
  235. // network and plumb the loadbalancers.
  236. gwIP = ep.Iface().Address().IP
  237. for _, ep := range sb.getConnectedEndpoints() {
  238. if !ep.endpointInGWNetwork() {
  239. n = ep.getNetwork()
  240. eIP = ep.Iface().Address()
  241. }
  242. }
  243. }
  244. for _, lb := range n.connectedLoadbalancers() {
  245. // Skip if vip is not valid.
  246. if len(lb.vip) == 0 {
  247. continue
  248. }
  249. lb.service.Lock()
  250. addService := true
  251. for _, ip := range lb.backEnds {
  252. sb.addLBBackend(ip, lb.vip, lb.fwMark, lb.service.ingressPorts,
  253. eIP, gwIP, addService, n.ingress)
  254. addService = false
  255. }
  256. lb.service.Unlock()
  257. }
  258. }
  259. // Add loadbalancer backend to all sandboxes which has a connection to
  260. // this network. If needed add the service as well, as specified by
  261. // the addService bool.
  262. func (n *network) addLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, addService bool) {
  263. n.WalkEndpoints(func(e Endpoint) bool {
  264. ep := e.(*endpoint)
  265. if sb, ok := ep.getSandbox(); ok {
  266. if !sb.isEndpointPopulated(ep) {
  267. return false
  268. }
  269. var gwIP net.IP
  270. if ep := sb.getGatewayEndpoint(); ep != nil {
  271. gwIP = ep.Iface().Address().IP
  272. }
  273. sb.addLBBackend(ip, vip, fwMark, ingressPorts, ep.Iface().Address(), gwIP, addService, n.ingress)
  274. }
  275. return false
  276. })
  277. }
  278. // Remove loadbalancer backend from all sandboxes which has a
  279. // connection to this network. If needed remove the service entry as
  280. // well, as specified by the rmService bool.
  281. func (n *network) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, rmService bool) {
  282. n.WalkEndpoints(func(e Endpoint) bool {
  283. ep := e.(*endpoint)
  284. if sb, ok := ep.getSandbox(); ok {
  285. if !sb.isEndpointPopulated(ep) {
  286. return false
  287. }
  288. var gwIP net.IP
  289. if ep := sb.getGatewayEndpoint(); ep != nil {
  290. gwIP = ep.Iface().Address().IP
  291. }
  292. sb.rmLBBackend(ip, vip, fwMark, ingressPorts, ep.Iface().Address(), gwIP, rmService, n.ingress)
  293. }
  294. return false
  295. })
  296. }
  297. // Add loadbalancer backend into one connected sandbox.
  298. func (sb *sandbox) addLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, gwIP net.IP, addService bool, isIngressNetwork bool) {
  299. if sb.osSbox == nil {
  300. return
  301. }
  302. if isIngressNetwork && !sb.ingress {
  303. return
  304. }
  305. i, err := ipvs.New(sb.Key())
  306. if err != nil {
  307. logrus.Errorf("Failed to create an ipvs handle for sbox %s: %v", sb.Key(), err)
  308. return
  309. }
  310. defer i.Close()
  311. s := &ipvs.Service{
  312. AddressFamily: nl.FAMILY_V4,
  313. FWMark: fwMark,
  314. SchedName: ipvs.RoundRobin,
  315. }
  316. if addService {
  317. var iPorts []*PortConfig
  318. if sb.ingress {
  319. iPorts = filterPortConfigs(ingressPorts, false)
  320. if err := programIngress(gwIP, iPorts, false); err != nil {
  321. logrus.Errorf("Failed to add ingress: %v", err)
  322. return
  323. }
  324. }
  325. logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v", vip, fwMark, iPorts)
  326. if err := invokeFWMarker(sb.Key(), vip, fwMark, iPorts, eIP, false); err != nil {
  327. logrus.Errorf("Failed to add firewall mark rule in sbox %s: %v", sb.Key(), err)
  328. return
  329. }
  330. if err := i.NewService(s); err != nil {
  331. logrus.Errorf("Failed to create a new service for vip %s fwmark %d: %v", vip, fwMark, err)
  332. return
  333. }
  334. }
  335. d := &ipvs.Destination{
  336. AddressFamily: nl.FAMILY_V4,
  337. Address: ip,
  338. Weight: 1,
  339. }
  340. // Remove the sched name before using the service to add
  341. // destination.
  342. s.SchedName = ""
  343. if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
  344. logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sb %s: %v", ip, vip, fwMark, sb.containerID, err)
  345. }
  346. }
  347. // Remove loadbalancer backend from one connected sandbox.
  348. func (sb *sandbox) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, gwIP net.IP, rmService bool, isIngressNetwork bool) {
  349. if sb.osSbox == nil {
  350. return
  351. }
  352. if isIngressNetwork && !sb.ingress {
  353. return
  354. }
  355. i, err := ipvs.New(sb.Key())
  356. if err != nil {
  357. logrus.Errorf("Failed to create an ipvs handle for sbox %s: %v", sb.Key(), err)
  358. return
  359. }
  360. defer i.Close()
  361. s := &ipvs.Service{
  362. AddressFamily: nl.FAMILY_V4,
  363. FWMark: fwMark,
  364. }
  365. d := &ipvs.Destination{
  366. AddressFamily: nl.FAMILY_V4,
  367. Address: ip,
  368. Weight: 1,
  369. }
  370. if err := i.DelDestination(s, d); err != nil {
  371. logrus.Infof("Failed to delete real server %s for vip %s fwmark %d: %v", ip, vip, fwMark, err)
  372. }
  373. if rmService {
  374. s.SchedName = ipvs.RoundRobin
  375. if err := i.DelService(s); err != nil {
  376. logrus.Errorf("Failed to delete a new service for vip %s fwmark %d: %v", vip, fwMark, err)
  377. }
  378. var iPorts []*PortConfig
  379. if sb.ingress {
  380. iPorts = filterPortConfigs(ingressPorts, true)
  381. if err := programIngress(gwIP, iPorts, true); err != nil {
  382. logrus.Errorf("Failed to delete ingress: %v", err)
  383. }
  384. }
  385. if err := invokeFWMarker(sb.Key(), vip, fwMark, iPorts, eIP, true); err != nil {
  386. logrus.Errorf("Failed to add firewall mark rule in sbox %s: %v", sb.Key(), err)
  387. }
  388. }
  389. }
  390. const ingressChain = "DOCKER-INGRESS"
  391. var (
  392. ingressOnce sync.Once
  393. ingressProxyMu sync.Mutex
  394. ingressProxyTbl = make(map[string]io.Closer)
  395. portConfigMu sync.Mutex
  396. portConfigTbl = make(map[PortConfig]int)
  397. )
  398. func filterPortConfigs(ingressPorts []*PortConfig, isDelete bool) []*PortConfig {
  399. portConfigMu.Lock()
  400. iPorts := make([]*PortConfig, 0, len(ingressPorts))
  401. for _, pc := range ingressPorts {
  402. if isDelete {
  403. if cnt, ok := portConfigTbl[*pc]; ok {
  404. // This is the last reference to this
  405. // port config. Delete the port config
  406. // and add it to filtered list to be
  407. // plumbed.
  408. if cnt == 1 {
  409. delete(portConfigTbl, *pc)
  410. iPorts = append(iPorts, pc)
  411. continue
  412. }
  413. portConfigTbl[*pc] = cnt - 1
  414. }
  415. continue
  416. }
  417. if cnt, ok := portConfigTbl[*pc]; ok {
  418. portConfigTbl[*pc] = cnt + 1
  419. continue
  420. }
  421. // We are adding it for the first time. Add it to the
  422. // filter list to be plumbed.
  423. portConfigTbl[*pc] = 1
  424. iPorts = append(iPorts, pc)
  425. }
  426. portConfigMu.Unlock()
  427. return iPorts
  428. }
  429. func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) error {
  430. addDelOpt := "-I"
  431. if isDelete {
  432. addDelOpt = "-D"
  433. }
  434. chainExists := iptables.ExistChain(ingressChain, iptables.Nat)
  435. ingressOnce.Do(func() {
  436. if chainExists {
  437. // Flush ingress chain rules during init if it
  438. // exists. It might contain stale rules from
  439. // previous life.
  440. if err := iptables.RawCombinedOutput("-t", "nat", "-F", ingressChain); err != nil {
  441. logrus.Errorf("Could not flush ingress chain rules during init: %v", err)
  442. }
  443. }
  444. })
  445. if !isDelete {
  446. if !chainExists {
  447. if err := iptables.RawCombinedOutput("-t", "nat", "-N", ingressChain); err != nil {
  448. return fmt.Errorf("failed to create ingress chain: %v", err)
  449. }
  450. }
  451. if !iptables.Exists(iptables.Nat, ingressChain, "-j", "RETURN") {
  452. if err := iptables.RawCombinedOutput("-t", "nat", "-A", ingressChain, "-j", "RETURN"); err != nil {
  453. return fmt.Errorf("failed to add return rule in ingress chain: %v", err)
  454. }
  455. }
  456. for _, chain := range []string{"OUTPUT", "PREROUTING"} {
  457. if !iptables.Exists(iptables.Nat, chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain) {
  458. if err := iptables.RawCombinedOutput("-t", "nat", "-I", chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain); err != nil {
  459. return fmt.Errorf("failed to add jump rule in %s to ingress chain: %v", chain, err)
  460. }
  461. }
  462. }
  463. oifName, err := findOIFName(gwIP)
  464. if err != nil {
  465. return fmt.Errorf("failed to find gateway bridge interface name for %s: %v", gwIP, err)
  466. }
  467. path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet")
  468. if err := ioutil.WriteFile(path, []byte{'1', '\n'}, 0644); err != nil {
  469. return fmt.Errorf("could not write to %s: %v", path, err)
  470. }
  471. ruleArgs := strings.Fields(fmt.Sprintf("-m addrtype --src-type LOCAL -o %s -j MASQUERADE", oifName))
  472. if !iptables.Exists(iptables.Nat, "POSTROUTING", ruleArgs...) {
  473. if err := iptables.RawCombinedOutput(append([]string{"-t", "nat", "-I", "POSTROUTING"}, ruleArgs...)...); err != nil {
  474. return fmt.Errorf("failed to add ingress localhost POSTROUTING rule for %s: %v", oifName, err)
  475. }
  476. }
  477. }
  478. for _, iPort := range ingressPorts {
  479. if iptables.ExistChain(ingressChain, iptables.Nat) {
  480. rule := strings.Fields(fmt.Sprintf("-t nat %s %s -p %s --dport %d -j DNAT --to-destination %s:%d",
  481. addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, gwIP, iPort.PublishedPort))
  482. if err := iptables.RawCombinedOutput(rule...); err != nil {
  483. errStr := fmt.Sprintf("setting up rule failed, %v: %v", rule, err)
  484. if !isDelete {
  485. return fmt.Errorf("%s", errStr)
  486. }
  487. logrus.Infof("%s", errStr)
  488. }
  489. }
  490. if err := plumbProxy(iPort, isDelete); err != nil {
  491. logrus.Warnf("failed to create proxy for port %d: %v", iPort.PublishedPort, err)
  492. }
  493. }
  494. return nil
  495. }
  496. func findOIFName(ip net.IP) (string, error) {
  497. nlh := ns.NlHandle()
  498. routes, err := nlh.RouteGet(ip)
  499. if err != nil {
  500. return "", err
  501. }
  502. if len(routes) == 0 {
  503. return "", fmt.Errorf("no route to %s", ip)
  504. }
  505. // Pick the first route(typically there is only one route). We
  506. // don't support multipath.
  507. link, err := nlh.LinkByIndex(routes[0].LinkIndex)
  508. if err != nil {
  509. return "", err
  510. }
  511. return link.Attrs().Name, nil
  512. }
  513. func plumbProxy(iPort *PortConfig, isDelete bool) error {
  514. var (
  515. err error
  516. l io.Closer
  517. )
  518. portSpec := fmt.Sprintf("%d/%s", iPort.PublishedPort, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]))
  519. if isDelete {
  520. ingressProxyMu.Lock()
  521. if listener, ok := ingressProxyTbl[portSpec]; ok {
  522. if listener != nil {
  523. listener.Close()
  524. }
  525. }
  526. ingressProxyMu.Unlock()
  527. return nil
  528. }
  529. switch iPort.Protocol {
  530. case ProtocolTCP:
  531. l, err = net.ListenTCP("tcp", &net.TCPAddr{Port: int(iPort.PublishedPort)})
  532. case ProtocolUDP:
  533. l, err = net.ListenUDP("udp", &net.UDPAddr{Port: int(iPort.PublishedPort)})
  534. }
  535. if err != nil {
  536. return err
  537. }
  538. ingressProxyMu.Lock()
  539. ingressProxyTbl[portSpec] = l
  540. ingressProxyMu.Unlock()
  541. return nil
  542. }
  543. // Invoke fwmarker reexec routine to mark vip destined packets with
  544. // the passed firewall mark.
  545. func invokeFWMarker(path string, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool) error {
  546. var ingressPortsFile string
  547. if len(ingressPorts) != 0 {
  548. f, err := ioutil.TempFile("", "port_configs")
  549. if err != nil {
  550. return err
  551. }
  552. buf, err := proto.Marshal(&EndpointRecord{
  553. IngressPorts: ingressPorts,
  554. })
  555. n, err := f.Write(buf)
  556. if err != nil {
  557. f.Close()
  558. return err
  559. }
  560. if n < len(buf) {
  561. f.Close()
  562. return io.ErrShortWrite
  563. }
  564. ingressPortsFile = f.Name()
  565. f.Close()
  566. }
  567. addDelOpt := "-A"
  568. if isDelete {
  569. addDelOpt = "-D"
  570. }
  571. cmd := &exec.Cmd{
  572. Path: reexec.Self(),
  573. Args: append([]string{"fwmarker"}, path, vip.String(), fmt.Sprintf("%d", fwMark), addDelOpt, ingressPortsFile, eIP.String()),
  574. Stdout: os.Stdout,
  575. Stderr: os.Stderr,
  576. }
  577. if err := cmd.Run(); err != nil {
  578. return fmt.Errorf("reexec failed: %v", err)
  579. }
  580. return nil
  581. }
  582. // Firewall marker reexec function.
  583. func fwMarker() {
  584. runtime.LockOSThread()
  585. defer runtime.UnlockOSThread()
  586. if len(os.Args) < 7 {
  587. logrus.Error("invalid number of arguments..")
  588. os.Exit(1)
  589. }
  590. var ingressPorts []*PortConfig
  591. if os.Args[5] != "" {
  592. buf, err := ioutil.ReadFile(os.Args[5])
  593. if err != nil {
  594. logrus.Errorf("Failed to read ports config file: %v", err)
  595. os.Exit(6)
  596. }
  597. var epRec EndpointRecord
  598. err = proto.Unmarshal(buf, &epRec)
  599. if err != nil {
  600. logrus.Errorf("Failed to unmarshal ports config data: %v", err)
  601. os.Exit(7)
  602. }
  603. ingressPorts = epRec.IngressPorts
  604. }
  605. vip := os.Args[2]
  606. fwMark, err := strconv.ParseUint(os.Args[3], 10, 32)
  607. if err != nil {
  608. logrus.Errorf("bad fwmark value(%s) passed: %v", os.Args[3], err)
  609. os.Exit(2)
  610. }
  611. addDelOpt := os.Args[4]
  612. rules := [][]string{}
  613. for _, iPort := range ingressPorts {
  614. rule := strings.Fields(fmt.Sprintf("-t nat %s PREROUTING -p %s --dport %d -j REDIRECT --to-port %d",
  615. addDelOpt, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, iPort.TargetPort))
  616. rules = append(rules, rule)
  617. rule = strings.Fields(fmt.Sprintf("-t mangle %s PREROUTING -p %s --dport %d -j MARK --set-mark %d",
  618. addDelOpt, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, fwMark))
  619. rules = append(rules, rule)
  620. }
  621. ns, err := netns.GetFromPath(os.Args[1])
  622. if err != nil {
  623. logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
  624. os.Exit(3)
  625. }
  626. defer ns.Close()
  627. if err := netns.Set(ns); err != nil {
  628. logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
  629. os.Exit(4)
  630. }
  631. if addDelOpt == "-A" {
  632. eIP, subnet, err := net.ParseCIDR(os.Args[6])
  633. if err != nil {
  634. logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err)
  635. os.Exit(9)
  636. }
  637. ruleParams := strings.Fields(fmt.Sprintf("-m ipvs --ipvs -d %s -j SNAT --to-source %s", subnet, eIP))
  638. if !iptables.Exists("nat", "POSTROUTING", ruleParams...) {
  639. rule := append(strings.Fields("-t nat -A POSTROUTING"), ruleParams...)
  640. rules = append(rules, rule)
  641. err := ioutil.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
  642. if err != nil {
  643. logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err)
  644. os.Exit(8)
  645. }
  646. }
  647. }
  648. rule := strings.Fields(fmt.Sprintf("-t mangle %s OUTPUT -d %s/32 -j MARK --set-mark %d", addDelOpt, vip, fwMark))
  649. rules = append(rules, rule)
  650. for _, rule := range rules {
  651. if err := iptables.RawCombinedOutputNative(rule...); err != nil {
  652. logrus.Errorf("setting up rule failed, %v: %v", rule, err)
  653. os.Exit(5)
  654. }
  655. }
  656. }