service_linux.go 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796
  1. package libnetwork
  2. import (
  3. "fmt"
  4. "io"
  5. "io/ioutil"
  6. "net"
  7. "os"
  8. "os/exec"
  9. "path/filepath"
  10. "runtime"
  11. "strconv"
  12. "strings"
  13. "sync"
  14. "syscall"
  15. "github.com/docker/docker/pkg/reexec"
  16. "github.com/docker/libnetwork/iptables"
  17. "github.com/docker/libnetwork/ipvs"
  18. "github.com/docker/libnetwork/ns"
  19. "github.com/gogo/protobuf/proto"
  20. "github.com/ishidawataru/sctp"
  21. "github.com/sirupsen/logrus"
  22. "github.com/vishvananda/netlink/nl"
  23. "github.com/vishvananda/netns"
  24. )
  25. func init() {
  26. reexec.Register("fwmarker", fwMarker)
  27. reexec.Register("redirecter", redirecter)
  28. }
  29. // Get all loadbalancers on this network that is currently discovered
  30. // on this node.
  31. func (n *network) connectedLoadbalancers() []*loadBalancer {
  32. c := n.getController()
  33. c.Lock()
  34. serviceBindings := make([]*service, 0, len(c.serviceBindings))
  35. for _, s := range c.serviceBindings {
  36. serviceBindings = append(serviceBindings, s)
  37. }
  38. c.Unlock()
  39. var lbs []*loadBalancer
  40. for _, s := range serviceBindings {
  41. s.Lock()
  42. // Skip the serviceBindings that got deleted
  43. if s.deleted {
  44. s.Unlock()
  45. continue
  46. }
  47. if lb, ok := s.loadBalancers[n.ID()]; ok {
  48. lbs = append(lbs, lb)
  49. }
  50. s.Unlock()
  51. }
  52. return lbs
  53. }
  54. // Populate all loadbalancers on the network that the passed endpoint
  55. // belongs to, into this sandbox.
  56. func (sb *sandbox) populateLoadbalancers(ep *endpoint) {
  57. var gwIP net.IP
  58. // This is an interface less endpoint. Nothing to do.
  59. if ep.Iface() == nil {
  60. return
  61. }
  62. n := ep.getNetwork()
  63. eIP := ep.Iface().Address()
  64. if n.ingress {
  65. if err := addRedirectRules(sb.Key(), eIP, ep.ingressPorts); err != nil {
  66. logrus.Errorf("Failed to add redirect rules for ep %s (%s): %v", ep.Name(), ep.ID()[0:7], err)
  67. }
  68. }
  69. if sb.ingress {
  70. // For the ingress sandbox if this is not gateway
  71. // endpoint do nothing.
  72. if ep != sb.getGatewayEndpoint() {
  73. return
  74. }
  75. // This is the gateway endpoint. Now get the ingress
  76. // network and plumb the loadbalancers.
  77. gwIP = ep.Iface().Address().IP
  78. for _, ep := range sb.getConnectedEndpoints() {
  79. if !ep.endpointInGWNetwork() {
  80. n = ep.getNetwork()
  81. eIP = ep.Iface().Address()
  82. }
  83. }
  84. }
  85. for _, lb := range n.connectedLoadbalancers() {
  86. // Skip if vip is not valid.
  87. if len(lb.vip) == 0 {
  88. continue
  89. }
  90. lb.service.Lock()
  91. for _, be := range lb.backEnds {
  92. if !be.disabled {
  93. sb.addLBBackend(be.ip, lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, gwIP, n.ingress)
  94. }
  95. }
  96. lb.service.Unlock()
  97. }
  98. }
  99. // Add loadbalancer backend to all sandboxes which has a connection to
  100. // this network. If needed add the service as well.
  101. func (n *network) addLBBackend(ip, vip net.IP, lb *loadBalancer, ingressPorts []*PortConfig) {
  102. n.WalkEndpoints(func(e Endpoint) bool {
  103. ep := e.(*endpoint)
  104. if sb, ok := ep.getSandbox(); ok {
  105. if !sb.isEndpointPopulated(ep) {
  106. return false
  107. }
  108. var gwIP net.IP
  109. if ep := sb.getGatewayEndpoint(); ep != nil {
  110. gwIP = ep.Iface().Address().IP
  111. }
  112. sb.addLBBackend(ip, vip, lb.fwMark, ingressPorts, ep.Iface().Address(), gwIP, n.ingress)
  113. }
  114. return false
  115. })
  116. }
  117. // Remove loadbalancer backend from all sandboxes which has a
  118. // connection to this network. If needed remove the service entry as
  119. // well, as specified by the rmService bool.
  120. func (n *network) rmLBBackend(ip, vip net.IP, lb *loadBalancer, ingressPorts []*PortConfig, rmService bool, fullRemove bool) {
  121. n.WalkEndpoints(func(e Endpoint) bool {
  122. ep := e.(*endpoint)
  123. if sb, ok := ep.getSandbox(); ok {
  124. if !sb.isEndpointPopulated(ep) {
  125. return false
  126. }
  127. var gwIP net.IP
  128. if ep := sb.getGatewayEndpoint(); ep != nil {
  129. gwIP = ep.Iface().Address().IP
  130. }
  131. sb.rmLBBackend(ip, vip, lb.fwMark, ingressPorts, ep.Iface().Address(), gwIP, rmService, fullRemove, n.ingress)
  132. }
  133. return false
  134. })
  135. }
  136. // Add loadbalancer backend into one connected sandbox.
  137. func (sb *sandbox) addLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, gwIP net.IP, isIngressNetwork bool) {
  138. if sb.osSbox == nil {
  139. return
  140. }
  141. if isIngressNetwork && !sb.ingress {
  142. return
  143. }
  144. i, err := ipvs.New(sb.Key())
  145. if err != nil {
  146. logrus.Errorf("Failed to create an ipvs handle for sbox %s (%s,%s) for lb addition: %v", sb.ID()[0:7], sb.ContainerID()[0:7], sb.Key(), err)
  147. return
  148. }
  149. defer i.Close()
  150. s := &ipvs.Service{
  151. AddressFamily: nl.FAMILY_V4,
  152. FWMark: fwMark,
  153. SchedName: ipvs.RoundRobin,
  154. }
  155. if !i.IsServicePresent(s) {
  156. var filteredPorts []*PortConfig
  157. if sb.ingress {
  158. filteredPorts = filterPortConfigs(ingressPorts, false)
  159. if err := programIngress(gwIP, filteredPorts, false); err != nil {
  160. logrus.Errorf("Failed to add ingress: %v", err)
  161. return
  162. }
  163. }
  164. logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %s (%s)", vip, fwMark, ingressPorts, sb.ID()[0:7], sb.ContainerID()[0:7])
  165. if err := invokeFWMarker(sb.Key(), vip, fwMark, ingressPorts, eIP, false); err != nil {
  166. logrus.Errorf("Failed to add firewall mark rule in sbox %s (%s): %v", sb.ID()[0:7], sb.ContainerID()[0:7], err)
  167. return
  168. }
  169. if err := i.NewService(s); err != nil && err != syscall.EEXIST {
  170. logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %s (%s): %v", vip, fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
  171. return
  172. }
  173. }
  174. d := &ipvs.Destination{
  175. AddressFamily: nl.FAMILY_V4,
  176. Address: ip,
  177. Weight: 1,
  178. }
  179. // Remove the sched name before using the service to add
  180. // destination.
  181. s.SchedName = ""
  182. if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
  183. logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %s (%s): %v", ip, vip, fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
  184. }
  185. }
  186. // Remove loadbalancer backend from one connected sandbox.
  187. func (sb *sandbox) rmLBBackend(ip, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, gwIP net.IP, rmService bool, fullRemove bool, isIngressNetwork bool) {
  188. if sb.osSbox == nil {
  189. return
  190. }
  191. if isIngressNetwork && !sb.ingress {
  192. return
  193. }
  194. i, err := ipvs.New(sb.Key())
  195. if err != nil {
  196. logrus.Errorf("Failed to create an ipvs handle for sbox %s (%s,%s) for lb removal: %v", sb.ID()[0:7], sb.ContainerID()[0:7], sb.Key(), err)
  197. return
  198. }
  199. defer i.Close()
  200. s := &ipvs.Service{
  201. AddressFamily: nl.FAMILY_V4,
  202. FWMark: fwMark,
  203. }
  204. d := &ipvs.Destination{
  205. AddressFamily: nl.FAMILY_V4,
  206. Address: ip,
  207. Weight: 1,
  208. }
  209. if fullRemove {
  210. if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
  211. logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %s (%s): %v", ip, vip, fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
  212. }
  213. } else {
  214. d.Weight = 0
  215. if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
  216. logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %s (%s): %v", ip, vip, fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
  217. }
  218. }
  219. if rmService {
  220. s.SchedName = ipvs.RoundRobin
  221. if err := i.DelService(s); err != nil && err != syscall.ENOENT {
  222. logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %s (%s): %v", vip, fwMark, sb.ID()[0:7], sb.ContainerID()[0:7], err)
  223. }
  224. var filteredPorts []*PortConfig
  225. if sb.ingress {
  226. filteredPorts = filterPortConfigs(ingressPorts, true)
  227. if err := programIngress(gwIP, filteredPorts, true); err != nil {
  228. logrus.Errorf("Failed to delete ingress: %v", err)
  229. }
  230. }
  231. if err := invokeFWMarker(sb.Key(), vip, fwMark, ingressPorts, eIP, true); err != nil {
  232. logrus.Errorf("Failed to delete firewall mark rule in sbox %s (%s): %v", sb.ID()[0:7], sb.ContainerID()[0:7], err)
  233. }
  234. }
  235. }
  236. const ingressChain = "DOCKER-INGRESS"
  237. var (
  238. ingressOnce sync.Once
  239. ingressProxyMu sync.Mutex
  240. ingressProxyTbl = make(map[string]io.Closer)
  241. portConfigMu sync.Mutex
  242. portConfigTbl = make(map[PortConfig]int)
  243. )
  244. func filterPortConfigs(ingressPorts []*PortConfig, isDelete bool) []*PortConfig {
  245. portConfigMu.Lock()
  246. iPorts := make([]*PortConfig, 0, len(ingressPorts))
  247. for _, pc := range ingressPorts {
  248. if isDelete {
  249. if cnt, ok := portConfigTbl[*pc]; ok {
  250. // This is the last reference to this
  251. // port config. Delete the port config
  252. // and add it to filtered list to be
  253. // plumbed.
  254. if cnt == 1 {
  255. delete(portConfigTbl, *pc)
  256. iPorts = append(iPorts, pc)
  257. continue
  258. }
  259. portConfigTbl[*pc] = cnt - 1
  260. }
  261. continue
  262. }
  263. if cnt, ok := portConfigTbl[*pc]; ok {
  264. portConfigTbl[*pc] = cnt + 1
  265. continue
  266. }
  267. // We are adding it for the first time. Add it to the
  268. // filter list to be plumbed.
  269. portConfigTbl[*pc] = 1
  270. iPorts = append(iPorts, pc)
  271. }
  272. portConfigMu.Unlock()
  273. return iPorts
  274. }
  275. func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) error {
  276. addDelOpt := "-I"
  277. if isDelete {
  278. addDelOpt = "-D"
  279. }
  280. chainExists := iptables.ExistChain(ingressChain, iptables.Nat)
  281. filterChainExists := iptables.ExistChain(ingressChain, iptables.Filter)
  282. ingressOnce.Do(func() {
  283. // Flush nat table and filter table ingress chain rules during init if it
  284. // exists. It might contain stale rules from previous life.
  285. if chainExists {
  286. if err := iptables.RawCombinedOutput("-t", "nat", "-F", ingressChain); err != nil {
  287. logrus.Errorf("Could not flush nat table ingress chain rules during init: %v", err)
  288. }
  289. }
  290. if filterChainExists {
  291. if err := iptables.RawCombinedOutput("-F", ingressChain); err != nil {
  292. logrus.Errorf("Could not flush filter table ingress chain rules during init: %v", err)
  293. }
  294. }
  295. })
  296. if !isDelete {
  297. if !chainExists {
  298. if err := iptables.RawCombinedOutput("-t", "nat", "-N", ingressChain); err != nil {
  299. return fmt.Errorf("failed to create ingress chain: %v", err)
  300. }
  301. }
  302. if !filterChainExists {
  303. if err := iptables.RawCombinedOutput("-N", ingressChain); err != nil {
  304. return fmt.Errorf("failed to create filter table ingress chain: %v", err)
  305. }
  306. }
  307. if !iptables.Exists(iptables.Nat, ingressChain, "-j", "RETURN") {
  308. if err := iptables.RawCombinedOutput("-t", "nat", "-A", ingressChain, "-j", "RETURN"); err != nil {
  309. return fmt.Errorf("failed to add return rule in nat table ingress chain: %v", err)
  310. }
  311. }
  312. if !iptables.Exists(iptables.Filter, ingressChain, "-j", "RETURN") {
  313. if err := iptables.RawCombinedOutput("-A", ingressChain, "-j", "RETURN"); err != nil {
  314. return fmt.Errorf("failed to add return rule to filter table ingress chain: %v", err)
  315. }
  316. }
  317. for _, chain := range []string{"OUTPUT", "PREROUTING"} {
  318. if !iptables.Exists(iptables.Nat, chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain) {
  319. if err := iptables.RawCombinedOutput("-t", "nat", "-I", chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain); err != nil {
  320. return fmt.Errorf("failed to add jump rule in %s to ingress chain: %v", chain, err)
  321. }
  322. }
  323. }
  324. if !iptables.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
  325. if err := iptables.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
  326. return fmt.Errorf("failed to add jump rule to %s in filter table forward chain: %v", ingressChain, err)
  327. }
  328. arrangeUserFilterRule()
  329. }
  330. oifName, err := findOIFName(gwIP)
  331. if err != nil {
  332. return fmt.Errorf("failed to find gateway bridge interface name for %s: %v", gwIP, err)
  333. }
  334. path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet")
  335. if err := ioutil.WriteFile(path, []byte{'1', '\n'}, 0644); err != nil {
  336. return fmt.Errorf("could not write to %s: %v", path, err)
  337. }
  338. ruleArgs := strings.Fields(fmt.Sprintf("-m addrtype --src-type LOCAL -o %s -j MASQUERADE", oifName))
  339. if !iptables.Exists(iptables.Nat, "POSTROUTING", ruleArgs...) {
  340. if err := iptables.RawCombinedOutput(append([]string{"-t", "nat", "-I", "POSTROUTING"}, ruleArgs...)...); err != nil {
  341. return fmt.Errorf("failed to add ingress localhost POSTROUTING rule for %s: %v", oifName, err)
  342. }
  343. }
  344. }
  345. for _, iPort := range ingressPorts {
  346. if iptables.ExistChain(ingressChain, iptables.Nat) {
  347. rule := strings.Fields(fmt.Sprintf("-t nat %s %s -p %s --dport %d -j DNAT --to-destination %s:%d",
  348. addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, gwIP, iPort.PublishedPort))
  349. if err := iptables.RawCombinedOutput(rule...); err != nil {
  350. errStr := fmt.Sprintf("setting up rule failed, %v: %v", rule, err)
  351. if !isDelete {
  352. return fmt.Errorf("%s", errStr)
  353. }
  354. logrus.Infof("%s", errStr)
  355. }
  356. }
  357. // Filter table rules to allow a published service to be accessible in the local node from..
  358. // 1) service tasks attached to other networks
  359. // 2) unmanaged containers on bridge networks
  360. rule := strings.Fields(fmt.Sprintf("%s %s -m state -p %s --sport %d --state ESTABLISHED,RELATED -j ACCEPT",
  361. addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort))
  362. if err := iptables.RawCombinedOutput(rule...); err != nil {
  363. errStr := fmt.Sprintf("setting up rule failed, %v: %v", rule, err)
  364. if !isDelete {
  365. return fmt.Errorf("%s", errStr)
  366. }
  367. logrus.Warnf("%s", errStr)
  368. }
  369. rule = strings.Fields(fmt.Sprintf("%s %s -p %s --dport %d -j ACCEPT",
  370. addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort))
  371. if err := iptables.RawCombinedOutput(rule...); err != nil {
  372. errStr := fmt.Sprintf("setting up rule failed, %v: %v", rule, err)
  373. if !isDelete {
  374. return fmt.Errorf("%s", errStr)
  375. }
  376. logrus.Warnf("%s", errStr)
  377. }
  378. if err := plumbProxy(iPort, isDelete); err != nil {
  379. logrus.Warnf("failed to create proxy for port %d: %v", iPort.PublishedPort, err)
  380. }
  381. }
  382. return nil
  383. }
  384. // In the filter table FORWARD chain the first rule should be to jump to
  385. // DOCKER-USER so the user is able to filter packet first.
  386. // The second rule should be jump to INGRESS-CHAIN.
  387. // This chain has the rules to allow access to the published ports for swarm tasks
  388. // from local bridge networks and docker_gwbridge (ie:taks on other swarm netwroks)
  389. func arrangeIngressFilterRule() {
  390. if iptables.ExistChain(ingressChain, iptables.Filter) {
  391. if iptables.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
  392. if err := iptables.RawCombinedOutput("-D", "FORWARD", "-j", ingressChain); err != nil {
  393. logrus.Warnf("failed to delete jump rule to ingressChain in filter table: %v", err)
  394. }
  395. }
  396. if err := iptables.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
  397. logrus.Warnf("failed to add jump rule to ingressChain in filter table: %v", err)
  398. }
  399. }
  400. }
  401. func findOIFName(ip net.IP) (string, error) {
  402. nlh := ns.NlHandle()
  403. routes, err := nlh.RouteGet(ip)
  404. if err != nil {
  405. return "", err
  406. }
  407. if len(routes) == 0 {
  408. return "", fmt.Errorf("no route to %s", ip)
  409. }
  410. // Pick the first route(typically there is only one route). We
  411. // don't support multipath.
  412. link, err := nlh.LinkByIndex(routes[0].LinkIndex)
  413. if err != nil {
  414. return "", err
  415. }
  416. return link.Attrs().Name, nil
  417. }
  418. func plumbProxy(iPort *PortConfig, isDelete bool) error {
  419. var (
  420. err error
  421. l io.Closer
  422. )
  423. portSpec := fmt.Sprintf("%d/%s", iPort.PublishedPort, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]))
  424. if isDelete {
  425. ingressProxyMu.Lock()
  426. if listener, ok := ingressProxyTbl[portSpec]; ok {
  427. if listener != nil {
  428. listener.Close()
  429. }
  430. }
  431. ingressProxyMu.Unlock()
  432. return nil
  433. }
  434. switch iPort.Protocol {
  435. case ProtocolTCP:
  436. l, err = net.ListenTCP("tcp", &net.TCPAddr{Port: int(iPort.PublishedPort)})
  437. case ProtocolUDP:
  438. l, err = net.ListenUDP("udp", &net.UDPAddr{Port: int(iPort.PublishedPort)})
  439. case ProtocolSCTP:
  440. l, err = sctp.ListenSCTP("sctp", &sctp.SCTPAddr{Port: int(iPort.PublishedPort)})
  441. default:
  442. err = fmt.Errorf("unknown protocol %v", iPort.Protocol)
  443. }
  444. if err != nil {
  445. return err
  446. }
  447. ingressProxyMu.Lock()
  448. ingressProxyTbl[portSpec] = l
  449. ingressProxyMu.Unlock()
  450. return nil
  451. }
  452. func writePortsToFile(ports []*PortConfig) (string, error) {
  453. f, err := ioutil.TempFile("", "port_configs")
  454. if err != nil {
  455. return "", err
  456. }
  457. defer f.Close()
  458. buf, _ := proto.Marshal(&EndpointRecord{
  459. IngressPorts: ports,
  460. })
  461. n, err := f.Write(buf)
  462. if err != nil {
  463. return "", err
  464. }
  465. if n < len(buf) {
  466. return "", io.ErrShortWrite
  467. }
  468. return f.Name(), nil
  469. }
  470. func readPortsFromFile(fileName string) ([]*PortConfig, error) {
  471. buf, err := ioutil.ReadFile(fileName)
  472. if err != nil {
  473. return nil, err
  474. }
  475. var epRec EndpointRecord
  476. err = proto.Unmarshal(buf, &epRec)
  477. if err != nil {
  478. return nil, err
  479. }
  480. return epRec.IngressPorts, nil
  481. }
  482. // Invoke fwmarker reexec routine to mark vip destined packets with
  483. // the passed firewall mark.
  484. func invokeFWMarker(path string, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool) error {
  485. var ingressPortsFile string
  486. if len(ingressPorts) != 0 {
  487. var err error
  488. ingressPortsFile, err = writePortsToFile(ingressPorts)
  489. if err != nil {
  490. return err
  491. }
  492. defer os.Remove(ingressPortsFile)
  493. }
  494. addDelOpt := "-A"
  495. if isDelete {
  496. addDelOpt = "-D"
  497. }
  498. cmd := &exec.Cmd{
  499. Path: reexec.Self(),
  500. Args: append([]string{"fwmarker"}, path, vip.String(), fmt.Sprintf("%d", fwMark), addDelOpt, ingressPortsFile, eIP.String()),
  501. Stdout: os.Stdout,
  502. Stderr: os.Stderr,
  503. }
  504. if err := cmd.Run(); err != nil {
  505. return fmt.Errorf("reexec failed: %v", err)
  506. }
  507. return nil
  508. }
  509. // Firewall marker reexec function.
  510. func fwMarker() {
  511. runtime.LockOSThread()
  512. defer runtime.UnlockOSThread()
  513. if len(os.Args) < 7 {
  514. logrus.Error("invalid number of arguments..")
  515. os.Exit(1)
  516. }
  517. var ingressPorts []*PortConfig
  518. if os.Args[5] != "" {
  519. var err error
  520. ingressPorts, err = readPortsFromFile(os.Args[5])
  521. if err != nil {
  522. logrus.Errorf("Failed reading ingress ports file: %v", err)
  523. os.Exit(6)
  524. }
  525. }
  526. vip := os.Args[2]
  527. fwMark, err := strconv.ParseUint(os.Args[3], 10, 32)
  528. if err != nil {
  529. logrus.Errorf("bad fwmark value(%s) passed: %v", os.Args[3], err)
  530. os.Exit(2)
  531. }
  532. addDelOpt := os.Args[4]
  533. rules := [][]string{}
  534. for _, iPort := range ingressPorts {
  535. rule := strings.Fields(fmt.Sprintf("-t mangle %s PREROUTING -p %s --dport %d -j MARK --set-mark %d",
  536. addDelOpt, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, fwMark))
  537. rules = append(rules, rule)
  538. }
  539. ns, err := netns.GetFromPath(os.Args[1])
  540. if err != nil {
  541. logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
  542. os.Exit(3)
  543. }
  544. defer ns.Close()
  545. if err := netns.Set(ns); err != nil {
  546. logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
  547. os.Exit(4)
  548. }
  549. if addDelOpt == "-A" {
  550. eIP, subnet, err := net.ParseCIDR(os.Args[6])
  551. if err != nil {
  552. logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err)
  553. os.Exit(9)
  554. }
  555. ruleParams := strings.Fields(fmt.Sprintf("-m ipvs --ipvs -d %s -j SNAT --to-source %s", subnet, eIP))
  556. if !iptables.Exists("nat", "POSTROUTING", ruleParams...) {
  557. rule := append(strings.Fields("-t nat -A POSTROUTING"), ruleParams...)
  558. rules = append(rules, rule)
  559. err := ioutil.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
  560. if err != nil {
  561. logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err)
  562. os.Exit(8)
  563. }
  564. }
  565. }
  566. rule := strings.Fields(fmt.Sprintf("-t mangle %s OUTPUT -d %s/32 -j MARK --set-mark %d", addDelOpt, vip, fwMark))
  567. rules = append(rules, rule)
  568. rule = strings.Fields(fmt.Sprintf("-t nat %s OUTPUT -p icmp --icmp echo-request -d %s -j DNAT --to 127.0.0.1", addDelOpt, vip))
  569. rules = append(rules, rule)
  570. for _, rule := range rules {
  571. if err := iptables.RawCombinedOutputNative(rule...); err != nil {
  572. logrus.Errorf("setting up rule failed, %v: %v", rule, err)
  573. os.Exit(5)
  574. }
  575. }
  576. }
  577. func addRedirectRules(path string, eIP *net.IPNet, ingressPorts []*PortConfig) error {
  578. var ingressPortsFile string
  579. if len(ingressPorts) != 0 {
  580. var err error
  581. ingressPortsFile, err = writePortsToFile(ingressPorts)
  582. if err != nil {
  583. return err
  584. }
  585. defer os.Remove(ingressPortsFile)
  586. }
  587. cmd := &exec.Cmd{
  588. Path: reexec.Self(),
  589. Args: append([]string{"redirecter"}, path, eIP.String(), ingressPortsFile),
  590. Stdout: os.Stdout,
  591. Stderr: os.Stderr,
  592. }
  593. if err := cmd.Run(); err != nil {
  594. return fmt.Errorf("reexec failed: %v", err)
  595. }
  596. return nil
  597. }
  598. // Redirecter reexec function.
  599. func redirecter() {
  600. runtime.LockOSThread()
  601. defer runtime.UnlockOSThread()
  602. if len(os.Args) < 4 {
  603. logrus.Error("invalid number of arguments..")
  604. os.Exit(1)
  605. }
  606. var ingressPorts []*PortConfig
  607. if os.Args[3] != "" {
  608. var err error
  609. ingressPorts, err = readPortsFromFile(os.Args[3])
  610. if err != nil {
  611. logrus.Errorf("Failed reading ingress ports file: %v", err)
  612. os.Exit(2)
  613. }
  614. }
  615. eIP, _, err := net.ParseCIDR(os.Args[2])
  616. if err != nil {
  617. logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[2], err)
  618. os.Exit(3)
  619. }
  620. rules := [][]string{}
  621. for _, iPort := range ingressPorts {
  622. rule := strings.Fields(fmt.Sprintf("-t nat -A PREROUTING -d %s -p %s --dport %d -j REDIRECT --to-port %d",
  623. eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, iPort.TargetPort))
  624. rules = append(rules, rule)
  625. // Allow only incoming connections to exposed ports
  626. iRule := strings.Fields(fmt.Sprintf("-I INPUT -d %s -p %s --dport %d -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT",
  627. eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.TargetPort))
  628. rules = append(rules, iRule)
  629. // Allow only outgoing connections from exposed ports
  630. oRule := strings.Fields(fmt.Sprintf("-I OUTPUT -s %s -p %s --sport %d -m conntrack --ctstate ESTABLISHED -j ACCEPT",
  631. eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.TargetPort))
  632. rules = append(rules, oRule)
  633. }
  634. ns, err := netns.GetFromPath(os.Args[1])
  635. if err != nil {
  636. logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
  637. os.Exit(4)
  638. }
  639. defer ns.Close()
  640. if err := netns.Set(ns); err != nil {
  641. logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
  642. os.Exit(5)
  643. }
  644. for _, rule := range rules {
  645. if err := iptables.RawCombinedOutputNative(rule...); err != nil {
  646. logrus.Errorf("setting up rule failed, %v: %v", rule, err)
  647. os.Exit(6)
  648. }
  649. }
  650. if len(ingressPorts) == 0 {
  651. return
  652. }
  653. // Ensure blocking rules for anything else in/to ingress network
  654. for _, rule := range [][]string{
  655. {"-d", eIP.String(), "-p", "sctp", "-j", "DROP"},
  656. {"-d", eIP.String(), "-p", "udp", "-j", "DROP"},
  657. {"-d", eIP.String(), "-p", "tcp", "-j", "DROP"},
  658. } {
  659. if !iptables.ExistsNative(iptables.Filter, "INPUT", rule...) {
  660. if err := iptables.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil {
  661. logrus.Errorf("setting up rule failed, %v: %v", rule, err)
  662. os.Exit(7)
  663. }
  664. }
  665. rule[0] = "-s"
  666. if !iptables.ExistsNative(iptables.Filter, "OUTPUT", rule...) {
  667. if err := iptables.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil {
  668. logrus.Errorf("setting up rule failed, %v: %v", rule, err)
  669. os.Exit(8)
  670. }
  671. }
  672. }
  673. }