service_linux.go 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805
  1. package libnetwork
  2. import (
  3. "fmt"
  4. "io"
  5. "io/ioutil"
  6. "net"
  7. "os"
  8. "os/exec"
  9. "path/filepath"
  10. "runtime"
  11. "strconv"
  12. "strings"
  13. "sync"
  14. "syscall"
  15. "github.com/docker/docker/pkg/reexec"
  16. "github.com/docker/libnetwork/iptables"
  17. "github.com/docker/libnetwork/ns"
  18. "github.com/gogo/protobuf/proto"
  19. "github.com/ishidawataru/sctp"
  20. "github.com/moby/ipvs"
  21. "github.com/sirupsen/logrus"
  22. "github.com/vishvananda/netlink/nl"
  23. "github.com/vishvananda/netns"
  24. )
  25. func init() {
  26. reexec.Register("fwmarker", fwMarker)
  27. reexec.Register("redirector", redirector)
  28. }
  29. // Populate all loadbalancers on the network that the passed endpoint
  30. // belongs to, into this sandbox.
  31. func (sb *sandbox) populateLoadBalancers(ep *endpoint) {
  32. // This is an interface less endpoint. Nothing to do.
  33. if ep.Iface() == nil {
  34. return
  35. }
  36. n := ep.getNetwork()
  37. eIP := ep.Iface().Address()
  38. if n.ingress {
  39. if err := addRedirectRules(sb.Key(), eIP, ep.ingressPorts); err != nil {
  40. logrus.Errorf("Failed to add redirect rules for ep %s (%.7s): %v", ep.Name(), ep.ID(), err)
  41. }
  42. }
  43. }
  44. func (n *network) findLBEndpointSandbox() (*endpoint, *sandbox, error) {
  45. // TODO: get endpoint from store? See EndpointInfo()
  46. var ep *endpoint
  47. // Find this node's LB sandbox endpoint: there should be exactly one
  48. for _, e := range n.Endpoints() {
  49. epi := e.Info()
  50. if epi != nil && epi.LoadBalancer() {
  51. ep = e.(*endpoint)
  52. break
  53. }
  54. }
  55. if ep == nil {
  56. return nil, nil, fmt.Errorf("Unable to find load balancing endpoint for network %s", n.ID())
  57. }
  58. // Get the load balancer sandbox itself as well
  59. sb, ok := ep.getSandbox()
  60. if !ok {
  61. return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID())
  62. }
  63. ep = sb.getEndpoint(ep.ID())
  64. if ep == nil {
  65. return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID())
  66. }
  67. return ep, sb, nil
  68. }
  69. // Searches the OS sandbox for the name of the endpoint interface
  70. // within the sandbox. This is required for adding/removing IP
  71. // aliases to the interface.
  72. func findIfaceDstName(sb *sandbox, ep *endpoint) string {
  73. srcName := ep.Iface().SrcName()
  74. for _, i := range sb.osSbox.Info().Interfaces() {
  75. if i.SrcName() == srcName {
  76. return i.DstName()
  77. }
  78. }
  79. return ""
  80. }
  81. // Add loadbalancer backend to the loadbalncer sandbox for the network.
  82. // If needed add the service as well.
  83. func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
  84. if len(lb.vip) == 0 {
  85. return
  86. }
  87. ep, sb, err := n.findLBEndpointSandbox()
  88. if err != nil {
  89. logrus.Errorf("addLBBackend %s/%s: %v", n.ID(), n.Name(), err)
  90. return
  91. }
  92. if sb.osSbox == nil {
  93. return
  94. }
  95. eIP := ep.Iface().Address()
  96. i, err := ipvs.New(sb.Key())
  97. if err != nil {
  98. logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb addition: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
  99. return
  100. }
  101. defer i.Close()
  102. s := &ipvs.Service{
  103. AddressFamily: nl.FAMILY_V4,
  104. FWMark: lb.fwMark,
  105. SchedName: ipvs.RoundRobin,
  106. }
  107. if !i.IsServicePresent(s) {
  108. // Add IP alias for the VIP to the endpoint
  109. ifName := findIfaceDstName(sb, ep)
  110. if ifName == "" {
  111. logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
  112. return
  113. }
  114. err := sb.osSbox.AddAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
  115. if err != nil {
  116. logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
  117. return
  118. }
  119. if sb.ingress {
  120. var gwIP net.IP
  121. if ep := sb.getGatewayEndpoint(); ep != nil {
  122. gwIP = ep.Iface().Address().IP
  123. }
  124. if err := programIngress(gwIP, lb.service.ingressPorts, false); err != nil {
  125. logrus.Errorf("Failed to add ingress: %v", err)
  126. return
  127. }
  128. }
  129. logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID())
  130. if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil {
  131. logrus.Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
  132. return
  133. }
  134. if err := i.NewService(s); err != nil && err != syscall.EEXIST {
  135. logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  136. return
  137. }
  138. }
  139. d := &ipvs.Destination{
  140. AddressFamily: nl.FAMILY_V4,
  141. Address: ip,
  142. Weight: 1,
  143. }
  144. if n.loadBalancerMode == loadBalancerModeDSR {
  145. d.ConnectionFlags = ipvs.ConnFwdDirectRoute
  146. }
  147. // Remove the sched name before using the service to add
  148. // destination.
  149. s.SchedName = ""
  150. if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
  151. logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  152. }
  153. }
  154. // Remove loadbalancer backend the load balancing endpoint for this
  155. // network. If 'rmService' is true, then remove the service entry as well.
  156. // If 'fullRemove' is true then completely remove the entry, otherwise
  157. // just deweight it for now.
  158. func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullRemove bool) {
  159. if len(lb.vip) == 0 {
  160. return
  161. }
  162. ep, sb, err := n.findLBEndpointSandbox()
  163. if err != nil {
  164. logrus.Debugf("rmLBBackend for %s/%s: %v -- probably transient state", n.ID(), n.Name(), err)
  165. return
  166. }
  167. if sb.osSbox == nil {
  168. return
  169. }
  170. eIP := ep.Iface().Address()
  171. i, err := ipvs.New(sb.Key())
  172. if err != nil {
  173. logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb removal: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
  174. return
  175. }
  176. defer i.Close()
  177. s := &ipvs.Service{
  178. AddressFamily: nl.FAMILY_V4,
  179. FWMark: lb.fwMark,
  180. }
  181. d := &ipvs.Destination{
  182. AddressFamily: nl.FAMILY_V4,
  183. Address: ip,
  184. Weight: 1,
  185. }
  186. if n.loadBalancerMode == loadBalancerModeDSR {
  187. d.ConnectionFlags = ipvs.ConnFwdDirectRoute
  188. }
  189. if fullRemove {
  190. if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
  191. logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  192. }
  193. } else {
  194. d.Weight = 0
  195. if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
  196. logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  197. }
  198. }
  199. if rmService {
  200. s.SchedName = ipvs.RoundRobin
  201. if err := i.DelService(s); err != nil && err != syscall.ENOENT {
  202. logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
  203. }
  204. if sb.ingress {
  205. var gwIP net.IP
  206. if ep := sb.getGatewayEndpoint(); ep != nil {
  207. gwIP = ep.Iface().Address().IP
  208. }
  209. if err := programIngress(gwIP, lb.service.ingressPorts, true); err != nil {
  210. logrus.Errorf("Failed to delete ingress: %v", err)
  211. }
  212. }
  213. if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil {
  214. logrus.Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
  215. }
  216. // Remove IP alias from the VIP to the endpoint
  217. ifName := findIfaceDstName(sb, ep)
  218. if ifName == "" {
  219. logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
  220. return
  221. }
  222. err := sb.osSbox.RemoveAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
  223. if err != nil {
  224. logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
  225. }
  226. }
  227. }
  228. const ingressChain = "DOCKER-INGRESS"
  229. var (
  230. ingressOnce sync.Once
  231. ingressMu sync.Mutex // lock for operations on ingress
  232. ingressProxyTbl = make(map[string]io.Closer)
  233. portConfigMu sync.Mutex
  234. portConfigTbl = make(map[PortConfig]int)
  235. )
  236. func filterPortConfigs(ingressPorts []*PortConfig, isDelete bool) []*PortConfig {
  237. portConfigMu.Lock()
  238. iPorts := make([]*PortConfig, 0, len(ingressPorts))
  239. for _, pc := range ingressPorts {
  240. if isDelete {
  241. if cnt, ok := portConfigTbl[*pc]; ok {
  242. // This is the last reference to this
  243. // port config. Delete the port config
  244. // and add it to filtered list to be
  245. // plumbed.
  246. if cnt == 1 {
  247. delete(portConfigTbl, *pc)
  248. iPorts = append(iPorts, pc)
  249. continue
  250. }
  251. portConfigTbl[*pc] = cnt - 1
  252. }
  253. continue
  254. }
  255. if cnt, ok := portConfigTbl[*pc]; ok {
  256. portConfigTbl[*pc] = cnt + 1
  257. continue
  258. }
  259. // We are adding it for the first time. Add it to the
  260. // filter list to be plumbed.
  261. portConfigTbl[*pc] = 1
  262. iPorts = append(iPorts, pc)
  263. }
  264. portConfigMu.Unlock()
  265. return iPorts
  266. }
  267. func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) error {
  268. // TODO IPv6 support
  269. iptable := iptables.GetIptable(iptables.IPv4)
  270. addDelOpt := "-I"
  271. rollbackAddDelOpt := "-D"
  272. if isDelete {
  273. addDelOpt = "-D"
  274. rollbackAddDelOpt = "-I"
  275. }
  276. ingressMu.Lock()
  277. defer ingressMu.Unlock()
  278. chainExists := iptable.ExistChain(ingressChain, iptables.Nat)
  279. filterChainExists := iptable.ExistChain(ingressChain, iptables.Filter)
  280. ingressOnce.Do(func() {
  281. // Flush nat table and filter table ingress chain rules during init if it
  282. // exists. It might contain stale rules from previous life.
  283. if chainExists {
  284. if err := iptable.RawCombinedOutput("-t", "nat", "-F", ingressChain); err != nil {
  285. logrus.Errorf("Could not flush nat table ingress chain rules during init: %v", err)
  286. }
  287. }
  288. if filterChainExists {
  289. if err := iptable.RawCombinedOutput("-F", ingressChain); err != nil {
  290. logrus.Errorf("Could not flush filter table ingress chain rules during init: %v", err)
  291. }
  292. }
  293. })
  294. if !isDelete {
  295. if !chainExists {
  296. if err := iptable.RawCombinedOutput("-t", "nat", "-N", ingressChain); err != nil {
  297. return fmt.Errorf("failed to create ingress chain: %v", err)
  298. }
  299. }
  300. if !filterChainExists {
  301. if err := iptable.RawCombinedOutput("-N", ingressChain); err != nil {
  302. return fmt.Errorf("failed to create filter table ingress chain: %v", err)
  303. }
  304. }
  305. if !iptable.Exists(iptables.Nat, ingressChain, "-j", "RETURN") {
  306. if err := iptable.RawCombinedOutput("-t", "nat", "-A", ingressChain, "-j", "RETURN"); err != nil {
  307. return fmt.Errorf("failed to add return rule in nat table ingress chain: %v", err)
  308. }
  309. }
  310. if !iptable.Exists(iptables.Filter, ingressChain, "-j", "RETURN") {
  311. if err := iptable.RawCombinedOutput("-A", ingressChain, "-j", "RETURN"); err != nil {
  312. return fmt.Errorf("failed to add return rule to filter table ingress chain: %v", err)
  313. }
  314. }
  315. for _, chain := range []string{"OUTPUT", "PREROUTING"} {
  316. if !iptable.Exists(iptables.Nat, chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain) {
  317. if err := iptable.RawCombinedOutput("-t", "nat", "-I", chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain); err != nil {
  318. return fmt.Errorf("failed to add jump rule in %s to ingress chain: %v", chain, err)
  319. }
  320. }
  321. }
  322. if !iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
  323. if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
  324. return fmt.Errorf("failed to add jump rule to %s in filter table forward chain: %v", ingressChain, err)
  325. }
  326. arrangeUserFilterRule()
  327. }
  328. oifName, err := findOIFName(gwIP)
  329. if err != nil {
  330. return fmt.Errorf("failed to find gateway bridge interface name for %s: %v", gwIP, err)
  331. }
  332. path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet")
  333. if err := ioutil.WriteFile(path, []byte{'1', '\n'}, 0644); err != nil {
  334. return fmt.Errorf("could not write to %s: %v", path, err)
  335. }
  336. ruleArgs := strings.Fields(fmt.Sprintf("-m addrtype --src-type LOCAL -o %s -j MASQUERADE", oifName))
  337. if !iptable.Exists(iptables.Nat, "POSTROUTING", ruleArgs...) {
  338. if err := iptable.RawCombinedOutput(append([]string{"-t", "nat", "-I", "POSTROUTING"}, ruleArgs...)...); err != nil {
  339. return fmt.Errorf("failed to add ingress localhost POSTROUTING rule for %s: %v", oifName, err)
  340. }
  341. }
  342. }
  343. //Filter the ingress ports until port rules start to be added/deleted
  344. filteredPorts := filterPortConfigs(ingressPorts, isDelete)
  345. rollbackRules := make([][]string, 0, len(filteredPorts)*3)
  346. var portErr error
  347. defer func() {
  348. if portErr != nil && !isDelete {
  349. filterPortConfigs(filteredPorts, !isDelete)
  350. for _, rule := range rollbackRules {
  351. if err := iptable.RawCombinedOutput(rule...); err != nil {
  352. logrus.Warnf("roll back rule failed, %v: %v", rule, err)
  353. }
  354. }
  355. }
  356. }()
  357. for _, iPort := range filteredPorts {
  358. if iptable.ExistChain(ingressChain, iptables.Nat) {
  359. rule := strings.Fields(fmt.Sprintf("-t nat %s %s -p %s --dport %d -j DNAT --to-destination %s:%d",
  360. addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, gwIP, iPort.PublishedPort))
  361. if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
  362. errStr := fmt.Sprintf("set up rule failed, %v: %v", rule, portErr)
  363. if !isDelete {
  364. return fmt.Errorf("%s", errStr)
  365. }
  366. logrus.Infof("%s", errStr)
  367. }
  368. rollbackRule := strings.Fields(fmt.Sprintf("-t nat %s %s -p %s --dport %d -j DNAT --to-destination %s:%d", rollbackAddDelOpt,
  369. ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, gwIP, iPort.PublishedPort))
  370. rollbackRules = append(rollbackRules, rollbackRule)
  371. }
  372. // Filter table rules to allow a published service to be accessible in the local node from..
  373. // 1) service tasks attached to other networks
  374. // 2) unmanaged containers on bridge networks
  375. rule := strings.Fields(fmt.Sprintf("%s %s -m state -p %s --sport %d --state ESTABLISHED,RELATED -j ACCEPT",
  376. addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort))
  377. if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
  378. errStr := fmt.Sprintf("set up rule failed, %v: %v", rule, portErr)
  379. if !isDelete {
  380. return fmt.Errorf("%s", errStr)
  381. }
  382. logrus.Warnf("%s", errStr)
  383. }
  384. rollbackRule := strings.Fields(fmt.Sprintf("%s %s -m state -p %s --sport %d --state ESTABLISHED,RELATED -j ACCEPT", rollbackAddDelOpt,
  385. ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort))
  386. rollbackRules = append(rollbackRules, rollbackRule)
  387. rule = strings.Fields(fmt.Sprintf("%s %s -p %s --dport %d -j ACCEPT",
  388. addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort))
  389. if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
  390. errStr := fmt.Sprintf("set up rule failed, %v: %v", rule, portErr)
  391. if !isDelete {
  392. return fmt.Errorf("%s", errStr)
  393. }
  394. logrus.Warnf("%s", errStr)
  395. }
  396. rollbackRule = strings.Fields(fmt.Sprintf("%s %s -p %s --dport %d -j ACCEPT", rollbackAddDelOpt,
  397. ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort))
  398. rollbackRules = append(rollbackRules, rollbackRule)
  399. if err := plumbProxy(iPort, isDelete); err != nil {
  400. logrus.Warnf("failed to create proxy for port %d: %v", iPort.PublishedPort, err)
  401. }
  402. }
  403. return nil
  404. }
  405. // In the filter table FORWARD chain the first rule should be to jump to
  406. // DOCKER-USER so the user is able to filter packet first.
  407. // The second rule should be jump to INGRESS-CHAIN.
  408. // This chain has the rules to allow access to the published ports for swarm tasks
  409. // from local bridge networks and docker_gwbridge (ie:taks on other swarm networks)
  410. func arrangeIngressFilterRule() {
  411. // TODO IPv6 support
  412. iptable := iptables.GetIptable(iptables.IPv4)
  413. if iptable.ExistChain(ingressChain, iptables.Filter) {
  414. if iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
  415. if err := iptable.RawCombinedOutput("-D", "FORWARD", "-j", ingressChain); err != nil {
  416. logrus.Warnf("failed to delete jump rule to ingressChain in filter table: %v", err)
  417. }
  418. }
  419. if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
  420. logrus.Warnf("failed to add jump rule to ingressChain in filter table: %v", err)
  421. }
  422. }
  423. }
  424. func findOIFName(ip net.IP) (string, error) {
  425. nlh := ns.NlHandle()
  426. routes, err := nlh.RouteGet(ip)
  427. if err != nil {
  428. return "", err
  429. }
  430. if len(routes) == 0 {
  431. return "", fmt.Errorf("no route to %s", ip)
  432. }
  433. // Pick the first route(typically there is only one route). We
  434. // don't support multipath.
  435. link, err := nlh.LinkByIndex(routes[0].LinkIndex)
  436. if err != nil {
  437. return "", err
  438. }
  439. return link.Attrs().Name, nil
  440. }
  441. func plumbProxy(iPort *PortConfig, isDelete bool) error {
  442. var (
  443. err error
  444. l io.Closer
  445. )
  446. portSpec := fmt.Sprintf("%d/%s", iPort.PublishedPort, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]))
  447. if isDelete {
  448. if listener, ok := ingressProxyTbl[portSpec]; ok {
  449. if listener != nil {
  450. listener.Close()
  451. }
  452. }
  453. return nil
  454. }
  455. switch iPort.Protocol {
  456. case ProtocolTCP:
  457. l, err = net.ListenTCP("tcp", &net.TCPAddr{Port: int(iPort.PublishedPort)})
  458. case ProtocolUDP:
  459. l, err = net.ListenUDP("udp", &net.UDPAddr{Port: int(iPort.PublishedPort)})
  460. case ProtocolSCTP:
  461. l, err = sctp.ListenSCTP("sctp", &sctp.SCTPAddr{Port: int(iPort.PublishedPort)})
  462. default:
  463. err = fmt.Errorf("unknown protocol %v", iPort.Protocol)
  464. }
  465. if err != nil {
  466. return err
  467. }
  468. ingressProxyTbl[portSpec] = l
  469. return nil
  470. }
  471. func writePortsToFile(ports []*PortConfig) (string, error) {
  472. f, err := ioutil.TempFile("", "port_configs")
  473. if err != nil {
  474. return "", err
  475. }
  476. defer f.Close()
  477. buf, _ := proto.Marshal(&EndpointRecord{
  478. IngressPorts: ports,
  479. })
  480. n, err := f.Write(buf)
  481. if err != nil {
  482. return "", err
  483. }
  484. if n < len(buf) {
  485. return "", io.ErrShortWrite
  486. }
  487. return f.Name(), nil
  488. }
  489. func readPortsFromFile(fileName string) ([]*PortConfig, error) {
  490. buf, err := ioutil.ReadFile(fileName)
  491. if err != nil {
  492. return nil, err
  493. }
  494. var epRec EndpointRecord
  495. err = proto.Unmarshal(buf, &epRec)
  496. if err != nil {
  497. return nil, err
  498. }
  499. return epRec.IngressPorts, nil
  500. }
  501. // Invoke fwmarker reexec routine to mark vip destined packets with
  502. // the passed firewall mark.
  503. func invokeFWMarker(path string, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error {
  504. var ingressPortsFile string
  505. if len(ingressPorts) != 0 {
  506. var err error
  507. ingressPortsFile, err = writePortsToFile(ingressPorts)
  508. if err != nil {
  509. return err
  510. }
  511. defer os.Remove(ingressPortsFile)
  512. }
  513. addDelOpt := "-A"
  514. if isDelete {
  515. addDelOpt = "-D"
  516. }
  517. cmd := &exec.Cmd{
  518. Path: reexec.Self(),
  519. Args: append([]string{"fwmarker"}, path, vip.String(), fmt.Sprintf("%d", fwMark), addDelOpt, ingressPortsFile, eIP.String(), lbMode),
  520. Stdout: os.Stdout,
  521. Stderr: os.Stderr,
  522. }
  523. if err := cmd.Run(); err != nil {
  524. return fmt.Errorf("reexec failed: %v", err)
  525. }
  526. return nil
  527. }
  528. // Firewall marker reexec function.
  529. func fwMarker() {
  530. // TODO IPv6 support
  531. iptable := iptables.GetIptable(iptables.IPv4)
  532. runtime.LockOSThread()
  533. defer runtime.UnlockOSThread()
  534. if len(os.Args) < 8 {
  535. logrus.Error("invalid number of arguments..")
  536. os.Exit(1)
  537. }
  538. var ingressPorts []*PortConfig
  539. if os.Args[5] != "" {
  540. var err error
  541. ingressPorts, err = readPortsFromFile(os.Args[5])
  542. if err != nil {
  543. logrus.Errorf("Failed reading ingress ports file: %v", err)
  544. os.Exit(2)
  545. }
  546. }
  547. vip := os.Args[2]
  548. fwMark, err := strconv.ParseUint(os.Args[3], 10, 32)
  549. if err != nil {
  550. logrus.Errorf("bad fwmark value(%s) passed: %v", os.Args[3], err)
  551. os.Exit(3)
  552. }
  553. addDelOpt := os.Args[4]
  554. rules := [][]string{}
  555. for _, iPort := range ingressPorts {
  556. rule := strings.Fields(fmt.Sprintf("-t mangle %s PREROUTING -p %s --dport %d -j MARK --set-mark %d",
  557. addDelOpt, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, fwMark))
  558. rules = append(rules, rule)
  559. }
  560. ns, err := netns.GetFromPath(os.Args[1])
  561. if err != nil {
  562. logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
  563. os.Exit(4)
  564. }
  565. defer ns.Close()
  566. if err := netns.Set(ns); err != nil {
  567. logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
  568. os.Exit(5)
  569. }
  570. lbMode := os.Args[7]
  571. if addDelOpt == "-A" && lbMode == loadBalancerModeNAT {
  572. eIP, subnet, err := net.ParseCIDR(os.Args[6])
  573. if err != nil {
  574. logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err)
  575. os.Exit(6)
  576. }
  577. ruleParams := strings.Fields(fmt.Sprintf("-m ipvs --ipvs -d %s -j SNAT --to-source %s", subnet, eIP))
  578. if !iptable.Exists("nat", "POSTROUTING", ruleParams...) {
  579. rule := append(strings.Fields("-t nat -A POSTROUTING"), ruleParams...)
  580. rules = append(rules, rule)
  581. err := ioutil.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
  582. if err != nil {
  583. logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err)
  584. os.Exit(7)
  585. }
  586. }
  587. }
  588. rule := strings.Fields(fmt.Sprintf("-t mangle %s INPUT -d %s/32 -j MARK --set-mark %d", addDelOpt, vip, fwMark))
  589. rules = append(rules, rule)
  590. for _, rule := range rules {
  591. if err := iptable.RawCombinedOutputNative(rule...); err != nil {
  592. logrus.Errorf("set up rule failed, %v: %v", rule, err)
  593. os.Exit(8)
  594. }
  595. }
  596. }
  597. func addRedirectRules(path string, eIP *net.IPNet, ingressPorts []*PortConfig) error {
  598. var ingressPortsFile string
  599. if len(ingressPorts) != 0 {
  600. var err error
  601. ingressPortsFile, err = writePortsToFile(ingressPorts)
  602. if err != nil {
  603. return err
  604. }
  605. defer os.Remove(ingressPortsFile)
  606. }
  607. cmd := &exec.Cmd{
  608. Path: reexec.Self(),
  609. Args: append([]string{"redirector"}, path, eIP.String(), ingressPortsFile),
  610. Stdout: os.Stdout,
  611. Stderr: os.Stderr,
  612. }
  613. if err := cmd.Run(); err != nil {
  614. return fmt.Errorf("reexec failed: %v", err)
  615. }
  616. return nil
  617. }
  618. // Redirector reexec function.
  619. func redirector() {
  620. // TODO IPv6 support
  621. iptable := iptables.GetIptable(iptables.IPv4)
  622. runtime.LockOSThread()
  623. defer runtime.UnlockOSThread()
  624. if len(os.Args) < 4 {
  625. logrus.Error("invalid number of arguments..")
  626. os.Exit(1)
  627. }
  628. var ingressPorts []*PortConfig
  629. if os.Args[3] != "" {
  630. var err error
  631. ingressPorts, err = readPortsFromFile(os.Args[3])
  632. if err != nil {
  633. logrus.Errorf("Failed reading ingress ports file: %v", err)
  634. os.Exit(2)
  635. }
  636. }
  637. eIP, _, err := net.ParseCIDR(os.Args[2])
  638. if err != nil {
  639. logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[2], err)
  640. os.Exit(3)
  641. }
  642. rules := [][]string{}
  643. for _, iPort := range ingressPorts {
  644. rule := strings.Fields(fmt.Sprintf("-t nat -A PREROUTING -d %s -p %s --dport %d -j REDIRECT --to-port %d",
  645. eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, iPort.TargetPort))
  646. rules = append(rules, rule)
  647. // Allow only incoming connections to exposed ports
  648. iRule := strings.Fields(fmt.Sprintf("-I INPUT -d %s -p %s --dport %d -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT",
  649. eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.TargetPort))
  650. rules = append(rules, iRule)
  651. // Allow only outgoing connections from exposed ports
  652. oRule := strings.Fields(fmt.Sprintf("-I OUTPUT -s %s -p %s --sport %d -m conntrack --ctstate ESTABLISHED -j ACCEPT",
  653. eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.TargetPort))
  654. rules = append(rules, oRule)
  655. }
  656. ns, err := netns.GetFromPath(os.Args[1])
  657. if err != nil {
  658. logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
  659. os.Exit(4)
  660. }
  661. defer ns.Close()
  662. if err := netns.Set(ns); err != nil {
  663. logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
  664. os.Exit(5)
  665. }
  666. for _, rule := range rules {
  667. if err := iptable.RawCombinedOutputNative(rule...); err != nil {
  668. logrus.Errorf("set up rule failed, %v: %v", rule, err)
  669. os.Exit(6)
  670. }
  671. }
  672. if len(ingressPorts) == 0 {
  673. return
  674. }
  675. // Ensure blocking rules for anything else in/to ingress network
  676. for _, rule := range [][]string{
  677. {"-d", eIP.String(), "-p", "sctp", "-j", "DROP"},
  678. {"-d", eIP.String(), "-p", "udp", "-j", "DROP"},
  679. {"-d", eIP.String(), "-p", "tcp", "-j", "DROP"},
  680. } {
  681. if !iptable.ExistsNative(iptables.Filter, "INPUT", rule...) {
  682. if err := iptable.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil {
  683. logrus.Errorf("set up rule failed, %v: %v", rule, err)
  684. os.Exit(7)
  685. }
  686. }
  687. rule[0] = "-s"
  688. if !iptable.ExistsNative(iptables.Filter, "OUTPUT", rule...) {
  689. if err := iptable.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil {
  690. logrus.Errorf("set up rule failed, %v: %v", rule, err)
  691. os.Exit(8)
  692. }
  693. }
  694. }
  695. }