resolver.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. package libnetwork
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "math/rand"
  7. "net"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. "time"
  12. "github.com/containerd/log"
  13. "github.com/docker/docker/libnetwork/types"
  14. "github.com/miekg/dns"
  15. "go.opentelemetry.io/otel"
  16. "go.opentelemetry.io/otel/attribute"
  17. "go.opentelemetry.io/otel/codes"
  18. "go.opentelemetry.io/otel/trace"
  19. "golang.org/x/sync/semaphore"
  20. "golang.org/x/time/rate"
  21. )
// DNSBackend represents a backend DNS resolver used for DNS name
// resolution. All the queries to the resolver are forwarded to the
// backend resolver.
//
// The Resolver consults the backend first for A, AAAA, MX, PTR and SRV
// queries and only considers external nameservers when the backend has no
// answer.
type DNSBackend interface {
	// ResolveName resolves a service name to an IPv4 or IPv6 address by searching
	// the networks the sandbox is connected to. For IPv6 queries, second return
	// value will be true if the name exists in docker domain but doesn't have an
	// IPv6 address. Such queries shouldn't be forwarded to external nameservers.
	//
	// The resolver calls this with iplen set to types.IPv4 or types.IPv6 and
	// treats a nil slice as "name not known to the backend".
	ResolveName(ctx context.Context, name string, iplen int) ([]net.IP, bool)
	// ResolveIP returns the service name for the passed in IP. IP is in reverse dotted
	// notation; the format used for DNS PTR records. The resolver strips the
	// ".in-addr.arpa." / ".ip6.arpa." suffix before calling it and treats an
	// empty result as "not found".
	ResolveIP(ctx context.Context, name string) string
	// ResolveService returns all the backend details about the containers or hosts
	// backing a service. Its purpose is to satisfy an SRV query. The resolver
	// pairs the two slices by index, so both must have the same length.
	ResolveService(ctx context.Context, name string) ([]*net.SRV, []net.IP)
	// ExecFunc allows a function to be executed in the context of the backend
	// on behalf of the resolver. The resolver uses it to dial external
	// nameservers that are not flagged as HostLoopback.
	ExecFunc(f func()) error
	// NdotsSet queries the backend's ndots DNS option setting.
	NdotsSet() bool
	// HandleQueryResp passes the name & IP from a response to the backend. The
	// backend can use it to maintain any required state about the resolution.
	HandleQueryResp(name string, ip net.IP)
}
const (
	// dnsPort is the DNS port number as a string.
	// NOTE(review): not referenced in this part of the file; dialExtDNS uses
	// defaultPort instead - confirm whether both are still needed.
	dnsPort = "53"
	// ptrIPv4domain is the suffix of IPv4 reverse-lookup (PTR) query names.
	ptrIPv4domain = ".in-addr.arpa."
	// ptrIPv6domain is the suffix of IPv6 reverse-lookup (PTR) query names.
	ptrIPv6domain = ".ip6.arpa."
	// respTTL is the TTL placed on every record the resolver answers locally.
	respTTL = 600
	maxExtDNS = 3 // max number of external servers to try
	// extIOTimeout bounds dialing an external server, the DNS exchange with
	// it, and the wait for a forwarding slot.
	extIOTimeout = 4 * time.Second
	// maxConcurrent is the capacity of the semaphore limiting in-flight
	// forwarded queries.
	maxConcurrent = 1024
	// logInterval is the minimum spacing between "too many concurrent
	// queries" log messages.
	logInterval = 2 * time.Second
)
// extDNSEntry describes one external (upstream) nameserver that queries can
// be forwarded to.
type extDNSEntry struct {
	// IPStr is the server address in string form; an empty value marks the
	// end of the configured server list.
	IPStr string
	// port overrides the default DNS port when non-zero.
	port uint16 // for testing
	// HostLoopback selects how the server is dialed: when true the resolver
	// dials directly, otherwise the dial runs through the backend's ExecFunc.
	HostLoopback bool
}
// Resolver is the embedded DNS server in Docker. It operates by listening on
// the container's loopback interface for DNS queries.
type Resolver struct {
	// backend answers lookups locally and runs the dial function for
	// non-host-loopback upstream servers.
	backend DNSBackend
	// extDNSList holds the upstream servers in the order they are tried; the
	// first entry with an empty IPStr ends the list.
	extDNSList [maxExtDNS]extDNSEntry
	// server and conn are the UDP DNS server and the socket it serves on.
	server *dns.Server
	conn *net.UDPConn
	// tcpServer and tcpListen are the TCP DNS server and its listener.
	tcpServer *dns.Server
	tcpListen *net.TCPListener
	// err is non-nil until the setup function has opened both sockets
	// successfully; Start refuses to run while it is set.
	err error
	// listenAddress is the IP address (as a string) the sockets bind to.
	listenAddress string
	// proxyDNS enables forwarding of queries the backend cannot answer.
	proxyDNS bool
	// startCh is a one-slot channel used as a lock by Start and Stop.
	startCh chan struct{}
	// logger, when set, is used instead of the logger carried by the context.
	logger *log.Entry
	fwdSem *semaphore.Weighted // Limit the number of concurrent external DNS requests in-flight
	logInverval rate.Sometimes // Rate-limit logging about hitting the fwdSem limit
}
  78. // NewResolver creates a new instance of the Resolver
  79. func NewResolver(address string, proxyDNS bool, backend DNSBackend) *Resolver {
  80. return &Resolver{
  81. backend: backend,
  82. proxyDNS: proxyDNS,
  83. listenAddress: address,
  84. err: fmt.Errorf("setup not done yet"),
  85. startCh: make(chan struct{}, 1),
  86. fwdSem: semaphore.NewWeighted(maxConcurrent),
  87. logInverval: rate.Sometimes{Interval: logInterval},
  88. }
  89. }
  90. func (r *Resolver) log(ctx context.Context) *log.Entry {
  91. if r.logger == nil {
  92. return log.G(ctx)
  93. }
  94. return r.logger
  95. }
  96. // SetupFunc returns the setup function that should be run in the container's
  97. // network namespace.
  98. func (r *Resolver) SetupFunc(port int) func() {
  99. return func() {
  100. var err error
  101. // DNS operates primarily on UDP
  102. r.conn, err = net.ListenUDP("udp", &net.UDPAddr{
  103. IP: net.ParseIP(r.listenAddress),
  104. Port: port,
  105. })
  106. if err != nil {
  107. r.err = fmt.Errorf("error in opening name server socket %v", err)
  108. return
  109. }
  110. // Listen on a TCP as well
  111. r.tcpListen, err = net.ListenTCP("tcp", &net.TCPAddr{
  112. IP: net.ParseIP(r.listenAddress),
  113. Port: port,
  114. })
  115. if err != nil {
  116. r.err = fmt.Errorf("error in opening name TCP server socket %v", err)
  117. return
  118. }
  119. r.err = nil
  120. }
  121. }
  122. // Start starts the name server for the container.
  123. func (r *Resolver) Start() error {
  124. r.startCh <- struct{}{}
  125. defer func() { <-r.startCh }()
  126. // make sure the resolver has been setup before starting
  127. if r.err != nil {
  128. return r.err
  129. }
  130. if err := r.setupIPTable(); err != nil {
  131. return fmt.Errorf("setting up IP table rules failed: %v", err)
  132. }
  133. s := &dns.Server{Handler: dns.HandlerFunc(r.serveDNS), PacketConn: r.conn}
  134. r.server = s
  135. go func() {
  136. if err := s.ActivateAndServe(); err != nil {
  137. r.log(context.TODO()).WithError(err).Error("[resolver] failed to start PacketConn DNS server")
  138. }
  139. }()
  140. tcpServer := &dns.Server{Handler: dns.HandlerFunc(r.serveDNS), Listener: r.tcpListen}
  141. r.tcpServer = tcpServer
  142. go func() {
  143. if err := tcpServer.ActivateAndServe(); err != nil {
  144. r.log(context.TODO()).WithError(err).Error("[resolver] failed to start TCP DNS server")
  145. }
  146. }()
  147. return nil
  148. }
  149. // Stop stops the name server for the container. A stopped resolver can be
  150. // reused after running the SetupFunc again.
  151. func (r *Resolver) Stop() {
  152. r.startCh <- struct{}{}
  153. defer func() { <-r.startCh }()
  154. if r.server != nil {
  155. r.server.Shutdown() //nolint:errcheck
  156. }
  157. if r.tcpServer != nil {
  158. r.tcpServer.Shutdown() //nolint:errcheck
  159. }
  160. r.conn = nil
  161. r.tcpServer = nil
  162. r.err = fmt.Errorf("setup not done yet")
  163. r.fwdSem = semaphore.NewWeighted(maxConcurrent)
  164. }
  165. // SetExtServers configures the external nameservers the resolver should use
  166. // when forwarding queries.
  167. func (r *Resolver) SetExtServers(extDNS []extDNSEntry) {
  168. l := len(extDNS)
  169. if l > maxExtDNS {
  170. l = maxExtDNS
  171. }
  172. for i := 0; i < l; i++ {
  173. r.extDNSList[i] = extDNS[i]
  174. }
  175. }
  176. // NameServer returns the IP of the DNS resolver for the containers.
  177. func (r *Resolver) NameServer() string {
  178. return r.listenAddress
  179. }
  180. // ResolverOptions returns resolv.conf options that should be set.
  181. func (r *Resolver) ResolverOptions() []string {
  182. return []string{"ndots:0"}
  183. }
  184. //nolint:gosec // The RNG is not used in a security-sensitive context.
  185. var (
  186. shuffleRNG = rand.New(rand.NewSource(time.Now().Unix()))
  187. shuffleRNGMu sync.Mutex
  188. )
  189. func shuffleAddr(addr []net.IP) []net.IP {
  190. shuffleRNGMu.Lock()
  191. defer shuffleRNGMu.Unlock()
  192. for i := len(addr) - 1; i > 0; i-- {
  193. r := shuffleRNG.Intn(i + 1) //nolint:gosec // gosec complains about the use of rand here. It should be fine.
  194. addr[i], addr[r] = addr[r], addr[i]
  195. }
  196. return addr
  197. }
  198. func createRespMsg(query *dns.Msg) *dns.Msg {
  199. resp := &dns.Msg{}
  200. resp.SetReply(query)
  201. resp.RecursionAvailable = true
  202. return resp
  203. }
  204. func (r *Resolver) handleMXQuery(ctx context.Context, query *dns.Msg) (*dns.Msg, error) {
  205. name := query.Question[0].Name
  206. addrv4, _ := r.backend.ResolveName(ctx, name, types.IPv4)
  207. addrv6, _ := r.backend.ResolveName(ctx, name, types.IPv6)
  208. if addrv4 == nil && addrv6 == nil {
  209. return nil, nil
  210. }
  211. // We were able to resolve the name. Respond with an empty list with
  212. // RcodeSuccess/NOERROR so that email clients can treat it as "implicit MX"
  213. // [RFC 5321 Section-5.1] and issue a Type A/AAAA query for the name.
  214. resp := createRespMsg(query)
  215. return resp, nil
  216. }
  217. func (r *Resolver) handleIPQuery(ctx context.Context, query *dns.Msg, ipType int) (*dns.Msg, error) {
  218. var (
  219. addr []net.IP
  220. ipv6Miss bool
  221. name = query.Question[0].Name
  222. )
  223. addr, ipv6Miss = r.backend.ResolveName(ctx, name, ipType)
  224. if addr == nil && ipv6Miss {
  225. // Send a reply without any Answer sections
  226. r.log(ctx).Debugf("[resolver] lookup name %s present without IPv6 address", name)
  227. resp := createRespMsg(query)
  228. return resp, nil
  229. }
  230. if addr == nil {
  231. return nil, nil
  232. }
  233. r.log(ctx).Debugf("[resolver] lookup for %s: IP %v", name, addr)
  234. resp := createRespMsg(query)
  235. if len(addr) > 1 {
  236. addr = shuffleAddr(addr)
  237. }
  238. if ipType == types.IPv4 {
  239. for _, ip := range addr {
  240. resp.Answer = append(resp.Answer, &dns.A{
  241. Hdr: dns.RR_Header{Name: name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: respTTL},
  242. A: ip,
  243. })
  244. }
  245. } else {
  246. for _, ip := range addr {
  247. resp.Answer = append(resp.Answer, &dns.AAAA{
  248. Hdr: dns.RR_Header{Name: name, Rrtype: dns.TypeAAAA, Class: dns.ClassINET, Ttl: respTTL},
  249. AAAA: ip,
  250. })
  251. }
  252. }
  253. return resp, nil
  254. }
  255. func (r *Resolver) handlePTRQuery(ctx context.Context, query *dns.Msg) (*dns.Msg, error) {
  256. ptr := query.Question[0].Name
  257. name, after, found := strings.Cut(ptr, ptrIPv4domain)
  258. if !found || after != "" {
  259. name, after, found = strings.Cut(ptr, ptrIPv6domain)
  260. }
  261. if !found || after != "" {
  262. // Not a known IPv4 or IPv6 PTR domain.
  263. // Maybe the external DNS servers know what to do with the query?
  264. return nil, nil
  265. }
  266. host := r.backend.ResolveIP(ctx, name)
  267. if host == "" {
  268. return nil, nil
  269. }
  270. r.log(ctx).Debugf("[resolver] lookup for IP %s: name %s", name, host)
  271. fqdn := dns.Fqdn(host)
  272. resp := createRespMsg(query)
  273. resp.Answer = append(resp.Answer, &dns.PTR{
  274. Hdr: dns.RR_Header{Name: ptr, Rrtype: dns.TypePTR, Class: dns.ClassINET, Ttl: respTTL},
  275. Ptr: fqdn,
  276. })
  277. return resp, nil
  278. }
  279. func (r *Resolver) handleSRVQuery(ctx context.Context, query *dns.Msg) (*dns.Msg, error) {
  280. svc := query.Question[0].Name
  281. srv, ip := r.backend.ResolveService(ctx, svc)
  282. if len(srv) == 0 {
  283. return nil, nil
  284. }
  285. if len(srv) != len(ip) {
  286. return nil, fmt.Errorf("invalid reply for SRV query %s", svc)
  287. }
  288. resp := createRespMsg(query)
  289. for i, r := range srv {
  290. resp.Answer = append(resp.Answer, &dns.SRV{
  291. Hdr: dns.RR_Header{Name: svc, Rrtype: dns.TypePTR, Class: dns.ClassINET, Ttl: respTTL},
  292. Port: r.Port,
  293. Target: r.Target,
  294. })
  295. resp.Extra = append(resp.Extra, &dns.A{
  296. Hdr: dns.RR_Header{Name: r.Target, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: respTTL},
  297. A: ip[i],
  298. })
  299. }
  300. return resp, nil
  301. }
  302. func (r *Resolver) serveDNS(w dns.ResponseWriter, query *dns.Msg) {
  303. var (
  304. resp *dns.Msg
  305. err error
  306. )
  307. if query == nil || len(query.Question) == 0 {
  308. return
  309. }
  310. queryName := query.Question[0].Name
  311. queryType := query.Question[0].Qtype
  312. ctx, span := otel.Tracer("").Start(context.Background(), "resolver.serveDNS", trace.WithAttributes(
  313. attribute.String("libnet.resolver.query.name", queryName),
  314. attribute.String("libnet.resolver.query.type", dns.TypeToString[queryType]),
  315. ))
  316. defer span.End()
  317. switch queryType {
  318. case dns.TypeA:
  319. resp, err = r.handleIPQuery(ctx, query, types.IPv4)
  320. case dns.TypeAAAA:
  321. resp, err = r.handleIPQuery(ctx, query, types.IPv6)
  322. case dns.TypeMX:
  323. resp, err = r.handleMXQuery(ctx, query)
  324. case dns.TypePTR:
  325. resp, err = r.handlePTRQuery(ctx, query)
  326. case dns.TypeSRV:
  327. resp, err = r.handleSRVQuery(ctx, query)
  328. default:
  329. r.log(ctx).Debugf("[resolver] query type %s is not supported by the embedded DNS and will be forwarded to external DNS", dns.TypeToString[queryType])
  330. }
  331. reply := func(msg *dns.Msg) {
  332. if err = w.WriteMsg(msg); err != nil {
  333. r.log(ctx).WithError(err).Errorf("[resolver] failed to write response")
  334. span.RecordError(err)
  335. span.SetStatus(codes.Error, "WriteMsg failed")
  336. }
  337. }
  338. if err != nil {
  339. r.log(ctx).WithError(err).Errorf("[resolver] failed to handle query: %s (%s)", queryName, dns.TypeToString[queryType])
  340. reply(new(dns.Msg).SetRcode(query, dns.RcodeServerFailure))
  341. return
  342. }
  343. if resp != nil {
  344. // We are the authoritative DNS server for this request so it's
  345. // on us to truncate the response message to the size limit
  346. // negotiated by the client.
  347. maxSize := dns.MinMsgSize
  348. if w.LocalAddr().Network() == "tcp" {
  349. maxSize = dns.MaxMsgSize
  350. } else {
  351. if optRR := query.IsEdns0(); optRR != nil {
  352. if udpsize := int(optRR.UDPSize()); udpsize > maxSize {
  353. maxSize = udpsize
  354. }
  355. }
  356. }
  357. resp.Truncate(maxSize)
  358. span.AddEvent("found local record", trace.WithAttributes(
  359. attribute.String("libnet.resolver.resp", resp.String()),
  360. ))
  361. reply(resp)
  362. return
  363. }
  364. if r.proxyDNS {
  365. // If the user sets ndots > 0 explicitly and the query is
  366. // in the root domain don't forward it out. We will return
  367. // failure and let the client retry with the search domain
  368. // attached.
  369. if (queryType == dns.TypeA || queryType == dns.TypeAAAA) && r.backend.NdotsSet() &&
  370. !strings.Contains(strings.TrimSuffix(queryName, "."), ".") {
  371. resp = createRespMsg(query)
  372. } else {
  373. resp = r.forwardExtDNS(ctx, w.LocalAddr().Network(), query)
  374. }
  375. }
  376. if resp == nil {
  377. // We were unable to get an answer from any of the upstream DNS
  378. // servers or the backend doesn't support proxying DNS requests.
  379. resp = new(dns.Msg).SetRcode(query, dns.RcodeServerFailure)
  380. }
  381. reply(resp)
  382. }
  383. const defaultPort = "53"
  384. func (r *Resolver) dialExtDNS(proto string, server extDNSEntry) (net.Conn, error) {
  385. port := defaultPort
  386. if server.port != 0 {
  387. port = strconv.FormatUint(uint64(server.port), 10)
  388. }
  389. addr := net.JoinHostPort(server.IPStr, port)
  390. if server.HostLoopback {
  391. return net.DialTimeout(proto, addr, extIOTimeout)
  392. }
  393. var (
  394. extConn net.Conn
  395. dialErr error
  396. )
  397. err := r.backend.ExecFunc(func() {
  398. extConn, dialErr = net.DialTimeout(proto, addr, extIOTimeout)
  399. })
  400. if err != nil {
  401. return nil, err
  402. }
  403. if dialErr != nil {
  404. return nil, dialErr
  405. }
  406. return extConn, nil
  407. }
  408. func (r *Resolver) forwardExtDNS(ctx context.Context, proto string, query *dns.Msg) *dns.Msg {
  409. ctx, span := otel.Tracer("").Start(ctx, "resolver.forwardExtDNS")
  410. defer span.End()
  411. for _, extDNS := range r.extDNSList {
  412. if extDNS.IPStr == "" {
  413. break
  414. }
  415. // limits the number of outstanding concurrent queries.
  416. ctx, cancel := context.WithTimeout(ctx, extIOTimeout)
  417. err := r.fwdSem.Acquire(ctx, 1)
  418. cancel()
  419. if err != nil {
  420. if errors.Is(err, context.DeadlineExceeded) {
  421. r.logInverval.Do(func() {
  422. r.log(ctx).Errorf("[resolver] more than %v concurrent queries", maxConcurrent)
  423. })
  424. }
  425. return new(dns.Msg).SetRcode(query, dns.RcodeRefused)
  426. }
  427. resp := func() *dns.Msg {
  428. defer r.fwdSem.Release(1)
  429. return r.exchange(ctx, proto, extDNS, query)
  430. }()
  431. if resp == nil {
  432. continue
  433. }
  434. switch resp.Rcode {
  435. case dns.RcodeServerFailure, dns.RcodeRefused:
  436. // Server returned FAILURE: continue with the next external DNS server
  437. // Server returned REFUSED: this can be a transitional status, so continue with the next external DNS server
  438. r.log(ctx).Debugf("[resolver] external DNS %s:%s returned failure:\n%s", proto, extDNS.IPStr, resp)
  439. continue
  440. }
  441. answers := 0
  442. for _, rr := range resp.Answer {
  443. h := rr.Header()
  444. switch h.Rrtype {
  445. case dns.TypeA:
  446. answers++
  447. ip := rr.(*dns.A).A
  448. r.log(ctx).Debugf("[resolver] received A record %q for %q from %s:%s", ip, h.Name, proto, extDNS.IPStr)
  449. r.backend.HandleQueryResp(h.Name, ip)
  450. case dns.TypeAAAA:
  451. answers++
  452. ip := rr.(*dns.AAAA).AAAA
  453. r.log(ctx).Debugf("[resolver] received AAAA record %q for %q from %s:%s", ip, h.Name, proto, extDNS.IPStr)
  454. r.backend.HandleQueryResp(h.Name, ip)
  455. }
  456. }
  457. if len(resp.Answer) == 0 {
  458. r.log(ctx).Debugf("[resolver] external DNS %s:%s returned response with no answers:\n%s", proto, extDNS.IPStr, resp)
  459. }
  460. resp.Compress = true
  461. span.AddEvent("response from upstream server", trace.WithAttributes(
  462. attribute.String("libnet.resolver.resp", resp.String()),
  463. ))
  464. return resp
  465. }
  466. span.AddEvent("no response from upstream servers")
  467. return nil
  468. }
  469. func (r *Resolver) exchange(ctx context.Context, proto string, extDNS extDNSEntry, query *dns.Msg) *dns.Msg {
  470. ctx, span := otel.Tracer("").Start(ctx, "resolver.exchange", trace.WithAttributes(
  471. attribute.String("libnet.resolver.upstream.proto", proto),
  472. attribute.String("libnet.resolver.upstream.address", extDNS.IPStr),
  473. attribute.Bool("libnet.resolver.upstream.host-loopback", extDNS.HostLoopback)))
  474. defer span.End()
  475. extConn, err := r.dialExtDNS(proto, extDNS)
  476. if err != nil {
  477. r.log(ctx).WithError(err).Warn("[resolver] connect failed")
  478. span.RecordError(err)
  479. span.SetStatus(codes.Error, "dialExtDNS failed")
  480. return nil
  481. }
  482. defer extConn.Close()
  483. logger := r.log(ctx).WithFields(log.Fields{
  484. "dns-server": extConn.RemoteAddr().Network() + ":" + extConn.RemoteAddr().String(),
  485. "client-addr": extConn.LocalAddr().Network() + ":" + extConn.LocalAddr().String(),
  486. "question": query.Question[0].String(),
  487. })
  488. logger.Debug("[resolver] forwarding query")
  489. resp, _, err := (&dns.Client{
  490. Timeout: extIOTimeout,
  491. // Following the robustness principle, make a best-effort
  492. // attempt to receive oversized response messages without
  493. // truncating them on our end to forward verbatim to the client.
  494. // Some DNS servers (e.g. Mikrotik RouterOS) don't support
  495. // EDNS(0) and may send replies over UDP longer than 512 bytes
  496. // regardless of what size limit, if any, was advertized in the
  497. // query message. Note that ExchangeWithConn will override this
  498. // value if it detects an EDNS OPT record in query so only
  499. // oversized replies to non-EDNS queries will benefit.
  500. UDPSize: dns.MaxMsgSize,
  501. }).ExchangeWithConn(query, &dns.Conn{Conn: extConn})
  502. if err != nil {
  503. r.log(ctx).WithError(err).Errorf("[resolver] failed to query DNS server: %s, query: %s", extConn.RemoteAddr().String(), query.Question[0].String())
  504. span.RecordError(err)
  505. span.SetStatus(codes.Error, "ExchangeWithConn failed")
  506. return nil
  507. }
  508. if resp == nil {
  509. // Should be impossible, so make noise if it happens anyway.
  510. logger.Error("[resolver] external DNS returned empty response")
  511. span.SetStatus(codes.Error, "External DNS returned empty response")
  512. }
  513. return resp
  514. }