libnet: add OTEL tracing to the embedded DNS
This change creates a few OTEL spans and plumb context through the DNS resolver and DNS backends (ie. Sandbox and Network). This should help better understand how much lock contention impacts performance, and help debug issues related to DNS queries (we basically have no visibility into what's happening here right now). Signed-off-by: Albin Kerouanton <albinker@gmail.com>
This commit is contained in:
parent
97e28de7e2
commit
4de8459265
5 changed files with 134 additions and 71 deletions
|
@ -1,6 +1,7 @@
|
|||
package libnetwork
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net"
|
||||
|
@ -418,7 +419,8 @@ func TestSRVServiceQuery(t *testing.T) {
|
|||
|
||||
c.svcRecords[n.ID()] = sr
|
||||
|
||||
_, ip := ep.Info().Sandbox().ResolveService("_http._tcp.web.swarm")
|
||||
ctx := context.Background()
|
||||
_, ip := ep.Info().Sandbox().ResolveService(ctx, "_http._tcp.web.swarm")
|
||||
|
||||
if len(ip) == 0 {
|
||||
t.Fatal(err)
|
||||
|
@ -427,7 +429,7 @@ func TestSRVServiceQuery(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
_, ip = ep.Info().Sandbox().ResolveService("_host_http._tcp.web.swarm")
|
||||
_, ip = ep.Info().Sandbox().ResolveService(ctx, "_host_http._tcp.web.swarm")
|
||||
|
||||
if len(ip) == 0 {
|
||||
t.Fatal(err)
|
||||
|
@ -437,7 +439,7 @@ func TestSRVServiceQuery(t *testing.T) {
|
|||
}
|
||||
|
||||
// Service name with invalid protocol name. Should fail without error
|
||||
_, ip = ep.Info().Sandbox().ResolveService("_http._icmp.web.swarm")
|
||||
_, ip = ep.Info().Sandbox().ResolveService(ctx, "_http._icmp.web.swarm")
|
||||
if len(ip) != 0 {
|
||||
t.Fatal("Valid response for invalid service name")
|
||||
}
|
||||
|
@ -490,7 +492,8 @@ func TestServiceVIPReuse(t *testing.T) {
|
|||
|
||||
ipToResolve := netutils.ReverseIP("192.168.0.1")
|
||||
|
||||
ipList, _ := n.ResolveName("service_test", types.IPv4)
|
||||
ctx := context.Background()
|
||||
ipList, _ := n.ResolveName(ctx, "service_test", types.IPv4)
|
||||
if len(ipList) == 0 {
|
||||
t.Fatal("There must be the VIP")
|
||||
}
|
||||
|
@ -500,7 +503,7 @@ func TestServiceVIPReuse(t *testing.T) {
|
|||
if ipList[0].String() != "192.168.0.1" {
|
||||
t.Fatal("The service VIP is 192.168.0.1")
|
||||
}
|
||||
name := n.ResolveIP(ipToResolve)
|
||||
name := n.ResolveIP(ctx, ipToResolve)
|
||||
if name == "" {
|
||||
t.Fatal("It must return a name")
|
||||
}
|
||||
|
@ -510,7 +513,7 @@ func TestServiceVIPReuse(t *testing.T) {
|
|||
|
||||
// Delete service record for one of the services, the IP should remain because one service is still associated with it
|
||||
n.deleteSvcRecords("ep1", "service_test", "serviceID1", net.ParseIP("192.168.0.1"), net.IP{}, true, "test")
|
||||
ipList, _ = n.ResolveName("service_test", types.IPv4)
|
||||
ipList, _ = n.ResolveName(ctx, "service_test", types.IPv4)
|
||||
if len(ipList) == 0 {
|
||||
t.Fatal("There must be the VIP")
|
||||
}
|
||||
|
@ -520,7 +523,7 @@ func TestServiceVIPReuse(t *testing.T) {
|
|||
if ipList[0].String() != "192.168.0.1" {
|
||||
t.Fatal("The service VIP is 192.168.0.1")
|
||||
}
|
||||
name = n.ResolveIP(ipToResolve)
|
||||
name = n.ResolveIP(ctx, ipToResolve)
|
||||
if name == "" {
|
||||
t.Fatal("It must return a name")
|
||||
}
|
||||
|
@ -530,7 +533,7 @@ func TestServiceVIPReuse(t *testing.T) {
|
|||
|
||||
// Delete again the service using the previous service ID, nothing should happen
|
||||
n.deleteSvcRecords("ep2", "service_test", "serviceID1", net.ParseIP("192.168.0.1"), net.IP{}, true, "test")
|
||||
ipList, _ = n.ResolveName("service_test", types.IPv4)
|
||||
ipList, _ = n.ResolveName(ctx, "service_test", types.IPv4)
|
||||
if len(ipList) == 0 {
|
||||
t.Fatal("There must be the VIP")
|
||||
}
|
||||
|
@ -540,7 +543,7 @@ func TestServiceVIPReuse(t *testing.T) {
|
|||
if ipList[0].String() != "192.168.0.1" {
|
||||
t.Fatal("The service VIP is 192.168.0.1")
|
||||
}
|
||||
name = n.ResolveIP(ipToResolve)
|
||||
name = n.ResolveIP(ctx, ipToResolve)
|
||||
if name == "" {
|
||||
t.Fatal("It must return a name")
|
||||
}
|
||||
|
@ -550,11 +553,11 @@ func TestServiceVIPReuse(t *testing.T) {
|
|||
|
||||
// Delete now using the second service ID, now all the entries should be gone
|
||||
n.deleteSvcRecords("ep2", "service_test", "serviceID2", net.ParseIP("192.168.0.1"), net.IP{}, true, "test")
|
||||
ipList, _ = n.ResolveName("service_test", types.IPv4)
|
||||
ipList, _ = n.ResolveName(ctx, "service_test", types.IPv4)
|
||||
if len(ipList) != 0 {
|
||||
t.Fatal("All the VIPs should be gone now")
|
||||
}
|
||||
name = n.ResolveIP(ipToResolve)
|
||||
name = n.ResolveIP(ctx, ipToResolve)
|
||||
if name != "" {
|
||||
t.Fatalf("It must return empty no more services associated, instead:%s", name)
|
||||
}
|
||||
|
|
|
@ -23,6 +23,9 @@ import (
|
|||
"github.com/docker/docker/libnetwork/scope"
|
||||
"github.com/docker/docker/libnetwork/types"
|
||||
"github.com/docker/docker/pkg/stringid"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// EndpointWalker is a client provided function which will be used to walk the Endpoints.
|
||||
|
@ -1955,12 +1958,20 @@ func (n *Network) hasLoadBalancerEndpoint() bool {
|
|||
return len(n.loadBalancerIP) != 0
|
||||
}
|
||||
|
||||
func (n *Network) ResolveName(req string, ipType int) ([]net.IP, bool) {
|
||||
func (n *Network) ResolveName(ctx context.Context, req string, ipType int) ([]net.IP, bool) {
|
||||
var ipv6Miss bool
|
||||
|
||||
c := n.getController()
|
||||
networkID := n.ID()
|
||||
|
||||
_, span := otel.Tracer("").Start(ctx, "Network.ResolveName", trace.WithAttributes(
|
||||
attribute.String("libnet.network.name", n.Name()),
|
||||
attribute.String("libnet.network.id", networkID),
|
||||
))
|
||||
defer span.End()
|
||||
|
||||
c.mu.Lock()
|
||||
// TODO(aker): release the lock earlier
|
||||
defer c.mu.Unlock()
|
||||
sr, ok := c.svcRecords[networkID]
|
||||
|
||||
|
@ -2019,7 +2030,7 @@ func (n *Network) HandleQueryResp(name string, ip net.IP) {
|
|||
}
|
||||
}
|
||||
|
||||
func (n *Network) ResolveIP(ip string) string {
|
||||
func (n *Network) ResolveIP(_ context.Context, ip string) string {
|
||||
networkID := n.ID()
|
||||
c := n.getController()
|
||||
c.mu.Lock()
|
||||
|
@ -2049,13 +2060,13 @@ func (n *Network) ResolveIP(ip string) string {
|
|||
return elem.name + "." + nwName
|
||||
}
|
||||
|
||||
func (n *Network) ResolveService(name string) ([]*net.SRV, []net.IP) {
|
||||
func (n *Network) ResolveService(ctx context.Context, name string) ([]*net.SRV, []net.IP) {
|
||||
c := n.getController()
|
||||
|
||||
srv := []*net.SRV{}
|
||||
ip := []net.IP{}
|
||||
|
||||
log.G(context.TODO()).Debugf("Service name To resolve: %v", name)
|
||||
log.G(ctx).Debugf("Service name To resolve: %v", name)
|
||||
|
||||
// There are DNS implementations that allow SRV queries for names not in
|
||||
// the format defined by RFC 2782. Hence specific validations checks are
|
||||
|
|
|
@ -2,6 +2,7 @@ package libnetwork
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net"
|
||||
|
@ -14,6 +15,10 @@ import (
|
|||
"github.com/docker/docker/libnetwork/types"
|
||||
"github.com/miekg/dns"
|
||||
"github.com/sirupsen/logrus"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"golang.org/x/sync/semaphore"
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
@ -26,13 +31,13 @@ type DNSBackend interface {
|
|||
// the networks the sandbox is connected to. For IPv6 queries, second return
|
||||
// value will be true if the name exists in docker domain but doesn't have an
|
||||
// IPv6 address. Such queries shouldn't be forwarded to external nameservers.
|
||||
ResolveName(name string, iplen int) ([]net.IP, bool)
|
||||
ResolveName(ctx context.Context, name string, iplen int) ([]net.IP, bool)
|
||||
// ResolveIP returns the service name for the passed in IP. IP is in reverse dotted
|
||||
// notation; the format used for DNS PTR records
|
||||
ResolveIP(name string) string
|
||||
ResolveIP(ctx context.Context, name string) string
|
||||
// ResolveService returns all the backend details about the containers or hosts
|
||||
// backing a service. Its purpose is to satisfy an SRV query
|
||||
ResolveService(name string) ([]*net.SRV, []net.IP)
|
||||
ResolveService(ctx context.Context, name string) ([]*net.SRV, []net.IP)
|
||||
// ExecFunc allows a function to be executed in the context of the backend
|
||||
// on behalf of the resolver.
|
||||
ExecFunc(f func()) error
|
||||
|
@ -92,9 +97,9 @@ func NewResolver(address string, proxyDNS bool, backend DNSBackend) *Resolver {
|
|||
}
|
||||
}
|
||||
|
||||
func (r *Resolver) log() *logrus.Logger {
|
||||
func (r *Resolver) log(ctx context.Context) *logrus.Logger {
|
||||
if r.logger == nil {
|
||||
return log.G(context.TODO()).Logger
|
||||
return log.G(ctx).Logger
|
||||
}
|
||||
return r.logger
|
||||
}
|
||||
|
@ -146,7 +151,7 @@ func (r *Resolver) Start() error {
|
|||
r.server = s
|
||||
go func() {
|
||||
if err := s.ActivateAndServe(); err != nil {
|
||||
r.log().WithError(err).Error("[resolver] failed to start PacketConn DNS server")
|
||||
r.log(context.TODO()).WithError(err).Error("[resolver] failed to start PacketConn DNS server")
|
||||
}
|
||||
}()
|
||||
|
||||
|
@ -154,7 +159,7 @@ func (r *Resolver) Start() error {
|
|||
r.tcpServer = tcpServer
|
||||
go func() {
|
||||
if err := tcpServer.ActivateAndServe(); err != nil {
|
||||
r.log().WithError(err).Error("[resolver] failed to start TCP DNS server")
|
||||
r.log(context.TODO()).WithError(err).Error("[resolver] failed to start TCP DNS server")
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
|
@ -224,10 +229,10 @@ func createRespMsg(query *dns.Msg) *dns.Msg {
|
|||
return resp
|
||||
}
|
||||
|
||||
func (r *Resolver) handleMXQuery(query *dns.Msg) (*dns.Msg, error) {
|
||||
func (r *Resolver) handleMXQuery(ctx context.Context, query *dns.Msg) (*dns.Msg, error) {
|
||||
name := query.Question[0].Name
|
||||
addrv4, _ := r.backend.ResolveName(name, types.IPv4)
|
||||
addrv6, _ := r.backend.ResolveName(name, types.IPv6)
|
||||
addrv4, _ := r.backend.ResolveName(ctx, name, types.IPv4)
|
||||
addrv6, _ := r.backend.ResolveName(ctx, name, types.IPv6)
|
||||
|
||||
if addrv4 == nil && addrv6 == nil {
|
||||
return nil, nil
|
||||
|
@ -241,17 +246,17 @@ func (r *Resolver) handleMXQuery(query *dns.Msg) (*dns.Msg, error) {
|
|||
return resp, nil
|
||||
}
|
||||
|
||||
func (r *Resolver) handleIPQuery(query *dns.Msg, ipType int) (*dns.Msg, error) {
|
||||
func (r *Resolver) handleIPQuery(ctx context.Context, query *dns.Msg, ipType int) (*dns.Msg, error) {
|
||||
var (
|
||||
addr []net.IP
|
||||
ipv6Miss bool
|
||||
name = query.Question[0].Name
|
||||
)
|
||||
addr, ipv6Miss = r.backend.ResolveName(name, ipType)
|
||||
addr, ipv6Miss = r.backend.ResolveName(ctx, name, ipType)
|
||||
|
||||
if addr == nil && ipv6Miss {
|
||||
// Send a reply without any Answer sections
|
||||
r.log().Debugf("[resolver] lookup name %s present without IPv6 address", name)
|
||||
r.log(ctx).Debugf("[resolver] lookup name %s present without IPv6 address", name)
|
||||
resp := createRespMsg(query)
|
||||
return resp, nil
|
||||
}
|
||||
|
@ -259,7 +264,7 @@ func (r *Resolver) handleIPQuery(query *dns.Msg, ipType int) (*dns.Msg, error) {
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
r.log().Debugf("[resolver] lookup for %s: IP %v", name, addr)
|
||||
r.log(ctx).Debugf("[resolver] lookup for %s: IP %v", name, addr)
|
||||
|
||||
resp := createRespMsg(query)
|
||||
if len(addr) > 1 {
|
||||
|
@ -283,7 +288,7 @@ func (r *Resolver) handleIPQuery(query *dns.Msg, ipType int) (*dns.Msg, error) {
|
|||
return resp, nil
|
||||
}
|
||||
|
||||
func (r *Resolver) handlePTRQuery(query *dns.Msg) (*dns.Msg, error) {
|
||||
func (r *Resolver) handlePTRQuery(ctx context.Context, query *dns.Msg) (*dns.Msg, error) {
|
||||
ptr := query.Question[0].Name
|
||||
name, after, found := strings.Cut(ptr, ptrIPv4domain)
|
||||
if !found || after != "" {
|
||||
|
@ -295,12 +300,12 @@ func (r *Resolver) handlePTRQuery(query *dns.Msg) (*dns.Msg, error) {
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
host := r.backend.ResolveIP(name)
|
||||
host := r.backend.ResolveIP(ctx, name)
|
||||
if host == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
r.log().Debugf("[resolver] lookup for IP %s: name %s", name, host)
|
||||
r.log(ctx).Debugf("[resolver] lookup for IP %s: name %s", name, host)
|
||||
fqdn := dns.Fqdn(host)
|
||||
|
||||
resp := createRespMsg(query)
|
||||
|
@ -311,9 +316,9 @@ func (r *Resolver) handlePTRQuery(query *dns.Msg) (*dns.Msg, error) {
|
|||
return resp, nil
|
||||
}
|
||||
|
||||
func (r *Resolver) handleSRVQuery(query *dns.Msg) (*dns.Msg, error) {
|
||||
func (r *Resolver) handleSRVQuery(ctx context.Context, query *dns.Msg) (*dns.Msg, error) {
|
||||
svc := query.Question[0].Name
|
||||
srv, ip := r.backend.ResolveService(svc)
|
||||
srv, ip := r.backend.ResolveService(ctx, svc)
|
||||
|
||||
if len(srv) == 0 {
|
||||
return nil, nil
|
||||
|
@ -351,29 +356,37 @@ func (r *Resolver) serveDNS(w dns.ResponseWriter, query *dns.Msg) {
|
|||
queryName := query.Question[0].Name
|
||||
queryType := query.Question[0].Qtype
|
||||
|
||||
ctx, span := otel.Tracer("").Start(context.Background(), "resolver.serveDNS", trace.WithAttributes(
|
||||
attribute.String("libnet.resolver.query.name", queryName),
|
||||
attribute.String("libnet.resolver.query.type", dns.TypeToString[queryType]),
|
||||
))
|
||||
defer span.End()
|
||||
|
||||
switch queryType {
|
||||
case dns.TypeA:
|
||||
resp, err = r.handleIPQuery(query, types.IPv4)
|
||||
resp, err = r.handleIPQuery(ctx, query, types.IPv4)
|
||||
case dns.TypeAAAA:
|
||||
resp, err = r.handleIPQuery(query, types.IPv6)
|
||||
resp, err = r.handleIPQuery(ctx, query, types.IPv6)
|
||||
case dns.TypeMX:
|
||||
resp, err = r.handleMXQuery(query)
|
||||
resp, err = r.handleMXQuery(ctx, query)
|
||||
case dns.TypePTR:
|
||||
resp, err = r.handlePTRQuery(query)
|
||||
resp, err = r.handlePTRQuery(ctx, query)
|
||||
case dns.TypeSRV:
|
||||
resp, err = r.handleSRVQuery(query)
|
||||
resp, err = r.handleSRVQuery(ctx, query)
|
||||
default:
|
||||
r.log().Debugf("[resolver] query type %s is not supported by the embedded DNS and will be forwarded to external DNS", dns.TypeToString[queryType])
|
||||
r.log(ctx).Debugf("[resolver] query type %s is not supported by the embedded DNS and will be forwarded to external DNS", dns.TypeToString[queryType])
|
||||
}
|
||||
|
||||
reply := func(msg *dns.Msg) {
|
||||
if err = w.WriteMsg(msg); err != nil {
|
||||
r.log().WithError(err).Errorf("[resolver] failed to write response")
|
||||
r.log(ctx).WithError(err).Errorf("[resolver] failed to write response")
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, "WriteMsg failed")
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
r.log().WithError(err).Errorf("[resolver] failed to handle query: %s (%s)", queryName, dns.TypeToString[queryType])
|
||||
r.log(ctx).WithError(err).Errorf("[resolver] failed to handle query: %s (%s)", queryName, dns.TypeToString[queryType])
|
||||
reply(new(dns.Msg).SetRcode(query, dns.RcodeServerFailure))
|
||||
return
|
||||
}
|
||||
|
@ -393,6 +406,9 @@ func (r *Resolver) serveDNS(w dns.ResponseWriter, query *dns.Msg) {
|
|||
}
|
||||
}
|
||||
resp.Truncate(maxSize)
|
||||
span.AddEvent("found local record", trace.WithAttributes(
|
||||
attribute.String("libnet.resolver.resp", resp.String()),
|
||||
))
|
||||
reply(resp)
|
||||
return
|
||||
}
|
||||
|
@ -406,7 +422,7 @@ func (r *Resolver) serveDNS(w dns.ResponseWriter, query *dns.Msg) {
|
|||
!strings.Contains(strings.TrimSuffix(queryName, "."), ".") {
|
||||
resp = createRespMsg(query)
|
||||
} else {
|
||||
resp = r.forwardExtDNS(w.LocalAddr().Network(), query)
|
||||
resp = r.forwardExtDNS(ctx, w.LocalAddr().Network(), query)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -448,25 +464,31 @@ func (r *Resolver) dialExtDNS(proto string, server extDNSEntry) (net.Conn, error
|
|||
return extConn, nil
|
||||
}
|
||||
|
||||
func (r *Resolver) forwardExtDNS(proto string, query *dns.Msg) *dns.Msg {
|
||||
func (r *Resolver) forwardExtDNS(ctx context.Context, proto string, query *dns.Msg) *dns.Msg {
|
||||
ctx, span := otel.Tracer("").Start(ctx, "resolver.forwardExtDNS")
|
||||
defer span.End()
|
||||
|
||||
for _, extDNS := range r.extDNSList {
|
||||
if extDNS.IPStr == "" {
|
||||
break
|
||||
}
|
||||
|
||||
// limits the number of outstanding concurrent queries.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), extIOTimeout)
|
||||
ctx, cancel := context.WithTimeout(ctx, extIOTimeout)
|
||||
err := r.fwdSem.Acquire(ctx, 1)
|
||||
cancel()
|
||||
|
||||
if err != nil {
|
||||
r.logInverval.Do(func() {
|
||||
r.log().Errorf("[resolver] more than %v concurrent queries", maxConcurrent)
|
||||
})
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
r.logInverval.Do(func() {
|
||||
r.log(ctx).Errorf("[resolver] more than %v concurrent queries", maxConcurrent)
|
||||
})
|
||||
}
|
||||
return new(dns.Msg).SetRcode(query, dns.RcodeRefused)
|
||||
}
|
||||
resp := func() *dns.Msg {
|
||||
defer r.fwdSem.Release(1)
|
||||
return r.exchange(proto, extDNS, query)
|
||||
return r.exchange(ctx, proto, extDNS, query)
|
||||
}()
|
||||
if resp == nil {
|
||||
continue
|
||||
|
@ -476,7 +498,7 @@ func (r *Resolver) forwardExtDNS(proto string, query *dns.Msg) *dns.Msg {
|
|||
case dns.RcodeServerFailure, dns.RcodeRefused:
|
||||
// Server returned FAILURE: continue with the next external DNS server
|
||||
// Server returned REFUSED: this can be a transitional status, so continue with the next external DNS server
|
||||
r.log().Debugf("[resolver] external DNS %s:%s returned failure:\n%s", proto, extDNS.IPStr, resp)
|
||||
r.log(ctx).Debugf("[resolver] external DNS %s:%s returned failure:\n%s", proto, extDNS.IPStr, resp)
|
||||
continue
|
||||
}
|
||||
answers := 0
|
||||
|
@ -486,34 +508,46 @@ func (r *Resolver) forwardExtDNS(proto string, query *dns.Msg) *dns.Msg {
|
|||
case dns.TypeA:
|
||||
answers++
|
||||
ip := rr.(*dns.A).A
|
||||
r.log().Debugf("[resolver] received A record %q for %q from %s:%s", ip, h.Name, proto, extDNS.IPStr)
|
||||
r.log(ctx).Debugf("[resolver] received A record %q for %q from %s:%s", ip, h.Name, proto, extDNS.IPStr)
|
||||
r.backend.HandleQueryResp(h.Name, ip)
|
||||
case dns.TypeAAAA:
|
||||
answers++
|
||||
ip := rr.(*dns.AAAA).AAAA
|
||||
r.log().Debugf("[resolver] received AAAA record %q for %q from %s:%s", ip, h.Name, proto, extDNS.IPStr)
|
||||
r.log(ctx).Debugf("[resolver] received AAAA record %q for %q from %s:%s", ip, h.Name, proto, extDNS.IPStr)
|
||||
r.backend.HandleQueryResp(h.Name, ip)
|
||||
}
|
||||
}
|
||||
if len(resp.Answer) == 0 {
|
||||
r.log().Debugf("[resolver] external DNS %s:%s returned response with no answers:\n%s", proto, extDNS.IPStr, resp)
|
||||
r.log(ctx).Debugf("[resolver] external DNS %s:%s returned response with no answers:\n%s", proto, extDNS.IPStr, resp)
|
||||
}
|
||||
resp.Compress = true
|
||||
span.AddEvent("response from upstream server", trace.WithAttributes(
|
||||
attribute.String("libnet.resolver.resp", resp.String()),
|
||||
))
|
||||
return resp
|
||||
}
|
||||
|
||||
span.AddEvent("no response from upstream servers")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *Resolver) exchange(proto string, extDNS extDNSEntry, query *dns.Msg) *dns.Msg {
|
||||
func (r *Resolver) exchange(ctx context.Context, proto string, extDNS extDNSEntry, query *dns.Msg) *dns.Msg {
|
||||
ctx, span := otel.Tracer("").Start(ctx, "resolver.exchange", trace.WithAttributes(
|
||||
attribute.String("libnet.resolver.upstream.proto", proto),
|
||||
attribute.String("libnet.resolver.upstream.address", extDNS.IPStr),
|
||||
attribute.Bool("libnet.resolver.upstream.host-loopback", extDNS.HostLoopback)))
|
||||
defer span.End()
|
||||
|
||||
extConn, err := r.dialExtDNS(proto, extDNS)
|
||||
if err != nil {
|
||||
r.log().WithError(err).Warn("[resolver] connect failed")
|
||||
r.log(ctx).WithError(err).Warn("[resolver] connect failed")
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, "dialExtDNS failed")
|
||||
return nil
|
||||
}
|
||||
defer extConn.Close()
|
||||
|
||||
logger := r.log().WithFields(log.Fields{
|
||||
logger := r.log(ctx).WithFields(log.Fields{
|
||||
"dns-server": extConn.RemoteAddr().Network() + ":" + extConn.RemoteAddr().String(),
|
||||
"client-addr": extConn.LocalAddr().Network() + ":" + extConn.LocalAddr().String(),
|
||||
"question": query.Question[0].String(),
|
||||
|
@ -534,13 +568,16 @@ func (r *Resolver) exchange(proto string, extDNS extDNSEntry, query *dns.Msg) *d
|
|||
UDPSize: dns.MaxMsgSize,
|
||||
}).ExchangeWithConn(query, &dns.Conn{Conn: extConn})
|
||||
if err != nil {
|
||||
r.log().WithError(err).Errorf("[resolver] failed to query DNS server: %s, query: %s", extConn.RemoteAddr().String(), query.Question[0].String())
|
||||
r.log(ctx).WithError(err).Errorf("[resolver] failed to query DNS server: %s, query: %s", extConn.RemoteAddr().String(), query.Question[0].String())
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, "ExchangeWithConn failed")
|
||||
return nil
|
||||
}
|
||||
|
||||
if resp == nil {
|
||||
// Should be impossible, so make noise if it happens anyway.
|
||||
logger.Error("[resolver] external DNS returned empty response")
|
||||
span.SetStatus(codes.Error, "External DNS returned empty response")
|
||||
}
|
||||
return resp
|
||||
}
|
||||
|
|
|
@ -242,7 +242,9 @@ func (w tlogWriter) Write(p []byte) (n int, err error) {
|
|||
|
||||
type noopDNSBackend struct{ DNSBackend }
|
||||
|
||||
func (noopDNSBackend) ResolveName(name string, iplen int) ([]net.IP, bool) { return nil, false }
|
||||
func (noopDNSBackend) ResolveName(_ context.Context, name string, iplen int) ([]net.IP, bool) {
|
||||
return nil, false
|
||||
}
|
||||
|
||||
func (noopDNSBackend) ExecFunc(f func()) error { f(); return nil }
|
||||
|
||||
|
@ -286,7 +288,7 @@ func TestReplySERVFAIL(t *testing.T) {
|
|||
|
||||
type badSRVDNSBackend struct{ noopDNSBackend }
|
||||
|
||||
func (badSRVDNSBackend) ResolveService(name string) ([]*net.SRV, []net.IP) {
|
||||
func (badSRVDNSBackend) ResolveService(_ context.Context, _ string) ([]*net.SRV, []net.IP) {
|
||||
return []*net.SRV{nil, nil, nil}, nil // Mismatched slice lengths
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,9 @@ import (
|
|||
"github.com/docker/docker/libnetwork/etchosts"
|
||||
"github.com/docker/docker/libnetwork/osl"
|
||||
"github.com/docker/docker/libnetwork/types"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
// SandboxOption is an option setter function type used to pass various options to
|
||||
|
@ -351,13 +354,13 @@ func (sb *Sandbox) HandleQueryResp(name string, ip net.IP) {
|
|||
}
|
||||
}
|
||||
|
||||
func (sb *Sandbox) ResolveIP(ip string) string {
|
||||
func (sb *Sandbox) ResolveIP(ctx context.Context, ip string) string {
|
||||
var svc string
|
||||
log.G(context.TODO()).Debugf("IP To resolve %v", ip)
|
||||
log.G(ctx).Debugf("IP To resolve %v", ip)
|
||||
|
||||
for _, ep := range sb.Endpoints() {
|
||||
n := ep.getNetwork()
|
||||
svc = n.ResolveIP(ip)
|
||||
svc = n.ResolveIP(ctx, ip)
|
||||
if len(svc) != 0 {
|
||||
return svc
|
||||
}
|
||||
|
@ -368,8 +371,8 @@ func (sb *Sandbox) ResolveIP(ip string) string {
|
|||
|
||||
// ResolveService returns all the backend details about the containers or hosts
|
||||
// backing a service. Its purpose is to satisfy an SRV query.
|
||||
func (sb *Sandbox) ResolveService(name string) ([]*net.SRV, []net.IP) {
|
||||
log.G(context.TODO()).Debugf("Service name To resolve: %v", name)
|
||||
func (sb *Sandbox) ResolveService(ctx context.Context, name string) ([]*net.SRV, []net.IP) {
|
||||
log.G(ctx).Debugf("Service name To resolve: %v", name)
|
||||
|
||||
// There are DNS implementations that allow SRV queries for names not in
|
||||
// the format defined by RFC 2782. Hence specific validations checks are
|
||||
|
@ -381,7 +384,7 @@ func (sb *Sandbox) ResolveService(name string) ([]*net.SRV, []net.IP) {
|
|||
for _, ep := range sb.Endpoints() {
|
||||
n := ep.getNetwork()
|
||||
|
||||
srv, ip := n.ResolveService(name)
|
||||
srv, ip := n.ResolveService(ctx, name)
|
||||
if len(srv) > 0 {
|
||||
return srv, ip
|
||||
}
|
||||
|
@ -421,7 +424,7 @@ func getLocalNwEndpoints(epList []*Endpoint) []*Endpoint {
|
|||
return eps
|
||||
}
|
||||
|
||||
func (sb *Sandbox) ResolveName(name string, ipType int) ([]net.IP, bool) {
|
||||
func (sb *Sandbox) ResolveName(ctx context.Context, name string, ipType int) ([]net.IP, bool) {
|
||||
// Embedded server owns the docker network domain. Resolution should work
|
||||
// for both container_name and container_name.network_name
|
||||
// We allow '.' in service name and network name. For a name a.b.c.d the
|
||||
|
@ -431,7 +434,7 @@ func (sb *Sandbox) ResolveName(name string, ipType int) ([]net.IP, bool) {
|
|||
// {a.b in network c.d},
|
||||
// {a in network b.c.d},
|
||||
|
||||
log.G(context.TODO()).Debugf("Name To resolve: %v", name)
|
||||
log.G(ctx).Debugf("Name To resolve: %v", name)
|
||||
name = strings.TrimSuffix(name, ".")
|
||||
reqName := []string{name}
|
||||
networkName := []string{""}
|
||||
|
@ -468,7 +471,7 @@ func (sb *Sandbox) ResolveName(name string, ipType int) ([]net.IP, bool) {
|
|||
|
||||
for i := 0; i < len(reqName); i++ {
|
||||
// First check for local container alias
|
||||
ip, ipv6Miss := sb.resolveName(reqName[i], networkName[i], epList, true, ipType)
|
||||
ip, ipv6Miss := sb.resolveName(ctx, reqName[i], networkName[i], epList, true, ipType)
|
||||
if ip != nil {
|
||||
return ip, false
|
||||
}
|
||||
|
@ -477,7 +480,7 @@ func (sb *Sandbox) ResolveName(name string, ipType int) ([]net.IP, bool) {
|
|||
}
|
||||
|
||||
// Resolve the actual container name
|
||||
ip, ipv6Miss = sb.resolveName(reqName[i], networkName[i], epList, false, ipType)
|
||||
ip, ipv6Miss = sb.resolveName(ctx, reqName[i], networkName[i], epList, false, ipType)
|
||||
if ip != nil {
|
||||
return ip, false
|
||||
}
|
||||
|
@ -488,7 +491,14 @@ func (sb *Sandbox) ResolveName(name string, ipType int) ([]net.IP, bool) {
|
|||
return nil, false
|
||||
}
|
||||
|
||||
func (sb *Sandbox) resolveName(nameOrAlias string, networkName string, epList []*Endpoint, lookupAlias bool, ipType int) (_ []net.IP, ipv6Miss bool) {
|
||||
func (sb *Sandbox) resolveName(ctx context.Context, nameOrAlias string, networkName string, epList []*Endpoint, lookupAlias bool, ipType int) (_ []net.IP, ipv6Miss bool) {
|
||||
ctx, span := otel.Tracer("").Start(ctx, "Sandbox.resolveName", trace.WithAttributes(
|
||||
attribute.String("libnet.resolver.name-or-alias", nameOrAlias),
|
||||
attribute.String("libnet.network.name", networkName),
|
||||
attribute.Bool("libnet.resolver.alias-lookup", lookupAlias),
|
||||
attribute.Int("libnet.resolver.ip-family", ipType)))
|
||||
defer span.End()
|
||||
|
||||
for _, ep := range epList {
|
||||
if lookupAlias && len(ep.aliases) == 0 {
|
||||
continue
|
||||
|
@ -519,7 +529,7 @@ func (sb *Sandbox) resolveName(nameOrAlias string, networkName string, epList []
|
|||
}
|
||||
}
|
||||
|
||||
ip, miss := nw.ResolveName(name, ipType)
|
||||
ip, miss := nw.ResolveName(ctx, name, ipType)
|
||||
if ip != nil {
|
||||
return ip, false
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue