Merge branch 'move_libnetwork' of /home/cpuguy83/go/src/github.com/docker/libnetwork into move_libnetwork

This commit is contained in:
Brian Goff 2021-06-01 21:50:56 +00:00
commit 5165093570
1907 changed files with 500870 additions and 0 deletions

View file

@ -0,0 +1,79 @@
version: 2
defaults: &defaults
working_directory: ~/go/src/github.com/docker/libnetwork
docker:
# the following image is irrelevant for the build, everything is built inside a container, check the Makefile
- image: 'circleci/golang:latest'
environment:
dockerbuildargs: .
dockerargs: --privileged -e CIRCLECI
jobs:
builder:
<<: *defaults
steps:
- checkout
- setup_remote_docker:
version: 19.03.12
reusable: true
exclusive: false
- run: make builder
build:
<<: *defaults
steps:
- checkout
- setup_remote_docker:
version: 19.03.12
reusable: true
exclusive: false
- run: make build
check:
<<: *defaults
steps:
- checkout
- setup_remote_docker:
version: 19.03.12
reusable: true
exclusive: false
- run: make check
cross:
<<: *defaults
steps:
- checkout
- setup_remote_docker:
version: 19.03.12
reusable: true
exclusive: false
- run: make cross
unit-tests:
<<: *defaults
steps:
- checkout
- setup_remote_docker:
version: 19.03.12
reusable: true
exclusive: false
- run: make unit-tests
workflows:
version: 2
ci:
jobs:
- builder
- build:
requires:
- builder
- check:
requires:
- builder
- cross:
requires:
- builder
- unit-tests:
requires:
- builder

5
libnetwork/.dockerignore Normal file
View file

@ -0,0 +1,5 @@
.git
.dockerignore
Dockerfile
bin
tags

45
libnetwork/.gitignore vendored Normal file
View file

@ -0,0 +1,45 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
*~
.gtm
bin/
tags
.DS_Store
# Folders
integration-tmp/
_obj
_test
.vagrant
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
cmd/dnet/dnet
# Coverage
*.tmp
*.coverprofile
# IDE files and folders
.project
.settings/
libnetworkbuild.created
test/networkDb/testMain
test/networkDb/gossipdb

199
libnetwork/CHANGELOG.md Normal file
View file

@ -0,0 +1,199 @@
# Changelog
## 0.8.0-dev.2 (2016-05-07)
- Fix an issue which may arise during sandbox cleanup (https://github.com/docker/libnetwork/pull/1157)
- Fix cleanup logic in case of ipv6 allocation failure
- Don't add /etc/hosts record if container's ip is empty (--net=none)
- Fix default gw logic for internal networks
- Error when updating IPv6 gateway (https://github.com/docker/libnetwork/issues/1142)
- Fixes https://github.com/docker/libnetwork/issues/1113
- Fixes https://github.com/docker/libnetwork/issues/1069
- Fxies https://github.com/docker/libnetwork/issues/1117
- Increase the concurrent query rate-limit count
- Changes to build libnetwork in Solaris
## 0.8.0-dev.1 (2016-04-16)
- Fixes docker/docker#16964
- Added maximum egress bandwidth qos for Windows
## 0.7.0-rc.6 (2016-04-10)
- Flush cached resolver socket on default gateway change
## 0.7.0-rc.5 (2016-04-08)
- Persist ipam driver options
- Fixes https://github.com/docker/libnetwork/issues/1087
- Use go vet from go tool
- Godep update to pick up latest docker/docker packages
- Validate remote driver response using docker plugins package method.
## 0.7.0-rc.4 (2016-04-06)
- Fix the handling for default gateway Endpoint join/leave.
## 0.7.0-rc.3 (2016-04-05)
- Revert fix for default gateway endpoint join/leave. Needs to be reworked.
- Persist the network internal mode for bridge networks
## 0.7.0-rc.2 (2016-04-05)
- Fixes https://github.com/docker/libnetwork/issues/1070
- Move IPAM resource initialization out of init()
- Initialize overlay driver before network delete
- Fix the handling for default gateway Endpoint join/lean
## 0.7.0-rc.1 (2016-03-30)
- Fixes https://github.com/docker/libnetwork/issues/985
- Fixes https://github.com/docker/libnetwork/issues/945
- Log time taken to set sandbox key
- Limit number of concurrent DNS queries
## 0.7.0-dev.10 (2016-03-21)
- Add IPv6 service discovery (AAAA records) in embedded DNS server
- Honor enableIPv6 flag in network create for the IP allocation
- Avoid V6 queries in docker domain going to external nameservers
## 0.7.0-dev.9 (2016-03-18)
- Support labels on networks
## 0.7.0-dev.8 (2016-03-16)
- Windows driver to respect user set MAC address.
- Fix possible nil pointer reference in ServeDNS() with concurrent go routines.
- Fix netns path setting from hook (for containerd integration)
- Clear cached udp connections on resolver Stop()
- Avoid network/endpoint count inconsistences and remove stale networks after ungraceful shutdown
- Fix possible endpoint count inconsistency after ungraceful shutdown
- Reject a null v4 IPAM slice in exp vlan drivers
- Removed experimental drivers modprobe check
## 0.7.0-dev.7 (2016-03-11)
- Bumped up the minimum kernel version for ipvlan to 4.2
- Removed modprobe from macvlan/ipvlan drivers to resolve docker IT failures
- Close dbus connection if firewalld is not started
## 0.7.0-dev.6 (2016-03-10)
- Experimental support for macvlan and ipvlan drivers
## 0.7.0-dev.5 (2016-03-08)
- Fixes https://github.com/docker/docker/issues/20847
- Fixes https://github.com/docker/docker/issues/20997
- Fixes issues unveiled by docker integ test over 0.7.0-dev.4
## 0.7.0-dev.4 (2016-03-07)
- Changed ownership of exposed ports and port-mapping options from Endpoint to Sandbox
- Implement DNS RR in the Docker embedded DNS server
- Fixes https://github.com/docker/libnetwork/issues/984 (multi container overlay veth leak)
- Libnetwork to program container's interface MAC address
- Fixed bug in iptables.Exists() logic
- Fixes https://github.com/docker/docker/issues/20694
- Source external DNS queries from container namespace
- Added inbuilt nil IPAM driver
- Windows drivers integration fixes
- Extract hostname from (hostname.domainname). Related to https://github.com/docker/docker/issues/14282
- Fixed race in sandbox statistics read
- Fixes https://github.com/docker/libnetwork/issues/892 (docker start fails when ipv6.disable=1)
- Fixed error message on bridge network creation conflict
## 0.7.0-dev.3 (2016-02-17)
- Fixes https://github.com/docker/docker/issues/20350
- Fixes https://github.com/docker/docker/issues/20145
- Initial Windows HNS integration
- Allow passing global datastore config to libnetwork after boot
- Set Recursion Available bit in DNS query responses
- Make sure iptables chains are recreated on firewalld reload
## 0.7.0-dev.2 (2016-02-11)
- Fixes https://github.com/docker/docker/issues/20140
## 0.7.0-dev.1 (2016-02-10)
- Expose EnableIPV6 option
- discoverapi refactoring
- Fixed a few typos & docs update
## 0.6.1-rc2 (2016-02-09)
- Fixes https://github.com/docker/docker/issues/20132
- Fixes https://github.com/docker/docker/issues/20140
- Fixes https://github.com/docker/docker/issues/20019
## 0.6.1-rc1 (2016-02-05)
- Fixes https://github.com/docker/docker/issues/20026
## 0.6.0-rc7 (2016-02-01)
- Allow inter-network connections via exposed ports
## 0.6.0-rc6 (2016-01-30)
- Properly fixes https://github.com/docker/docker/issues/18814
## 0.6.0-rc5 (2016-01-26)
- Cleanup stale overlay sandboxes
## 0.6.0-rc4 (2016-01-25)
- Add Endpoints() API to Sandbox interface
- Fixed a race-condition in default gateway network creation
## 0.6.0-rc3 (2016-01-25)
- Fixes docker/docker#19576
- Fixed embedded DNS to listen in TCP as well
- Fixed a race-condition in IPAM to choose non-overlapping subnet for concurrent requests
## 0.6.0-rc2 (2016-01-21)
- Fixes docker/docker#19376
- Fixes docker/docker#15819
- Fixes libnetwork/#885, Not filter v6 DNS servers from resolv.conf
- Fixes docker/docker #19448, also handles the . in service and network names correctly.
## 0.6.0-rc1 (2016-01-14)
- Fixes docker/docker#19404
- Fixes the ungraceful daemon restart issue in systemd with remote network plugin
(https://github.com/docker/libnetwork/issues/813)
## 0.5.6 (2016-01-14)
- Setup embedded DNS server correctly on container restart. Fixes docker/docker#19354
## 0.5.5 (2016-01-14)
- Allow network-scoped alias to be resolved for anonymous endpoint
- Self repair corrupted IP database that could happen in 1.9.0 & 1.9.1
- Skip IPTables cleanup if --iptables=false is set. Fixes docker/docker#19063
## 0.5.4 (2016-01-12)
- Removed the isNodeAlive protection when user forces an endpoint delete
## 0.5.3 (2016-01-12)
- Bridge driver supporting internal network option
- Backend implementation to support "force" option to network disconnect
- Fixing a regex in etchosts package to fix docker/docker#19080
## 0.5.2 (2016-01-08)
- Embedded DNS replacing /etc/hosts based Service Discovery
- Container local alias and Network-scoped alias support
- Backend support for internal network mode
- Support for IPAM driver options
- Fixes overlay veth cleanup issue : docker/docker#18814
- fixes docker/docker#19139
- disable IPv6 Duplicate Address Detection
## 0.5.1 (2015-12-07)
- Allowing user to assign IP Address for containers
- Fixes docker/docker#18214
- Fixes docker/docker#18380
## 0.5.0 (2015-10-30)
- Docker multi-host networking exiting experimental channel
- Introduced IP Address Management and IPAM drivers
- DEPRECATE service discovery from default bridge network
- Introduced new network UX
- Support for multiple networks in bridge driver
- Local persistence with boltdb
## 0.4.0 (2015-07-24)
- Introduce experimental version of Overlay driver
- Introduce experimental version of network plugins
- Introduce experimental version of network & service UX
- Introduced experimental /etc/hosts based service discovery
- Integrated with libkv
- Improving test coverage
- Fixed a bunch of issues with osl namespace mgmt
## 0.3.0 (2015-05-27)
- Introduce CNM (Container Networking Model)
- Replace docker networking with CNM & Bridge driver

22
libnetwork/Dockerfile Normal file
View file

@ -0,0 +1,22 @@
ARG GO_VERSION=1.13.8
FROM golang:${GO_VERSION}-buster as dev
RUN apt-get update && apt-get -y install iptables \
protobuf-compiler
RUN go get -d github.com/gogo/protobuf/protoc-gen-gogo && \
cd /go/src/github.com/gogo/protobuf/protoc-gen-gogo && \
git reset --hard 30cf7ac33676b5786e78c746683f0d4cd64fa75b && \
go install
RUN go get golang.org/x/lint/golint \
golang.org/x/tools/cmd/cover \
github.com/mattn/goveralls \
github.com/gordonklaus/ineffassign \
github.com/client9/misspell/cmd/misspell
WORKDIR /go/src/github.com/docker/libnetwork
FROM dev
COPY . .

202
libnetwork/LICENSE Normal file
View file

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

52
libnetwork/MAINTAINERS Normal file
View file

@ -0,0 +1,52 @@
# Libnetwork maintainers file
#
# This file describes who runs the docker/libnetwork project and how.
# This is a living document - if you see something out of date or missing, speak up!
#
# It is structured to be consumable by both humans and programs.
# To extract its contents programmatically, use any TOML-compliant parser.
#
# This file is compiled into the MAINTAINERS file in docker/opensource.
#
[Org]
[Org."Core maintainers"]
people = [
"arkodg",
"euanh",
"fcrisciani",
"mavenugo",
"selansen",
]
[people]
# A reference list of all people associated with the project.
# All other sections should refer to people by their canonical key
# in the people section.
# ADD YOURSELF HERE IN ALPHABETICAL ORDER
[people.arkodg]
Name = "Arko Dasgupta"
Email = "arko.dasgupta@docker.com"
GitHub = "arkodg"
[people.euanh]
Name = "Euan Harris"
Email = "euan.harris@docker.com"
GitHub = "euanh"
[people.fcrisciani]
Name = "Flavio Crisciani"
Email = "flavio.crisciani@docker.com"
GitHub = "fcrisciani"
[people.mavenugo]
Name = "Madhu Venugopal"
Email = "madhu@docker.com"
GitHub = "mavenugo"
[people.selansen]
Name = "Elangovan Sivanandam"
Email = "elango.siva@docker.com"
GitHub = "selansen"

178
libnetwork/Makefile Normal file
View file

@ -0,0 +1,178 @@
.PHONY: all all-local build build-local clean cross cross-local vet lint misspell check check-local check-code check-format unit-tests protobuf protobuf-local check-protobuf
SHELL=/bin/bash
dockerbuildargs ?= --target dev - < Dockerfile
dockerargs ?= --privileged -v $(shell pwd):/go/src/github.com/docker/libnetwork -w /go/src/github.com/docker/libnetwork
build_image=libnetworkbuild
container_env = -e "INSIDECONTAINER=-incontainer=true"
docker = docker run --rm -it --init ${dockerargs} $$EXTRA_ARGS ${container_env} ${build_image}
CROSS_PLATFORMS = linux/amd64 linux/386 linux/arm windows/amd64
PACKAGES=$(shell go list ./... | grep -v /vendor/)
PROTOC_FLAGS=-I=. -I=/go/src -I=/go/src/github.com/gogo/protobuf -I=/go/src/github.com/gogo/protobuf/protobuf
export PATH := $(CURDIR)/bin:$(PATH)
# Several targets in this Makefile expect to run within the
# libnetworkbuild container. In general, a target named '<target>-local'
# relies on utilities inside the build container. Usually there is also
# a wrapper called '<target>' which starts a container and runs
# 'make <target>-local' inside it.
###########################################################################
# Top level targets
###########################################################################
all: build check clean
all-local: build-local check-local clean
###########################################################################
# Build targets
###########################################################################
# builder builds the libnetworkbuild container. All wrapper targets
# must depend on this to ensure that the container exists.
builder:
DOCKER_BUILDKIT=1 docker build --progress=plain -t ${build_image} --build-arg=GO_VERSION ${dockerbuildargs}
build: builder
@echo "🐳 $@"
@${docker} make build-local
build-local:
@echo "🐳 $@"
@mkdir -p "bin"
go build -tags experimental -o "bin/dnet" ./cmd/dnet
go build -o "bin/docker-proxy" ./cmd/proxy
CGO_ENABLED=0 go build -o "bin/diagnosticClient" ./cmd/diagnostic
CGO_ENABLED=0 go build -o "bin/testMain" ./cmd/networkdb-test/testMain.go
build-images:
@echo "🐳 $@"
cp cmd/diagnostic/daemon.json ./bin
DOCKER_BUILDKIT=1 docker build --progress=plain -f cmd/diagnostic/Dockerfile.client -t dockereng/network-diagnostic:onlyclient bin/
DOCKER_BUILDKIT=1 docker build --progress=plain -f cmd/diagnostic/Dockerfile.dind -t dockereng/network-diagnostic:17.12-dind bin/
DOCKER_BUILDKIT=1 docker build --progress=plain -f cmd/networkdb-test/Dockerfile -t dockereng/e2e-networkdb:master bin/
DOCKER_BUILDKIT=1 docker build --progress=plain -t dockereng/network-diagnostic:support.sh support/
push-images: build-images
@echo "🐳 $@"
docker push dockereng/network-diagnostic:onlyclient
docker push dockereng/network-diagnostic:17.12-dind
docker push dockereng/e2e-networkdb:master
docker push dockereng/network-diagnostic:support.sh
clean:
@echo "🐳 $@"
@if [ -d bin ]; then \
echo "Removing binaries"; \
rm -rf bin; \
fi
cross: builder
@mkdir -p "bin"
@for platform in ${CROSS_PLATFORMS}; do \
EXTRA_ARGS="-e GOOS=$${platform%/*} -e GOARCH=$${platform##*/}" ; \
echo "$${platform}..." ; \
${docker} make cross-local ; \
done
cross-local:
@echo "🐳 $@"
go build -o "bin/dnet-$$GOOS-$$GOARCH" ./cmd/dnet
go build -o "bin/docker-proxy-$$GOOS-$$GOARCH" ./cmd/proxy
# Rebuild protocol buffers.
# These may need to be rebuilt after vendoring updates, so .proto files are declared .PHONY so they are always rebuilt.
PROTO_FILES=$(shell find . -path ./vendor -prune -o -name \*.proto -print)
PB_FILES=$(PROTO_FILES:.proto=.pb.go)
# Pattern rule for protoc. If PROTOC_CHECK is defined, it checks
# whether the generated files are up to date and fails if they are not
%.pb.go: %.proto
@if [ ${PROTOC_CHECK} ]; then \
protoc ${PROTOC_FLAGS} --gogo_out=/tmp $< ; \
diff -q $@ /tmp/$@ >/dev/null || (echo "👹 $@ is out of date; please run 'make protobuf' and check in updates" && exit 1) ; \
else \
protoc ${PROTOC_FLAGS} --gogo_out=./ $< ; \
fi
.PHONY: $(PROTO_FILES)
protobuf: builder
@${docker} make protobuf-local
protobuf-local: $(PB_FILES)
###########################################################################
# Test targets
###########################################################################
check: builder
@${docker} make check-local
check-local: check-code check-format
check-code: check-protobuf lint vet ineffassign
check-format: fmt misspell
unit-tests: builder
${docker} make unit-tests-local
unit-tests-local:
@echo "🐳 Running tests... "
@echo "mode: count" > coverage.coverprofile
@go build -o "bin/docker-proxy" ./cmd/proxy
@for dir in $$( find . -maxdepth 10 -not -path './.git*' -not -path '*/_*' -not -path './vendor/*' -type d); do \
if ls $$dir/*.go &> /dev/null; then \
pushd . &> /dev/null ; \
cd $$dir ; \
go test ${INSIDECONTAINER} -test.parallel 5 -test.v -covermode=count -coverprofile=./profile.tmp ; \
ret=$$? ;\
if [ $$ret -ne 0 ]; then exit $$ret; fi ;\
popd &> /dev/null; \
if [ -f $$dir/profile.tmp ]; then \
cat $$dir/profile.tmp | tail -n +2 >> coverage.coverprofile ; \
rm $$dir/profile.tmp ; \
fi ; \
fi ; \
done
@echo "Done running tests"
# Depends on binaries because vet will silently fail if it can not load compiled imports
vet: ## run go vet
@echo "🐳 $@"
@test -z "$$(go vet ${PACKAGES} 2>&1 | grep -v 'constant [0-9]* not a string in call to Errorf' | egrep -v '(timestamp_test.go|duration_test.go|exit status 1)' | tee /dev/stderr)"
misspell:
@echo "🐳 $@"
@test -z "$$(find . -type f | grep -v vendor/ | grep "\.go\|\.md" | xargs misspell -error | tee /dev/stderr)"
fmt: ## run go fmt
@echo "🐳 $@"
@test -z "$$(gofmt -s -l . | grep -v vendor/ | grep -v ".pb.go$$" | tee /dev/stderr)" || \
(echo "👹 please format Go code with 'gofmt -s -w'" && false)
lint: ## run go lint
@echo "🐳 $@"
@test -z "$$(golint ./... | grep -v vendor/ | grep -v ".pb.go:" | grep -v ".mock.go" | tee /dev/stderr)"
ineffassign: ## run ineffassign
@echo "🐳 $@"
@test -z "$$(ineffassign . | grep -v vendor/ | grep -v ".pb.go:" | grep -v ".mock.go" | tee /dev/stderr)"
# check-protobuf rebuilds .pb.go files and fails if they have changed
check-protobuf: PROTOC_CHECK=1
check-protobuf: $(PB_FILES)
@echo "🐳 $@"
###########################################################################
# Utility targets
###########################################################################
shell: builder
@${docker} ${SHELL}

97
libnetwork/README.md Normal file
View file

@ -0,0 +1,97 @@
# libnetwork - networking for containers
[![Circle CI](https://circleci.com/gh/docker/libnetwork/tree/master.svg?style=svg)](https://circleci.com/gh/docker/libnetwork/tree/master) [![Coverage Status](https://coveralls.io/repos/docker/libnetwork/badge.svg)](https://coveralls.io/r/docker/libnetwork) [![GoDoc](https://godoc.org/github.com/docker/libnetwork?status.svg)](https://godoc.org/github.com/docker/libnetwork) [![Go Report Card](https://goreportcard.com/badge/github.com/docker/libnetwork)](https://goreportcard.com/report/github.com/docker/libnetwork)
Libnetwork provides a native Go implementation for connecting containers
The goal of libnetwork is to deliver a robust Container Network Model that provides a consistent programming interface and the required network abstractions for applications.
#### Design
Please refer to the [design](docs/design.md) for more information.
#### Using libnetwork
There are many networking solutions available to suit a broad range of use-cases. libnetwork uses a driver / plugin model to support all of these solutions while abstracting the complexity of the driver implementations by exposing a simple and consistent Network Model to users.
```go
import (
"fmt"
"log"
"github.com/docker/docker/pkg/reexec"
"github.com/docker/libnetwork"
"github.com/docker/libnetwork/config"
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/options"
)
func main() {
if reexec.Init() {
return
}
// Select and configure the network driver
networkType := "bridge"
// Create a new controller instance
driverOptions := options.Generic{}
genericOption := make(map[string]interface{})
genericOption[netlabel.GenericData] = driverOptions
controller, err := libnetwork.New(config.OptionDriverConfig(networkType, genericOption))
if err != nil {
log.Fatalf("libnetwork.New: %s", err)
}
// Create a network for containers to join.
// NewNetwork accepts Variadic optional arguments that libnetwork and Drivers can use.
network, err := controller.NewNetwork(networkType, "network1", "")
if err != nil {
log.Fatalf("controller.NewNetwork: %s", err)
}
// For each new container: allocate IP and interfaces. The returned network
// settings will be used for container infos (inspect and such), as well as
// iptables rules for port publishing. This info is contained or accessible
// from the returned endpoint.
ep, err := network.CreateEndpoint("Endpoint1")
if err != nil {
log.Fatalf("network.CreateEndpoint: %s", err)
}
// Create the sandbox for the container.
// NewSandbox accepts Variadic optional arguments which libnetwork can use.
sbx, err := controller.NewSandbox("container1",
libnetwork.OptionHostname("test"),
libnetwork.OptionDomainname("docker.io"))
if err != nil {
log.Fatalf("controller.NewSandbox: %s", err)
}
// A sandbox can join the endpoint via the join api.
err = ep.Join(sbx)
if err != nil {
log.Fatalf("ep.Join: %s", err)
}
// libnetwork client can check the endpoint's operational data via the Info() API
epInfo, err := ep.DriverInfo()
if err != nil {
log.Fatalf("ep.DriverInfo: %s", err)
}
macAddress, ok := epInfo[netlabel.MacAddress]
if !ok {
log.Fatalf("failed to get mac address from endpoint info")
}
fmt.Printf("Joined endpoint %s (%s) to sandbox %s (%s)\n", ep.Name(), macAddress, sbx.ContainerID(), sbx.Key())
}
```
## Contributing
Want to hack on libnetwork? [Docker's contributions guidelines](https://github.com/docker/docker/blob/master/CONTRIBUTING.md) apply.
## Copyright and license
Code and documentation copyright 2015 Docker, inc. Code released under the Apache 2.0 license. Docs released under Creative commons.

58
libnetwork/Vagrantfile vendored Normal file
View file

@ -0,0 +1,58 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"
$consul=<<SCRIPT
apt-get update
apt-get -y install wget
wget -qO- https://experimental.docker.com/ | sh
gpasswd -a vagrant docker
service docker restart
docker run -d -p 8500:8500 -p 8300-8302:8300-8302/tcp -p 8300-8302:8300-8302/udp -h consul progrium/consul -server -bootstrap
SCRIPT
$bootstrap=<<SCRIPT
apt-get update
apt-get -y install wget curl
apt-get -y install bridge-utils
wget -qO- https://experimental.docker.com/ | sh
gpasswd -a vagrant docker
echo DOCKER_OPTS=\\"--cluster-store=consul://192.168.33.10:8500 --cluster-advertise=${1}:0\\" >> /etc/default/docker
cp /vagrant/docs/vagrant-systemd/docker.service /etc/systemd/system/
systemctl daemon-reload
systemctl restart docker.service
SCRIPT
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
config.ssh.shell = "bash -c 'BASH_ENV=/etc/profile exec bash'"
num_nodes = 2
base_ip = "192.168.33."
net_ips = num_nodes.times.collect { |n| base_ip + "#{n+11}" }
config.vm.define "consul-server" do |consul|
consul.vm.box = "ubuntu/trusty64"
consul.vm.hostname = "consul-server"
consul.vm.network :private_network, ip: "192.168.33.10"
consul.vm.provider "virtualbox" do |vb|
vb.customize ["modifyvm", :id, "--memory", "512"]
end
consul.vm.provision :shell, inline: $consul
end
num_nodes.times do |n|
config.vm.define "net-#{n+1}" do |net|
net.vm.box = "ubuntu/xenial64"
net_ip = net_ips[n]
net_index = n+1
net.vm.hostname = "net-#{net_index}"
net.vm.provider "virtualbox" do |vb|
vb.customize ["modifyvm", :id, "--memory", "1024"]
end
net.vm.network :private_network, ip: "#{net_ip}"
net.vm.provision :shell, inline: $bootstrap, :args => "#{net_ip}"
end
end
end

988
libnetwork/agent.go Normal file
View file

@ -0,0 +1,988 @@
package libnetwork
//go:generate protoc -I.:Godeps/_workspace/src/github.com/gogo/protobuf --gogo_out=import_path=github.com/docker/libnetwork,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. agent.proto
import (
"encoding/json"
"fmt"
"net"
"sort"
"sync"
"github.com/docker/go-events"
"github.com/docker/libnetwork/cluster"
"github.com/docker/libnetwork/datastore"
"github.com/docker/libnetwork/discoverapi"
"github.com/docker/libnetwork/driverapi"
"github.com/docker/libnetwork/networkdb"
"github.com/docker/libnetwork/types"
"github.com/gogo/protobuf/proto"
"github.com/sirupsen/logrus"
)
const (
subsysGossip = "networking:gossip"
subsysIPSec = "networking:ipsec"
keyringSize = 3
)
// ByTime implements sort.Interface for []*types.EncryptionKey based on
// the LamportTime field.
type ByTime []*types.EncryptionKey
func (b ByTime) Len() int { return len(b) }
func (b ByTime) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
func (b ByTime) Less(i, j int) bool { return b[i].LamportTime < b[j].LamportTime }
type agent struct {
networkDB *networkdb.NetworkDB
bindAddr string
advertiseAddr string
dataPathAddr string
coreCancelFuncs []func()
driverCancelFuncs map[string][]func()
sync.Mutex
}
func (a *agent) dataPathAddress() string {
a.Lock()
defer a.Unlock()
if a.dataPathAddr != "" {
return a.dataPathAddr
}
return a.advertiseAddr
}
const libnetworkEPTable = "endpoint_table"
func getBindAddr(ifaceName string) (string, error) {
iface, err := net.InterfaceByName(ifaceName)
if err != nil {
return "", fmt.Errorf("failed to find interface %s: %v", ifaceName, err)
}
addrs, err := iface.Addrs()
if err != nil {
return "", fmt.Errorf("failed to get interface addresses: %v", err)
}
for _, a := range addrs {
addr, ok := a.(*net.IPNet)
if !ok {
continue
}
addrIP := addr.IP
if addrIP.IsLinkLocalUnicast() {
continue
}
return addrIP.String(), nil
}
return "", fmt.Errorf("failed to get bind address")
}
func resolveAddr(addrOrInterface string) (string, error) {
// Try and see if this is a valid IP address
if net.ParseIP(addrOrInterface) != nil {
return addrOrInterface, nil
}
addr, err := net.ResolveIPAddr("ip", addrOrInterface)
if err != nil {
// If not a valid IP address, it should be a valid interface
return getBindAddr(addrOrInterface)
}
return addr.String(), nil
}
func (c *controller) handleKeyChange(keys []*types.EncryptionKey) error {
drvEnc := discoverapi.DriverEncryptionUpdate{}
a := c.getAgent()
if a == nil {
logrus.Debug("Skipping key change as agent is nil")
return nil
}
// Find the deleted key. If the deleted key was the primary key,
// a new primary key should be set before removing if from keyring.
c.Lock()
added := []byte{}
deleted := []byte{}
j := len(c.keys)
for i := 0; i < j; {
same := false
for _, key := range keys {
if same = key.LamportTime == c.keys[i].LamportTime; same {
break
}
}
if !same {
cKey := c.keys[i]
if cKey.Subsystem == subsysGossip {
deleted = cKey.Key
}
if cKey.Subsystem == subsysIPSec {
drvEnc.Prune = cKey.Key
drvEnc.PruneTag = cKey.LamportTime
}
c.keys[i], c.keys[j-1] = c.keys[j-1], c.keys[i]
c.keys[j-1] = nil
j--
}
i++
}
c.keys = c.keys[:j]
// Find the new key and add it to the key ring
for _, key := range keys {
same := false
for _, cKey := range c.keys {
if same = cKey.LamportTime == key.LamportTime; same {
break
}
}
if !same {
c.keys = append(c.keys, key)
if key.Subsystem == subsysGossip {
added = key.Key
}
if key.Subsystem == subsysIPSec {
drvEnc.Key = key.Key
drvEnc.Tag = key.LamportTime
}
}
}
c.Unlock()
if len(added) > 0 {
a.networkDB.SetKey(added)
}
key, _, err := c.getPrimaryKeyTag(subsysGossip)
if err != nil {
return err
}
a.networkDB.SetPrimaryKey(key)
key, tag, err := c.getPrimaryKeyTag(subsysIPSec)
if err != nil {
return err
}
drvEnc.Primary = key
drvEnc.PrimaryTag = tag
if len(deleted) > 0 {
a.networkDB.RemoveKey(deleted)
}
c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
err := driver.DiscoverNew(discoverapi.EncryptionKeysUpdate, drvEnc)
if err != nil {
logrus.Warnf("Failed to update datapath keys in driver %s: %v", name, err)
// Attempt to reconfigure keys in case of a update failure
// which can arise due to a mismatch of keys
// if worker nodes get temporarily disconnected
logrus.Warnf("Reconfiguring datapath keys for %s", name)
drvCfgEnc := discoverapi.DriverEncryptionConfig{}
drvCfgEnc.Keys, drvCfgEnc.Tags = c.getKeys(subsysIPSec)
err = driver.DiscoverNew(discoverapi.EncryptionKeysConfig, drvCfgEnc)
if err != nil {
logrus.Warnf("Failed to reset datapath keys in driver %s: %v", name, err)
}
}
return false
})
return nil
}
func (c *controller) agentSetup(clusterProvider cluster.Provider) error {
agent := c.getAgent()
// If the agent is already present there is no need to try to initialize it again
if agent != nil {
return nil
}
bindAddr := clusterProvider.GetLocalAddress()
advAddr := clusterProvider.GetAdvertiseAddress()
dataAddr := clusterProvider.GetDataPathAddress()
remoteList := clusterProvider.GetRemoteAddressList()
remoteAddrList := make([]string, 0, len(remoteList))
for _, remote := range remoteList {
addr, _, _ := net.SplitHostPort(remote)
remoteAddrList = append(remoteAddrList, addr)
}
listen := clusterProvider.GetListenAddress()
listenAddr, _, _ := net.SplitHostPort(listen)
logrus.Infof("Initializing Libnetwork Agent Listen-Addr=%s Local-addr=%s Adv-addr=%s Data-addr=%s Remote-addr-list=%v MTU=%d",
listenAddr, bindAddr, advAddr, dataAddr, remoteAddrList, c.Config().Daemon.NetworkControlPlaneMTU)
if advAddr != "" && agent == nil {
if err := c.agentInit(listenAddr, bindAddr, advAddr, dataAddr); err != nil {
logrus.Errorf("error in agentInit: %v", err)
return err
}
c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
if capability.ConnectivityScope == datastore.GlobalScope {
c.agentDriverNotify(driver)
}
return false
})
}
if len(remoteAddrList) > 0 {
if err := c.agentJoin(remoteAddrList); err != nil {
logrus.Errorf("Error in joining gossip cluster : %v(join will be retried in background)", err)
}
}
return nil
}
// For a given subsystem getKeys sorts the keys by lamport time and returns
// slice of keys and lamport time which can used as a unique tag for the keys
func (c *controller) getKeys(subsys string) ([][]byte, []uint64) {
c.Lock()
defer c.Unlock()
sort.Sort(ByTime(c.keys))
keys := [][]byte{}
tags := []uint64{}
for _, key := range c.keys {
if key.Subsystem == subsys {
keys = append(keys, key.Key)
tags = append(tags, key.LamportTime)
}
}
keys[0], keys[1] = keys[1], keys[0]
tags[0], tags[1] = tags[1], tags[0]
return keys, tags
}
// getPrimaryKeyTag returns the primary key for a given subsystem from the
// list of sorted key and the associated tag
func (c *controller) getPrimaryKeyTag(subsys string) ([]byte, uint64, error) {
c.Lock()
defer c.Unlock()
sort.Sort(ByTime(c.keys))
keys := []*types.EncryptionKey{}
for _, key := range c.keys {
if key.Subsystem == subsys {
keys = append(keys, key)
}
}
return keys[1].Key, keys[1].LamportTime, nil
}
func (c *controller) agentInit(listenAddr, bindAddrOrInterface, advertiseAddr, dataPathAddr string) error {
bindAddr, err := resolveAddr(bindAddrOrInterface)
if err != nil {
return err
}
keys, _ := c.getKeys(subsysGossip)
netDBConf := networkdb.DefaultConfig()
netDBConf.BindAddr = listenAddr
netDBConf.AdvertiseAddr = advertiseAddr
netDBConf.Keys = keys
if c.Config().Daemon.NetworkControlPlaneMTU != 0 {
// Consider the MTU remove the IP hdr (IPv4 or IPv6) and the TCP/UDP hdr.
// To be on the safe side let's cut 100 bytes
netDBConf.PacketBufferSize = (c.Config().Daemon.NetworkControlPlaneMTU - 100)
logrus.Debugf("Control plane MTU: %d will initialize NetworkDB with: %d",
c.Config().Daemon.NetworkControlPlaneMTU, netDBConf.PacketBufferSize)
}
nDB, err := networkdb.New(netDBConf)
if err != nil {
return err
}
// Register the diagnostic handlers
c.DiagnosticServer.RegisterHandler(nDB, networkdb.NetDbPaths2Func)
var cancelList []func()
ch, cancel := nDB.Watch(libnetworkEPTable, "", "")
cancelList = append(cancelList, cancel)
nodeCh, cancel := nDB.Watch(networkdb.NodeTable, "", "")
cancelList = append(cancelList, cancel)
c.Lock()
c.agent = &agent{
networkDB: nDB,
bindAddr: bindAddr,
advertiseAddr: advertiseAddr,
dataPathAddr: dataPathAddr,
coreCancelFuncs: cancelList,
driverCancelFuncs: make(map[string][]func()),
}
c.Unlock()
go c.handleTableEvents(ch, c.handleEpTableEvent)
go c.handleTableEvents(nodeCh, c.handleNodeTableEvent)
drvEnc := discoverapi.DriverEncryptionConfig{}
keys, tags := c.getKeys(subsysIPSec)
drvEnc.Keys = keys
drvEnc.Tags = tags
c.drvRegistry.WalkDrivers(func(name string, driver driverapi.Driver, capability driverapi.Capability) bool {
err := driver.DiscoverNew(discoverapi.EncryptionKeysConfig, drvEnc)
if err != nil {
logrus.Warnf("Failed to set datapath keys in driver %s: %v", name, err)
}
return false
})
c.WalkNetworks(joinCluster)
return nil
}
func (c *controller) agentJoin(remoteAddrList []string) error {
agent := c.getAgent()
if agent == nil {
return nil
}
return agent.networkDB.Join(remoteAddrList)
}
func (c *controller) agentDriverNotify(d driverapi.Driver) {
agent := c.getAgent()
if agent == nil {
return
}
if err := d.DiscoverNew(discoverapi.NodeDiscovery, discoverapi.NodeDiscoveryData{
Address: agent.dataPathAddress(),
BindAddress: agent.bindAddr,
Self: true,
}); err != nil {
logrus.Warnf("Failed the node discovery in driver: %v", err)
}
drvEnc := discoverapi.DriverEncryptionConfig{}
keys, tags := c.getKeys(subsysIPSec)
drvEnc.Keys = keys
drvEnc.Tags = tags
if err := d.DiscoverNew(discoverapi.EncryptionKeysConfig, drvEnc); err != nil {
logrus.Warnf("Failed to set datapath keys in driver: %v", err)
}
}
func (c *controller) agentClose() {
// Acquire current agent instance and reset its pointer
// then run closing functions
c.Lock()
agent := c.agent
c.agent = nil
c.Unlock()
// when the agent is closed the cluster provider should be cleaned up
c.SetClusterProvider(nil)
if agent == nil {
return
}
var cancelList []func()
agent.Lock()
for _, cancelFuncs := range agent.driverCancelFuncs {
cancelList = append(cancelList, cancelFuncs...)
}
// Add also the cancel functions for the network db
cancelList = append(cancelList, agent.coreCancelFuncs...)
agent.Unlock()
for _, cancel := range cancelList {
cancel()
}
agent.networkDB.Close()
}
// Task has the backend container details
type Task struct {
Name string
EndpointID string
EndpointIP string
Info map[string]string
}
// ServiceInfo has service specific details along with the list of backend tasks
type ServiceInfo struct {
VIP string
LocalLBIndex int
Tasks []Task
Ports []string
}
type epRecord struct {
ep EndpointRecord
info map[string]string
lbIndex int
}
func (n *network) Services() map[string]ServiceInfo {
eps := make(map[string]epRecord)
if !n.isClusterEligible() {
return nil
}
agent := n.getController().getAgent()
if agent == nil {
return nil
}
// Walk through libnetworkEPTable and fetch the driver agnostic endpoint info
entries := agent.networkDB.GetTableByNetwork(libnetworkEPTable, n.id)
for eid, value := range entries {
var epRec EndpointRecord
nid := n.ID()
if err := proto.Unmarshal(value.Value, &epRec); err != nil {
logrus.Errorf("Unmarshal of libnetworkEPTable failed for endpoint %s in network %s, %v", eid, nid, err)
continue
}
i := n.getController().getLBIndex(epRec.ServiceID, nid, epRec.IngressPorts)
eps[eid] = epRecord{
ep: epRec,
lbIndex: i,
}
}
// Walk through the driver's tables, have the driver decode the entries
// and return the tuple {ep ID, value}. value is a string that coveys
// relevant info about the endpoint.
d, err := n.driver(true)
if err != nil {
logrus.Errorf("Could not resolve driver for network %s/%s while fetching services: %v", n.networkType, n.ID(), err)
return nil
}
for _, table := range n.driverTables {
if table.objType != driverapi.EndpointObject {
continue
}
entries := agent.networkDB.GetTableByNetwork(table.name, n.id)
for key, value := range entries {
epID, info := d.DecodeTableEntry(table.name, key, value.Value)
if ep, ok := eps[epID]; !ok {
logrus.Errorf("Inconsistent driver and libnetwork state for endpoint %s", epID)
} else {
ep.info = info
eps[epID] = ep
}
}
}
// group the endpoints into a map keyed by the service name
sinfo := make(map[string]ServiceInfo)
for ep, epr := range eps {
var (
s ServiceInfo
ok bool
)
if s, ok = sinfo[epr.ep.ServiceName]; !ok {
s = ServiceInfo{
VIP: epr.ep.VirtualIP,
LocalLBIndex: epr.lbIndex,
}
}
ports := []string{}
if s.Ports == nil {
for _, port := range epr.ep.IngressPorts {
p := fmt.Sprintf("Target: %d, Publish: %d", port.TargetPort, port.PublishedPort)
ports = append(ports, p)
}
s.Ports = ports
}
s.Tasks = append(s.Tasks, Task{
Name: epr.ep.Name,
EndpointID: ep,
EndpointIP: epr.ep.EndpointIP,
Info: epr.info,
})
sinfo[epr.ep.ServiceName] = s
}
return sinfo
}
func (n *network) isClusterEligible() bool {
if n.scope != datastore.SwarmScope || !n.driverIsMultihost() {
return false
}
return n.getController().getAgent() != nil
}
func (n *network) joinCluster() error {
if !n.isClusterEligible() {
return nil
}
agent := n.getController().getAgent()
if agent == nil {
return nil
}
return agent.networkDB.JoinNetwork(n.ID())
}
func (n *network) leaveCluster() error {
if !n.isClusterEligible() {
return nil
}
agent := n.getController().getAgent()
if agent == nil {
return nil
}
return agent.networkDB.LeaveNetwork(n.ID())
}
func (ep *endpoint) addDriverInfoToCluster() error {
n := ep.getNetwork()
if !n.isClusterEligible() {
return nil
}
if ep.joinInfo == nil {
return nil
}
agent := n.getController().getAgent()
if agent == nil {
return nil
}
for _, te := range ep.joinInfo.driverTableEntries {
if err := agent.networkDB.CreateEntry(te.tableName, n.ID(), te.key, te.value); err != nil {
return err
}
}
return nil
}
func (ep *endpoint) deleteDriverInfoFromCluster() error {
n := ep.getNetwork()
if !n.isClusterEligible() {
return nil
}
if ep.joinInfo == nil {
return nil
}
agent := n.getController().getAgent()
if agent == nil {
return nil
}
for _, te := range ep.joinInfo.driverTableEntries {
if err := agent.networkDB.DeleteEntry(te.tableName, n.ID(), te.key); err != nil {
return err
}
}
return nil
}
func (ep *endpoint) addServiceInfoToCluster(sb *sandbox) error {
if ep.isAnonymous() && len(ep.myAliases) == 0 || ep.Iface() == nil || ep.Iface().Address() == nil {
return nil
}
n := ep.getNetwork()
if !n.isClusterEligible() {
return nil
}
sb.Service.Lock()
defer sb.Service.Unlock()
logrus.Debugf("addServiceInfoToCluster START for %s %s", ep.svcName, ep.ID())
// Check that the endpoint is still present on the sandbox before adding it to the service discovery.
// This is to handle a race between the EnableService and the sbLeave
// It is possible that the EnableService starts, fetches the list of the endpoints and
// by the time the addServiceInfoToCluster is called the endpoint got removed from the sandbox
// The risk is that the deleteServiceInfoToCluster happens before the addServiceInfoToCluster.
// This check under the Service lock of the sandbox ensure the correct behavior.
// If the addServiceInfoToCluster arrives first may find or not the endpoint and will proceed or exit
// but in any case the deleteServiceInfoToCluster will follow doing the cleanup if needed.
// In case the deleteServiceInfoToCluster arrives first, this one is happening after the endpoint is
// removed from the list, in this situation the delete will bail out not finding any data to cleanup
// and the add will bail out not finding the endpoint on the sandbox.
if e := sb.getEndpoint(ep.ID()); e == nil {
logrus.Warnf("addServiceInfoToCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID())
return nil
}
c := n.getController()
agent := c.getAgent()
name := ep.Name()
if ep.isAnonymous() {
name = ep.MyAliases()[0]
}
var ingressPorts []*PortConfig
if ep.svcID != "" {
// This is a task part of a service
// Gossip ingress ports only in ingress network.
if n.ingress {
ingressPorts = ep.ingressPorts
}
if err := c.addServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), name, ep.virtualIP, ingressPorts, ep.svcAliases, ep.myAliases, ep.Iface().Address().IP, "addServiceInfoToCluster"); err != nil {
return err
}
} else {
// This is a container simply attached to an attachable network
if err := c.addContainerNameResolution(n.ID(), ep.ID(), name, ep.myAliases, ep.Iface().Address().IP, "addServiceInfoToCluster"); err != nil {
return err
}
}
buf, err := proto.Marshal(&EndpointRecord{
Name: name,
ServiceName: ep.svcName,
ServiceID: ep.svcID,
VirtualIP: ep.virtualIP.String(),
IngressPorts: ingressPorts,
Aliases: ep.svcAliases,
TaskAliases: ep.myAliases,
EndpointIP: ep.Iface().Address().IP.String(),
ServiceDisabled: false,
})
if err != nil {
return err
}
if agent != nil {
if err := agent.networkDB.CreateEntry(libnetworkEPTable, n.ID(), ep.ID(), buf); err != nil {
logrus.Warnf("addServiceInfoToCluster NetworkDB CreateEntry failed for %s %s err:%s", ep.id, n.id, err)
return err
}
}
logrus.Debugf("addServiceInfoToCluster END for %s %s", ep.svcName, ep.ID())
return nil
}
func (ep *endpoint) deleteServiceInfoFromCluster(sb *sandbox, fullRemove bool, method string) error {
if ep.isAnonymous() && len(ep.myAliases) == 0 {
return nil
}
n := ep.getNetwork()
if !n.isClusterEligible() {
return nil
}
sb.Service.Lock()
defer sb.Service.Unlock()
logrus.Debugf("deleteServiceInfoFromCluster from %s START for %s %s", method, ep.svcName, ep.ID())
// Avoid a race w/ with a container that aborts preemptively. This would
// get caught in disableServceInNetworkDB, but we check here to make the
// nature of the condition more clear.
// See comment in addServiceInfoToCluster()
if e := sb.getEndpoint(ep.ID()); e == nil {
logrus.Warnf("deleteServiceInfoFromCluster suppressing service resolution ep is not anymore in the sandbox %s", ep.ID())
return nil
}
c := n.getController()
agent := c.getAgent()
name := ep.Name()
if ep.isAnonymous() {
name = ep.MyAliases()[0]
}
if agent != nil {
// First update the networkDB then locally
if fullRemove {
if err := agent.networkDB.DeleteEntry(libnetworkEPTable, n.ID(), ep.ID()); err != nil {
logrus.Warnf("deleteServiceInfoFromCluster NetworkDB DeleteEntry failed for %s %s err:%s", ep.id, n.id, err)
}
} else {
disableServiceInNetworkDB(agent, n, ep)
}
}
if ep.Iface() != nil && ep.Iface().Address() != nil {
if ep.svcID != "" {
// This is a task part of a service
var ingressPorts []*PortConfig
if n.ingress {
ingressPorts = ep.ingressPorts
}
if err := c.rmServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), name, ep.virtualIP, ingressPorts, ep.svcAliases, ep.myAliases, ep.Iface().Address().IP, "deleteServiceInfoFromCluster", true, fullRemove); err != nil {
return err
}
} else {
// This is a container simply attached to an attachable network
if err := c.delContainerNameResolution(n.ID(), ep.ID(), name, ep.myAliases, ep.Iface().Address().IP, "deleteServiceInfoFromCluster"); err != nil {
return err
}
}
}
logrus.Debugf("deleteServiceInfoFromCluster from %s END for %s %s", method, ep.svcName, ep.ID())
return nil
}
func disableServiceInNetworkDB(a *agent, n *network, ep *endpoint) {
var epRec EndpointRecord
logrus.Debugf("disableServiceInNetworkDB for %s %s", ep.svcName, ep.ID())
// Update existing record to indicate that the service is disabled
inBuf, err := a.networkDB.GetEntry(libnetworkEPTable, n.ID(), ep.ID())
if err != nil {
logrus.Warnf("disableServiceInNetworkDB GetEntry failed for %s %s err:%s", ep.id, n.id, err)
return
}
// Should never fail
if err := proto.Unmarshal(inBuf, &epRec); err != nil {
logrus.Errorf("disableServiceInNetworkDB unmarshal failed for %s %s err:%s", ep.id, n.id, err)
return
}
epRec.ServiceDisabled = true
// Should never fail
outBuf, err := proto.Marshal(&epRec)
if err != nil {
logrus.Errorf("disableServiceInNetworkDB marshalling failed for %s %s err:%s", ep.id, n.id, err)
return
}
// Send update to the whole cluster
if err := a.networkDB.UpdateEntry(libnetworkEPTable, n.ID(), ep.ID(), outBuf); err != nil {
logrus.Warnf("disableServiceInNetworkDB UpdateEntry failed for %s %s err:%s", ep.id, n.id, err)
}
}
func (n *network) addDriverWatches() {
if !n.isClusterEligible() {
return
}
c := n.getController()
agent := c.getAgent()
if agent == nil {
return
}
for _, table := range n.driverTables {
ch, cancel := agent.networkDB.Watch(table.name, n.ID(), "")
agent.Lock()
agent.driverCancelFuncs[n.ID()] = append(agent.driverCancelFuncs[n.ID()], cancel)
agent.Unlock()
go c.handleTableEvents(ch, n.handleDriverTableEvent)
d, err := n.driver(false)
if err != nil {
logrus.Errorf("Could not resolve driver %s while walking driver tabl: %v", n.networkType, err)
return
}
agent.networkDB.WalkTable(table.name, func(nid, key string, value []byte, deleted bool) bool {
// skip the entries that are mark for deletion, this is safe because this function is
// called at initialization time so there is no state to delete
if nid == n.ID() && !deleted {
d.EventNotify(driverapi.Create, nid, table.name, key, value)
}
return false
})
}
}
func (n *network) cancelDriverWatches() {
if !n.isClusterEligible() {
return
}
agent := n.getController().getAgent()
if agent == nil {
return
}
agent.Lock()
cancelFuncs := agent.driverCancelFuncs[n.ID()]
delete(agent.driverCancelFuncs, n.ID())
agent.Unlock()
for _, cancel := range cancelFuncs {
cancel()
}
}
func (c *controller) handleTableEvents(ch *events.Channel, fn func(events.Event)) {
for {
select {
case ev := <-ch.C:
fn(ev)
case <-ch.Done():
return
}
}
}
func (n *network) handleDriverTableEvent(ev events.Event) {
d, err := n.driver(false)
if err != nil {
logrus.Errorf("Could not resolve driver %s while handling driver table event: %v", n.networkType, err)
return
}
var (
etype driverapi.EventType
tname string
key string
value []byte
)
switch event := ev.(type) {
case networkdb.CreateEvent:
tname = event.Table
key = event.Key
value = event.Value
etype = driverapi.Create
case networkdb.DeleteEvent:
tname = event.Table
key = event.Key
value = event.Value
etype = driverapi.Delete
case networkdb.UpdateEvent:
tname = event.Table
key = event.Key
value = event.Value
etype = driverapi.Delete
}
d.EventNotify(etype, n.ID(), tname, key, value)
}
func (c *controller) handleNodeTableEvent(ev events.Event) {
var (
value []byte
isAdd bool
nodeAddr networkdb.NodeAddr
)
switch event := ev.(type) {
case networkdb.CreateEvent:
value = event.Value
isAdd = true
case networkdb.DeleteEvent:
value = event.Value
case networkdb.UpdateEvent:
logrus.Errorf("Unexpected update node table event = %#v", event)
}
err := json.Unmarshal(value, &nodeAddr)
if err != nil {
logrus.Errorf("Error unmarshalling node table event %v", err)
return
}
c.processNodeDiscovery([]net.IP{nodeAddr.Addr}, isAdd)
}
func (c *controller) handleEpTableEvent(ev events.Event) {
var (
nid string
eid string
value []byte
epRec EndpointRecord
)
switch event := ev.(type) {
case networkdb.CreateEvent:
nid = event.NetworkID
eid = event.Key
value = event.Value
case networkdb.DeleteEvent:
nid = event.NetworkID
eid = event.Key
value = event.Value
case networkdb.UpdateEvent:
nid = event.NetworkID
eid = event.Key
value = event.Value
default:
logrus.Errorf("Unexpected update service table event = %#v", event)
return
}
err := proto.Unmarshal(value, &epRec)
if err != nil {
logrus.Errorf("Failed to unmarshal service table value: %v", err)
return
}
containerName := epRec.Name
svcName := epRec.ServiceName
svcID := epRec.ServiceID
vip := net.ParseIP(epRec.VirtualIP)
ip := net.ParseIP(epRec.EndpointIP)
ingressPorts := epRec.IngressPorts
serviceAliases := epRec.Aliases
taskAliases := epRec.TaskAliases
if containerName == "" || ip == nil {
logrus.Errorf("Invalid endpoint name/ip received while handling service table event %s", value)
return
}
switch ev.(type) {
case networkdb.CreateEvent:
logrus.Debugf("handleEpTableEvent ADD %s R:%v", eid, epRec)
if svcID != "" {
// This is a remote task part of a service
if err := c.addServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent"); err != nil {
logrus.Errorf("failed adding service binding for %s epRec:%v err:%v", eid, epRec, err)
return
}
} else {
// This is a remote container simply attached to an attachable network
if err := c.addContainerNameResolution(nid, eid, containerName, taskAliases, ip, "handleEpTableEvent"); err != nil {
logrus.Errorf("failed adding container name resolution for %s epRec:%v err:%v", eid, epRec, err)
}
}
case networkdb.DeleteEvent:
logrus.Debugf("handleEpTableEvent DEL %s R:%v", eid, epRec)
if svcID != "" {
// This is a remote task part of a service
if err := c.rmServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent", true, true); err != nil {
logrus.Errorf("failed removing service binding for %s epRec:%v err:%v", eid, epRec, err)
return
}
} else {
// This is a remote container simply attached to an attachable network
if err := c.delContainerNameResolution(nid, eid, containerName, taskAliases, ip, "handleEpTableEvent"); err != nil {
logrus.Errorf("failed removing container name resolution for %s epRec:%v err:%v", eid, epRec, err)
}
}
case networkdb.UpdateEvent:
logrus.Debugf("handleEpTableEvent UPD %s R:%v", eid, epRec)
// We currently should only get these to inform us that an endpoint
// is disabled. Report if otherwise.
if svcID == "" || !epRec.ServiceDisabled {
logrus.Errorf("Unexpected update table event for %s epRec:%v", eid, epRec)
return
}
// This is a remote task that is part of a service that is now disabled
if err := c.rmServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent", true, false); err != nil {
logrus.Errorf("failed disabling service binding for %s epRec:%v err:%v", eid, epRec, err)
return
}
}
}

1095
libnetwork/agent.pb.go Normal file

File diff suppressed because it is too large Load diff

76
libnetwork/agent.proto Normal file
View file

@ -0,0 +1,76 @@
syntax = "proto3";
import "gogoproto/gogo.proto";
package libnetwork;
option (gogoproto.marshaler_all) = true;
option (gogoproto.unmarshaler_all) = true;
option (gogoproto.stringer_all) = true;
option (gogoproto.gostring_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.goproto_stringer_all) = false;
// EndpointRecord specifies all the endpoint specific information that
// needs to gossiped to nodes participating in the network.
message EndpointRecord {
// Name of the container
string name = 1;
// Service name of the service to which this endpoint belongs.
string service_name = 2;
// Service ID of the service to which this endpoint belongs.
string service_id = 3 [(gogoproto.customname) = "ServiceID"];
// Virtual IP of the service to which this endpoint belongs.
string virtual_ip = 4 [(gogoproto.customname) = "VirtualIP"];
// IP assigned to this endpoint.
string endpoint_ip = 5 [(gogoproto.customname) = "EndpointIP"];
// IngressPorts exposed by the service to which this endpoint belongs.
repeated PortConfig ingress_ports = 6;
// A list of aliases which are alternate names for the service
repeated string aliases = 7;
// List of aliases task specific aliases
repeated string task_aliases = 8;
// Whether this enpoint's service has been disabled
bool service_disabled = 9;
}
// PortConfig specifies an exposed port which can be
// addressed using the given name. This can be later queried
// using a service discovery api or a DNS SRV query. The node
// port specifies a port that can be used to address this
// service external to the cluster by sending a connection
// request to this port to any node on the cluster.
message PortConfig {
enum Protocol {
option (gogoproto.goproto_enum_prefix) = false;
TCP = 0 [(gogoproto.enumvalue_customname) = "ProtocolTCP"];
UDP = 1 [(gogoproto.enumvalue_customname) = "ProtocolUDP"];
SCTP = 2 [(gogoproto.enumvalue_customname) = "ProtocolSCTP"];
}
// Name for the port. If provided the port information can
// be queried using the name as in a DNS SRV query.
string name = 1;
// Protocol for the port which is exposed.
Protocol protocol = 2;
// The port which the application is exposing and is bound to.
uint32 target_port = 3;
// PublishedPort specifies the port on which the service is
// exposed on all nodes on the cluster. If not specified an
// arbitrary port in the node port range is allocated by the
// system. If specified it should be within the node port
// range and it should be available.
uint32 published_port = 4;
}

981
libnetwork/api/api.go Normal file
View file

@ -0,0 +1,981 @@
package api
import (
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"strconv"
"strings"
"github.com/docker/libnetwork"
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/netutils"
"github.com/docker/libnetwork/types"
"github.com/gorilla/mux"
)
var (
successResponse = responseStatus{Status: "Success", StatusCode: http.StatusOK}
createdResponse = responseStatus{Status: "Created", StatusCode: http.StatusCreated}
mismatchResponse = responseStatus{Status: "Body/URI parameter mismatch", StatusCode: http.StatusBadRequest}
badQueryResponse = responseStatus{Status: "Unsupported query", StatusCode: http.StatusBadRequest}
)
const (
// Resource name regex
// Gorilla mux encloses the passed pattern with '^' and '$'. So we need to do some tricks
// to have mux eventually build a query regex which matches empty or word string (`^$|[\w]+`)
regex = "[a-zA-Z_0-9-]+"
qregx = "$|" + regex
// Router URL variable definition
nwName = "{" + urlNwName + ":" + regex + "}"
nwNameQr = "{" + urlNwName + ":" + qregx + "}"
nwID = "{" + urlNwID + ":" + regex + "}"
nwPIDQr = "{" + urlNwPID + ":" + qregx + "}"
epName = "{" + urlEpName + ":" + regex + "}"
epNameQr = "{" + urlEpName + ":" + qregx + "}"
epID = "{" + urlEpID + ":" + regex + "}"
epPIDQr = "{" + urlEpPID + ":" + qregx + "}"
sbID = "{" + urlSbID + ":" + regex + "}"
sbPIDQr = "{" + urlSbPID + ":" + qregx + "}"
cnIDQr = "{" + urlCnID + ":" + qregx + "}"
cnPIDQr = "{" + urlCnPID + ":" + qregx + "}"
// Internal URL variable name.They can be anything as
// long as they do not collide with query fields.
urlNwName = "network-name"
urlNwID = "network-id"
urlNwPID = "network-partial-id"
urlEpName = "endpoint-name"
urlEpID = "endpoint-id"
urlEpPID = "endpoint-partial-id"
urlSbID = "sandbox-id"
urlSbPID = "sandbox-partial-id"
urlCnID = "container-id"
urlCnPID = "container-partial-id"
)
// NewHTTPHandler creates and initialize the HTTP handler to serve the requests for libnetwork
func NewHTTPHandler(c libnetwork.NetworkController) func(w http.ResponseWriter, req *http.Request) {
h := &httpHandler{c: c}
h.initRouter()
return h.handleRequest
}
type responseStatus struct {
Status string
StatusCode int
}
func (r *responseStatus) isOK() bool {
return r.StatusCode == http.StatusOK || r.StatusCode == http.StatusCreated
}
type processor func(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus)
type httpHandler struct {
c libnetwork.NetworkController
r *mux.Router
}
func (h *httpHandler) handleRequest(w http.ResponseWriter, req *http.Request) {
// Make sure the service is there
if h.c == nil {
http.Error(w, "NetworkController is not available", http.StatusServiceUnavailable)
return
}
// Get handler from router and execute it
h.r.ServeHTTP(w, req)
}
func (h *httpHandler) initRouter() {
m := map[string][]struct {
url string
qrs []string
fct processor
}{
"GET": {
// Order matters
{"/networks", []string{"name", nwNameQr}, procGetNetworks},
{"/networks", []string{"partial-id", nwPIDQr}, procGetNetworks},
{"/networks", nil, procGetNetworks},
{"/networks/" + nwID, nil, procGetNetwork},
{"/networks/" + nwID + "/endpoints", []string{"name", epNameQr}, procGetEndpoints},
{"/networks/" + nwID + "/endpoints", []string{"partial-id", epPIDQr}, procGetEndpoints},
{"/networks/" + nwID + "/endpoints", nil, procGetEndpoints},
{"/networks/" + nwID + "/endpoints/" + epID, nil, procGetEndpoint},
{"/services", []string{"network", nwNameQr}, procGetServices},
{"/services", []string{"name", epNameQr}, procGetServices},
{"/services", []string{"partial-id", epPIDQr}, procGetServices},
{"/services", nil, procGetServices},
{"/services/" + epID, nil, procGetService},
{"/services/" + epID + "/backend", nil, procGetSandbox},
{"/sandboxes", []string{"partial-container-id", cnPIDQr}, procGetSandboxes},
{"/sandboxes", []string{"container-id", cnIDQr}, procGetSandboxes},
{"/sandboxes", []string{"partial-id", sbPIDQr}, procGetSandboxes},
{"/sandboxes", nil, procGetSandboxes},
{"/sandboxes/" + sbID, nil, procGetSandbox},
},
"POST": {
{"/networks", nil, procCreateNetwork},
{"/networks/" + nwID + "/endpoints", nil, procCreateEndpoint},
{"/networks/" + nwID + "/endpoints/" + epID + "/sandboxes", nil, procJoinEndpoint},
{"/services", nil, procPublishService},
{"/services/" + epID + "/backend", nil, procAttachBackend},
{"/sandboxes", nil, procCreateSandbox},
},
"DELETE": {
{"/networks/" + nwID, nil, procDeleteNetwork},
{"/networks/" + nwID + "/endpoints/" + epID, nil, procDeleteEndpoint},
{"/networks/" + nwID + "/endpoints/" + epID + "/sandboxes/" + sbID, nil, procLeaveEndpoint},
{"/services/" + epID, nil, procUnpublishService},
{"/services/" + epID + "/backend/" + sbID, nil, procDetachBackend},
{"/sandboxes/" + sbID, nil, procDeleteSandbox},
},
}
h.r = mux.NewRouter()
for method, routes := range m {
for _, route := range routes {
r := h.r.Path("/{.*}" + route.url).Methods(method).HandlerFunc(makeHandler(h.c, route.fct))
if route.qrs != nil {
r.Queries(route.qrs...)
}
r = h.r.Path(route.url).Methods(method).HandlerFunc(makeHandler(h.c, route.fct))
if route.qrs != nil {
r.Queries(route.qrs...)
}
}
}
}
func makeHandler(ctrl libnetwork.NetworkController, fct processor) http.HandlerFunc {
return func(w http.ResponseWriter, req *http.Request) {
var (
body []byte
err error
)
if req.Body != nil {
body, err = ioutil.ReadAll(req.Body)
if err != nil {
http.Error(w, "Invalid body: "+err.Error(), http.StatusBadRequest)
return
}
}
res, rsp := fct(ctrl, mux.Vars(req), body)
if !rsp.isOK() {
http.Error(w, rsp.Status, rsp.StatusCode)
return
}
if res != nil {
writeJSON(w, rsp.StatusCode, res)
}
}
}
/*****************
Resource Builders
******************/
func buildNetworkResource(nw libnetwork.Network) *networkResource {
r := &networkResource{}
if nw != nil {
r.Name = nw.Name()
r.ID = nw.ID()
r.Type = nw.Type()
epl := nw.Endpoints()
r.Endpoints = make([]*endpointResource, 0, len(epl))
for _, e := range epl {
epr := buildEndpointResource(e)
r.Endpoints = append(r.Endpoints, epr)
}
}
return r
}
func buildEndpointResource(ep libnetwork.Endpoint) *endpointResource {
r := &endpointResource{}
if ep != nil {
r.Name = ep.Name()
r.ID = ep.ID()
r.Network = ep.Network()
}
return r
}
func buildSandboxResource(sb libnetwork.Sandbox) *sandboxResource {
r := &sandboxResource{}
if sb != nil {
r.ID = sb.ID()
r.Key = sb.Key()
r.ContainerID = sb.ContainerID()
}
return r
}
/****************
Options Parsers
*****************/
func (sc *sandboxCreate) parseOptions() []libnetwork.SandboxOption {
var setFctList []libnetwork.SandboxOption
if sc.HostName != "" {
setFctList = append(setFctList, libnetwork.OptionHostname(sc.HostName))
}
if sc.DomainName != "" {
setFctList = append(setFctList, libnetwork.OptionDomainname(sc.DomainName))
}
if sc.HostsPath != "" {
setFctList = append(setFctList, libnetwork.OptionHostsPath(sc.HostsPath))
}
if sc.ResolvConfPath != "" {
setFctList = append(setFctList, libnetwork.OptionResolvConfPath(sc.ResolvConfPath))
}
if sc.UseDefaultSandbox {
setFctList = append(setFctList, libnetwork.OptionUseDefaultSandbox())
}
if sc.UseExternalKey {
setFctList = append(setFctList, libnetwork.OptionUseExternalKey())
}
if sc.DNS != nil {
for _, d := range sc.DNS {
setFctList = append(setFctList, libnetwork.OptionDNS(d))
}
}
if sc.ExtraHosts != nil {
for _, e := range sc.ExtraHosts {
setFctList = append(setFctList, libnetwork.OptionExtraHost(e.Name, e.Address))
}
}
if sc.ExposedPorts != nil {
setFctList = append(setFctList, libnetwork.OptionExposedPorts(sc.ExposedPorts))
}
if sc.PortMapping != nil {
setFctList = append(setFctList, libnetwork.OptionPortMapping(sc.PortMapping))
}
return setFctList
}
func (ej *endpointJoin) parseOptions() []libnetwork.EndpointOption {
// priority will go here
return []libnetwork.EndpointOption{}
}
/******************
Process functions
*******************/
func processCreateDefaults(c libnetwork.NetworkController, nc *networkCreate) {
if nc.NetworkType == "" {
nc.NetworkType = c.Config().Daemon.DefaultDriver
}
}
/***************************
NetworkController interface
****************************/
func procCreateNetwork(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
var create networkCreate
err := json.Unmarshal(body, &create)
if err != nil {
return nil, &responseStatus{Status: "Invalid body: " + err.Error(), StatusCode: http.StatusBadRequest}
}
processCreateDefaults(c, &create)
options := []libnetwork.NetworkOption{}
if val, ok := create.NetworkOpts[netlabel.Internal]; ok {
internal, err := strconv.ParseBool(val)
if err != nil {
return nil, &responseStatus{Status: err.Error(), StatusCode: http.StatusBadRequest}
}
if internal {
options = append(options, libnetwork.NetworkOptionInternalNetwork())
}
}
if val, ok := create.NetworkOpts[netlabel.EnableIPv6]; ok {
enableIPv6, err := strconv.ParseBool(val)
if err != nil {
return nil, &responseStatus{Status: err.Error(), StatusCode: http.StatusBadRequest}
}
options = append(options, libnetwork.NetworkOptionEnableIPv6(enableIPv6))
}
if len(create.DriverOpts) > 0 {
options = append(options, libnetwork.NetworkOptionDriverOpts(create.DriverOpts))
}
if len(create.IPv4Conf) > 0 {
ipamV4Conf := &libnetwork.IpamConf{
PreferredPool: create.IPv4Conf[0].PreferredPool,
SubPool: create.IPv4Conf[0].SubPool,
}
options = append(options, libnetwork.NetworkOptionIpam("default", "", []*libnetwork.IpamConf{ipamV4Conf}, nil, nil))
}
nw, err := c.NewNetwork(create.NetworkType, create.Name, create.ID, options...)
if err != nil {
return nil, convertNetworkError(err)
}
return nw.ID(), &createdResponse
}
func procGetNetwork(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
t, by := detectNetworkTarget(vars)
nw, errRsp := findNetwork(c, t, by)
if !errRsp.isOK() {
return nil, errRsp
}
return buildNetworkResource(nw), &successResponse
}
func procGetNetworks(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
var list []*networkResource
// Look for query filters and validate
name, queryByName := vars[urlNwName]
shortID, queryByPid := vars[urlNwPID]
if queryByName && queryByPid {
return nil, &badQueryResponse
}
if queryByName {
if nw, errRsp := findNetwork(c, name, byName); errRsp.isOK() {
list = append(list, buildNetworkResource(nw))
}
} else if queryByPid {
// Return all the prefix-matching networks
l := func(nw libnetwork.Network) bool {
if strings.HasPrefix(nw.ID(), shortID) {
list = append(list, buildNetworkResource(nw))
}
return false
}
c.WalkNetworks(l)
} else {
for _, nw := range c.Networks() {
list = append(list, buildNetworkResource(nw))
}
}
return list, &successResponse
}
func procCreateSandbox(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
var create sandboxCreate
err := json.Unmarshal(body, &create)
if err != nil {
return "", &responseStatus{Status: "Invalid body: " + err.Error(), StatusCode: http.StatusBadRequest}
}
sb, err := c.NewSandbox(create.ContainerID, create.parseOptions()...)
if err != nil {
return "", convertNetworkError(err)
}
return sb.ID(), &createdResponse
}
/******************
Network interface
*******************/
func procCreateEndpoint(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
var ec endpointCreate
err := json.Unmarshal(body, &ec)
if err != nil {
return "", &responseStatus{Status: "Invalid body: " + err.Error(), StatusCode: http.StatusBadRequest}
}
nwT, nwBy := detectNetworkTarget(vars)
n, errRsp := findNetwork(c, nwT, nwBy)
if !errRsp.isOK() {
return "", errRsp
}
var setFctList []libnetwork.EndpointOption
for _, str := range ec.MyAliases {
setFctList = append(setFctList, libnetwork.CreateOptionMyAlias(str))
}
ep, err := n.CreateEndpoint(ec.Name, setFctList...)
if err != nil {
return "", convertNetworkError(err)
}
return ep.ID(), &createdResponse
}
func procGetEndpoint(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
nwT, nwBy := detectNetworkTarget(vars)
epT, epBy := detectEndpointTarget(vars)
ep, errRsp := findEndpoint(c, nwT, epT, nwBy, epBy)
if !errRsp.isOK() {
return nil, errRsp
}
return buildEndpointResource(ep), &successResponse
}
func procGetEndpoints(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
// Look for query filters and validate
name, queryByName := vars[urlEpName]
shortID, queryByPid := vars[urlEpPID]
if queryByName && queryByPid {
return nil, &badQueryResponse
}
nwT, nwBy := detectNetworkTarget(vars)
nw, errRsp := findNetwork(c, nwT, nwBy)
if !errRsp.isOK() {
return nil, errRsp
}
var list []*endpointResource
// If query parameter is specified, return a filtered collection
if queryByName {
if ep, errRsp := findEndpoint(c, nwT, name, nwBy, byName); errRsp.isOK() {
list = append(list, buildEndpointResource(ep))
}
} else if queryByPid {
// Return all the prefix-matching endpoints
l := func(ep libnetwork.Endpoint) bool {
if strings.HasPrefix(ep.ID(), shortID) {
list = append(list, buildEndpointResource(ep))
}
return false
}
nw.WalkEndpoints(l)
} else {
for _, ep := range nw.Endpoints() {
epr := buildEndpointResource(ep)
list = append(list, epr)
}
}
return list, &successResponse
}
func procDeleteNetwork(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
target, by := detectNetworkTarget(vars)
nw, errRsp := findNetwork(c, target, by)
if !errRsp.isOK() {
return nil, errRsp
}
err := nw.Delete()
if err != nil {
return nil, convertNetworkError(err)
}
return nil, &successResponse
}
/******************
Endpoint interface
*******************/
func procJoinEndpoint(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
var ej endpointJoin
var setFctList []libnetwork.EndpointOption
err := json.Unmarshal(body, &ej)
if err != nil {
return nil, &responseStatus{Status: "Invalid body: " + err.Error(), StatusCode: http.StatusBadRequest}
}
nwT, nwBy := detectNetworkTarget(vars)
epT, epBy := detectEndpointTarget(vars)
ep, errRsp := findEndpoint(c, nwT, epT, nwBy, epBy)
if !errRsp.isOK() {
return nil, errRsp
}
sb, errRsp := findSandbox(c, ej.SandboxID, byID)
if !errRsp.isOK() {
return nil, errRsp
}
for _, str := range ej.Aliases {
name, alias, err := netutils.ParseAlias(str)
if err != nil {
return "", convertNetworkError(err)
}
setFctList = append(setFctList, libnetwork.CreateOptionAlias(name, alias))
}
err = ep.Join(sb, setFctList...)
if err != nil {
return nil, convertNetworkError(err)
}
return sb.Key(), &successResponse
}
func procLeaveEndpoint(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
nwT, nwBy := detectNetworkTarget(vars)
epT, epBy := detectEndpointTarget(vars)
ep, errRsp := findEndpoint(c, nwT, epT, nwBy, epBy)
if !errRsp.isOK() {
return nil, errRsp
}
sb, errRsp := findSandbox(c, vars[urlSbID], byID)
if !errRsp.isOK() {
return nil, errRsp
}
err := ep.Leave(sb)
if err != nil {
return nil, convertNetworkError(err)
}
return nil, &successResponse
}
func procDeleteEndpoint(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
nwT, nwBy := detectNetworkTarget(vars)
epT, epBy := detectEndpointTarget(vars)
ep, errRsp := findEndpoint(c, nwT, epT, nwBy, epBy)
if !errRsp.isOK() {
return nil, errRsp
}
err := ep.Delete(false)
if err != nil {
return nil, convertNetworkError(err)
}
return nil, &successResponse
}
/******************
Service interface
*******************/
func procGetServices(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
// Look for query filters and validate
nwName, filterByNwName := vars[urlNwName]
svName, queryBySvName := vars[urlEpName]
shortID, queryBySvPID := vars[urlEpPID]
if filterByNwName && queryBySvName || filterByNwName && queryBySvPID || queryBySvName && queryBySvPID {
return nil, &badQueryResponse
}
var list []*endpointResource
switch {
case filterByNwName:
// return all service present on the specified network
nw, errRsp := findNetwork(c, nwName, byName)
if !errRsp.isOK() {
return list, &successResponse
}
for _, ep := range nw.Endpoints() {
epr := buildEndpointResource(ep)
list = append(list, epr)
}
case queryBySvName:
// Look in each network for the service with the specified name
l := func(ep libnetwork.Endpoint) bool {
if ep.Name() == svName {
list = append(list, buildEndpointResource(ep))
return true
}
return false
}
for _, nw := range c.Networks() {
nw.WalkEndpoints(l)
}
case queryBySvPID:
// Return all the prefix-matching services
l := func(ep libnetwork.Endpoint) bool {
if strings.HasPrefix(ep.ID(), shortID) {
list = append(list, buildEndpointResource(ep))
}
return false
}
for _, nw := range c.Networks() {
nw.WalkEndpoints(l)
}
default:
for _, nw := range c.Networks() {
for _, ep := range nw.Endpoints() {
epr := buildEndpointResource(ep)
list = append(list, epr)
}
}
}
return list, &successResponse
}
func procGetService(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
epT, epBy := detectEndpointTarget(vars)
sv, errRsp := findService(c, epT, epBy)
if !errRsp.isOK() {
return nil, endpointToService(errRsp)
}
return buildEndpointResource(sv), &successResponse
}
func procPublishService(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
var sp servicePublish
err := json.Unmarshal(body, &sp)
if err != nil {
return "", &responseStatus{Status: "Invalid body: " + err.Error(), StatusCode: http.StatusBadRequest}
}
n, errRsp := findNetwork(c, sp.Network, byName)
if !errRsp.isOK() {
return "", errRsp
}
var setFctList []libnetwork.EndpointOption
for _, str := range sp.MyAliases {
setFctList = append(setFctList, libnetwork.CreateOptionMyAlias(str))
}
ep, err := n.CreateEndpoint(sp.Name, setFctList...)
if err != nil {
return "", endpointToService(convertNetworkError(err))
}
return ep.ID(), &createdResponse
}
func procUnpublishService(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
var sd serviceDelete
if body != nil {
err := json.Unmarshal(body, &sd)
if err != nil {
return "", &responseStatus{Status: "Invalid body: " + err.Error(), StatusCode: http.StatusBadRequest}
}
}
epT, epBy := detectEndpointTarget(vars)
sv, errRsp := findService(c, epT, epBy)
if !errRsp.isOK() {
return nil, errRsp
}
if err := sv.Delete(sd.Force); err != nil {
return nil, endpointToService(convertNetworkError(err))
}
return nil, &successResponse
}
func procAttachBackend(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
var bk endpointJoin
var setFctList []libnetwork.EndpointOption
err := json.Unmarshal(body, &bk)
if err != nil {
return nil, &responseStatus{Status: "Invalid body: " + err.Error(), StatusCode: http.StatusBadRequest}
}
epT, epBy := detectEndpointTarget(vars)
sv, errRsp := findService(c, epT, epBy)
if !errRsp.isOK() {
return nil, errRsp
}
sb, errRsp := findSandbox(c, bk.SandboxID, byID)
if !errRsp.isOK() {
return nil, errRsp
}
for _, str := range bk.Aliases {
name, alias, err := netutils.ParseAlias(str)
if err != nil {
return "", convertNetworkError(err)
}
setFctList = append(setFctList, libnetwork.CreateOptionAlias(name, alias))
}
err = sv.Join(sb, setFctList...)
if err != nil {
return nil, convertNetworkError(err)
}
return sb.Key(), &successResponse
}
func procDetachBackend(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
epT, epBy := detectEndpointTarget(vars)
sv, errRsp := findService(c, epT, epBy)
if !errRsp.isOK() {
return nil, errRsp
}
sb, errRsp := findSandbox(c, vars[urlSbID], byID)
if !errRsp.isOK() {
return nil, errRsp
}
err := sv.Leave(sb)
if err != nil {
return nil, convertNetworkError(err)
}
return nil, &successResponse
}
/******************
Sandbox interface
*******************/
func procGetSandbox(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
if epT, ok := vars[urlEpID]; ok {
sv, errRsp := findService(c, epT, byID)
if !errRsp.isOK() {
return nil, endpointToService(errRsp)
}
return buildSandboxResource(sv.Info().Sandbox()), &successResponse
}
sbT, by := detectSandboxTarget(vars)
sb, errRsp := findSandbox(c, sbT, by)
if !errRsp.isOK() {
return nil, errRsp
}
return buildSandboxResource(sb), &successResponse
}
type cndFnMkr func(string) cndFn
type cndFn func(libnetwork.Sandbox) bool
// list of (query type, condition function makers) couples
var cndMkrList = []struct {
identifier string
maker cndFnMkr
}{
{urlSbPID, func(id string) cndFn {
return func(sb libnetwork.Sandbox) bool { return strings.HasPrefix(sb.ID(), id) }
}},
{urlCnID, func(id string) cndFn {
return func(sb libnetwork.Sandbox) bool { return sb.ContainerID() == id }
}},
{urlCnPID, func(id string) cndFn {
return func(sb libnetwork.Sandbox) bool { return strings.HasPrefix(sb.ContainerID(), id) }
}},
}
func getQueryCondition(vars map[string]string) func(libnetwork.Sandbox) bool {
for _, im := range cndMkrList {
if val, ok := vars[im.identifier]; ok {
return im.maker(val)
}
}
return func(sb libnetwork.Sandbox) bool { return true }
}
func sandboxWalker(condition cndFn, list *[]*sandboxResource) libnetwork.SandboxWalker {
return func(sb libnetwork.Sandbox) bool {
if condition(sb) {
*list = append(*list, buildSandboxResource(sb))
}
return false
}
}
func procGetSandboxes(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
var list []*sandboxResource
cnd := getQueryCondition(vars)
c.WalkSandboxes(sandboxWalker(cnd, &list))
return list, &successResponse
}
func procDeleteSandbox(c libnetwork.NetworkController, vars map[string]string, body []byte) (interface{}, *responseStatus) {
sbT, by := detectSandboxTarget(vars)
sb, errRsp := findSandbox(c, sbT, by)
if !errRsp.isOK() {
return nil, errRsp
}
err := sb.Delete()
if err != nil {
return nil, convertNetworkError(err)
}
return nil, &successResponse
}
/***********
Utilities
************/
const (
byID = iota
byName
)
func detectNetworkTarget(vars map[string]string) (string, int) {
if target, ok := vars[urlNwName]; ok {
return target, byName
}
if target, ok := vars[urlNwID]; ok {
return target, byID
}
// vars are populated from the URL, following cannot happen
panic("Missing URL variable parameter for network")
}
func detectSandboxTarget(vars map[string]string) (string, int) {
if target, ok := vars[urlSbID]; ok {
return target, byID
}
// vars are populated from the URL, following cannot happen
panic("Missing URL variable parameter for sandbox")
}
func detectEndpointTarget(vars map[string]string) (string, int) {
if target, ok := vars[urlEpName]; ok {
return target, byName
}
if target, ok := vars[urlEpID]; ok {
return target, byID
}
// vars are populated from the URL, following cannot happen
panic("Missing URL variable parameter for endpoint")
}
func findNetwork(c libnetwork.NetworkController, s string, by int) (libnetwork.Network, *responseStatus) {
var (
nw libnetwork.Network
err error
)
switch by {
case byID:
nw, err = c.NetworkByID(s)
case byName:
if s == "" {
s = c.Config().Daemon.DefaultNetwork
}
nw, err = c.NetworkByName(s)
default:
panic(fmt.Sprintf("unexpected selector for network search: %d", by))
}
if err != nil {
if _, ok := err.(types.NotFoundError); ok {
return nil, &responseStatus{Status: "Resource not found: Network", StatusCode: http.StatusNotFound}
}
return nil, &responseStatus{Status: err.Error(), StatusCode: http.StatusBadRequest}
}
return nw, &successResponse
}
func findSandbox(c libnetwork.NetworkController, s string, by int) (libnetwork.Sandbox, *responseStatus) {
var (
sb libnetwork.Sandbox
err error
)
switch by {
case byID:
sb, err = c.SandboxByID(s)
default:
panic(fmt.Sprintf("unexpected selector for sandbox search: %d", by))
}
if err != nil {
if _, ok := err.(types.NotFoundError); ok {
return nil, &responseStatus{Status: "Resource not found: Sandbox", StatusCode: http.StatusNotFound}
}
return nil, &responseStatus{Status: err.Error(), StatusCode: http.StatusBadRequest}
}
return sb, &successResponse
}
func findEndpoint(c libnetwork.NetworkController, ns, es string, nwBy, epBy int) (libnetwork.Endpoint, *responseStatus) {
nw, errRsp := findNetwork(c, ns, nwBy)
if !errRsp.isOK() {
return nil, errRsp
}
var (
err error
ep libnetwork.Endpoint
)
switch epBy {
case byID:
ep, err = nw.EndpointByID(es)
case byName:
ep, err = nw.EndpointByName(es)
default:
panic(fmt.Sprintf("unexpected selector for endpoint search: %d", epBy))
}
if err != nil {
if _, ok := err.(types.NotFoundError); ok {
return nil, &responseStatus{Status: "Resource not found: Endpoint", StatusCode: http.StatusNotFound}
}
return nil, &responseStatus{Status: err.Error(), StatusCode: http.StatusBadRequest}
}
return ep, &successResponse
}
func findService(c libnetwork.NetworkController, svs string, svBy int) (libnetwork.Endpoint, *responseStatus) {
for _, nw := range c.Networks() {
var (
ep libnetwork.Endpoint
err error
)
switch svBy {
case byID:
ep, err = nw.EndpointByID(svs)
case byName:
ep, err = nw.EndpointByName(svs)
default:
panic(fmt.Sprintf("unexpected selector for service search: %d", svBy))
}
if err == nil {
return ep, &successResponse
} else if _, ok := err.(types.NotFoundError); !ok {
return nil, convertNetworkError(err)
}
}
return nil, &responseStatus{Status: "Service not found", StatusCode: http.StatusNotFound}
}
func endpointToService(rsp *responseStatus) *responseStatus {
rsp.Status = strings.Replace(rsp.Status, "endpoint", "service", -1)
return rsp
}
func convertNetworkError(err error) *responseStatus {
var code int
switch err.(type) {
case types.BadRequestError:
code = http.StatusBadRequest
case types.ForbiddenError:
code = http.StatusForbidden
case types.NotFoundError:
code = http.StatusNotFound
case types.TimeoutError:
code = http.StatusRequestTimeout
case types.NotImplementedError:
code = http.StatusNotImplemented
case types.NoServiceError:
code = http.StatusServiceUnavailable
case types.InternalError:
code = http.StatusInternalServerError
default:
code = http.StatusInternalServerError
}
return &responseStatus{Status: err.Error(), StatusCode: code}
}
func writeJSON(w http.ResponseWriter, code int, v interface{}) error {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(code)
return json.NewEncoder(w).Encode(v)
}

View file

@ -0,0 +1,18 @@
package api
import (
"github.com/docker/libnetwork/drivers/bridge"
"github.com/docker/libnetwork/netlabel"
)
func GetOpsMap(bridgeName, defaultMTU string) map[string]string {
if defaultMTU == "" {
return map[string]string{
bridge.BridgeName: bridgeName,
}
}
return map[string]string{
bridge.BridgeName: bridgeName,
netlabel.DriverMTU: defaultMTU,
}
}

2389
libnetwork/api/api_test.go Normal file

File diff suppressed because it is too large Load diff

96
libnetwork/api/types.go Normal file
View file

@ -0,0 +1,96 @@
package api
import "github.com/docker/libnetwork/types"
/***********
Resources
************/
// networkResource is the body of the "get network" http response message
type networkResource struct {
Name string `json:"name"`
ID string `json:"id"`
Type string `json:"type"`
Endpoints []*endpointResource `json:"endpoints"`
}
// endpointResource is the body of the "get endpoint" http response message
type endpointResource struct {
Name string `json:"name"`
ID string `json:"id"`
Network string `json:"network"`
}
// sandboxResource is the body of "get service backend" response message
type sandboxResource struct {
ID string `json:"id"`
Key string `json:"key"`
ContainerID string `json:"container_id"`
}
/***********
Body types
************/
type ipamConf struct {
PreferredPool string
SubPool string
Gateway string
AuxAddresses map[string]string
}
// networkCreate is the expected body of the "create network" http request message
type networkCreate struct {
Name string `json:"name"`
ID string `json:"id"`
NetworkType string `json:"network_type"`
IPv4Conf []ipamConf `json:"ipv4_configuration"`
DriverOpts map[string]string `json:"driver_opts"`
NetworkOpts map[string]string `json:"network_opts"`
}
// endpointCreate represents the body of the "create endpoint" http request message
type endpointCreate struct {
Name string `json:"name"`
MyAliases []string `json:"my_aliases"`
}
// sandboxCreate is the expected body of the "create sandbox" http request message
type sandboxCreate struct {
ContainerID string `json:"container_id"`
HostName string `json:"host_name"`
DomainName string `json:"domain_name"`
HostsPath string `json:"hosts_path"`
ResolvConfPath string `json:"resolv_conf_path"`
DNS []string `json:"dns"`
ExtraHosts []extraHost `json:"extra_hosts"`
UseDefaultSandbox bool `json:"use_default_sandbox"`
UseExternalKey bool `json:"use_external_key"`
ExposedPorts []types.TransportPort `json:"exposed_ports"`
PortMapping []types.PortBinding `json:"port_mapping"`
}
// endpointJoin represents the expected body of the "join endpoint" or "leave endpoint" http request messages
type endpointJoin struct {
SandboxID string `json:"sandbox_id"`
Aliases []string `json:"aliases"`
}
// servicePublish represents the body of the "publish service" http request message
type servicePublish struct {
Name string `json:"name"`
MyAliases []string `json:"my_aliases"`
Network string `json:"network_name"`
}
// serviceDelete represents the body of the "unpublish service" http request message
type serviceDelete struct {
Name string `json:"name"`
Force bool `json:"force"`
}
// extraHost represents the extra host object
type extraHost struct {
Name string `json:"name"`
Address string `json:"address"`
}

View file

@ -0,0 +1,736 @@
// Package bitseq provides a structure and utilities for representing long bitmask
// as sequence of run-length encoded blocks. It operates directly on the encoded
// representation, it does not decode/encode.
package bitseq
import (
"encoding/binary"
"encoding/json"
"errors"
"fmt"
"sync"
"github.com/docker/libnetwork/datastore"
"github.com/docker/libnetwork/types"
"github.com/sirupsen/logrus"
)
// block sequence constants
// If needed we can think of making these configurable
const (
blockLen = uint32(32)
blockBytes = uint64(blockLen / 8)
blockMAX = uint32(1<<blockLen - 1)
blockFirstBit = uint32(1) << (blockLen - 1)
invalidPos = uint64(0xFFFFFFFFFFFFFFFF)
)
var (
// ErrNoBitAvailable is returned when no more bits are available to set
ErrNoBitAvailable = errors.New("no bit available")
// ErrBitAllocated is returned when the specific bit requested is already set
ErrBitAllocated = errors.New("requested bit is already allocated")
)
// Handle contains the sequence representing the bitmask and its identifier
type Handle struct {
bits uint64
unselected uint64
head *sequence
app string
id string
dbIndex uint64
dbExists bool
curr uint64
store datastore.DataStore
sync.Mutex
}
// NewHandle returns a thread-safe instance of the bitmask handler
func NewHandle(app string, ds datastore.DataStore, id string, numElements uint64) (*Handle, error) {
h := &Handle{
app: app,
id: id,
store: ds,
bits: numElements,
unselected: numElements,
head: &sequence{
block: 0x0,
count: getNumBlocks(numElements),
},
}
if h.store == nil {
return h, nil
}
// Get the initial status from the ds if present.
if err := h.store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
return nil, err
}
// If the handle is not in store, write it.
if !h.Exists() {
if err := h.writeToStore(); err != nil {
return nil, fmt.Errorf("failed to write bitsequence to store: %v", err)
}
}
return h, nil
}
// sequence represents a recurring sequence of 32 bits long bitmasks
type sequence struct {
block uint32 // block is a symbol representing 4 byte long allocation bitmask
count uint64 // number of consecutive blocks (symbols)
next *sequence // next sequence
}
// String returns a string representation of the block sequence starting from this block
func (s *sequence) toString() string {
var nextBlock string
if s.next == nil {
nextBlock = "end"
} else {
nextBlock = s.next.toString()
}
return fmt.Sprintf("(0x%x, %d)->%s", s.block, s.count, nextBlock)
}
// GetAvailableBit returns the position of the first unset bit in the bitmask represented by this sequence
func (s *sequence) getAvailableBit(from uint64) (uint64, uint64, error) {
if s.block == blockMAX || s.count == 0 {
return invalidPos, invalidPos, ErrNoBitAvailable
}
bits := from
bitSel := blockFirstBit >> from
for bitSel > 0 && s.block&bitSel != 0 {
bitSel >>= 1
bits++
}
// Check if the loop exited because it could not
// find any available bit int block starting from
// "from". Return invalid pos in that case.
if bitSel == 0 {
return invalidPos, invalidPos, ErrNoBitAvailable
}
return bits / 8, bits % 8, nil
}
// GetCopy returns a copy of the linked list rooted at this node
func (s *sequence) getCopy() *sequence {
n := &sequence{block: s.block, count: s.count}
pn := n
ps := s.next
for ps != nil {
pn.next = &sequence{block: ps.block, count: ps.count}
pn = pn.next
ps = ps.next
}
return n
}
// Equal checks if this sequence is equal to the passed one
func (s *sequence) equal(o *sequence) bool {
this := s
other := o
for this != nil {
if other == nil {
return false
}
if this.block != other.block || this.count != other.count {
return false
}
this = this.next
other = other.next
}
// Check if other is longer than this
if other != nil {
return false
}
return true
}
// ToByteArray converts the sequence into a byte array
func (s *sequence) toByteArray() ([]byte, error) {
var bb []byte
p := s
for p != nil {
b := make([]byte, 12)
binary.BigEndian.PutUint32(b[0:], p.block)
binary.BigEndian.PutUint64(b[4:], p.count)
bb = append(bb, b...)
p = p.next
}
return bb, nil
}
// fromByteArray construct the sequence from the byte array
func (s *sequence) fromByteArray(data []byte) error {
l := len(data)
if l%12 != 0 {
return fmt.Errorf("cannot deserialize byte sequence of length %d (%v)", l, data)
}
p := s
i := 0
for {
p.block = binary.BigEndian.Uint32(data[i : i+4])
p.count = binary.BigEndian.Uint64(data[i+4 : i+12])
i += 12
if i == l {
break
}
p.next = &sequence{}
p = p.next
}
return nil
}
func (h *Handle) getCopy() *Handle {
return &Handle{
bits: h.bits,
unselected: h.unselected,
head: h.head.getCopy(),
app: h.app,
id: h.id,
dbIndex: h.dbIndex,
dbExists: h.dbExists,
store: h.store,
curr: h.curr,
}
}
// SetAnyInRange atomically sets the first unset bit in the specified range in the sequence and returns the corresponding ordinal
func (h *Handle) SetAnyInRange(start, end uint64, serial bool) (uint64, error) {
if end < start || end >= h.bits {
return invalidPos, fmt.Errorf("invalid bit range [%d, %d]", start, end)
}
if h.Unselected() == 0 {
return invalidPos, ErrNoBitAvailable
}
return h.set(0, start, end, true, false, serial)
}
// SetAny atomically sets the first unset bit in the sequence and returns the corresponding ordinal
func (h *Handle) SetAny(serial bool) (uint64, error) {
if h.Unselected() == 0 {
return invalidPos, ErrNoBitAvailable
}
return h.set(0, 0, h.bits-1, true, false, serial)
}
// Set atomically sets the corresponding bit in the sequence
func (h *Handle) Set(ordinal uint64) error {
if err := h.validateOrdinal(ordinal); err != nil {
return err
}
_, err := h.set(ordinal, 0, 0, false, false, false)
return err
}
// Unset atomically unsets the corresponding bit in the sequence
func (h *Handle) Unset(ordinal uint64) error {
if err := h.validateOrdinal(ordinal); err != nil {
return err
}
_, err := h.set(ordinal, 0, 0, false, true, false)
return err
}
// IsSet atomically checks if the ordinal bit is set. In case ordinal
// is outside of the bit sequence limits, false is returned.
func (h *Handle) IsSet(ordinal uint64) bool {
if err := h.validateOrdinal(ordinal); err != nil {
return false
}
h.Lock()
_, _, err := checkIfAvailable(h.head, ordinal)
h.Unlock()
return err != nil
}
func (h *Handle) runConsistencyCheck() bool {
corrupted := false
for p, c := h.head, h.head.next; c != nil; c = c.next {
if c.count == 0 {
corrupted = true
p.next = c.next
continue // keep same p
}
p = c
}
return corrupted
}
// CheckConsistency checks if the bit sequence is in an inconsistent state and attempts to fix it.
// It looks for a corruption signature that may happen in docker 1.9.0 and 1.9.1.
func (h *Handle) CheckConsistency() error {
for {
h.Lock()
store := h.store
h.Unlock()
if store != nil {
if err := store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
return err
}
}
h.Lock()
nh := h.getCopy()
h.Unlock()
if !nh.runConsistencyCheck() {
return nil
}
if err := nh.writeToStore(); err != nil {
if _, ok := err.(types.RetryError); !ok {
return fmt.Errorf("internal failure while fixing inconsistent bitsequence: %v", err)
}
continue
}
logrus.Infof("Fixed inconsistent bit sequence in datastore:\n%s\n%s", h, nh)
h.Lock()
h.head = nh.head
h.Unlock()
return nil
}
}
// set/reset the bit
func (h *Handle) set(ordinal, start, end uint64, any bool, release bool, serial bool) (uint64, error) {
var (
bitPos uint64
bytePos uint64
ret uint64
err error
)
for {
var store datastore.DataStore
curr := uint64(0)
h.Lock()
store = h.store
if store != nil {
h.Unlock() // The lock is acquired in the GetObject
if err := store.GetObject(datastore.Key(h.Key()...), h); err != nil && err != datastore.ErrKeyNotFound {
return ret, err
}
h.Lock() // Acquire the lock back
}
if serial {
curr = h.curr
}
// Get position if available
if release {
bytePos, bitPos = ordinalToPos(ordinal)
} else {
if any {
bytePos, bitPos, err = getAvailableFromCurrent(h.head, start, curr, end)
ret = posToOrdinal(bytePos, bitPos)
if err == nil {
h.curr = ret + 1
}
} else {
bytePos, bitPos, err = checkIfAvailable(h.head, ordinal)
ret = ordinal
}
}
if err != nil {
h.Unlock()
return ret, err
}
// Create a private copy of h and work on it
nh := h.getCopy()
nh.head = pushReservation(bytePos, bitPos, nh.head, release)
if release {
nh.unselected++
} else {
nh.unselected--
}
if h.store != nil {
h.Unlock()
// Attempt to write private copy to store
if err := nh.writeToStore(); err != nil {
if _, ok := err.(types.RetryError); !ok {
return ret, fmt.Errorf("internal failure while setting the bit: %v", err)
}
// Retry
continue
}
h.Lock()
}
// Previous atomic push was successful. Save private copy to local copy
h.unselected = nh.unselected
h.head = nh.head
h.dbExists = nh.dbExists
h.dbIndex = nh.dbIndex
h.Unlock()
return ret, nil
}
}
// checks is needed because to cover the case where the number of bits is not a multiple of blockLen
func (h *Handle) validateOrdinal(ordinal uint64) error {
h.Lock()
defer h.Unlock()
if ordinal >= h.bits {
return errors.New("bit does not belong to the sequence")
}
return nil
}
// Destroy removes from the datastore the data belonging to this handle
func (h *Handle) Destroy() error {
for {
if err := h.deleteFromStore(); err != nil {
if _, ok := err.(types.RetryError); !ok {
return fmt.Errorf("internal failure while destroying the sequence: %v", err)
}
// Fetch latest
if err := h.store.GetObject(datastore.Key(h.Key()...), h); err != nil {
if err == datastore.ErrKeyNotFound { // already removed
return nil
}
return fmt.Errorf("failed to fetch from store when destroying the sequence: %v", err)
}
continue
}
return nil
}
}
// ToByteArray converts this handle's data into a byte array
func (h *Handle) ToByteArray() ([]byte, error) {
h.Lock()
defer h.Unlock()
ba := make([]byte, 16)
binary.BigEndian.PutUint64(ba[0:], h.bits)
binary.BigEndian.PutUint64(ba[8:], h.unselected)
bm, err := h.head.toByteArray()
if err != nil {
return nil, fmt.Errorf("failed to serialize head: %s", err.Error())
}
ba = append(ba, bm...)
return ba, nil
}
// FromByteArray reads his handle's data from a byte array
func (h *Handle) FromByteArray(ba []byte) error {
if ba == nil {
return errors.New("nil byte array")
}
nh := &sequence{}
err := nh.fromByteArray(ba[16:])
if err != nil {
return fmt.Errorf("failed to deserialize head: %s", err.Error())
}
h.Lock()
h.head = nh
h.bits = binary.BigEndian.Uint64(ba[0:8])
h.unselected = binary.BigEndian.Uint64(ba[8:16])
h.Unlock()
return nil
}
// Bits returns the length of the bit sequence
func (h *Handle) Bits() uint64 {
return h.bits
}
// Unselected returns the number of bits which are not selected
func (h *Handle) Unselected() uint64 {
h.Lock()
defer h.Unlock()
return h.unselected
}
func (h *Handle) String() string {
h.Lock()
defer h.Unlock()
return fmt.Sprintf("App: %s, ID: %s, DBIndex: 0x%x, Bits: %d, Unselected: %d, Sequence: %s Curr:%d",
h.app, h.id, h.dbIndex, h.bits, h.unselected, h.head.toString(), h.curr)
}
// MarshalJSON encodes Handle into json message
func (h *Handle) MarshalJSON() ([]byte, error) {
m := map[string]interface{}{
"id": h.id,
}
b, err := h.ToByteArray()
if err != nil {
return nil, err
}
m["sequence"] = b
return json.Marshal(m)
}
// UnmarshalJSON decodes json message into Handle
func (h *Handle) UnmarshalJSON(data []byte) error {
var (
m map[string]interface{}
b []byte
err error
)
if err = json.Unmarshal(data, &m); err != nil {
return err
}
h.id = m["id"].(string)
bi, _ := json.Marshal(m["sequence"])
if err := json.Unmarshal(bi, &b); err != nil {
return err
}
return h.FromByteArray(b)
}
// getFirstAvailable looks for the first unset bit in passed mask starting from start
func getFirstAvailable(head *sequence, start uint64) (uint64, uint64, error) {
// Find sequence which contains the start bit
byteStart, bitStart := ordinalToPos(start)
current, _, precBlocks, inBlockBytePos := findSequence(head, byteStart)
// Derive the this sequence offsets
byteOffset := byteStart - inBlockBytePos
bitOffset := inBlockBytePos*8 + bitStart
for current != nil {
if current.block != blockMAX {
// If the current block is not full, check if there is any bit
// from the current bit in the current block. If not, before proceeding to the
// next block node, make sure we check for available bit in the next
// instance of the same block. Due to RLE same block signature will be
// compressed.
retry:
bytePos, bitPos, err := current.getAvailableBit(bitOffset)
if err != nil && precBlocks == current.count-1 {
// This is the last instance in the same block node,
// so move to the next block.
goto next
}
if err != nil {
// There are some more instances of the same block, so add the offset
// and be optimistic that you will find the available bit in the next
// instance of the same block.
bitOffset = 0
byteOffset += blockBytes
precBlocks++
goto retry
}
return byteOffset + bytePos, bitPos, err
}
// Moving to next block: Reset bit offset.
next:
bitOffset = 0
byteOffset += (current.count * blockBytes) - (precBlocks * blockBytes)
precBlocks = 0
current = current.next
}
return invalidPos, invalidPos, ErrNoBitAvailable
}
// getAvailableFromCurrent will look for available ordinal from the current ordinal.
// If none found then it will loop back to the start to check of the available bit.
// This can be further optimized to check from start till curr in case of a rollover
func getAvailableFromCurrent(head *sequence, start, curr, end uint64) (uint64, uint64, error) {
var bytePos, bitPos uint64
var err error
if curr != 0 && curr > start {
bytePos, bitPos, err = getFirstAvailable(head, curr)
ret := posToOrdinal(bytePos, bitPos)
if end < ret || err != nil {
goto begin
}
return bytePos, bitPos, nil
}
begin:
bytePos, bitPos, err = getFirstAvailable(head, start)
ret := posToOrdinal(bytePos, bitPos)
if end < ret || err != nil {
return invalidPos, invalidPos, ErrNoBitAvailable
}
return bytePos, bitPos, nil
}
// checkIfAvailable checks if the bit correspondent to the specified ordinal is unset
// If the ordinal is beyond the sequence limits, a negative response is returned
func checkIfAvailable(head *sequence, ordinal uint64) (uint64, uint64, error) {
bytePos, bitPos := ordinalToPos(ordinal)
// Find the sequence containing this byte
current, _, _, inBlockBytePos := findSequence(head, bytePos)
if current != nil {
// Check whether the bit corresponding to the ordinal address is unset
bitSel := blockFirstBit >> (inBlockBytePos*8 + bitPos)
if current.block&bitSel == 0 {
return bytePos, bitPos, nil
}
}
return invalidPos, invalidPos, ErrBitAllocated
}
// Given the byte position and the sequences list head, return the pointer to the
// sequence containing the byte (current), the pointer to the previous sequence,
// the number of blocks preceding the block containing the byte inside the current sequence.
// If bytePos is outside of the list, function will return (nil, nil, 0, invalidPos)
func findSequence(head *sequence, bytePos uint64) (*sequence, *sequence, uint64, uint64) {
// Find the sequence containing this byte
previous := head
current := head
n := bytePos
for current.next != nil && n >= (current.count*blockBytes) { // Nil check for less than 32 addresses masks
n -= (current.count * blockBytes)
previous = current
current = current.next
}
// If byte is outside of the list, let caller know
if n >= (current.count * blockBytes) {
return nil, nil, 0, invalidPos
}
// Find the byte position inside the block and the number of blocks
// preceding the block containing the byte inside this sequence
precBlocks := n / blockBytes
inBlockBytePos := bytePos % blockBytes
return current, previous, precBlocks, inBlockBytePos
}
// PushReservation pushes the bit reservation inside the bitmask.
// Given byte and bit positions, identify the sequence (current) which holds the block containing the affected bit.
// Create a new block with the modified bit according to the operation (allocate/release).
// Create a new sequence containing the new block and insert it in the proper position.
// Remove current sequence if empty.
// Check if new sequence can be merged with neighbour (previous/next) sequences.
//
//
// Identify "current" sequence containing block:
// [prev seq] [current seq] [next seq]
//
// Based on block position, resulting list of sequences can be any of three forms:
//
// block position Resulting list of sequences
// A) block is first in current: [prev seq] [new] [modified current seq] [next seq]
// B) block is last in current: [prev seq] [modified current seq] [new] [next seq]
// C) block is in the middle of current: [prev seq] [curr pre] [new] [curr post] [next seq]
func pushReservation(bytePos, bitPos uint64, head *sequence, release bool) *sequence {
// Store list's head
newHead := head
// Find the sequence containing this byte
current, previous, precBlocks, inBlockBytePos := findSequence(head, bytePos)
if current == nil {
return newHead
}
// Construct updated block
bitSel := blockFirstBit >> (inBlockBytePos*8 + bitPos)
newBlock := current.block
if release {
newBlock &^= bitSel
} else {
newBlock |= bitSel
}
// Quit if it was a redundant request
if current.block == newBlock {
return newHead
}
// Current sequence inevitably looses one block, upadate count
current.count--
// Create new sequence
newSequence := &sequence{block: newBlock, count: 1}
// Insert the new sequence in the list based on block position
if precBlocks == 0 { // First in sequence (A)
newSequence.next = current
if current == head {
newHead = newSequence
previous = newHead
} else {
previous.next = newSequence
}
removeCurrentIfEmpty(&newHead, newSequence, current)
mergeSequences(previous)
} else if precBlocks == current.count { // Last in sequence (B)
newSequence.next = current.next
current.next = newSequence
mergeSequences(current)
} else { // In between the sequence (C)
currPre := &sequence{block: current.block, count: precBlocks, next: newSequence}
currPost := current
currPost.count -= precBlocks
newSequence.next = currPost
if currPost == head {
newHead = currPre
} else {
previous.next = currPre
}
// No merging or empty current possible here
}
return newHead
}
// Removes the current sequence from the list if empty, adjusting the head pointer if needed
func removeCurrentIfEmpty(head **sequence, previous, current *sequence) {
if current.count == 0 {
if current == *head {
*head = current.next
} else {
previous.next = current.next
current = current.next
}
}
}
// Given a pointer to a sequence, it checks if it can be merged with any following sequences
// It stops when no more merging is possible.
// TODO: Optimization: only attempt merge from start to end sequence, no need to scan till the end of the list
func mergeSequences(seq *sequence) {
if seq != nil {
// Merge all what possible from seq
for seq.next != nil && seq.block == seq.next.block {
seq.count += seq.next.count
seq.next = seq.next.next
}
// Move to next
mergeSequences(seq.next)
}
}
func getNumBlocks(numBits uint64) uint64 {
numBlocks := numBits / uint64(blockLen)
if numBits%uint64(blockLen) != 0 {
numBlocks++
}
return numBlocks
}
func ordinalToPos(ordinal uint64) (uint64, uint64) {
return ordinal / 8, ordinal % 8
}
func posToOrdinal(bytePos, bitPos uint64) uint64 {
return bytePos*8 + bitPos
}

File diff suppressed because it is too large Load diff

142
libnetwork/bitseq/store.go Normal file
View file

@ -0,0 +1,142 @@
package bitseq
import (
"encoding/json"
"fmt"
"github.com/docker/libnetwork/datastore"
"github.com/docker/libnetwork/types"
)
// Key provides the Key to be used in KV Store
func (h *Handle) Key() []string {
h.Lock()
defer h.Unlock()
return []string{h.app, h.id}
}
// KeyPrefix returns the immediate parent key that can be used for tree walk
func (h *Handle) KeyPrefix() []string {
h.Lock()
defer h.Unlock()
return []string{h.app}
}
// Value marshals the data to be stored in the KV store
func (h *Handle) Value() []byte {
b, err := json.Marshal(h)
if err != nil {
return nil
}
return b
}
// SetValue unmarshals the data from the KV store
func (h *Handle) SetValue(value []byte) error {
return json.Unmarshal(value, h)
}
// Index returns the latest DB Index as seen by this object
func (h *Handle) Index() uint64 {
h.Lock()
defer h.Unlock()
return h.dbIndex
}
// SetIndex method allows the datastore to store the latest DB Index into this object
func (h *Handle) SetIndex(index uint64) {
h.Lock()
h.dbIndex = index
h.dbExists = true
h.Unlock()
}
// Exists method is true if this object has been stored in the DB.
func (h *Handle) Exists() bool {
h.Lock()
defer h.Unlock()
return h.dbExists
}
// New method returns a handle based on the receiver handle
func (h *Handle) New() datastore.KVObject {
h.Lock()
defer h.Unlock()
return &Handle{
app: h.app,
store: h.store,
}
}
// CopyTo deep copies the handle into the passed destination object
func (h *Handle) CopyTo(o datastore.KVObject) error {
h.Lock()
defer h.Unlock()
dstH := o.(*Handle)
if h == dstH {
return nil
}
dstH.Lock()
dstH.bits = h.bits
dstH.unselected = h.unselected
dstH.head = h.head.getCopy()
dstH.app = h.app
dstH.id = h.id
dstH.dbIndex = h.dbIndex
dstH.dbExists = h.dbExists
dstH.store = h.store
dstH.curr = h.curr
dstH.Unlock()
return nil
}
// Skip provides a way for a KV Object to avoid persisting it in the KV Store
func (h *Handle) Skip() bool {
return false
}
// DataScope method returns the storage scope of the datastore
func (h *Handle) DataScope() string {
h.Lock()
defer h.Unlock()
return h.store.Scope()
}
func (h *Handle) fromDsValue(value []byte) error {
var ba []byte
if err := json.Unmarshal(value, &ba); err != nil {
return fmt.Errorf("failed to decode json: %s", err.Error())
}
if err := h.FromByteArray(ba); err != nil {
return fmt.Errorf("failed to decode handle: %s", err.Error())
}
return nil
}
func (h *Handle) writeToStore() error {
h.Lock()
store := h.store
h.Unlock()
if store == nil {
return nil
}
err := store.PutObjectAtomic(h)
if err == datastore.ErrKeyModified {
return types.RetryErrorf("failed to perform atomic write (%v). Retry might fix the error", err)
}
return err
}
func (h *Handle) deleteFromStore() error {
h.Lock()
store := h.store
h.Unlock()
if store == nil {
return nil
}
return store.DeleteObjectAtomic(h)
}

115
libnetwork/client/client.go Normal file
View file

@ -0,0 +1,115 @@
package client
import (
"fmt"
"io"
"io/ioutil"
"net/http"
"reflect"
"strings"
flag "github.com/docker/libnetwork/client/mflag"
)
// CallFunc provides environment specific call utility to invoke backend functions from UI
type CallFunc func(string, string, interface{}, map[string][]string) (io.ReadCloser, http.Header, int, error)
// NetworkCli is the UI object for network subcmds
type NetworkCli struct {
out io.Writer
err io.Writer
call CallFunc
}
// NewNetworkCli is a convenient function to create a NetworkCli object
func NewNetworkCli(out, err io.Writer, call CallFunc) *NetworkCli {
return &NetworkCli{
out: out,
err: err,
call: call,
}
}
// getMethod is Borrowed from Docker UI which uses reflection to identify the UI Handler
func (cli *NetworkCli) getMethod(args ...string) (func(string, ...string) error, bool) {
camelArgs := make([]string, len(args))
for i, s := range args {
if len(s) == 0 {
return nil, false
}
camelArgs[i] = strings.ToUpper(s[:1]) + strings.ToLower(s[1:])
}
methodName := "Cmd" + strings.Join(camelArgs, "")
method := reflect.ValueOf(cli).MethodByName(methodName)
if !method.IsValid() {
return nil, false
}
return method.Interface().(func(string, ...string) error), true
}
// Cmd is borrowed from Docker UI and acts as the entry point for network UI commands.
// network UI commands are designed to be invoked from multiple parent chains
func (cli *NetworkCli) Cmd(chain string, args ...string) error {
if len(args) > 2 {
method, exists := cli.getMethod(args[:3]...)
if exists {
return method(chain+" "+args[0]+" "+args[1], args[3:]...)
}
}
if len(args) > 1 {
method, exists := cli.getMethod(args[:2]...)
if exists {
return method(chain+" "+args[0], args[2:]...)
}
}
if len(args) > 0 {
method, exists := cli.getMethod(args[0])
if !exists {
return fmt.Errorf("%s: '%s' is not a %s command. See '%s --help'", chain, args[0], chain, chain)
}
return method(chain, args[1:]...)
}
flag.Usage()
return nil
}
// Subcmd is borrowed from Docker UI and performs the same function of configuring the subCmds
func (cli *NetworkCli) Subcmd(chain, name, signature, description string, exitOnError bool) *flag.FlagSet {
var errorHandling flag.ErrorHandling
if exitOnError {
errorHandling = flag.ExitOnError
} else {
errorHandling = flag.ContinueOnError
}
flags := flag.NewFlagSet(name, errorHandling)
flags.Usage = func() {
flags.ShortUsage()
flags.PrintDefaults()
}
flags.ShortUsage = func() {
options := ""
if signature != "" {
signature = " " + signature
}
if flags.FlagCountUndeprecated() > 0 {
options = " [OPTIONS]"
}
fmt.Fprintf(cli.out, "\nUsage: %s %s%s%s\n\n%s\n\n", chain, name, options, signature, description)
flags.SetOutput(cli.out)
}
return flags
}
func readBody(stream io.ReadCloser, hdr http.Header, statusCode int, err error) ([]byte, int, error) {
if stream != nil {
defer stream.Close()
}
if err != nil {
return nil, statusCode, err
}
body, err := ioutil.ReadAll(stream)
if err != nil {
return nil, -1, err
}
return body, statusCode, nil
}

View file

@ -0,0 +1,122 @@
package client
import (
"bytes"
"testing"
_ "github.com/docker/libnetwork/testutils"
)
func TestClientServiceInvalidCommand(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "service", "invalid")
if err == nil {
t.Fatal("Passing invalid commands must fail")
}
}
func TestClientServiceCreate(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "service", "publish", mockServiceName+"."+mockNwName)
if err != nil {
t.Fatal(err)
}
}
func TestClientServiceRm(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "service", "unpublish", mockServiceName+"."+mockNwName)
if err != nil {
t.Fatal(err)
}
}
func TestClientServiceLs(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "service", "ls")
if err != nil {
t.Fatal(err)
}
}
func TestClientServiceInfo(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "service", "info", mockServiceName+"."+mockNwName)
if err != nil {
t.Fatal(err)
}
}
func TestClientServiceInfoById(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "service", "info", mockServiceID+"."+mockNwName)
if err != nil {
t.Fatal(err)
}
}
func TestClientServiceJoin(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "service", "attach", mockContainerID, mockServiceName+"."+mockNwName)
if err != nil {
t.Fatal(err)
}
}
func TestClientServiceLeave(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "service", "detach", mockContainerID, mockServiceName+"."+mockNwName)
if err != nil {
t.Fatal(err)
}
}
// Docker Flag processing in flag.go uses os.Exit() frequently, even for --help
// TODO : Handle the --help test-case in the IT when CLI is available
/*
func TestClientNetworkServiceCreateHelp(t *testing.T) {
var out, errOut bytes.Buffer
cFunc := func(method, path string, data interface{}, headers map[string][]string) (io.ReadCloser, int, error) {
return nil, 0, nil
}
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "create", "--help")
if err != nil {
t.Fatalf(err.Error())
}
}
*/
// Docker flag processing in flag.go uses os.Exit(1) for incorrect parameter case.
// TODO : Handle the missing argument case in the IT when CLI is available
/*
func TestClientNetworkServiceCreateMissingArgument(t *testing.T) {
var out, errOut bytes.Buffer
cFunc := func(method, path string, data interface{}, headers map[string][]string) (io.ReadCloser, int, error) {
return nil, 0, nil
}
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "create")
if err != nil {
t.Fatal(err.Error())
}
}
*/

View file

@ -0,0 +1,228 @@
package client
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"strings"
"testing"
_ "github.com/docker/libnetwork/testutils"
)
// nopCloser is used to provide a dummy CallFunc for Cmd()
type nopCloser struct {
io.Reader
}
func (nopCloser) Close() error { return nil }
func TestMain(m *testing.M) {
setupMockHTTPCallback()
os.Exit(m.Run())
}
var callbackFunc func(method, path string, data interface{}, headers map[string][]string) (io.ReadCloser, http.Header, int, error)
var mockNwJSON, mockNwListJSON, mockServiceJSON, mockServiceListJSON, mockSbJSON, mockSbListJSON []byte
var mockNwName = "test"
var mockNwID = "2a3456789"
var mockServiceName = "testSrv"
var mockServiceID = "2a3456789"
var mockContainerID = "2a3456789"
var mockSandboxID = "2b3456789"
func setupMockHTTPCallback() {
var list []networkResource
nw := networkResource{Name: mockNwName, ID: mockNwID}
mockNwJSON, _ = json.Marshal(nw)
list = append(list, nw)
mockNwListJSON, _ = json.Marshal(list)
var srvList []serviceResource
ep := serviceResource{Name: mockServiceName, ID: mockServiceID, Network: mockNwName}
mockServiceJSON, _ = json.Marshal(ep)
srvList = append(srvList, ep)
mockServiceListJSON, _ = json.Marshal(srvList)
var sbxList []SandboxResource
sb := SandboxResource{ID: mockSandboxID, ContainerID: mockContainerID}
mockSbJSON, _ = json.Marshal(sb)
sbxList = append(sbxList, sb)
mockSbListJSON, _ = json.Marshal(sbxList)
dummyHTTPHdr := http.Header{}
callbackFunc = func(method, path string, data interface{}, headers map[string][]string) (io.ReadCloser, http.Header, int, error) {
var rsp string
switch method {
case "GET":
if strings.Contains(path, fmt.Sprintf("networks?name=%s", mockNwName)) {
rsp = string(mockNwListJSON)
} else if strings.Contains(path, "networks?name=") {
rsp = "[]"
} else if strings.Contains(path, fmt.Sprintf("networks?partial-id=%s", mockNwID)) {
rsp = string(mockNwListJSON)
} else if strings.Contains(path, "networks?partial-id=") {
rsp = "[]"
} else if strings.HasSuffix(path, "networks") {
rsp = string(mockNwListJSON)
} else if strings.HasSuffix(path, "networks/"+mockNwID) {
rsp = string(mockNwJSON)
} else if strings.Contains(path, fmt.Sprintf("services?name=%s", mockServiceName)) {
rsp = string(mockServiceListJSON)
} else if strings.Contains(path, "services?name=") {
rsp = "[]"
} else if strings.Contains(path, fmt.Sprintf("services?partial-id=%s", mockServiceID)) {
rsp = string(mockServiceListJSON)
} else if strings.Contains(path, "services?partial-id=") {
rsp = "[]"
} else if strings.HasSuffix(path, "services") {
rsp = string(mockServiceListJSON)
} else if strings.HasSuffix(path, "services/"+mockServiceID) {
rsp = string(mockServiceJSON)
} else if strings.Contains(path, "containers") {
return nopCloser{bytes.NewBufferString("")}, dummyHTTPHdr, 400, fmt.Errorf("Bad Request")
} else if strings.Contains(path, fmt.Sprintf("sandboxes?container-id=%s", mockContainerID)) {
rsp = string(mockSbListJSON)
} else if strings.Contains(path, fmt.Sprintf("sandboxes?partial-container-id=%s", mockContainerID)) {
rsp = string(mockSbListJSON)
}
case "POST":
var data []byte
if strings.HasSuffix(path, "networks") {
data, _ = json.Marshal(mockNwID)
} else if strings.HasSuffix(path, "services") {
data, _ = json.Marshal(mockServiceID)
} else if strings.HasSuffix(path, "backend") {
data, _ = json.Marshal(mockSandboxID)
}
rsp = string(data)
case "PUT":
case "DELETE":
rsp = ""
}
return nopCloser{bytes.NewBufferString(rsp)}, dummyHTTPHdr, 200, nil
}
}
func TestClientDummyCommand(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "dummy")
if err == nil {
t.Fatal("Incorrect Command must fail")
}
}
func TestClientNetworkInvalidCommand(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "invalid")
if err == nil {
t.Fatal("Passing invalid commands must fail")
}
}
func TestClientNetworkCreate(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "create", mockNwName)
if err != nil {
t.Fatal(err.Error())
}
}
func TestClientNetworkCreateWithDriver(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "create", "-f=dummy", mockNwName)
if err == nil {
t.Fatal("Passing incorrect flags to the create command must fail")
}
err = cli.Cmd("docker", "network", "create", "-d=dummy", mockNwName)
if err != nil {
t.Fatalf(err.Error())
}
}
func TestClientNetworkRm(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "rm", mockNwName)
if err != nil {
t.Fatal(err.Error())
}
}
func TestClientNetworkLs(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "ls")
if err != nil {
t.Fatal(err.Error())
}
}
func TestClientNetworkInfo(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "info", mockNwName)
if err != nil {
t.Fatal(err.Error())
}
}
func TestClientNetworkInfoById(t *testing.T) {
var out, errOut bytes.Buffer
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "info", mockNwID)
if err != nil {
t.Fatal(err.Error())
}
}
// Docker Flag processing in flag.go uses os.Exit() frequently, even for --help
// TODO : Handle the --help test-case in the IT when CLI is available
/*
func TestClientNetworkServiceCreateHelp(t *testing.T) {
var out, errOut bytes.Buffer
cFunc := func(method, path string, data interface{}, headers map[string][]string) (io.ReadCloser, int, error) {
return nil, 0, nil
}
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "create", "--help")
if err != nil {
t.Fatalf(err.Error())
}
}
*/
// Docker flag processing in flag.go uses os.Exit(1) for incorrect parameter case.
// TODO : Handle the missing argument case in the IT when CLI is available
/*
func TestClientNetworkServiceCreateMissingArgument(t *testing.T) {
var out, errOut bytes.Buffer
cFunc := func(method, path string, data interface{}, headers map[string][]string) (io.ReadCloser, int, error) {
return nil, 0, nil
}
cli := NewNetworkCli(&out, &errOut, callbackFunc)
err := cli.Cmd("docker", "network", "create")
if err != nil {
t.Fatal(err.Error())
}
}
*/

View file

@ -0,0 +1,27 @@
Copyright (c) 2014-2016 The Docker & Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,40 @@
Package mflag (aka multiple-flag) implements command-line flag parsing.
It's an **hacky** fork of the [official golang package](http://golang.org/pkg/flag/)
It adds:
* both short and long flag version
`./example -s red` `./example --string blue`
* multiple names for the same option
```
$>./example -h
Usage of example:
-s, --string="": a simple string
```
___
It is very flexible on purpose, so you can do things like:
```
$>./example -h
Usage of example:
-s, -string, --string="": a simple string
```
Or:
```
$>./example -h
Usage of example:
-oldflag, --newflag="": a simple string
```
You can also hide some flags from the usage, so if we want only `--newflag`:
```
$>./example -h
Usage of example:
--newflag="": a simple string
$>./example -oldflag str
str
```
See [example.go](example/example.go) for more details.

View file

@ -0,0 +1,36 @@
package main
import (
"fmt"
flag "github.com/docker/libnetwork/client/mflag"
)
var (
i int
str string
b, b2, h bool
)
func init() {
flag.Bool([]string{"#hp", "#-help"}, false, "display the help")
flag.BoolVar(&b, []string{"b", "#bal", "#bol", "-bal"}, false, "a simple bool")
flag.BoolVar(&b, []string{"g", "#gil"}, false, "a simple bool")
flag.BoolVar(&b2, []string{"#-bool"}, false, "a simple bool")
flag.IntVar(&i, []string{"-integer", "-number"}, -1, "a simple integer")
flag.StringVar(&str, []string{"s", "#hidden", "-string"}, "", "a simple string") //-s -hidden and --string will work, but -hidden won't be in the usage
flag.BoolVar(&h, []string{"h", "#help", "-help"}, false, "display the help")
flag.StringVar(&str, []string{"mode"}, "mode1", "set the mode\nmode1: use the mode1\nmode2: use the mode2\nmode3: use the mode3")
flag.Parse()
}
func main() {
if h {
flag.PrintDefaults()
} else {
fmt.Printf("s/#hidden/-string: %s\n", str)
fmt.Printf("b: %t\n", b)
fmt.Printf("-bool: %t\n", b2)
fmt.Printf("s/#hidden/-string(via lookup): %s\n", flag.Lookup("s").Value.String())
fmt.Printf("ARGS: %v\n", flag.Args())
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,529 @@
// Copyright 2014-2016 The Docker & Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mflag
import (
"bytes"
"fmt"
"os"
"sort"
"strings"
"testing"
"time"
_ "github.com/docker/libnetwork/testutils"
)
// ResetForTesting clears all flag state and sets the usage function as directed.
// After calling ResetForTesting, parse errors in flag handling will not
// exit the program.
func ResetForTesting(usage func()) {
CommandLine = NewFlagSet(os.Args[0], ContinueOnError)
Usage = usage
}
func boolString(s string) string {
if s == "0" {
return "false"
}
return "true"
}
func TestEverything(t *testing.T) {
ResetForTesting(nil)
Bool([]string{"test_bool"}, false, "bool value")
Int([]string{"test_int"}, 0, "int value")
Int64([]string{"test_int64"}, 0, "int64 value")
Uint([]string{"test_uint"}, 0, "uint value")
Uint64([]string{"test_uint64"}, 0, "uint64 value")
String([]string{"test_string"}, "0", "string value")
Float64([]string{"test_float64"}, 0, "float64 value")
Duration([]string{"test_duration"}, 0, "time.Duration value")
m := make(map[string]*Flag)
desired := "0"
visitor := func(f *Flag) {
for _, name := range f.Names {
if len(name) > 5 && name[0:5] == "test_" {
m[name] = f
ok := false
switch {
case f.Value.String() == desired:
ok = true
case name == "test_bool" && f.Value.String() == boolString(desired):
ok = true
case name == "test_duration" && f.Value.String() == desired+"s":
ok = true
}
if !ok {
t.Error("Visit: bad value", f.Value.String(), "for", name)
}
}
}
}
VisitAll(visitor)
if len(m) != 8 {
t.Error("VisitAll misses some flags")
for k, v := range m {
t.Log(k, *v)
}
}
m = make(map[string]*Flag)
Visit(visitor)
if len(m) != 0 {
t.Error("Visit sees unset flags")
for k, v := range m {
t.Log(k, *v)
}
}
// Now set all flags
Set("test_bool", "true")
Set("test_int", "1")
Set("test_int64", "1")
Set("test_uint", "1")
Set("test_uint64", "1")
Set("test_string", "1")
Set("test_float64", "1")
Set("test_duration", "1s")
desired = "1"
Visit(visitor)
if len(m) != 8 {
t.Error("Visit fails after set")
for k, v := range m {
t.Log(k, *v)
}
}
// Now test they're visited in sort order.
var flagNames []string
Visit(func(f *Flag) {
for _, name := range f.Names {
flagNames = append(flagNames, name)
}
})
if !sort.StringsAreSorted(flagNames) {
t.Errorf("flag names not sorted: %v", flagNames)
}
}
func TestGet(t *testing.T) {
ResetForTesting(nil)
Bool([]string{"test_bool"}, true, "bool value")
Int([]string{"test_int"}, 1, "int value")
Int64([]string{"test_int64"}, 2, "int64 value")
Uint([]string{"test_uint"}, 3, "uint value")
Uint64([]string{"test_uint64"}, 4, "uint64 value")
String([]string{"test_string"}, "5", "string value")
Float64([]string{"test_float64"}, 6, "float64 value")
Duration([]string{"test_duration"}, 7, "time.Duration value")
visitor := func(f *Flag) {
for _, name := range f.Names {
if len(name) > 5 && name[0:5] == "test_" {
g, ok := f.Value.(Getter)
if !ok {
t.Errorf("Visit: value does not satisfy Getter: %T", f.Value)
return
}
switch name {
case "test_bool":
ok = g.Get() == true
case "test_int":
ok = g.Get() == int(1)
case "test_int64":
ok = g.Get() == int64(2)
case "test_uint":
ok = g.Get() == uint(3)
case "test_uint64":
ok = g.Get() == uint64(4)
case "test_string":
ok = g.Get() == "5"
case "test_float64":
ok = g.Get() == float64(6)
case "test_duration":
ok = g.Get() == time.Duration(7)
}
if !ok {
t.Errorf("Visit: bad value %T(%v) for %s", g.Get(), g.Get(), name)
}
}
}
}
VisitAll(visitor)
}
func testParse(f *FlagSet, t *testing.T) {
if f.Parsed() {
t.Error("f.Parse() = true before Parse")
}
boolFlag := f.Bool([]string{"bool"}, false, "bool value")
bool2Flag := f.Bool([]string{"bool2"}, false, "bool2 value")
f.Bool([]string{"bool3"}, false, "bool3 value")
bool4Flag := f.Bool([]string{"bool4"}, false, "bool4 value")
intFlag := f.Int([]string{"-int"}, 0, "int value")
int64Flag := f.Int64([]string{"-int64"}, 0, "int64 value")
uintFlag := f.Uint([]string{"uint"}, 0, "uint value")
uint64Flag := f.Uint64([]string{"-uint64"}, 0, "uint64 value")
stringFlag := f.String([]string{"string"}, "0", "string value")
f.String([]string{"string2"}, "0", "string2 value")
singleQuoteFlag := f.String([]string{"squote"}, "", "single quoted value")
doubleQuoteFlag := f.String([]string{"dquote"}, "", "double quoted value")
mixedQuoteFlag := f.String([]string{"mquote"}, "", "mixed quoted value")
mixed2QuoteFlag := f.String([]string{"mquote2"}, "", "mixed2 quoted value")
nestedQuoteFlag := f.String([]string{"nquote"}, "", "nested quoted value")
nested2QuoteFlag := f.String([]string{"nquote2"}, "", "nested2 quoted value")
float64Flag := f.Float64([]string{"float64"}, 0, "float64 value")
durationFlag := f.Duration([]string{"duration"}, 5*time.Second, "time.Duration value")
extra := "one-extra-argument"
args := []string{
"-bool",
"-bool2=true",
"-bool4=false",
"--int", "22",
"--int64", "0x23",
"-uint", "24",
"--uint64", "25",
"-string", "hello",
"-squote='single'",
`-dquote="double"`,
`-mquote='mixed"`,
`-mquote2="mixed2'`,
`-nquote="'single nested'"`,
`-nquote2='"double nested"'`,
"-float64", "2718e28",
"-duration", "2m",
extra,
}
if err := f.Parse(args); err != nil {
t.Fatal(err)
}
if !f.Parsed() {
t.Error("f.Parse() = false after Parse")
}
if *boolFlag != true {
t.Error("bool flag should be true, is ", *boolFlag)
}
if *bool2Flag != true {
t.Error("bool2 flag should be true, is ", *bool2Flag)
}
if !f.IsSet("bool2") {
t.Error("bool2 should be marked as set")
}
if f.IsSet("bool3") {
t.Error("bool3 should not be marked as set")
}
if !f.IsSet("bool4") {
t.Error("bool4 should be marked as set")
}
if *bool4Flag != false {
t.Error("bool4 flag should be false, is ", *bool4Flag)
}
if *intFlag != 22 {
t.Error("int flag should be 22, is ", *intFlag)
}
if *int64Flag != 0x23 {
t.Error("int64 flag should be 0x23, is ", *int64Flag)
}
if *uintFlag != 24 {
t.Error("uint flag should be 24, is ", *uintFlag)
}
if *uint64Flag != 25 {
t.Error("uint64 flag should be 25, is ", *uint64Flag)
}
if *stringFlag != "hello" {
t.Error("string flag should be `hello`, is ", *stringFlag)
}
if !f.IsSet("string") {
t.Error("string flag should be marked as set")
}
if f.IsSet("string2") {
t.Error("string2 flag should not be marked as set")
}
if *singleQuoteFlag != "single" {
t.Error("single quote string flag should be `single`, is ", *singleQuoteFlag)
}
if *doubleQuoteFlag != "double" {
t.Error("double quote string flag should be `double`, is ", *doubleQuoteFlag)
}
if *mixedQuoteFlag != `'mixed"` {
t.Error("mixed quote string flag should be `'mixed\"`, is ", *mixedQuoteFlag)
}
if *mixed2QuoteFlag != `"mixed2'` {
t.Error("mixed2 quote string flag should be `\"mixed2'`, is ", *mixed2QuoteFlag)
}
if *nestedQuoteFlag != "'single nested'" {
t.Error("nested quote string flag should be `'single nested'`, is ", *nestedQuoteFlag)
}
if *nested2QuoteFlag != `"double nested"` {
t.Error("double quote string flag should be `\"double nested\"`, is ", *nested2QuoteFlag)
}
if *float64Flag != 2718e28 {
t.Error("float64 flag should be 2718e28, is ", *float64Flag)
}
if *durationFlag != 2*time.Minute {
t.Error("duration flag should be 2m, is ", *durationFlag)
}
if len(f.Args()) != 1 {
t.Error("expected one argument, got", len(f.Args()))
} else if f.Args()[0] != extra {
t.Errorf("expected argument %q got %q", extra, f.Args()[0])
}
}
func testPanic(f *FlagSet, t *testing.T) {
f.Int([]string{"-int"}, 0, "int value")
if f.Parsed() {
t.Error("f.Parse() = true before Parse")
}
args := []string{
"-int", "21",
}
f.Parse(args)
}
func TestParsePanic(t *testing.T) {
ResetForTesting(func() {})
testPanic(CommandLine, t)
}
func TestParse(t *testing.T) {
ResetForTesting(func() { t.Error("bad parse") })
testParse(CommandLine, t)
}
func TestFlagSetParse(t *testing.T) {
testParse(NewFlagSet("test", ContinueOnError), t)
}
// Declare a user-defined flag type.
type flagVar []string
func (f *flagVar) String() string {
return fmt.Sprint([]string(*f))
}
func (f *flagVar) Set(value string) error {
*f = append(*f, value)
return nil
}
func TestUserDefined(t *testing.T) {
var flags FlagSet
flags.Init("test", ContinueOnError)
var v flagVar
flags.Var(&v, []string{"v"}, "usage")
if err := flags.Parse([]string{"-v", "1", "-v", "2", "-v=3"}); err != nil {
t.Error(err)
}
if len(v) != 3 {
t.Fatal("expected 3 args; got ", len(v))
}
expect := "[1 2 3]"
if v.String() != expect {
t.Errorf("expected value %q got %q", expect, v.String())
}
}
// Declare a user-defined boolean flag type.
type boolFlagVar struct {
count int
}
func (b *boolFlagVar) String() string {
return fmt.Sprintf("%d", b.count)
}
func (b *boolFlagVar) Set(value string) error {
if value == "true" {
b.count++
}
return nil
}
func (b *boolFlagVar) IsBoolFlag() bool {
return b.count < 4
}
func TestUserDefinedBool(t *testing.T) {
var flags FlagSet
flags.Init("test", ContinueOnError)
var b boolFlagVar
var err error
flags.Var(&b, []string{"b"}, "usage")
if err = flags.Parse([]string{"-b", "-b", "-b", "-b=true", "-b=false", "-b", "barg", "-b"}); err != nil {
if b.count < 4 {
t.Error(err)
}
}
if b.count != 4 {
t.Errorf("want: %d; got: %d", 4, b.count)
}
if err == nil {
t.Error("expected error; got none")
}
}
func TestSetOutput(t *testing.T) {
var flags FlagSet
var buf bytes.Buffer
flags.SetOutput(&buf)
flags.Init("test", ContinueOnError)
flags.Parse([]string{"-unknown"})
if out := buf.String(); !strings.Contains(out, "-unknown") {
t.Logf("expected output mentioning unknown; got %q", out)
}
}
// This tests that one can reset the flags. This still works but not well, and is
// superseded by FlagSet.
func TestChangingArgs(t *testing.T) {
ResetForTesting(func() { t.Fatal("bad parse") })
oldArgs := os.Args
defer func() { os.Args = oldArgs }()
os.Args = []string{"cmd", "-before", "subcmd", "-after", "args"}
before := Bool([]string{"before"}, false, "")
if err := CommandLine.Parse(os.Args[1:]); err != nil {
t.Fatal(err)
}
cmd := Arg(0)
os.Args = Args()
after := Bool([]string{"after"}, false, "")
Parse()
args := Args()
if !*before || cmd != "subcmd" || !*after || len(args) != 1 || args[0] != "args" {
t.Fatalf("expected true subcmd true [args] got %v %v %v %v", *before, cmd, *after, args)
}
}
// Test that -help invokes the usage message and returns ErrHelp.
func TestHelp(t *testing.T) {
var helpCalled = false
fs := NewFlagSet("help test", ContinueOnError)
fs.Usage = func() { helpCalled = true }
var flag bool
fs.BoolVar(&flag, []string{"flag"}, false, "regular flag")
// Regular flag invocation should work
err := fs.Parse([]string{"-flag=true"})
if err != nil {
t.Fatal("expected no error; got ", err)
}
if !flag {
t.Error("flag was not set by -flag")
}
if helpCalled {
t.Error("help called for regular flag")
helpCalled = false // reset for next test
}
// Help flag should work as expected.
err = fs.Parse([]string{"-help"})
if err == nil {
t.Fatal("error expected")
}
if err != ErrHelp {
t.Fatal("expected ErrHelp; got ", err)
}
if !helpCalled {
t.Fatal("help was not called")
}
// If we define a help flag, that should override.
var help bool
fs.BoolVar(&help, []string{"help"}, false, "help flag")
helpCalled = false
err = fs.Parse([]string{"-help"})
if err != nil {
t.Fatal("expected no error for defined -help; got ", err)
}
if helpCalled {
t.Fatal("help was called; should not have been for defined help flag")
}
}
// Test the flag count functions.
func TestFlagCounts(t *testing.T) {
fs := NewFlagSet("help test", ContinueOnError)
var flag bool
fs.BoolVar(&flag, []string{"flag1"}, false, "regular flag")
fs.BoolVar(&flag, []string{"#deprecated1"}, false, "regular flag")
fs.BoolVar(&flag, []string{"f", "flag2"}, false, "regular flag")
fs.BoolVar(&flag, []string{"#d", "#deprecated2"}, false, "regular flag")
fs.BoolVar(&flag, []string{"flag3"}, false, "regular flag")
fs.BoolVar(&flag, []string{"g", "#flag4", "-flag4"}, false, "regular flag")
if fs.FlagCount() != 6 {
t.Fatal("FlagCount wrong. ", fs.FlagCount())
}
if fs.FlagCountUndeprecated() != 4 {
t.Fatal("FlagCountUndeprecated wrong. ", fs.FlagCountUndeprecated())
}
if fs.NFlag() != 0 {
t.Fatal("NFlag wrong. ", fs.NFlag())
}
err := fs.Parse([]string{"-fd", "-g", "-flag4"})
if err != nil {
t.Fatal("expected no error for defined -help; got ", err)
}
if fs.NFlag() != 4 {
t.Fatal("NFlag wrong. ", fs.NFlag())
}
}
// Show up bug in sortFlags
func TestSortFlags(t *testing.T) {
fs := NewFlagSet("help TestSortFlags", ContinueOnError)
var err error
var b bool
fs.BoolVar(&b, []string{"b", "-banana"}, false, "usage")
err = fs.Parse([]string{"--banana=true"})
if err != nil {
t.Fatal("expected no error; got ", err)
}
count := 0
fs.VisitAll(func(flag *Flag) {
count++
if flag == nil {
t.Fatal("VisitAll should not return a nil flag")
}
})
flagcount := fs.FlagCount()
if flagcount != count {
t.Fatalf("FlagCount (%d) != number (%d) of elements visited", flagcount, count)
}
// Make sure its idempotent
if flagcount != fs.FlagCount() {
t.Fatalf("FlagCount (%d) != fs.FlagCount() (%d) of elements visited", flagcount, fs.FlagCount())
}
count = 0
fs.Visit(func(flag *Flag) {
count++
if flag == nil {
t.Fatal("Visit should not return a nil flag")
}
})
nflag := fs.NFlag()
if nflag != count {
t.Fatalf("NFlag (%d) != number (%d) of elements visited", nflag, count)
}
if nflag != fs.NFlag() {
t.Fatalf("NFlag (%d) != fs.NFlag() (%d) of elements visited", nflag, fs.NFlag())
}
}
func TestMergeFlags(t *testing.T) {
base := NewFlagSet("base", ContinueOnError)
base.String([]string{"f"}, "", "")
fs := NewFlagSet("test", ContinueOnError)
Merge(fs, base)
if len(fs.formal) != 1 {
t.Fatalf("FlagCount (%d) != number (1) of elements merged", len(fs.formal))
}
}

View file

@ -0,0 +1,267 @@
package client
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"strings"
"text/tabwriter"
"github.com/docker/docker/pkg/stringid"
flag "github.com/docker/libnetwork/client/mflag"
"github.com/docker/libnetwork/netlabel"
)
type command struct {
name string
description string
}
var (
networkCommands = []command{
{"create", "Create a network"},
{"rm", "Remove a network"},
{"ls", "List all networks"},
{"info", "Display information of a network"},
}
)
// CmdNetwork handles the root Network UI
func (cli *NetworkCli) CmdNetwork(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "network", "COMMAND [OPTIONS] [arg...]", networkUsage(chain), false)
cmd.Require(flag.Min, 1)
err := cmd.ParseFlags(args, true)
if err == nil {
cmd.Usage()
return fmt.Errorf("invalid command : %v", args)
}
return err
}
// CmdNetworkCreate handles Network Create UI
func (cli *NetworkCli) CmdNetworkCreate(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "create", "NETWORK-NAME", "Creates a new network with a name specified by the user", false)
flDriver := cmd.String([]string{"d", "-driver"}, "", "Driver to manage the Network")
flID := cmd.String([]string{"-id"}, "", "Network ID string")
flOpts := cmd.String([]string{"o", "-opt"}, "", "Network options")
flInternal := cmd.Bool([]string{"-internal"}, false, "Config the network to be internal")
flIPv6 := cmd.Bool([]string{"-ipv6"}, false, "Enable IPv6 on the network")
flSubnet := cmd.String([]string{"-subnet"}, "", "Subnet option")
flRange := cmd.String([]string{"-ip-range"}, "", "Range option")
cmd.Require(flag.Exact, 1)
err := cmd.ParseFlags(args, true)
if err != nil {
return err
}
networkOpts := make(map[string]string)
if *flInternal {
networkOpts[netlabel.Internal] = "true"
}
if *flIPv6 {
networkOpts[netlabel.EnableIPv6] = "true"
}
driverOpts := make(map[string]string)
if *flOpts != "" {
opts := strings.Split(*flOpts, ",")
for _, opt := range opts {
driverOpts[netlabel.Key(opt)] = netlabel.Value(opt)
}
}
var icList []ipamConf
if *flSubnet != "" {
ic := ipamConf{
PreferredPool: *flSubnet,
}
if *flRange != "" {
ic.SubPool = *flRange
}
icList = append(icList, ic)
}
// Construct network create request body
nc := networkCreate{Name: cmd.Arg(0), NetworkType: *flDriver, ID: *flID, IPv4Conf: icList, DriverOpts: driverOpts, NetworkOpts: networkOpts}
obj, _, err := readBody(cli.call("POST", "/networks", nc, nil))
if err != nil {
return err
}
var replyID string
err = json.Unmarshal(obj, &replyID)
if err != nil {
return err
}
fmt.Fprintf(cli.out, "%s\n", replyID)
return nil
}
// CmdNetworkRm handles Network Delete UI
func (cli *NetworkCli) CmdNetworkRm(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "rm", "NETWORK", "Deletes a network", false)
cmd.Require(flag.Exact, 1)
err := cmd.ParseFlags(args, true)
if err != nil {
return err
}
id, err := lookupNetworkID(cli, cmd.Arg(0))
if err != nil {
return err
}
_, _, err = readBody(cli.call("DELETE", "/networks/"+id, nil, nil))
if err != nil {
return err
}
return nil
}
// CmdNetworkLs handles Network List UI
func (cli *NetworkCli) CmdNetworkLs(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "ls", "", "Lists all the networks created by the user", false)
quiet := cmd.Bool([]string{"q", "-quiet"}, false, "Only display numeric IDs")
noTrunc := cmd.Bool([]string{"#notrunc", "-no-trunc"}, false, "Do not truncate the output")
nLatest := cmd.Bool([]string{"l", "-latest"}, false, "Show the latest network created")
last := cmd.Int([]string{"n"}, -1, "Show n last created networks")
err := cmd.ParseFlags(args, true)
if err != nil {
return err
}
obj, _, err := readBody(cli.call("GET", "/networks", nil, nil))
if err != nil {
return err
}
if *last == -1 && *nLatest {
*last = 1
}
var networkResources []networkResource
err = json.Unmarshal(obj, &networkResources)
if err != nil {
return err
}
wr := tabwriter.NewWriter(cli.out, 20, 1, 3, ' ', 0)
// unless quiet (-q) is specified, print field titles
if !*quiet {
fmt.Fprintln(wr, "NETWORK ID\tNAME\tTYPE")
}
for _, networkResource := range networkResources {
ID := networkResource.ID
netName := networkResource.Name
if !*noTrunc {
ID = stringid.TruncateID(ID)
}
if *quiet {
fmt.Fprintln(wr, ID)
continue
}
netType := networkResource.Type
fmt.Fprintf(wr, "%s\t%s\t%s\t",
ID,
netName,
netType)
fmt.Fprint(wr, "\n")
}
wr.Flush()
return nil
}
// CmdNetworkInfo handles Network Info UI
func (cli *NetworkCli) CmdNetworkInfo(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "info", "NETWORK", "Displays detailed information on a network", false)
cmd.Require(flag.Exact, 1)
err := cmd.ParseFlags(args, true)
if err != nil {
return err
}
id, err := lookupNetworkID(cli, cmd.Arg(0))
if err != nil {
return err
}
obj, _, err := readBody(cli.call("GET", "/networks/"+id, nil, nil))
if err != nil {
return err
}
networkResource := &networkResource{}
if err := json.NewDecoder(bytes.NewReader(obj)).Decode(networkResource); err != nil {
return err
}
fmt.Fprintf(cli.out, "Network Id: %s\n", networkResource.ID)
fmt.Fprintf(cli.out, "Name: %s\n", networkResource.Name)
fmt.Fprintf(cli.out, "Type: %s\n", networkResource.Type)
if networkResource.Services != nil {
for _, serviceResource := range networkResource.Services {
fmt.Fprintf(cli.out, " Service Id: %s\n", serviceResource.ID)
fmt.Fprintf(cli.out, "\tName: %s\n", serviceResource.Name)
}
}
return nil
}
// Helper function to predict if a string is a name or id or partial-id
// This provides a best-effort mechanism to identify an id with the help of GET Filter APIs
// Being a UI, its most likely that name will be used by the user, which is used to lookup
// the corresponding ID. If ID is not found, this function will assume that the passed string
// is an ID by itself.
func lookupNetworkID(cli *NetworkCli, nameID string) (string, error) {
obj, statusCode, err := readBody(cli.call("GET", "/networks?name="+nameID, nil, nil))
if err != nil {
return "", err
}
if statusCode != http.StatusOK {
return "", fmt.Errorf("name query failed for %s due to : statuscode(%d) %v", nameID, statusCode, string(obj))
}
var list []*networkResource
err = json.Unmarshal(obj, &list)
if err != nil {
return "", err
}
if len(list) > 0 {
// name query filter will always return a single-element collection
return list[0].ID, nil
}
// Check for Partial-id
obj, statusCode, err = readBody(cli.call("GET", "/networks?partial-id="+nameID, nil, nil))
if err != nil {
return "", err
}
if statusCode != http.StatusOK {
return "", fmt.Errorf("partial-id match query failed for %s due to : statuscode(%d) %v", nameID, statusCode, string(obj))
}
err = json.Unmarshal(obj, &list)
if err != nil {
return "", err
}
if len(list) == 0 {
return "", fmt.Errorf("resource not found %s", nameID)
}
if len(list) > 1 {
return "", fmt.Errorf("multiple Networks matching the partial identifier (%s). Please use full identifier", nameID)
}
return list[0].ID, nil
}
func networkUsage(chain string) string {
help := "Commands:\n"
for _, cmd := range networkCommands {
help += fmt.Sprintf(" %-25.25s%s\n", cmd.name, cmd.description)
}
help += fmt.Sprintf("\nRun '%s network COMMAND --help' for more information on a command.", chain)
return help
}

View file

@ -0,0 +1,400 @@
package client
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"net/http"
"strings"
"text/tabwriter"
"github.com/docker/docker/pkg/stringid"
flag "github.com/docker/libnetwork/client/mflag"
"github.com/docker/libnetwork/netutils"
)
var (
serviceCommands = []command{
{"publish", "Publish a service"},
{"unpublish", "Remove a service"},
{"attach", "Attach a backend (container) to the service"},
{"detach", "Detach the backend from the service"},
{"ls", "Lists all services"},
{"info", "Display information about a service"},
}
)
func lookupServiceID(cli *NetworkCli, nwName, svNameID string) (string, error) {
// Sanity Check
obj, _, err := readBody(cli.call("GET", fmt.Sprintf("/networks?name=%s", nwName), nil, nil))
if err != nil {
return "", err
}
var nwList []networkResource
if err = json.Unmarshal(obj, &nwList); err != nil {
return "", err
}
if len(nwList) == 0 {
return "", fmt.Errorf("Network %s does not exist", nwName)
}
if nwName == "" {
obj, _, err := readBody(cli.call("GET", "/networks/"+nwList[0].ID, nil, nil))
if err != nil {
return "", err
}
networkResource := &networkResource{}
if err := json.NewDecoder(bytes.NewReader(obj)).Decode(networkResource); err != nil {
return "", err
}
nwName = networkResource.Name
}
// Query service by name
obj, statusCode, err := readBody(cli.call("GET", fmt.Sprintf("/services?name=%s", svNameID), nil, nil))
if err != nil {
return "", err
}
if statusCode != http.StatusOK {
return "", fmt.Errorf("name query failed for %s due to: (%d) %s", svNameID, statusCode, string(obj))
}
var list []*serviceResource
if err = json.Unmarshal(obj, &list); err != nil {
return "", err
}
for _, sr := range list {
if sr.Network == nwName {
return sr.ID, nil
}
}
// Query service by Partial-id (this covers full id as well)
obj, statusCode, err = readBody(cli.call("GET", fmt.Sprintf("/services?partial-id=%s", svNameID), nil, nil))
if err != nil {
return "", err
}
if statusCode != http.StatusOK {
return "", fmt.Errorf("partial-id match query failed for %s due to: (%d) %s", svNameID, statusCode, string(obj))
}
if err = json.Unmarshal(obj, &list); err != nil {
return "", err
}
for _, sr := range list {
if sr.Network == nwName {
return sr.ID, nil
}
}
return "", fmt.Errorf("Service %s not found on network %s", svNameID, nwName)
}
func lookupContainerID(cli *NetworkCli, cnNameID string) (string, error) {
// Container is a Docker resource, ask docker about it.
// In case of connection error, we assume we are running in dnet and return whatever was passed to us
obj, _, err := readBody(cli.call("GET", fmt.Sprintf("/containers/%s/json", cnNameID), nil, nil))
if err != nil {
// We are probably running outside of docker
return cnNameID, nil
}
var x map[string]interface{}
err = json.Unmarshal(obj, &x)
if err != nil {
return "", err
}
if iid, ok := x["Id"]; ok {
if id, ok := iid.(string); ok {
return id, nil
}
return "", errors.New("Unexpected data type for container ID in json response")
}
return "", errors.New("Cannot find container ID in json response")
}
func lookupSandboxID(cli *NetworkCli, containerID string) (string, error) {
obj, _, err := readBody(cli.call("GET", fmt.Sprintf("/sandboxes?partial-container-id=%s", containerID), nil, nil))
if err != nil {
return "", err
}
var sandboxList []SandboxResource
err = json.Unmarshal(obj, &sandboxList)
if err != nil {
return "", err
}
if len(sandboxList) == 0 {
return "", fmt.Errorf("cannot find sandbox for container: %s", containerID)
}
return sandboxList[0].ID, nil
}
// CmdService handles the service UI
func (cli *NetworkCli) CmdService(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "service", "COMMAND [OPTIONS] [arg...]", serviceUsage(chain), false)
cmd.Require(flag.Min, 1)
err := cmd.ParseFlags(args, true)
if err == nil {
cmd.Usage()
return fmt.Errorf("Invalid command : %v", args)
}
return err
}
// Parse service name for "SERVICE[.NETWORK]" format
func parseServiceName(name string) (string, string) {
s := strings.Split(name, ".")
var sName, nName string
if len(s) > 1 {
nName = s[len(s)-1]
sName = strings.Join(s[:len(s)-1], ".")
} else {
sName = s[0]
}
return sName, nName
}
// CmdServicePublish handles service create UI
func (cli *NetworkCli) CmdServicePublish(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "publish", "SERVICE[.NETWORK]", "Publish a new service on a network", false)
flAlias := flag.NewListOpts(netutils.ValidateAlias)
cmd.Var(&flAlias, []string{"-alias"}, "Add alias to self")
cmd.Require(flag.Exact, 1)
err := cmd.ParseFlags(args, true)
if err != nil {
return err
}
sn, nn := parseServiceName(cmd.Arg(0))
sc := serviceCreate{Name: sn, Network: nn, MyAliases: flAlias.GetAll()}
obj, _, err := readBody(cli.call("POST", "/services", sc, nil))
if err != nil {
return err
}
var replyID string
err = json.Unmarshal(obj, &replyID)
if err != nil {
return err
}
fmt.Fprintf(cli.out, "%s\n", replyID)
return nil
}
// CmdServiceUnpublish handles service delete UI
func (cli *NetworkCli) CmdServiceUnpublish(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "unpublish", "SERVICE[.NETWORK]", "Removes a service", false)
force := cmd.Bool([]string{"f", "-force"}, false, "force unpublish service")
cmd.Require(flag.Exact, 1)
err := cmd.ParseFlags(args, true)
if err != nil {
return err
}
sn, nn := parseServiceName(cmd.Arg(0))
serviceID, err := lookupServiceID(cli, nn, sn)
if err != nil {
return err
}
sd := serviceDelete{Name: sn, Force: *force}
_, _, err = readBody(cli.call("DELETE", "/services/"+serviceID, sd, nil))
return err
}
// CmdServiceLs handles service list UI
func (cli *NetworkCli) CmdServiceLs(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "ls", "SERVICE", "Lists all the services on a network", false)
flNetwork := cmd.String([]string{"net", "-network"}, "", "Only show the services that are published on the specified network")
quiet := cmd.Bool([]string{"q", "-quiet"}, false, "Only display numeric IDs")
noTrunc := cmd.Bool([]string{"#notrunc", "-no-trunc"}, false, "Do not truncate the output")
err := cmd.ParseFlags(args, true)
if err != nil {
return err
}
var obj []byte
if *flNetwork == "" {
obj, _, err = readBody(cli.call("GET", "/services", nil, nil))
} else {
obj, _, err = readBody(cli.call("GET", "/services?network="+*flNetwork, nil, nil))
}
if err != nil {
return err
}
var serviceResources []serviceResource
err = json.Unmarshal(obj, &serviceResources)
if err != nil {
fmt.Println(err)
return err
}
wr := tabwriter.NewWriter(cli.out, 20, 1, 3, ' ', 0)
// unless quiet (-q) is specified, print field titles
if !*quiet {
fmt.Fprintln(wr, "SERVICE ID\tNAME\tNETWORK\tCONTAINER\tSANDBOX")
}
for _, sr := range serviceResources {
ID := sr.ID
bkID, sbID, err := getBackendID(cli, ID)
if err != nil {
return err
}
if !*noTrunc {
ID = stringid.TruncateID(ID)
bkID = stringid.TruncateID(bkID)
sbID = stringid.TruncateID(sbID)
}
if !*quiet {
fmt.Fprintf(wr, "%s\t%s\t%s\t%s\t%s\n", ID, sr.Name, sr.Network, bkID, sbID)
} else {
fmt.Fprintln(wr, ID)
}
}
wr.Flush()
return nil
}
func getBackendID(cli *NetworkCli, servID string) (string, string, error) {
var (
obj []byte
err error
bk string
sb string
)
if obj, _, err = readBody(cli.call("GET", "/services/"+servID+"/backend", nil, nil)); err == nil {
var sr SandboxResource
if err := json.NewDecoder(bytes.NewReader(obj)).Decode(&sr); err == nil {
bk = sr.ContainerID
sb = sr.ID
} else {
// Only print a message, don't make the caller cli fail for this
fmt.Fprintf(cli.out, "Failed to retrieve backend list for service %s (%v)\n", servID, err)
}
}
return bk, sb, err
}
// CmdServiceInfo handles service info UI
func (cli *NetworkCli) CmdServiceInfo(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "info", "SERVICE[.NETWORK]", "Displays detailed information about a service", false)
cmd.Require(flag.Min, 1)
err := cmd.ParseFlags(args, true)
if err != nil {
return err
}
sn, nn := parseServiceName(cmd.Arg(0))
serviceID, err := lookupServiceID(cli, nn, sn)
if err != nil {
return err
}
obj, _, err := readBody(cli.call("GET", "/services/"+serviceID, nil, nil))
if err != nil {
return err
}
sr := &serviceResource{}
if err := json.NewDecoder(bytes.NewReader(obj)).Decode(sr); err != nil {
return err
}
fmt.Fprintf(cli.out, "Service Id: %s\n", sr.ID)
fmt.Fprintf(cli.out, "\tName: %s\n", sr.Name)
fmt.Fprintf(cli.out, "\tNetwork: %s\n", sr.Network)
return nil
}
// CmdServiceAttach handles service attach UI
func (cli *NetworkCli) CmdServiceAttach(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "attach", "CONTAINER SERVICE[.NETWORK]", "Sets a container as a service backend", false)
flAlias := flag.NewListOpts(netutils.ValidateAlias)
cmd.Var(&flAlias, []string{"-alias"}, "Add alias for another container")
cmd.Require(flag.Min, 2)
err := cmd.ParseFlags(args, true)
if err != nil {
return err
}
containerID, err := lookupContainerID(cli, cmd.Arg(0))
if err != nil {
return err
}
sandboxID, err := lookupSandboxID(cli, containerID)
if err != nil {
return err
}
sn, nn := parseServiceName(cmd.Arg(1))
serviceID, err := lookupServiceID(cli, nn, sn)
if err != nil {
return err
}
nc := serviceAttach{SandboxID: sandboxID, Aliases: flAlias.GetAll()}
_, _, err = readBody(cli.call("POST", "/services/"+serviceID+"/backend", nc, nil))
return err
}
// CmdServiceDetach handles service detach UI
func (cli *NetworkCli) CmdServiceDetach(chain string, args ...string) error {
cmd := cli.Subcmd(chain, "detach", "CONTAINER SERVICE", "Removes a container from service backend", false)
cmd.Require(flag.Min, 2)
err := cmd.ParseFlags(args, true)
if err != nil {
return err
}
sn, nn := parseServiceName(cmd.Arg(1))
containerID, err := lookupContainerID(cli, cmd.Arg(0))
if err != nil {
return err
}
sandboxID, err := lookupSandboxID(cli, containerID)
if err != nil {
return err
}
serviceID, err := lookupServiceID(cli, nn, sn)
if err != nil {
return err
}
_, _, err = readBody(cli.call("DELETE", "/services/"+serviceID+"/backend/"+sandboxID, nil, nil))
if err != nil {
return err
}
return nil
}
func serviceUsage(chain string) string {
help := "Commands:\n"
for _, cmd := range serviceCommands {
help += fmt.Sprintf(" %-10.10s%s\n", cmd.name, cmd.description)
}
help += fmt.Sprintf("\nRun '%s service COMMAND --help' for more information on a command.", chain)
return help
}

View file

@ -0,0 +1,96 @@
package client
import "github.com/docker/libnetwork/types"
/***********
Resources
************/
// networkResource is the body of the "get network" http response message
type networkResource struct {
Name string `json:"name"`
ID string `json:"id"`
Type string `json:"type"`
Services []*serviceResource `json:"services"`
}
// serviceResource is the body of the "get service" http response message
type serviceResource struct {
Name string `json:"name"`
ID string `json:"id"`
Network string `json:"network"`
}
// SandboxResource is the body of "get service backend" response message
type SandboxResource struct {
ID string `json:"id"`
Key string `json:"key"`
ContainerID string `json:"container_id"`
}
/***********
Body types
************/
type ipamConf struct {
PreferredPool string
SubPool string
Gateway string
AuxAddresses map[string]string
}
// networkCreate is the expected body of the "create network" http request message
type networkCreate struct {
Name string `json:"name"`
ID string `json:"id"`
NetworkType string `json:"network_type"`
IPv4Conf []ipamConf `json:"ipv4_configuration"`
DriverOpts map[string]string `json:"driver_opts"`
NetworkOpts map[string]string `json:"network_opts"`
}
// serviceCreate represents the body of the "publish service" http request message
type serviceCreate struct {
Name string `json:"name"`
MyAliases []string `json:"my_aliases"`
Network string `json:"network_name"`
}
// serviceDelete represents the body of the "unpublish service" http request message
type serviceDelete struct {
Name string `json:"name"`
Force bool `json:"force"`
}
// serviceAttach represents the expected body of the "attach/detach sandbox to/from service" http request messages
type serviceAttach struct {
SandboxID string `json:"sandbox_id"`
Aliases []string `json:"aliases"`
}
// SandboxCreate is the body of the "post /sandboxes" http request message
type SandboxCreate struct {
ContainerID string `json:"container_id"`
HostName string `json:"host_name"`
DomainName string `json:"domain_name"`
HostsPath string `json:"hosts_path"`
ResolvConfPath string `json:"resolv_conf_path"`
DNS []string `json:"dns"`
ExtraHosts []extraHost `json:"extra_hosts"`
UseDefaultSandbox bool `json:"use_default_sandbox"`
ExposedPorts []types.TransportPort `json:"exposed_ports"`
PortMapping []types.PortBinding `json:"port_mapping"`
}
// extraHost represents the extra host object
type extraHost struct {
Name string `json:"name"`
Address string `json:"address"`
}
// sandboxParentUpdate is the object carrying the information about the
// sandbox parent that needs to be updated.
type sandboxParentUpdate struct {
ContainerID string `json:"container_id"`
Name string `json:"name"`
Address string `json:"address"`
}

View file

@ -0,0 +1,37 @@
package cluster
import (
"context"
"github.com/docker/docker/api/types/network"
)
const (
// EventSocketChange control socket changed
EventSocketChange = iota
// EventNodeReady cluster node in ready state
EventNodeReady
// EventNodeLeave node is leaving the cluster
EventNodeLeave
// EventNetworkKeysAvailable network keys correctly configured in the networking layer
EventNetworkKeysAvailable
)
// ConfigEventType type of the event produced by the cluster
type ConfigEventType uint8
// Provider provides clustering config details
type Provider interface {
IsManager() bool
IsAgent() bool
GetLocalAddress() string
GetListenAddress() string
GetAdvertiseAddress() string
GetDataPathAddress() string
GetRemoteAddressList() []string
ListenClusterEvents() <-chan ConfigEventType
AttachNetwork(string, string, []string) (*network.NetworkingConfig, error)
DetachNetwork(string, string) error
UpdateAttachment(string, string, *network.NetworkingConfig) error
WaitForDetachment(context.Context, string, string, string, string) error
}

View file

@ -0,0 +1,4 @@
FROM alpine
RUN apk add --no-cache curl
COPY diagnosticClient /usr/local/bin/diagnosticClient
ENTRYPOINT ["/usr/local/bin/diagnosticClient"]

View file

@ -0,0 +1,5 @@
FROM docker:17.12-dind
RUN apk add --no-cache curl
ENV DIND_CLIENT=true
COPY daemon.json /etc/docker/daemon.json
COPY diagnosticClient /usr/local/bin/diagnosticClient

View file

@ -0,0 +1,262 @@
---
description: Learn to use the built-in network debugger to debug overlay networking problems
keywords: network, troubleshooting, debug
title: Debug overlay or swarm networking issues
---
**WARNING**
This tool can change the internal state of the libnetwork API, be really mindful
on its use and read carefully the following guide. Improper use of it will damage
or permanently destroy the network configuration.
Docker CE 17.12 and higher introduce a network debugging tool designed to help
debug issues with overlay networks and swarm services running on Linux hosts.
When enabled, a network diagnostic server listens on the specified port and
provides diagnostic information. The network debugging tool should only be
started to debug specific issues, and should not be left running all the time.
Information about networks is stored in the database, which can be examined using
the API. Currently the database contains information about the overlay network
as well as the service discovery data.
The Docker API exposes endpoints to query and control the network debugging
tool. CLI integration is provided as a preview, but the implementation is not
yet considered stable and commands and options may change without notice.
The tool is available into 2 forms:
1) client only: dockereng/network-diagnostic:onlyclient
2) docker in docker version: dockereng/network-diagnostic:17.12-dind
The latter allows to use the tool with a cluster running an engine older than 17.12
## Enable the diagnostic server
The tool currently only works on Docker hosts running on Linux. To enable it on a node
follow the step below.
1. Set the `network-diagnostic-port` to a port which is free on the Docker
host, in the `/etc/docker/daemon.json` configuration file.
```json
“network-diagnostic-port”: <port>
```
2. Get the process ID (PID) of the `dockerd` process. It is the second field in
the output, and is typically a number from 2 to 6 digits long.
```bash
$ ps aux |grep dockerd | grep -v grep
```
3. Reload the Docker configuration without restarting Docker, by sending the
`HUP` signal to the PID you found in the previous step.
```bash
kill -HUP <pid-of-dockerd>
```
If systemd is used the command `systemctl reload docker` will be enough
A message like the following will appear in the Docker host logs:
```none
Starting the diagnostic server listening on <port> for commands
```
## Disable the diagnostic tool
Repeat these steps for each node participating in the swarm.
1. Remove the `network-diagnostic-port` key from the `/etc/docker/daemon.json`
configuration file.
2. Get the process ID (PID) of the `dockerd` process. It is the second field in
the output, and is typically a number from 2 to 6 digits long.
```bash
$ ps aux |grep dockerd | grep -v grep
```
3. Reload the Docker configuration without restarting Docker, by sending the
`HUP` signal to the PID you found in the previous step.
```bash
kill -HUP <pid-of-dockerd>
```
A message like the following will appear in the Docker host logs:
```none
Disabling the diagnostic server
```
## Access the diagnostic tool's API
The network diagnostic tool exposes its own RESTful API. To access the API,
send a HTTP request to the port where the tool is listening. The following
commands assume the tool is listening on port 2000.
Examples are not given for every endpoint.
### Get help
```bash
$ curl localhost:2000/help
OK
/updateentry
/getentry
/gettable
/leavenetwork
/createentry
/help
/clusterpeers
/ready
/joinnetwork
/deleteentry
/networkpeers
/
/join
```
### Join or leave the network database cluster
```bash
$ curl localhost:2000/join?members=ip1,ip2,...
```
```bash
$ curl localhost:2000/leave?members=ip1,ip2,...
```
`ip1`, `ip2`, ... are the swarm node ips (usually one is enough)
### Join or leave a network
```bash
$ curl localhost:2000/joinnetwork?nid=<network id>
```
```bash
$ curl localhost:2000/leavenetwork?nid=<network id>
```
`network id` can be retrieved on the manager with `docker network ls --no-trunc` and has
to be the full length identifier
### List cluster peers
```bash
$ curl localhost:2000/clusterpeers
```
### List nodes connected to a given network
```bash
$ curl localhost:2000/networkpeers?nid=<network id>
```
`network id` can be retrieved on the manager with `docker network ls --no-trunc` and has
to be the full length identifier
### Dump database tables
The tables are called `endpoint_table` and `overlay_peer_table`.
The `overlay_peer_table` contains all the overlay forwarding information
The `endpoint_table` contains all the service discovery information
```bash
$ curl localhost:2000/gettable?nid=<network id>&tname=<table name>
```
### Interact with a specific database table
The tables are called `endpoint_table` and `overlay_peer_table`.
```bash
$ curl localhost:2000/<method>?nid=<network id>&tname=<table name>&key=<key>[&value=<value>]
```
Note:
operations on tables have node ownership, this means that are going to remain persistent till
the node that inserted them is part of the cluster
## Access the diagnostic tool's CLI
The CLI is provided as a preview and is not yet stable. Commands or options may
change at any time.
The CLI executable is called `diagnosticClient` and is made available using a
standalone container.
`docker run --net host dockereng/network-diagnostic:onlyclient -v -net <full network id> -t sd`
The following flags are supported:
| Flag | Description |
|---------------|-------------------------------------------------|
| -t <string> | Table one of `sd` or `overlay`. |
| -ip <string> | The IP address to query. Defaults to 127.0.0.1. |
| -net <string> | The target network ID. |
| -port <int> | The target port. (default port is 2000) |
| -a | Join/leave network |
| -v | Enable verbose output. |
*NOTE*
By default the tool won't try to join the network. This is following the intent to not change
the state on which the node is when the diagnostic client is run. This means that it is safe
to run the diagnosticClient against a running daemon because it will just dump the current state.
When using instead the diagnosticClient in the containerized version the flag `-a` MUST be passed
to avoid retrieving empty results. On the other side using the `-a` flag against a loaded daemon
will have the undesirable side effect to leave the network and so cutting down the data path for
that daemon.
### Container version of the diagnostic tool
The CLI is provided as a container with a 17.12 engine that needs to run using privileged mode.
*NOTE*
Remember that table operations have ownership, so any `create entry` will be persistent till
the diagnostic container is part of the swarm.
1. Make sure that the node where the diagnostic client will run is not part of the swarm, if so do `docker swarm leave -f`
2. To run the container, use a command like the following:
```bash
$ docker container run --name net-diagnostic -d --privileged --network host dockereng/network-diagnostic:17.12-dind
```
3. Connect to the container using `docker exec -it <container-ID> sh`,
and start the server using the following command:
```bash
$ kill -HUP 1
```
4. Join the diagnostic container to the swarm, then run the diagnostic CLI within the container.
```bash
$ ./diagnosticClient <flags>...
```
4. When finished debugging, leave the swarm and stop the container.
### Examples
The following commands dump the service discovery table and verify node
ownership.
*NOTE*
Remember to use the full network ID, you can easily find that with `docker network ls --no-trunc`
**Service discovery and load balancer:**
```bash
$ diagnostiClient -t sd -v -net n8a8ie6tb3wr2e260vxj8ncy4 -a
```
**Overlay network:**
```bash
$ diagnostiClient -port 2001 -t overlay -v -net n8a8ie6tb3wr2e260vxj8ncy4 -a
```

View file

@ -0,0 +1,4 @@
{
"debug": true,
"network-diagnostic-port": 2000
}

View file

@ -0,0 +1,209 @@
package main
import (
"bufio"
"encoding/base64"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"strings"
"github.com/docker/libnetwork"
"github.com/docker/libnetwork/diagnostic"
"github.com/docker/libnetwork/drivers/overlay"
"github.com/sirupsen/logrus"
)
const (
readyPath = "http://%s:%d/ready"
joinNetwork = "http://%s:%d/joinnetwork?nid=%s"
leaveNetwork = "http://%s:%d/leavenetwork?nid=%s"
clusterPeers = "http://%s:%d/clusterpeers?json"
networkPeers = "http://%s:%d/networkpeers?nid=%s&json"
dumpTable = "http://%s:%d/gettable?nid=%s&tname=%s&json"
deleteEntry = "http://%s:%d/deleteentry?nid=%s&tname=%s&key=%s&json"
)
func httpIsOk(body io.ReadCloser) {
b, err := ioutil.ReadAll(body)
if err != nil {
logrus.Fatalf("Failed the body parse %s", err)
}
if !strings.Contains(string(b), "OK") {
logrus.Fatalf("Server not ready %s", b)
}
body.Close()
}
func main() {
ipPtr := flag.String("ip", "127.0.0.1", "ip address")
portPtr := flag.Int("port", 2000, "port")
networkPtr := flag.String("net", "", "target network")
tablePtr := flag.String("t", "", "table to process <sd/overlay>")
remediatePtr := flag.Bool("r", false, "perform remediation deleting orphan entries")
joinPtr := flag.Bool("a", false, "join/leave network")
verbosePtr := flag.Bool("v", false, "verbose output")
flag.Parse()
if *verbosePtr {
logrus.SetLevel(logrus.DebugLevel)
}
if _, ok := os.LookupEnv("DIND_CLIENT"); !ok && *joinPtr {
logrus.Fatal("you are not using the client in docker in docker mode, the use of the -a flag can be disruptive, " +
"please remove it (doc:https://github.com/docker/libnetwork/blob/master/cmd/diagnostic/README.md)")
}
logrus.Infof("Connecting to %s:%d checking ready", *ipPtr, *portPtr)
resp, err := http.Get(fmt.Sprintf(readyPath, *ipPtr, *portPtr))
if err != nil {
logrus.WithError(err).Fatalf("The connection failed")
}
httpIsOk(resp.Body)
clusterPeers := fetchNodePeers(*ipPtr, *portPtr, "")
var networkPeers map[string]string
var joinedNetwork bool
if *networkPtr != "" {
if *joinPtr {
logrus.Infof("Joining the network:%q", *networkPtr)
resp, err = http.Get(fmt.Sprintf(joinNetwork, *ipPtr, *portPtr, *networkPtr))
if err != nil {
logrus.WithError(err).Fatalf("Failed joining the network")
}
httpIsOk(resp.Body)
joinedNetwork = true
}
networkPeers = fetchNodePeers(*ipPtr, *portPtr, *networkPtr)
if len(networkPeers) == 0 {
logrus.Warnf("There is no peer on network %q, check the network ID, and verify that is the non truncated version", *networkPtr)
}
}
switch *tablePtr {
case "sd":
fetchTable(*ipPtr, *portPtr, *networkPtr, "endpoint_table", clusterPeers, networkPeers, *remediatePtr)
case "overlay":
fetchTable(*ipPtr, *portPtr, *networkPtr, "overlay_peer_table", clusterPeers, networkPeers, *remediatePtr)
}
if joinedNetwork {
logrus.Infof("Leaving the network:%q", *networkPtr)
resp, err = http.Get(fmt.Sprintf(leaveNetwork, *ipPtr, *portPtr, *networkPtr))
if err != nil {
logrus.WithError(err).Fatalf("Failed leaving the network")
}
httpIsOk(resp.Body)
}
}
func fetchNodePeers(ip string, port int, network string) map[string]string {
if network == "" {
logrus.Infof("Fetch cluster peers")
} else {
logrus.Infof("Fetch peers network:%q", network)
}
var path string
if network != "" {
path = fmt.Sprintf(networkPeers, ip, port, network)
} else {
path = fmt.Sprintf(clusterPeers, ip, port)
}
resp, err := http.Get(path)
if err != nil {
logrus.WithError(err).Fatalf("Failed fetching path")
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
logrus.WithError(err).Fatalf("Failed the body parse")
}
output := diagnostic.HTTPResult{Details: &diagnostic.TablePeersResult{}}
err = json.Unmarshal(body, &output)
if err != nil {
logrus.WithError(err).Fatalf("Failed the json unmarshalling")
}
logrus.Debugf("Parsing JSON response")
result := make(map[string]string, output.Details.(*diagnostic.TablePeersResult).Length)
for _, v := range output.Details.(*diagnostic.TablePeersResult).Elements {
logrus.Debugf("name:%s ip:%s", v.Name, v.IP)
result[v.Name] = v.IP
}
return result
}
func fetchTable(ip string, port int, network, tableName string, clusterPeers, networkPeers map[string]string, remediate bool) {
logrus.Infof("Fetch %s table and check owners", tableName)
resp, err := http.Get(fmt.Sprintf(dumpTable, ip, port, network, tableName))
if err != nil {
logrus.WithError(err).Fatalf("Failed fetching endpoint table")
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
logrus.WithError(err).Fatalf("Failed the body parse")
}
output := diagnostic.HTTPResult{Details: &diagnostic.TableEndpointsResult{}}
err = json.Unmarshal(body, &output)
if err != nil {
logrus.WithError(err).Fatalf("Failed the json unmarshalling")
}
logrus.Debug("Parsing data structures")
var orphanKeys []string
for _, v := range output.Details.(*diagnostic.TableEndpointsResult).Elements {
decoded, err := base64.StdEncoding.DecodeString(v.Value)
if err != nil {
logrus.WithError(err).Errorf("Failed decoding entry")
continue
}
switch tableName {
case "endpoint_table":
var elem libnetwork.EndpointRecord
elem.Unmarshal(decoded)
logrus.Debugf("key:%s value:%+v owner:%s", v.Key, elem, v.Owner)
case "overlay_peer_table":
var elem overlay.PeerRecord
elem.Unmarshal(decoded)
logrus.Debugf("key:%s value:%+v owner:%s", v.Key, elem, v.Owner)
}
if _, ok := networkPeers[v.Owner]; !ok {
logrus.Warnf("The element with key:%s does not belong to any node on this network", v.Key)
orphanKeys = append(orphanKeys, v.Key)
}
if _, ok := clusterPeers[v.Owner]; !ok {
logrus.Warnf("The element with key:%s does not belong to any node on this cluster", v.Key)
}
}
if len(orphanKeys) > 0 && remediate {
logrus.Warnf("The following keys:%v results as orphan, do you want to proceed with the deletion (this operation is irreversible)? [Yes/No]", orphanKeys)
reader := bufio.NewReader(os.Stdin)
text, _ := reader.ReadString('\n')
text = strings.Replace(text, "\n", "", -1)
if strings.Compare(text, "Yes") == 0 {
for _, k := range orphanKeys {
resp, err := http.Get(fmt.Sprintf(deleteEntry, ip, port, network, tableName, k))
if err != nil {
logrus.WithError(err).Errorf("Failed deleting entry k:%s", k)
break
}
resp.Body.Close()
}
} else {
logrus.Infof("Deletion skipped")
}
}
}

146
libnetwork/cmd/dnet/cmd.go Normal file
View file

@ -0,0 +1,146 @@
package main
import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"github.com/docker/libnetwork/client"
"github.com/moby/term"
"github.com/urfave/cli"
)
var (
containerCreateCommand = cli.Command{
Name: "create",
Usage: "Create a container",
Action: runContainerCreate,
}
containerRmCommand = cli.Command{
Name: "rm",
Usage: "Remove a container",
Action: runContainerRm,
}
containerCommands = []cli.Command{
containerCreateCommand,
containerRmCommand,
}
dnetCommands = []cli.Command{
createDockerCommand("network"),
createDockerCommand("service"),
{
Name: "container",
Usage: "Container management commands",
Subcommands: containerCommands,
},
}
)
func runContainerCreate(c *cli.Context) {
if len(c.Args()) == 0 {
fmt.Println("Please provide container id argument")
os.Exit(1)
}
sc := client.SandboxCreate{ContainerID: c.Args()[0]}
obj, _, err := readBody(epConn.httpCall("POST", "/sandboxes", sc, nil))
if err != nil {
fmt.Printf("POST failed during create container: %v\n", err)
os.Exit(1)
}
var replyID string
err = json.Unmarshal(obj, &replyID)
if err != nil {
fmt.Printf("Unmarshall of response failed during create container: %v\n", err)
os.Exit(1)
}
fmt.Printf("%s\n", replyID)
}
func runContainerRm(c *cli.Context) {
var sbList []*client.SandboxResource
if len(c.Args()) == 0 {
fmt.Println("Please provide container id argument")
os.Exit(1)
}
obj, _, err := readBody(epConn.httpCall("GET", "/sandboxes?partial-container-id="+c.Args()[0], nil, nil))
if err != nil {
fmt.Printf("GET failed during container id lookup: %v\n", err)
os.Exit(1)
}
err = json.Unmarshal(obj, &sbList)
if err != nil {
fmt.Printf("Unmarshall of container id lookup response failed: %v", err)
os.Exit(1)
}
if len(sbList) == 0 {
fmt.Printf("No sandbox for container %s found\n", c.Args()[0])
os.Exit(1)
}
_, _, err = readBody(epConn.httpCall("DELETE", "/sandboxes/"+sbList[0].ID, nil, nil))
if err != nil {
fmt.Printf("DELETE of sandbox id %s failed: %v", sbList[0].ID, err)
os.Exit(1)
}
}
func runDockerCommand(c *cli.Context, cmd string) {
_, stdout, stderr := term.StdStreams()
oldcli := client.NewNetworkCli(stdout, stderr, epConn.httpCall)
var args []string
args = append(args, cmd)
if c.Bool("h") {
args = append(args, "--help")
} else {
args = append(args, c.Args()...)
}
if err := oldcli.Cmd("dnet", args...); err != nil {
fmt.Println(err)
os.Exit(1)
}
}
func createDockerCommand(cmd string) cli.Command {
return cli.Command{
Name: cmd,
Usage: fmt.Sprintf("%s management commands", cmd),
SkipFlagParsing: true,
Action: func(c *cli.Context) {
runDockerCommand(c, cmd)
},
Subcommands: []cli.Command{
{
Name: "h, -help",
Usage: fmt.Sprintf("%s help", cmd),
},
},
}
}
func readBody(stream io.ReadCloser, hdr http.Header, statusCode int, err error) ([]byte, int, error) {
if stream != nil {
defer stream.Close()
}
if err != nil {
return nil, statusCode, err
}
body, err := ioutil.ReadAll(stream)
if err != nil {
return nil, -1, err
}
return body, statusCode, nil
}

582
libnetwork/cmd/dnet/dnet.go Normal file
View file

@ -0,0 +1,582 @@
package main
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"net/http/httptest"
"net/url"
"os"
"os/signal"
"strconv"
"strings"
"syscall"
"time"
"github.com/BurntSushi/toml"
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/pkg/discovery"
"github.com/docker/docker/pkg/reexec"
"github.com/docker/libnetwork"
"github.com/docker/libnetwork/api"
"github.com/docker/libnetwork/cluster"
"github.com/docker/libnetwork/config"
"github.com/docker/libnetwork/datastore"
"github.com/docker/libnetwork/driverapi"
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/netutils"
"github.com/docker/libnetwork/options"
"github.com/docker/libnetwork/types"
"github.com/gorilla/mux"
"github.com/moby/term"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)
const (
// DefaultHTTPHost is used if only port is provided to -H flag e.g. docker -d -H tcp://:8080
DefaultHTTPHost = "0.0.0.0"
// DefaultHTTPPort is the default http port used by dnet
DefaultHTTPPort = 2385
// DefaultUnixSocket exported
DefaultUnixSocket = "/var/run/dnet.sock"
cfgFileEnv = "LIBNETWORK_CFG"
defaultCfgFile = "/etc/default/libnetwork.toml"
defaultHeartbeat = time.Duration(10) * time.Second
ttlFactor = 2
)
var epConn *dnetConnection
func main() {
if reexec.Init() {
return
}
_, stdout, stderr := term.StdStreams()
logrus.SetOutput(stderr)
err := dnetApp(stdout, stderr)
if err != nil {
os.Exit(1)
}
}
// ParseConfig parses the libnetwork configuration file
func (d *dnetConnection) parseOrchestrationConfig(tomlCfgFile string) error {
dummy := &dnetConnection{}
if _, err := toml.DecodeFile(tomlCfgFile, dummy); err != nil {
return err
}
if dummy.Orchestration != nil {
d.Orchestration = dummy.Orchestration
}
return nil
}
func (d *dnetConnection) parseConfig(cfgFile string) (*config.Config, error) {
if strings.Trim(cfgFile, " ") == "" {
cfgFile = os.Getenv(cfgFileEnv)
if strings.Trim(cfgFile, " ") == "" {
cfgFile = defaultCfgFile
}
}
if err := d.parseOrchestrationConfig(cfgFile); err != nil {
return nil, err
}
return config.ParseConfig(cfgFile)
}
func processConfig(cfg *config.Config) []config.Option {
options := []config.Option{}
if cfg == nil {
return options
}
dn := "bridge"
if strings.TrimSpace(cfg.Daemon.DefaultNetwork) != "" {
dn = cfg.Daemon.DefaultNetwork
}
options = append(options, config.OptionDefaultNetwork(dn))
dd := "bridge"
if strings.TrimSpace(cfg.Daemon.DefaultDriver) != "" {
dd = cfg.Daemon.DefaultDriver
}
options = append(options, config.OptionDefaultDriver(dd))
if cfg.Daemon.Labels != nil {
options = append(options, config.OptionLabels(cfg.Daemon.Labels))
}
if dcfg, ok := cfg.Scopes[datastore.GlobalScope]; ok && dcfg.IsValid() {
options = append(options, config.OptionKVProvider(dcfg.Client.Provider))
options = append(options, config.OptionKVProviderURL(dcfg.Client.Address))
}
dOptions, err := startDiscovery(&cfg.Cluster)
if err != nil {
logrus.Infof("Skipping discovery : %s", err.Error())
} else {
options = append(options, dOptions...)
}
return options
}
func startDiscovery(cfg *config.ClusterCfg) ([]config.Option, error) {
if cfg == nil {
return nil, errors.New("discovery requires a valid configuration")
}
hb := time.Duration(cfg.Heartbeat) * time.Second
if hb == 0 {
hb = defaultHeartbeat
}
logrus.Infof("discovery : %s %s", cfg.Discovery, hb.String())
d, err := discovery.New(cfg.Discovery, hb, ttlFactor*hb, map[string]string{})
if err != nil {
return nil, err
}
if cfg.Address == "" {
iface, err := net.InterfaceByName("eth0")
if err != nil {
return nil, err
}
addrs, err := iface.Addrs()
if err != nil || len(addrs) == 0 {
return nil, err
}
ip, _, _ := net.ParseCIDR(addrs[0].String())
cfg.Address = ip.String()
}
if ip := net.ParseIP(cfg.Address); ip == nil {
return nil, errors.New("address config should be either ipv4 or ipv6 address")
}
if err := d.Register(cfg.Address + ":0"); err != nil {
return nil, err
}
options := []config.Option{config.OptionDiscoveryWatcher(d), config.OptionDiscoveryAddress(cfg.Address)}
go func() {
for {
select {
case <-time.After(hb):
if err := d.Register(cfg.Address + ":0"); err != nil {
logrus.Warn(err)
}
}
}
}()
return options, nil
}
func dnetApp(stdout, stderr io.Writer) error {
app := cli.NewApp()
app.Name = "dnet"
app.Usage = "A self-sufficient runtime for container networking."
app.Flags = dnetFlags
app.Before = processFlags
app.Commands = dnetCommands
app.Run(os.Args)
return nil
}
func createDefaultNetwork(c libnetwork.NetworkController) {
nw := c.Config().Daemon.DefaultNetwork
d := c.Config().Daemon.DefaultDriver
createOptions := []libnetwork.NetworkOption{}
genericOption := options.Generic{}
if nw != "" && d != "" {
// Bridge driver is special due to legacy reasons
if d == "bridge" {
genericOption[netlabel.GenericData] = map[string]string{
"BridgeName": "docker0",
"DefaultBridge": "true",
}
createOptions = append(createOptions,
libnetwork.NetworkOptionGeneric(genericOption),
ipamOption(nw))
}
if n, err := c.NetworkByName(nw); err == nil {
logrus.Debugf("Default network %s already present. Deleting it", nw)
if err = n.Delete(); err != nil {
logrus.Debugf("Network could not be deleted: %v", err)
return
}
}
_, err := c.NewNetwork(d, nw, "", createOptions...)
if err != nil {
logrus.Errorf("Error creating default network : %s : %v", nw, err)
}
}
}
type dnetConnection struct {
// proto holds the client protocol i.e. unix.
proto string
// addr holds the client address.
addr string
Orchestration *NetworkOrchestration
configEvent chan cluster.ConfigEventType
}
// NetworkOrchestration exported
type NetworkOrchestration struct {
Agent bool
Manager bool
Bind string
Peer string
}
func (d *dnetConnection) dnetDaemon(cfgFile string) error {
if err := startTestDriver(); err != nil {
return fmt.Errorf("failed to start test driver: %v", err)
}
cfg, err := d.parseConfig(cfgFile)
var cOptions []config.Option
if err == nil {
cOptions = processConfig(cfg)
} else {
logrus.Errorf("Error parsing config %v", err)
}
bridgeConfig := options.Generic{
"EnableIPForwarding": true,
"EnableIPTables": true,
}
bridgeOption := options.Generic{netlabel.GenericData: bridgeConfig}
cOptions = append(cOptions, config.OptionDriverConfig("bridge", bridgeOption))
controller, err := libnetwork.New(cOptions...)
if err != nil {
fmt.Println("Error starting dnetDaemon :", err)
return err
}
controller.SetClusterProvider(d)
if d.Orchestration.Agent || d.Orchestration.Manager {
d.configEvent <- cluster.EventNodeReady
}
createDefaultNetwork(controller)
httpHandler := api.NewHTTPHandler(controller)
r := mux.NewRouter().StrictSlash(false)
post := r.PathPrefix("/{.*}/networks").Subrouter()
post.Methods("GET", "PUT", "POST", "DELETE").HandlerFunc(httpHandler)
post = r.PathPrefix("/networks").Subrouter()
post.Methods("GET", "PUT", "POST", "DELETE").HandlerFunc(httpHandler)
post = r.PathPrefix("/{.*}/services").Subrouter()
post.Methods("GET", "PUT", "POST", "DELETE").HandlerFunc(httpHandler)
post = r.PathPrefix("/services").Subrouter()
post.Methods("GET", "PUT", "POST", "DELETE").HandlerFunc(httpHandler)
post = r.PathPrefix("/{.*}/sandboxes").Subrouter()
post.Methods("GET", "PUT", "POST", "DELETE").HandlerFunc(httpHandler)
post = r.PathPrefix("/sandboxes").Subrouter()
post.Methods("GET", "PUT", "POST", "DELETE").HandlerFunc(httpHandler)
handleSignals(controller)
setupDumpStackTrap()
return http.ListenAndServe(d.addr, r)
}
func (d *dnetConnection) IsManager() bool {
return d.Orchestration.Manager
}
func (d *dnetConnection) IsAgent() bool {
return d.Orchestration.Agent
}
func (d *dnetConnection) GetAdvertiseAddress() string {
return d.Orchestration.Bind
}
func (d *dnetConnection) GetDataPathAddress() string {
return d.Orchestration.Bind
}
func (d *dnetConnection) GetLocalAddress() string {
return d.Orchestration.Bind
}
func (d *dnetConnection) GetListenAddress() string {
return d.Orchestration.Bind
}
func (d *dnetConnection) GetRemoteAddressList() []string {
return []string{d.Orchestration.Peer}
}
func (d *dnetConnection) ListenClusterEvents() <-chan cluster.ConfigEventType {
return d.configEvent
}
func (d *dnetConnection) AttachNetwork(string, string, []string) (*network.NetworkingConfig, error) {
return nil, nil
}
func (d *dnetConnection) DetachNetwork(string, string) error {
return nil
}
func (d *dnetConnection) UpdateAttachment(string, string, *network.NetworkingConfig) error {
return nil
}
func (d *dnetConnection) WaitForDetachment(context.Context, string, string, string, string) error {
return nil
}
func handleSignals(controller libnetwork.NetworkController) {
c := make(chan os.Signal, 1)
signals := []os.Signal{os.Interrupt, syscall.SIGTERM, syscall.SIGQUIT}
signal.Notify(c, signals...)
go func() {
for range c {
controller.Stop()
os.Exit(0)
}
}()
}
func startTestDriver() error {
mux := http.NewServeMux()
server := httptest.NewServer(mux)
if server == nil {
return errors.New("Failed to start an HTTP Server")
}
mux.HandleFunc("/Plugin.Activate", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/vnd.docker.plugins.v1+json")
fmt.Fprintf(w, `{"Implements": ["%s"]}`, driverapi.NetworkPluginEndpointType)
})
mux.HandleFunc(fmt.Sprintf("/%s.GetCapabilities", driverapi.NetworkPluginEndpointType), func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/vnd.docker.plugins.v1+json")
fmt.Fprint(w, `{"Scope":"global"}`)
})
mux.HandleFunc(fmt.Sprintf("/%s.CreateNetwork", driverapi.NetworkPluginEndpointType), func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/vnd.docker.plugins.v1+json")
fmt.Fprint(w, "null")
})
mux.HandleFunc(fmt.Sprintf("/%s.DeleteNetwork", driverapi.NetworkPluginEndpointType), func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/vnd.docker.plugins.v1+json")
fmt.Fprint(w, "null")
})
mux.HandleFunc(fmt.Sprintf("/%s.CreateEndpoint", driverapi.NetworkPluginEndpointType), func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/vnd.docker.plugins.v1+json")
fmt.Fprint(w, "null")
})
mux.HandleFunc(fmt.Sprintf("/%s.DeleteEndpoint", driverapi.NetworkPluginEndpointType), func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/vnd.docker.plugins.v1+json")
fmt.Fprint(w, "null")
})
mux.HandleFunc(fmt.Sprintf("/%s.Join", driverapi.NetworkPluginEndpointType), func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/vnd.docker.plugins.v1+json")
fmt.Fprint(w, "null")
})
mux.HandleFunc(fmt.Sprintf("/%s.Leave", driverapi.NetworkPluginEndpointType), func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/vnd.docker.plugins.v1+json")
fmt.Fprint(w, "null")
})
if err := os.MkdirAll("/etc/docker/plugins", 0755); err != nil {
return err
}
return ioutil.WriteFile("/etc/docker/plugins/test.spec", []byte(server.URL), 0644)
}
func newDnetConnection(val string) (*dnetConnection, error) {
url, err := parseHost(val)
if err != nil {
return nil, err
}
protoAddrParts := strings.SplitN(url, "://", 2)
if len(protoAddrParts) != 2 {
return nil, errors.New("bad format, expected tcp://ADDR")
}
if strings.ToLower(protoAddrParts[0]) != "tcp" {
return nil, errors.New("dnet currently only supports tcp transport")
}
return &dnetConnection{protoAddrParts[0], protoAddrParts[1], &NetworkOrchestration{}, make(chan cluster.ConfigEventType, 10)}, nil
}
const (
defaultUnixSocket = "unix:///var/run/docker.sock"
defaultTCPHost = "tcp://localhost:2375"
)
// parseHost and set defaults for a Daemon host string.
func parseHost(val string) (string, error) {
host := strings.TrimSpace(val)
if host == "" {
return defaultUnixSocket, nil
}
addrParts := strings.SplitN(host, "://", 2)
if len(addrParts) == 1 && addrParts[0] != "" {
addrParts = []string{"tcp", addrParts[0]}
}
if addrParts[0] != "tcp" {
return "", errors.New("dnet currently only supports tcp transport")
}
return parseTCPAddr(addrParts[1], defaultTCPHost)
}
// parseTCPAddr parses and validates that the specified address is a valid TCP
// address. It returns a formatted TCP address, either using the address parsed
// from tryAddr, or the contents of defaultAddr if tryAddr is a blank string.
// tryAddr is expected to have already been Trim()'d
// defaultAddr must be in the full `tcp://host:port` form
func parseTCPAddr(tryAddr string, defaultAddr string) (string, error) {
if tryAddr == "" || tryAddr == "tcp://" {
return defaultAddr, nil
}
addr := strings.TrimPrefix(tryAddr, "tcp://")
if strings.Contains(addr, "://") || addr == "" {
return "", fmt.Errorf("Invalid proto, expected tcp: %s", tryAddr)
}
defaultAddr = strings.TrimPrefix(defaultAddr, "tcp://")
defaultHost, defaultPort, err := net.SplitHostPort(defaultAddr)
if err != nil {
return "", err
}
u, err := url.Parse("tcp://" + addr)
if err != nil {
return "", err
}
host, port, err := net.SplitHostPort(u.Host)
if err != nil {
// try port addition once
host, port, err = net.SplitHostPort(net.JoinHostPort(u.Host, defaultPort))
}
if err != nil {
return "", fmt.Errorf("Invalid bind address format: %s", tryAddr)
}
if host == "" {
host = defaultHost
}
if port == "" {
port = defaultPort
}
p, err := strconv.Atoi(port)
if err != nil && p == 0 {
return "", fmt.Errorf("Invalid bind address format: %s", tryAddr)
}
return fmt.Sprintf("tcp://%s%s", net.JoinHostPort(host, port), u.Path), nil
}
func (d *dnetConnection) httpCall(method, path string, data interface{}, headers map[string][]string) (io.ReadCloser, http.Header, int, error) {
var in io.Reader
in, err := encodeData(data)
if err != nil {
return nil, nil, -1, err
}
req, err := http.NewRequest(method, fmt.Sprintf("%s", path), in)
if err != nil {
return nil, nil, -1, err
}
setupRequestHeaders(method, data, req, headers)
req.URL.Host = d.addr
req.URL.Scheme = "http"
httpClient := &http.Client{}
resp, err := httpClient.Do(req)
statusCode := -1
if resp != nil {
statusCode = resp.StatusCode
}
if err != nil {
return nil, nil, statusCode, fmt.Errorf("error when trying to connect: %v", err)
}
if statusCode < 200 || statusCode >= 400 {
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, nil, statusCode, err
}
return nil, nil, statusCode, fmt.Errorf("error : %s", bytes.TrimSpace(body))
}
return resp.Body, resp.Header, statusCode, nil
}
func setupRequestHeaders(method string, data interface{}, req *http.Request, headers map[string][]string) {
if data != nil {
if headers == nil {
headers = make(map[string][]string)
}
headers["Content-Type"] = []string{"application/json"}
}
expectedPayload := (method == "POST" || method == "PUT")
if expectedPayload && req.Header.Get("Content-Type") == "" {
req.Header.Set("Content-Type", "text/plain")
}
if headers != nil {
for k, v := range headers {
req.Header[k] = v
}
}
}
func encodeData(data interface{}) (*bytes.Buffer, error) {
params := bytes.NewBuffer(nil)
if data != nil {
if err := json.NewEncoder(params).Encode(data); err != nil {
return nil, err
}
}
return params, nil
}
func ipamOption(bridgeName string) libnetwork.NetworkOption {
if nws, _, err := netutils.ElectInterfaceAddresses(bridgeName); err == nil {
ipamV4Conf := &libnetwork.IpamConf{PreferredPool: nws[0].String()}
hip, _ := types.GetHostPartIP(nws[0].IP, nws[0].Mask)
if hip.IsGlobalUnicast() {
ipamV4Conf.Gateway = nws[0].IP.String()
}
return libnetwork.NetworkOptionIpam("default", "", []*libnetwork.IpamConf{ipamV4Conf}, nil, nil)
}
return nil
}

View file

@ -0,0 +1,19 @@
package main
import (
"os"
"os/signal"
"syscall"
psignal "github.com/docker/docker/pkg/signal"
)
func setupDumpStackTrap() {
c := make(chan os.Signal, 1)
signal.Notify(c, syscall.SIGUSR1)
go func() {
for range c {
psignal.DumpStacks("")
}
}()
}

View file

@ -0,0 +1,27 @@
package main
import (
"fmt"
"os"
"github.com/docker/docker/pkg/signal"
"github.com/sirupsen/logrus"
"golang.org/x/sys/windows"
)
// Copied over from docker/daemon/debugtrap_windows.go
func setupDumpStackTrap() {
go func() {
sa := windows.SecurityAttributes{
Length: 0,
}
ev, _ := windows.UTF16PtrFromString("Global\\docker-daemon-" + fmt.Sprint(os.Getpid()))
if h, _ := windows.CreateEvent(&sa, 0, 0, ev); h != 0 {
logrus.Debugf("Stackdump - waiting signal at %s", ev)
for {
windows.WaitForSingleObject(h, windows.INFINITE)
signal.DumpStacks("")
}
}
}()
}

View file

@ -0,0 +1,87 @@
package main
import (
"fmt"
"os"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)
var (
dnetFlags = []cli.Flag{
cli.BoolFlag{
Name: "d, -daemon",
Usage: "Enable daemon mode",
},
cli.StringFlag{
Name: "H, -host",
Value: "",
Usage: "Daemon socket to connect to",
},
cli.StringFlag{
Name: "l, -log-level",
Value: "info",
Usage: "Set the logging level",
},
cli.BoolFlag{
Name: "D, -debug",
Usage: "Enable debug mode",
},
cli.StringFlag{
Name: "c, -cfg-file",
Value: "/etc/default/libnetwork.toml",
Usage: "Configuration file",
},
}
)
func processFlags(c *cli.Context) error {
var err error
if c.String("l") != "" {
lvl, err := logrus.ParseLevel(c.String("l"))
if err != nil {
fmt.Printf("Unable to parse logging level: %s\n", c.String("l"))
os.Exit(1)
}
logrus.SetLevel(lvl)
} else {
logrus.SetLevel(logrus.InfoLevel)
}
if c.Bool("D") {
logrus.SetLevel(logrus.DebugLevel)
}
hostFlag := c.String("H")
if hostFlag == "" {
defaultHost := os.Getenv("DNET_HOST")
if defaultHost == "" {
// TODO : Add UDS support
defaultHost = fmt.Sprintf("tcp://%s:%d", DefaultHTTPHost, DefaultHTTPPort)
}
hostFlag = defaultHost
}
epConn, err = newDnetConnection(hostFlag)
if err != nil {
if c.Bool("d") {
logrus.Error(err)
} else {
fmt.Println(err)
}
os.Exit(1)
}
if c.Bool("d") {
err = epConn.dnetDaemon(c.String("c"))
if err != nil {
logrus.Errorf("dnet Daemon exited with an error : %v", err)
os.Exit(1)
}
os.Exit(1)
}
return nil
}

View file

@ -0,0 +1,16 @@
title = "LibNetwork Configuration file"
[daemon]
debug = false
[cluster]
discovery = "consul://localhost:8500"
Address = "1.1.1.1"
Heartbeat = 20
[datastore]
embedded = false
[datastore.client]
provider = "consul"
Address = "localhost:8500"
[orchestration]
agent = true
peer="2.2.2.2"

View file

@ -0,0 +1,9 @@
FROM alpine
RUN apk --no-cache add curl
COPY testMain /app/
WORKDIR app
ENTRYPOINT ["/app/testMain"]

View file

@ -0,0 +1,15 @@
SERVER
cd test/networkdb
env GOOS=linux go build -v testMain.go && docker build -t dockereng/e2e-networkdb .
(only for testkit case) docker push dockereng/e2e-networkdb
Run server: docker service create --name testdb --network net1 --replicas 3 --env TASK_ID="{{.Task.ID}}" -p mode=host,target=8000 dockereng/e2e-networkdb server 8000
CLIENT
cd test/networkdb
Join cluster: docker run -it --network net1 dockereng/e2e-networkdb client join testdb 8000
Join network: docker run -it --network net1 dockereng/e2e-networkdb client join-network testdb 8000 test
Run test: docker run -it --network net1 dockereng/e2e-networkdb client write-delete-unique-keys testdb 8000 test tableBla 3 10
check table: curl "localhost:32768/gettable?nid=test&tname=table_name"

View file

@ -0,0 +1,874 @@
package dbclient
import (
"context"
"fmt"
"io/ioutil"
"log"
"net"
"net/http"
"os"
"regexp"
"strconv"
"strings"
"time"
"github.com/sirupsen/logrus"
)
var servicePort string
const totalWrittenKeys string = "totalKeys"
type resultTuple struct {
id string
result int
}
func httpGetFatalError(ip, port, path string) {
body, err := httpGet(ip, port, path)
if err != nil || !strings.Contains(string(body), "OK") {
log.Fatalf("[%s] error %s %s", path, err, body)
}
}
func httpGet(ip, port, path string) ([]byte, error) {
resp, err := http.Get("http://" + ip + ":" + port + path)
if err != nil {
logrus.Errorf("httpGet error:%s", err)
return nil, err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
return body, err
}
func joinCluster(ip, port string, members []string, doneCh chan resultTuple) {
httpGetFatalError(ip, port, "/join?members="+strings.Join(members, ","))
if doneCh != nil {
doneCh <- resultTuple{id: ip, result: 0}
}
}
func joinNetwork(ip, port, network string, doneCh chan resultTuple) {
httpGetFatalError(ip, port, "/joinnetwork?nid="+network)
if doneCh != nil {
doneCh <- resultTuple{id: ip, result: 0}
}
}
func leaveNetwork(ip, port, network string, doneCh chan resultTuple) {
httpGetFatalError(ip, port, "/leavenetwork?nid="+network)
if doneCh != nil {
doneCh <- resultTuple{id: ip, result: 0}
}
}
func writeTableKey(ip, port, networkName, tableName, key string) {
createPath := "/createentry?unsafe&nid=" + networkName + "&tname=" + tableName + "&value=v&key="
httpGetFatalError(ip, port, createPath+key)
}
func deleteTableKey(ip, port, networkName, tableName, key string) {
deletePath := "/deleteentry?nid=" + networkName + "&tname=" + tableName + "&key="
httpGetFatalError(ip, port, deletePath+key)
}
func clusterPeersNumber(ip, port string, doneCh chan resultTuple) {
body, err := httpGet(ip, port, "/clusterpeers")
if err != nil {
logrus.Errorf("clusterPeers %s there was an error: %s", ip, err)
doneCh <- resultTuple{id: ip, result: -1}
return
}
peersRegexp := regexp.MustCompile(`total entries: ([0-9]+)`)
peersNum, _ := strconv.Atoi(peersRegexp.FindStringSubmatch(string(body))[1])
doneCh <- resultTuple{id: ip, result: peersNum}
}
func networkPeersNumber(ip, port, networkName string, doneCh chan resultTuple) {
body, err := httpGet(ip, port, "/networkpeers?nid="+networkName)
if err != nil {
logrus.Errorf("networkPeersNumber %s there was an error: %s", ip, err)
doneCh <- resultTuple{id: ip, result: -1}
return
}
peersRegexp := regexp.MustCompile(`total entries: ([0-9]+)`)
peersNum, _ := strconv.Atoi(peersRegexp.FindStringSubmatch(string(body))[1])
doneCh <- resultTuple{id: ip, result: peersNum}
}
func dbTableEntriesNumber(ip, port, networkName, tableName string, doneCh chan resultTuple) {
body, err := httpGet(ip, port, "/gettable?nid="+networkName+"&tname="+tableName)
if err != nil {
logrus.Errorf("tableEntriesNumber %s there was an error: %s", ip, err)
doneCh <- resultTuple{id: ip, result: -1}
return
}
elementsRegexp := regexp.MustCompile(`total entries: ([0-9]+)`)
entriesNum, _ := strconv.Atoi(elementsRegexp.FindStringSubmatch(string(body))[1])
doneCh <- resultTuple{id: ip, result: entriesNum}
}
func dbEntriesNumber(ip, port, networkName string, doneCh chan resultTuple) {
body, err := httpGet(ip, port, "/networkstats?nid="+networkName)
if err != nil {
logrus.Errorf("entriesNumber %s there was an error: %s", ip, err)
doneCh <- resultTuple{id: ip, result: -1}
return
}
elementsRegexp := regexp.MustCompile(`entries: ([0-9]+)`)
entriesNum, _ := strconv.Atoi(elementsRegexp.FindStringSubmatch(string(body))[1])
doneCh <- resultTuple{id: ip, result: entriesNum}
}
func dbQueueLength(ip, port, networkName string, doneCh chan resultTuple) {
body, err := httpGet(ip, port, "/networkstats?nid="+networkName)
if err != nil {
logrus.Errorf("queueLength %s there was an error: %s", ip, err)
doneCh <- resultTuple{id: ip, result: -1}
return
}
elementsRegexp := regexp.MustCompile(`qlen: ([0-9]+)`)
entriesNum, _ := strconv.Atoi(elementsRegexp.FindStringSubmatch(string(body))[1])
doneCh <- resultTuple{id: ip, result: entriesNum}
}
func clientWatchTable(ip, port, networkName, tableName string, doneCh chan resultTuple) {
httpGetFatalError(ip, port, "/watchtable?nid="+networkName+"&tname="+tableName)
if doneCh != nil {
doneCh <- resultTuple{id: ip, result: 0}
}
}
func clientTableEntriesNumber(ip, port, networkName, tableName string, doneCh chan resultTuple) {
body, err := httpGet(ip, port, "/watchedtableentries?nid="+networkName+"&tname="+tableName)
if err != nil {
logrus.Errorf("clientTableEntriesNumber %s there was an error: %s", ip, err)
doneCh <- resultTuple{id: ip, result: -1}
return
}
elementsRegexp := regexp.MustCompile(`total elements: ([0-9]+)`)
entriesNum, _ := strconv.Atoi(elementsRegexp.FindStringSubmatch(string(body))[1])
doneCh <- resultTuple{id: ip, result: entriesNum}
}
func writeKeysNumber(ip, port, networkName, tableName, key string, number int, doneCh chan resultTuple) {
x := 0
for ; x < number; x++ {
k := key + strconv.Itoa(x)
// write key
writeTableKey(ip, port, networkName, tableName, k)
}
doneCh <- resultTuple{id: ip, result: x}
}
func deleteKeysNumber(ip, port, networkName, tableName, key string, number int, doneCh chan resultTuple) {
x := 0
for ; x < number; x++ {
k := key + strconv.Itoa(x)
// write key
deleteTableKey(ip, port, networkName, tableName, k)
}
doneCh <- resultTuple{id: ip, result: x}
}
func writeUniqueKeys(ctx context.Context, ip, port, networkName, tableName, key string, doneCh chan resultTuple) {
for x := 0; ; x++ {
select {
case <-ctx.Done():
doneCh <- resultTuple{id: ip, result: x}
return
default:
k := key + strconv.Itoa(x)
// write key
writeTableKey(ip, port, networkName, tableName, k)
// give time to send out key writes
time.Sleep(100 * time.Millisecond)
}
}
}
func writeDeleteUniqueKeys(ctx context.Context, ip, port, networkName, tableName, key string, doneCh chan resultTuple) {
for x := 0; ; x++ {
select {
case <-ctx.Done():
doneCh <- resultTuple{id: ip, result: x}
return
default:
k := key + strconv.Itoa(x)
// write key
writeTableKey(ip, port, networkName, tableName, k)
// give time to send out key writes
time.Sleep(100 * time.Millisecond)
// delete key
deleteTableKey(ip, port, networkName, tableName, k)
}
}
}
func writeDeleteLeaveJoin(ctx context.Context, ip, port, networkName, tableName, key string, doneCh chan resultTuple) {
for x := 0; ; x++ {
select {
case <-ctx.Done():
doneCh <- resultTuple{id: ip, result: x}
return
default:
k := key + strconv.Itoa(x)
// write key
writeTableKey(ip, port, networkName, tableName, k)
time.Sleep(100 * time.Millisecond)
// delete key
deleteTableKey(ip, port, networkName, tableName, k)
// give some time
time.Sleep(100 * time.Millisecond)
// leave network
leaveNetwork(ip, port, networkName, nil)
// join network
joinNetwork(ip, port, networkName, nil)
}
}
}
func ready(ip, port string, doneCh chan resultTuple) {
for {
body, err := httpGet(ip, port, "/ready")
if err != nil || !strings.Contains(string(body), "OK") {
time.Sleep(500 * time.Millisecond)
continue
}
// success
break
}
// notify the completion
doneCh <- resultTuple{id: ip, result: 0}
}
func checkTable(ctx context.Context, ips []string, port, networkName, tableName string, expectedEntries int, fn func(string, string, string, string, chan resultTuple)) (opTime time.Duration) {
startTime := time.Now().UnixNano()
var successTime int64
// Loop for 2 minutes to guarantee that the result is stable
for {
select {
case <-ctx.Done():
// Validate test success, if the time is set means that all the tables are empty
if successTime != 0 {
opTime = time.Duration(successTime-startTime) / time.Millisecond
logrus.Infof("Check table passed, the cluster converged in %d msec", opTime)
return
}
log.Fatal("Test failed, there is still entries in the tables of the nodes")
default:
logrus.Infof("Checking table %s expected %d", tableName, expectedEntries)
doneCh := make(chan resultTuple, len(ips))
for _, ip := range ips {
go fn(ip, servicePort, networkName, tableName, doneCh)
}
nodesWithCorrectEntriesNum := 0
for i := len(ips); i > 0; i-- {
tableEntries := <-doneCh
logrus.Infof("Node %s has %d entries", tableEntries.id, tableEntries.result)
if tableEntries.result == expectedEntries {
nodesWithCorrectEntriesNum++
}
}
close(doneCh)
if nodesWithCorrectEntriesNum == len(ips) {
if successTime == 0 {
successTime = time.Now().UnixNano()
logrus.Infof("Success after %d msec", time.Duration(successTime-startTime)/time.Millisecond)
}
} else {
successTime = 0
}
time.Sleep(10 * time.Second)
}
}
}
func waitWriters(parallelWriters int, mustWrite bool, doneCh chan resultTuple) map[string]int {
var totalKeys int
resultTable := make(map[string]int)
for i := 0; i < parallelWriters; i++ {
logrus.Infof("Waiting for %d workers", parallelWriters-i)
workerReturn := <-doneCh
totalKeys += workerReturn.result
if mustWrite && workerReturn.result == 0 {
log.Fatalf("The worker %s did not write any key %d == 0", workerReturn.id, workerReturn.result)
}
if !mustWrite && workerReturn.result != 0 {
log.Fatalf("The worker %s was supposed to return 0 instead %d != 0", workerReturn.id, workerReturn.result)
}
if mustWrite {
resultTable[workerReturn.id] = workerReturn.result
logrus.Infof("The worker %s wrote %d keys", workerReturn.id, workerReturn.result)
}
}
resultTable[totalWrittenKeys] = totalKeys
return resultTable
}
// ready
func doReady(ips []string) {
doneCh := make(chan resultTuple, len(ips))
// check all the nodes
for _, ip := range ips {
go ready(ip, servicePort, doneCh)
}
// wait for the readiness of all nodes
for i := len(ips); i > 0; i-- {
<-doneCh
}
close(doneCh)
}
// join
func doJoin(ips []string) {
doneCh := make(chan resultTuple, len(ips))
// check all the nodes
for i, ip := range ips {
members := append([]string(nil), ips[:i]...)
members = append(members, ips[i+1:]...)
go joinCluster(ip, servicePort, members, doneCh)
}
// wait for the readiness of all nodes
for i := len(ips); i > 0; i-- {
<-doneCh
}
close(doneCh)
}
// cluster-peers expectedNumberPeers maxRetry
func doClusterPeers(ips []string, args []string) {
doneCh := make(chan resultTuple, len(ips))
expectedPeers, _ := strconv.Atoi(args[0])
maxRetry, _ := strconv.Atoi(args[1])
for retry := 0; retry < maxRetry; retry++ {
// check all the nodes
for _, ip := range ips {
go clusterPeersNumber(ip, servicePort, doneCh)
}
var failed bool
// wait for the readiness of all nodes
for i := len(ips); i > 0; i-- {
node := <-doneCh
if node.result != expectedPeers {
failed = true
if retry == maxRetry-1 {
log.Fatalf("Expected peers from %s mismatch %d != %d", node.id, expectedPeers, node.result)
} else {
logrus.Warnf("Expected peers from %s mismatch %d != %d", node.id, expectedPeers, node.result)
}
time.Sleep(1 * time.Second)
}
}
// check if needs retry
if !failed {
break
}
}
close(doneCh)
}
// join-network networkName
func doJoinNetwork(ips []string, args []string) {
doneCh := make(chan resultTuple, len(ips))
// check all the nodes
for _, ip := range ips {
go joinNetwork(ip, servicePort, args[0], doneCh)
}
// wait for the readiness of all nodes
for i := len(ips); i > 0; i-- {
<-doneCh
}
close(doneCh)
}
// leave-network networkName
func doLeaveNetwork(ips []string, args []string) {
doneCh := make(chan resultTuple, len(ips))
// check all the nodes
for _, ip := range ips {
go leaveNetwork(ip, servicePort, args[0], doneCh)
}
// wait for the readiness of all nodes
for i := len(ips); i > 0; i-- {
<-doneCh
}
close(doneCh)
}
// network-peers networkName expectedNumberPeers maxRetry
func doNetworkPeers(ips []string, args []string) {
doneCh := make(chan resultTuple, len(ips))
networkName := args[0]
expectedPeers, _ := strconv.Atoi(args[1])
maxRetry, _ := strconv.Atoi(args[2])
for retry := 0; retry < maxRetry; retry++ {
// check all the nodes
for _, ip := range ips {
go networkPeersNumber(ip, servicePort, networkName, doneCh)
}
var failed bool
// wait for the readiness of all nodes
for i := len(ips); i > 0; i-- {
node := <-doneCh
if node.result != expectedPeers {
failed = true
if retry == maxRetry-1 {
log.Fatalf("Expected peers from %s mismatch %d != %d", node.id, expectedPeers, node.result)
} else {
logrus.Warnf("Expected peers from %s mismatch %d != %d", node.id, expectedPeers, node.result)
}
time.Sleep(1 * time.Second)
}
}
// check if needs retry
if !failed {
break
}
}
close(doneCh)
}
// network-stats-queue networkName <gt/lt> queueSize
func doNetworkStatsQueue(ips []string, args []string) {
doneCh := make(chan resultTuple, len(ips))
networkName := args[0]
comparison := args[1]
size, _ := strconv.Atoi(args[2])
// check all the nodes
for _, ip := range ips {
go dbQueueLength(ip, servicePort, networkName, doneCh)
}
var avgQueueSize int
// wait for the readiness of all nodes
for i := len(ips); i > 0; i-- {
node := <-doneCh
switch comparison {
case "lt":
if node.result > size {
log.Fatalf("Expected queue size from %s to be %d < %d", node.id, node.result, size)
}
case "gt":
if node.result < size {
log.Fatalf("Expected queue size from %s to be %d > %d", node.id, node.result, size)
}
default:
log.Fatal("unknown comparison operator")
}
avgQueueSize += node.result
}
close(doneCh)
avgQueueSize /= len(ips)
fmt.Fprintf(os.Stderr, "doNetworkStatsQueue succeeded with avg queue:%d", avgQueueSize)
}
// write-keys networkName tableName parallelWriters numberOfKeysEach
func doWriteKeys(ips []string, args []string) {
networkName := args[0]
tableName := args[1]
parallelWriters, _ := strconv.Atoi(args[2])
numberOfKeys, _ := strconv.Atoi(args[3])
doneCh := make(chan resultTuple, parallelWriters)
// Enable watch of tables from clients
for i := 0; i < parallelWriters; i++ {
go clientWatchTable(ips[i], servicePort, networkName, tableName, doneCh)
}
waitWriters(parallelWriters, false, doneCh)
// Start parallel writers that will create and delete unique keys
defer close(doneCh)
for i := 0; i < parallelWriters; i++ {
key := "key-" + strconv.Itoa(i) + "-"
logrus.Infof("Spawn worker: %d on IP:%s", i, ips[i])
go writeKeysNumber(ips[i], servicePort, networkName, tableName, key, numberOfKeys, doneCh)
}
// Sync with all the writers
keyMap := waitWriters(parallelWriters, true, doneCh)
logrus.Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
// check table entries for 2 minutes
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
opTime := checkTable(ctx, ips, servicePort, networkName, tableName, keyMap[totalWrittenKeys], dbTableEntriesNumber)
cancel()
fmt.Fprintf(os.Stderr, "doWriteKeys succeeded in %d msec", opTime)
}
// delete-keys networkName tableName parallelWriters numberOfKeysEach
func doDeleteKeys(ips []string, args []string) {
networkName := args[0]
tableName := args[1]
parallelWriters, _ := strconv.Atoi(args[2])
numberOfKeys, _ := strconv.Atoi(args[3])
doneCh := make(chan resultTuple, parallelWriters)
// Enable watch of tables from clients
for i := 0; i < parallelWriters; i++ {
go clientWatchTable(ips[i], servicePort, networkName, tableName, doneCh)
}
waitWriters(parallelWriters, false, doneCh)
// Start parallel writers that will create and delete unique keys
defer close(doneCh)
for i := 0; i < parallelWriters; i++ {
key := "key-" + strconv.Itoa(i) + "-"
logrus.Infof("Spawn worker: %d on IP:%s", i, ips[i])
go deleteKeysNumber(ips[i], servicePort, networkName, tableName, key, numberOfKeys, doneCh)
}
// Sync with all the writers
keyMap := waitWriters(parallelWriters, true, doneCh)
logrus.Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
// check table entries for 2 minutes
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
opTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, dbTableEntriesNumber)
cancel()
fmt.Fprintf(os.Stderr, "doDeletekeys succeeded in %d msec", opTime)
}
// write-delete-unique-keys networkName tableName numParallelWriters writeTimeSec
func doWriteDeleteUniqueKeys(ips []string, args []string) {
networkName := args[0]
tableName := args[1]
parallelWriters, _ := strconv.Atoi(args[2])
writeTimeSec, _ := strconv.Atoi(args[3])
doneCh := make(chan resultTuple, parallelWriters)
// Enable watch of tables from clients
for i := 0; i < parallelWriters; i++ {
go clientWatchTable(ips[i], servicePort, networkName, tableName, doneCh)
}
waitWriters(parallelWriters, false, doneCh)
// Start parallel writers that will create and delete unique keys
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
for i := 0; i < parallelWriters; i++ {
key := "key-" + strconv.Itoa(i) + "-"
logrus.Infof("Spawn worker: %d on IP:%s", i, ips[i])
go writeDeleteUniqueKeys(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
}
// Sync with all the writers
keyMap := waitWriters(parallelWriters, true, doneCh)
cancel()
logrus.Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
// check table entries for 2 minutes
ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
opDBTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, dbTableEntriesNumber)
cancel()
ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
opClientTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, clientTableEntriesNumber)
cancel()
fmt.Fprintf(os.Stderr, "doWriteDeleteUniqueKeys succeeded in %d msec and client %d msec", opDBTime, opClientTime)
}
// write-unique-keys networkName tableName numParallelWriters writeTimeSec
func doWriteUniqueKeys(ips []string, args []string) {
networkName := args[0]
tableName := args[1]
parallelWriters, _ := strconv.Atoi(args[2])
writeTimeSec, _ := strconv.Atoi(args[3])
doneCh := make(chan resultTuple, parallelWriters)
// Enable watch of tables from clients
for i := 0; i < parallelWriters; i++ {
go clientWatchTable(ips[i], servicePort, networkName, tableName, doneCh)
}
waitWriters(parallelWriters, false, doneCh)
// Start parallel writers that will create and delete unique keys
defer close(doneCh)
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
for i := 0; i < parallelWriters; i++ {
key := "key-" + strconv.Itoa(i) + "-"
logrus.Infof("Spawn worker: %d on IP:%s", i, ips[i])
go writeUniqueKeys(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
}
// Sync with all the writers
keyMap := waitWriters(parallelWriters, true, doneCh)
cancel()
logrus.Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
// check table entries for 2 minutes
ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
opTime := checkTable(ctx, ips, servicePort, networkName, tableName, keyMap[totalWrittenKeys], dbTableEntriesNumber)
cancel()
fmt.Fprintf(os.Stderr, "doWriteUniqueKeys succeeded in %d msec", opTime)
}
// write-delete-leave-join networkName tableName numParallelWriters writeTimeSec
func doWriteDeleteLeaveJoin(ips []string, args []string) {
networkName := args[0]
tableName := args[1]
parallelWriters, _ := strconv.Atoi(args[2])
writeTimeSec, _ := strconv.Atoi(args[3])
// Start parallel writers that will create and delete unique keys
doneCh := make(chan resultTuple, parallelWriters)
defer close(doneCh)
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
for i := 0; i < parallelWriters; i++ {
key := "key-" + strconv.Itoa(i) + "-"
logrus.Infof("Spawn worker: %d on IP:%s", i, ips[i])
go writeDeleteLeaveJoin(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
}
// Sync with all the writers
keyMap := waitWriters(parallelWriters, true, doneCh)
cancel()
logrus.Infof("Written a total of %d keys on the cluster", keyMap["totalKeys"])
// check table entries for 2 minutes
ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
opTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, dbTableEntriesNumber)
cancel()
fmt.Fprintf(os.Stderr, "doWriteDeleteLeaveJoin succeeded in %d msec", opTime)
}
// write-delete-wait-leave-join networkName tableName numParallelWriters writeTimeSec
func doWriteDeleteWaitLeaveJoin(ips []string, args []string) {
networkName := args[0]
tableName := args[1]
parallelWriters, _ := strconv.Atoi(args[2])
writeTimeSec, _ := strconv.Atoi(args[3])
// Start parallel writers that will create and delete unique keys
doneCh := make(chan resultTuple, parallelWriters)
defer close(doneCh)
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
for i := 0; i < parallelWriters; i++ {
key := "key-" + strconv.Itoa(i) + "-"
logrus.Infof("Spawn worker: %d on IP:%s", i, ips[i])
go writeDeleteUniqueKeys(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
}
// Sync with all the writers
keyMap := waitWriters(parallelWriters, true, doneCh)
cancel()
logrus.Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
// The writers will leave the network
for i := 0; i < parallelWriters; i++ {
logrus.Infof("worker leaveNetwork: %d on IP:%s", i, ips[i])
go leaveNetwork(ips[i], servicePort, networkName, doneCh)
}
waitWriters(parallelWriters, false, doneCh)
// Give some time
time.Sleep(100 * time.Millisecond)
// The writers will join the network
for i := 0; i < parallelWriters; i++ {
logrus.Infof("worker joinNetwork: %d on IP:%s", i, ips[i])
go joinNetwork(ips[i], servicePort, networkName, doneCh)
}
waitWriters(parallelWriters, false, doneCh)
// check table entries for 2 minutes
ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
opTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, dbTableEntriesNumber)
cancel()
fmt.Fprintf(os.Stderr, "doWriteDeleteWaitLeaveJoin succeeded in %d msec", opTime)
}
// write-wait-leave networkName tableName numParallelWriters writeTimeSec
func doWriteWaitLeave(ips []string, args []string) {
networkName := args[0]
tableName := args[1]
parallelWriters, _ := strconv.Atoi(args[2])
writeTimeSec, _ := strconv.Atoi(args[3])
// Start parallel writers that will create and delete unique keys
doneCh := make(chan resultTuple, parallelWriters)
defer close(doneCh)
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
for i := 0; i < parallelWriters; i++ {
key := "key-" + strconv.Itoa(i) + "-"
logrus.Infof("Spawn worker: %d on IP:%s", i, ips[i])
go writeUniqueKeys(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
}
// Sync with all the writers
keyMap := waitWriters(parallelWriters, true, doneCh)
cancel()
logrus.Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
// The writers will leave the network
for i := 0; i < parallelWriters; i++ {
logrus.Infof("worker leaveNetwork: %d on IP:%s", i, ips[i])
go leaveNetwork(ips[i], servicePort, networkName, doneCh)
}
waitWriters(parallelWriters, false, doneCh)
// check table entries for 2 minutes
ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
opTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, dbTableEntriesNumber)
cancel()
fmt.Fprintf(os.Stderr, "doWriteLeaveJoin succeeded in %d msec", opTime)
}
// write-wait-leave-join networkName tableName numParallelWriters writeTimeSec numParallelLeaver
func doWriteWaitLeaveJoin(ips []string, args []string) {
networkName := args[0]
tableName := args[1]
parallelWriters, _ := strconv.Atoi(args[2])
writeTimeSec, _ := strconv.Atoi(args[3])
parallelLeaver, _ := strconv.Atoi(args[4])
// Start parallel writers that will create and delete unique keys
doneCh := make(chan resultTuple, parallelWriters)
defer close(doneCh)
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
for i := 0; i < parallelWriters; i++ {
key := "key-" + strconv.Itoa(i) + "-"
logrus.Infof("Spawn worker: %d on IP:%s", i, ips[i])
go writeUniqueKeys(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
}
// Sync with all the writers
keyMap := waitWriters(parallelWriters, true, doneCh)
cancel()
logrus.Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
keysExpected := keyMap[totalWrittenKeys]
// The Leavers will leave the network
for i := 0; i < parallelLeaver; i++ {
logrus.Infof("worker leaveNetwork: %d on IP:%s", i, ips[i])
go leaveNetwork(ips[i], servicePort, networkName, doneCh)
// Once a node leave all the keys written previously will be deleted, so the expected keys will consider that as removed
keysExpected -= keyMap[ips[i]]
}
waitWriters(parallelLeaver, false, doneCh)
// Give some time
time.Sleep(100 * time.Millisecond)
// The writers will join the network
for i := 0; i < parallelLeaver; i++ {
logrus.Infof("worker joinNetwork: %d on IP:%s", i, ips[i])
go joinNetwork(ips[i], servicePort, networkName, doneCh)
}
waitWriters(parallelLeaver, false, doneCh)
// check table entries for 2 minutes
ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
opTime := checkTable(ctx, ips, servicePort, networkName, tableName, keysExpected, dbTableEntriesNumber)
cancel()
fmt.Fprintf(os.Stderr, "doWriteWaitLeaveJoin succeeded in %d msec", opTime)
}
var cmdArgChec = map[string]int{
"debug": 0,
"fail": 0,
"ready": 2,
"join": 2,
"leave": 2,
"join-network": 3,
"leave-network": 3,
"cluster-peers": 5,
"network-peers": 5,
"write-delete-unique-keys": 7,
}
// Client is a client
func Client(args []string) {
logrus.Infof("[CLIENT] Starting with arguments %v", args)
command := args[0]
if len(args) < cmdArgChec[command] {
log.Fatalf("Command %s requires %d arguments, passed %d, aborting...", command, cmdArgChec[command], len(args))
}
switch command {
case "debug":
time.Sleep(1 * time.Hour)
os.Exit(0)
case "fail":
log.Fatalf("Test error condition with message: error error error")
}
serviceName := args[1]
ips, _ := net.LookupHost("tasks." + serviceName)
logrus.Infof("got the ips %v", ips)
if len(ips) == 0 {
log.Fatalf("Cannot resolve any IP for the service tasks.%s", serviceName)
}
servicePort = args[2]
commandArgs := args[3:]
logrus.Infof("Executing %s with args:%v", command, commandArgs)
switch command {
case "ready":
doReady(ips)
case "join":
doJoin(ips)
case "leave":
case "cluster-peers":
// cluster-peers maxRetry
doClusterPeers(ips, commandArgs)
case "join-network":
// join-network networkName
doJoinNetwork(ips, commandArgs)
case "leave-network":
// leave-network networkName
doLeaveNetwork(ips, commandArgs)
case "network-peers":
// network-peers networkName expectedNumberPeers maxRetry
doNetworkPeers(ips, commandArgs)
// case "network-stats-entries":
// // network-stats-entries networkName maxRetry
// doNetworkPeers(ips, commandArgs)
case "network-stats-queue":
// network-stats-queue networkName <lt/gt> queueSize
doNetworkStatsQueue(ips, commandArgs)
case "write-keys":
// write-keys networkName tableName parallelWriters numberOfKeysEach
doWriteKeys(ips, commandArgs)
case "delete-keys":
// delete-keys networkName tableName parallelWriters numberOfKeysEach
doDeleteKeys(ips, commandArgs)
case "write-unique-keys":
// write-delete-unique-keys networkName tableName numParallelWriters writeTimeSec
doWriteUniqueKeys(ips, commandArgs)
case "write-delete-unique-keys":
// write-delete-unique-keys networkName tableName numParallelWriters writeTimeSec
doWriteDeleteUniqueKeys(ips, commandArgs)
case "write-delete-leave-join":
// write-delete-leave-join networkName tableName numParallelWriters writeTimeSec
doWriteDeleteLeaveJoin(ips, commandArgs)
case "write-delete-wait-leave-join":
// write-delete-wait-leave-join networkName tableName numParallelWriters writeTimeSec
doWriteDeleteWaitLeaveJoin(ips, commandArgs)
case "write-wait-leave":
// write-wait-leave networkName tableName numParallelWriters writeTimeSec
doWriteWaitLeave(ips, commandArgs)
case "write-wait-leave-join":
// write-wait-leave networkName tableName numParallelWriters writeTimeSec
doWriteWaitLeaveJoin(ips, commandArgs)
default:
log.Fatalf("Command %s not recognized", command)
}
}

View file

@ -0,0 +1,111 @@
package dbserver
import (
"errors"
"fmt"
"log"
"net"
"net/http"
"os"
"strconv"
"github.com/docker/libnetwork/cmd/networkdb-test/dummyclient"
"github.com/docker/libnetwork/diagnostic"
"github.com/docker/libnetwork/networkdb"
"github.com/sirupsen/logrus"
)
var nDB *networkdb.NetworkDB
var server *diagnostic.Server
var ipAddr string
var testerPaths2Func = map[string]diagnostic.HTTPHandlerFunc{
"/myip": ipaddress,
}
func ipaddress(ctx interface{}, w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "%s\n", ipAddr)
}
// Server starts the server
func Server(args []string) {
logrus.Infof("[SERVER] Starting with arguments %v", args)
if len(args) < 1 {
log.Fatal("Port number is a mandatory argument, aborting...")
}
port, _ := strconv.Atoi(args[0])
var localNodeName string
var ok bool
if localNodeName, ok = os.LookupEnv("TASK_ID"); !ok {
log.Fatal("TASK_ID environment variable not set, aborting...")
}
logrus.Infof("[SERVER] Starting node %s on port %d", localNodeName, port)
ip, err := getIPInterface("eth0")
if err != nil {
logrus.Errorf("%s There was a problem with the IP %s\n", localNodeName, err)
return
}
ipAddr = ip
logrus.Infof("%s uses IP %s\n", localNodeName, ipAddr)
server = diagnostic.New()
server.Init()
conf := networkdb.DefaultConfig()
conf.Hostname = localNodeName
conf.AdvertiseAddr = ipAddr
conf.BindAddr = ipAddr
nDB, err = networkdb.New(conf)
if err != nil {
logrus.Infof("%s error in the DB init %s\n", localNodeName, err)
return
}
// Register network db handlers
server.RegisterHandler(nDB, networkdb.NetDbPaths2Func)
server.RegisterHandler(nil, testerPaths2Func)
server.RegisterHandler(nDB, dummyclient.DummyClientPaths2Func)
server.EnableDiagnostic("", port)
// block here
select {}
}
func getIPInterface(name string) (string, error) {
ifaces, err := net.Interfaces()
if err != nil {
return "", err
}
for _, iface := range ifaces {
if iface.Name != name {
continue // not the name specified
}
if iface.Flags&net.FlagUp == 0 {
return "", errors.New("Interfaces is down")
}
addrs, err := iface.Addrs()
if err != nil {
return "", err
}
for _, addr := range addrs {
var ip net.IP
switch v := addr.(type) {
case *net.IPNet:
ip = v.IP
case *net.IPAddr:
ip = v.IP
}
if ip == nil || ip.IsLoopback() {
continue
}
ip = ip.To4()
if ip == nil {
continue
}
return ip.String(), nil
}
return "", errors.New("Interfaces does not have a valid IPv4")
}
return "", errors.New("Interface not found")
}

View file

@ -0,0 +1,120 @@
package dummyclient
import (
"fmt"
"log"
"net/http"
events "github.com/docker/go-events"
"github.com/docker/libnetwork/diagnostic"
"github.com/docker/libnetwork/networkdb"
"github.com/sirupsen/logrus"
)
// DummyClientPaths2Func exported paths for the client
var DummyClientPaths2Func = map[string]diagnostic.HTTPHandlerFunc{
"/watchtable": watchTable,
"/watchedtableentries": watchTableEntries,
}
const (
missingParameter = "missing parameter"
)
type tableHandler struct {
cancelWatch func()
entries map[string]string
}
var clientWatchTable = map[string]tableHandler{}
func watchTable(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnostic.DebugHTTPForm(r)
if len(r.Form["tname"]) < 1 {
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name", r.URL.Path))
diagnostic.HTTPReply(w, rsp, &diagnostic.JSONOutput{})
return
}
tableName := r.Form["tname"][0]
if _, ok := clientWatchTable[tableName]; ok {
fmt.Fprintf(w, "OK\n")
return
}
nDB, ok := ctx.(*networkdb.NetworkDB)
if ok {
ch, cancel := nDB.Watch(tableName, "", "")
clientWatchTable[tableName] = tableHandler{cancelWatch: cancel, entries: make(map[string]string)}
go handleTableEvents(tableName, ch)
fmt.Fprintf(w, "OK\n")
}
}
func watchTableEntries(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
diagnostic.DebugHTTPForm(r)
if len(r.Form["tname"]) < 1 {
rsp := diagnostic.WrongCommand(missingParameter, fmt.Sprintf("%s?tname=table_name", r.URL.Path))
diagnostic.HTTPReply(w, rsp, &diagnostic.JSONOutput{})
return
}
tableName := r.Form["tname"][0]
table, ok := clientWatchTable[tableName]
if !ok {
fmt.Fprintf(w, "Table %s not watched\n", tableName)
return
}
fmt.Fprintf(w, "total elements: %d\n", len(table.entries))
i := 0
for k, v := range table.entries {
fmt.Fprintf(w, "%d) k:`%s` -> v:`%s`\n", i, k, v)
i++
}
}
func handleTableEvents(tableName string, ch *events.Channel) {
var (
// nid string
eid string
value []byte
isAdd bool
)
logrus.Infof("Started watching table:%s", tableName)
for {
select {
case <-ch.Done():
logrus.Infof("End watching %s", tableName)
return
case evt := <-ch.C:
logrus.Infof("Recevied new event on:%s", tableName)
switch event := evt.(type) {
case networkdb.CreateEvent:
// nid = event.NetworkID
eid = event.Key
value = event.Value
isAdd = true
case networkdb.DeleteEvent:
// nid = event.NetworkID
eid = event.Key
value = event.Value
isAdd = false
default:
log.Fatalf("Unexpected table event = %#v", event)
}
if isAdd {
// logrus.Infof("Add %s %s", tableName, eid)
clientWatchTable[tableName].entries[eid] = string(value)
} else {
// logrus.Infof("Del %s %s", tableName, eid)
delete(clientWatchTable[tableName].entries, eid)
}
}
}
}

View file

@ -0,0 +1,28 @@
package main
import (
"log"
"os"
"github.com/docker/libnetwork/cmd/networkdb-test/dbclient"
"github.com/docker/libnetwork/cmd/networkdb-test/dbserver"
"github.com/sirupsen/logrus"
)
func main() {
formatter := &logrus.TextFormatter{
FullTimestamp: true,
}
logrus.SetFormatter(formatter)
logrus.Infof("Starting the image with these args: %v", os.Args)
if len(os.Args) < 1 {
log.Fatal("You need at least 1 argument [client/server]")
}
switch os.Args[1] {
case "server":
dbserver.Server(os.Args[2:])
case "client":
dbclient.Client(os.Args[2:])
}
}

View file

@ -0,0 +1,174 @@
package main
import (
"fmt"
"net"
"os"
"os/signal"
"github.com/docker/docker/pkg/plugingetter"
"github.com/docker/docker/pkg/reexec"
"github.com/docker/libnetwork/driverapi"
"github.com/docker/libnetwork/drivers/overlay"
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/types"
"github.com/vishvananda/netlink"
)
type router struct {
d driverapi.Driver
}
type endpoint struct {
addr *net.IPNet
mac net.HardwareAddr
name string
}
func (r *router) GetPluginGetter() plugingetter.PluginGetter {
return nil
}
func (r *router) RegisterDriver(name string, driver driverapi.Driver, c driverapi.Capability) error {
r.d = driver
return nil
}
func (ep *endpoint) Interface() driverapi.InterfaceInfo {
return nil
}
func (ep *endpoint) SetMacAddress(mac net.HardwareAddr) error {
if ep.mac != nil {
return types.ForbiddenErrorf("endpoint interface MAC address present (%s). Cannot be modified with %s.", ep.mac, mac)
}
if mac == nil {
return types.BadRequestErrorf("tried to set nil MAC address to endpoint interface")
}
ep.mac = types.GetMacCopy(mac)
return nil
}
func (ep *endpoint) SetIPAddress(address *net.IPNet) error {
if address.IP == nil {
return types.BadRequestErrorf("tried to set nil IP address to endpoint interface")
}
if address.IP.To4() == nil {
return types.NotImplementedErrorf("do not support ipv6 yet")
}
if ep.addr != nil {
return types.ForbiddenErrorf("endpoint interface IP present (%s). Cannot be modified with %s.", ep.addr, address)
}
ep.addr = types.GetIPNetCopy(address)
return nil
}
func (ep *endpoint) MacAddress() net.HardwareAddr {
return types.GetMacCopy(ep.mac)
}
func (ep *endpoint) Address() *net.IPNet {
return types.GetIPNetCopy(ep.addr)
}
func (ep *endpoint) AddressIPv6() *net.IPNet {
return nil
}
func (ep *endpoint) InterfaceName() driverapi.InterfaceNameInfo {
return ep
}
func (ep *endpoint) SetNames(srcName, dstPrefix string) error {
ep.name = srcName
return nil
}
func (ep *endpoint) SetGateway(net.IP) error {
return nil
}
func (ep *endpoint) SetGatewayIPv6(net.IP) error {
return nil
}
func (ep *endpoint) AddStaticRoute(destination *net.IPNet, routeType int,
nextHop net.IP) error {
return nil
}
func (ep *endpoint) AddTableEntry(tableName string, key string, value []byte) error {
return nil
}
func (ep *endpoint) DisableGatewayService() {}
func main() {
if reexec.Init() {
return
}
opt := make(map[string]interface{})
if len(os.Args) > 1 {
opt[netlabel.OverlayBindInterface] = os.Args[1]
}
if len(os.Args) > 2 {
opt[netlabel.OverlayNeighborIP] = os.Args[2]
}
if len(os.Args) > 3 {
opt[netlabel.GlobalKVProvider] = os.Args[3]
}
if len(os.Args) > 4 {
opt[netlabel.GlobalKVProviderURL] = os.Args[4]
}
r := &router{}
if err := overlay.Init(r, opt); err != nil {
fmt.Printf("Failed to initialize overlay driver: %v\n", err)
os.Exit(1)
}
if err := r.d.CreateNetwork("testnetwork",
map[string]interface{}{}, nil, nil, nil); err != nil {
fmt.Printf("Failed to create network in the driver: %v\n", err)
os.Exit(1)
}
ep := &endpoint{}
if err := r.d.CreateEndpoint("testnetwork", "testep",
ep, map[string]interface{}{}); err != nil {
fmt.Printf("Failed to create endpoint in the driver: %v\n", err)
os.Exit(1)
}
if err := r.d.Join("testnetwork", "testep",
"", ep, map[string]interface{}{}); err != nil {
fmt.Printf("Failed to join an endpoint in the driver: %v\n", err)
os.Exit(1)
}
link, err := netlink.LinkByName(ep.name)
if err != nil {
fmt.Printf("Failed to find the container interface with name %s: %v\n",
ep.name, err)
os.Exit(1)
}
ipAddr := &netlink.Addr{IPNet: ep.addr, Label: ""}
if err := netlink.AddrAdd(link, ipAddr); err != nil {
fmt.Printf("Failed to add address to the interface: %v\n", err)
os.Exit(1)
}
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, os.Interrupt, os.Kill)
for {
select {
case <-sigCh:
r.d.Leave("testnetwork", "testep")
overlay.Fini(r.d)
os.Exit(0)
}
}
}

View file

@ -0,0 +1,72 @@
package main
import (
"flag"
"fmt"
"log"
"net"
"os"
"os/signal"
"syscall"
"github.com/ishidawataru/sctp"
)
func main() {
f := os.NewFile(3, "signal-parent")
host, container := parseHostContainerAddrs()
p, err := NewProxy(host, container)
if err != nil {
fmt.Fprintf(f, "1\n%s", err)
f.Close()
os.Exit(1)
}
go handleStopSignals(p)
fmt.Fprint(f, "0\n")
f.Close()
// Run will block until the proxy stops
p.Run()
}
// parseHostContainerAddrs parses the flags passed on reexec to create the TCP/UDP/SCTP
// net.Addrs to map the host and container ports
func parseHostContainerAddrs() (host net.Addr, container net.Addr) {
var (
proto = flag.String("proto", "tcp", "proxy protocol")
hostIP = flag.String("host-ip", "", "host ip")
hostPort = flag.Int("host-port", -1, "host port")
containerIP = flag.String("container-ip", "", "container ip")
containerPort = flag.Int("container-port", -1, "container port")
)
flag.Parse()
switch *proto {
case "tcp":
host = &net.TCPAddr{IP: net.ParseIP(*hostIP), Port: *hostPort}
container = &net.TCPAddr{IP: net.ParseIP(*containerIP), Port: *containerPort}
case "udp":
host = &net.UDPAddr{IP: net.ParseIP(*hostIP), Port: *hostPort}
container = &net.UDPAddr{IP: net.ParseIP(*containerIP), Port: *containerPort}
case "sctp":
host = &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.ParseIP(*hostIP)}}, Port: *hostPort}
container = &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.ParseIP(*containerIP)}}, Port: *containerPort}
default:
log.Fatalf("unsupported protocol %s", *proto)
}
return host, container
}
func handleStopSignals(p Proxy) {
s := make(chan os.Signal, 10)
signal.Notify(s, os.Interrupt, syscall.SIGTERM)
for range s {
p.Close()
os.Exit(0)
}
}

View file

@ -0,0 +1,307 @@
package main
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"net"
"strings"
"testing"
"time"
"github.com/ishidawataru/sctp"
// this takes care of the incontainer flag
_ "github.com/docker/libnetwork/testutils"
)
var testBuf = []byte("Buffalo buffalo Buffalo buffalo buffalo buffalo Buffalo buffalo")
var testBufSize = len(testBuf)
type EchoServer interface {
Run()
Close()
LocalAddr() net.Addr
}
type EchoServerOptions struct {
TCPHalfClose bool
}
type StreamEchoServer struct {
listener net.Listener
testCtx *testing.T
opts EchoServerOptions
}
type UDPEchoServer struct {
conn net.PacketConn
testCtx *testing.T
}
func NewEchoServer(t *testing.T, proto, address string, opts EchoServerOptions) EchoServer {
var server EchoServer
if !strings.HasPrefix(proto, "tcp") && opts.TCPHalfClose {
t.Fatalf("TCPHalfClose is not supported for %s", proto)
}
switch {
case strings.HasPrefix(proto, "tcp"):
listener, err := net.Listen(proto, address)
if err != nil {
t.Fatal(err)
}
server = &StreamEchoServer{listener: listener, testCtx: t, opts: opts}
case strings.HasPrefix(proto, "udp"):
socket, err := net.ListenPacket(proto, address)
if err != nil {
t.Fatal(err)
}
server = &UDPEchoServer{conn: socket, testCtx: t}
case strings.HasPrefix(proto, "sctp"):
addr, err := sctp.ResolveSCTPAddr(proto, address)
if err != nil {
t.Fatal(err)
}
listener, err := sctp.ListenSCTP(proto, addr)
if err != nil {
t.Fatal(err)
}
server = &StreamEchoServer{listener: listener, testCtx: t}
default:
t.Fatalf("unknown protocol: %s", proto)
}
return server
}
func (server *StreamEchoServer) Run() {
go func() {
for {
client, err := server.listener.Accept()
if err != nil {
return
}
go func(client net.Conn) {
if server.opts.TCPHalfClose {
data, err := ioutil.ReadAll(client)
if err != nil {
server.testCtx.Logf("io.ReadAll() failed for the client: %v\n", err.Error())
}
if _, err := client.Write(data); err != nil {
server.testCtx.Logf("can't echo to the client: %v\n", err.Error())
}
client.(*net.TCPConn).CloseWrite()
} else {
if _, err := io.Copy(client, client); err != nil {
server.testCtx.Logf("can't echo to the client: %v\n", err.Error())
}
client.Close()
}
}(client)
}
}()
}
func (server *StreamEchoServer) LocalAddr() net.Addr { return server.listener.Addr() }
func (server *StreamEchoServer) Close() { server.listener.Close() }
func (server *UDPEchoServer) Run() {
go func() {
readBuf := make([]byte, 1024)
for {
read, from, err := server.conn.ReadFrom(readBuf)
if err != nil {
return
}
for i := 0; i != read; {
written, err := server.conn.WriteTo(readBuf[i:read], from)
if err != nil {
break
}
i += written
}
}
}()
}
func (server *UDPEchoServer) LocalAddr() net.Addr { return server.conn.LocalAddr() }
func (server *UDPEchoServer) Close() { server.conn.Close() }
func testProxyAt(t *testing.T, proto string, proxy Proxy, addr string, halfClose bool) {
defer proxy.Close()
go proxy.Run()
var client net.Conn
var err error
if strings.HasPrefix(proto, "sctp") {
var a *sctp.SCTPAddr
a, err = sctp.ResolveSCTPAddr(proto, addr)
if err != nil {
t.Fatal(err)
}
client, err = sctp.DialSCTP(proto, nil, a)
} else {
client, err = net.Dial(proto, addr)
}
if err != nil {
t.Fatalf("Can't connect to the proxy: %v", err)
}
defer client.Close()
client.SetDeadline(time.Now().Add(10 * time.Second))
if _, err = client.Write(testBuf); err != nil {
t.Fatal(err)
}
if halfClose {
if proto != "tcp" {
t.Fatalf("halfClose is not supported for %s", proto)
}
client.(*net.TCPConn).CloseWrite()
}
recvBuf := make([]byte, testBufSize)
if _, err = client.Read(recvBuf); err != nil {
t.Fatal(err)
}
if !bytes.Equal(testBuf, recvBuf) {
t.Fatal(fmt.Errorf("Expected [%v] but got [%v]", testBuf, recvBuf))
}
}
func testProxy(t *testing.T, proto string, proxy Proxy, halfClose bool) {
testProxyAt(t, proto, proxy, proxy.FrontendAddr().String(), halfClose)
}
func testTCP4Proxy(t *testing.T, halfClose bool) {
backend := NewEchoServer(t, "tcp", "127.0.0.1:0", EchoServerOptions{TCPHalfClose: halfClose})
defer backend.Close()
backend.Run()
frontendAddr := &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if err != nil {
t.Fatal(err)
}
testProxy(t, "tcp", proxy, halfClose)
}
func TestTCP4Proxy(t *testing.T) {
testTCP4Proxy(t, false)
}
func TestTCP4ProxyHalfClose(t *testing.T) {
testTCP4Proxy(t, true)
}
func TestTCP6Proxy(t *testing.T) {
t.Skip("Need to start CI docker with --ipv6")
backend := NewEchoServer(t, "tcp", "[::1]:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &net.TCPAddr{IP: net.IPv6loopback, Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if err != nil {
t.Fatal(err)
}
testProxy(t, "tcp", proxy, false)
}
func TestTCPDualStackProxy(t *testing.T) {
// If I understand `godoc -src net favoriteAddrFamily` (used by the
// net.Listen* functions) correctly this should work, but it doesn't.
t.Skip("No support for dual stack yet")
backend := NewEchoServer(t, "tcp", "[::1]:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &net.TCPAddr{IP: net.IPv6loopback, Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if err != nil {
t.Fatal(err)
}
ipv4ProxyAddr := &net.TCPAddr{
IP: net.IPv4(127, 0, 0, 1),
Port: proxy.FrontendAddr().(*net.TCPAddr).Port,
}
testProxyAt(t, "tcp", proxy, ipv4ProxyAddr.String(), false)
}
func TestUDP4Proxy(t *testing.T) {
backend := NewEchoServer(t, "udp", "127.0.0.1:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if err != nil {
t.Fatal(err)
}
testProxy(t, "udp", proxy, false)
}
func TestUDP6Proxy(t *testing.T) {
t.Skip("Need to start CI docker with --ipv6")
backend := NewEchoServer(t, "udp", "[::1]:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &net.UDPAddr{IP: net.IPv6loopback, Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if err != nil {
t.Fatal(err)
}
testProxy(t, "udp", proxy, false)
}
func TestUDPWriteError(t *testing.T) {
frontendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 0}
// Hopefully, this port will be free: */
backendAddr := &net.UDPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 25587}
proxy, err := NewProxy(frontendAddr, backendAddr)
if err != nil {
t.Fatal(err)
}
defer proxy.Close()
go proxy.Run()
client, err := net.Dial("udp", "127.0.0.1:25587")
if err != nil {
t.Fatalf("Can't connect to the proxy: %v", err)
}
defer client.Close()
// Make sure the proxy doesn't stop when there is no actual backend:
client.Write(testBuf)
client.Write(testBuf)
backend := NewEchoServer(t, "udp", "127.0.0.1:25587", EchoServerOptions{})
defer backend.Close()
backend.Run()
client.SetDeadline(time.Now().Add(10 * time.Second))
if _, err = client.Write(testBuf); err != nil {
t.Fatal(err)
}
recvBuf := make([]byte, testBufSize)
if _, err = client.Read(recvBuf); err != nil {
t.Fatal(err)
}
if !bytes.Equal(testBuf, recvBuf) {
t.Fatal(fmt.Errorf("Expected [%v] but got [%v]", testBuf, recvBuf))
}
}
func TestSCTP4Proxy(t *testing.T) {
backend := NewEchoServer(t, "sctp", "127.0.0.1:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.IPv4(127, 0, 0, 1)}}, Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if err != nil {
t.Fatal(err)
}
testProxy(t, "sctp", proxy, false)
}
func TestSCTP6Proxy(t *testing.T) {
t.Skip("Need to start CI docker with --ipv6")
backend := NewEchoServer(t, "sctp", "[::1]:0", EchoServerOptions{})
defer backend.Close()
backend.Run()
frontendAddr := &sctp.SCTPAddr{IPAddrs: []net.IPAddr{{IP: net.IPv6loopback}}, Port: 0}
proxy, err := NewProxy(frontendAddr, backend.LocalAddr())
if err != nil {
t.Fatal(err)
}
testProxy(t, "sctp", proxy, false)
}

View file

@ -0,0 +1,50 @@
// docker-proxy provides a network Proxy interface and implementations for TCP
// and UDP.
package main
import (
"net"
"github.com/ishidawataru/sctp"
)
// ipVersion refers to IP version - v4 or v6
type ipVersion string
const (
// IPv4 is version 4
ipv4 ipVersion = "4"
// IPv4 is version 6
ipv6 ipVersion = "6"
)
// Proxy defines the behavior of a proxy. It forwards traffic back and forth
// between two endpoints : the frontend and the backend.
// It can be used to do software port-mapping between two addresses.
// e.g. forward all traffic between the frontend (host) 127.0.0.1:3000
// to the backend (container) at 172.17.42.108:4000.
type Proxy interface {
// Run starts forwarding traffic back and forth between the front
// and back-end addresses.
Run()
// Close stops forwarding traffic and close both ends of the Proxy.
Close()
// FrontendAddr returns the address on which the proxy is listening.
FrontendAddr() net.Addr
// BackendAddr returns the proxied address.
BackendAddr() net.Addr
}
// NewProxy creates a Proxy according to the specified frontendAddr and backendAddr.
func NewProxy(frontendAddr, backendAddr net.Addr) (Proxy, error) {
switch frontendAddr.(type) {
case *net.UDPAddr:
return NewUDPProxy(frontendAddr.(*net.UDPAddr), backendAddr.(*net.UDPAddr))
case *net.TCPAddr:
return NewTCPProxy(frontendAddr.(*net.TCPAddr), backendAddr.(*net.TCPAddr))
case *sctp.SCTPAddr:
return NewSCTPProxy(frontendAddr.(*sctp.SCTPAddr), backendAddr.(*sctp.SCTPAddr))
default:
panic("Unsupported protocol")
}
}

View file

@ -0,0 +1,98 @@
package main
import (
"io"
"log"
"net"
"sync"
"github.com/ishidawataru/sctp"
)
// SCTPProxy is a proxy for SCTP connections. It implements the Proxy interface to
// handle SCTP traffic forwarding between the frontend and backend addresses.
type SCTPProxy struct {
listener *sctp.SCTPListener
frontendAddr *sctp.SCTPAddr
backendAddr *sctp.SCTPAddr
}
// NewSCTPProxy creates a new SCTPProxy.
func NewSCTPProxy(frontendAddr, backendAddr *sctp.SCTPAddr) (*SCTPProxy, error) {
// detect version of hostIP to bind only to correct version
ipVersion := ipv4
if frontendAddr.IPAddrs[0].IP.To4() == nil {
ipVersion = ipv6
}
listener, err := sctp.ListenSCTP("sctp"+string(ipVersion), frontendAddr)
if err != nil {
return nil, err
}
// If the port in frontendAddr was 0 then ListenSCTP will have a picked
// a port to listen on, hence the call to Addr to get that actual port:
return &SCTPProxy{
listener: listener,
frontendAddr: listener.Addr().(*sctp.SCTPAddr),
backendAddr: backendAddr,
}, nil
}
func (proxy *SCTPProxy) clientLoop(client *sctp.SCTPConn, quit chan bool) {
backend, err := sctp.DialSCTP("sctp", nil, proxy.backendAddr)
if err != nil {
log.Printf("Can't forward traffic to backend sctp/%v: %s\n", proxy.backendAddr, err)
client.Close()
return
}
clientC := sctp.NewSCTPSndRcvInfoWrappedConn(client)
backendC := sctp.NewSCTPSndRcvInfoWrappedConn(backend)
var wg sync.WaitGroup
var broker = func(to, from net.Conn) {
io.Copy(to, from)
from.Close()
to.Close()
wg.Done()
}
wg.Add(2)
go broker(clientC, backendC)
go broker(backendC, clientC)
finish := make(chan struct{})
go func() {
wg.Wait()
close(finish)
}()
select {
case <-quit:
case <-finish:
}
clientC.Close()
backendC.Close()
<-finish
}
// Run starts forwarding the traffic using SCTP.
func (proxy *SCTPProxy) Run() {
quit := make(chan bool)
defer close(quit)
for {
client, err := proxy.listener.Accept()
if err != nil {
log.Printf("Stopping proxy on sctp/%v for sctp/%v (%s)", proxy.frontendAddr, proxy.backendAddr, err)
return
}
go proxy.clientLoop(client.(*sctp.SCTPConn), quit)
}
}
// Close stops forwarding the traffic.
func (proxy *SCTPProxy) Close() { proxy.listener.Close() }
// FrontendAddr returns the SCTP address on which the proxy is listening.
func (proxy *SCTPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr }
// BackendAddr returns the SCTP proxied address.
func (proxy *SCTPProxy) BackendAddr() net.Addr { return proxy.backendAddr }

View file

@ -0,0 +1,31 @@
package main
import (
"net"
)
// StubProxy is a proxy that is a stub (does nothing).
type StubProxy struct {
frontendAddr net.Addr
backendAddr net.Addr
}
// Run does nothing.
func (p *StubProxy) Run() {}
// Close does nothing.
func (p *StubProxy) Close() {}
// FrontendAddr returns the frontend address.
func (p *StubProxy) FrontendAddr() net.Addr { return p.frontendAddr }
// BackendAddr returns the backend address.
func (p *StubProxy) BackendAddr() net.Addr { return p.backendAddr }
// NewStubProxy creates a new StubProxy
func NewStubProxy(frontendAddr, backendAddr net.Addr) (Proxy, error) {
return &StubProxy{
frontendAddr: frontendAddr,
backendAddr: backendAddr,
}, nil
}

View file

@ -0,0 +1,94 @@
package main
import (
"io"
"log"
"net"
"sync"
)
// TCPProxy is a proxy for TCP connections. It implements the Proxy interface to
// handle TCP traffic forwarding between the frontend and backend addresses.
type TCPProxy struct {
listener *net.TCPListener
frontendAddr *net.TCPAddr
backendAddr *net.TCPAddr
}
// NewTCPProxy creates a new TCPProxy.
func NewTCPProxy(frontendAddr, backendAddr *net.TCPAddr) (*TCPProxy, error) {
// detect version of hostIP to bind only to correct version
ipVersion := ipv4
if frontendAddr.IP.To4() == nil {
ipVersion = ipv6
}
listener, err := net.ListenTCP("tcp"+string(ipVersion), frontendAddr)
if err != nil {
return nil, err
}
// If the port in frontendAddr was 0 then ListenTCP will have a picked
// a port to listen on, hence the call to Addr to get that actual port:
return &TCPProxy{
listener: listener,
frontendAddr: listener.Addr().(*net.TCPAddr),
backendAddr: backendAddr,
}, nil
}
func (proxy *TCPProxy) clientLoop(client *net.TCPConn, quit chan bool) {
backend, err := net.DialTCP("tcp", nil, proxy.backendAddr)
if err != nil {
log.Printf("Can't forward traffic to backend tcp/%v: %s\n", proxy.backendAddr, err)
client.Close()
return
}
var wg sync.WaitGroup
var broker = func(to, from *net.TCPConn) {
io.Copy(to, from)
from.CloseRead()
to.CloseWrite()
wg.Done()
}
wg.Add(2)
go broker(client, backend)
go broker(backend, client)
finish := make(chan struct{})
go func() {
wg.Wait()
close(finish)
}()
select {
case <-quit:
case <-finish:
}
client.Close()
backend.Close()
<-finish
}
// Run starts forwarding the traffic using TCP.
func (proxy *TCPProxy) Run() {
quit := make(chan bool)
defer close(quit)
for {
client, err := proxy.listener.Accept()
if err != nil {
log.Printf("Stopping proxy on tcp/%v for tcp/%v (%s)", proxy.frontendAddr, proxy.backendAddr, err)
return
}
go proxy.clientLoop(client.(*net.TCPConn), quit)
}
}
// Close stops forwarding the traffic.
func (proxy *TCPProxy) Close() { proxy.listener.Close() }
// FrontendAddr returns the TCP address on which the proxy is listening.
func (proxy *TCPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr }
// BackendAddr returns the TCP proxied address.
func (proxy *TCPProxy) BackendAddr() net.Addr { return proxy.backendAddr }

View file

@ -0,0 +1,173 @@
package main
import (
"encoding/binary"
"log"
"net"
"strings"
"sync"
"syscall"
"time"
)
const (
// UDPConnTrackTimeout is the timeout used for UDP connection tracking
UDPConnTrackTimeout = 90 * time.Second
// UDPBufSize is the buffer size for the UDP proxy
UDPBufSize = 65507
)
// A net.Addr where the IP is split into two fields so you can use it as a key
// in a map:
type connTrackKey struct {
IPHigh uint64
IPLow uint64
Port int
}
func newConnTrackKey(addr *net.UDPAddr) *connTrackKey {
if len(addr.IP) == net.IPv4len {
return &connTrackKey{
IPHigh: 0,
IPLow: uint64(binary.BigEndian.Uint32(addr.IP)),
Port: addr.Port,
}
}
return &connTrackKey{
IPHigh: binary.BigEndian.Uint64(addr.IP[:8]),
IPLow: binary.BigEndian.Uint64(addr.IP[8:]),
Port: addr.Port,
}
}
type connTrackMap map[connTrackKey]*net.UDPConn
// UDPProxy is proxy for which handles UDP datagrams. It implements the Proxy
// interface to handle UDP traffic forwarding between the frontend and backend
// addresses.
type UDPProxy struct {
listener *net.UDPConn
frontendAddr *net.UDPAddr
backendAddr *net.UDPAddr
connTrackTable connTrackMap
connTrackLock sync.Mutex
}
// NewUDPProxy creates a new UDPProxy.
func NewUDPProxy(frontendAddr, backendAddr *net.UDPAddr) (*UDPProxy, error) {
// detect version of hostIP to bind only to correct version
ipVersion := ipv4
if frontendAddr.IP.To4() == nil {
ipVersion = ipv6
}
listener, err := net.ListenUDP("udp"+string(ipVersion), frontendAddr)
if err != nil {
return nil, err
}
return &UDPProxy{
listener: listener,
frontendAddr: listener.LocalAddr().(*net.UDPAddr),
backendAddr: backendAddr,
connTrackTable: make(connTrackMap),
}, nil
}
func (proxy *UDPProxy) replyLoop(proxyConn *net.UDPConn, clientAddr *net.UDPAddr, clientKey *connTrackKey) {
defer func() {
proxy.connTrackLock.Lock()
delete(proxy.connTrackTable, *clientKey)
proxy.connTrackLock.Unlock()
proxyConn.Close()
}()
readBuf := make([]byte, UDPBufSize)
for {
proxyConn.SetReadDeadline(time.Now().Add(UDPConnTrackTimeout))
again:
read, err := proxyConn.Read(readBuf)
if err != nil {
if err, ok := err.(*net.OpError); ok && err.Err == syscall.ECONNREFUSED {
// This will happen if the last write failed
// (e.g: nothing is actually listening on the
// proxied port on the container), ignore it
// and continue until UDPConnTrackTimeout
// expires:
goto again
}
return
}
for i := 0; i != read; {
written, err := proxy.listener.WriteToUDP(readBuf[i:read], clientAddr)
if err != nil {
return
}
i += written
}
}
}
// Run starts forwarding the traffic using UDP.
func (proxy *UDPProxy) Run() {
readBuf := make([]byte, UDPBufSize)
for {
read, from, err := proxy.listener.ReadFromUDP(readBuf)
if err != nil {
// NOTE: Apparently ReadFrom doesn't return
// ECONNREFUSED like Read do (see comment in
// UDPProxy.replyLoop)
if !isClosedError(err) {
log.Printf("Stopping proxy on udp/%v for udp/%v (%s)", proxy.frontendAddr, proxy.backendAddr, err)
}
break
}
fromKey := newConnTrackKey(from)
proxy.connTrackLock.Lock()
proxyConn, hit := proxy.connTrackTable[*fromKey]
if !hit {
proxyConn, err = net.DialUDP("udp", nil, proxy.backendAddr)
if err != nil {
log.Printf("Can't proxy a datagram to udp/%s: %s\n", proxy.backendAddr, err)
proxy.connTrackLock.Unlock()
continue
}
proxy.connTrackTable[*fromKey] = proxyConn
go proxy.replyLoop(proxyConn, from, fromKey)
}
proxy.connTrackLock.Unlock()
for i := 0; i != read; {
written, err := proxyConn.Write(readBuf[i:read])
if err != nil {
log.Printf("Can't proxy a datagram to udp/%s: %s\n", proxy.backendAddr, err)
break
}
i += written
}
}
}
// Close stops forwarding the traffic.
func (proxy *UDPProxy) Close() {
proxy.listener.Close()
proxy.connTrackLock.Lock()
defer proxy.connTrackLock.Unlock()
for _, conn := range proxy.connTrackTable {
conn.Close()
}
}
// FrontendAddr returns the UDP address on which the proxy is listening.
func (proxy *UDPProxy) FrontendAddr() net.Addr { return proxy.frontendAddr }
// BackendAddr returns the proxied UDP address.
func (proxy *UDPProxy) BackendAddr() net.Addr { return proxy.backendAddr }
func isClosedError(err error) bool {
/* This comparison is ugly, but unfortunately, net.go doesn't export errClosing.
* See:
* http://golang.org/src/pkg/net/net.go
* https://code.google.com/p/go/issues/detail?id=4337
* https://groups.google.com/forum/#!msg/golang-nuts/0_aaCvBmOcM/SptmDyX1XJMJ
*/
return strings.HasSuffix(err.Error(), "use of closed network connection")
}

View file

@ -0,0 +1,74 @@
package main
import (
"fmt"
"log"
"github.com/docker/docker/pkg/reexec"
"github.com/docker/libnetwork"
"github.com/docker/libnetwork/config"
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/options"
)
func main() {
if reexec.Init() {
return
}
// Select and configure the network driver
networkType := "bridge"
// Create a new controller instance
driverOptions := options.Generic{}
genericOption := make(map[string]interface{})
genericOption[netlabel.GenericData] = driverOptions
controller, err := libnetwork.New(config.OptionDriverConfig(networkType, genericOption))
if err != nil {
log.Fatalf("libnetwork.New: %s", err)
}
// Create a network for containers to join.
// NewNetwork accepts Variadic optional arguments that libnetwork and Drivers can use.
network, err := controller.NewNetwork(networkType, "network1", "")
if err != nil {
log.Fatalf("controller.NewNetwork: %s", err)
}
// For each new container: allocate IP and interfaces. The returned network
// settings will be used for container infos (inspect and such), as well as
// iptables rules for port publishing. This info is contained or accessible
// from the returned endpoint.
ep, err := network.CreateEndpoint("Endpoint1")
if err != nil {
log.Fatalf("network.CreateEndpoint: %s", err)
}
// Create the sandbox for the container.
// NewSandbox accepts Variadic optional arguments which libnetwork can use.
sbx, err := controller.NewSandbox("container1",
libnetwork.OptionHostname("test"),
libnetwork.OptionDomainname("docker.io"))
if err != nil {
log.Fatalf("controller.NewSandbox: %s", err)
}
// A sandbox can join the endpoint via the join api.
err = ep.Join(sbx)
if err != nil {
log.Fatalf("ep.Join: %s", err)
}
// libnetwork client can check the endpoint's operational data via the Info() API
epInfo, err := ep.DriverInfo()
if err != nil {
log.Fatalf("ep.DriverInfo: %s", err)
}
macAddress, ok := epInfo[netlabel.MacAddress]
if !ok {
log.Fatal("failed to get mac address from endpoint info")
}
fmt.Printf("Joined endpoint %s (%s) to sandbox %s (%s)\n", ep.Name(), macAddress, sbx.ContainerID(), sbx.Key())
}

34
libnetwork/cmd/ssd/Dockerfile Executable file
View file

@ -0,0 +1,34 @@
FROM alpine:3.7
ENV PACKAGES="\
musl \
linux-headers \
build-base \
util-linux \
bash \
git \
ca-certificates \
python2 \
python2-dev \
py-setuptools \
iproute2 \
curl \
strace \
drill \
ipvsadm \
iperf \
ethtool \
"
RUN echo \
&& apk add --no-cache $PACKAGES \
&& if [[ ! -e /usr/bin/python ]]; then ln -sf /usr/bin/python2.7 /usr/bin/python; fi \
&& if [[ ! -e /usr/bin/python-config ]]; then ln -sf /usr/bin/python2.7-config /usr/bin/python-config; fi \
&& if [[ ! -e /usr/bin/easy_install ]]; then ln -sf /usr/bin/easy_install-2.7 /usr/bin/easy_install; fi \
&& easy_install pip \
&& pip install --upgrade pip \
&& if [[ ! -e /usr/bin/pip ]]; then ln -sf /usr/bin/pip2.7 /usr/bin/pip; fi \
&& echo
ADD ssd.py /
RUN pip install git+git://github.com/docker/docker-py.git
ENTRYPOINT [ "python", "/ssd.py"]

47
libnetwork/cmd/ssd/README.md Executable file
View file

@ -0,0 +1,47 @@
# Docker Swarm Service Driller(ssd)
ssd is a troubleshooting utility for Docker swarm networks.
### control-plane and datapath consistency check on a node
ssd checks for the consistency between docker network control-plane (from the docker daemon in-memory state) and kernel data path programming. Currently the tool checks only for the consistency of the Load balancer (implemented using IPVS).
In a three node swarm cluser ssd status for a overlay network `ov2` which has three services running, each replicated to 3 instances.
````bash
vagrant@net-1:~/code/go/src/github.com/docker/docker-e2e/tests$ docker run -v /var/run/docker.sock:/var/run/docker.sock -v /var/run/docker/netns:/var/run/docker/netns --privileged --net=host sanimej/ssd ov2
Verifying LB programming for containers on network ov2
Verifying container /s2.3.ltrdwef0iqf90rqauw3ehcs56...
service s2... OK
service s3... OK
service s1... OK
Verifying container /s3.3.nyhwvdvnocb4wftyhb8dr4fj8...
service s2... OK
service s3... OK
service s1... OK
Verifying container /s1.3.wwx5tuxhnvoz5vrb8ohphby0r...
service s2... OK
service s3... OK
service s1... OK
Verifying LB programming for containers on network ingress
Verifying container Ingress...
service web... OK
````
ssd checks the required iptables programming to direct an incoming packet with the <host ip>:<published port> to the right <backend ip>:<target port>
### control-plane consistency check across nodes in a cluster
Docker networking uses a gossip protocol to synchronize networking state across nodes in a cluster. ssd's `gossip-consistency` command verifies if the state maintained by all the nodes are consistent.
````bash
In a three node cluster with services running on an overlay network ov2 ssd consistency-checker shows
vagrant@net-1:~/code/go/src/github.com/docker/docker-e2e/tests$ docker run -v /var/run/docker.sock:/var/run/docker.sock -v /var/run/docker/netns:/var/run/docker/netns --privileged sanimej/ssd ov2 gossip-consistency
Node id: sjfp0ca8f43rvnab6v7f21gq0 gossip hash c57d89094dbb574a37930393278dc282
Node id: bg228r3q9095grj4wxkqs80oe gossip hash c57d89094dbb574a37930393278dc282
Node id: 6jylcraipcv2pxdricqe77j5q gossip hash c57d89094dbb574a37930393278dc282
````
This is hash digest of the control-plane state for the network `ov2` from all the cluster nodes. If the values have a mismatch `docker network inspect --verbose` on the individual nodes can help in identifying what the specific difference is.

193
libnetwork/cmd/ssd/ssd.py Executable file
View file

@ -0,0 +1,193 @@
#!/usr/bin/python
import sys, signal, time, os
import docker
import re
import subprocess
import json
import hashlib
ipv4match = re.compile(
r'(25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9]).' +
r'(25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9]).' +
r'(25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9]).' +
r'(25[0-5]|2[0-4][0-9]|[01]?[0-9]?[0-9])'
)
def which(name, defaultPath=""):
if defaultPath and os.path.exists(defaultPath):
return defaultPath
for path in os.getenv("PATH").split(os.path.pathsep):
fullPath = path + os.sep + name
if os.path.exists(fullPath):
return fullPath
def check_iptables(name, plist):
replace = (':', ',')
ports = []
for port in plist:
for r in replace:
port = port.replace(r, ' ')
p = port.split()
ports.append((p[1], p[3]))
# get the ingress sandbox's docker_gwbridge network IP.
# published ports get DNAT'ed to this IP.
ip = subprocess.check_output([ which("nsenter","/usr/bin/nsenter"), '--net=/var/run/docker/netns/ingress_sbox', which("bash", "/bin/bash"), '-c', 'ifconfig eth1 | grep \"inet\\ addr\" | cut -d: -f2 | cut -d\" \" -f1'])
ip = ip.rstrip()
for p in ports:
rule = which("iptables", "/sbin/iptables") + '-t nat -C DOCKER-INGRESS -p tcp --dport {0} -j DNAT --to {1}:{2}'.format(p[1], ip, p[1])
try:
subprocess.check_output([which("bash", "/bin/bash"), "-c", rule])
except subprocess.CalledProcessError as e:
print "Service {0}: host iptables DNAT rule for port {1} -> ingress sandbox {2}:{3} missing".format(name, p[1], ip, p[1])
def get_namespaces(data, ingress=False):
if ingress is True:
return {"Ingress":"/var/run/docker/netns/ingress_sbox"}
else:
spaces =[]
for c in data["Containers"]:
sandboxes = {str(c) for c in data["Containers"]}
containers = {}
for s in sandboxes:
spaces.append(str(cli.inspect_container(s)["NetworkSettings"]["SandboxKey"]))
inspect = cli.inspect_container(s)
containers[str(inspect["Name"])] = str(inspect["NetworkSettings"]["SandboxKey"])
return containers
def check_network(nw_name, ingress=False):
print "Verifying LB programming for containers on network %s" % nw_name
data = cli.inspect_network(nw_name, verbose=True)
if "Services" in data.keys():
services = data["Services"]
else:
print "Network %s has no services. Skipping check" % nw_name
return
fwmarks = {str(service): str(svalue["LocalLBIndex"]) for service, svalue in services.items()}
stasks = {}
for service, svalue in services.items():
if service == "":
continue
tasks = []
for task in svalue["Tasks"]:
tasks.append(str(task["EndpointIP"]))
stasks[fwmarks[str(service)]] = tasks
# for services in ingress network verify the iptables rules
# that direct ingress (published port) to backend (target port)
if ingress is True:
check_iptables(service, svalue["Ports"])
containers = get_namespaces(data, ingress)
for container, namespace in containers.items():
print "Verifying container %s..." % container
ipvs = subprocess.check_output([which("nsenter","/usr/bin/nsenter"), '--net=%s' % namespace, which("ipvsadm","/usr/sbin/ipvsadm"), '-ln'])
mark = ""
realmark = {}
for line in ipvs.splitlines():
if "FWM" in line:
mark = re.findall("[0-9]+", line)[0]
realmark[str(mark)] = []
elif "->" in line:
if mark == "":
continue
ip = ipv4match.search(line)
if ip is not None:
realmark[mark].append(format(ip.group(0)))
else:
mark = ""
for key in realmark.keys():
if key not in stasks:
print "LB Index %s" % key, "present in IPVS but missing in docker daemon"
del realmark[key]
for key in stasks.keys():
if key not in realmark:
print "LB Index %s" % key, "present in docker daemon but missing in IPVS"
del stasks[key]
for key in realmark:
service = "--Invalid--"
for sname, idx in fwmarks.items():
if key == idx:
service = sname
if len(set(realmark[key])) != len(set(stasks[key])):
print "Incorrect LB Programming for service %s" % service
print "control-plane backend tasks:"
for task in stasks[key]:
print task
print "kernel IPVS backend tasks:"
for task in realmark[key]:
print task
else:
print "service %s... OK" % service
if __name__ == '__main__':
if len(sys.argv) < 2:
print 'Usage: ssd.py network-name [gossip-consistency]'
sys.exit()
cli = docker.APIClient(base_url='unix://var/run/docker.sock', version='auto')
if len(sys.argv) == 3:
command = sys.argv[2]
else:
command = 'default'
if command == 'gossip-consistency':
cspec = docker.types.ContainerSpec(
image='docker/ssd',
args=[sys.argv[1], 'gossip-hash'],
mounts=[docker.types.Mount('/var/run/docker.sock', '/var/run/docker.sock', type='bind')]
)
mode = docker.types.ServiceMode(
mode='global'
)
task_template = docker.types.TaskTemplate(cspec)
cli.create_service(task_template, name='gossip-hash', mode=mode)
#TODO change to a deterministic way to check if the service is up.
time.sleep(5)
output = cli.service_logs('gossip-hash', stdout=True, stderr=True, details=True)
for line in output:
print("Node id: %s gossip hash %s" % (line[line.find("=")+1:line.find(",")], line[line.find(" ")+1:]))
if cli.remove_service('gossip-hash') is not True:
print("Deleting gossip-hash service failed")
elif command == 'gossip-hash':
data = cli.inspect_network(sys.argv[1], verbose=True)
services = data["Services"]
md5 = hashlib.md5()
entries = []
for service, value in services.items():
entries.append(service)
entries.append(value["VIP"])
for task in value["Tasks"]:
for key, val in task.items():
if isinstance(val, dict):
for k, v in val.items():
entries.append(v)
else:
entries.append(val)
entries.sort()
for e in entries:
md5.update(e)
print(md5.hexdigest())
sys.stdout.flush()
while True:
signal.pause()
elif command == 'default':
if sys.argv[1] == "ingress":
check_network("ingress", ingress=True)
else:
check_network(sys.argv[1])
check_network("ingress", ingress=True)

328
libnetwork/config/config.go Normal file
View file

@ -0,0 +1,328 @@
package config
import (
"fmt"
"strings"
"github.com/BurntSushi/toml"
"github.com/docker/docker/pkg/discovery"
"github.com/docker/docker/pkg/plugingetter"
"github.com/docker/go-connections/tlsconfig"
"github.com/docker/libkv/store"
"github.com/docker/libnetwork/cluster"
"github.com/docker/libnetwork/datastore"
"github.com/docker/libnetwork/ipamutils"
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/osl"
"github.com/docker/libnetwork/portallocator"
"github.com/sirupsen/logrus"
)
const (
warningThNetworkControlPlaneMTU = 1500
minimumNetworkControlPlaneMTU = 500
)
// Config encapsulates configurations of various Libnetwork components
type Config struct {
Daemon DaemonCfg
Cluster ClusterCfg
Scopes map[string]*datastore.ScopeCfg
ActiveSandboxes map[string]interface{}
PluginGetter plugingetter.PluginGetter
}
// DaemonCfg represents libnetwork core configuration
type DaemonCfg struct {
Debug bool
Experimental bool
DataDir string
ExecRoot string
DefaultNetwork string
DefaultDriver string
Labels []string
DriverCfg map[string]interface{}
ClusterProvider cluster.Provider
NetworkControlPlaneMTU int
DefaultAddressPool []*ipamutils.NetworkToSplit
}
// ClusterCfg represents cluster configuration
type ClusterCfg struct {
Watcher discovery.Watcher
Address string
Discovery string
Heartbeat uint64
}
// LoadDefaultScopes loads default scope configs for scopes which
// doesn't have explicit user specified configs.
func (c *Config) LoadDefaultScopes(dataDir string) {
for k, v := range datastore.DefaultScopes(dataDir) {
if _, ok := c.Scopes[k]; !ok {
c.Scopes[k] = v
}
}
}
// ParseConfig parses the libnetwork configuration file
func ParseConfig(tomlCfgFile string) (*Config, error) {
cfg := &Config{
Scopes: map[string]*datastore.ScopeCfg{},
}
if _, err := toml.DecodeFile(tomlCfgFile, cfg); err != nil {
return nil, err
}
cfg.LoadDefaultScopes(cfg.Daemon.DataDir)
return cfg, nil
}
// ParseConfigOptions parses the configuration options and returns
// a reference to the corresponding Config structure
func ParseConfigOptions(cfgOptions ...Option) *Config {
cfg := &Config{
Daemon: DaemonCfg{
DriverCfg: make(map[string]interface{}),
},
Scopes: make(map[string]*datastore.ScopeCfg),
}
cfg.ProcessOptions(cfgOptions...)
cfg.LoadDefaultScopes(cfg.Daemon.DataDir)
return cfg
}
// Option is an option setter function type used to pass various configurations
// to the controller
type Option func(c *Config)
// OptionDefaultNetwork function returns an option setter for a default network
func OptionDefaultNetwork(dn string) Option {
return func(c *Config) {
logrus.Debugf("Option DefaultNetwork: %s", dn)
c.Daemon.DefaultNetwork = strings.TrimSpace(dn)
}
}
// OptionDefaultDriver function returns an option setter for default driver
func OptionDefaultDriver(dd string) Option {
return func(c *Config) {
logrus.Debugf("Option DefaultDriver: %s", dd)
c.Daemon.DefaultDriver = strings.TrimSpace(dd)
}
}
// OptionDefaultAddressPoolConfig function returns an option setter for default address pool
func OptionDefaultAddressPoolConfig(addressPool []*ipamutils.NetworkToSplit) Option {
return func(c *Config) {
c.Daemon.DefaultAddressPool = addressPool
}
}
// OptionDriverConfig returns an option setter for driver configuration.
func OptionDriverConfig(networkType string, config map[string]interface{}) Option {
return func(c *Config) {
c.Daemon.DriverCfg[networkType] = config
}
}
// OptionLabels function returns an option setter for labels
func OptionLabels(labels []string) Option {
return func(c *Config) {
for _, label := range labels {
if strings.HasPrefix(label, netlabel.Prefix) {
c.Daemon.Labels = append(c.Daemon.Labels, label)
}
}
}
}
// OptionKVProvider function returns an option setter for kvstore provider
func OptionKVProvider(provider string) Option {
return func(c *Config) {
logrus.Debugf("Option OptionKVProvider: %s", provider)
if _, ok := c.Scopes[datastore.GlobalScope]; !ok {
c.Scopes[datastore.GlobalScope] = &datastore.ScopeCfg{}
}
c.Scopes[datastore.GlobalScope].Client.Provider = strings.TrimSpace(provider)
}
}
// OptionKVProviderURL function returns an option setter for kvstore url
func OptionKVProviderURL(url string) Option {
return func(c *Config) {
logrus.Debugf("Option OptionKVProviderURL: %s", url)
if _, ok := c.Scopes[datastore.GlobalScope]; !ok {
c.Scopes[datastore.GlobalScope] = &datastore.ScopeCfg{}
}
c.Scopes[datastore.GlobalScope].Client.Address = strings.TrimSpace(url)
}
}
// OptionKVOpts function returns an option setter for kvstore options
func OptionKVOpts(opts map[string]string) Option {
return func(c *Config) {
if opts["kv.cacertfile"] != "" && opts["kv.certfile"] != "" && opts["kv.keyfile"] != "" {
logrus.Info("Option Initializing KV with TLS")
tlsConfig, err := tlsconfig.Client(tlsconfig.Options{
CAFile: opts["kv.cacertfile"],
CertFile: opts["kv.certfile"],
KeyFile: opts["kv.keyfile"],
})
if err != nil {
logrus.Errorf("Unable to set up TLS: %s", err)
return
}
if _, ok := c.Scopes[datastore.GlobalScope]; !ok {
c.Scopes[datastore.GlobalScope] = &datastore.ScopeCfg{}
}
if c.Scopes[datastore.GlobalScope].Client.Config == nil {
c.Scopes[datastore.GlobalScope].Client.Config = &store.Config{TLS: tlsConfig}
} else {
c.Scopes[datastore.GlobalScope].Client.Config.TLS = tlsConfig
}
// Workaround libkv/etcd bug for https
c.Scopes[datastore.GlobalScope].Client.Config.ClientTLS = &store.ClientTLSConfig{
CACertFile: opts["kv.cacertfile"],
CertFile: opts["kv.certfile"],
KeyFile: opts["kv.keyfile"],
}
} else {
logrus.Info("Option Initializing KV without TLS")
}
}
}
// OptionDiscoveryWatcher function returns an option setter for discovery watcher
func OptionDiscoveryWatcher(watcher discovery.Watcher) Option {
return func(c *Config) {
c.Cluster.Watcher = watcher
}
}
// OptionDiscoveryAddress function returns an option setter for self discovery address
func OptionDiscoveryAddress(address string) Option {
return func(c *Config) {
c.Cluster.Address = address
}
}
// OptionDataDir function returns an option setter for data folder
func OptionDataDir(dataDir string) Option {
return func(c *Config) {
c.Daemon.DataDir = dataDir
}
}
// OptionExecRoot function returns an option setter for exec root folder
func OptionExecRoot(execRoot string) Option {
return func(c *Config) {
c.Daemon.ExecRoot = execRoot
osl.SetBasePath(execRoot)
}
}
// OptionPluginGetter returns a plugingetter for remote drivers.
func OptionPluginGetter(pg plugingetter.PluginGetter) Option {
return func(c *Config) {
c.PluginGetter = pg
}
}
// OptionExperimental function returns an option setter for experimental daemon
func OptionExperimental(exp bool) Option {
return func(c *Config) {
logrus.Debugf("Option Experimental: %v", exp)
c.Daemon.Experimental = exp
}
}
// OptionDynamicPortRange function returns an option setter for service port allocation range
func OptionDynamicPortRange(in string) Option {
return func(c *Config) {
start, end := 0, 0
if len(in) > 0 {
n, err := fmt.Sscanf(in, "%d-%d", &start, &end)
if n != 2 || err != nil {
logrus.Errorf("Failed to parse range string with err %v", err)
return
}
}
if err := portallocator.Get().SetPortRange(start, end); err != nil {
logrus.Errorf("Failed to set port range with err %v", err)
}
}
}
// OptionNetworkControlPlaneMTU function returns an option setter for control plane MTU
func OptionNetworkControlPlaneMTU(exp int) Option {
return func(c *Config) {
logrus.Debugf("Network Control Plane MTU: %d", exp)
if exp < warningThNetworkControlPlaneMTU {
logrus.Warnf("Received a MTU of %d, this value is very low, the network control plane can misbehave,"+
" defaulting to minimum value (%d)", exp, minimumNetworkControlPlaneMTU)
if exp < minimumNetworkControlPlaneMTU {
exp = minimumNetworkControlPlaneMTU
}
}
c.Daemon.NetworkControlPlaneMTU = exp
}
}
// ProcessOptions processes options and stores it in config
func (c *Config) ProcessOptions(options ...Option) {
for _, opt := range options {
if opt != nil {
opt(c)
}
}
}
// IsValidName validates configuration objects supported by libnetwork
func IsValidName(name string) bool {
return strings.TrimSpace(name) != ""
}
// OptionLocalKVProvider function returns an option setter for kvstore provider
func OptionLocalKVProvider(provider string) Option {
return func(c *Config) {
logrus.Debugf("Option OptionLocalKVProvider: %s", provider)
if _, ok := c.Scopes[datastore.LocalScope]; !ok {
c.Scopes[datastore.LocalScope] = &datastore.ScopeCfg{}
}
c.Scopes[datastore.LocalScope].Client.Provider = strings.TrimSpace(provider)
}
}
// OptionLocalKVProviderURL function returns an option setter for kvstore url
func OptionLocalKVProviderURL(url string) Option {
return func(c *Config) {
logrus.Debugf("Option OptionLocalKVProviderURL: %s", url)
if _, ok := c.Scopes[datastore.LocalScope]; !ok {
c.Scopes[datastore.LocalScope] = &datastore.ScopeCfg{}
}
c.Scopes[datastore.LocalScope].Client.Address = strings.TrimSpace(url)
}
}
// OptionLocalKVProviderConfig function returns an option setter for kvstore config
func OptionLocalKVProviderConfig(config *store.Config) Option {
return func(c *Config) {
logrus.Debugf("Option OptionLocalKVProviderConfig: %v", config)
if _, ok := c.Scopes[datastore.LocalScope]; !ok {
c.Scopes[datastore.LocalScope] = &datastore.ScopeCfg{}
}
c.Scopes[datastore.LocalScope].Client.Config = config
}
}
// OptionActiveSandboxes function returns an option setter for passing the sandboxes
// which were active during previous daemon life
func OptionActiveSandboxes(sandboxes map[string]interface{}) Option {
return func(c *Config) {
c.ActiveSandboxes = sandboxes
}
}

View file

@ -0,0 +1,145 @@
package config
import (
"io/ioutil"
"os"
"strings"
"testing"
"github.com/docker/libnetwork/datastore"
"github.com/docker/libnetwork/netlabel"
_ "github.com/docker/libnetwork/testutils"
)
func TestInvalidConfig(t *testing.T) {
_, err := ParseConfig("invalid.toml")
if err == nil {
t.Fatal("Invalid Configuration file must fail")
}
}
func TestConfig(t *testing.T) {
_, err := ParseConfig("libnetwork.toml")
if err != nil {
t.Fatal("Error parsing a valid configuration file :", err)
}
}
func TestOptionsLabels(t *testing.T) {
c := &Config{}
l := []string{
"com.docker.network.key1=value1",
"com.docker.storage.key1=value1",
"com.docker.network.driver.key1=value1",
"com.docker.network.driver.key2=value2",
}
f := OptionLabels(l)
f(c)
if len(c.Daemon.Labels) != 3 {
t.Fatalf("Expecting 3 labels, seen %d", len(c.Daemon.Labels))
}
for _, l := range c.Daemon.Labels {
if !strings.HasPrefix(l, netlabel.Prefix) {
t.Fatalf("config must accept only libnetwork labels. Not : %s", l)
}
}
}
func TestValidName(t *testing.T) {
if !IsValidName("test") {
t.Fatal("Name validation fails for a name that must be accepted")
}
if IsValidName("") {
t.Fatal("Name validation succeeds for a case when it is expected to fail")
}
if IsValidName(" ") {
t.Fatal("Name validation succeeds for a case when it is expected to fail")
}
}
func TestTLSConfiguration(t *testing.T) {
cert := `-----BEGIN CERTIFICATE-----
MIIDCDCCAfKgAwIBAgIICifG7YeiQOEwCwYJKoZIhvcNAQELMBIxEDAOBgNVBAMT
B1Rlc3QgQ0EwHhcNMTUxMDAxMjMwMDAwWhcNMjAwOTI5MjMwMDAwWjASMRAwDgYD
VQQDEwdUZXN0IENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA1wRC
O+flnLTK5ImjTurNRHwSejuqGbc4CAvpB0hS+z0QlSs4+zE9h80aC4hz+6caRpds
+J908Q+RvAittMHbpc7VjbZP72G6fiXk7yPPl6C10HhRSoSi3nY+B7F2E8cuz14q
V2e+ejhWhSrBb/keyXpcyjoW1BOAAJ2TIclRRkICSCZrpXUyXxAvzXfpFXo1RhSb
UywN11pfiCQzDUN7sPww9UzFHuAHZHoyfTr27XnJYVUerVYrCPq8vqfn//01qz55
Xs0hvzGdlTFXhuabFtQnKFH5SNwo/fcznhB7rePOwHojxOpXTBepUCIJLbtNnWFT
V44t9gh5IqIWtoBReQIDAQABo2YwZDAOBgNVHQ8BAf8EBAMCAAYwEgYDVR0TAQH/
BAgwBgEB/wIBAjAdBgNVHQ4EFgQUZKUI8IIjIww7X/6hvwggQK4bD24wHwYDVR0j
BBgwFoAUZKUI8IIjIww7X/6hvwggQK4bD24wCwYJKoZIhvcNAQELA4IBAQDES2cz
7sCQfDCxCIWH7X8kpi/JWExzUyQEJ0rBzN1m3/x8ySRxtXyGekimBqQwQdFqlwMI
xzAQKkh3ue8tNSzRbwqMSyH14N1KrSxYS9e9szJHfUasoTpQGPmDmGIoRJuq1h6M
ej5x1SCJ7GWCR6xEXKUIE9OftXm9TdFzWa7Ja3OHz/mXteii8VXDuZ5ACq6EE5bY
8sP4gcICfJ5fTrpTlk9FIqEWWQrCGa5wk95PGEj+GJpNogjXQ97wVoo/Y3p1brEn
t5zjN9PAq4H1fuCMdNNA+p1DHNwd+ELTxcMAnb2ajwHvV6lKPXutrTFc4umJToBX
FpTxDmJHEV4bzUzh
-----END CERTIFICATE-----
`
key := `-----BEGIN RSA PRIVATE KEY-----
MIIEpQIBAAKCAQEA1wRCO+flnLTK5ImjTurNRHwSejuqGbc4CAvpB0hS+z0QlSs4
+zE9h80aC4hz+6caRpds+J908Q+RvAittMHbpc7VjbZP72G6fiXk7yPPl6C10HhR
SoSi3nY+B7F2E8cuz14qV2e+ejhWhSrBb/keyXpcyjoW1BOAAJ2TIclRRkICSCZr
pXUyXxAvzXfpFXo1RhSbUywN11pfiCQzDUN7sPww9UzFHuAHZHoyfTr27XnJYVUe
rVYrCPq8vqfn//01qz55Xs0hvzGdlTFXhuabFtQnKFH5SNwo/fcznhB7rePOwHoj
xOpXTBepUCIJLbtNnWFTV44t9gh5IqIWtoBReQIDAQABAoIBAHSWipORGp/uKFXj
i/mut776x8ofsAxhnLBARQr93ID+i49W8H7EJGkOfaDjTICYC1dbpGrri61qk8sx
qX7p3v/5NzKwOIfEpirgwVIqSNYe/ncbxnhxkx6tXtUtFKmEx40JskvSpSYAhmmO
1XSx0E/PWaEN/nLgX/f1eWJIlxlQkk3QeqL+FGbCXI48DEtlJ9+MzMu4pAwZTpj5
5qtXo5JJ0jRGfJVPAOznRsYqv864AhMdMIWguzk6EGnbaCWwPcfcn+h9a5LMdony
MDHfBS7bb5tkF3+AfnVY3IBMVx7YlsD9eAyajlgiKu4zLbwTRHjXgShy+4Oussz0
ugNGnkECgYEA/hi+McrZC8C4gg6XqK8+9joD8tnyDZDz88BQB7CZqABUSwvjDqlP
L8hcwo/lzvjBNYGkqaFPUICGWKjeCtd8pPS2DCVXxDQX4aHF1vUur0uYNncJiV3N
XQz4Iemsa6wnKf6M67b5vMXICw7dw0HZCdIHD1hnhdtDz0uVpeevLZ8CgYEA2KCT
Y43lorjrbCgMqtlefkr3GJA9dey+hTzCiWEOOqn9RqGoEGUday0sKhiLofOgmN2B
LEukpKIey8s+Q/cb6lReajDVPDsMweX8i7hz3Wa4Ugp4Xa5BpHqu8qIAE2JUZ7bU
t88aQAYE58pUF+/Lq1QzAQdrjjzQBx6SrBxieecCgYEAvukoPZEC8mmiN1VvbTX+
QFHmlZha3QaDxChB+QUe7bMRojEUL/fVnzkTOLuVFqSfxevaI/km9n0ac5KtAchV
xjp2bTnBb5EUQFqjopYktWA+xO07JRJtMfSEmjZPbbay1kKC7rdTfBm961EIHaRj
xZUf6M+rOE8964oGrdgdLlECgYEA046GQmx6fh7/82FtdZDRQp9tj3SWQUtSiQZc
qhO59Lq8mjUXz+MgBuJXxkiwXRpzlbaFB0Bca1fUoYw8o915SrDYf/Zu2OKGQ/qa
V81sgiVmDuEgycR7YOlbX6OsVUHrUlpwhY3hgfMe6UtkMvhBvHF/WhroBEIJm1pV
PXZ/CbMCgYEApNWVktFBjOaYfY6SNn4iSts1jgsQbbpglg3kT7PLKjCAhI6lNsbk
dyT7ut01PL6RaW4SeQWtrJIVQaM6vF3pprMKqlc5XihOGAmVqH7rQx9rtQB5TicL
BFrwkQE4HQtQBV60hYQUzzlSk44VFDz+jxIEtacRHaomDRh2FtOTz+I=
-----END RSA PRIVATE KEY-----
`
certFile, err := ioutil.TempFile("", "cert")
if err != nil {
t.Fatalf("Failed to setup temp file: %s", err)
}
defer os.Remove(certFile.Name())
certFile.Write([]byte(cert))
certFile.Close()
keyFile, err := ioutil.TempFile("", "key")
if err != nil {
t.Fatalf("Failed to setup temp file: %s", err)
}
defer os.Remove(keyFile.Name())
keyFile.Write([]byte(key))
keyFile.Close()
c := &Config{Scopes: map[string]*datastore.ScopeCfg{}}
l := map[string]string{
"kv.cacertfile": certFile.Name(),
"kv.certfile": certFile.Name(),
"kv.keyfile": keyFile.Name(),
}
f := OptionKVOpts(l)
f(c)
if _, ok := c.Scopes[datastore.GlobalScope]; !ok {
t.Fatal("GlobalScope not established")
}
if c.Scopes[datastore.GlobalScope].Client.Config.TLS == nil {
t.Fatal("TLS is nil")
}
if c.Scopes[datastore.GlobalScope].Client.Config.TLS.RootCAs == nil {
t.Fatal("TLS.RootCAs is nil")
}
if len(c.Scopes[datastore.GlobalScope].Client.Config.TLS.Certificates) != 1 {
t.Fatal("TLS.Certificates is not length 1")
}
}

View file

@ -0,0 +1,12 @@
title = "LibNetwork Configuration file"
[daemon]
debug = false
[cluster]
discovery = "token://swarm-discovery-token"
Address = "Cluster-wide reachable Host IP"
[datastore]
embedded = false
[datastore.client]
provider = "consul"
Address = "localhost:8500"

1389
libnetwork/controller.go Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,178 @@
package datastore
import (
"errors"
"fmt"
"sync"
"github.com/docker/libkv/store"
)
type kvMap map[string]KVObject
type cache struct {
sync.Mutex
kmm map[string]kvMap
ds *datastore
}
func newCache(ds *datastore) *cache {
return &cache{kmm: make(map[string]kvMap), ds: ds}
}
func (c *cache) kmap(kvObject KVObject) (kvMap, error) {
var err error
c.Lock()
keyPrefix := Key(kvObject.KeyPrefix()...)
kmap, ok := c.kmm[keyPrefix]
c.Unlock()
if ok {
return kmap, nil
}
kmap = kvMap{}
// Bail out right away if the kvObject does not implement KVConstructor
ctor, ok := kvObject.(KVConstructor)
if !ok {
return nil, errors.New("error while populating kmap, object does not implement KVConstructor interface")
}
kvList, err := c.ds.store.List(keyPrefix)
if err != nil {
if err == store.ErrKeyNotFound {
// If the store doesn't have anything then there is nothing to
// populate in the cache. Just bail out.
goto out
}
return nil, fmt.Errorf("error while populating kmap: %v", err)
}
for _, kvPair := range kvList {
// Ignore empty kvPair values
if len(kvPair.Value) == 0 {
continue
}
dstO := ctor.New()
err = dstO.SetValue(kvPair.Value)
if err != nil {
return nil, err
}
// Make sure the object has a correct view of the DB index in
// case we need to modify it and update the DB.
dstO.SetIndex(kvPair.LastIndex)
kmap[Key(dstO.Key()...)] = dstO
}
out:
// There may multiple go routines racing to fill the
// cache. The one which places the kmap in c.kmm first
// wins. The others should just use what the first populated.
c.Lock()
kmapNew, ok := c.kmm[keyPrefix]
if ok {
c.Unlock()
return kmapNew, nil
}
c.kmm[keyPrefix] = kmap
c.Unlock()
return kmap, nil
}
func (c *cache) add(kvObject KVObject, atomic bool) error {
kmap, err := c.kmap(kvObject)
if err != nil {
return err
}
c.Lock()
// If atomic is true, cache needs to maintain its own index
// for atomicity and the add needs to be atomic.
if atomic {
if prev, ok := kmap[Key(kvObject.Key()...)]; ok {
if prev.Index() != kvObject.Index() {
c.Unlock()
return ErrKeyModified
}
}
// Increment index
index := kvObject.Index()
index++
kvObject.SetIndex(index)
}
kmap[Key(kvObject.Key()...)] = kvObject
c.Unlock()
return nil
}
func (c *cache) del(kvObject KVObject, atomic bool) error {
kmap, err := c.kmap(kvObject)
if err != nil {
return err
}
c.Lock()
// If atomic is true, cache needs to maintain its own index
// for atomicity and del needs to be atomic.
if atomic {
if prev, ok := kmap[Key(kvObject.Key()...)]; ok {
if prev.Index() != kvObject.Index() {
c.Unlock()
return ErrKeyModified
}
}
}
delete(kmap, Key(kvObject.Key()...))
c.Unlock()
return nil
}
func (c *cache) get(key string, kvObject KVObject) error {
kmap, err := c.kmap(kvObject)
if err != nil {
return err
}
c.Lock()
defer c.Unlock()
o, ok := kmap[Key(kvObject.Key()...)]
if !ok {
return ErrKeyNotFound
}
ctor, ok := o.(KVConstructor)
if !ok {
return errors.New("kvobject does not implement KVConstructor interface. could not get object")
}
return ctor.CopyTo(kvObject)
}
func (c *cache) list(kvObject KVObject) ([]KVObject, error) {
kmap, err := c.kmap(kvObject)
if err != nil {
return nil, err
}
c.Lock()
defer c.Unlock()
var kvol []KVObject
for _, v := range kmap {
kvol = append(kvol, v)
}
return kvol, nil
}

View file

@ -0,0 +1,660 @@
package datastore
import (
"fmt"
"log"
"reflect"
"strings"
"sync"
"time"
"github.com/docker/libkv"
"github.com/docker/libkv/store"
"github.com/docker/libnetwork/discoverapi"
"github.com/docker/libnetwork/types"
)
//DataStore exported
type DataStore interface {
// GetObject gets data from datastore and unmarshals to the specified object
GetObject(key string, o KVObject) error
// PutObject adds a new Record based on an object into the datastore
PutObject(kvObject KVObject) error
// PutObjectAtomic provides an atomic add and update operation for a Record
PutObjectAtomic(kvObject KVObject) error
// DeleteObject deletes a record
DeleteObject(kvObject KVObject) error
// DeleteObjectAtomic performs an atomic delete operation
DeleteObjectAtomic(kvObject KVObject) error
// DeleteTree deletes a record
DeleteTree(kvObject KVObject) error
// Watchable returns whether the store is watchable or not
Watchable() bool
// Watch for changes on a KVObject
Watch(kvObject KVObject, stopCh <-chan struct{}) (<-chan KVObject, error)
// RestartWatch retriggers stopped Watches
RestartWatch()
// Active returns if the store is active
Active() bool
// List returns of a list of KVObjects belonging to the parent
// key. The caller must pass a KVObject of the same type as
// the objects that need to be listed
List(string, KVObject) ([]KVObject, error)
// Map returns a Map of KVObjects
Map(key string, kvObject KVObject) (map[string]KVObject, error)
// Scope returns the scope of the store
Scope() string
// KVStore returns access to the KV Store
KVStore() store.Store
// Close closes the data store
Close()
}
// ErrKeyModified is raised for an atomic update when the update is working on a stale state
var (
ErrKeyModified = store.ErrKeyModified
ErrKeyNotFound = store.ErrKeyNotFound
)
type datastore struct {
scope string
store store.Store
cache *cache
watchCh chan struct{}
active bool
sequential bool
sync.Mutex
}
// KVObject is Key/Value interface used by objects to be part of the DataStore
type KVObject interface {
// Key method lets an object provide the Key to be used in KV Store
Key() []string
// KeyPrefix method lets an object return immediate parent key that can be used for tree walk
KeyPrefix() []string
// Value method lets an object marshal its content to be stored in the KV store
Value() []byte
// SetValue is used by the datastore to set the object's value when loaded from the data store.
SetValue([]byte) error
// Index method returns the latest DB Index as seen by the object
Index() uint64
// SetIndex method allows the datastore to store the latest DB Index into the object
SetIndex(uint64)
// True if the object exists in the datastore, false if it hasn't been stored yet.
// When SetIndex() is called, the object has been stored.
Exists() bool
// DataScope indicates the storage scope of the KV object
DataScope() string
// Skip provides a way for a KV Object to avoid persisting it in the KV Store
Skip() bool
}
// KVConstructor interface defines methods which can construct a KVObject from another.
type KVConstructor interface {
// New returns a new object which is created based on the
// source object
New() KVObject
// CopyTo deep copies the contents of the implementing object
// to the passed destination object
CopyTo(KVObject) error
}
// ScopeCfg represents Datastore configuration.
type ScopeCfg struct {
Client ScopeClientCfg
}
// ScopeClientCfg represents Datastore Client-only mode configuration
type ScopeClientCfg struct {
Provider string
Address string
Config *store.Config
}
const (
// LocalScope indicates to store the KV object in local datastore such as boltdb
LocalScope = "local"
// GlobalScope indicates to store the KV object in global datastore such as consul/etcd/zookeeper
GlobalScope = "global"
// SwarmScope is not indicating a datastore location. It is defined here
// along with the other two scopes just for consistency.
SwarmScope = "swarm"
defaultPrefix = "/var/lib/docker/network/files"
)
const (
// NetworkKeyPrefix is the prefix for network key in the kv store
NetworkKeyPrefix = "network"
// EndpointKeyPrefix is the prefix for endpoint key in the kv store
EndpointKeyPrefix = "endpoint"
)
var (
defaultScopes = makeDefaultScopes()
)
func makeDefaultScopes() map[string]*ScopeCfg {
def := make(map[string]*ScopeCfg)
def[LocalScope] = &ScopeCfg{
Client: ScopeClientCfg{
Provider: string(store.BOLTDB),
Address: defaultPrefix + "/local-kv.db",
Config: &store.Config{
Bucket: "libnetwork",
ConnectionTimeout: time.Minute,
},
},
}
return def
}
var defaultRootChain = []string{"docker", "network", "v1.0"}
var rootChain = defaultRootChain
// DefaultScopes returns a map of default scopes and its config for clients to use.
func DefaultScopes(dataDir string) map[string]*ScopeCfg {
if dataDir != "" {
defaultScopes[LocalScope].Client.Address = dataDir + "/network/files/local-kv.db"
return defaultScopes
}
defaultScopes[LocalScope].Client.Address = defaultPrefix + "/local-kv.db"
return defaultScopes
}
// IsValid checks if the scope config has valid configuration.
func (cfg *ScopeCfg) IsValid() bool {
if cfg == nil ||
strings.TrimSpace(cfg.Client.Provider) == "" ||
strings.TrimSpace(cfg.Client.Address) == "" {
return false
}
return true
}
//Key provides convenient method to create a Key
func Key(key ...string) string {
keychain := append(rootChain, key...)
str := strings.Join(keychain, "/")
return str + "/"
}
//ParseKey provides convenient method to unpack the key to complement the Key function
func ParseKey(key string) ([]string, error) {
chain := strings.Split(strings.Trim(key, "/"), "/")
// The key must at least be equal to the rootChain in order to be considered as valid
if len(chain) <= len(rootChain) || !reflect.DeepEqual(chain[0:len(rootChain)], rootChain) {
return nil, types.BadRequestErrorf("invalid Key : %s", key)
}
return chain[len(rootChain):], nil
}
// newClient used to connect to KV Store
func newClient(scope string, kv string, addr string, config *store.Config, cached bool) (DataStore, error) {
if cached && scope != LocalScope {
return nil, fmt.Errorf("caching supported only for scope %s", LocalScope)
}
sequential := false
if scope == LocalScope {
sequential = true
}
if config == nil {
config = &store.Config{}
}
var addrs []string
if kv == string(store.BOLTDB) {
// Parse file path
addrs = strings.Split(addr, ",")
} else {
// Parse URI
parts := strings.SplitN(addr, "/", 2)
addrs = strings.Split(parts[0], ",")
// Add the custom prefix to the root chain
if len(parts) == 2 {
rootChain = append([]string{parts[1]}, defaultRootChain...)
}
}
store, err := libkv.NewStore(store.Backend(kv), addrs, config)
if err != nil {
return nil, err
}
ds := &datastore{scope: scope, store: store, active: true, watchCh: make(chan struct{}), sequential: sequential}
if cached {
ds.cache = newCache(ds)
}
return ds, nil
}
// NewDataStore creates a new instance of LibKV data store
func NewDataStore(scope string, cfg *ScopeCfg) (DataStore, error) {
if cfg == nil || cfg.Client.Provider == "" || cfg.Client.Address == "" {
c, ok := defaultScopes[scope]
if !ok || c.Client.Provider == "" || c.Client.Address == "" {
return nil, fmt.Errorf("unexpected scope %s without configuration passed", scope)
}
cfg = c
}
var cached bool
if scope == LocalScope {
cached = true
}
return newClient(scope, cfg.Client.Provider, cfg.Client.Address, cfg.Client.Config, cached)
}
// NewDataStoreFromConfig creates a new instance of LibKV data store starting from the datastore config data
func NewDataStoreFromConfig(dsc discoverapi.DatastoreConfigData) (DataStore, error) {
var (
ok bool
sCfgP *store.Config
)
sCfgP, ok = dsc.Config.(*store.Config)
if !ok && dsc.Config != nil {
return nil, fmt.Errorf("cannot parse store configuration: %v", dsc.Config)
}
scopeCfg := &ScopeCfg{
Client: ScopeClientCfg{
Address: dsc.Address,
Provider: dsc.Provider,
Config: sCfgP,
},
}
ds, err := NewDataStore(dsc.Scope, scopeCfg)
if err != nil {
return nil, fmt.Errorf("failed to construct datastore client from datastore configuration %v: %v", dsc, err)
}
return ds, err
}
func (ds *datastore) Close() {
ds.store.Close()
}
func (ds *datastore) Scope() string {
return ds.scope
}
func (ds *datastore) Active() bool {
return ds.active
}
func (ds *datastore) Watchable() bool {
return ds.scope != LocalScope
}
func (ds *datastore) Watch(kvObject KVObject, stopCh <-chan struct{}) (<-chan KVObject, error) {
sCh := make(chan struct{})
ctor, ok := kvObject.(KVConstructor)
if !ok {
return nil, fmt.Errorf("error watching object type %T, object does not implement KVConstructor interface", kvObject)
}
kvpCh, err := ds.store.Watch(Key(kvObject.Key()...), sCh)
if err != nil {
return nil, err
}
kvoCh := make(chan KVObject)
go func() {
retry_watch:
var err error
// Make sure to get a new instance of watch channel
ds.Lock()
watchCh := ds.watchCh
ds.Unlock()
loop:
for {
select {
case <-stopCh:
close(sCh)
return
case kvPair := <-kvpCh:
// If the backend KV store gets reset libkv's go routine
// for the watch can exit resulting in a nil value in
// channel.
if kvPair == nil {
ds.Lock()
ds.active = false
ds.Unlock()
break loop
}
dstO := ctor.New()
if err = dstO.SetValue(kvPair.Value); err != nil {
log.Printf("Could not unmarshal kvpair value = %s", string(kvPair.Value))
break
}
dstO.SetIndex(kvPair.LastIndex)
kvoCh <- dstO
}
}
// Wait on watch channel for a re-trigger when datastore becomes active
<-watchCh
kvpCh, err = ds.store.Watch(Key(kvObject.Key()...), sCh)
if err != nil {
log.Printf("Could not watch the key %s in store: %v", Key(kvObject.Key()...), err)
}
goto retry_watch
}()
return kvoCh, nil
}
func (ds *datastore) RestartWatch() {
ds.Lock()
defer ds.Unlock()
ds.active = true
watchCh := ds.watchCh
ds.watchCh = make(chan struct{})
close(watchCh)
}
func (ds *datastore) KVStore() store.Store {
return ds.store
}
// PutObjectAtomic adds a new Record based on an object into the datastore
func (ds *datastore) PutObjectAtomic(kvObject KVObject) error {
var (
previous *store.KVPair
pair *store.KVPair
err error
)
if ds.sequential {
ds.Lock()
defer ds.Unlock()
}
if kvObject == nil {
return types.BadRequestErrorf("invalid KV Object : nil")
}
kvObjValue := kvObject.Value()
if kvObjValue == nil {
return types.BadRequestErrorf("invalid KV Object with a nil Value for key %s", Key(kvObject.Key()...))
}
if kvObject.Skip() {
goto add_cache
}
if kvObject.Exists() {
previous = &store.KVPair{Key: Key(kvObject.Key()...), LastIndex: kvObject.Index()}
} else {
previous = nil
}
_, pair, err = ds.store.AtomicPut(Key(kvObject.Key()...), kvObjValue, previous, nil)
if err != nil {
if err == store.ErrKeyExists {
return ErrKeyModified
}
return err
}
kvObject.SetIndex(pair.LastIndex)
add_cache:
if ds.cache != nil {
// If persistent store is skipped, sequencing needs to
// happen in cache.
return ds.cache.add(kvObject, kvObject.Skip())
}
return nil
}
// PutObject adds a new Record based on an object into the datastore
func (ds *datastore) PutObject(kvObject KVObject) error {
if ds.sequential {
ds.Lock()
defer ds.Unlock()
}
if kvObject == nil {
return types.BadRequestErrorf("invalid KV Object : nil")
}
if kvObject.Skip() {
goto add_cache
}
if err := ds.putObjectWithKey(kvObject, kvObject.Key()...); err != nil {
return err
}
add_cache:
if ds.cache != nil {
// If persistent store is skipped, sequencing needs to
// happen in cache.
return ds.cache.add(kvObject, kvObject.Skip())
}
return nil
}
func (ds *datastore) putObjectWithKey(kvObject KVObject, key ...string) error {
kvObjValue := kvObject.Value()
if kvObjValue == nil {
return types.BadRequestErrorf("invalid KV Object with a nil Value for key %s", Key(kvObject.Key()...))
}
return ds.store.Put(Key(key...), kvObjValue, nil)
}
// GetObject returns a record matching the key
func (ds *datastore) GetObject(key string, o KVObject) error {
if ds.sequential {
ds.Lock()
defer ds.Unlock()
}
if ds.cache != nil {
return ds.cache.get(key, o)
}
kvPair, err := ds.store.Get(key)
if err != nil {
return err
}
if err := o.SetValue(kvPair.Value); err != nil {
return err
}
// Make sure the object has a correct view of the DB index in
// case we need to modify it and update the DB.
o.SetIndex(kvPair.LastIndex)
return nil
}
func (ds *datastore) ensureParent(parent string) error {
exists, err := ds.store.Exists(parent)
if err != nil {
return err
}
if exists {
return nil
}
return ds.store.Put(parent, []byte{}, &store.WriteOptions{IsDir: true})
}
func (ds *datastore) List(key string, kvObject KVObject) ([]KVObject, error) {
if ds.sequential {
ds.Lock()
defer ds.Unlock()
}
if ds.cache != nil {
return ds.cache.list(kvObject)
}
var kvol []KVObject
cb := func(key string, val KVObject) {
kvol = append(kvol, val)
}
err := ds.iterateKVPairsFromStore(key, kvObject, cb)
if err != nil {
return nil, err
}
return kvol, nil
}
func (ds *datastore) iterateKVPairsFromStore(key string, kvObject KVObject, callback func(string, KVObject)) error {
// Bail out right away if the kvObject does not implement KVConstructor
ctor, ok := kvObject.(KVConstructor)
if !ok {
return fmt.Errorf("error listing objects, object does not implement KVConstructor interface")
}
// Make sure the parent key exists
if err := ds.ensureParent(key); err != nil {
return err
}
kvList, err := ds.store.List(key)
if err != nil {
return err
}
for _, kvPair := range kvList {
if len(kvPair.Value) == 0 {
continue
}
dstO := ctor.New()
if err := dstO.SetValue(kvPair.Value); err != nil {
return err
}
// Make sure the object has a correct view of the DB index in
// case we need to modify it and update the DB.
dstO.SetIndex(kvPair.LastIndex)
callback(kvPair.Key, dstO)
}
return nil
}
func (ds *datastore) Map(key string, kvObject KVObject) (map[string]KVObject, error) {
if ds.sequential {
ds.Lock()
defer ds.Unlock()
}
kvol := make(map[string]KVObject)
cb := func(key string, val KVObject) {
// Trim the leading & trailing "/" to make it consistent across all stores
kvol[strings.Trim(key, "/")] = val
}
err := ds.iterateKVPairsFromStore(key, kvObject, cb)
if err != nil {
return nil, err
}
return kvol, nil
}
// DeleteObject unconditionally deletes a record from the store
func (ds *datastore) DeleteObject(kvObject KVObject) error {
if ds.sequential {
ds.Lock()
defer ds.Unlock()
}
// cleanup the cache first
if ds.cache != nil {
// If persistent store is skipped, sequencing needs to
// happen in cache.
ds.cache.del(kvObject, kvObject.Skip())
}
if kvObject.Skip() {
return nil
}
return ds.store.Delete(Key(kvObject.Key()...))
}
// DeleteObjectAtomic performs atomic delete on a record
func (ds *datastore) DeleteObjectAtomic(kvObject KVObject) error {
if ds.sequential {
ds.Lock()
defer ds.Unlock()
}
if kvObject == nil {
return types.BadRequestErrorf("invalid KV Object : nil")
}
previous := &store.KVPair{Key: Key(kvObject.Key()...), LastIndex: kvObject.Index()}
if kvObject.Skip() {
goto del_cache
}
if _, err := ds.store.AtomicDelete(Key(kvObject.Key()...), previous); err != nil {
if err == store.ErrKeyExists {
return ErrKeyModified
}
return err
}
del_cache:
// cleanup the cache only if AtomicDelete went through successfully
if ds.cache != nil {
// If persistent store is skipped, sequencing needs to
// happen in cache.
return ds.cache.del(kvObject, kvObject.Skip())
}
return nil
}
// DeleteTree unconditionally deletes a record from the store
func (ds *datastore) DeleteTree(kvObject KVObject) error {
if ds.sequential {
ds.Lock()
defer ds.Unlock()
}
// cleanup the cache first
if ds.cache != nil {
// If persistent store is skipped, sequencing needs to
// happen in cache.
ds.cache.del(kvObject, kvObject.Skip())
}
if kvObject.Skip() {
return nil
}
return ds.store.DeleteTree(Key(kvObject.KeyPrefix()...))
}

View file

@ -0,0 +1,257 @@
package datastore
import (
"encoding/json"
"reflect"
"testing"
"github.com/docker/libnetwork/options"
_ "github.com/docker/libnetwork/testutils"
"gotest.tools/v3/assert"
)
var dummyKey = "dummy"
// NewCustomDataStore can be used by other Tests in order to use custom datastore
func NewTestDataStore() DataStore {
return &datastore{scope: LocalScope, store: NewMockStore()}
}
func TestKey(t *testing.T) {
eKey := []string{"hello", "world"}
sKey := Key(eKey...)
if sKey != "docker/network/v1.0/hello/world/" {
t.Fatalf("unexpected key : %s", sKey)
}
}
func TestParseKey(t *testing.T) {
keySlice, err := ParseKey("/docker/network/v1.0/hello/world/")
if err != nil {
t.Fatal(err)
}
eKey := []string{"hello", "world"}
if len(keySlice) < 2 || !reflect.DeepEqual(eKey, keySlice) {
t.Fatalf("unexpected unkey : %s", keySlice)
}
}
func TestInvalidDataStore(t *testing.T) {
config := &ScopeCfg{}
config.Client.Provider = "invalid"
config.Client.Address = "localhost:8500"
_, err := NewDataStore(GlobalScope, config)
if err == nil {
t.Fatal("Invalid Datastore connection configuration must result in a failure")
}
}
func TestKVObjectFlatKey(t *testing.T) {
store := NewTestDataStore()
expected := dummyKVObject("1000", true)
err := store.PutObject(expected)
if err != nil {
t.Fatal(err)
}
keychain := []string{dummyKey, "1000"}
data, err := store.KVStore().Get(Key(keychain...))
if err != nil {
t.Fatal(err)
}
var n dummyObject
json.Unmarshal(data.Value, &n)
if n.Name != expected.Name {
t.Fatal("Dummy object doesn't match the expected object")
}
}
func TestAtomicKVObjectFlatKey(t *testing.T) {
store := NewTestDataStore()
expected := dummyKVObject("1111", true)
assert.Check(t, !expected.Exists())
err := store.PutObjectAtomic(expected)
if err != nil {
t.Fatal(err)
}
assert.Check(t, expected.Exists())
// PutObjectAtomic automatically sets the Index again. Hence the following must pass.
err = store.PutObjectAtomic(expected)
if err != nil {
t.Fatal("Atomic update should succeed.")
}
// Get the latest index and try PutObjectAtomic again for the same Key
// This must succeed as well
data, err := store.KVStore().Get(Key(expected.Key()...))
if err != nil {
t.Fatal(err)
}
n := dummyObject{}
json.Unmarshal(data.Value, &n)
n.ID = "1111"
n.SetIndex(data.LastIndex)
n.ReturnValue = true
err = store.PutObjectAtomic(&n)
if err != nil {
t.Fatal(err)
}
// Get the Object using GetObject, then set again.
newObj := dummyObject{}
err = store.GetObject(Key(expected.Key()...), &newObj)
if err != nil {
t.Fatal(err)
}
assert.Check(t, newObj.Exists())
err = store.PutObjectAtomic(&n)
if err != nil {
t.Fatal(err)
}
}
// dummy data used to test the datastore
type dummyObject struct {
Name string `kv:"leaf"`
NetworkType string `kv:"leaf"`
EnableIPv6 bool `kv:"leaf"`
Rec *recStruct `kv:"recursive"`
Dict map[string]*recStruct `kv:"iterative"`
Generic options.Generic `kv:"iterative"`
ID string
DBIndex uint64
DBExists bool
SkipSave bool
ReturnValue bool
}
func (n *dummyObject) Key() []string {
return []string{dummyKey, n.ID}
}
func (n *dummyObject) KeyPrefix() []string {
return []string{dummyKey}
}
func (n *dummyObject) Value() []byte {
if !n.ReturnValue {
return nil
}
b, err := json.Marshal(n)
if err != nil {
return nil
}
return b
}
func (n *dummyObject) SetValue(value []byte) error {
return json.Unmarshal(value, n)
}
func (n *dummyObject) Index() uint64 {
return n.DBIndex
}
func (n *dummyObject) SetIndex(index uint64) {
n.DBIndex = index
n.DBExists = true
}
func (n *dummyObject) Exists() bool {
return n.DBExists
}
func (n *dummyObject) Skip() bool {
return n.SkipSave
}
func (n *dummyObject) DataScope() string {
return LocalScope
}
func (n *dummyObject) MarshalJSON() ([]byte, error) {
netMap := make(map[string]interface{})
netMap["name"] = n.Name
netMap["networkType"] = n.NetworkType
netMap["enableIPv6"] = n.EnableIPv6
netMap["generic"] = n.Generic
return json.Marshal(netMap)
}
func (n *dummyObject) UnmarshalJSON(b []byte) (err error) {
var netMap map[string]interface{}
if err := json.Unmarshal(b, &netMap); err != nil {
return err
}
n.Name = netMap["name"].(string)
n.NetworkType = netMap["networkType"].(string)
n.EnableIPv6 = netMap["enableIPv6"].(bool)
n.Generic = netMap["generic"].(map[string]interface{})
return nil
}
// dummy structure to test "recursive" cases
type recStruct struct {
Name string `kv:"leaf"`
Field1 int `kv:"leaf"`
Dict map[string]string `kv:"iterative"`
DBIndex uint64
DBExists bool
SkipSave bool
}
func (r *recStruct) Key() []string {
return []string{"recStruct"}
}
func (r *recStruct) Value() []byte {
b, err := json.Marshal(r)
if err != nil {
return nil
}
return b
}
func (r *recStruct) SetValue(value []byte) error {
return json.Unmarshal(value, r)
}
func (r *recStruct) Index() uint64 {
return r.DBIndex
}
func (r *recStruct) SetIndex(index uint64) {
r.DBIndex = index
r.DBExists = true
}
func (r *recStruct) Exists() bool {
return r.DBExists
}
func (r *recStruct) Skip() bool {
return r.SkipSave
}
func dummyKVObject(id string, retValue bool) *dummyObject {
cDict := make(map[string]string)
cDict["foo"] = "bar"
cDict["hello"] = "world"
n := dummyObject{
Name: "testNw",
NetworkType: "bridge",
EnableIPv6: true,
Rec: &recStruct{"gen", 5, cDict, 0, false, false},
ID: id,
DBIndex: 0,
ReturnValue: retValue,
DBExists: false,
SkipSave: false}
generic := make(map[string]interface{})
generic["label1"] = &recStruct{"value1", 1, cDict, 0, false, false}
generic["label2"] = "subnet=10.1.1.0/16"
n.Generic = generic
return &n
}

View file

@ -0,0 +1,129 @@
package datastore
import (
"errors"
"github.com/docker/libkv/store"
"github.com/docker/libnetwork/types"
)
var (
// ErrNotImplemented exported
ErrNotImplemented = errors.New("Functionality not implemented")
)
// MockData exported
type MockData struct {
Data []byte
Index uint64
}
// MockStore exported
type MockStore struct {
db map[string]*MockData
}
// NewMockStore creates a Map backed Datastore that is useful for mocking
func NewMockStore() *MockStore {
db := make(map[string]*MockData)
return &MockStore{db}
}
// Get the value at "key", returns the last modified index
// to use in conjunction to CAS calls
func (s *MockStore) Get(key string) (*store.KVPair, error) {
mData := s.db[key]
if mData == nil {
return nil, nil
}
return &store.KVPair{Value: mData.Data, LastIndex: mData.Index}, nil
}
// Put a value at "key"
func (s *MockStore) Put(key string, value []byte, options *store.WriteOptions) error {
mData := s.db[key]
if mData == nil {
mData = &MockData{value, 0}
}
mData.Index = mData.Index + 1
s.db[key] = mData
return nil
}
// Delete a value at "key"
func (s *MockStore) Delete(key string) error {
delete(s.db, key)
return nil
}
// Exists checks that the key exists inside the store
func (s *MockStore) Exists(key string) (bool, error) {
_, ok := s.db[key]
return ok, nil
}
// List gets a range of values at "directory"
func (s *MockStore) List(prefix string) ([]*store.KVPair, error) {
return nil, ErrNotImplemented
}
// DeleteTree deletes a range of values at "directory"
func (s *MockStore) DeleteTree(prefix string) error {
delete(s.db, prefix)
return nil
}
// Watch a single key for modifications
func (s *MockStore) Watch(key string, stopCh <-chan struct{}) (<-chan *store.KVPair, error) {
return nil, ErrNotImplemented
}
// WatchTree triggers a watch on a range of values at "directory"
func (s *MockStore) WatchTree(prefix string, stopCh <-chan struct{}) (<-chan []*store.KVPair, error) {
return nil, ErrNotImplemented
}
// NewLock exposed
func (s *MockStore) NewLock(key string, options *store.LockOptions) (store.Locker, error) {
return nil, ErrNotImplemented
}
// AtomicPut put a value at "key" if the key has not been
// modified in the meantime, throws an error if this is the case
func (s *MockStore) AtomicPut(key string, newValue []byte, previous *store.KVPair, options *store.WriteOptions) (bool, *store.KVPair, error) {
mData := s.db[key]
if previous == nil {
if mData != nil {
return false, nil, types.BadRequestErrorf("atomic put failed because key exists")
} // Else OK.
} else {
if mData == nil {
return false, nil, types.BadRequestErrorf("atomic put failed because key exists")
}
if mData != nil && mData.Index != previous.LastIndex {
return false, nil, types.BadRequestErrorf("atomic put failed due to mismatched Index")
} // Else OK.
}
err := s.Put(key, newValue, nil)
if err != nil {
return false, nil, err
}
return true, &store.KVPair{Key: key, Value: newValue, LastIndex: s.db[key].Index}, nil
}
// AtomicDelete deletes a value at "key" if the key has not
// been modified in the meantime, throws an error if this is the case
func (s *MockStore) AtomicDelete(key string, previous *store.KVPair) (bool, error) {
mData := s.db[key]
if mData != nil && mData.Index != previous.LastIndex {
return false, types.BadRequestErrorf("atomic delete failed due to mismatched Index")
}
return true, s.Delete(key)
}
// Close closes the client connection
func (s *MockStore) Close() {
return
}

View file

@ -0,0 +1,201 @@
package libnetwork
import (
"fmt"
"strings"
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/types"
"github.com/sirupsen/logrus"
)
const (
gwEPlen = 12
)
var procGwNetwork = make(chan (bool), 1)
/*
libnetwork creates a bridge network "docker_gw_bridge" for providing
default gateway for the containers if none of the container's endpoints
have GW set by the driver. ICC is set to false for the GW_bridge network.
If a driver can't provide external connectivity it can choose to not set
the GW IP for the endpoint.
endpoint on the GW_bridge network is managed dynamically by libnetwork.
ie:
- its created when an endpoint without GW joins the container
- its deleted when an endpoint with GW joins the container
*/
func (sb *sandbox) setupDefaultGW() error {
// check if the container already has a GW endpoint
if ep := sb.getEndpointInGWNetwork(); ep != nil {
return nil
}
c := sb.controller
// Look for default gw network. In case of error (includes not found),
// retry and create it if needed in a serialized execution.
n, err := c.NetworkByName(libnGWNetwork)
if err != nil {
if n, err = c.defaultGwNetwork(); err != nil {
return err
}
}
createOptions := []EndpointOption{CreateOptionAnonymous()}
var gwName string
if len(sb.containerID) <= gwEPlen {
gwName = "gateway_" + sb.containerID
} else {
gwName = "gateway_" + sb.id[:gwEPlen]
}
sbLabels := sb.Labels()
if sbLabels[netlabel.PortMap] != nil {
createOptions = append(createOptions, CreateOptionPortMapping(sbLabels[netlabel.PortMap].([]types.PortBinding)))
}
if sbLabels[netlabel.ExposedPorts] != nil {
createOptions = append(createOptions, CreateOptionExposedPorts(sbLabels[netlabel.ExposedPorts].([]types.TransportPort)))
}
epOption := getPlatformOption()
if epOption != nil {
createOptions = append(createOptions, epOption)
}
newEp, err := n.CreateEndpoint(gwName, createOptions...)
if err != nil {
return fmt.Errorf("container %s: endpoint create on GW Network failed: %v", sb.containerID, err)
}
defer func() {
if err != nil {
if err2 := newEp.Delete(true); err2 != nil {
logrus.Warnf("Failed to remove gw endpoint for container %s after failing to join the gateway network: %v",
sb.containerID, err2)
}
}
}()
epLocal := newEp.(*endpoint)
if err = epLocal.sbJoin(sb); err != nil {
return fmt.Errorf("container %s: endpoint join on GW Network failed: %v", sb.containerID, err)
}
return nil
}
// If present, detach and remove the endpoint connecting the sandbox to the default gw network.
func (sb *sandbox) clearDefaultGW() error {
var ep *endpoint
if ep = sb.getEndpointInGWNetwork(); ep == nil {
return nil
}
if err := ep.sbLeave(sb, false); err != nil {
return fmt.Errorf("container %s: endpoint leaving GW Network failed: %v", sb.containerID, err)
}
if err := ep.Delete(false); err != nil {
return fmt.Errorf("container %s: deleting endpoint on GW Network failed: %v", sb.containerID, err)
}
return nil
}
// Evaluate whether the sandbox requires a default gateway based
// on the endpoints to which it is connected. It does not account
// for the default gateway network endpoint.
func (sb *sandbox) needDefaultGW() bool {
var needGW bool
for _, ep := range sb.getConnectedEndpoints() {
if ep.endpointInGWNetwork() {
continue
}
if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" {
continue
}
if ep.getNetwork().Internal() {
continue
}
// During stale sandbox cleanup, joinInfo may be nil
if ep.joinInfo != nil && ep.joinInfo.disableGatewayService {
continue
}
// TODO v6 needs to be handled.
if len(ep.Gateway()) > 0 {
return false
}
for _, r := range ep.StaticRoutes() {
if r.Destination != nil && r.Destination.String() == "0.0.0.0/0" {
return false
}
}
needGW = true
}
return needGW
}
func (sb *sandbox) getEndpointInGWNetwork() *endpoint {
for _, ep := range sb.getConnectedEndpoints() {
if ep.getNetwork().name == libnGWNetwork && strings.HasPrefix(ep.Name(), "gateway_") {
return ep
}
}
return nil
}
func (ep *endpoint) endpointInGWNetwork() bool {
if ep.getNetwork().name == libnGWNetwork && strings.HasPrefix(ep.Name(), "gateway_") {
return true
}
return false
}
func (sb *sandbox) getEPwithoutGateway() *endpoint {
for _, ep := range sb.getConnectedEndpoints() {
if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" {
continue
}
if len(ep.Gateway()) == 0 {
return ep
}
}
return nil
}
// Looks for the default gw network and creates it if not there.
// Parallel executions are serialized.
func (c *controller) defaultGwNetwork() (Network, error) {
procGwNetwork <- true
defer func() { <-procGwNetwork }()
n, err := c.NetworkByName(libnGWNetwork)
if _, ok := err.(types.NotFoundError); ok {
n, err = c.createGWNetwork()
}
return n, err
}
// Returns the endpoint which is providing external connectivity to the sandbox
func (sb *sandbox) getGatewayEndpoint() *endpoint {
for _, ep := range sb.getConnectedEndpoints() {
if ep.getNetwork().Type() == "null" || ep.getNetwork().Type() == "host" {
continue
}
if len(ep.Gateway()) != 0 {
return ep
}
}
return nil
}

View file

@ -0,0 +1,13 @@
package libnetwork
import "github.com/docker/libnetwork/types"
const libnGWNetwork = "docker_gwbridge"
func getPlatformOption() EndpointOption {
return nil
}
func (c *controller) createGWNetwork() (Network, error) {
return nil, types.NotImplementedErrorf("default gateway functionality is not implemented in freebsd")
}

View file

@ -0,0 +1,32 @@
package libnetwork
import (
"fmt"
"strconv"
"github.com/docker/libnetwork/drivers/bridge"
)
const libnGWNetwork = "docker_gwbridge"
func getPlatformOption() EndpointOption {
return nil
}
func (c *controller) createGWNetwork() (Network, error) {
netOption := map[string]string{
bridge.BridgeName: libnGWNetwork,
bridge.EnableICC: strconv.FormatBool(false),
bridge.EnableIPMasquerade: strconv.FormatBool(true),
}
n, err := c.NewNetwork("bridge", libnGWNetwork, "",
NetworkOptionDriverOpts(netOption),
NetworkOptionEnableIPv6(false),
)
if err != nil {
return nil, fmt.Errorf("error creating external connectivity network: %v", err)
}
return n, err
}

View file

@ -0,0 +1,22 @@
package libnetwork
import (
windriver "github.com/docker/libnetwork/drivers/windows"
"github.com/docker/libnetwork/options"
"github.com/docker/libnetwork/types"
)
const libnGWNetwork = "nat"
func getPlatformOption() EndpointOption {
epOption := options.Generic{
windriver.DisableICC: true,
windriver.DisableDNS: true,
}
return EndpointOptionGeneric(epOption)
}
func (c *controller) createGWNetwork() (Network, error) {
return nil, types.NotImplementedErrorf("default gateway functionality is not implemented in windows")
}

View file

@ -0,0 +1,227 @@
package diagnostic
import (
"context"
"encoding/json"
"fmt"
"net/http"
"sync"
"sync/atomic"
stackdump "github.com/docker/docker/pkg/signal"
"github.com/docker/libnetwork/internal/caller"
"github.com/sirupsen/logrus"
)
// HTTPHandlerFunc TODO
type HTTPHandlerFunc func(interface{}, http.ResponseWriter, *http.Request)
type httpHandlerCustom struct {
ctx interface{}
F func(interface{}, http.ResponseWriter, *http.Request)
}
// ServeHTTP TODO
func (h httpHandlerCustom) ServeHTTP(w http.ResponseWriter, r *http.Request) {
h.F(h.ctx, w, r)
}
var diagPaths2Func = map[string]HTTPHandlerFunc{
"/": notImplemented,
"/help": help,
"/ready": ready,
"/stackdump": stackTrace,
}
// Server when the debug is enabled exposes a
// This data structure is protected by the Agent mutex so does not require and additional mutex here
type Server struct {
enable int32
srv *http.Server
port int
mux *http.ServeMux
registeredHanders map[string]bool
sync.Mutex
}
// New creates a new diagnostic server
func New() *Server {
return &Server{
registeredHanders: make(map[string]bool),
}
}
// Init initialize the mux for the http handling and register the base hooks
func (s *Server) Init() {
s.mux = http.NewServeMux()
// Register local handlers
s.RegisterHandler(s, diagPaths2Func)
}
// RegisterHandler allows to register new handlers to the mux and to a specific path
func (s *Server) RegisterHandler(ctx interface{}, hdlrs map[string]HTTPHandlerFunc) {
s.Lock()
defer s.Unlock()
for path, fun := range hdlrs {
if _, ok := s.registeredHanders[path]; ok {
continue
}
s.mux.Handle(path, httpHandlerCustom{ctx, fun})
s.registeredHanders[path] = true
}
}
// ServeHTTP this is the method called bu the ListenAndServe, and is needed to allow us to
// use our custom mux
func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
s.mux.ServeHTTP(w, r)
}
// EnableDiagnostic opens a TCP socket to debug the passed network DB
func (s *Server) EnableDiagnostic(ip string, port int) {
s.Lock()
defer s.Unlock()
s.port = port
if s.enable == 1 {
logrus.Info("The server is already up and running")
return
}
logrus.Infof("Starting the diagnostic server listening on %d for commands", port)
srv := &http.Server{Addr: fmt.Sprintf("%s:%d", ip, port), Handler: s}
s.srv = srv
s.enable = 1
go func(n *Server) {
// Ignore ErrServerClosed that is returned on the Shutdown call
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
logrus.Errorf("ListenAndServe error: %s", err)
atomic.SwapInt32(&n.enable, 0)
}
}(s)
}
// DisableDiagnostic stop the dubug and closes the tcp socket
func (s *Server) DisableDiagnostic() {
s.Lock()
defer s.Unlock()
s.srv.Shutdown(context.Background())
s.srv = nil
s.enable = 0
logrus.Info("Disabling the diagnostic server")
}
// IsDiagnosticEnabled returns true when the debug is enabled
func (s *Server) IsDiagnosticEnabled() bool {
s.Lock()
defer s.Unlock()
return s.enable == 1
}
func notImplemented(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
_, json := ParseHTTPFormOptions(r)
rsp := WrongCommand("not implemented", fmt.Sprintf("URL path: %s no method implemented check /help\n", r.URL.Path))
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": caller.Name(0), "url": r.URL.String()})
log.Info("command not implemented done")
HTTPReply(w, rsp, json)
}
func help(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
_, json := ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": caller.Name(0), "url": r.URL.String()})
log.Info("help done")
n, ok := ctx.(*Server)
var result string
if ok {
for path := range n.registeredHanders {
result += fmt.Sprintf("%s\n", path)
}
HTTPReply(w, CommandSucceed(&StringCmd{Info: result}), json)
}
}
func ready(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
_, json := ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": caller.Name(0), "url": r.URL.String()})
log.Info("ready done")
HTTPReply(w, CommandSucceed(&StringCmd{Info: "OK"}), json)
}
func stackTrace(ctx interface{}, w http.ResponseWriter, r *http.Request) {
r.ParseForm()
_, json := ParseHTTPFormOptions(r)
// audit logs
log := logrus.WithFields(logrus.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": caller.Name(0), "url": r.URL.String()})
log.Info("stack trace")
path, err := stackdump.DumpStacks("/tmp/")
if err != nil {
log.WithError(err).Error("failed to write goroutines dump")
HTTPReply(w, FailCommand(err), json)
} else {
log.Info("stack trace done")
HTTPReply(w, CommandSucceed(&StringCmd{Info: fmt.Sprintf("goroutine stacks written to %s", path)}), json)
}
}
// DebugHTTPForm helper to print the form url parameters
func DebugHTTPForm(r *http.Request) {
for k, v := range r.Form {
logrus.Debugf("Form[%q] = %q\n", k, v)
}
}
// JSONOutput contains details on JSON output printing
type JSONOutput struct {
enable bool
prettyPrint bool
}
// ParseHTTPFormOptions easily parse the JSON printing options
func ParseHTTPFormOptions(r *http.Request) (bool, *JSONOutput) {
_, unsafe := r.Form["unsafe"]
v, json := r.Form["json"]
var pretty bool
if len(v) > 0 {
pretty = v[0] == "pretty"
}
return unsafe, &JSONOutput{enable: json, prettyPrint: pretty}
}
// HTTPReply helper function that takes care of sending the message out
func HTTPReply(w http.ResponseWriter, r *HTTPResult, j *JSONOutput) (int, error) {
var response []byte
if j.enable {
w.Header().Set("Content-Type", "application/json")
var err error
if j.prettyPrint {
response, err = json.MarshalIndent(r, "", " ")
if err != nil {
response, _ = json.MarshalIndent(FailCommand(err), "", " ")
}
} else {
response, err = json.Marshal(r)
if err != nil {
response, _ = json.Marshal(FailCommand(err))
}
}
} else {
response = []byte(r.String())
}
return fmt.Fprint(w, string(response))
}

View file

@ -0,0 +1,132 @@
package diagnostic
import "fmt"
// StringInterface interface that has to be implemented by messages
type StringInterface interface {
String() string
}
// CommandSucceed creates a success message
func CommandSucceed(result StringInterface) *HTTPResult {
return &HTTPResult{
Message: "OK",
Details: result,
}
}
// FailCommand creates a failure message with error
func FailCommand(err error) *HTTPResult {
return &HTTPResult{
Message: "FAIL",
Details: &ErrorCmd{Error: err.Error()},
}
}
// WrongCommand creates a wrong command response
func WrongCommand(message, usage string) *HTTPResult {
return &HTTPResult{
Message: message,
Details: &UsageCmd{Usage: usage},
}
}
// HTTPResult Diagnostic Server HTTP result operation
type HTTPResult struct {
Message string `json:"message"`
Details StringInterface `json:"details"`
}
func (h *HTTPResult) String() string {
rsp := h.Message
if h.Details != nil {
rsp += "\n" + h.Details.String()
}
return rsp
}
// UsageCmd command with usage field
type UsageCmd struct {
Usage string `json:"usage"`
}
func (u *UsageCmd) String() string {
return "Usage: " + u.Usage
}
// StringCmd command with info string
type StringCmd struct {
Info string `json:"info"`
}
func (s *StringCmd) String() string {
return s.Info
}
// ErrorCmd command with error
type ErrorCmd struct {
Error string `json:"error"`
}
func (e *ErrorCmd) String() string {
return "Error: " + e.Error
}
// TableObj network db table object
type TableObj struct {
Length int `json:"size"`
Elements []StringInterface `json:"entries"`
}
func (t *TableObj) String() string {
output := fmt.Sprintf("total entries: %d\n", t.Length)
for _, e := range t.Elements {
output += e.String()
}
return output
}
// PeerEntryObj entry in the networkdb peer table
type PeerEntryObj struct {
Index int `json:"-"`
Name string `json:"-=name"`
IP string `json:"ip"`
}
func (p *PeerEntryObj) String() string {
return fmt.Sprintf("%d) %s -> %s\n", p.Index, p.Name, p.IP)
}
// TableEntryObj network db table entry object
type TableEntryObj struct {
Index int `json:"-"`
Key string `json:"key"`
Value string `json:"value"`
Owner string `json:"owner"`
}
func (t *TableEntryObj) String() string {
return fmt.Sprintf("%d) k:`%s` -> v:`%s` owner:`%s`\n", t.Index, t.Key, t.Value, t.Owner)
}
// TableEndpointsResult fully typed message for proper unmarshaling on the client side
type TableEndpointsResult struct {
TableObj
Elements []TableEntryObj `json:"entries"`
}
// TablePeersResult fully typed message for proper unmarshaling on the client side
type TablePeersResult struct {
TableObj
Elements []PeerEntryObj `json:"entries"`
}
// NetworkStatsResult network db stats related to entries and queue len for a network
type NetworkStatsResult struct {
Entries int `json:"entries"`
QueueLen int `jsoin:"qlen"`
}
func (n *NetworkStatsResult) String() string {
return fmt.Sprintf("entries: %d, qlen: %d\n", n.Entries, n.QueueLen)
}

View file

@ -0,0 +1,60 @@
package discoverapi
// Discover is an interface to be implemented by the component interested in receiving discover events
// like new node joining the cluster or datastore updates
type Discover interface {
// DiscoverNew is a notification for a new discovery event, Example:a new node joining a cluster
DiscoverNew(dType DiscoveryType, data interface{}) error
// DiscoverDelete is a notification for a discovery delete event, Example:a node leaving a cluster
DiscoverDelete(dType DiscoveryType, data interface{}) error
}
// DiscoveryType represents the type of discovery element the DiscoverNew function is invoked on
type DiscoveryType int
const (
// NodeDiscovery represents Node join/leave events provided by discovery
NodeDiscovery = iota + 1
// DatastoreConfig represents an add/remove datastore event
DatastoreConfig
// EncryptionKeysConfig represents the initial key(s) for performing datapath encryption
EncryptionKeysConfig
// EncryptionKeysUpdate represents an update to the datapath encryption key(s)
EncryptionKeysUpdate
)
// NodeDiscoveryData represents the structure backing the node discovery data json string
type NodeDiscoveryData struct {
Address string
BindAddress string
Self bool
}
// DatastoreConfigData is the data for the datastore update event message
type DatastoreConfigData struct {
Scope string
Provider string
Address string
Config interface{}
}
// DriverEncryptionConfig contains the initial datapath encryption key(s)
// Key in first position is the primary key, the one to be used in tx.
// Original key and tag types are []byte and uint64
type DriverEncryptionConfig struct {
Keys [][]byte
Tags []uint64
}
// DriverEncryptionUpdate carries an update to the encryption key(s) as:
// a new key and/or set a primary key and/or a removal of an existing key.
// Original key and tag types are []byte and uint64
type DriverEncryptionUpdate struct {
Key []byte
Tag uint64
Primary []byte
PrimaryTag uint64
Prune []byte
PruneTag uint64
}

13
libnetwork/docs/bridge.md Normal file
View file

@ -0,0 +1,13 @@
Bridge Driver
=============
The bridge driver is an implementation that uses Linux Bridging and iptables to provide connectivity for containers
It creates a single bridge, called `docker0` by default, and attaches a `veth pair` between the bridge and every endpoint.
## Configuration
The bridge driver supports configuration through the Docker Daemon flags.
## Usage
This driver is supported for the default "bridge" network only and it cannot be used for any other networks.

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

158
libnetwork/docs/design.md Normal file
View file

@ -0,0 +1,158 @@
Design
======
This document describes how libnetwork has been designed in order to achieve this.
Requirements for individual releases can be found on the [Project Page](https://github.com/docker/libnetwork/wiki).
Many of the design decisions are inspired by the learnings from the Docker networking design as of Docker v1.6.
Please refer to this [Docker v1.6 Design](legacy.md) document for more information on networking design as of Docker v1.6.
## Goal
libnetwork project will follow Docker and Linux philosophy of developing small, highly modular and composable tools that work well independently.
Libnetwork aims to satisfy that composable need for Networking in Containers.
## The Container Network Model
Libnetwork implements Container Network Model (CNM) which formalizes the steps required to provide networking for containers while providing an abstraction that can be used to support multiple network drivers. The CNM is built on 3 main components (shown below)
![](/docs/cnm-model.jpg?raw=true)
**Sandbox**
A Sandbox contains the configuration of a container's network stack.
This includes management of the container's interfaces, routing table and DNS settings.
An implementation of a Sandbox could be a Linux Network Namespace, a FreeBSD Jail or other similar concept.
A Sandbox may contain *many* endpoints from *multiple* networks.
**Endpoint**
An Endpoint joins a Sandbox to a Network.
An implementation of an Endpoint could be a `veth` pair, an Open vSwitch internal port or similar.
An Endpoint can belong to only one network and it can belong to only one Sandbox, if connected.
**Network**
A Network is a group of Endpoints that are able to communicate with each-other directly.
An implementation of a Network could be a Linux bridge, a VLAN, etc.
Networks consist of *many* endpoints.
## CNM Objects
**NetworkController**
`NetworkController` object provides the entry-point into libnetwork that exposes simple APIs for the users (such as Docker Engine) to allocate and manage Networks. libnetwork supports multiple active drivers (both inbuilt and remote). `NetworkController` allows user to bind a particular driver to a given network.
**Driver**
`Driver` is not a user visible object, but drivers provide the actual network implementation. `NetworkController` provides an API to configure a driver with driver-specific options/labels that is transparent to libnetwork, but can be handled by the drivers directly. Drivers can be both inbuilt (such as Bridge, Host, None & overlay) and remote (from plugin providers) to satisfy various use cases & deployment scenarios. At this point, the Driver owns a network and is responsible for managing the network (including IPAM, etc.). This can be improved in the future by having multiple drivers participating in handling various network management functionalities.
**Network**
`Network` object is an implementation of the `CNM : Network` as defined above. `NetworkController` provides APIs to create and manage `Network` object. Whenever a `Network` is created or updated, the corresponding `Driver` will be notified of the event. LibNetwork treats `Network` objects at an abstract level to provide connectivity between a group of endpoints that belong to the same network and isolation from the rest. The `Driver` performs the actual work of providing the required connectivity and isolation. The connectivity can be within the same host or across multiple hosts. Hence `Network` has a global scope within a cluster.
**Endpoint**
`Endpoint` represents a Service Endpoint. It provides the connectivity for services exposed by a container in a network with other services provided by other containers in the network. `Network` object provides APIs to create and manage an endpoint. An endpoint can be attached to only one network. `Endpoint` creation calls are made to the corresponding `Driver` which is responsible for allocating resources for the corresponding `Sandbox`. Since `Endpoint` represents a Service and not necessarily a particular container, `Endpoint` has a global scope within a cluster.
**Sandbox**
`Sandbox` object represents container's network configuration such as IP address, MAC address, routes, DNS entries. A `Sandbox` object is created when the user requests to create an endpoint on a network. The `Driver` that handles the `Network` is responsible for allocating the required network resources (such as the IP address) and passing the info called `SandboxInfo` back to libnetwork. libnetwork will make use of OS specific constructs (example: netns for Linux) to populate the network configuration into the containers that is represented by the `Sandbox`. A `Sandbox` can have multiple endpoints attached to different networks. Since `Sandbox` is associated with a particular container in a given host, it has a local scope that represents the Host that the Container belong to.
**CNM Attributes**
***Options***
`Options` provides a generic and flexible mechanism to pass `Driver` specific configuration options from the user to the `Driver` directly. `Options` are just key-value pairs of data with `key` represented by a string and `value` represented by a generic object (such as a Go `interface{}`). Libnetwork will operate on the `Options` ONLY if the `key` matches any of the well-known `Labels` defined in the `net-labels` package. `Options` also encompasses `Labels` as explained below. `Options` are generally NOT end-user visible (in UI), while `Labels` are.
***Labels***
`Labels` are very similar to `Options` and are in fact just a subset of `Options`. `Labels` are typically end-user visible and are represented in the UI explicitly using the `--labels` option. They are passed from the UI to the `Driver` so that `Driver` can make use of it and perform any `Driver` specific operation (such as a subnet to allocate IP-Addresses from in a Network).
## CNM Lifecycle
Consumers of the CNM, like Docker, interact through the CNM Objects and its APIs to network the containers that they manage.
1. `Drivers` register with `NetworkController`. Built-in drivers register inside of libnetwork, while remote drivers register with libnetwork via the Plugin mechanism (*plugin-mechanism is WIP*). Each `driver` handles a particular `networkType`.
2. `NetworkController` object is created using `libnetwork.New()` API to manage the allocation of Networks and optionally configure a `Driver` with driver specific `Options`.
3. `Network` is created using the controller's `NewNetwork()` API by providing a `name` and `networkType`. `networkType` parameter helps to choose a corresponding `Driver` and binds the created `Network` to that `Driver`. From this point, any operation on `Network` will be handled by that `Driver`.
4. `controller.NewNetwork()` API also takes in optional `options` parameter which carries Driver-specific options and `Labels`, which the Drivers can make use of for its purpose.
5. `network.CreateEndpoint()` can be called to create a new Endpoint in a given network. This API also accepts optional `options` parameter which drivers can make use of. These 'options' carry both well-known labels and driver-specific labels. Drivers will in turn be called with `driver.CreateEndpoint` and it can choose to reserve IPv4/IPv6 addresses when an `Endpoint` is created in a `Network`. The `Driver` will assign these addresses using `InterfaceInfo` interface defined in the `driverapi`. The IP/IPv6 are needed to complete the endpoint as service definition along with the ports the endpoint exposes since essentially a service endpoint is nothing but a network address and the port number that the application container is listening on.
6. `endpoint.Join()` can be used to attach a container to an `Endpoint`. The Join operation will create a `Sandbox` if it doesn't exist already for that container. The Drivers can make use of the Sandbox Key to identify multiple endpoints attached to a same container. This API also accepts optional `options` parameter which drivers can make use of.
* Though it is not a direct design issue of LibNetwork, it is highly encouraged to have users like `Docker` to call the endpoint.Join() during Container's `Start()` lifecycle that is invoked *before* the container is made operational. As part of Docker integration, this will be taken care of.
* One of a FAQ on endpoint join() API is that, why do we need an API to create an Endpoint and another to join the endpoint.
- The answer is based on the fact that Endpoint represents a Service which may or may not be backed by a Container. When an Endpoint is created, it will have its resources reserved so that any container can get attached to the endpoint later and get a consistent networking behaviour.
7. `endpoint.Leave()` can be invoked when a container is stopped. The `Driver` can cleanup the states that it allocated during the `Join()` call. LibNetwork will delete the `Sandbox` when the last referencing endpoint leaves the network. But LibNetwork keeps hold of the IP addresses as long as the endpoint is still present and will be reused when the container(or any container) joins again. This ensures that the container's resources are reused when they are Stopped and Started again.
8. `endpoint.Delete()` is used to delete an endpoint from a network. This results in deleting an endpoint and cleaning up the cached `sandbox.Info`.
9. `network.Delete()` is used to delete a network. LibNetwork will not allow the delete to proceed if there are any existing endpoints attached to the Network.
## Implementation Details
### Networks & Endpoints
LibNetwork's Network and Endpoint APIs are primarily for managing the corresponding Objects and book-keeping them to provide a level of abstraction as required by the CNM. It delegates the actual implementation to the drivers which realize the functionality as promised in the CNM. For more information on these details, please see [the drivers section](#drivers)
### Sandbox
Libnetwork provides a framework to implement of a Sandbox in multiple operating systems. Currently we have implemented Sandbox for Linux using `namespace_linux.go` and `configure_linux.go` in `sandbox` package.
This creates a Network Namespace for each sandbox which is uniquely identified by a path on the host filesystem.
Netlink calls are used to move interfaces from the global namespace to the Sandbox namespace.
Netlink is also used to manage the routing table in the namespace.
## Drivers
## API
Drivers are essentially an extension of libnetwork and provide the actual implementation for all of the LibNetwork APIs defined above. Hence there is an 1-1 correspondence for all the `Network` and `Endpoint` APIs, which includes :
* `driver.Config`
* `driver.CreateNetwork`
* `driver.DeleteNetwork`
* `driver.CreateEndpoint`
* `driver.DeleteEndpoint`
* `driver.Join`
* `driver.Leave`
These Driver facing APIs make use of unique identifiers (`networkid`,`endpointid`,...) instead of names (as seen in user-facing APIs).
The APIs are still work in progress and there can be changes to these based on the driver requirements especially when it comes to Multi-host networking.
### Driver semantics
* `Driver.CreateEndpoint`
This method is passed an interface `EndpointInfo`, with methods `Interface` and `AddInterface`.
If the value returned by `Interface` is non-nil, the driver is expected to make use of the interface information therein (e.g., treating the address or addresses as statically supplied), and must return an error if it cannot. If the value is `nil`, the driver should allocate exactly one _fresh_ interface, and use `AddInterface` to record them; or return an error if it cannot.
It is forbidden to use `AddInterface` if `Interface` is non-nil.
## Implementations
Libnetwork includes the following driver packages:
- null
- bridge
- overlay
- remote
### Null
The null driver is a `noop` implementation of the driver API, used only in cases where no networking is desired. This is to provide backward compatibility to the Docker's `--net=none` option.
### Bridge
The `bridge` driver provides a Linux-specific bridging implementation based on the Linux Bridge.
For more details, please [see the Bridge Driver documentation](bridge.md).
### Overlay
The `overlay` driver implements networking that can span multiple hosts using overlay network encapsulations such as VXLAN.
For more details on its design, please see the [Overlay Driver Design](overlay.md).
### Remote
The `remote` package does not provide a driver, but provides a means of supporting drivers over a remote transport.
This allows a driver to be written in a language of your choice.
For further details, please see the [Remote Driver Design](remote.md).

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 39 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 105 KiB

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 108 KiB

274
libnetwork/docs/ipam.md Normal file
View file

@ -0,0 +1,274 @@
# IPAM Driver
During the Network and Endpoints lifecycle, the CNM model controls the IP address assignment for network and endpoint interfaces via the IPAM driver(s).
Libnetwork has a default, built-in IPAM driver and allows third party IPAM drivers to be dynamically plugged. On network creation, the user can specify which IPAM driver libnetwork needs to use for the network's IP address management. This document explains the APIs with which the IPAM driver needs to comply, and the corresponding HTTPS request/response body relevant for remote drivers.
## Remote IPAM driver
On the same line of remote network driver registration (see [remote.md](./remote.md) for more details), libnetwork initializes the `ipams.remote` package with the `Init()` function. It passes a `ipamapi.Callback` as a parameter, which implements `RegisterIpamDriver()`. The remote driver package uses this interface to register remote drivers with libnetwork's `NetworkController`, by supplying it in a `plugins.Handle` callback. The remote drivers register and communicate with libnetwork via the Docker plugin package. The `ipams.remote` provides the proxy for the remote driver processes.
## Protocol
Communication protocol is the same as the remote network driver.
## Handshake
During driver registration, libnetwork will query the remote driver about the default local and global address spaces strings, and about the driver capabilities.
More detailed information can be found in the respective section in this document.
## Datastore Requirements
It is the remote driver's responsibility to manage its database.
## Ipam Contract
The remote IPAM driver must serve the following requests:
- **GetDefaultAddressSpaces**
- **RequestPool**
- **ReleasePool**
- **Request address**
- **Release address**
The following sections explain each of the above requests' semantic, when they are called during network/endpoint lifecycle, and the corresponding payload for remote driver HTTP request/responses.
## IPAM Configuration and flow
A libnetwork user can provide IPAM related configuration when creating a network, via the `NetworkOptionIpam` setter function.
```go
func NetworkOptionIpam(ipamDriver string, addrSpace string, ipV4 []*IpamConf, ipV6 []*IpamConf, opts map[string]string) NetworkOption
```
The caller has to provide the IPAM driver name and may provide the address space and a list of `IpamConf` structures for IPv4 and a list for IPv6. The IPAM driver name is the only mandatory field. If not provided, network creation will fail.
In the list of configurations, each element has the following form:
```go
// IpamConf contains all the ipam related configurations for a network
type IpamConf struct {
// The master address pool for containers and network interfaces
PreferredPool string
// A subset of the master pool. If specified,
// this becomes the container pool
SubPool string
// Input options for IPAM Driver (optional)
Options map[string]string
// Preferred Network Gateway address (optional)
Gateway string
// Auxiliary addresses for network driver. Must be within the master pool.
// libnetwork will reserve them if they fall into the container pool
AuxAddresses map[string]string
}
```
On network creation, libnetwork will iterate the list and perform the following requests to the IPAM driver:
1. Request the address pool and pass the options along via `RequestPool()`.
2. Request the network gateway address if specified. Otherwise request any address from the pool to be used as network gateway. This is done via `RequestAddress()`.
3. Request each of the specified auxiliary addresses via `RequestAddress()`.
If the list of IPv4 configurations is empty, libnetwork will automatically add one empty `IpamConf` structure. This will cause libnetwork to request IPAM driver an IPv4 address pool of the driver's choice on the configured address space, if specified, or on the IPAM driver default address space otherwise. If the IPAM driver is not able to provide an address pool, network creation will fail.
If the list of IPv6 configurations is empty, libnetwork will not take any action.
The data retrieved from the IPAM driver during the execution of point 1) to 3) will be stored in the network structure as a list of `IpamInfo` structures for IPv4 and a list for IPv6.
On endpoint creation, libnetwork will iterate over the list of configs and perform the following operation:
1. Request an IPv4 address from the IPv4 pool and assign it to the endpoint interface IPv4 address. If successful, stop iterating.
2. Request an IPv6 address from the IPv6 pool (if exists) and assign it to the endpoint interface IPv6 address. If successful, stop iterating.
Endpoint creation will fail if any of the above operation does not succeed
On endpoint deletion, libnetwork will perform the following operations:
1. Release the endpoint interface IPv4 address
2. Release the endpoint interface IPv6 address if present
On network deletion, libnetwork will iterate the list of `IpamData` structures and perform the following requests to ipam driver:
1. Release the network gateway address via `ReleaseAddress()`
2. Release each of the auxiliary addresses via `ReleaseAddress()`
3. Release the pool via `ReleasePool()`
### GetDefaultAddressSpaces
GetDefaultAddressSpaces returns the default local and global address space names for this IPAM. An address space is a set of non-overlapping address pools isolated from other address spaces' pools. In other words, same pool can exist on N different address spaces. An address space naturally maps to a tenant name.
In libnetwork, the meaning associated to `local` or `global` address space is that a local address space doesn't need to get synchronized across the
cluster whereas the global address spaces does. Unless specified otherwise in the IPAM configuration, libnetwork will request address pools from the default local or default global address space based on the scope of the network being created. For example, if not specified otherwise in the configuration, libnetwork will request address pool from the default local address space for a bridge network, whereas from the default global address space for an overlay network.
During registration, the remote driver will receive a POST message to the URL `/IpamDriver.GetDefaultAddressSpaces` with no payload. The driver's response should have the form:
{
"LocalDefaultAddressSpace": string
"GlobalDefaultAddressSpace": string
}
### RequestPool
This API is for registering an address pool with the IPAM driver. Multiple identical calls must return the same result.
It is the IPAM driver's responsibility to keep a reference count for the pool.
```go
RequestPool(addressSpace, pool, subPool string, options map[string]string, v6 bool) (string, *net.IPNet, map[string]string, error)
```
For this API, the remote driver will receive a POST message to the URL `/IpamDriver.RequestPool` with the following payload:
{
"AddressSpace": string
"Pool": string
"SubPool": string
"Options": map[string]string
"V6": bool
}
Where:
* `AddressSpace` the IP address space. It denotes a set of non-overlapping pools.
* `Pool` The IPv4 or IPv6 address pool in CIDR format
* `SubPool` An optional subset of the address pool, an ip range in CIDR format
* `Options` A map of IPAM driver specific options
* `V6` Whether an IPAM self-chosen pool should be IPv6
AddressSpace is the only mandatory field. If no `Pool` is specified IPAM driver may choose to return a self chosen address pool. In such case, `V6` flag must be set if caller wants an IPAM-chosen IPv6 pool. A request with empty `Pool` and non-empty `SubPool` should be rejected as invalid.
If a `Pool` is not specified IPAM will allocate one of the default pools. When `Pool` is not specified, the `V6` flag should be set if the network needs IPv6 addresses to be allocated.
A successful response is in the form:
{
"PoolID": string
"Pool": string
"Data": map[string]string
}
Where:
* `PoolID` is an identifier for this pool. Same pools must have same pool id.
* `Pool` is the pool in CIDR format
* `Data` is the IPAM driver supplied metadata for this pool
### ReleasePool
This API is for releasing a previously registered address pool.
```go
ReleasePool(poolID string) error
```
For this API, the remote driver will receive a POST message to the URL `/IpamDriver.ReleasePool` with the following payload:
{
"PoolID": string
}
Where:
* `PoolID` is the pool identifier
A successful response is empty:
{}
### RequestAddress
This API is for reserving an ip address.
```go
RequestAddress(string, net.IP, map[string]string) (*net.IPNet, map[string]string, error)
```
For this API, the remote driver will receive a POST message to the URL `/IpamDriver.RequestAddress` with the following payload:
{
"PoolID": string
"Address": string
"Options": map[string]string
}
Where:
* `PoolID` is the pool identifier
* `Address` is the required address in regular IP form (A.B.C.D). If this address cannot be satisfied, the request fails. If empty, the IPAM driver chooses any available address on the pool
* `Options` are IPAM driver specific options
A successful response is in the form:
{
"Address": string
"Data": map[string]string
}
Where:
* `Address` is the allocated address in CIDR format (A.B.C.D/MM)
* `Data` is some IPAM driver specific metadata
### ReleaseAddress
This API is for releasing an IP address.
For this API, the remote driver will receive a POST message to the URL `/IpamDriver.ReleaseAddress` with the following payload:
{
"PoolID": string
"Address": string
}
Where:
* `PoolID` is the pool identifier
* `Address` is the IP address to release
### GetCapabilities
During the driver registration, libnetwork will query the driver about its capabilities. It is not mandatory for the driver to support this URL endpoint. If driver does not support it, registration will succeed with empty capabilities automatically added to the internal driver handle.
During registration, the remote driver will receive a POST message to the URL `/IpamDriver.GetCapabilities` with no payload. The driver's response should have the form:
{
"RequiresMACAddress": bool
"RequiresRequestReplay": bool
}
## Capabilities
Capabilities are requirements, features the remote ipam driver can express during registration with libnetwork.
As of now libnetwork accepts the following capabilities:
### RequiresMACAddress
It is a boolean value which tells libnetwork whether the ipam driver needs to know the interface MAC address in order to properly process the `RequestAddress()` call.
If true, on `CreateEndpoint()` request, libnetwork will generate a random MAC address for the endpoint (if an explicit MAC address was not already provided by the user) and pass it to `RequestAddress()` when requesting the IP address inside the options map. The key will be the `netlabel.MacAddress` constant: `"com.docker.network.endpoint.macaddress"`.
### RequiresRequestReplay
It is a boolean value which tells libnetwork whether the ipam driver needs to receive the replay of the `RequestPool()` and `RequestAddress()` requests on daemon reload. When libnetwork controller is initializing, it retrieves from local store the list of current local scope networks and, if this capability flag is set, it allows the IPAM driver to reconstruct the database of pools by replaying the `RequestPool()` requests for each pool and the `RequestAddress()` for each network gateway owned by the local networks. This can be useful to ipam drivers which decide not to persist the pools allocated to local scope networks.
## Appendix
A Go extension for the IPAM remote API is available at [docker/go-plugins-helpers/ipam](https://github.com/docker/go-plugins-helpers/tree/master/ipam)

15
libnetwork/docs/legacy.md Normal file
View file

@ -0,0 +1,15 @@
This document provides a TLD&R version of https://docs.docker.com/v1.6/articles/networking/.
If more interested in detailed operational design, please refer to this link.
## Docker Networking design as of Docker v1.6
Prior to libnetwork, Docker Networking was handled in both Docker Engine and libcontainer.
Docker Engine makes use of the Bridge Driver to provide single-host networking solution with the help of linux bridge and IPTables.
Docker Engine provides simple configurations such as `--link`, `--expose`,... to enable container connectivity within the same host by abstracting away networking configuration completely from the Containers.
For external connectivity, it relied upon NAT & Port-mapping
Docker Engine was responsible for providing the configuration for the container's networking stack.
Libcontainer would then use this information to create the necessary networking devices and move them in to a network namespace.
This namespace would then be used when the container is started.

427
libnetwork/docs/macvlan.md Normal file
View file

@ -0,0 +1,427 @@
# Macvlan Driver
### Overview
The Macvlan driver provides operators the ability to integrate Docker networking in a simple and lightweight fashion into the underlying network. Macvlan is supported by the Linux kernel and is a well known Linux network type. The Macvlan built-in driver does not require any port mapping and supports VLAN trunking (Virtual Local Area Network). VLANs are a traditional method of network virtualization and layer 2 datapath isolation that is prevalent in some form or fashion in most data centers.
The Linux implementation is considered lightweight because it eliminates the need for using a Linux bridge for isolating containers on the Docker host. The VLAN driver requires full access to the underlying host making it suitable for Enterprise data centers that have administrative access to the host.
Instead of attaching container network interfaces to a Docker host Linux bridge for a network, the driver simply connects the container interface to the Docker Host Ethernet interface (or sub-interface). Each network is attached to a unique parent interface. Containers in a network share a common broadcast domain and intra-network connectivity is permitted. Two separate networks will each have a unique parent interface and that parent is what enforces datapath isolation between two networks. In order for inter-network communications to occur, an IP router, external to the Docker host, is required to route between the two networks by hair-pining into the physical network and then back to the Docker host. While hairpinning traffic can be less efficient then east/west traffic staying local to the host, there is often more complexity associated with disaggregating services to the host. It can be practical for some users to leverage existing network services, such firewalls and load balancers that already exist in a data center architecture.
When using traditional Linux bridges there are two common techniques to get traffic out of a container and into the physical network and vice versa. The first method to connect containers to the underlying network is to use Iptable rules which perform a NAT translation from a bridge that represents the Docker network to the physical Ethernet connection such as `eth0`. The upside of Iptables using the Docker built-in bridge driver is that the NIC does not have to be in promiscuous mode. The second bridge driver method is to move a host's external Ethernet connection into the bridge. Moving the host Ethernet connection can at times be unforgiving. Common mistakes such as cutting oneself off from the host, or worse, creating bridging loops that can cripple a VLAN throughout a data center can open a network design up to potential risks as the infrastructure grows.
Connecting containers without any NATing is where the VLAN drivers accel. Rather than having to manage a bridge for each Docker network containers are connected directly to a `parent` interface such as `eth0` that attaches the container to the same broadcast domain as the parent interface. A simple example is if a host's `eth0` is on the network `192.168.1.0/24` with a gateway of `192.168.1.1` then a Macvlan Docker network can start containers on the addresses `192.168.1.2 - 192.168.1.254`. Containers use the same network as the parent `-o parent` that is specified in the `docker network create` command.
There are positive performance implication as a result of bypassing the Linux bridge, along with the simplicity of less moving parts, which is also attractive. Macvlan containers are easy to troubleshoot. The actual MAC and IP address of the container is bridged into the upstream network making a problematic application easy for operators to trace from the network. Existing underlay network management and monitoring tools remain relevant.
### Pre-Requisites
- The examples on this page are all single host and require Docker v1.12 or greater running on Linux.
- Any examples using a sub-interface like `eth0.10` can be replaced with `eth0` or any other valid parent interface on the Docker host. Sub-interfaces with a `.` are dynamically created. The parent `-o parent` interface parameter can also be left out of the `docker network create` all together and the driver will create a `dummy` Linux type interface that will enable local host connectivity to perform the examples.
- Kernel requirements:
- To check your current kernel version, use `uname -r` to display your kernel version.
- Macvlan Linux kernel v3.93.19 and 4.0+.
### MacVlan Bridge Mode Example Usage
- Macvlan driver networks are attached to a parent Docker host interface. Examples are a physical interface such as `eth0`, a sub-interface for 802.1q VLAN tagging like `eth0.10` (`.10` representing VLAN `10`) or even bonded `bond0` host adapters which bundle two Ethernet interfaces into a single logical interface and provide diversity in the server connection.
- The specified gateway is external to the host that is expected to be provided by the network infrastructure. If a gateway is not specified using the `--gateway` parameter, then Libnetwork will infer the first usable address of a subnet. For example, if a network's subnet is `--subnet 10.1.100.0/24` and no gateway is specified, Libnetwork will assign a gateway of `10.1.100.1` to the container. A second example would be a subnet of `--subnet 10.1.100.128/25` would receive a gateway of `10.1.100.129`.
- Containers on separate networks cannot reach one another without an external process routing between the two networks/subnets.
- Each Macvlan Bridge mode Docker network is isolated from one another and there can be only one network attached to a parent interface at a time. There is a theoretical limit of 4,094 sub-interfaces per host adapter that a Docker network could be attached to.
- The driver limits one network per parent interface. The driver does however accommodate secondary subnets to be allocated in a single Docker network for a multi-subnet requirement. The upstream router is responsible for proxy-arping between the two subnets.
- Any Macvlan container sharing the same subnet can communicate via IP to any other container in the same subnet without a gateway. It is important to note, that the parent will go into promiscuous mode when a container is attached to the parent since each container has a unique MAC address. Alternatively, Ipvlan which is currently an experimental driver uses the same MAC address as the parent interface and thus precluding the need for the parent being promiscuous.
In the following example, `eth0` on the docker host has an IP on the `172.16.86.0/24` network and a default gateway of `172.16.86.1`. The gateway is an external router with an address of `172.16.86.1`. An IP address is not required on the Docker host interface `eth0` in `bridge` mode, it merely needs to be on the proper upstream network to get forwarded by a network switch or network router.
![Simple Macvlan Bridge Mode Example](images/macvlan_bridge_simple.png)
**Note** The Docker network subnet specified needs to match the network that parent interface of the Docker host for external communications. For example, use the same subnet and gateway of the Docker host ethernet interface specified by the `-o parent=` option. The parent interface is not required to have a IP address assigned to it, since this is simply L2 flooding and learning.
- The parent interface used in this example is `eth0` and it is on the subnet `172.16.86.0/24`. The containers in the `docker network` will also need to be on this same subnet as the parent `-o parent=`. The gateway is an external router on the network.
- Libnetwork driver types are specified with the `-d <driver_name>` option. In this case `-d macvlan`
- The parent interface `-o parent=eth0` is configured as followed:
```
ip addr show eth0
3: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
inet 172.16.86.250/24 brd 172.16.86.255 scope global eth0
```
Create the macvlan network and run a couple of containers attached to it:
```
# Macvlan (-o macvlan_mode= Defaults to Bridge mode if not specified)
docker network create -d macvlan \
--subnet=172.16.86.0/24 \
--gateway=172.16.86.1 \
-o parent=eth0 pub_net
# Run a container on the new network specifying the --ip address.
docker run --net=pub_net --ip=172.16.86.10 -itd alpine /bin/sh
# Start a second container and ping the first
docker run --net=pub_net -it --rm alpine /bin/sh
ping -c 4 172.16.86.10
```
Take a look at the containers ip and routing table:
```
ip a show eth0
eth0@if3: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 46:b2:6b:26:2f:69 brd ff:ff:ff:ff:ff:ff
inet 172.16.86.2/24 scope global eth0
ip route
default via 172.16.86.1 dev eth0
172.16.86.0/24 dev eth0 src 172.16.86.2
# NOTE: the containers can NOT ping the underlying host interfaces as
# they are intentionally filtered by Linux for additional isolation.
# In this case the containers cannot ping the -o parent=172.16.86.250
```
Users can explicitly specify the `bridge` mode option `-o macvlan_mode=bridge` or leave the mode option out since the most common mode of `bridge` is the driver default.
While the `eth0` interface does not need to have an IP address, it is not uncommon to have an IP address on the interface. Addresses can be excluded from getting an address from the default built in IPAM by using the `--aux-address=x.x.x.x` argument. This will blacklist the specified address from being handed out to containers from the built-in Libnetwork IPAM.
- The following is the same network example as above, but blacklisting the `-o parent=eth0` address from being handed out to a container.
```
docker network create -d macvlan \
--subnet=172.16.86.0/24 \
--gateway=172.16.86.1 \
--aux-address="exclude_host=172.16.86.250" \
-o parent=eth0 pub_net
```
Another option for specifying what subpool or range of usable addresses is used by the default Docker IPAM driver is to use the argument `--ip-range=`. This instructs the driver to allocate container addresses from the specific range, rather then the broader range from the `--subnet=` argument.
- The network create in the following example, allocates addresses beginning at `192.168.32.128` and increments n+1 upwards from there.
```
docker network create -d macvlan \
--subnet=192.168.32.0/24 \
--ip-range=192.168.32.128/25 \
--gateway=192.168.32.254 \
-o parent=eth0 macnet32
# Start a container and verify the address is 192.168.32.128
docker run --net=macnet32 -it --rm alpine /bin/sh
```
The network can then be deleted with:
```
docker network rm <network_name or id>
```
- **Note:** Linux Macvlan interface types are not able to ping or communicate with the default namespace IP address. For example, if you create a container and try to ping the Docker host's `eth0` it will **not** work. That traffic is explicitly filtered by the kernel to offer additional provider isolation and security. This is a common gotcha when a user first uses those Linux interface types since it is natural to ping local addresses when testing.
For more on Docker networking commands see: [Working with Docker network commands](https://docs.docker.com/engine/userguide/networking/work-with-networks/)
### Macvlan 802.1q Trunk Bridge Mode Example Usage
VLANs have long been a primary means of virtualizing data center networks and are still in virtually all existing networks today. VLANs work by tagging a Layer-2 isolation domain with a 12-bit identifier ranging from 1-4094. The VLAN tag is inserted into a packet header that enables a logical grouping of a single subnet or multiple subnets of IPv4 and/or IPv6. It is very common for network operators to separate traffic using VLANs based on a subnet(s) function or security profile such as `web`, `db` or any other isolation requirements.
It is very common to have a compute host requirement of running multiple virtual networks concurrently on a host. Linux networking has long supported VLAN tagging, also known by its standard 802.1Q, for maintaining datapath isolation between networks. The Ethernet link connected to a Docker host can be configured to support the 802.1q VLAN IDs by creating Linux sub-interfaces, each sub-interface being allocated a unique VLAN ID.
![Simple Macvlan Mode Example](images/multi_tenant_8021q_vlans.png)
Trunking 802.1q to a Linux host is notoriously painful for operations. It requires configuration file changes in order to be persistent through a reboot. If a bridge is involved, a physical NIC needs to be moved into the bridge and the bridge then gets the IP address. This has lead to many a stranded servers since the risk of cutting off access or misconfiguration is relatively high.
Like all of the Docker network drivers, the overarching goal is to alleviate the operational pains of managing network resources. To that end, when a network receives a sub-interface as the parent that does not exist, the drivers create the VLAN tagged interfaces while creating the network. If the sub-interface already exists it is simply used as is.
In the case of a host reboot, instead of needing to modify often complex network configuration files the driver will recreate all network links when the Docker daemon restarts. The driver tracks if it created the VLAN tagged sub-interface originally with the network create and will **only** recreate the sub-interface after a restart if it created the link in the first place.
The same holds true if the network is deleted `docker network rm`. If driver created the sub-interface with `docker network create` it will remove the sub-interface link for the operator.
If the user doesn't want Docker to create and delete the `-o parent` sub-interface, then you simply pass an interface that already exists as the parent link. Parent interfaces such as `eth0` are not deleted, only interfaces that are slave links.
For the driver to add/delete the vlan sub-interfaces the format needs to be `-o parent interface_name.vlan_tag`.
For example: `-o parent eth0.50` denotes a parent interface of `eth0` with a slave of `eth0.50` tagged with vlan id `50`. The equivalent `ip link` command would be `ip link add link eth0 name eth0.50 type vlan id 50`.
Replace the `macvlan` with `ipvlan` in the `-d` driver argument to create macvlan 802.1q trunks.
**Vlan ID 50**
In the next example, the network is tagged and isolated by the Docker host. A parent of `eth0.50` will tag the Ethernet traffic with the vlan id `50` specified by the parent nomenclature `-o parent=eth0.50`. Other naming formats can be used, but the links need to be added and deleted manually using `ip link` or Linux configuration files. As long as the `-o parent` exists, anything can be used if compliant with Linux netlink.
```
# now add networks and hosts as you would normally by attaching to the master (sub)interface that is tagged
docker network create -d macvlan \
--subnet=192.168.50.0/24 \
--gateway=192.168.50.1 \
-o parent=eth0.50 macvlan50
# In two separate terminals, start a Docker container and the containers can now ping one another.
docker run --net=macvlan50 -it --name macvlan_test5 --rm alpine /bin/sh
docker run --net=macvlan50 -it --name macvlan_test6 --rm alpine /bin/sh
```
**Vlan ID 60**
In the second network, tagged and isolated by the Docker host, `eth0.60` is the parent interface tagged with vlan id `60` specified with `-o parent=eth0.60`. The `macvlan_mode=` defaults to `macvlan_mode=bridge`. It can also be explicitly set with the same result, as shown in the next example.
```
# now add networks and hosts as you would normally by attaching to the master (sub)interface that is tagged.
docker network create -d macvlan \
--subnet=192.168.60.0/24 \
--gateway=192.168.60.1 \
-o parent=eth0.60 -o \
-o macvlan_mode=bridge macvlan60
# In two separate terminals, start a Docker container and the containers can now ping one another.
docker run --net=macvlan60 -it --name macvlan_test7 --rm alpine /bin/sh
docker run --net=macvlan60 -it --name macvlan_test8 --rm alpine /bin/sh
```
**Example:** Multi-Subnet Macvlan 802.1q Trunking
The same as the example before except there is an additional subnet bound to the network that the user can choose to provision containers on. In MacVlan/Bridge mode, containers can only ping one another if they are on the same subnet/broadcast domain unless there is an external router that routes the traffic (answers ARP etc) between the two subnets. Multiple subnets assigned to a network require a gateway external to the host that falls within the subnet range to hairpin the traffic back to the host.
```
docker network create -d macvlan \
--subnet=10.1.20.0/24 --subnet=10.1.10.0/24 \
--gateway=10.1.20.1 --gateway=10.1.10.1 \
-o parent=eth0.101 mcv101
# View Links after to network create `ip link`
$ ip link
# Test 10.1.20.10.0/24 connectivity
docker run --net=mcv101 --ip=10.1.20.9 -itd alpine /bin/sh
docker run --net=mcv101 --ip=10.1.20.10 -it --rm alpine ping -c 4 10.1.20.10
# Test 10.1.10.10.0/24 connectivity
docker run --net=mcv101 --ip=10.1.10.10 -itd alpine /bin/sh
docker run --net=mcv101 --ip=10.1.10.9 -it --rm alpine ping -c 4 10.1.10.10
# Delete All Containers
docker rm -f `docker ps -qa`
# Delete all Networks
docker network rm $(docker network ls -q)
# Run ip links again and verify the links are cleaned up
ip link
```
Hosts on the same VLAN are typically on the same subnet and almost always are grouped together based on their security policy. In most scenarios, a multi-tier application is tiered into different subnets because the security profile of each process requires some form of isolation. For example, hosting your credit card processing on the same virtual network as the front-end web-server would be a regulatory compliance issue, along with circumventing the long standing best practice of layered defense in depth architectures. VLANs or the equivalent VNI (Virtual Network Identifier) when using the built-in Overlay driver, are the first step in isolating tenant traffic.
![Docker VLANs in Depth](images/vlans-deeper-look.png)
### Dual Stack IPv4 IPv6 Macvlan Bridge Mode
The following specifies both v4 and v6 addresses. An address from each family will be assigned to each container. You can specify either family type explicitly or allow the Libnetwork IPAM to assign them from the subnet pool.
*Note on IPv6:* When declaring a v6 subnet with a `docker network create`, the flag `--ipv6` is required along with the subnet (in the following example `--subnet=2001:db8:abc8::/64`). Similar to IPv4 functionality, if a IPv6 `--gateway` is not specified, the first usable address in the v6 subnet is inferred and assigned as the gateway for the broadcast domain.
The following example creates a network with multiple IPv4 and IPv6 subnets. The network is attached to a sub-interface of `eth0.218`. By specifying `eth0.218` as the parent, the driver will create the sub-interface (if it does not already exist) and tag all traffic for containers in the network with a VLAN ID of 218. The physical switch port on the ToR (top of rack) network port needs to have 802.1Q trunking enabled for communications in and out of the host to work.
```
# Create multiple subnets w/ dual stacks:
docker network create -d macvlan \
--subnet=192.168.216.0/24 --subnet=192.168.218.0/24 \
--gateway=192.168.216.1 --gateway=192.168.218.1 \
--ipv6 --subnet=2001:db8:abc8::/64 --gateway=2001:db8:abc8::10 \
-o parent=eth0.218 \
-o macvlan_mode=bridge macvlan216
# Start a container on the first subnet 192.168.216.0/24
docker run --net=macvlan216 --name=macnet216_test --ip=192.168.216.10 -itd alpine /bin/sh
# Start a container on the second subnet 192.168.218.0/24
docker run --net=macvlan216 --name=macnet218_test --ip=192.168.218.10 -itd alpine /bin/sh
# Ping the first container started on the 192.168.216.0/24 subnet
docker run --net=macvlan216 --ip=192.168.216.11 -it --rm alpine /bin/sh
# From inside the container shell ping the other host on the same subnet and then exit
$ ping -c4 192.168.216.10
$ exit
# Ping the first container started on the 192.168.218.0/24 subnet
docker run --net=macvlan216 --ip=192.168.218.11 -it --rm alpine /bin/sh
# From inside the container shell ping the other host on the same subnet and then exit
$ ping -c4 192.168.218.10
$ exit
# Start a container in the back explicitly declaring the v6 address
docker run --net=macvlan216 --ip6=2001:db8:abc8::20 -itd alpine /bin/sh
# Start another container pinging the v6 address of the previous container started in the background
docker run --net=macvlan216 -it --rm alpine /bin/sh
$ ping6 -c4 2001:db8:abc8::20
$ exit
# Or, run the ping as a explicit process
docker run --net=macvlan216 -it --rm alpine ping6 -c4 2001:db8:abc8::20
```
View the details of one of the containers:
```
docker run --net=macvlan216 --ip=192.168.216.11 -it --rm alpine /bin/sh
root@526f3060d759:/# ip a show eth0
25: eth0@if19: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 02:42:c0:a8:d8:0b brd ff:ff:ff:ff:ff:ff
inet 192.168.216.11/24 scope global eth0
valid_lft forever preferred_lft forever
inet6 2001:db8:abc8::1/64 scope link
valid_lft forever preferred_lft forever
# The default gateway is a network gateway external to the Docker host
$ ip route
default via 192.168.216.1 dev eth0
192.168.216.0/24 dev eth0 src 192.168.216.11
# Specified v6 gateway of 2001:db8:abc8::10
$ ip -6 route
2001:db8:abc4::/64 dev eth0 proto kernel metric 256
2001:db8:abc8::/64 dev eth0 proto kernel metric 256
default via 2001:db8:abc8::10 dev eth0 metric 1024
#Containers can have both v4 and v6 addresses assigned to their interfaces or
# Both v4 and v6 addresses can be assigned to the container's interface
docker run --net=macvlan216 --ip=192.168.216.50 --ip6=2001:db8:abc8::50 -it --rm alpine /bin/sh
# View the details of the dual stack eth0 interface from inside of the container
$ ip a show eth0
95: eth0@if91: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 02:42:c0:a8:d8:32 brd ff:ff:ff:ff:ff:ff
inet 192.168.216.50/24 scope global eth0
valid_lft forever preferred_lft forever
inet6 2001:db8:abc8::50/64 scope global flags 02
valid_lft forever preferred_lft forever
```
The next example demonstrates how default gateways are inferred if the `--gateway` option is not specified for a subnet in the `docker network create ...` command. If the gateway is not specified, the first usable address in the subnet is selected. It also demonstrates how `--ip-range` and `--aux-address` are used in conjunction to exclude address assignments within a network and reserve sub-pools of usable addresses within a network's subnet. All traffic is untagged since `eth0` is used rather then a sub-interface.
```
docker network create -d macvlan \
--subnet=192.168.136.0/24 \
--subnet=192.168.138.0/24 \
--ipv6 --subnet=fd11::/64 \
--ip-range=192.168.136.0/25 \
--ip-range=192.168.138.0/25 \
--aux-address="reserved1=fd11::2" \
--aux-address="reserved2=192.168.136.2" \
--aux-address="reserved3=192.168.138.2" \
-o parent=eth0 mcv0
docker run --net=mcv0 -it --rm alpine /bin/sh
```
Next is the output from a running container provisioned on the example network named `mcv0`.
```
# Container eth0 output (the fe80::42:c0ff:fea8:8803/64 address is the local link addr)
ip address show eth0
100: eth0@if2: <BROADCAST,MULTICAST,UP,LOWER_UP,M-DOWN> mtu 1500 qdisc noqueue state UNKNOWN
link/ether 02:42:c0:a8:88:03 brd ff:ff:ff:ff:ff:ff
inet 192.168.136.3/24 scope global eth0
valid_lft forever preferred_lft forever
inet6 fd11::3/64 scope global flags 02
valid_lft forever preferred_lft forever
inet6 fe80::42:c0ff:fea8:8803/64 scope link
valid_lft forever preferred_lft forever
# IPv4 routing table from within the container
$ ip route
default via 192.168.136.1 dev eth0
192.168.136.0/24 dev eth0 src 192.168.136.3
# IPv6 routing table from within the container (the second v6 addresses is the local link addr)
$ ip -6 route
fd11::/64 dev eth0 metric 256
fe80::/64 dev eth0 metric 256
default via fd11::1 dev eth0 metric 1024
```
- After the examples, `docker rm -f `docker ps -qa`` can be used to remove all existing containers on the host, both running and stopped.
A key takeaway is, operators have the ability to map their physical network into their virtual network for integrating containers into their environment with no operational overhauls required. NetOps simply drops an 802.1q trunk into the Docker host. That virtual link would be the `-o parent=` passed in the network creation. For untagged (non-VLAN) links, it is as simple as `-o parent=eth0` or for 802.1q trunks with VLAN IDs each network gets mapped to the corresponding VLAN/Subnet from the network.
An example being, NetOps provides VLAN ID and the associated subnets for VLANs being passed on the Ethernet link to the Docker host server. Those values are simply plugged into the `docker network create` commands when provisioning the Docker networks. These are persistent configurations that are applied every time the Docker engine starts which alleviates having to manage often complex configuration files. The network interfaces can also be managed manually by being pre-created and docker networking will never modify them, simply use them as parent interfaces. Example mappings from NetOps to Docker network commands are as follows:
- VLAN: 10, Subnet: 172.16.80.0/24, Gateway: 172.16.80.1
- `--subnet=172.16.80.0/24 --gateway=172.16.80.1 -o parent=eth0.10`
- VLAN: 20, IP subnet: 172.16.50.0/22, Gateway: 172.16.50.1
- `--subnet=172.16.50.0/22 --gateway=172.16.50.1 -o parent=eth0.20`
- VLAN: 30, Subnet: 10.1.100.0/16, Gateway: 10.1.100.1
- `--subnet=10.1.100.0/16 --gateway=10.1.100.1 -o parent=eth0.30`
### Manually Creating 802.1q Links
If a user does not want the driver to create the vlan sub-interface it simply needs to exist prior to the `docker network create`. If you have sub-interface naming that is not `interface.vlan_id` it is honored in the `-o parent=` option again as long as the interface exists and us up.
Links if manually created can be named anything you want. As long as the exist when the network is created that is all that matters. Manually created links do not get deleted regardless of the name when the network is deleted with `docker network rm`.
```
# create a new sub-interface tied to dot1q vlan 40
ip link add link eth0 name eth0.40 type vlan id 40
# enable the new sub-interface
ip link set eth0.40 up
# now add networks and hosts as you would normally by attaching to the master (sub)interface that is tagged
docker network create -d macvlan \
--subnet=192.168.40.0/24 \
--gateway=192.168.40.1 \
-o parent=eth0.40 macvlan40
# in two separate terminals, start a Docker container and the containers can now ping one another.
docker run --net=macvlan40 -it --name mcv_test5 --rm alpine /bin/sh
docker run --net=macvlan40 -it --name mcv_test6 --rm alpine /bin/sh
```
**Example:** Vlan sub-interface manually created with any name:
```
# create a new sub interface tied to dot1q vlan 40
ip link add link eth0 name foo type vlan id 40
# enable the new sub-interface
ip link set foo up
# now add networks and hosts as you would normally by attaching to the master (sub)interface that is tagged
docker network create -d macvlan \
--subnet=192.168.40.0/24 --gateway=192.168.40.1 \
-o parent=foo macvlan40
# in two separate terminals, start a Docker container and the containers can now ping one another.
docker run --net=macvlan40 -it --name mcv_test5 --rm alpine /bin/sh
docker run --net=macvlan40 -it --name mcv_test6 --rm alpine /bin/sh
```
Manually created links can be cleaned up with:
```
ip link del foo
```
As with all of the Libnetwork drivers, networks of various driver types can be mixed and matched. This even applies to 3rd party ecosystem drivers that can be run in parallel with built-in drivers for maximum flexibility to the user.

143
libnetwork/docs/network.md Normal file
View file

@ -0,0 +1,143 @@
# Docker Networking With Libnetwork
This document describes docker networking in bridge and overlay mode delivered via libnetwork. Libnetwork uses iptables extensively to configure NATting and forwarding rules. [https://wiki.archlinux.org/index.php/iptables](https://wiki.archlinux.org/index.php/iptables) provides a good introduction to iptables and its default chains. More details may be found in [http://ipset.netfilter.org/iptables-extensions.man.html](http://ipset.netfilter.org/iptables-extensions.man.html)
# Bridge Mode
![alt_text](images/network_flow_bridge.png "image_tooltip")
The above diagram illustrates the network topology when a container instantiated with network mode set to bridge by docker engine. In this case, the libnetwork does the following
1. Creates a new network namespace container NS for this container
2. Creates a veth-pair, attaching one end to docker0 bridge on host NS, and move the other end to the new container NS.
3. In the new NS, assigns an IP address from docker0 subnet, sets default route gateway to docker0 IP address.
This completes network setup for container running in bridge mode. Outbound traffic from container flows through routing (Container NS) ? veth-pair ? docker0 bridge (Host NS) ---> docker0 interface (HOST NS) ? routing (HostNS) ? eth0 (HostNS) and out of host. And inbound traffic to container flows through the reverse direction.
Note that the container?s assigned IP (172.17.0.2 in above example) address is on docker0 subnet, and is not visible to externally to host. For this reason, a default masquerading rule is added to nat iptable?s POSTROUTING chain in host NS at docker engine initialization time. It states that for request traffic flow that has gone through the routing stage and the srcIP is within docker0 subnet (172.17.0.0/16), the traffic request must be originated from docker containers, therefore its srcIP is replaced with IP of outbound interface determined by routing. In the above diagram eth0?s IP 172.31.2.1 is used by replacement IP. In another word, masquerade is same as SNAT with replacement srcIP set to outbound interface?s IP.
If the container backends a service and has a listening targetPort in the container NS, it also must also have a corresponding publishedPort in host NS to receive the request and forward it to the container. Two rules are created in host NS for this purpose:
1. In nat iptable, a DOCKER(nat) chain is inserted to PREROUTING chain. And a rule such as ?DNAT tcp any any dport:45999 to conrts? is added to DOCKER chain, it does a DNAT for any traffic arriving at eth0 of host NS with dstIP=172.17.0.2 and dst Port=80, so that the DNATted request become routable to backend container listening on port 80.
2. In filter iptable, a DOCKER(filter) chain is inserted to FORWARD chain. And a rule such as ?ACCEPT tcp any containerIP dport:targetPort? is added. This allows request that is DNATted in 1) to be forwarded container.
# Swarm/Overlay Mode
Libnetwork use completely different set of namespaces, bridges, and iptables to forward container traffic in swarm/overlay mode.
![alt_text](images/network_flow_overlay.png "image_tooltip")
As depicted in the above diagram, when a host joinis a swarm cluster, the docker engine creates following network topology.
Initial Setup ( before any services are created)
1. In host NS, creates a docker_gwbridge bridge, assigning a subnet range to this bridge. In this above diagram 172.18.0.1/16. This subnet is local, and does not leak outside of the host.
2. In host NS, adds masquerading rule in nat iptable (5) PREROUTING chain for any request with srcIP within 172.18.0.0/16 subnet.
3. Creates a new network namespace ingress_sbox NS, creates two veth-pairs, one eth1 connects to docker_gwbridge bridge with fixed IP 172.18.0.2, and other (eth0) connected to ingress NS bridge br0. The eth0 is assigned an IP address, in this example, 10.255.0.2.
4. In ingress_sbox NS, adds to nat iptable(2)?s PREROUTING chain a rule that snat and redirect service request to ipvs for load-balancing. For instance,
?SNAT all -- anywhere 10.255.0.0/16 ipvs to:10.255.0.2?
5. In ingress_sbox NS, adds to nat iptable(2)?s POSTROUTING and OUTPUT chains rules that allows DNS lookup to be redirected to/from docker engine as is required by swarm service discovery.
6. Creates a new network namespace ingress NS, and creates a bridge br0 that has two links, one attaches to eth0 interface on ingress_sbox NS, and other to vxlan interface that in essence makes bridge br0 span across all hosts on the same swarm cluster. Each eth0 interface in ingress_sbox, each container instance of a service, and each service itself are given a unique IP 10.255.xx.xx, and is attached to br0, so that on the same swarm cluster, services, container instances of services, and ingress_sbox?s eth0 are all connected via bridge br0.
When a service is created, say with targetPort=80 and publishedPort=30000. The following are added to the existing network topology.
Service Setup
1. A container backends the service has its own namespace container NS. And two vether-pairs are created, eth1 is attached to docker_gwbridge in host NS, is given an IP assigned from docker_gwbridge subnet, in the example, 172.18.0.2, eth0 is attached to br0 in ingress NS, is given an IP of 10.255.0.5 in this example.
2. In container NS, adds rules to filter iptable (3)?s INPUT and OUTPUT chain that only allows targetPortt =80 traffic.
3. In container NS, adds rule to nat iptables (4)?s PREROUTING chain that changes publishedPort to targetPort. For instance,
?REDIRECT tcp -- anywhere 10.255.0.11 tcp dpt:30000 redir ports 80?
4. In container NS, adds rules to nat iptable(4)?s INPUT/OUTPUT chain that allow DNS lookup in this container to be redirected to docker engine.
5. In host NS, in filter iptable (5)?s FORWARD chain, inserts DOCKER-INGRESS chain, and adds a rule to allow service request to port 30000 and its reply.
I.e ?ACCEPT tcp -- anywhere anywhere tcp dpt:30000? and
?ACCEPT tcp -- anywhere anywhere state RELATED,ESTABLISHED tcp spt:30000?
6. In host NS, in nat iptable(6)?s PREROUTING chain, inserts (different) DOCKER-INGRESS chain, and adds a rule to dnat service request to ingress NS?s eth1?s IP (172.18.0.2). i.e
?DNAT tcp -- anywhere anywhere tcp dpt:30000 to:172.18.0.2:30000?
7. In ingress_sbox NS, in mangle iptable(1)?s PREROUTING chain, adds a rule to mark service request, i.e
?MARK tcp -- anywhere anywhere tcp dpt:30000 MARK set 0x100?
8. In ingress_sbox NS, in nat iptable(2)?s POSTROUTING chain, adds a rule to snat request?s srcIP to eth1?s IP, and forward to ipvs to load-balancing, i.e
?SNAT all -- 0.0.0.0/0 10.255.0.0/16 ipvs to:10.255.0.2?
9. In ingress_sbox, configures ipvs LB policy for marked traffic, i.e
FWM 256 rr
-> 10.255.0.5:0 Masq 1 0 0
-> 10.255.0.7:0 Masq 1 0 0
-> 10.255.0.8:0 Masq 1 0 0
Here each of 10.255.0.x represents IP address of of container instance backending the service.
## Service Traffic Flow
This section describes traffic flow of request and reply to/from a service with publishedPort = 30000, targetPort = 80
1. Request arrives at eth0 in host NS, with dstIP=172.31.2.1, dstPort=30000, srcIP=CLIENT_IP, srcPort=CLIENT_PORT. Before routing, It first goes through NAT rule in service setup (6) that dnats request with dstIP=172.18.0.2; It then go through FORWARD rule in service setup (5) during routing that allows request with dstPort=30000 to go through. The routing then forward request to docker_gwbridge, and in turn ...
2. The request arrives at eth1 in ingress_sbox NS with dstIP=172.18.0.2, dstPort=30000, srcIP=CLIENT_IP, srcPort=CLIENT_PORT. Before routing, the request is marked before by mangle iptable rule in service setup (7). After routing, it is snated with eth1?s IP 10.255.0.2, forwarded to ipvs for LB by nat iptable rule in service setup (8). The ipvs policy in setup (9) picks one container instance of the service, in this example 10.255.0.5 and dnats it.
3. The request arrives at br0 in ingress NS with dstIP=10.255.0.5, dstPort=30000, srcIP=10.255.0.2, srcPort=EPHEMERAL_PORT. For simplicity, we assume the container instance 10.255.0,5 is the local host, therefore simply forwards it. Note since br0 spans across all hosts in the cluster via vxlan, with all services instances latching onto it, so whether to pick remote or local container instance, it does not change the routing policy configuration.
4. The request arrives at eth0 of container NS with dstIP=10.255.0.5, dstPort=30000, srcIP=10.255.0.x (eth1 IP in ingress_sbox NS), srcPort=EMPHEMERAL_PORT. Before routing, it?s dstPort is changed to 80 via nat rule in service setup (3), and is allowed to be forwarded to local process by INPUT rule in service setup (2) post routig. The process listening on tcp:80 receives request with dstIP=10.255.0.5, dstPort=80, srcIP=10.255.0.2, , srcPort=EPHEMERAL_PORT.
5. The process replies, The reply has dstIP=10.255.0.2, dstPort=EPHEMERAL_PORT, srcIp=not_known, srcPort=80. It goes through filter rule in OUTPUT chain in service setup(2), which allows it to pass. It goes through routing that determines outbound interface is eth1, and srcIP=10.255.0.5; and it ?un-dnats? srcPort=80 to 30000 via nat table rule in service setup (3).
6. The reply arrives at br0 in ingress NS with dstIP=10.255.0.2, dstPort=EPHEMERAL_PORT, srcIP=10.255.0.5, srcPort=30000, which duly forwarded it to ...
7. The eh0 interface in sb_ingress NS. The reply first go through ipvs LB that ?un-dnats? srcIP from 10.255.0.5 to 172.18.0.2; then ?un-snats? via nat rule in service setup (8) dstIP from 10.255.0.2 to CLIENT_IP, dstPort from EMPHERAL_PORT to CLIENT_PORT.
8. The reply arrives at docker_gwbridge0 interface of host NS with dstIP=CLIENT_IP, dstPort=CLIENT_PORT, srcIP=172.18.0.2, srcPort=30000. The reply ?un-snats? with nat rule in service setup(6) with srcIP changes to 172.31.2.1. And is then forwarded out of eth0 interface, and complete the traffic flow. From external view, request enters host with dstIP=172.31.2.1, dstPort=30000, srcIP=CLIENT_IP, srcPort=CLIENT_PORT; and reply exits with dstIP=CLIENT_IP, dstPort=CLIENT_PORT, srcIP=172.31.2.1, srcPort=30000.
## Other Flows
**Northbound traffic originated from a container instance, for example, ping [www.cnn.com](www.cnn.com):**
The traffic flow is exactly the same as in bridge mode, except it is via docker_gwbridge in host NS, and traffic is masqueraded with nat rule in initial setup (2).
**DNS traffic**
DNS lookup traffic is routed to docker engine from container instance for service discovery, filling the blank.
# Other IPTable Chain and Rules
Other iptable chains and rules created and/or managed by docker engine/libnetwork.
**DOCKER-USER**: inserted as the first rule to FORWARD chain of filter iptable in host NS. So that user can independently managed traffic that may or may not be related docker containers.
**DOCKER-ISOLATION-STAGE-1** / 2: Filling in the blank
<!-- Docs to Markdown version 1.0?17 -->

View file

@ -0,0 +1,66 @@
NetworkDB
=========
There are two databases used in libnetwork:
- A persistent database that stores the network configuration requested by the user. This is typically the SwarmKit managers' raft store.
- A non-persistent peer-to-peer gossip-based database that keeps track of the current runtime state. This is NetworkDB.
NetworkDB is based on the [SWIM][] protocol, which is implemented by the [memberlist][] library.
`memberlist` manages cluster membership (nodes can join and leave), as well as message encryption.
Members of the cluster send each other ping messages from time to time, allowing the cluster to detect when a node has become unavailable.
The information held by each node in NetworkDB is:
- The set of nodes currently in the cluster (plus nodes that have recently left or failed).
- For each peer node, the set of networks to which that node is connected.
- For each of the node's currently-in-use networks, a set of named tables of key/value pairs.
Note that nodes only keep track of tables for networks to which they belong.
Updates spread through the cluster from node to node, and nodes may have inconsistent views at any given time.
They will eventually converge (quickly, if the network is operating well).
Nodes look up information using their local networkdb instance. Queries are not sent to remote nodes.
NetworkDB does not impose any structure on the tables; they are just maps from `string` keys to `[]byte` values.
Other components in libnetwork use the tables for their own purposes.
For example, there are tables for service discovery and load balancing,
and the [overlay](overlay.md) driver uses NetworkDB to store routing information.
Updates to a network's tables are only shared between nodes that are on that network.
All libnetwork nodes join the gossip cluster.
To do this, they need the IP address and port of at least one other member of the cluster.
In the case of a SwarmKit cluster, for example, each Docker engine will use the IP addresses of the swarm managers as the initial join addresses.
The `Join` method can be used to update these bootstrap IPs if they change while the system is running.
When joining the cluster, the new node will initially synchronise its cluster-wide state (known nodes and networks, but not tables) with at least one other node.
The state will be mostly kept up-to-date by small UDP gossip messages, but each node will also periodically perform a push-pull TCP sync with another random node.
In a push-pull sync, the initiator sends all of its cluster-wide state to the target, and the target then sends all of its own state back in response.
Once part of the gossip cluster, a node will also send a `NodeEventTypeJoin` message, which is a custom message defined by NetworkDB.
This is not actually needed now, but keeping it is useful for backwards compatibility with nodes running previous versions.
While a node is active in the cluster, it can join and leave networks.
When a node wants to join a network, it will send a `NetworkEventTypeJoin` message via gossip to the whole cluster.
It will also perform a bulk-sync of the network-specific state (the tables) with every other node on the network being joined.
This will allow it to get all the network-specific information quickly.
The tables will mostly be kept up-to-date by UDP gossip messages between the nodes on that network, but
each node in the network will also periodically do a full TCP bulk sync of the tables with another random node on the same network.
Note that there are two similar, but separate, gossip-and-periodic-sync mechanisms here:
1. memberlist-provided gossip and push-pull sync of cluster-wide state, involving all nodes in the cluster.
2. networkdb-provided gossip and bulk sync of network tables, for each network, involving just those nodes in that network.
When a node wishes to leave a network, it will send a `NetworkEventTypeLeave` via gossip. It will then delete the network's table data.
When a node hears that another node is leaving a network, it deletes all table entries belonging to the leaving node.
Deleting an entry in this case means marking it for deletion for a while, so that we can detect and ignore any older events that may arrive about it.
When a node wishes to leave the cluster, it will send a `NodeEventTypeLeave` message via gossip.
Nodes receiving this will mark the node as "left".
The leaving node will then send a memberlist leave message too.
If we receive the memberlist leave message without first getting the `NodeEventTypeLeave` one, we mark the node as failed (for a while).
Every node periodically attempts to reconnect to failed nodes, and will do a push-pull sync of cluster-wide state on success.
On success we also send the node a `NodeEventTypeJoin` and then do a bulk sync of network-specific state for all networks that we have in common.
[SWIM]: http://ieeexplore.ieee.org/document/1028914/
[memberlist]: https://github.com/hashicorp/memberlist

153
libnetwork/docs/overlay.md Normal file
View file

@ -0,0 +1,153 @@
# Overlay Driver
### Design
TODO
### Multi-Host Overlay Driver Quick Start
This example is to provision two Docker Hosts with the **experimental** Libnetwork overlay network driver.
### Pre-Requisites
- Kernel >= 3.16
- Experimental Docker client
### Install Docker Experimental
Follow Docker experimental installation instructions at: [https://github.com/docker/docker/tree/master/experimental](https://github.com/docker/docker/tree/master/experimental)
To ensure you are running the experimental Docker branch, check the version and look for the experimental tag:
```
$ docker -v
Docker version 1.8.0-dev, build f39b9a0, experimental
```
### Install and Bootstrap K/V Store
Multi-host networking uses a pluggable Key-Value store backend to distribute states using `libkv`.
`libkv` supports multiple pluggable backends such as `consul`, `etcd` & `zookeeper` (more to come).
In this example we will use `consul`
Install:
```
$ curl -OL https://dl.bintray.com/mitchellh/consul/0.5.2_linux_amd64.zip
$ unzip 0.5.2_linux_amd64.zip
$ mv consul /usr/local/bin/
```
**host-1** Start Consul as a server in bootstrap mode:
```
$ consul agent -server -bootstrap -data-dir /tmp/consul -bind=<host-1-ip-address>
```
**host-2** Start the Consul agent:
```
$ consul agent -data-dir /tmp/consul -bind=<host-2-ip-address>
$ consul join <host-1-ip-address>
```
### Start the Docker Daemon with the Network Driver Daemon Flags
**host-1** Docker daemon:
```
$ docker -d --kv-store=consul:localhost:8500 --label=com.docker.network.driver.overlay.bind_interface=eth0
```
**host-2** Start the Docker Daemon with the neighbor ID configuration:
```
$ docker -d --kv-store=consul:localhost:8500 --label=com.docker.network.driver.overlay.bind_interface=eth0 --label=com.docker.network.driver.overlay.neighbor_ip=<host-1-ip-address>
```
### QuickStart Containers Attached to a Network
**host-1** Start a container that publishes a service svc1 in the network dev that is managed by overlay driver.
```
$ docker run -i -t --publish-service=svc1.dev.overlay debian
root@21578ff721a9:/# ip add show eth0
34: eth0: <BROADCAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default
link/ether 02:42:ec:41:35:bf brd ff:ff:ff:ff:ff:ff
inet 172.21.0.16/16 scope global eth0
valid_lft forever preferred_lft forever
inet6 fe80::42:ecff:fe41:35bf/64 scope link
valid_lft forever preferred_lft forever
```
**host-2** Start a container that publishes a service svc2 in the network dev that is managed by overlay driver.
```
$ docker run -i -t --publish-service=svc2.dev.overlay debian
root@d217828eb876:/# ping svc1
PING svc1 (172.21.0.16): 56 data bytes
64 bytes from 172.21.0.16: icmp_seq=0 ttl=64 time=0.706 ms
64 bytes from 172.21.0.16: icmp_seq=1 ttl=64 time=0.687 ms
64 bytes from 172.21.0.16: icmp_seq=2 ttl=64 time=0.841 ms
```
### Detailed Setup
You can also setup networks and services and then attach a running container to them.
**host-1**:
```
docker network create -d overlay prod
docker network ls
docker network info prod
docker service publish db1.prod
cid=$(docker run -itd -p 8000:8000 ubuntu)
docker service attach $cid db1.prod
```
**host-2**:
```
docker network ls
docker network info prod
docker service publish db2.prod
cid=$(docker run -itd -p 8000:8000 ubuntu)
docker service attach $cid db2.prod
```
Once a container is started, a container on `host-1` and `host-2` both containers should be able to ping one another via IP, service name, \<service name>.\<network name>
View information about the networks and services using `ls` and `info` subcommands like so:
```
$ docker service ls
SERVICE ID NAME NETWORK CONTAINER
0771deb5f84b db2 prod 0e54a527f22c
aea23b224acf db1 prod 4b0a309ca311
$ docker network info prod
Network Id: 5ac68be2518959b48ad102e9ec3d8f42fb2ec72056aa9592eb5abd0252203012
Name: prod
Type: overlay
$ docker service info db1.prod
Service Id: aea23b224acfd2da9b893870e0d632499188a1a4b3881515ba042928a9d3f465
Name: db1
Network: prod
```
To detach and unpublish a service:
```
$ docker service detach $cid <service>.<network>
$ docker service unpublish <service>.<network>
# Example:
$ docker service detach $cid db2.prod
$ docker service unpublish db2.prod
```
To reiterate, this is experimental, and will be under active development.

304
libnetwork/docs/remote.md Normal file
View file

@ -0,0 +1,304 @@
Remote Drivers
==============
The `drivers.remote` package provides the integration point for dynamically-registered drivers. Unlike the other driver packages, it does not provide a single implementation of a driver; rather, it provides a proxy for remote driver processes, which are registered and communicate with LibNetwork via the Docker plugin package.
For the semantics of driver methods, which correspond to the protocol below, please see the [overall design](design.md).
## LibNetwork integration with the Docker `plugins` package
When LibNetwork initializes the `drivers.remote` package with the `Init()` function, it passes a `DriverCallback` as a parameter, which implements `RegisterDriver()`. The remote driver package uses this interface to register remote drivers with LibNetwork's `NetworkController`, by supplying it in a `plugins.Handle` callback.
The callback is invoked when a driver is loaded with the `plugins.Get` API call. How that comes about is out of scope here (but it might be, for instance, when that driver is mentioned by the user).
This design ensures that the details of driver registration mechanism are owned by the remote driver package, and it doesn't expose any of the driver layer to the North of LibNetwork.
## Implementation
The remote driver implementation uses a `plugins.Client` to communicate with the remote driver process. The `driverapi.Driver` methods are implemented as RPCs over the plugin client.
The payloads of these RPCs are mostly direct translations into JSON of the arguments given to the method. There are some exceptions to account for the use of the interfaces `InterfaceInfo` and `JoinInfo`, and data types that do not serialise to JSON well (e.g., `net.IPNet`). The protocol is detailed below under "Protocol".
## Usage
A remote driver proxy follows all the rules of any other in-built driver and has exactly the same `Driver` interface exposed. LibNetwork will also support driver-specific `options` and user-supplied `labels` which may influence the behaviour of a remote driver process.
## Protocol
The remote driver protocol is a set of RPCs, issued as HTTP POSTs with JSON payloads. The proxy issues requests, and the remote driver process is expected to respond usually with a JSON payload of its own, although in some cases these are empty maps.
### Errors
If the remote process cannot decode, or otherwise detects a syntactic problem with the HTTP request or payload, it must respond with an HTTP error status (4xx or 5xx).
If the remote process http server receives a request for an unknown URI, it should respond with the HTTP StatusCode `404 Not Found`. This allows LibNetwork to detect when a remote driver does not implement yet a newly added method, therefore not to deem the request as failed.
If the remote process can decode the request, but cannot complete the operation, it must send a response in the form
{
"Err": string
}
The string value supplied may appear in logs, so should not include confidential information.
### Handshake
When loaded, a remote driver process receives an HTTP POST on the URL `/Plugin.Activate` with no payload. It must respond with a manifest of the form
{
"Implements": ["NetworkDriver"]
}
Other entries in the list value are allowed; `"NetworkDriver"` indicates that the plugin should be registered with LibNetwork as a driver.
### Set capability
After Handshake, the remote driver will receive another POST message to the URL `/NetworkDriver.GetCapabilities` with no payload. The driver's response should have the form:
{
"Scope": "local"
"ConnectivityScope": "global"
}
Value of "Scope" should be either "local" or "global" which indicates whether the resource allocations for this driver's network can be done only locally to the node or globally across the cluster of nodes. Any other value will fail driver's registration and return an error to the caller.
Similarly, value of "ConnectivityScope" should be either "local" or "global" which indicates whether the driver's network can provide connectivity only locally to this node or globally across the cluster of nodes. If the value is missing, libnetwork will set it to the value of "Scope". should be either "local" or "global" which indicates
### Create network
When the proxy is asked to create a network, the remote process shall receive a POST to the URL `/NetworkDriver.CreateNetwork` of the form
{
"NetworkID": string,
"IPv4Data" : [
{
"AddressSpace": string,
"Pool": ipv4-cidr-string,
"Gateway" : ipv4-cidr-string,
"AuxAddresses": {
"<identifier1>" : "<ipv4-address1>",
"<identifier2>" : "<ipv4-address2>",
...
}
},
],
"IPv6Data" : [
{
"AddressSpace": string,
"Pool": ipv6-cidr-string,
"Gateway" : ipv6-cidr-string,
"AuxAddresses": {
"<identifier1>" : "<ipv6-address1>",
"<identifier2>" : "<ipv6-address2>",
...
}
},
],
"Options": {
...
}
}
* `NetworkID` value is generated by LibNetwork which represents a unique network.
* `Options` value is the arbitrary map given to the proxy by LibNetwork.
* `IPv4Data` and `IPv6Data` are the ip-addressing data configured by the user and managed by IPAM driver. The network driver is expected to honor the ip-addressing data supplied by IPAM driver. The data include,
* `AddressSpace` : A unique string represents an isolated space for IP Addressing
* `Pool` : A range of IP Addresses represented in CIDR format address/mask. Since, the IPAM driver is responsible for allocating container ip-addresses, the network driver can make use of this information for the network plumbing purposes.
* `Gateway` : Optionally, the IPAM driver may provide a Gateway IP address in CIDR format for the subnet represented by the Pool. The network driver can make use of this information for the network plumbing purposes.
* `AuxAddresses` : A list of pre-allocated ip-addresses with an associated identifier as provided by the user to assist network driver if it requires specific ip-addresses for its operation.
The response indicating success is empty:
{}
### Delete network
When a network owned by the remote driver is deleted, the remote process shall receive a POST to the URL `/NetworkDriver.DeleteNetwork` of the form
{
"NetworkID": string
}
The success response is empty:
{}
### Create endpoint
When the proxy is asked to create an endpoint, the remote process shall receive a POST to the URL `/NetworkDriver.CreateEndpoint` of the form
{
"NetworkID": string,
"EndpointID": string,
"Options": {
...
},
"Interface": {
"Address": string,
"AddressIPv6": string,
"MacAddress": string
}
}
The `NetworkID` is the generated identifier for the network to which the endpoint belongs; the `EndpointID` is a generated identifier for the endpoint.
`Options` is an arbitrary map as supplied to the proxy.
The `Interface` value is of the form given. The fields in the `Interface` may be empty; and the `Interface` itself may be empty. If supplied, `Address` is an IPv4 address and subnet in CIDR notation; e.g., `"192.168.34.12/16"`. If supplied, `AddressIPv6` is an IPv6 address and subnet in CIDR notation. `MacAddress` is a MAC address as a string; e.g., `"6e:75:32:60:44:c9"`.
A success response is of the form
{
"Interface": {
"Address": string,
"AddressIPv6": string,
"MacAddress": string
}
}
with values in the `Interface` as above. As far as the value of `Interface` is concerned, `MacAddress` and either or both of `Address` and `AddressIPv6` must be given.
If the remote process was supplied a non-empty value in `Interface`, it must respond with an empty `Interface` value. LibNetwork will treat it as an error if it supplies a non-empty value and receives a non-empty value back, and roll back the operation.
### Endpoint operational info
The proxy may be asked for "operational info" on an endpoint. When this happens, the remote process shall receive a POST to `/NetworkDriver.EndpointOperInfo` of the form
{
"NetworkID": string,
"EndpointID": string
}
where `NetworkID` and `EndpointID` have meanings as above. It must send a response of the form
{
"Value": { ... }
}
where the value of the `Value` field is an arbitrary (possibly empty) map.
### Delete endpoint
When an endpoint is deleted, the remote process shall receive a POST to the URL `/NetworkDriver.DeleteEndpoint` with a body of the form
{
"NetworkID": string,
"EndpointID": string
}
where `NetworkID` and `EndpointID` have meanings as above. A success response is empty:
{}
### Join
When a sandbox is given an endpoint, the remote process shall receive a POST to the URL `NetworkDriver.Join` of the form
{
"NetworkID": string,
"EndpointID": string,
"SandboxKey": string,
"Options": { ... }
}
The `NetworkID` and `EndpointID` have meanings as above. The `SandboxKey` identifies the sandbox. `Options` is an arbitrary map as supplied to the proxy.
The response must have the form
{
"InterfaceName": {
SrcName: string,
DstPrefix: string
},
"Gateway": string,
"GatewayIPv6": string,
"StaticRoutes": [{
"Destination": string,
"RouteType": int,
"NextHop": string,
}, ...]
}
`Gateway` is optional and if supplied is an IP address as a string; e.g., `"192.168.0.1"`. `GatewayIPv6` is optional and if supplied is an IPv6 address as a string; e.g., `"fe80::7809:baff:fec6:7744"`.
The entries in `InterfaceName` represent actual OS level interfaces that should be moved by LibNetwork into the sandbox; the `SrcName` is the name of the OS level interface that the remote process created, and the `DstPrefix` is a prefix for the name the OS level interface should have after it has been moved into the sandbox (LibNetwork will append an index to make sure the actual name does not collide with others).
The entries in `"StaticRoutes"` represent routes that should be added to an interface once it has been moved into the sandbox. Since there may be zero or more routes for an interface, unlike the interface name they can be supplied in any order.
Routes are either given a `RouteType` of `0` and a value for `NextHop`; or, a `RouteType` of `1` and no value for `NextHop`, meaning a connected route.
If no gateway and no default static route is set by the driver in the Join response, LibNetwork will add an additional interface to the sandbox connecting to a default gateway network (a bridge network named *docker_gwbridge*) and program the default gateway into the sandbox accordingly, pointing to the interface address of the bridge *docker_gwbridge*.
### Leave
If the proxy is asked to remove an endpoint from a sandbox, the remote process shall receive a POST to the URL `/NetworkDriver.Leave` of the form
{
"NetworkID": string,
"EndpointID": string
}
where `NetworkID` and `EndpointID` have meanings as above. The success response is empty:
{}
### DiscoverNew Notification
LibNetwork listens to inbuilt docker discovery notifications and passes it along to the interested drivers.
When the proxy receives a DiscoverNew notification, the remote process shall receive a POST to the URL `/NetworkDriver.DiscoverNew` of the form
{
"DiscoveryType": int,
"DiscoveryData": {
...
}
}
`DiscoveryType` represents the discovery type. Each Discovery Type is represented by a number.
`DiscoveryData` carries discovery data the structure of which is determined by the DiscoveryType
The response indicating success is empty:
{}
* Node Discovery
Node Discovery is represented by a `DiscoveryType` value of `1` and the corresponding `DiscoveryData` will carry Node discovery data.
{
"DiscoveryType": int,
"DiscoveryData": {
"Address" : string
"self" : bool
}
}
### DiscoverDelete Notification
When the proxy receives a DiscoverDelete notification, the remote process shall receive a POST to the URL `/NetworkDriver.DiscoverDelete` of the form
{
"DiscoveryType": int,
"DiscoveryData": {
...
}
}
`DiscoveryType` represents the discovery type. Each Discovery Type is represented by a number.
`DiscoveryData` carries discovery data the structure of which is determined by the DiscoveryType
The response indicating success is empty:
{}
* Node Discovery
Similar to the DiscoverNew call, Node Discovery is represented by a `DiscoveryType` value of `1` and the corresponding `DiscoveryData` will carry Node discovery data to be deleted.
{
"DiscoveryType": int,
"DiscoveryData": {
"Address" : string
"self" : bool
}
}

View file

@ -0,0 +1,16 @@
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
After=network.target docker.socket
Requires=docker.socket
[Service]
EnvironmentFile=-/etc/default/docker
ExecStart=/usr/bin/docker daemon -H fd:// $DOCKER_OPTS
MountFlags=slave
LimitNOFILE=1048576
LimitNPROC=1048576
LimitCORE=infinity
[Install]
WantedBy=multi-user.target

185
libnetwork/docs/vagrant.md Normal file
View file

@ -0,0 +1,185 @@
# Vagrant Setup to Test the Overlay Driver
This documentation highlights how to use Vagrant to start a three nodes setup to test Docker network.
## Pre-requisites
This was tested on:
- Vagrant 1.7.2
- VirtualBox 4.3.26
## Machine Setup
The Vagrantfile provided will start three virtual machines. One will act as a consul server, and the other two will act as Docker host.
The experimental version of Docker is installed.
- `consul-server` is the Consul server node, based on Ubuntu 14.04, this has IP 192.168.33.10
- `net-1` is the first Docker host based on Ubuntu 14.10, this has IP 192.168.33.11
- `net-2` is the second Docker host based on Ubuntu 14.10, this has IP 192.168.33.12
## Getting Started
Clone this repo, change to the `docs` directory and let Vagrant do the work.
$ vagrant up
$ vagrant status
Current machine states:
consul-server running (virtualbox)
net-1 running (virtualbox)
net-2 running (virtualbox)
You are now ready to SSH to the Docker hosts and start containers.
$ vagrant ssh net-1
vagrant@net-1:~$ docker version
Client version: 1.8.0-dev
...<snip>...
Check that Docker network is functional by listing the default networks:
vagrant@net-1:~$ docker network ls
NETWORK ID NAME TYPE
4275f8b3a821 none null
80eba28ed4a7 host host
64322973b4aa bridge bridge
No services has been published so far, so the `docker service ls` will return an empty list:
$ docker service ls
SERVICE ID NAME NETWORK CONTAINER
Start a container and check the content of `/etc/hosts`.
$ docker run -it --rm ubuntu:14.04 bash
root@df479e660658:/# cat /etc/hosts
172.21.0.3 df479e660658
127.0.0.1 localhost
::1 localhost ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
172.21.0.3 distracted_bohr
172.21.0.3 distracted_bohr.multihost
In a separate terminal on `net-1` list the networks again. You will see that the _multihost_ overlay now appears.
The overlay network _multihost_ is your default network. This was setup by the Docker daemon during the Vagrant provisioning. Check `/etc/default/docker` to see the options that were set.
vagrant@net-1:~$ docker network ls
NETWORK ID NAME TYPE
4275f8b3a821 none null
80eba28ed4a7 host host
64322973b4aa bridge bridge
b5c9f05f1f8f multihost overlay
Now in a separate terminal, SSH to `net-2`, check the network and services. The networks will be the same, and the default network will also be _multihost_ of type overlay. But the service will show the container started on `net-1`:
$ vagrant ssh net-2
vagrant@net-2:~$ docker service ls
SERVICE ID NAME NETWORK CONTAINER
b00f2bfd81ac distracted_bohr multihost df479e660658
Start a container on `net-2` and check the `/etc/hosts`.
vagrant@net-2:~$ docker run -ti --rm ubuntu:14.04 bash
root@2ac726b4ce60:/# cat /etc/hosts
172.21.0.4 2ac726b4ce60
127.0.0.1 localhost
::1 localhost ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
172.21.0.3 distracted_bohr
172.21.0.3 distracted_bohr.multihost
172.21.0.4 modest_curie
172.21.0.4 modest_curie.multihost
You will see not only the container that you just started on `net-2` but also the container that you started earlier on `net-1`.
And of course you will be able to ping each container.
## Creating a Non Default Overlay Network
In the previous test we started containers with regular options `-ti --rm` and these containers got placed automatically in the default network which was set to be the _multihost_ network of type overlay.
But you could create your own overlay network and start containers in it. Let's create a new overlay network.
On one of your Docker hosts, `net-1` or `net-2` do:
$ docker network create -d overlay foobar
8805e22ad6e29cd7abb95597c91420fdcac54f33fcdd6fbca6dd4ec9710dd6a4
$ docker network ls
NETWORK ID NAME TYPE
a77e16a1e394 host host
684a4bb4c471 bridge bridge
8805e22ad6e2 foobar overlay
b5c9f05f1f8f multihost overlay
67d5a33a2e54 none null
Automatically, the second host will also see this network. To start a container on this new network, simply use the `--publish-service` option of `docker run` like so:
$ docker run -it --rm --publish-service=bar.foobar.overlay ubuntu:14.04 bash
Note, that you could directly start a container with a new overlay using the `--publish-service` option and it will create the network automatically.
Check the docker services now:
$ docker service ls
SERVICE ID NAME NETWORK CONTAINER
b1ffdbfb1ac6 bar foobar 6635a3822135
Repeat the getting started steps, by starting another container in this new overlay on the other host, check the `/etc/hosts` file and try to ping each container.
## A look at the interfaces
This new Docker multihost networking is made possible via VXLAN tunnels and the use of network namespaces.
Check the [design](design.md) documentation for all the details. But to explore these concepts a bit, nothing beats an example.
With a running container in one overlay, check the network namespace:
$ docker inspect -f '{{ .NetworkSettings.SandboxKey}}' 6635a3822135
/var/run/docker/netns/6635a3822135
This is a none default location for network namespaces which might confuse things a bit. So let's become root, head over to this directory that contains the network namespaces of the containers and check the interfaces:
$ sudo su
root@net-2:/home/vagrant# cd /var/run/docker/
root@net-2:/var/run/docker# ls netns
6635a3822135
8805e22ad6e2
To be able to check the interfaces in those network namespace using `ip` command, just create a symlink for `netns` that points to `/var/run/docker/netns`:
root@net-2:/var/run# ln -s /var/run/docker/netns netns
root@net-2:/var/run# ip netns show
6635a3822135
8805e22ad6e2
The two namespace ID return are the ones of the running container on that host and the one of the actual overlay network the container is in.
Let's check the interfaces in the container:
root@net-2:/var/run/docker# ip netns exec 6635a3822135 ip addr show eth0
15: eth0: <BROADCAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default
link/ether 02:42:b3:91:22:c3 brd ff:ff:ff:ff:ff:ff
inet 172.21.0.5/16 scope global eth0
valid_lft forever preferred_lft forever
inet6 fe80::42:b3ff:fe91:22c3/64 scope link
valid_lft forever preferred_lft forever
Indeed we get back the network interface of our running container, same MAC address, same IP.
If we check the links of the overlay namespace we see our vxlan interface and the VLAN ID being used.
root@net-2:/var/run/docker# ip netns exec 8805e22ad6e2 ip -d link show
...<snip>...
14: vxlan1: <BROADCAST,UP,LOWER_UP> mtu 1500 qdisc noqueue master br0 state UNKNOWN mode DEFAULT group default
link/ether 7a:af:20:ee:e3:81 brd ff:ff:ff:ff:ff:ff promiscuity 1
vxlan id 256 srcport 32768 61000 dstport 8472 proxy l2miss l3miss ageing 300
bridge_slave
16: veth2: <BROADCAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master br0 state UP mode DEFAULT group default qlen 1000
link/ether 46:b1:e2:5c:48:a8 brd ff:ff:ff:ff:ff:ff promiscuity 1
veth
bridge_slave
If you sniff packets on these interfaces you will see the traffic between your containers.

View file

@ -0,0 +1,218 @@
package driverapi
import (
"net"
"github.com/docker/docker/pkg/plugingetter"
"github.com/docker/libnetwork/discoverapi"
)
// NetworkPluginEndpointType represents the Endpoint Type used by Plugin system
const NetworkPluginEndpointType = "NetworkDriver"
// Driver is an interface that every plugin driver needs to implement.
type Driver interface {
discoverapi.Discover
// NetworkAllocate invokes the driver method to allocate network
// specific resources passing network id and network specific config.
// It returns a key,value pair of network specific driver allocations
// to the caller.
NetworkAllocate(nid string, options map[string]string, ipV4Data, ipV6Data []IPAMData) (map[string]string, error)
// NetworkFree invokes the driver method to free network specific resources
// associated with a given network id.
NetworkFree(nid string) error
// CreateNetwork invokes the driver method to create a network
// passing the network id and network specific config. The
// config mechanism will eventually be replaced with labels
// which are yet to be introduced. The driver can return a
// list of table names for which it is interested in receiving
// notification when a CRUD operation is performed on any
// entry in that table. This will be ignored for local scope
// drivers.
CreateNetwork(nid string, options map[string]interface{}, nInfo NetworkInfo, ipV4Data, ipV6Data []IPAMData) error
// DeleteNetwork invokes the driver method to delete network passing
// the network id.
DeleteNetwork(nid string) error
// CreateEndpoint invokes the driver method to create an endpoint
// passing the network id, endpoint id endpoint information and driver
// specific config. The endpoint information can be either consumed by
// the driver or populated by the driver. The config mechanism will
// eventually be replaced with labels which are yet to be introduced.
CreateEndpoint(nid, eid string, ifInfo InterfaceInfo, options map[string]interface{}) error
// DeleteEndpoint invokes the driver method to delete an endpoint
// passing the network id and endpoint id.
DeleteEndpoint(nid, eid string) error
// EndpointOperInfo retrieves from the driver the operational data related to the specified endpoint
EndpointOperInfo(nid, eid string) (map[string]interface{}, error)
// Join method is invoked when a Sandbox is attached to an endpoint.
Join(nid, eid string, sboxKey string, jinfo JoinInfo, options map[string]interface{}) error
// Leave method is invoked when a Sandbox detaches from an endpoint.
Leave(nid, eid string) error
// ProgramExternalConnectivity invokes the driver method which does the necessary
// programming to allow the external connectivity dictated by the passed options
ProgramExternalConnectivity(nid, eid string, options map[string]interface{}) error
// RevokeExternalConnectivity asks the driver to remove any external connectivity
// programming that was done so far
RevokeExternalConnectivity(nid, eid string) error
// EventNotify notifies the driver when a CRUD operation has
// happened on a table of its interest as soon as this node
// receives such an event in the gossip layer. This method is
// only invoked for the global scope driver.
EventNotify(event EventType, nid string, tableName string, key string, value []byte)
// DecodeTableEntry passes the driver a key, value pair from table it registered
// with libnetwork. Driver should return {object ID, map[string]string} tuple.
// If DecodeTableEntry is called for a table associated with NetworkObject or
// EndpointObject the return object ID should be the network id or endpoint id
// associated with that entry. map should have information about the object that
// can be presented to the user.
// For example: overlay driver returns the VTEP IP of the host that has the endpoint
// which is shown in 'network inspect --verbose'
DecodeTableEntry(tablename string, key string, value []byte) (string, map[string]string)
// Type returns the type of this driver, the network type this driver manages
Type() string
// IsBuiltIn returns true if it is a built-in driver
IsBuiltIn() bool
}
// NetworkInfo provides a go interface for drivers to provide network
// specific information to libnetwork.
type NetworkInfo interface {
// TableEventRegister registers driver interest in a given
// table name.
TableEventRegister(tableName string, objType ObjectType) error
// UpdateIPamConfig updates the networks IPAM configuration
// based on information from the driver. In windows, the OS (HNS) chooses
// the IP address space if the user does not specify an address space.
UpdateIpamConfig(ipV4Data []IPAMData)
}
// InterfaceInfo provides a go interface for drivers to retrieve
// network information to interface resources.
type InterfaceInfo interface {
// SetMacAddress allows the driver to set the mac address to the endpoint interface
// during the call to CreateEndpoint, if the mac address is not already set.
SetMacAddress(mac net.HardwareAddr) error
// SetIPAddress allows the driver to set the ip address to the endpoint interface
// during the call to CreateEndpoint, if the address is not already set.
// The API is to be used to assign both the IPv4 and IPv6 address types.
SetIPAddress(ip *net.IPNet) error
// MacAddress returns the MAC address.
MacAddress() net.HardwareAddr
// Address returns the IPv4 address.
Address() *net.IPNet
// AddressIPv6 returns the IPv6 address.
AddressIPv6() *net.IPNet
}
// InterfaceNameInfo provides a go interface for the drivers to assign names
// to interfaces.
type InterfaceNameInfo interface {
// SetNames method assigns the srcName and dstPrefix for the interface.
SetNames(srcName, dstPrefix string) error
}
// JoinInfo represents a set of resources that the driver has the ability to provide during
// join time.
type JoinInfo interface {
// InterfaceName returns an InterfaceNameInfo go interface to facilitate
// setting the names for the interface.
InterfaceName() InterfaceNameInfo
// SetGateway sets the default IPv4 gateway when a container joins the endpoint.
SetGateway(net.IP) error
// SetGatewayIPv6 sets the default IPv6 gateway when a container joins the endpoint.
SetGatewayIPv6(net.IP) error
// AddStaticRoute adds a route to the sandbox.
// It may be used in addition to or instead of a default gateway (as above).
AddStaticRoute(destination *net.IPNet, routeType int, nextHop net.IP) error
// DisableGatewayService tells libnetwork not to provide Default GW for the container
DisableGatewayService()
// AddTableEntry adds a table entry to the gossip layer
// passing the table name, key and an opaque value.
AddTableEntry(tableName string, key string, value []byte) error
}
// DriverCallback provides a Callback interface for Drivers into LibNetwork
type DriverCallback interface {
// GetPluginGetter returns the pluginv2 getter.
GetPluginGetter() plugingetter.PluginGetter
// RegisterDriver provides a way for Remote drivers to dynamically register new NetworkType and associate with a driver instance
RegisterDriver(name string, driver Driver, capability Capability) error
}
// Capability represents the high level capabilities of the drivers which libnetwork can make use of
type Capability struct {
DataScope string
ConnectivityScope string
}
// IPAMData represents the per-network ip related
// operational information libnetwork will send
// to the network driver during CreateNetwork()
type IPAMData struct {
AddressSpace string
Pool *net.IPNet
Gateway *net.IPNet
AuxAddresses map[string]*net.IPNet
}
// EventType defines a type for the CRUD event
type EventType uint8
const (
// Create event is generated when a table entry is created,
Create EventType = 1 + iota
// Update event is generated when a table entry is updated.
Update
// Delete event is generated when a table entry is deleted.
Delete
)
// ObjectType represents the type of object driver wants to store in libnetwork's networkDB
type ObjectType int
const (
// EndpointObject should be set for libnetwork endpoint object related data
EndpointObject ObjectType = 1 + iota
// NetworkObject should be set for libnetwork network object related data
NetworkObject
// OpaqueObject is for driver specific data with no corresponding libnetwork object
OpaqueObject
)
// IsValidType validates the passed in type against the valid object types
func IsValidType(objType ObjectType) bool {
switch objType {
case EndpointObject:
fallthrough
case NetworkObject:
fallthrough
case OpaqueObject:
return true
}
return false
}

Some files were not shown because too many files have changed in this diff Show more