docker_api_swarm_test.go 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045
  1. // +build !windows
  2. package main
  3. import (
  4. "context"
  5. "fmt"
  6. "io/ioutil"
  7. "net"
  8. "net/http"
  9. "path/filepath"
  10. "runtime"
  11. "strings"
  12. "sync"
  13. "time"
  14. "github.com/cloudflare/cfssl/csr"
  15. "github.com/cloudflare/cfssl/helpers"
  16. "github.com/cloudflare/cfssl/initca"
  17. "github.com/docker/docker/api/types"
  18. "github.com/docker/docker/api/types/container"
  19. "github.com/docker/docker/api/types/swarm"
  20. "github.com/docker/docker/client"
  21. "github.com/docker/docker/integration-cli/checker"
  22. "github.com/docker/docker/integration-cli/daemon"
  23. testdaemon "github.com/docker/docker/internal/test/daemon"
  24. "github.com/docker/docker/internal/test/request"
  25. "github.com/docker/swarmkit/ca"
  26. "github.com/go-check/check"
  27. "gotest.tools/assert"
  28. is "gotest.tools/assert/cmp"
  29. )
  // defaultReconciliationTimeout is the duration the tests in this file hand to
  // waitAndAssert while waiting for the swarm to reach an expected state.
  var defaultReconciliationTimeout = 30 * time.Second
  31. func (s *DockerSwarmSuite) TestAPISwarmInit(c *check.C) {
  32. // todo: should find a better way to verify that components are running than /info
  33. d1 := s.AddDaemon(c, true, true)
  34. info := d1.SwarmInfo(c)
  35. c.Assert(info.ControlAvailable, checker.True)
  36. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  37. c.Assert(info.Cluster.RootRotationInProgress, checker.False)
  38. d2 := s.AddDaemon(c, true, false)
  39. info = d2.SwarmInfo(c)
  40. c.Assert(info.ControlAvailable, checker.False)
  41. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  42. // Leaving cluster
  43. c.Assert(d2.SwarmLeave(false), checker.IsNil)
  44. info = d2.SwarmInfo(c)
  45. c.Assert(info.ControlAvailable, checker.False)
  46. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  47. d2.SwarmJoin(c, swarm.JoinRequest{
  48. ListenAddr: d1.SwarmListenAddr(),
  49. JoinToken: d1.JoinTokens(c).Worker,
  50. RemoteAddrs: []string{d1.SwarmListenAddr()},
  51. })
  52. info = d2.SwarmInfo(c)
  53. c.Assert(info.ControlAvailable, checker.False)
  54. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  55. // Current state restoring after restarts
  56. d1.Stop(c)
  57. d2.Stop(c)
  58. d1.Start(c)
  59. d2.Start(c)
  60. info = d1.SwarmInfo(c)
  61. c.Assert(info.ControlAvailable, checker.True)
  62. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  63. info = d2.SwarmInfo(c)
  64. c.Assert(info.ControlAvailable, checker.False)
  65. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  66. }
  67. func (s *DockerSwarmSuite) TestAPISwarmJoinToken(c *check.C) {
  68. d1 := s.AddDaemon(c, false, false)
  69. d1.SwarmInit(c, swarm.InitRequest{})
  70. // todo: error message differs depending if some components of token are valid
  71. d2 := s.AddDaemon(c, false, false)
  72. c2 := d2.NewClientT(c)
  73. err := c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  74. ListenAddr: d2.SwarmListenAddr(),
  75. RemoteAddrs: []string{d1.SwarmListenAddr()},
  76. })
  77. c.Assert(err, checker.NotNil)
  78. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  79. info := d2.SwarmInfo(c)
  80. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  81. err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  82. ListenAddr: d2.SwarmListenAddr(),
  83. JoinToken: "foobaz",
  84. RemoteAddrs: []string{d1.SwarmListenAddr()},
  85. })
  86. c.Assert(err, checker.NotNil)
  87. c.Assert(err.Error(), checker.Contains, "invalid join token")
  88. info = d2.SwarmInfo(c)
  89. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  90. workerToken := d1.JoinTokens(c).Worker
  91. d2.SwarmJoin(c, swarm.JoinRequest{
  92. ListenAddr: d2.SwarmListenAddr(),
  93. JoinToken: workerToken,
  94. RemoteAddrs: []string{d1.SwarmListenAddr()},
  95. })
  96. info = d2.SwarmInfo(c)
  97. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  98. c.Assert(d2.SwarmLeave(false), checker.IsNil)
  99. info = d2.SwarmInfo(c)
  100. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  101. // change tokens
  102. d1.RotateTokens(c)
  103. err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  104. ListenAddr: d2.SwarmListenAddr(),
  105. JoinToken: workerToken,
  106. RemoteAddrs: []string{d1.SwarmListenAddr()},
  107. })
  108. c.Assert(err, checker.NotNil)
  109. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  110. info = d2.SwarmInfo(c)
  111. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  112. workerToken = d1.JoinTokens(c).Worker
  113. d2.SwarmJoin(c, swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.SwarmListenAddr()}})
  114. info = d2.SwarmInfo(c)
  115. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  116. c.Assert(d2.SwarmLeave(false), checker.IsNil)
  117. info = d2.SwarmInfo(c)
  118. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  119. // change spec, don't change tokens
  120. d1.UpdateSwarm(c, func(s *swarm.Spec) {})
  121. err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  122. ListenAddr: d2.SwarmListenAddr(),
  123. RemoteAddrs: []string{d1.SwarmListenAddr()},
  124. })
  125. c.Assert(err, checker.NotNil)
  126. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  127. info = d2.SwarmInfo(c)
  128. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  129. d2.SwarmJoin(c, swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.SwarmListenAddr()}})
  130. info = d2.SwarmInfo(c)
  131. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  132. c.Assert(d2.SwarmLeave(false), checker.IsNil)
  133. info = d2.SwarmInfo(c)
  134. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  135. }
  136. func (s *DockerSwarmSuite) TestUpdateSwarmAddExternalCA(c *check.C) {
  137. d1 := s.AddDaemon(c, false, false)
  138. d1.SwarmInit(c, swarm.InitRequest{})
  139. d1.UpdateSwarm(c, func(s *swarm.Spec) {
  140. s.CAConfig.ExternalCAs = []*swarm.ExternalCA{
  141. {
  142. Protocol: swarm.ExternalCAProtocolCFSSL,
  143. URL: "https://thishasnoca.org",
  144. },
  145. {
  146. Protocol: swarm.ExternalCAProtocolCFSSL,
  147. URL: "https://thishasacacert.org",
  148. CACert: "cacert",
  149. },
  150. }
  151. })
  152. info := d1.SwarmInfo(c)
  153. c.Assert(info.Cluster.Spec.CAConfig.ExternalCAs, checker.HasLen, 2)
  154. c.Assert(info.Cluster.Spec.CAConfig.ExternalCAs[0].CACert, checker.Equals, "")
  155. c.Assert(info.Cluster.Spec.CAConfig.ExternalCAs[1].CACert, checker.Equals, "cacert")
  156. }
  157. func (s *DockerSwarmSuite) TestAPISwarmCAHash(c *check.C) {
  158. d1 := s.AddDaemon(c, true, true)
  159. d2 := s.AddDaemon(c, false, false)
  160. splitToken := strings.Split(d1.JoinTokens(c).Worker, "-")
  161. splitToken[2] = "1kxftv4ofnc6mt30lmgipg6ngf9luhwqopfk1tz6bdmnkubg0e"
  162. replacementToken := strings.Join(splitToken, "-")
  163. c2 := d2.NewClientT(c)
  164. err := c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  165. ListenAddr: d2.SwarmListenAddr(),
  166. JoinToken: replacementToken,
  167. RemoteAddrs: []string{d1.SwarmListenAddr()},
  168. })
  169. c.Assert(err, checker.NotNil)
  170. c.Assert(err.Error(), checker.Contains, "remote CA does not match fingerprint")
  171. }
  172. func (s *DockerSwarmSuite) TestAPISwarmPromoteDemote(c *check.C) {
  173. d1 := s.AddDaemon(c, false, false)
  174. d1.SwarmInit(c, swarm.InitRequest{})
  175. d2 := s.AddDaemon(c, true, false)
  176. info := d2.SwarmInfo(c)
  177. c.Assert(info.ControlAvailable, checker.False)
  178. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  179. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  180. n.Spec.Role = swarm.NodeRoleManager
  181. })
  182. waitAndAssert(c, defaultReconciliationTimeout, d2.CheckControlAvailable, checker.True)
  183. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  184. n.Spec.Role = swarm.NodeRoleWorker
  185. })
  186. waitAndAssert(c, defaultReconciliationTimeout, d2.CheckControlAvailable, checker.False)
  187. // Wait for the role to change to worker in the cert. This is partially
  188. // done because it's something worth testing in its own right, and
  189. // partially because changing the role from manager to worker and then
  190. // back to manager quickly might cause the node to pause for awhile
  191. // while waiting for the role to change to worker, and the test can
  192. // time out during this interval.
  193. waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
  194. certBytes, err := ioutil.ReadFile(filepath.Join(d2.Folder, "root", "swarm", "certificates", "swarm-node.crt"))
  195. if err != nil {
  196. return "", check.Commentf("error: %v", err)
  197. }
  198. certs, err := helpers.ParseCertificatesPEM(certBytes)
  199. if err == nil && len(certs) > 0 && len(certs[0].Subject.OrganizationalUnit) > 0 {
  200. return certs[0].Subject.OrganizationalUnit[0], nil
  201. }
  202. return "", check.Commentf("could not get organizational unit from certificate")
  203. }, checker.Equals, "swarm-worker")
  204. // Demoting last node should fail
  205. node := d1.GetNode(c, d1.NodeID())
  206. node.Spec.Role = swarm.NodeRoleWorker
  207. url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index)
  208. res, body, err := request.Post(url, request.Host(d1.Sock()), request.JSONBody(node.Spec))
  209. c.Assert(err, checker.IsNil)
  210. b, err := request.ReadBody(body)
  211. c.Assert(err, checker.IsNil)
  212. c.Assert(res.StatusCode, checker.Equals, http.StatusBadRequest, check.Commentf("output: %q", string(b)))
  213. // The warning specific to demoting the last manager is best-effort and
  214. // won't appear until the Role field of the demoted manager has been
  215. // updated.
  216. // Yes, I know this looks silly, but checker.Matches is broken, since
  217. // it anchors the regexp contrary to the documentation, and this makes
  218. // it impossible to match something that includes a line break.
  219. if !strings.Contains(string(b), "last manager of the swarm") {
  220. c.Assert(string(b), checker.Contains, "this would result in a loss of quorum")
  221. }
  222. info = d1.SwarmInfo(c)
  223. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  224. c.Assert(info.ControlAvailable, checker.True)
  225. // Promote already demoted node
  226. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  227. n.Spec.Role = swarm.NodeRoleManager
  228. })
  229. waitAndAssert(c, defaultReconciliationTimeout, d2.CheckControlAvailable, checker.True)
  230. }
  231. func (s *DockerSwarmSuite) TestAPISwarmLeaderProxy(c *check.C) {
  232. // add three managers, one of these is leader
  233. d1 := s.AddDaemon(c, true, true)
  234. d2 := s.AddDaemon(c, true, true)
  235. d3 := s.AddDaemon(c, true, true)
  236. // start a service by hitting each of the 3 managers
  237. d1.CreateService(c, simpleTestService, func(s *swarm.Service) {
  238. s.Spec.Name = "test1"
  239. })
  240. d2.CreateService(c, simpleTestService, func(s *swarm.Service) {
  241. s.Spec.Name = "test2"
  242. })
  243. d3.CreateService(c, simpleTestService, func(s *swarm.Service) {
  244. s.Spec.Name = "test3"
  245. })
  246. // 3 services should be started now, because the requests were proxied to leader
  247. // query each node and make sure it returns 3 services
  248. for _, d := range []*daemon.Daemon{d1, d2, d3} {
  249. services := d.ListServices(c)
  250. c.Assert(services, checker.HasLen, 3)
  251. }
  252. }
  253. func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *check.C) {
  254. if runtime.GOARCH == "s390x" {
  255. c.Skip("Disabled on s390x")
  256. }
  257. if runtime.GOARCH == "ppc64le" {
  258. c.Skip("Disabled on ppc64le")
  259. }
  260. // Create 3 nodes
  261. d1 := s.AddDaemon(c, true, true)
  262. d2 := s.AddDaemon(c, true, true)
  263. d3 := s.AddDaemon(c, true, true)
  264. // assert that the first node we made is the leader, and the other two are followers
  265. c.Assert(d1.GetNode(c, d1.NodeID()).ManagerStatus.Leader, checker.True)
  266. c.Assert(d1.GetNode(c, d2.NodeID()).ManagerStatus.Leader, checker.False)
  267. c.Assert(d1.GetNode(c, d3.NodeID()).ManagerStatus.Leader, checker.False)
  268. d1.Stop(c)
  269. var (
  270. leader *daemon.Daemon // keep track of leader
  271. followers []*daemon.Daemon // keep track of followers
  272. )
  273. checkLeader := func(nodes ...*daemon.Daemon) checkF {
  274. return func(c *check.C) (interface{}, check.CommentInterface) {
  275. // clear these out before each run
  276. leader = nil
  277. followers = nil
  278. for _, d := range nodes {
  279. if d.GetNode(c, d.NodeID()).ManagerStatus.Leader {
  280. leader = d
  281. } else {
  282. followers = append(followers, d)
  283. }
  284. }
  285. if leader == nil {
  286. return false, check.Commentf("no leader elected")
  287. }
  288. return true, check.Commentf("elected %v", leader.ID())
  289. }
  290. }
  291. // wait for an election to occur
  292. waitAndAssert(c, defaultReconciliationTimeout, checkLeader(d2, d3), checker.True)
  293. // assert that we have a new leader
  294. c.Assert(leader, checker.NotNil)
  295. // Keep track of the current leader, since we want that to be chosen.
  296. stableleader := leader
  297. // add the d1, the initial leader, back
  298. d1.Start(c)
  299. // TODO(stevvooe): may need to wait for rejoin here
  300. // wait for possible election
  301. waitAndAssert(c, defaultReconciliationTimeout, checkLeader(d1, d2, d3), checker.True)
  302. // pick out the leader and the followers again
  303. // verify that we still only have 1 leader and 2 followers
  304. c.Assert(leader, checker.NotNil)
  305. c.Assert(followers, checker.HasLen, 2)
  306. // and that after we added d1 back, the leader hasn't changed
  307. c.Assert(leader.NodeID(), checker.Equals, stableleader.NodeID())
  308. }
  309. func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *check.C) {
  310. if runtime.GOARCH == "s390x" {
  311. c.Skip("Disabled on s390x")
  312. }
  313. if runtime.GOARCH == "ppc64le" {
  314. c.Skip("Disabled on ppc64le")
  315. }
  316. d1 := s.AddDaemon(c, true, true)
  317. d2 := s.AddDaemon(c, true, true)
  318. d3 := s.AddDaemon(c, true, true)
  319. d1.CreateService(c, simpleTestService)
  320. d2.Stop(c)
  321. // make sure there is a leader
  322. waitAndAssert(c, defaultReconciliationTimeout, d1.CheckLeader, checker.IsNil)
  323. d1.CreateService(c, simpleTestService, func(s *swarm.Service) {
  324. s.Spec.Name = "top1"
  325. })
  326. d3.Stop(c)
  327. var service swarm.Service
  328. simpleTestService(&service)
  329. service.Spec.Name = "top2"
  330. cli := d1.NewClientT(c)
  331. defer cli.Close()
  332. // d1 will eventually step down from leader because there is no longer an active quorum, wait for that to happen
  333. waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) {
  334. _, err := cli.ServiceCreate(context.Background(), service.Spec, types.ServiceCreateOptions{})
  335. return err.Error(), nil
  336. }, checker.Contains, "Make sure more than half of the managers are online.")
  337. d2.Start(c)
  338. // make sure there is a leader
  339. waitAndAssert(c, defaultReconciliationTimeout, d1.CheckLeader, checker.IsNil)
  340. d1.CreateService(c, simpleTestService, func(s *swarm.Service) {
  341. s.Spec.Name = "top3"
  342. })
  343. }
  344. func (s *DockerSwarmSuite) TestAPISwarmLeaveRemovesContainer(c *check.C) {
  345. d := s.AddDaemon(c, true, true)
  346. instances := 2
  347. d.CreateService(c, simpleTestService, setInstances(instances))
  348. id, err := d.Cmd("run", "-d", "busybox", "top")
  349. c.Assert(err, checker.IsNil, check.Commentf("%s", id))
  350. id = strings.TrimSpace(id)
  351. waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances+1)
  352. c.Assert(d.SwarmLeave(false), checker.NotNil)
  353. c.Assert(d.SwarmLeave(true), checker.IsNil)
  354. waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, 1)
  355. id2, err := d.Cmd("ps", "-q")
  356. c.Assert(err, checker.IsNil, check.Commentf("%s", id2))
  357. c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
  358. }
  359. // #23629
  360. func (s *DockerSwarmSuite) TestAPISwarmLeaveOnPendingJoin(c *check.C) {
  361. testRequires(c, Network)
  362. s.AddDaemon(c, true, true)
  363. d2 := s.AddDaemon(c, false, false)
  364. id, err := d2.Cmd("run", "-d", "busybox", "top")
  365. c.Assert(err, checker.IsNil, check.Commentf("%s", id))
  366. id = strings.TrimSpace(id)
  367. c2 := d2.NewClientT(c)
  368. err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  369. ListenAddr: d2.SwarmListenAddr(),
  370. RemoteAddrs: []string{"123.123.123.123:1234"},
  371. })
  372. c.Assert(err, check.NotNil)
  373. c.Assert(err.Error(), checker.Contains, "Timeout was reached")
  374. info := d2.SwarmInfo(c)
  375. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStatePending)
  376. c.Assert(d2.SwarmLeave(true), checker.IsNil)
  377. waitAndAssert(c, defaultReconciliationTimeout, d2.CheckActiveContainerCount, checker.Equals, 1)
  378. id2, err := d2.Cmd("ps", "-q")
  379. c.Assert(err, checker.IsNil, check.Commentf("%s", id2))
  380. c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
  381. }
  382. // #23705
  383. func (s *DockerSwarmSuite) TestAPISwarmRestoreOnPendingJoin(c *check.C) {
  384. testRequires(c, Network)
  385. d := s.AddDaemon(c, false, false)
  386. client := d.NewClientT(c)
  387. err := client.SwarmJoin(context.Background(), swarm.JoinRequest{
  388. ListenAddr: d.SwarmListenAddr(),
  389. RemoteAddrs: []string{"123.123.123.123:1234"},
  390. })
  391. c.Assert(err, check.NotNil)
  392. c.Assert(err.Error(), checker.Contains, "Timeout was reached")
  393. waitAndAssert(c, defaultReconciliationTimeout, d.CheckLocalNodeState, checker.Equals, swarm.LocalNodeStatePending)
  394. d.Stop(c)
  395. d.Start(c)
  396. info := d.SwarmInfo(c)
  397. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  398. }
  399. func (s *DockerSwarmSuite) TestAPISwarmManagerRestore(c *check.C) {
  400. d1 := s.AddDaemon(c, true, true)
  401. instances := 2
  402. id := d1.CreateService(c, simpleTestService, setInstances(instances))
  403. d1.GetService(c, id)
  404. d1.Stop(c)
  405. d1.Start(c)
  406. d1.GetService(c, id)
  407. d2 := s.AddDaemon(c, true, true)
  408. d2.GetService(c, id)
  409. d2.Stop(c)
  410. d2.Start(c)
  411. d2.GetService(c, id)
  412. d3 := s.AddDaemon(c, true, true)
  413. d3.GetService(c, id)
  414. d3.Stop(c)
  415. d3.Start(c)
  416. d3.GetService(c, id)
  417. d3.Kill()
  418. time.Sleep(1 * time.Second) // time to handle signal
  419. d3.Start(c)
  420. d3.GetService(c, id)
  421. }
  422. func (s *DockerSwarmSuite) TestAPISwarmScaleNoRollingUpdate(c *check.C) {
  423. d := s.AddDaemon(c, true, true)
  424. instances := 2
  425. id := d.CreateService(c, simpleTestService, setInstances(instances))
  426. waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances)
  427. containers := d.ActiveContainers(c)
  428. instances = 4
  429. d.UpdateService(c, d.GetService(c, id), setInstances(instances))
  430. waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances)
  431. containers2 := d.ActiveContainers(c)
  432. loop0:
  433. for _, c1 := range containers {
  434. for _, c2 := range containers2 {
  435. if c1 == c2 {
  436. continue loop0
  437. }
  438. }
  439. c.Errorf("container %v not found in new set %#v", c1, containers2)
  440. }
  441. }
  442. func (s *DockerSwarmSuite) TestAPISwarmInvalidAddress(c *check.C) {
  443. d := s.AddDaemon(c, false, false)
  444. req := swarm.InitRequest{
  445. ListenAddr: "",
  446. }
  447. res, _, err := request.Post("/swarm/init", request.Host(d.Sock()), request.JSONBody(req))
  448. c.Assert(err, checker.IsNil)
  449. c.Assert(res.StatusCode, checker.Equals, http.StatusBadRequest)
  450. req2 := swarm.JoinRequest{
  451. ListenAddr: "0.0.0.0:2377",
  452. RemoteAddrs: []string{""},
  453. }
  454. res, _, err = request.Post("/swarm/join", request.Host(d.Sock()), request.JSONBody(req2))
  455. c.Assert(err, checker.IsNil)
  456. c.Assert(res.StatusCode, checker.Equals, http.StatusBadRequest)
  457. }
  458. func (s *DockerSwarmSuite) TestAPISwarmForceNewCluster(c *check.C) {
  459. d1 := s.AddDaemon(c, true, true)
  460. d2 := s.AddDaemon(c, true, true)
  461. instances := 2
  462. id := d1.CreateService(c, simpleTestService, setInstances(instances))
  463. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.CheckActiveContainerCount, d2.CheckActiveContainerCount), checker.Equals, instances)
  464. // drain d2, all containers should move to d1
  465. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  466. n.Spec.Availability = swarm.NodeAvailabilityDrain
  467. })
  468. waitAndAssert(c, defaultReconciliationTimeout, d1.CheckActiveContainerCount, checker.Equals, instances)
  469. waitAndAssert(c, defaultReconciliationTimeout, d2.CheckActiveContainerCount, checker.Equals, 0)
  470. d2.Stop(c)
  471. d1.SwarmInit(c, swarm.InitRequest{
  472. ForceNewCluster: true,
  473. Spec: swarm.Spec{},
  474. })
  475. waitAndAssert(c, defaultReconciliationTimeout, d1.CheckActiveContainerCount, checker.Equals, instances)
  476. d3 := s.AddDaemon(c, true, true)
  477. info := d3.SwarmInfo(c)
  478. c.Assert(info.ControlAvailable, checker.True)
  479. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  480. instances = 4
  481. d3.UpdateService(c, d3.GetService(c, id), setInstances(instances))
  482. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.CheckActiveContainerCount, d3.CheckActiveContainerCount), checker.Equals, instances)
  483. }
  484. func simpleTestService(s *swarm.Service) {
  485. ureplicas := uint64(1)
  486. restartDelay := time.Duration(100 * time.Millisecond)
  487. s.Spec = swarm.ServiceSpec{
  488. TaskTemplate: swarm.TaskSpec{
  489. ContainerSpec: &swarm.ContainerSpec{
  490. Image: "busybox:latest",
  491. Command: []string{"/bin/top"},
  492. },
  493. RestartPolicy: &swarm.RestartPolicy{
  494. Delay: &restartDelay,
  495. },
  496. },
  497. Mode: swarm.ServiceMode{
  498. Replicated: &swarm.ReplicatedService{
  499. Replicas: &ureplicas,
  500. },
  501. },
  502. }
  503. s.Spec.Name = "top"
  504. }
  505. func serviceForUpdate(s *swarm.Service) {
  506. ureplicas := uint64(1)
  507. restartDelay := time.Duration(100 * time.Millisecond)
  508. s.Spec = swarm.ServiceSpec{
  509. TaskTemplate: swarm.TaskSpec{
  510. ContainerSpec: &swarm.ContainerSpec{
  511. Image: "busybox:latest",
  512. Command: []string{"/bin/top"},
  513. },
  514. RestartPolicy: &swarm.RestartPolicy{
  515. Delay: &restartDelay,
  516. },
  517. },
  518. Mode: swarm.ServiceMode{
  519. Replicated: &swarm.ReplicatedService{
  520. Replicas: &ureplicas,
  521. },
  522. },
  523. UpdateConfig: &swarm.UpdateConfig{
  524. Parallelism: 2,
  525. Delay: 4 * time.Second,
  526. FailureAction: swarm.UpdateFailureActionContinue,
  527. },
  528. RollbackConfig: &swarm.UpdateConfig{
  529. Parallelism: 3,
  530. Delay: 4 * time.Second,
  531. FailureAction: swarm.UpdateFailureActionContinue,
  532. },
  533. }
  534. s.Spec.Name = "updatetest"
  535. }
  536. func setInstances(replicas int) testdaemon.ServiceConstructor {
  537. ureplicas := uint64(replicas)
  538. return func(s *swarm.Service) {
  539. s.Spec.Mode = swarm.ServiceMode{
  540. Replicated: &swarm.ReplicatedService{
  541. Replicas: &ureplicas,
  542. },
  543. }
  544. }
  545. }
  546. func setUpdateOrder(order string) testdaemon.ServiceConstructor {
  547. return func(s *swarm.Service) {
  548. if s.Spec.UpdateConfig == nil {
  549. s.Spec.UpdateConfig = &swarm.UpdateConfig{}
  550. }
  551. s.Spec.UpdateConfig.Order = order
  552. }
  553. }
  554. func setRollbackOrder(order string) testdaemon.ServiceConstructor {
  555. return func(s *swarm.Service) {
  556. if s.Spec.RollbackConfig == nil {
  557. s.Spec.RollbackConfig = &swarm.UpdateConfig{}
  558. }
  559. s.Spec.RollbackConfig.Order = order
  560. }
  561. }
  562. func setImage(image string) testdaemon.ServiceConstructor {
  563. return func(s *swarm.Service) {
  564. if s.Spec.TaskTemplate.ContainerSpec == nil {
  565. s.Spec.TaskTemplate.ContainerSpec = &swarm.ContainerSpec{}
  566. }
  567. s.Spec.TaskTemplate.ContainerSpec.Image = image
  568. }
  569. }
  570. func setFailureAction(failureAction string) testdaemon.ServiceConstructor {
  571. return func(s *swarm.Service) {
  572. s.Spec.UpdateConfig.FailureAction = failureAction
  573. }
  574. }
  575. func setMaxFailureRatio(maxFailureRatio float32) testdaemon.ServiceConstructor {
  576. return func(s *swarm.Service) {
  577. s.Spec.UpdateConfig.MaxFailureRatio = maxFailureRatio
  578. }
  579. }
  580. func setParallelism(parallelism uint64) testdaemon.ServiceConstructor {
  581. return func(s *swarm.Service) {
  582. s.Spec.UpdateConfig.Parallelism = parallelism
  583. }
  584. }
  585. func setConstraints(constraints []string) testdaemon.ServiceConstructor {
  586. return func(s *swarm.Service) {
  587. if s.Spec.TaskTemplate.Placement == nil {
  588. s.Spec.TaskTemplate.Placement = &swarm.Placement{}
  589. }
  590. s.Spec.TaskTemplate.Placement.Constraints = constraints
  591. }
  592. }
  593. func setPlacementPrefs(prefs []swarm.PlacementPreference) testdaemon.ServiceConstructor {
  594. return func(s *swarm.Service) {
  595. if s.Spec.TaskTemplate.Placement == nil {
  596. s.Spec.TaskTemplate.Placement = &swarm.Placement{}
  597. }
  598. s.Spec.TaskTemplate.Placement.Preferences = prefs
  599. }
  600. }
  601. func setGlobalMode(s *swarm.Service) {
  602. s.Spec.Mode = swarm.ServiceMode{
  603. Global: &swarm.GlobalService{},
  604. }
  605. }
  606. func checkClusterHealth(c *check.C, cl []*daemon.Daemon, managerCount, workerCount int) {
  607. var totalMCount, totalWCount int
  608. for _, d := range cl {
  609. var (
  610. info swarm.Info
  611. )
  612. // check info in a waitAndAssert, because if the cluster doesn't have a leader, `info` will return an error
  613. checkInfo := func(c *check.C) (interface{}, check.CommentInterface) {
  614. client := d.NewClientT(c)
  615. daemonInfo, err := client.Info(context.Background())
  616. info = daemonInfo.Swarm
  617. return err, check.Commentf("cluster not ready in time")
  618. }
  619. waitAndAssert(c, defaultReconciliationTimeout, checkInfo, checker.IsNil)
  620. if !info.ControlAvailable {
  621. totalWCount++
  622. continue
  623. }
  624. var leaderFound bool
  625. totalMCount++
  626. var mCount, wCount int
  627. for _, n := range d.ListNodes(c) {
  628. waitReady := func(c *check.C) (interface{}, check.CommentInterface) {
  629. if n.Status.State == swarm.NodeStateReady {
  630. return true, nil
  631. }
  632. nn := d.GetNode(c, n.ID)
  633. n = *nn
  634. return n.Status.State == swarm.NodeStateReady, check.Commentf("state of node %s, reported by %s", n.ID, d.NodeID())
  635. }
  636. waitAndAssert(c, defaultReconciliationTimeout, waitReady, checker.True)
  637. waitActive := func(c *check.C) (interface{}, check.CommentInterface) {
  638. if n.Spec.Availability == swarm.NodeAvailabilityActive {
  639. return true, nil
  640. }
  641. nn := d.GetNode(c, n.ID)
  642. n = *nn
  643. return n.Spec.Availability == swarm.NodeAvailabilityActive, check.Commentf("availability of node %s, reported by %s", n.ID, d.NodeID())
  644. }
  645. waitAndAssert(c, defaultReconciliationTimeout, waitActive, checker.True)
  646. if n.Spec.Role == swarm.NodeRoleManager {
  647. c.Assert(n.ManagerStatus, checker.NotNil, check.Commentf("manager status of node %s (manager), reported by %s", n.ID, d.NodeID()))
  648. if n.ManagerStatus.Leader {
  649. leaderFound = true
  650. }
  651. mCount++
  652. } else {
  653. c.Assert(n.ManagerStatus, checker.IsNil, check.Commentf("manager status of node %s (worker), reported by %s", n.ID, d.NodeID()))
  654. wCount++
  655. }
  656. }
  657. c.Assert(leaderFound, checker.True, check.Commentf("lack of leader reported by node %s", info.NodeID))
  658. c.Assert(mCount, checker.Equals, managerCount, check.Commentf("managers count reported by node %s", info.NodeID))
  659. c.Assert(wCount, checker.Equals, workerCount, check.Commentf("workers count reported by node %s", info.NodeID))
  660. }
  661. c.Assert(totalMCount, checker.Equals, managerCount)
  662. c.Assert(totalWCount, checker.Equals, workerCount)
  663. }
  664. func (s *DockerSwarmSuite) TestAPISwarmRestartCluster(c *check.C) {
  665. mCount, wCount := 5, 1
  666. var nodes []*daemon.Daemon
  667. for i := 0; i < mCount; i++ {
  668. manager := s.AddDaemon(c, true, true)
  669. info := manager.SwarmInfo(c)
  670. c.Assert(info.ControlAvailable, checker.True)
  671. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  672. nodes = append(nodes, manager)
  673. }
  674. for i := 0; i < wCount; i++ {
  675. worker := s.AddDaemon(c, true, false)
  676. info := worker.SwarmInfo(c)
  677. c.Assert(info.ControlAvailable, checker.False)
  678. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  679. nodes = append(nodes, worker)
  680. }
  681. // stop whole cluster
  682. {
  683. var wg sync.WaitGroup
  684. wg.Add(len(nodes))
  685. errs := make(chan error, len(nodes))
  686. for _, d := range nodes {
  687. go func(daemon *daemon.Daemon) {
  688. defer wg.Done()
  689. if err := daemon.StopWithError(); err != nil {
  690. errs <- err
  691. }
  692. }(d)
  693. }
  694. wg.Wait()
  695. close(errs)
  696. for err := range errs {
  697. c.Assert(err, check.IsNil)
  698. }
  699. }
  700. // start whole cluster
  701. {
  702. var wg sync.WaitGroup
  703. wg.Add(len(nodes))
  704. errs := make(chan error, len(nodes))
  705. for _, d := range nodes {
  706. go func(daemon *daemon.Daemon) {
  707. defer wg.Done()
  708. if err := daemon.StartWithError("--iptables=false"); err != nil {
  709. errs <- err
  710. }
  711. }(d)
  712. }
  713. wg.Wait()
  714. close(errs)
  715. for err := range errs {
  716. c.Assert(err, check.IsNil)
  717. }
  718. }
  719. checkClusterHealth(c, nodes, mCount, wCount)
  720. }
  721. func (s *DockerSwarmSuite) TestAPISwarmServicesUpdateWithName(c *check.C) {
  722. d := s.AddDaemon(c, true, true)
  723. instances := 2
  724. id := d.CreateService(c, simpleTestService, setInstances(instances))
  725. waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances)
  726. service := d.GetService(c, id)
  727. instances = 5
  728. setInstances(instances)(service)
  729. cli := d.NewClientT(c)
  730. defer cli.Close()
  731. _, err := cli.ServiceUpdate(context.Background(), service.Spec.Name, service.Version, service.Spec, types.ServiceUpdateOptions{})
  732. c.Assert(err, checker.IsNil)
  733. waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances)
  734. }
  735. // Unlocking an unlocked swarm results in an error
  736. func (s *DockerSwarmSuite) TestAPISwarmUnlockNotLocked(c *check.C) {
  737. d := s.AddDaemon(c, true, true)
  738. err := d.SwarmUnlock(swarm.UnlockRequest{UnlockKey: "wrong-key"})
  739. c.Assert(err, checker.NotNil)
  740. c.Assert(err.Error(), checker.Contains, "swarm is not locked")
  741. }
  742. // #29885
  743. func (s *DockerSwarmSuite) TestAPISwarmErrorHandling(c *check.C) {
  744. ln, err := net.Listen("tcp", fmt.Sprintf(":%d", defaultSwarmPort))
  745. c.Assert(err, checker.IsNil)
  746. defer ln.Close()
  747. d := s.AddDaemon(c, false, false)
  748. client := d.NewClientT(c)
  749. _, err = client.SwarmInit(context.Background(), swarm.InitRequest{
  750. ListenAddr: d.SwarmListenAddr(),
  751. })
  752. c.Assert(err, checker.NotNil)
  753. c.Assert(err.Error(), checker.Contains, "address already in use")
  754. }
  755. // Test case for 30242, where duplicate networks, with different drivers `bridge` and `overlay`,
  756. // caused both scopes to be `swarm` for `docker network inspect` and `docker network ls`.
  757. // This test makes sure the fixes correctly output scopes instead.
  758. func (s *DockerSwarmSuite) TestAPIDuplicateNetworks(c *check.C) {
  759. d := s.AddDaemon(c, true, true)
  760. cli := d.NewClientT(c)
  761. defer cli.Close()
  762. name := "foo"
  763. networkCreate := types.NetworkCreate{
  764. CheckDuplicate: false,
  765. }
  766. networkCreate.Driver = "bridge"
  767. n1, err := cli.NetworkCreate(context.Background(), name, networkCreate)
  768. c.Assert(err, checker.IsNil)
  769. networkCreate.Driver = "overlay"
  770. n2, err := cli.NetworkCreate(context.Background(), name, networkCreate)
  771. c.Assert(err, checker.IsNil)
  772. r1, err := cli.NetworkInspect(context.Background(), n1.ID, types.NetworkInspectOptions{})
  773. c.Assert(err, checker.IsNil)
  774. c.Assert(r1.Scope, checker.Equals, "local")
  775. r2, err := cli.NetworkInspect(context.Background(), n2.ID, types.NetworkInspectOptions{})
  776. c.Assert(err, checker.IsNil)
  777. c.Assert(r2.Scope, checker.Equals, "swarm")
  778. }
  779. // Test case for 30178
  780. func (s *DockerSwarmSuite) TestAPISwarmHealthcheckNone(c *check.C) {
  781. // Issue #36386 can be a independent one, which is worth further investigation.
  782. c.Skip("Root cause of Issue #36386 is needed")
  783. d := s.AddDaemon(c, true, true)
  784. out, err := d.Cmd("network", "create", "-d", "overlay", "lb")
  785. c.Assert(err, checker.IsNil, check.Commentf("%s", out))
  786. instances := 1
  787. d.CreateService(c, simpleTestService, setInstances(instances), func(s *swarm.Service) {
  788. if s.Spec.TaskTemplate.ContainerSpec == nil {
  789. s.Spec.TaskTemplate.ContainerSpec = &swarm.ContainerSpec{}
  790. }
  791. s.Spec.TaskTemplate.ContainerSpec.Healthcheck = &container.HealthConfig{}
  792. s.Spec.TaskTemplate.Networks = []swarm.NetworkAttachmentConfig{
  793. {Target: "lb"},
  794. }
  795. })
  796. waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances)
  797. containers := d.ActiveContainers(c)
  798. out, err = d.Cmd("exec", containers[0], "ping", "-c1", "-W3", "top")
  799. c.Assert(err, checker.IsNil, check.Commentf("%s", out))
  800. }
  801. func (s *DockerSwarmSuite) TestSwarmRepeatedRootRotation(c *check.C) {
  802. m := s.AddDaemon(c, true, true)
  803. w := s.AddDaemon(c, true, false)
  804. info := m.SwarmInfo(c)
  805. currentTrustRoot := info.Cluster.TLSInfo.TrustRoot
  806. // rotate multiple times
  807. for i := 0; i < 4; i++ {
  808. var err error
  809. var cert, key []byte
  810. if i%2 != 0 {
  811. cert, _, key, err = initca.New(&csr.CertificateRequest{
  812. CN: "newRoot",
  813. KeyRequest: csr.NewBasicKeyRequest(),
  814. CA: &csr.CAConfig{Expiry: ca.RootCAExpiration},
  815. })
  816. c.Assert(err, checker.IsNil)
  817. }
  818. expectedCert := string(cert)
  819. m.UpdateSwarm(c, func(s *swarm.Spec) {
  820. s.CAConfig.SigningCACert = expectedCert
  821. s.CAConfig.SigningCAKey = string(key)
  822. s.CAConfig.ForceRotate++
  823. })
  824. // poll to make sure update succeeds
  825. var clusterTLSInfo swarm.TLSInfo
  826. for j := 0; j < 18; j++ {
  827. info := m.SwarmInfo(c)
  828. // the desired CA cert and key is always redacted
  829. c.Assert(info.Cluster.Spec.CAConfig.SigningCAKey, checker.Equals, "")
  830. c.Assert(info.Cluster.Spec.CAConfig.SigningCACert, checker.Equals, "")
  831. clusterTLSInfo = info.Cluster.TLSInfo
  832. // if root rotation is done and the trust root has changed, we don't have to poll anymore
  833. if !info.Cluster.RootRotationInProgress && clusterTLSInfo.TrustRoot != currentTrustRoot {
  834. break
  835. }
  836. // root rotation not done
  837. time.Sleep(250 * time.Millisecond)
  838. }
  839. if cert != nil {
  840. c.Assert(clusterTLSInfo.TrustRoot, checker.Equals, expectedCert)
  841. }
  842. // could take another second or two for the nodes to trust the new roots after they've all gotten
  843. // new TLS certificates
  844. for j := 0; j < 18; j++ {
  845. mInfo := m.GetNode(c, m.NodeID()).Description.TLSInfo
  846. wInfo := m.GetNode(c, w.NodeID()).Description.TLSInfo
  847. if mInfo.TrustRoot == clusterTLSInfo.TrustRoot && wInfo.TrustRoot == clusterTLSInfo.TrustRoot {
  848. break
  849. }
  850. // nodes don't trust root certs yet
  851. time.Sleep(250 * time.Millisecond)
  852. }
  853. c.Assert(m.GetNode(c, m.NodeID()).Description.TLSInfo, checker.DeepEquals, clusterTLSInfo)
  854. c.Assert(m.GetNode(c, w.NodeID()).Description.TLSInfo, checker.DeepEquals, clusterTLSInfo)
  855. currentTrustRoot = clusterTLSInfo.TrustRoot
  856. }
  857. }
  858. func (s *DockerSwarmSuite) TestAPINetworkInspectWithScope(c *check.C) {
  859. d := s.AddDaemon(c, true, true)
  860. name := "test-scoped-network"
  861. ctx := context.Background()
  862. apiclient := d.NewClientT(c)
  863. resp, err := apiclient.NetworkCreate(ctx, name, types.NetworkCreate{Driver: "overlay"})
  864. assert.NilError(c, err)
  865. network, err := apiclient.NetworkInspect(ctx, name, types.NetworkInspectOptions{})
  866. assert.NilError(c, err)
  867. assert.Check(c, is.Equal("swarm", network.Scope))
  868. assert.Check(c, is.Equal(resp.ID, network.ID))
  869. _, err = apiclient.NetworkInspect(ctx, name, types.NetworkInspectOptions{Scope: "local"})
  870. assert.Check(c, client.IsErrNotFound(err))
  871. }