docker_api_swarm_test.go 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045
  1. // +build !windows
  2. package main
  3. import (
  4. "context"
  5. "fmt"
  6. "io/ioutil"
  7. "net"
  8. "net/http"
  9. "path/filepath"
  10. "runtime"
  11. "strings"
  12. "sync"
  13. "testing"
  14. "time"
  15. "github.com/cloudflare/cfssl/csr"
  16. "github.com/cloudflare/cfssl/helpers"
  17. "github.com/cloudflare/cfssl/initca"
  18. "github.com/docker/docker/api/types"
  19. "github.com/docker/docker/api/types/container"
  20. "github.com/docker/docker/api/types/swarm"
  21. "github.com/docker/docker/client"
  22. "github.com/docker/docker/integration-cli/checker"
  23. "github.com/docker/docker/integration-cli/daemon"
  24. testdaemon "github.com/docker/docker/internal/test/daemon"
  25. "github.com/docker/docker/internal/test/request"
  26. "github.com/docker/swarmkit/ca"
  27. "github.com/pkg/errors"
  28. "gotest.tools/assert"
  29. is "gotest.tools/assert/cmp"
  30. )
// defaultReconciliationTimeout bounds how long tests wait for the swarm to
// reconcile desired state (leader election, task scheduling, node status).
var defaultReconciliationTimeout = 30 * time.Second
// TestAPISwarmInit verifies the basic swarm lifecycle: init as manager,
// join as worker, leave, re-join with a worker token, and that each node's
// swarm state survives a daemon stop/start cycle.
func (s *DockerSwarmSuite) TestAPISwarmInit(c *testing.T) {
	// todo: should find a better way to verify that components are running than /info
	d1 := s.AddDaemon(c, true, true)
	info := d1.SwarmInfo(c)
	assert.Equal(c, info.ControlAvailable, true)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
	assert.Equal(c, info.Cluster.RootRotationInProgress, false)

	// d2 joins as a worker: active, but without manager control.
	d2 := s.AddDaemon(c, true, false)
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.ControlAvailable, false)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)

	// Leaving cluster
	assert.NilError(c, d2.SwarmLeave(c, false))
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.ControlAvailable, false)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)

	// Re-join d2 using the worker token issued by d1.
	d2.SwarmJoin(c, swarm.JoinRequest{
		ListenAddr:  d1.SwarmListenAddr(),
		JoinToken:   d1.JoinTokens(c).Worker,
		RemoteAddrs: []string{d1.SwarmListenAddr()},
	})
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.ControlAvailable, false)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)

	// Current state restoring after restarts
	d1.Stop(c)
	d2.Stop(c)
	d1.StartNode(c)
	d2.StartNode(c)
	info = d1.SwarmInfo(c)
	assert.Equal(c, info.ControlAvailable, true)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.ControlAvailable, false)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
}
// TestAPISwarmJoinToken verifies join-token handling: a join with no token
// or a bogus token is refused, a valid worker token is accepted, rotating
// the tokens invalidates the old one, and a plain spec update (without
// rotation) leaves the existing token usable.
func (s *DockerSwarmSuite) TestAPISwarmJoinToken(c *testing.T) {
	d1 := s.AddDaemon(c, false, false)
	d1.SwarmInit(c, swarm.InitRequest{})

	// todo: error message differs depending if some components of token are valid
	d2 := s.AddDaemon(c, false, false)
	c2 := d2.NewClientT(c)

	// No token at all: join must be refused and d2 stays inactive.
	err := c2.SwarmJoin(context.Background(), swarm.JoinRequest{
		ListenAddr:  d2.SwarmListenAddr(),
		RemoteAddrs: []string{d1.SwarmListenAddr()},
	})
	assert.ErrorContains(c, err, "join token is necessary")
	info := d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)

	// Malformed token: also refused.
	err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
		ListenAddr:  d2.SwarmListenAddr(),
		JoinToken:   "foobaz",
		RemoteAddrs: []string{d1.SwarmListenAddr()},
	})
	assert.ErrorContains(c, err, "invalid join token")
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)

	// Valid worker token: join succeeds, then leave again for the next case.
	workerToken := d1.JoinTokens(c).Worker
	d2.SwarmJoin(c, swarm.JoinRequest{
		ListenAddr:  d2.SwarmListenAddr(),
		JoinToken:   workerToken,
		RemoteAddrs: []string{d1.SwarmListenAddr()},
	})
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
	assert.NilError(c, d2.SwarmLeave(c, false))
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)

	// change tokens
	d1.RotateTokens(c)

	// The pre-rotation token must no longer be accepted.
	err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
		ListenAddr:  d2.SwarmListenAddr(),
		JoinToken:   workerToken,
		RemoteAddrs: []string{d1.SwarmListenAddr()},
	})
	assert.ErrorContains(c, err, "join token is necessary")
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)

	// The freshly rotated token works.
	workerToken = d1.JoinTokens(c).Worker
	d2.SwarmJoin(c, swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.SwarmListenAddr()}})
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
	assert.NilError(c, d2.SwarmLeave(c, false))
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)

	// change spec, don't change tokens
	d1.UpdateSwarm(c, func(s *swarm.Spec) {})

	// Still no token supplied, so the join is refused...
	err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
		ListenAddr:  d2.SwarmListenAddr(),
		RemoteAddrs: []string{d1.SwarmListenAddr()},
	})
	assert.ErrorContains(c, err, "join token is necessary")
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)

	// ...but the unrotated token remains valid after the spec update.
	d2.SwarmJoin(c, swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.SwarmListenAddr()}})
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
	assert.NilError(c, d2.SwarmLeave(c, false))
	info = d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
}
// TestUpdateSwarmAddExternalCA checks that external CAs can be added via a
// swarm spec update and that the CACert field round-trips through /info
// (empty when unset, preserved when provided).
func (s *DockerSwarmSuite) TestUpdateSwarmAddExternalCA(c *testing.T) {
	d1 := s.AddDaemon(c, false, false)
	d1.SwarmInit(c, swarm.InitRequest{})
	d1.UpdateSwarm(c, func(s *swarm.Spec) {
		s.CAConfig.ExternalCAs = []*swarm.ExternalCA{
			{
				Protocol: swarm.ExternalCAProtocolCFSSL,
				URL:      "https://thishasnoca.org",
			},
			{
				Protocol: swarm.ExternalCAProtocolCFSSL,
				URL:      "https://thishasacacert.org",
				CACert:   "cacert",
			},
		}
	})
	info := d1.SwarmInfo(c)
	assert.Equal(c, len(info.Cluster.Spec.CAConfig.ExternalCAs), 2)
	assert.Equal(c, info.Cluster.Spec.CAConfig.ExternalCAs[0].CACert, "")
	assert.Equal(c, info.Cluster.Spec.CAConfig.ExternalCAs[1].CACert, "cacert")
}
// TestAPISwarmCAHash verifies that joining with a token whose CA digest
// segment has been tampered with is rejected, since the remote CA's
// fingerprint no longer matches the token.
func (s *DockerSwarmSuite) TestAPISwarmCAHash(c *testing.T) {
	d1 := s.AddDaemon(c, true, true)
	d2 := s.AddDaemon(c, false, false)

	// Replace the third "-"-separated segment of the worker token
	// (presumably the CA digest — the rejection message below confirms the
	// mismatch is detected against the remote CA) with a bogus value.
	splitToken := strings.Split(d1.JoinTokens(c).Worker, "-")
	splitToken[2] = "1kxftv4ofnc6mt30lmgipg6ngf9luhwqopfk1tz6bdmnkubg0e"
	replacementToken := strings.Join(splitToken, "-")
	c2 := d2.NewClientT(c)
	err := c2.SwarmJoin(context.Background(), swarm.JoinRequest{
		ListenAddr:  d2.SwarmListenAddr(),
		JoinToken:   replacementToken,
		RemoteAddrs: []string{d1.SwarmListenAddr()},
	})
	assert.ErrorContains(c, err, "remote CA does not match fingerprint")
}
  168. func (s *DockerSwarmSuite) TestAPISwarmPromoteDemote(c *testing.T) {
  169. d1 := s.AddDaemon(c, false, false)
  170. d1.SwarmInit(c, swarm.InitRequest{})
  171. d2 := s.AddDaemon(c, true, false)
  172. info := d2.SwarmInfo(c)
  173. assert.Equal(c, info.ControlAvailable, false)
  174. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  175. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  176. n.Spec.Role = swarm.NodeRoleManager
  177. })
  178. waitAndAssert(c, defaultReconciliationTimeout, d2.CheckControlAvailable, checker.True)
  179. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  180. n.Spec.Role = swarm.NodeRoleWorker
  181. })
  182. waitAndAssert(c, defaultReconciliationTimeout, d2.CheckControlAvailable, checker.False)
  183. // Wait for the role to change to worker in the cert. This is partially
  184. // done because it's something worth testing in its own right, and
  185. // partially because changing the role from manager to worker and then
  186. // back to manager quickly might cause the node to pause for awhile
  187. // while waiting for the role to change to worker, and the test can
  188. // time out during this interval.
  189. waitAndAssert(c, defaultReconciliationTimeout, func(c *testing.T) (interface{}, string) {
  190. certBytes, err := ioutil.ReadFile(filepath.Join(d2.Folder, "root", "swarm", "certificates", "swarm-node.crt"))
  191. if err != nil {
  192. return "", fmt.Sprintf("error: %v", err)
  193. }
  194. certs, err := helpers.ParseCertificatesPEM(certBytes)
  195. if err == nil && len(certs) > 0 && len(certs[0].Subject.OrganizationalUnit) > 0 {
  196. return certs[0].Subject.OrganizationalUnit[0], nil
  197. }
  198. return "", "could not get organizational unit from certificate"
  199. }, checker.Equals, "swarm-worker")
  200. // Demoting last node should fail
  201. node := d1.GetNode(c, d1.NodeID())
  202. node.Spec.Role = swarm.NodeRoleWorker
  203. url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index)
  204. res, body, err := request.Post(url, request.Host(d1.Sock()), request.JSONBody(node.Spec))
  205. assert.NilError(c, err)
  206. b, err := request.ReadBody(body)
  207. assert.NilError(c, err)
  208. assert.Equal(c, res.StatusCode, http.StatusBadRequest, "output: %q", string(b))
  209. // The warning specific to demoting the last manager is best-effort and
  210. // won't appear until the Role field of the demoted manager has been
  211. // updated.
  212. // Yes, I know this looks silly, but checker.Matches is broken, since
  213. // it anchors the regexp contrary to the documentation, and this makes
  214. // it impossible to match something that includes a line break.
  215. if !strings.Contains(string(b), "last manager of the swarm") {
  216. assert.Assert(c, strings.Contains(string(b), "this would result in a loss of quorum"))
  217. }
  218. info = d1.SwarmInfo(c)
  219. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  220. assert.Equal(c, info.ControlAvailable, true)
  221. // Promote already demoted node
  222. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  223. n.Spec.Role = swarm.NodeRoleManager
  224. })
  225. waitAndAssert(c, defaultReconciliationTimeout, d2.CheckControlAvailable, checker.True)
  226. }
  227. func (s *DockerSwarmSuite) TestAPISwarmLeaderProxy(c *testing.T) {
  228. // add three managers, one of these is leader
  229. d1 := s.AddDaemon(c, true, true)
  230. d2 := s.AddDaemon(c, true, true)
  231. d3 := s.AddDaemon(c, true, true)
  232. // start a service by hitting each of the 3 managers
  233. d1.CreateService(c, simpleTestService, func(s *swarm.Service) {
  234. s.Spec.Name = "test1"
  235. })
  236. d2.CreateService(c, simpleTestService, func(s *swarm.Service) {
  237. s.Spec.Name = "test2"
  238. })
  239. d3.CreateService(c, simpleTestService, func(s *swarm.Service) {
  240. s.Spec.Name = "test3"
  241. })
  242. // 3 services should be started now, because the requests were proxied to leader
  243. // query each node and make sure it returns 3 services
  244. for _, d := range []*daemon.Daemon{d1, d2, d3} {
  245. services := d.ListServices(c)
  246. assert.Equal(c, len(services), 3)
  247. }
  248. }
// TestAPISwarmLeaderElection verifies that when the leader goes down a new
// leader is elected among the remaining managers, and that bringing the old
// leader back does not displace the newly elected one.
func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *testing.T) {
	if runtime.GOARCH == "s390x" {
		c.Skip("Disabled on s390x")
	}
	if runtime.GOARCH == "ppc64le" {
		c.Skip("Disabled on ppc64le")
	}

	// Create 3 nodes
	d1 := s.AddDaemon(c, true, true)
	d2 := s.AddDaemon(c, true, true)
	d3 := s.AddDaemon(c, true, true)

	// assert that the first node we made is the leader, and the other two are followers
	assert.Equal(c, d1.GetNode(c, d1.NodeID()).ManagerStatus.Leader, true)
	assert.Equal(c, d1.GetNode(c, d2.NodeID()).ManagerStatus.Leader, false)
	assert.Equal(c, d1.GetNode(c, d3.NodeID()).ManagerStatus.Leader, false)

	// Take the current leader down to force an election.
	d1.Stop(c)

	var (
		leader    *daemon.Daemon   // keep track of leader
		followers []*daemon.Daemon // keep track of followers
	)
	var lastErr error
	// checkLeader polls the given nodes and partitions them into leader and
	// followers; transient deadline / "no leader" errors are tolerated by
	// the error filter and recorded in lastErr for the timeout message.
	checkLeader := func(nodes ...*daemon.Daemon) checkF {
		return func(c *testing.T) (interface{}, string) {
			// clear these out before each run
			leader = nil
			followers = nil
			for _, d := range nodes {
				n := d.GetNode(c, d.NodeID(), func(err error) bool {
					if strings.Contains(errors.Cause(err).Error(), context.DeadlineExceeded.Error()) || strings.Contains(err.Error(), "swarm does not have a leader") {
						lastErr = err
						return true
					}
					return false
				})
				if n == nil {
					return false, fmt.Sprintf("failed to get node: %v", lastErr)
				}
				if n.ManagerStatus.Leader {
					leader = d
				} else {
					followers = append(followers, d)
				}
			}
			if leader == nil {
				return false, "no leader elected"
			}
			return true, fmt.Sprintf("elected %v", leader.ID())
		}
	}

	// wait for an election to occur
	c.Logf("Waiting for election to occur...")
	waitAndAssert(c, defaultReconciliationTimeout, checkLeader(d2, d3), checker.True)

	// assert that we have a new leader
	assert.Assert(c, leader != nil)

	// Keep track of the current leader, since we want that to be chosen.
	stableleader := leader

	// add the d1, the initial leader, back
	d1.StartNode(c)

	// wait for possible election
	c.Logf("Waiting for possible election...")
	waitAndAssert(c, defaultReconciliationTimeout, checkLeader(d1, d2, d3), checker.True)
	// pick out the leader and the followers again

	// verify that we still only have 1 leader and 2 followers
	assert.Assert(c, leader != nil)
	assert.Equal(c, len(followers), 2)

	// and that after we added d1 back, the leader hasn't changed
	assert.Equal(c, leader.NodeID(), stableleader.NodeID())
}
  317. func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *testing.T) {
  318. if runtime.GOARCH == "s390x" {
  319. c.Skip("Disabled on s390x")
  320. }
  321. if runtime.GOARCH == "ppc64le" {
  322. c.Skip("Disabled on ppc64le")
  323. }
  324. d1 := s.AddDaemon(c, true, true)
  325. d2 := s.AddDaemon(c, true, true)
  326. d3 := s.AddDaemon(c, true, true)
  327. d1.CreateService(c, simpleTestService)
  328. d2.Stop(c)
  329. // make sure there is a leader
  330. waitAndAssert(c, defaultReconciliationTimeout, d1.CheckLeader, checker.IsNil)
  331. d1.CreateService(c, simpleTestService, func(s *swarm.Service) {
  332. s.Spec.Name = "top1"
  333. })
  334. d3.Stop(c)
  335. var service swarm.Service
  336. simpleTestService(&service)
  337. service.Spec.Name = "top2"
  338. cli := d1.NewClientT(c)
  339. defer cli.Close()
  340. // d1 will eventually step down from leader because there is no longer an active quorum, wait for that to happen
  341. waitAndAssert(c, defaultReconciliationTimeout*2, func(c *testing.T) (interface{}, string) {
  342. _, err := cli.ServiceCreate(context.Background(), service.Spec, types.ServiceCreateOptions{})
  343. return err.Error(), nil
  344. }, checker.Contains, "Make sure more than half of the managers are online.")
  345. d2.StartNode(c)
  346. // make sure there is a leader
  347. waitAndAssert(c, defaultReconciliationTimeout, d1.CheckLeader, checker.IsNil)
  348. d1.CreateService(c, simpleTestService, func(s *swarm.Service) {
  349. s.Spec.Name = "top3"
  350. })
  351. }
// TestAPISwarmLeaveRemovesContainer checks that force-leaving the swarm
// removes service task containers while a manually started (non-service)
// container keeps running.
func (s *DockerSwarmSuite) TestAPISwarmLeaveRemovesContainer(c *testing.T) {
	d := s.AddDaemon(c, true, true)

	instances := 2
	d.CreateService(c, simpleTestService, setInstances(instances))

	// One plain container alongside the service tasks.
	id, err := d.Cmd("run", "-d", "busybox", "top")
	assert.NilError(c, err, id)
	id = strings.TrimSpace(id)

	// instances service tasks + 1 manual container.
	waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances+1)

	// Leaving as the last manager must be refused without force.
	assert.ErrorContains(c, d.SwarmLeave(c, false), "")
	assert.NilError(c, d.SwarmLeave(c, true))

	// Only the manually started container should survive.
	waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, 1)

	id2, err := d.Cmd("ps", "-q")
	assert.NilError(c, err, id2)
	assert.Assert(c, strings.HasPrefix(id, strings.TrimSpace(id2)))
}
// #23629
// TestAPISwarmLeaveOnPendingJoin checks that a node stuck in a pending
// join (remote address never answers) can still force-leave, and that
// doing so leaves its running containers untouched.
func (s *DockerSwarmSuite) TestAPISwarmLeaveOnPendingJoin(c *testing.T) {
	testRequires(c, Network)
	s.AddDaemon(c, true, true)
	d2 := s.AddDaemon(c, false, false)

	id, err := d2.Cmd("run", "-d", "busybox", "top")
	assert.NilError(c, err, id)
	id = strings.TrimSpace(id)

	// Join against an unreachable address so d2 ends up pending.
	c2 := d2.NewClientT(c)
	err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
		ListenAddr:  d2.SwarmListenAddr(),
		RemoteAddrs: []string{"123.123.123.123:1234"},
	})
	assert.ErrorContains(c, err, "Timeout was reached")

	info := d2.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStatePending)

	// Force-leave must succeed from the pending state.
	assert.NilError(c, d2.SwarmLeave(c, true))

	// The previously started container must still be the only one running.
	waitAndAssert(c, defaultReconciliationTimeout, d2.CheckActiveContainerCount, checker.Equals, 1)

	id2, err := d2.Cmd("ps", "-q")
	assert.NilError(c, err, id2)
	assert.Assert(c, strings.HasPrefix(id, strings.TrimSpace(id2)))
}
// #23705
// TestAPISwarmRestoreOnPendingJoin checks that a daemon restarted while a
// join is still pending comes back up inactive instead of stuck pending.
func (s *DockerSwarmSuite) TestAPISwarmRestoreOnPendingJoin(c *testing.T) {
	testRequires(c, Network)
	d := s.AddDaemon(c, false, false)
	client := d.NewClientT(c)
	// Join against an unreachable address so the node ends up pending.
	err := client.SwarmJoin(context.Background(), swarm.JoinRequest{
		ListenAddr:  d.SwarmListenAddr(),
		RemoteAddrs: []string{"123.123.123.123:1234"},
	})
	assert.ErrorContains(c, err, "Timeout was reached")

	waitAndAssert(c, defaultReconciliationTimeout, d.CheckLocalNodeState, checker.Equals, swarm.LocalNodeStatePending)

	d.RestartNode(c)

	// After restart the pending join must not be resumed.
	info := d.SwarmInfo(c)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
}
// TestAPISwarmManagerRestore checks that the stored service definition
// survives a restart of each of three managers, as well as a hard kill
// followed by a restart.
func (s *DockerSwarmSuite) TestAPISwarmManagerRestore(c *testing.T) {
	d1 := s.AddDaemon(c, true, true)

	instances := 2
	id := d1.CreateService(c, simpleTestService, setInstances(instances))

	// Each manager must be able to see the service before and after restart.
	d1.GetService(c, id)
	d1.RestartNode(c)
	d1.GetService(c, id)

	d2 := s.AddDaemon(c, true, true)
	d2.GetService(c, id)
	d2.RestartNode(c)
	d2.GetService(c, id)

	d3 := s.AddDaemon(c, true, true)
	d3.GetService(c, id)
	d3.RestartNode(c)
	d3.GetService(c, id)

	// A hard kill must also be recoverable.
	err := d3.Kill()
	assert.NilError(c, err)
	time.Sleep(1 * time.Second) // time to handle signal
	d3.StartNode(c)
	d3.GetService(c, id)
}
// TestAPISwarmScaleNoRollingUpdate checks that scaling a service up keeps
// the original task containers running — a scale-up must add containers,
// not restart the existing ones.
func (s *DockerSwarmSuite) TestAPISwarmScaleNoRollingUpdate(c *testing.T) {
	d := s.AddDaemon(c, true, true)

	instances := 2
	id := d.CreateService(c, simpleTestService, setInstances(instances))
	waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances)
	containers := d.ActiveContainers(c)

	// Scale up from 2 to 4 replicas.
	instances = 4
	d.UpdateService(c, d.GetService(c, id), setInstances(instances))
	waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances)
	containers2 := d.ActiveContainers(c)

	// Every container from the original set must still be present.
loop0:
	for _, c1 := range containers {
		for _, c2 := range containers2 {
			if c1 == c2 {
				continue loop0
			}
		}
		c.Errorf("container %v not found in new set %#v", c1, containers2)
	}
}
  445. func (s *DockerSwarmSuite) TestAPISwarmInvalidAddress(c *testing.T) {
  446. d := s.AddDaemon(c, false, false)
  447. req := swarm.InitRequest{
  448. ListenAddr: "",
  449. }
  450. res, _, err := request.Post("/swarm/init", request.Host(d.Sock()), request.JSONBody(req))
  451. assert.NilError(c, err)
  452. assert.Equal(c, res.StatusCode, http.StatusBadRequest)
  453. req2 := swarm.JoinRequest{
  454. ListenAddr: "0.0.0.0:2377",
  455. RemoteAddrs: []string{""},
  456. }
  457. res, _, err = request.Post("/swarm/join", request.Host(d.Sock()), request.JSONBody(req2))
  458. assert.NilError(c, err)
  459. assert.Equal(c, res.StatusCode, http.StatusBadRequest)
  460. }
// TestAPISwarmForceNewCluster checks that a manager which lost quorum can
// recover by re-initializing with ForceNewCluster, and that the recovered
// cluster accepts new nodes and service updates.
func (s *DockerSwarmSuite) TestAPISwarmForceNewCluster(c *testing.T) {
	d1 := s.AddDaemon(c, true, true)
	d2 := s.AddDaemon(c, true, true)

	instances := 2
	id := d1.CreateService(c, simpleTestService, setInstances(instances))
	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.CheckActiveContainerCount, d2.CheckActiveContainerCount), checker.Equals, instances)

	// drain d2, all containers should move to d1
	d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
		n.Spec.Availability = swarm.NodeAvailabilityDrain
	})
	waitAndAssert(c, defaultReconciliationTimeout, d1.CheckActiveContainerCount, checker.Equals, instances)
	waitAndAssert(c, defaultReconciliationTimeout, d2.CheckActiveContainerCount, checker.Equals, 0)

	// Stopping one of two managers loses quorum; recover by forcing a new
	// single-manager cluster from d1's state.
	d2.Stop(c)

	d1.SwarmInit(c, swarm.InitRequest{
		ForceNewCluster: true,
		Spec:            swarm.Spec{},
	})

	waitAndAssert(c, defaultReconciliationTimeout, d1.CheckActiveContainerCount, checker.Equals, instances)

	// The recovered cluster must accept a new manager...
	d3 := s.AddDaemon(c, true, true)
	info := d3.SwarmInfo(c)
	assert.Equal(c, info.ControlAvailable, true)
	assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)

	// ...and service updates issued through it.
	instances = 4
	d3.UpdateService(c, d3.GetService(c, id), setInstances(instances))
	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.CheckActiveContainerCount, d3.CheckActiveContainerCount), checker.Equals, instances)
}
  487. func simpleTestService(s *swarm.Service) {
  488. ureplicas := uint64(1)
  489. restartDelay := time.Duration(100 * time.Millisecond)
  490. s.Spec = swarm.ServiceSpec{
  491. TaskTemplate: swarm.TaskSpec{
  492. ContainerSpec: &swarm.ContainerSpec{
  493. Image: "busybox:latest",
  494. Command: []string{"/bin/top"},
  495. },
  496. RestartPolicy: &swarm.RestartPolicy{
  497. Delay: &restartDelay,
  498. },
  499. },
  500. Mode: swarm.ServiceMode{
  501. Replicated: &swarm.ReplicatedService{
  502. Replicas: &ureplicas,
  503. },
  504. },
  505. }
  506. s.Spec.Name = "top"
  507. }
  508. func serviceForUpdate(s *swarm.Service) {
  509. ureplicas := uint64(1)
  510. restartDelay := time.Duration(100 * time.Millisecond)
  511. s.Spec = swarm.ServiceSpec{
  512. TaskTemplate: swarm.TaskSpec{
  513. ContainerSpec: &swarm.ContainerSpec{
  514. Image: "busybox:latest",
  515. Command: []string{"/bin/top"},
  516. },
  517. RestartPolicy: &swarm.RestartPolicy{
  518. Delay: &restartDelay,
  519. },
  520. },
  521. Mode: swarm.ServiceMode{
  522. Replicated: &swarm.ReplicatedService{
  523. Replicas: &ureplicas,
  524. },
  525. },
  526. UpdateConfig: &swarm.UpdateConfig{
  527. Parallelism: 2,
  528. Delay: 4 * time.Second,
  529. FailureAction: swarm.UpdateFailureActionContinue,
  530. },
  531. RollbackConfig: &swarm.UpdateConfig{
  532. Parallelism: 3,
  533. Delay: 4 * time.Second,
  534. FailureAction: swarm.UpdateFailureActionContinue,
  535. },
  536. }
  537. s.Spec.Name = "updatetest"
  538. }
  539. func setInstances(replicas int) testdaemon.ServiceConstructor {
  540. ureplicas := uint64(replicas)
  541. return func(s *swarm.Service) {
  542. s.Spec.Mode = swarm.ServiceMode{
  543. Replicated: &swarm.ReplicatedService{
  544. Replicas: &ureplicas,
  545. },
  546. }
  547. }
  548. }
  549. func setUpdateOrder(order string) testdaemon.ServiceConstructor {
  550. return func(s *swarm.Service) {
  551. if s.Spec.UpdateConfig == nil {
  552. s.Spec.UpdateConfig = &swarm.UpdateConfig{}
  553. }
  554. s.Spec.UpdateConfig.Order = order
  555. }
  556. }
  557. func setRollbackOrder(order string) testdaemon.ServiceConstructor {
  558. return func(s *swarm.Service) {
  559. if s.Spec.RollbackConfig == nil {
  560. s.Spec.RollbackConfig = &swarm.UpdateConfig{}
  561. }
  562. s.Spec.RollbackConfig.Order = order
  563. }
  564. }
  565. func setImage(image string) testdaemon.ServiceConstructor {
  566. return func(s *swarm.Service) {
  567. if s.Spec.TaskTemplate.ContainerSpec == nil {
  568. s.Spec.TaskTemplate.ContainerSpec = &swarm.ContainerSpec{}
  569. }
  570. s.Spec.TaskTemplate.ContainerSpec.Image = image
  571. }
  572. }
  573. func setFailureAction(failureAction string) testdaemon.ServiceConstructor {
  574. return func(s *swarm.Service) {
  575. s.Spec.UpdateConfig.FailureAction = failureAction
  576. }
  577. }
  578. func setMaxFailureRatio(maxFailureRatio float32) testdaemon.ServiceConstructor {
  579. return func(s *swarm.Service) {
  580. s.Spec.UpdateConfig.MaxFailureRatio = maxFailureRatio
  581. }
  582. }
  583. func setParallelism(parallelism uint64) testdaemon.ServiceConstructor {
  584. return func(s *swarm.Service) {
  585. s.Spec.UpdateConfig.Parallelism = parallelism
  586. }
  587. }
  588. func setConstraints(constraints []string) testdaemon.ServiceConstructor {
  589. return func(s *swarm.Service) {
  590. if s.Spec.TaskTemplate.Placement == nil {
  591. s.Spec.TaskTemplate.Placement = &swarm.Placement{}
  592. }
  593. s.Spec.TaskTemplate.Placement.Constraints = constraints
  594. }
  595. }
  596. func setPlacementPrefs(prefs []swarm.PlacementPreference) testdaemon.ServiceConstructor {
  597. return func(s *swarm.Service) {
  598. if s.Spec.TaskTemplate.Placement == nil {
  599. s.Spec.TaskTemplate.Placement = &swarm.Placement{}
  600. }
  601. s.Spec.TaskTemplate.Placement.Preferences = prefs
  602. }
  603. }
  604. func setGlobalMode(s *swarm.Service) {
  605. s.Spec.Mode = swarm.ServiceMode{
  606. Global: &swarm.GlobalService{},
  607. }
  608. }
  609. func checkClusterHealth(c *testing.T, cl []*daemon.Daemon, managerCount, workerCount int) {
  610. var totalMCount, totalWCount int
  611. for _, d := range cl {
  612. var (
  613. info swarm.Info
  614. )
  615. // check info in a waitAndAssert, because if the cluster doesn't have a leader, `info` will return an error
  616. checkInfo := func(c *testing.T) (interface{}, string) {
  617. client := d.NewClientT(c)
  618. daemonInfo, err := client.Info(context.Background())
  619. info = daemonInfo.Swarm
  620. return err, "cluster not ready in time"
  621. }
  622. waitAndAssert(c, defaultReconciliationTimeout, checkInfo, checker.IsNil)
  623. if !info.ControlAvailable {
  624. totalWCount++
  625. continue
  626. }
  627. var leaderFound bool
  628. totalMCount++
  629. var mCount, wCount int
  630. for _, n := range d.ListNodes(c) {
  631. waitReady := func(c *testing.T) (interface{}, string) {
  632. if n.Status.State == swarm.NodeStateReady {
  633. return true, nil
  634. }
  635. nn := d.GetNode(c, n.ID)
  636. n = *nn
  637. return n.Status.State == swarm.NodeStateReady, fmt.Sprintf("state of node %s, reported by %s", n.ID, d.NodeID())
  638. }
  639. waitAndAssert(c, defaultReconciliationTimeout, waitReady, checker.True)
  640. waitActive := func(c *testing.T) (interface{}, string) {
  641. if n.Spec.Availability == swarm.NodeAvailabilityActive {
  642. return true, nil
  643. }
  644. nn := d.GetNode(c, n.ID)
  645. n = *nn
  646. return n.Spec.Availability == swarm.NodeAvailabilityActive, fmt.Sprintf("availability of node %s, reported by %s", n.ID, d.NodeID())
  647. }
  648. waitAndAssert(c, defaultReconciliationTimeout, waitActive, checker.True)
  649. if n.Spec.Role == swarm.NodeRoleManager {
  650. assert.Assert(c, n.ManagerStatus != nil, "manager status of node %s (manager), reported by %s", n.ID, d.NodeID())
  651. if n.ManagerStatus.Leader {
  652. leaderFound = true
  653. }
  654. mCount++
  655. } else {
  656. assert.Assert(c, n.ManagerStatus == nil, "manager status of node %s (worker), reported by %s", n.ID, d.NodeID())
  657. wCount++
  658. }
  659. }
  660. assert.Equal(c, leaderFound, true, "lack of leader reported by node %s", info.NodeID)
  661. assert.Equal(c, mCount, managerCount, "managers count reported by node %s", info.NodeID)
  662. assert.Equal(c, wCount, workerCount, "workers count reported by node %s", info.NodeID)
  663. }
  664. assert.Equal(c, totalMCount, managerCount)
  665. assert.Equal(c, totalWCount, workerCount)
  666. }
// TestAPISwarmRestartCluster stops every node of a 5-manager/1-worker
// cluster concurrently, restarts them all concurrently, and verifies the
// cluster comes back healthy with the same role counts.
func (s *DockerSwarmSuite) TestAPISwarmRestartCluster(c *testing.T) {
	mCount, wCount := 5, 1

	var nodes []*daemon.Daemon
	for i := 0; i < mCount; i++ {
		manager := s.AddDaemon(c, true, true)
		info := manager.SwarmInfo(c)
		assert.Equal(c, info.ControlAvailable, true)
		assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
		nodes = append(nodes, manager)
	}

	for i := 0; i < wCount; i++ {
		worker := s.AddDaemon(c, true, false)
		info := worker.SwarmInfo(c)
		assert.Equal(c, info.ControlAvailable, false)
		assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
		nodes = append(nodes, worker)
	}

	// stop whole cluster
	{
		var wg sync.WaitGroup
		wg.Add(len(nodes))
		// Buffered so goroutines never block sending their error.
		errs := make(chan error, len(nodes))

		for _, d := range nodes {
			// d is passed as an argument so each goroutine gets its own copy.
			go func(daemon *daemon.Daemon) {
				defer wg.Done()
				if err := daemon.StopWithError(); err != nil {
					errs <- err
				}
			}(d)
		}
		wg.Wait()
		close(errs)
		for err := range errs {
			assert.NilError(c, err)
		}
	}

	// start whole cluster
	{
		var wg sync.WaitGroup
		wg.Add(len(nodes))
		errs := make(chan error, len(nodes))

		for _, d := range nodes {
			go func(daemon *daemon.Daemon) {
				defer wg.Done()
				if err := daemon.StartWithError("--iptables=false"); err != nil {
					errs <- err
				}
			}(d)
		}
		wg.Wait()
		close(errs)
		for err := range errs {
			assert.NilError(c, err)
		}
	}

	checkClusterHealth(c, nodes, mCount, wCount)
}
  724. func (s *DockerSwarmSuite) TestAPISwarmServicesUpdateWithName(c *testing.T) {
  725. d := s.AddDaemon(c, true, true)
  726. instances := 2
  727. id := d.CreateService(c, simpleTestService, setInstances(instances))
  728. waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances)
  729. service := d.GetService(c, id)
  730. instances = 5
  731. setInstances(instances)(service)
  732. cli := d.NewClientT(c)
  733. defer cli.Close()
  734. _, err := cli.ServiceUpdate(context.Background(), service.Spec.Name, service.Version, service.Spec, types.ServiceUpdateOptions{})
  735. assert.NilError(c, err)
  736. waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances)
  737. }
  738. // Unlocking an unlocked swarm results in an error
  739. func (s *DockerSwarmSuite) TestAPISwarmUnlockNotLocked(c *testing.T) {
  740. d := s.AddDaemon(c, true, true)
  741. err := d.SwarmUnlock(c, swarm.UnlockRequest{UnlockKey: "wrong-key"})
  742. assert.ErrorContains(c, err, "swarm is not locked")
  743. }
  744. // #29885
  745. func (s *DockerSwarmSuite) TestAPISwarmErrorHandling(c *testing.T) {
  746. ln, err := net.Listen("tcp", fmt.Sprintf(":%d", defaultSwarmPort))
  747. assert.NilError(c, err)
  748. defer ln.Close()
  749. d := s.AddDaemon(c, false, false)
  750. client := d.NewClientT(c)
  751. _, err = client.SwarmInit(context.Background(), swarm.InitRequest{
  752. ListenAddr: d.SwarmListenAddr(),
  753. })
  754. assert.ErrorContains(c, err, "address already in use")
  755. }
  756. // Test case for 30242, where duplicate networks, with different drivers `bridge` and `overlay`,
  757. // caused both scopes to be `swarm` for `docker network inspect` and `docker network ls`.
  758. // This test makes sure the fixes correctly output scopes instead.
  759. func (s *DockerSwarmSuite) TestAPIDuplicateNetworks(c *testing.T) {
  760. d := s.AddDaemon(c, true, true)
  761. cli := d.NewClientT(c)
  762. defer cli.Close()
  763. name := "foo"
  764. networkCreate := types.NetworkCreate{
  765. CheckDuplicate: false,
  766. }
  767. networkCreate.Driver = "bridge"
  768. n1, err := cli.NetworkCreate(context.Background(), name, networkCreate)
  769. assert.NilError(c, err)
  770. networkCreate.Driver = "overlay"
  771. n2, err := cli.NetworkCreate(context.Background(), name, networkCreate)
  772. assert.NilError(c, err)
  773. r1, err := cli.NetworkInspect(context.Background(), n1.ID, types.NetworkInspectOptions{})
  774. assert.NilError(c, err)
  775. assert.Equal(c, r1.Scope, "local")
  776. r2, err := cli.NetworkInspect(context.Background(), n2.ID, types.NetworkInspectOptions{})
  777. assert.NilError(c, err)
  778. assert.Equal(c, r2.Scope, "swarm")
  779. }
  780. // Test case for 30178
  781. func (s *DockerSwarmSuite) TestAPISwarmHealthcheckNone(c *testing.T) {
  782. // Issue #36386 can be a independent one, which is worth further investigation.
  783. c.Skip("Root cause of Issue #36386 is needed")
  784. d := s.AddDaemon(c, true, true)
  785. out, err := d.Cmd("network", "create", "-d", "overlay", "lb")
  786. assert.NilError(c, err, out)
  787. instances := 1
  788. d.CreateService(c, simpleTestService, setInstances(instances), func(s *swarm.Service) {
  789. if s.Spec.TaskTemplate.ContainerSpec == nil {
  790. s.Spec.TaskTemplate.ContainerSpec = &swarm.ContainerSpec{}
  791. }
  792. s.Spec.TaskTemplate.ContainerSpec.Healthcheck = &container.HealthConfig{}
  793. s.Spec.TaskTemplate.Networks = []swarm.NetworkAttachmentConfig{
  794. {Target: "lb"},
  795. }
  796. })
  797. waitAndAssert(c, defaultReconciliationTimeout, d.CheckActiveContainerCount, checker.Equals, instances)
  798. containers := d.ActiveContainers(c)
  799. out, err = d.Cmd("exec", containers[0], "ping", "-c1", "-W3", "top")
  800. assert.NilError(c, err, out)
  801. }
// TestSwarmRepeatedRootRotation rotates the swarm root CA four times in a
// row, alternating between a caller-supplied root certificate and a
// cluster-generated one, and verifies after each rotation that the cluster
// trust root changes and that both nodes converge on it.
func (s *DockerSwarmSuite) TestSwarmRepeatedRootRotation(c *testing.T) {
	m := s.AddDaemon(c, true, true)
	w := s.AddDaemon(c, true, false)
	info := m.SwarmInfo(c)
	currentTrustRoot := info.Cluster.TLSInfo.TrustRoot

	// rotate multiple times
	for i := 0; i < 4; i++ {
		var err error
		var cert, key []byte
		// On odd iterations generate an explicit new root CA to rotate to;
		// on even iterations cert/key stay nil, so the rotation below is
		// driven purely by the ForceRotate bump.
		if i%2 != 0 {
			cert, _, key, err = initca.New(&csr.CertificateRequest{
				CN:         "newRoot",
				KeyRequest: csr.NewBasicKeyRequest(),
				CA:         &csr.CAConfig{Expiry: ca.RootCAExpiration},
			})
			assert.NilError(c, err)
		}
		expectedCert := string(cert)
		// Request the rotation. When cert/key are empty this still forces a
		// rotation because ForceRotate is incremented.
		m.UpdateSwarm(c, func(s *swarm.Spec) {
			s.CAConfig.SigningCACert = expectedCert
			s.CAConfig.SigningCAKey = string(key)
			s.CAConfig.ForceRotate++
		})

		// poll to make sure update succeeds
		var clusterTLSInfo swarm.TLSInfo
		for j := 0; j < 18; j++ {
			info := m.SwarmInfo(c)

			// the desired CA cert and key is always redacted
			assert.Equal(c, info.Cluster.Spec.CAConfig.SigningCAKey, "")
			assert.Equal(c, info.Cluster.Spec.CAConfig.SigningCACert, "")

			clusterTLSInfo = info.Cluster.TLSInfo

			// if root rotation is done and the trust root has changed, we don't have to poll anymore
			if !info.Cluster.RootRotationInProgress && clusterTLSInfo.TrustRoot != currentTrustRoot {
				break
			}

			// root rotation not done
			time.Sleep(250 * time.Millisecond)
		}
		// When we supplied an explicit cert, the settled trust root must be
		// exactly that cert.
		if cert != nil {
			assert.Equal(c, clusterTLSInfo.TrustRoot, expectedCert)
		}
		// could take another second or two for the nodes to trust the new roots after they've all gotten
		// new TLS certificates
		for j := 0; j < 18; j++ {
			mInfo := m.GetNode(c, m.NodeID()).Description.TLSInfo
			wInfo := m.GetNode(c, w.NodeID()).Description.TLSInfo
			if mInfo.TrustRoot == clusterTLSInfo.TrustRoot && wInfo.TrustRoot == clusterTLSInfo.TrustRoot {
				break
			}
			// nodes don't trust root certs yet
			time.Sleep(250 * time.Millisecond)
		}

		// Final hard assertions: both nodes must report the cluster TLS info.
		assert.DeepEqual(c, m.GetNode(c, m.NodeID()).Description.TLSInfo, clusterTLSInfo)
		assert.DeepEqual(c, m.GetNode(c, w.NodeID()).Description.TLSInfo, clusterTLSInfo)
		// Baseline for detecting the next rotation's trust-root change.
		currentTrustRoot = clusterTLSInfo.TrustRoot
	}
}
  859. func (s *DockerSwarmSuite) TestAPINetworkInspectWithScope(c *testing.T) {
  860. d := s.AddDaemon(c, true, true)
  861. name := "test-scoped-network"
  862. ctx := context.Background()
  863. apiclient := d.NewClientT(c)
  864. resp, err := apiclient.NetworkCreate(ctx, name, types.NetworkCreate{Driver: "overlay"})
  865. assert.NilError(c, err)
  866. network, err := apiclient.NetworkInspect(ctx, name, types.NetworkInspectOptions{})
  867. assert.NilError(c, err)
  868. assert.Check(c, is.Equal("swarm", network.Scope))
  869. assert.Check(c, is.Equal(resp.ID, network.ID))
  870. _, err = apiclient.NetworkInspect(ctx, name, types.NetworkInspectOptions{Scope: "local"})
  871. assert.Check(c, client.IsErrNotFound(err))
  872. }