docker_api_swarm_test.go 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046
  1. //go:build !windows
  2. // +build !windows
  3. package main
  4. import (
  5. "context"
  6. "fmt"
  7. "net"
  8. "net/http"
  9. "os"
  10. "path/filepath"
  11. "runtime"
  12. "strings"
  13. "sync"
  14. "testing"
  15. "time"
  16. "github.com/cloudflare/cfssl/csr"
  17. "github.com/cloudflare/cfssl/helpers"
  18. "github.com/cloudflare/cfssl/initca"
  19. "github.com/docker/docker/api/types"
  20. "github.com/docker/docker/api/types/container"
  21. "github.com/docker/docker/api/types/swarm"
  22. "github.com/docker/docker/client"
  23. "github.com/docker/docker/integration-cli/checker"
  24. "github.com/docker/docker/integration-cli/daemon"
  25. testdaemon "github.com/docker/docker/testutil/daemon"
  26. "github.com/docker/docker/testutil/request"
  27. "github.com/moby/swarmkit/v2/ca"
  28. "gotest.tools/v3/assert"
  29. is "gotest.tools/v3/assert/cmp"
  30. "gotest.tools/v3/poll"
  31. )
  // defaultReconciliationTimeout is the timeout the swarm tests in this file
  // pass to poll.WithTimeout while waiting for cluster state to settle.
  var defaultReconciliationTimeout = time.Second * 30
  33. func (s *DockerSwarmSuite) TestAPISwarmInit(c *testing.T) {
  34. // todo: should find a better way to verify that components are running than /info
  35. d1 := s.AddDaemon(c, true, true)
  36. info := d1.SwarmInfo(c)
  37. assert.Equal(c, info.ControlAvailable, true)
  38. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  39. assert.Equal(c, info.Cluster.RootRotationInProgress, false)
  40. d2 := s.AddDaemon(c, true, false)
  41. info = d2.SwarmInfo(c)
  42. assert.Equal(c, info.ControlAvailable, false)
  43. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  44. // Leaving cluster
  45. assert.NilError(c, d2.SwarmLeave(c, false))
  46. info = d2.SwarmInfo(c)
  47. assert.Equal(c, info.ControlAvailable, false)
  48. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
  49. d2.SwarmJoin(c, swarm.JoinRequest{
  50. ListenAddr: d1.SwarmListenAddr(),
  51. JoinToken: d1.JoinTokens(c).Worker,
  52. RemoteAddrs: []string{d1.SwarmListenAddr()},
  53. })
  54. info = d2.SwarmInfo(c)
  55. assert.Equal(c, info.ControlAvailable, false)
  56. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  57. // Current state restoring after restarts
  58. d1.Stop(c)
  59. d2.Stop(c)
  60. d1.StartNode(c)
  61. d2.StartNode(c)
  62. info = d1.SwarmInfo(c)
  63. assert.Equal(c, info.ControlAvailable, true)
  64. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  65. info = d2.SwarmInfo(c)
  66. assert.Equal(c, info.ControlAvailable, false)
  67. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  68. }
  69. func (s *DockerSwarmSuite) TestAPISwarmJoinToken(c *testing.T) {
  70. d1 := s.AddDaemon(c, false, false)
  71. d1.SwarmInit(c, swarm.InitRequest{})
  72. // todo: error message differs depending if some components of token are valid
  73. d2 := s.AddDaemon(c, false, false)
  74. c2 := d2.NewClientT(c)
  75. err := c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  76. ListenAddr: d2.SwarmListenAddr(),
  77. RemoteAddrs: []string{d1.SwarmListenAddr()},
  78. })
  79. assert.ErrorContains(c, err, "join token is necessary")
  80. info := d2.SwarmInfo(c)
  81. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
  82. err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  83. ListenAddr: d2.SwarmListenAddr(),
  84. JoinToken: "foobaz",
  85. RemoteAddrs: []string{d1.SwarmListenAddr()},
  86. })
  87. assert.ErrorContains(c, err, "invalid join token")
  88. info = d2.SwarmInfo(c)
  89. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
  90. workerToken := d1.JoinTokens(c).Worker
  91. d2.SwarmJoin(c, swarm.JoinRequest{
  92. ListenAddr: d2.SwarmListenAddr(),
  93. JoinToken: workerToken,
  94. RemoteAddrs: []string{d1.SwarmListenAddr()},
  95. })
  96. info = d2.SwarmInfo(c)
  97. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  98. assert.NilError(c, d2.SwarmLeave(c, false))
  99. info = d2.SwarmInfo(c)
  100. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
  101. // change tokens
  102. d1.RotateTokens(c)
  103. err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  104. ListenAddr: d2.SwarmListenAddr(),
  105. JoinToken: workerToken,
  106. RemoteAddrs: []string{d1.SwarmListenAddr()},
  107. })
  108. assert.ErrorContains(c, err, "join token is necessary")
  109. info = d2.SwarmInfo(c)
  110. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
  111. workerToken = d1.JoinTokens(c).Worker
  112. d2.SwarmJoin(c, swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.SwarmListenAddr()}})
  113. info = d2.SwarmInfo(c)
  114. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  115. assert.NilError(c, d2.SwarmLeave(c, false))
  116. info = d2.SwarmInfo(c)
  117. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
  118. // change spec, don't change tokens
  119. d1.UpdateSwarm(c, func(s *swarm.Spec) {})
  120. err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  121. ListenAddr: d2.SwarmListenAddr(),
  122. RemoteAddrs: []string{d1.SwarmListenAddr()},
  123. })
  124. assert.ErrorContains(c, err, "join token is necessary")
  125. info = d2.SwarmInfo(c)
  126. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
  127. d2.SwarmJoin(c, swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.SwarmListenAddr()}})
  128. info = d2.SwarmInfo(c)
  129. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  130. assert.NilError(c, d2.SwarmLeave(c, false))
  131. info = d2.SwarmInfo(c)
  132. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
  133. }
  134. func (s *DockerSwarmSuite) TestUpdateSwarmAddExternalCA(c *testing.T) {
  135. d1 := s.AddDaemon(c, false, false)
  136. d1.SwarmInit(c, swarm.InitRequest{})
  137. d1.UpdateSwarm(c, func(s *swarm.Spec) {
  138. s.CAConfig.ExternalCAs = []*swarm.ExternalCA{
  139. {
  140. Protocol: swarm.ExternalCAProtocolCFSSL,
  141. URL: "https://thishasnoca.org",
  142. },
  143. {
  144. Protocol: swarm.ExternalCAProtocolCFSSL,
  145. URL: "https://thishasacacert.org",
  146. CACert: "cacert",
  147. },
  148. }
  149. })
  150. info := d1.SwarmInfo(c)
  151. assert.Equal(c, len(info.Cluster.Spec.CAConfig.ExternalCAs), 2)
  152. assert.Equal(c, info.Cluster.Spec.CAConfig.ExternalCAs[0].CACert, "")
  153. assert.Equal(c, info.Cluster.Spec.CAConfig.ExternalCAs[1].CACert, "cacert")
  154. }
  155. func (s *DockerSwarmSuite) TestAPISwarmCAHash(c *testing.T) {
  156. d1 := s.AddDaemon(c, true, true)
  157. d2 := s.AddDaemon(c, false, false)
  158. splitToken := strings.Split(d1.JoinTokens(c).Worker, "-")
  159. splitToken[2] = "1kxftv4ofnc6mt30lmgipg6ngf9luhwqopfk1tz6bdmnkubg0e"
  160. replacementToken := strings.Join(splitToken, "-")
  161. c2 := d2.NewClientT(c)
  162. err := c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  163. ListenAddr: d2.SwarmListenAddr(),
  164. JoinToken: replacementToken,
  165. RemoteAddrs: []string{d1.SwarmListenAddr()},
  166. })
  167. assert.ErrorContains(c, err, "remote CA does not match fingerprint")
  168. }
  169. func (s *DockerSwarmSuite) TestAPISwarmPromoteDemote(c *testing.T) {
  170. d1 := s.AddDaemon(c, false, false)
  171. d1.SwarmInit(c, swarm.InitRequest{})
  172. d2 := s.AddDaemon(c, true, false)
  173. info := d2.SwarmInfo(c)
  174. assert.Equal(c, info.ControlAvailable, false)
  175. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  176. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  177. n.Spec.Role = swarm.NodeRoleManager
  178. })
  179. poll.WaitOn(c, pollCheck(c, d2.CheckControlAvailable, checker.True()), poll.WithTimeout(defaultReconciliationTimeout))
  180. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  181. n.Spec.Role = swarm.NodeRoleWorker
  182. })
  183. poll.WaitOn(c, pollCheck(c, d2.CheckControlAvailable, checker.False()), poll.WithTimeout(defaultReconciliationTimeout))
  184. // Wait for the role to change to worker in the cert. This is partially
  185. // done because it's something worth testing in its own right, and
  186. // partially because changing the role from manager to worker and then
  187. // back to manager quickly might cause the node to pause for awhile
  188. // while waiting for the role to change to worker, and the test can
  189. // time out during this interval.
  190. poll.WaitOn(c, pollCheck(c, func(c *testing.T) (interface{}, string) {
  191. certBytes, err := os.ReadFile(filepath.Join(d2.Folder, "root", "swarm", "certificates", "swarm-node.crt"))
  192. if err != nil {
  193. return "", fmt.Sprintf("error: %v", err)
  194. }
  195. certs, err := helpers.ParseCertificatesPEM(certBytes)
  196. if err == nil && len(certs) > 0 && len(certs[0].Subject.OrganizationalUnit) > 0 {
  197. return certs[0].Subject.OrganizationalUnit[0], ""
  198. }
  199. return "", "could not get organizational unit from certificate"
  200. }, checker.Equals("swarm-worker")), poll.WithTimeout(defaultReconciliationTimeout))
  201. // Demoting last node should fail
  202. node := d1.GetNode(c, d1.NodeID())
  203. node.Spec.Role = swarm.NodeRoleWorker
  204. url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index)
  205. res, body, err := request.Post(url, request.Host(d1.Sock()), request.JSONBody(node.Spec))
  206. assert.NilError(c, err)
  207. b, err := request.ReadBody(body)
  208. assert.NilError(c, err)
  209. assert.Equal(c, res.StatusCode, http.StatusBadRequest, "output: %q", string(b))
  210. // The warning specific to demoting the last manager is best-effort and
  211. // won't appear until the Role field of the demoted manager has been
  212. // updated.
  213. // Yes, I know this looks silly, but checker.Matches is broken, since
  214. // it anchors the regexp contrary to the documentation, and this makes
  215. // it impossible to match something that includes a line break.
  216. if !strings.Contains(string(b), "last manager of the swarm") {
  217. assert.Assert(c, strings.Contains(string(b), "this would result in a loss of quorum"))
  218. }
  219. info = d1.SwarmInfo(c)
  220. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  221. assert.Equal(c, info.ControlAvailable, true)
  222. // Promote already demoted node
  223. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  224. n.Spec.Role = swarm.NodeRoleManager
  225. })
  226. poll.WaitOn(c, pollCheck(c, d2.CheckControlAvailable, checker.True()), poll.WithTimeout(defaultReconciliationTimeout))
  227. }
  228. func (s *DockerSwarmSuite) TestAPISwarmLeaderProxy(c *testing.T) {
  229. // add three managers, one of these is leader
  230. d1 := s.AddDaemon(c, true, true)
  231. d2 := s.AddDaemon(c, true, true)
  232. d3 := s.AddDaemon(c, true, true)
  233. // start a service by hitting each of the 3 managers
  234. d1.CreateService(c, simpleTestService, func(s *swarm.Service) {
  235. s.Spec.Name = "test1"
  236. })
  237. d2.CreateService(c, simpleTestService, func(s *swarm.Service) {
  238. s.Spec.Name = "test2"
  239. })
  240. d3.CreateService(c, simpleTestService, func(s *swarm.Service) {
  241. s.Spec.Name = "test3"
  242. })
  243. // 3 services should be started now, because the requests were proxied to leader
  244. // query each node and make sure it returns 3 services
  245. for _, d := range []*daemon.Daemon{d1, d2, d3} {
  246. services := d.ListServices(c)
  247. assert.Equal(c, len(services), 3)
  248. }
  249. }
  250. func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *testing.T) {
  251. if runtime.GOARCH == "s390x" {
  252. c.Skip("Disabled on s390x")
  253. }
  254. if runtime.GOARCH == "ppc64le" {
  255. c.Skip("Disabled on ppc64le")
  256. }
  257. // Create 3 nodes
  258. d1 := s.AddDaemon(c, true, true)
  259. d2 := s.AddDaemon(c, true, true)
  260. d3 := s.AddDaemon(c, true, true)
  261. // assert that the first node we made is the leader, and the other two are followers
  262. assert.Equal(c, d1.GetNode(c, d1.NodeID()).ManagerStatus.Leader, true)
  263. assert.Equal(c, d1.GetNode(c, d2.NodeID()).ManagerStatus.Leader, false)
  264. assert.Equal(c, d1.GetNode(c, d3.NodeID()).ManagerStatus.Leader, false)
  265. d1.Stop(c)
  266. var (
  267. leader *daemon.Daemon // keep track of leader
  268. followers []*daemon.Daemon // keep track of followers
  269. )
  270. var lastErr error
  271. checkLeader := func(nodes ...*daemon.Daemon) checkF {
  272. return func(c *testing.T) (interface{}, string) {
  273. // clear these out before each run
  274. leader = nil
  275. followers = nil
  276. for _, d := range nodes {
  277. n := d.GetNode(c, d.NodeID(), func(err error) bool {
  278. if strings.Contains(err.Error(), context.DeadlineExceeded.Error()) || strings.Contains(err.Error(), "swarm does not have a leader") {
  279. lastErr = err
  280. return true
  281. }
  282. return false
  283. })
  284. if n == nil {
  285. return false, fmt.Sprintf("failed to get node: %v", lastErr)
  286. }
  287. if n.ManagerStatus.Leader {
  288. leader = d
  289. } else {
  290. followers = append(followers, d)
  291. }
  292. }
  293. if leader == nil {
  294. return false, "no leader elected"
  295. }
  296. return true, fmt.Sprintf("elected %v", leader.ID())
  297. }
  298. }
  299. // wait for an election to occur
  300. c.Logf("Waiting for election to occur...")
  301. poll.WaitOn(c, pollCheck(c, checkLeader(d2, d3), checker.True()), poll.WithTimeout(defaultReconciliationTimeout))
  302. // assert that we have a new leader
  303. assert.Assert(c, leader != nil)
  304. // Keep track of the current leader, since we want that to be chosen.
  305. stableleader := leader
  306. // add the d1, the initial leader, back
  307. d1.StartNode(c)
  308. // wait for possible election
  309. c.Logf("Waiting for possible election...")
  310. poll.WaitOn(c, pollCheck(c, checkLeader(d1, d2, d3), checker.True()), poll.WithTimeout(defaultReconciliationTimeout))
  311. // pick out the leader and the followers again
  312. // verify that we still only have 1 leader and 2 followers
  313. assert.Assert(c, leader != nil)
  314. assert.Equal(c, len(followers), 2)
  315. // and that after we added d1 back, the leader hasn't changed
  316. assert.Equal(c, leader.NodeID(), stableleader.NodeID())
  317. }
  318. func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *testing.T) {
  319. if runtime.GOARCH == "s390x" {
  320. c.Skip("Disabled on s390x")
  321. }
  322. if runtime.GOARCH == "ppc64le" {
  323. c.Skip("Disabled on ppc64le")
  324. }
  325. d1 := s.AddDaemon(c, true, true)
  326. d2 := s.AddDaemon(c, true, true)
  327. d3 := s.AddDaemon(c, true, true)
  328. d1.CreateService(c, simpleTestService)
  329. d2.Stop(c)
  330. // make sure there is a leader
  331. poll.WaitOn(c, pollCheck(c, d1.CheckLeader, checker.IsNil()), poll.WithTimeout(defaultReconciliationTimeout))
  332. d1.CreateService(c, simpleTestService, func(s *swarm.Service) {
  333. s.Spec.Name = "top1"
  334. })
  335. d3.Stop(c)
  336. var service swarm.Service
  337. simpleTestService(&service)
  338. service.Spec.Name = "top2"
  339. cli := d1.NewClientT(c)
  340. defer cli.Close()
  341. // d1 will eventually step down from leader because there is no longer an active quorum, wait for that to happen
  342. poll.WaitOn(c, pollCheck(c, func(c *testing.T) (interface{}, string) {
  343. _, err := cli.ServiceCreate(context.Background(), service.Spec, types.ServiceCreateOptions{})
  344. return err.Error(), ""
  345. }, checker.Contains("Make sure more than half of the managers are online.")), poll.WithTimeout(defaultReconciliationTimeout*2))
  346. d2.StartNode(c)
  347. // make sure there is a leader
  348. poll.WaitOn(c, pollCheck(c, d1.CheckLeader, checker.IsNil()), poll.WithTimeout(defaultReconciliationTimeout))
  349. d1.CreateService(c, simpleTestService, func(s *swarm.Service) {
  350. s.Spec.Name = "top3"
  351. })
  352. }
  353. func (s *DockerSwarmSuite) TestAPISwarmLeaveRemovesContainer(c *testing.T) {
  354. d := s.AddDaemon(c, true, true)
  355. instances := 2
  356. d.CreateService(c, simpleTestService, setInstances(instances))
  357. id, err := d.Cmd("run", "-d", "busybox", "top")
  358. assert.NilError(c, err, id)
  359. id = strings.TrimSpace(id)
  360. poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount, checker.Equals(instances+1)), poll.WithTimeout(defaultReconciliationTimeout))
  361. assert.ErrorContains(c, d.SwarmLeave(c, false), "")
  362. assert.NilError(c, d.SwarmLeave(c, true))
  363. poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount, checker.Equals(1)), poll.WithTimeout(defaultReconciliationTimeout))
  364. id2, err := d.Cmd("ps", "-q")
  365. assert.NilError(c, err, id2)
  366. assert.Assert(c, strings.HasPrefix(id, strings.TrimSpace(id2)))
  367. }
  368. // #23629
  369. func (s *DockerSwarmSuite) TestAPISwarmLeaveOnPendingJoin(c *testing.T) {
  370. testRequires(c, Network)
  371. s.AddDaemon(c, true, true)
  372. d2 := s.AddDaemon(c, false, false)
  373. id, err := d2.Cmd("run", "-d", "busybox", "top")
  374. assert.NilError(c, err, id)
  375. id = strings.TrimSpace(id)
  376. c2 := d2.NewClientT(c)
  377. err = c2.SwarmJoin(context.Background(), swarm.JoinRequest{
  378. ListenAddr: d2.SwarmListenAddr(),
  379. RemoteAddrs: []string{"123.123.123.123:1234"},
  380. })
  381. assert.ErrorContains(c, err, "Timeout was reached")
  382. info := d2.SwarmInfo(c)
  383. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStatePending)
  384. assert.NilError(c, d2.SwarmLeave(c, true))
  385. poll.WaitOn(c, pollCheck(c, d2.CheckActiveContainerCount, checker.Equals(1)), poll.WithTimeout(defaultReconciliationTimeout))
  386. id2, err := d2.Cmd("ps", "-q")
  387. assert.NilError(c, err, id2)
  388. assert.Assert(c, strings.HasPrefix(id, strings.TrimSpace(id2)))
  389. }
  390. // #23705
  391. func (s *DockerSwarmSuite) TestAPISwarmRestoreOnPendingJoin(c *testing.T) {
  392. testRequires(c, Network)
  393. d := s.AddDaemon(c, false, false)
  394. client := d.NewClientT(c)
  395. err := client.SwarmJoin(context.Background(), swarm.JoinRequest{
  396. ListenAddr: d.SwarmListenAddr(),
  397. RemoteAddrs: []string{"123.123.123.123:1234"},
  398. })
  399. assert.ErrorContains(c, err, "Timeout was reached")
  400. poll.WaitOn(c, pollCheck(c, d.CheckLocalNodeState, checker.Equals(swarm.LocalNodeStatePending)), poll.WithTimeout(defaultReconciliationTimeout))
  401. d.RestartNode(c)
  402. info := d.SwarmInfo(c)
  403. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive)
  404. }
  405. func (s *DockerSwarmSuite) TestAPISwarmManagerRestore(c *testing.T) {
  406. d1 := s.AddDaemon(c, true, true)
  407. instances := 2
  408. id := d1.CreateService(c, simpleTestService, setInstances(instances))
  409. d1.GetService(c, id)
  410. d1.RestartNode(c)
  411. d1.GetService(c, id)
  412. d2 := s.AddDaemon(c, true, true)
  413. d2.GetService(c, id)
  414. d2.RestartNode(c)
  415. d2.GetService(c, id)
  416. d3 := s.AddDaemon(c, true, true)
  417. d3.GetService(c, id)
  418. d3.RestartNode(c)
  419. d3.GetService(c, id)
  420. err := d3.Kill()
  421. assert.NilError(c, err)
  422. time.Sleep(1 * time.Second) // time to handle signal
  423. d3.StartNode(c)
  424. d3.GetService(c, id)
  425. }
  426. func (s *DockerSwarmSuite) TestAPISwarmScaleNoRollingUpdate(c *testing.T) {
  427. d := s.AddDaemon(c, true, true)
  428. instances := 2
  429. id := d.CreateService(c, simpleTestService, setInstances(instances))
  430. poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount, checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout))
  431. containers := d.ActiveContainers(c)
  432. instances = 4
  433. d.UpdateService(c, d.GetService(c, id), setInstances(instances))
  434. poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount, checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout))
  435. containers2 := d.ActiveContainers(c)
  436. loop0:
  437. for _, c1 := range containers {
  438. for _, c2 := range containers2 {
  439. if c1 == c2 {
  440. continue loop0
  441. }
  442. }
  443. c.Errorf("container %v not found in new set %#v", c1, containers2)
  444. }
  445. }
  446. func (s *DockerSwarmSuite) TestAPISwarmInvalidAddress(c *testing.T) {
  447. d := s.AddDaemon(c, false, false)
  448. req := swarm.InitRequest{
  449. ListenAddr: "",
  450. }
  451. res, _, err := request.Post("/swarm/init", request.Host(d.Sock()), request.JSONBody(req))
  452. assert.NilError(c, err)
  453. assert.Equal(c, res.StatusCode, http.StatusBadRequest)
  454. req2 := swarm.JoinRequest{
  455. ListenAddr: "0.0.0.0:2377",
  456. RemoteAddrs: []string{""},
  457. }
  458. res, _, err = request.Post("/swarm/join", request.Host(d.Sock()), request.JSONBody(req2))
  459. assert.NilError(c, err)
  460. assert.Equal(c, res.StatusCode, http.StatusBadRequest)
  461. }
  462. func (s *DockerSwarmSuite) TestAPISwarmForceNewCluster(c *testing.T) {
  463. d1 := s.AddDaemon(c, true, true)
  464. d2 := s.AddDaemon(c, true, true)
  465. instances := 2
  466. id := d1.CreateService(c, simpleTestService, setInstances(instances))
  467. poll.WaitOn(c, pollCheck(c, reducedCheck(sumAsIntegers, d1.CheckActiveContainerCount, d2.CheckActiveContainerCount), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout))
  468. // drain d2, all containers should move to d1
  469. d1.UpdateNode(c, d2.NodeID(), func(n *swarm.Node) {
  470. n.Spec.Availability = swarm.NodeAvailabilityDrain
  471. })
  472. poll.WaitOn(c, pollCheck(c, d1.CheckActiveContainerCount, checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout))
  473. poll.WaitOn(c, pollCheck(c, d2.CheckActiveContainerCount, checker.Equals(0)), poll.WithTimeout(defaultReconciliationTimeout))
  474. d2.Stop(c)
  475. d1.SwarmInit(c, swarm.InitRequest{
  476. ForceNewCluster: true,
  477. Spec: swarm.Spec{},
  478. })
  479. poll.WaitOn(c, pollCheck(c, d1.CheckActiveContainerCount, checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout))
  480. d3 := s.AddDaemon(c, true, true)
  481. info := d3.SwarmInfo(c)
  482. assert.Equal(c, info.ControlAvailable, true)
  483. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  484. instances = 4
  485. d3.UpdateService(c, d3.GetService(c, id), setInstances(instances))
  486. poll.WaitOn(c, pollCheck(c, reducedCheck(sumAsIntegers, d1.CheckActiveContainerCount, d3.CheckActiveContainerCount), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout))
  487. }
  488. func simpleTestService(s *swarm.Service) {
  489. ureplicas := uint64(1)
  490. restartDelay := 100 * time.Millisecond
  491. s.Spec = swarm.ServiceSpec{
  492. TaskTemplate: swarm.TaskSpec{
  493. ContainerSpec: &swarm.ContainerSpec{
  494. Image: "busybox:latest",
  495. Command: []string{"/bin/top"},
  496. },
  497. RestartPolicy: &swarm.RestartPolicy{
  498. Delay: &restartDelay,
  499. },
  500. },
  501. Mode: swarm.ServiceMode{
  502. Replicated: &swarm.ReplicatedService{
  503. Replicas: &ureplicas,
  504. },
  505. },
  506. }
  507. s.Spec.Name = "top"
  508. }
  509. func serviceForUpdate(s *swarm.Service) {
  510. ureplicas := uint64(1)
  511. restartDelay := 100 * time.Millisecond
  512. s.Spec = swarm.ServiceSpec{
  513. TaskTemplate: swarm.TaskSpec{
  514. ContainerSpec: &swarm.ContainerSpec{
  515. Image: "busybox:latest",
  516. Command: []string{"/bin/top"},
  517. },
  518. RestartPolicy: &swarm.RestartPolicy{
  519. Delay: &restartDelay,
  520. },
  521. },
  522. Mode: swarm.ServiceMode{
  523. Replicated: &swarm.ReplicatedService{
  524. Replicas: &ureplicas,
  525. },
  526. },
  527. UpdateConfig: &swarm.UpdateConfig{
  528. Parallelism: 2,
  529. Delay: 4 * time.Second,
  530. FailureAction: swarm.UpdateFailureActionContinue,
  531. },
  532. RollbackConfig: &swarm.UpdateConfig{
  533. Parallelism: 3,
  534. Delay: 4 * time.Second,
  535. FailureAction: swarm.UpdateFailureActionContinue,
  536. },
  537. }
  538. s.Spec.Name = "updatetest"
  539. }
  540. func setInstances(replicas int) testdaemon.ServiceConstructor {
  541. ureplicas := uint64(replicas)
  542. return func(s *swarm.Service) {
  543. s.Spec.Mode = swarm.ServiceMode{
  544. Replicated: &swarm.ReplicatedService{
  545. Replicas: &ureplicas,
  546. },
  547. }
  548. }
  549. }
  550. func setUpdateOrder(order string) testdaemon.ServiceConstructor {
  551. return func(s *swarm.Service) {
  552. if s.Spec.UpdateConfig == nil {
  553. s.Spec.UpdateConfig = &swarm.UpdateConfig{}
  554. }
  555. s.Spec.UpdateConfig.Order = order
  556. }
  557. }
  558. func setRollbackOrder(order string) testdaemon.ServiceConstructor {
  559. return func(s *swarm.Service) {
  560. if s.Spec.RollbackConfig == nil {
  561. s.Spec.RollbackConfig = &swarm.UpdateConfig{}
  562. }
  563. s.Spec.RollbackConfig.Order = order
  564. }
  565. }
  566. func setImage(image string) testdaemon.ServiceConstructor {
  567. return func(s *swarm.Service) {
  568. if s.Spec.TaskTemplate.ContainerSpec == nil {
  569. s.Spec.TaskTemplate.ContainerSpec = &swarm.ContainerSpec{}
  570. }
  571. s.Spec.TaskTemplate.ContainerSpec.Image = image
  572. }
  573. }
  574. func setFailureAction(failureAction string) testdaemon.ServiceConstructor {
  575. return func(s *swarm.Service) {
  576. s.Spec.UpdateConfig.FailureAction = failureAction
  577. }
  578. }
  579. func setMaxFailureRatio(maxFailureRatio float32) testdaemon.ServiceConstructor {
  580. return func(s *swarm.Service) {
  581. s.Spec.UpdateConfig.MaxFailureRatio = maxFailureRatio
  582. }
  583. }
  584. func setParallelism(parallelism uint64) testdaemon.ServiceConstructor {
  585. return func(s *swarm.Service) {
  586. s.Spec.UpdateConfig.Parallelism = parallelism
  587. }
  588. }
  589. func setConstraints(constraints []string) testdaemon.ServiceConstructor {
  590. return func(s *swarm.Service) {
  591. if s.Spec.TaskTemplate.Placement == nil {
  592. s.Spec.TaskTemplate.Placement = &swarm.Placement{}
  593. }
  594. s.Spec.TaskTemplate.Placement.Constraints = constraints
  595. }
  596. }
  597. func setPlacementPrefs(prefs []swarm.PlacementPreference) testdaemon.ServiceConstructor {
  598. return func(s *swarm.Service) {
  599. if s.Spec.TaskTemplate.Placement == nil {
  600. s.Spec.TaskTemplate.Placement = &swarm.Placement{}
  601. }
  602. s.Spec.TaskTemplate.Placement.Preferences = prefs
  603. }
  604. }
  605. func setGlobalMode(s *swarm.Service) {
  606. s.Spec.Mode = swarm.ServiceMode{
  607. Global: &swarm.GlobalService{},
  608. }
  609. }
  610. func checkClusterHealth(c *testing.T, cl []*daemon.Daemon, managerCount, workerCount int) {
  611. var totalMCount, totalWCount int
  612. for _, d := range cl {
  613. var (
  614. info swarm.Info
  615. )
  616. // check info in a poll.WaitOn(), because if the cluster doesn't have a leader, `info` will return an error
  617. checkInfo := func(c *testing.T) (interface{}, string) {
  618. client := d.NewClientT(c)
  619. daemonInfo, err := client.Info(context.Background())
  620. info = daemonInfo.Swarm
  621. return err, "cluster not ready in time"
  622. }
  623. poll.WaitOn(c, pollCheck(c, checkInfo, checker.IsNil()), poll.WithTimeout(defaultReconciliationTimeout))
  624. if !info.ControlAvailable {
  625. totalWCount++
  626. continue
  627. }
  628. var leaderFound bool
  629. totalMCount++
  630. var mCount, wCount int
  631. for _, n := range d.ListNodes(c) {
  632. waitReady := func(c *testing.T) (interface{}, string) {
  633. if n.Status.State == swarm.NodeStateReady {
  634. return true, ""
  635. }
  636. nn := d.GetNode(c, n.ID)
  637. n = *nn
  638. return n.Status.State == swarm.NodeStateReady, fmt.Sprintf("state of node %s, reported by %s", n.ID, d.NodeID())
  639. }
  640. poll.WaitOn(c, pollCheck(c, waitReady, checker.True()), poll.WithTimeout(defaultReconciliationTimeout))
  641. waitActive := func(c *testing.T) (interface{}, string) {
  642. if n.Spec.Availability == swarm.NodeAvailabilityActive {
  643. return true, ""
  644. }
  645. nn := d.GetNode(c, n.ID)
  646. n = *nn
  647. return n.Spec.Availability == swarm.NodeAvailabilityActive, fmt.Sprintf("availability of node %s, reported by %s", n.ID, d.NodeID())
  648. }
  649. poll.WaitOn(c, pollCheck(c, waitActive, checker.True()), poll.WithTimeout(defaultReconciliationTimeout))
  650. if n.Spec.Role == swarm.NodeRoleManager {
  651. assert.Assert(c, n.ManagerStatus != nil, "manager status of node %s (manager), reported by %s", n.ID, d.NodeID())
  652. if n.ManagerStatus.Leader {
  653. leaderFound = true
  654. }
  655. mCount++
  656. } else {
  657. assert.Assert(c, n.ManagerStatus == nil, "manager status of node %s (worker), reported by %s", n.ID, d.NodeID())
  658. wCount++
  659. }
  660. }
  661. assert.Equal(c, leaderFound, true, "lack of leader reported by node %s", info.NodeID)
  662. assert.Equal(c, mCount, managerCount, "managers count reported by node %s", info.NodeID)
  663. assert.Equal(c, wCount, workerCount, "workers count reported by node %s", info.NodeID)
  664. }
  665. assert.Equal(c, totalMCount, managerCount)
  666. assert.Equal(c, totalWCount, workerCount)
  667. }
  668. func (s *DockerSwarmSuite) TestAPISwarmRestartCluster(c *testing.T) {
  669. mCount, wCount := 5, 1
  670. var nodes []*daemon.Daemon
  671. for i := 0; i < mCount; i++ {
  672. manager := s.AddDaemon(c, true, true)
  673. info := manager.SwarmInfo(c)
  674. assert.Equal(c, info.ControlAvailable, true)
  675. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  676. nodes = append(nodes, manager)
  677. }
  678. for i := 0; i < wCount; i++ {
  679. worker := s.AddDaemon(c, true, false)
  680. info := worker.SwarmInfo(c)
  681. assert.Equal(c, info.ControlAvailable, false)
  682. assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive)
  683. nodes = append(nodes, worker)
  684. }
  685. // stop whole cluster
  686. {
  687. var wg sync.WaitGroup
  688. wg.Add(len(nodes))
  689. errs := make(chan error, len(nodes))
  690. for _, d := range nodes {
  691. go func(daemon *daemon.Daemon) {
  692. defer wg.Done()
  693. if err := daemon.StopWithError(); err != nil {
  694. errs <- err
  695. }
  696. }(d)
  697. }
  698. wg.Wait()
  699. close(errs)
  700. for err := range errs {
  701. assert.NilError(c, err)
  702. }
  703. }
  704. // start whole cluster
  705. {
  706. var wg sync.WaitGroup
  707. wg.Add(len(nodes))
  708. errs := make(chan error, len(nodes))
  709. for _, d := range nodes {
  710. go func(daemon *daemon.Daemon) {
  711. defer wg.Done()
  712. if err := daemon.StartWithError("--iptables=false"); err != nil {
  713. errs <- err
  714. }
  715. }(d)
  716. }
  717. wg.Wait()
  718. close(errs)
  719. for err := range errs {
  720. assert.NilError(c, err)
  721. }
  722. }
  723. checkClusterHealth(c, nodes, mCount, wCount)
  724. }
  725. func (s *DockerSwarmSuite) TestAPISwarmServicesUpdateWithName(c *testing.T) {
  726. d := s.AddDaemon(c, true, true)
  727. instances := 2
  728. id := d.CreateService(c, simpleTestService, setInstances(instances))
  729. poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount, checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout))
  730. service := d.GetService(c, id)
  731. instances = 5
  732. setInstances(instances)(service)
  733. cli := d.NewClientT(c)
  734. defer cli.Close()
  735. _, err := cli.ServiceUpdate(context.Background(), service.Spec.Name, service.Version, service.Spec, types.ServiceUpdateOptions{})
  736. assert.NilError(c, err)
  737. poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount, checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout))
  738. }
  739. // Unlocking an unlocked swarm results in an error
  740. func (s *DockerSwarmSuite) TestAPISwarmUnlockNotLocked(c *testing.T) {
  741. d := s.AddDaemon(c, true, true)
  742. err := d.SwarmUnlock(c, swarm.UnlockRequest{UnlockKey: "wrong-key"})
  743. assert.ErrorContains(c, err, "swarm is not locked")
  744. }
  745. // #29885
  746. func (s *DockerSwarmSuite) TestAPISwarmErrorHandling(c *testing.T) {
  747. ln, err := net.Listen("tcp", fmt.Sprintf(":%d", defaultSwarmPort))
  748. assert.NilError(c, err)
  749. defer ln.Close()
  750. d := s.AddDaemon(c, false, false)
  751. client := d.NewClientT(c)
  752. _, err = client.SwarmInit(context.Background(), swarm.InitRequest{
  753. ListenAddr: d.SwarmListenAddr(),
  754. })
  755. assert.ErrorContains(c, err, "address already in use")
  756. }
  757. // Test case for 30242, where duplicate networks, with different drivers `bridge` and `overlay`,
  758. // caused both scopes to be `swarm` for `docker network inspect` and `docker network ls`.
  759. // This test makes sure the fixes correctly output scopes instead.
  760. func (s *DockerSwarmSuite) TestAPIDuplicateNetworks(c *testing.T) {
  761. d := s.AddDaemon(c, true, true)
  762. cli := d.NewClientT(c)
  763. defer cli.Close()
  764. name := "foo"
  765. networkCreate := types.NetworkCreate{
  766. CheckDuplicate: false,
  767. }
  768. networkCreate.Driver = "bridge"
  769. n1, err := cli.NetworkCreate(context.Background(), name, networkCreate)
  770. assert.NilError(c, err)
  771. networkCreate.Driver = "overlay"
  772. n2, err := cli.NetworkCreate(context.Background(), name, networkCreate)
  773. assert.NilError(c, err)
  774. r1, err := cli.NetworkInspect(context.Background(), n1.ID, types.NetworkInspectOptions{})
  775. assert.NilError(c, err)
  776. assert.Equal(c, r1.Scope, "local")
  777. r2, err := cli.NetworkInspect(context.Background(), n2.ID, types.NetworkInspectOptions{})
  778. assert.NilError(c, err)
  779. assert.Equal(c, r2.Scope, "swarm")
  780. }
  781. // Test case for 30178
  782. func (s *DockerSwarmSuite) TestAPISwarmHealthcheckNone(c *testing.T) {
  783. // Issue #36386 can be a independent one, which is worth further investigation.
  784. c.Skip("Root cause of Issue #36386 is needed")
  785. d := s.AddDaemon(c, true, true)
  786. out, err := d.Cmd("network", "create", "-d", "overlay", "lb")
  787. assert.NilError(c, err, out)
  788. instances := 1
  789. d.CreateService(c, simpleTestService, setInstances(instances), func(s *swarm.Service) {
  790. if s.Spec.TaskTemplate.ContainerSpec == nil {
  791. s.Spec.TaskTemplate.ContainerSpec = &swarm.ContainerSpec{}
  792. }
  793. s.Spec.TaskTemplate.ContainerSpec.Healthcheck = &container.HealthConfig{}
  794. s.Spec.TaskTemplate.Networks = []swarm.NetworkAttachmentConfig{
  795. {Target: "lb"},
  796. }
  797. })
  798. poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount, checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout))
  799. containers := d.ActiveContainers(c)
  800. out, err = d.Cmd("exec", containers[0], "ping", "-c1", "-W3", "top")
  801. assert.NilError(c, err, out)
  802. }
  803. func (s *DockerSwarmSuite) TestSwarmRepeatedRootRotation(c *testing.T) {
  804. m := s.AddDaemon(c, true, true)
  805. w := s.AddDaemon(c, true, false)
  806. info := m.SwarmInfo(c)
  807. currentTrustRoot := info.Cluster.TLSInfo.TrustRoot
  808. // rotate multiple times
  809. for i := 0; i < 4; i++ {
  810. var err error
  811. var cert, key []byte
  812. if i%2 != 0 {
  813. cert, _, key, err = initca.New(&csr.CertificateRequest{
  814. CN: "newRoot",
  815. KeyRequest: csr.NewBasicKeyRequest(),
  816. CA: &csr.CAConfig{Expiry: ca.RootCAExpiration},
  817. })
  818. assert.NilError(c, err)
  819. }
  820. expectedCert := string(cert)
  821. m.UpdateSwarm(c, func(s *swarm.Spec) {
  822. s.CAConfig.SigningCACert = expectedCert
  823. s.CAConfig.SigningCAKey = string(key)
  824. s.CAConfig.ForceRotate++
  825. })
  826. // poll to make sure update succeeds
  827. var clusterTLSInfo swarm.TLSInfo
  828. for j := 0; j < 18; j++ {
  829. info := m.SwarmInfo(c)
  830. // the desired CA cert and key is always redacted
  831. assert.Equal(c, info.Cluster.Spec.CAConfig.SigningCAKey, "")
  832. assert.Equal(c, info.Cluster.Spec.CAConfig.SigningCACert, "")
  833. clusterTLSInfo = info.Cluster.TLSInfo
  834. // if root rotation is done and the trust root has changed, we don't have to poll anymore
  835. if !info.Cluster.RootRotationInProgress && clusterTLSInfo.TrustRoot != currentTrustRoot {
  836. break
  837. }
  838. // root rotation not done
  839. time.Sleep(250 * time.Millisecond)
  840. }
  841. if cert != nil {
  842. assert.Equal(c, clusterTLSInfo.TrustRoot, expectedCert)
  843. }
  844. // could take another second or two for the nodes to trust the new roots after they've all gotten
  845. // new TLS certificates
  846. for j := 0; j < 18; j++ {
  847. mInfo := m.GetNode(c, m.NodeID()).Description.TLSInfo
  848. wInfo := m.GetNode(c, w.NodeID()).Description.TLSInfo
  849. if mInfo.TrustRoot == clusterTLSInfo.TrustRoot && wInfo.TrustRoot == clusterTLSInfo.TrustRoot {
  850. break
  851. }
  852. // nodes don't trust root certs yet
  853. time.Sleep(250 * time.Millisecond)
  854. }
  855. assert.DeepEqual(c, m.GetNode(c, m.NodeID()).Description.TLSInfo, clusterTLSInfo)
  856. assert.DeepEqual(c, m.GetNode(c, w.NodeID()).Description.TLSInfo, clusterTLSInfo)
  857. currentTrustRoot = clusterTLSInfo.TrustRoot
  858. }
  859. }
  860. func (s *DockerSwarmSuite) TestAPINetworkInspectWithScope(c *testing.T) {
  861. d := s.AddDaemon(c, true, true)
  862. name := "test-scoped-network"
  863. ctx := context.Background()
  864. apiclient := d.NewClientT(c)
  865. resp, err := apiclient.NetworkCreate(ctx, name, types.NetworkCreate{Driver: "overlay"})
  866. assert.NilError(c, err)
  867. network, err := apiclient.NetworkInspect(ctx, name, types.NetworkInspectOptions{})
  868. assert.NilError(c, err)
  869. assert.Check(c, is.Equal("swarm", network.Scope))
  870. assert.Check(c, is.Equal(resp.ID, network.ID))
  871. _, err = apiclient.NetworkInspect(ctx, name, types.NetworkInspectOptions{Scope: "local"})
  872. assert.Check(c, client.IsErrNotFound(err))
  873. }