docker_api_swarm_test.go 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006
  1. // +build !windows
  2. package main
  3. import (
  4. "fmt"
  5. "net/http"
  6. "os"
  7. "path/filepath"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. "syscall"
  12. "time"
  13. "github.com/docker/docker/pkg/integration/checker"
  14. "github.com/docker/engine-api/types/swarm"
  15. "github.com/go-check/check"
  16. )
  // defaultReconciliationTimeout is the timeout handed to waitAndAssert by the
  // swarm tests in this file whenever they wait for the cluster to reach an
  // expected state.
  var (
  	defaultReconciliationTimeout = 30 * time.Second
  )
  18. func (s *DockerSwarmSuite) TestApiSwarmInit(c *check.C) {
  19. // todo: should find a better way to verify that components are running than /info
  20. d1 := s.AddDaemon(c, true, true)
  21. info, err := d1.info()
  22. c.Assert(err, checker.IsNil)
  23. c.Assert(info.ControlAvailable, checker.True)
  24. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  25. d2 := s.AddDaemon(c, true, false)
  26. info, err = d2.info()
  27. c.Assert(err, checker.IsNil)
  28. c.Assert(info.ControlAvailable, checker.False)
  29. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  30. // Leaving cluster
  31. c.Assert(d2.Leave(false), checker.IsNil)
  32. info, err = d2.info()
  33. c.Assert(err, checker.IsNil)
  34. c.Assert(info.ControlAvailable, checker.False)
  35. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  36. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: d1.joinTokens(c).Worker, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  37. info, err = d2.info()
  38. c.Assert(err, checker.IsNil)
  39. c.Assert(info.ControlAvailable, checker.False)
  40. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  41. // Current state restoring after restarts
  42. err = d1.Stop()
  43. c.Assert(err, checker.IsNil)
  44. err = d2.Stop()
  45. c.Assert(err, checker.IsNil)
  46. err = d1.Start()
  47. c.Assert(err, checker.IsNil)
  48. err = d2.Start()
  49. c.Assert(err, checker.IsNil)
  50. info, err = d1.info()
  51. c.Assert(err, checker.IsNil)
  52. c.Assert(info.ControlAvailable, checker.True)
  53. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  54. info, err = d2.info()
  55. c.Assert(err, checker.IsNil)
  56. c.Assert(info.ControlAvailable, checker.False)
  57. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  58. }
  59. func (s *DockerSwarmSuite) TestApiSwarmJoinToken(c *check.C) {
  60. d1 := s.AddDaemon(c, false, false)
  61. c.Assert(d1.Init(swarm.InitRequest{}), checker.IsNil)
  62. d2 := s.AddDaemon(c, false, false)
  63. err := d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
  64. c.Assert(err, checker.NotNil)
  65. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  66. info, err := d2.info()
  67. c.Assert(err, checker.IsNil)
  68. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  69. err = d2.Join(swarm.JoinRequest{JoinToken: "foobaz", RemoteAddrs: []string{d1.listenAddr}})
  70. c.Assert(err, checker.NotNil)
  71. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  72. info, err = d2.info()
  73. c.Assert(err, checker.IsNil)
  74. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  75. workerToken := d1.joinTokens(c).Worker
  76. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  77. info, err = d2.info()
  78. c.Assert(err, checker.IsNil)
  79. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  80. c.Assert(d2.Leave(false), checker.IsNil)
  81. info, err = d2.info()
  82. c.Assert(err, checker.IsNil)
  83. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  84. // change tokens
  85. d1.rotateTokens(c)
  86. err = d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}})
  87. c.Assert(err, checker.NotNil)
  88. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  89. info, err = d2.info()
  90. c.Assert(err, checker.IsNil)
  91. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  92. workerToken = d1.joinTokens(c).Worker
  93. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  94. info, err = d2.info()
  95. c.Assert(err, checker.IsNil)
  96. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  97. c.Assert(d2.Leave(false), checker.IsNil)
  98. info, err = d2.info()
  99. c.Assert(err, checker.IsNil)
  100. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  101. // change spec, don't change tokens
  102. d1.updateSwarm(c, func(s *swarm.Spec) {})
  103. err = d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
  104. c.Assert(err, checker.NotNil)
  105. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  106. info, err = d2.info()
  107. c.Assert(err, checker.IsNil)
  108. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  109. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  110. info, err = d2.info()
  111. c.Assert(err, checker.IsNil)
  112. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  113. c.Assert(d2.Leave(false), checker.IsNil)
  114. info, err = d2.info()
  115. c.Assert(err, checker.IsNil)
  116. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  117. }
  118. func (s *DockerSwarmSuite) TestApiSwarmCAHash(c *check.C) {
  119. d1 := s.AddDaemon(c, true, true)
  120. d2 := s.AddDaemon(c, false, false)
  121. splitToken := strings.Split(d1.joinTokens(c).Worker, "-")
  122. splitToken[2] = "1kxftv4ofnc6mt30lmgipg6ngf9luhwqopfk1tz6bdmnkubg0e"
  123. replacementToken := strings.Join(splitToken, "-")
  124. err := d2.Join(swarm.JoinRequest{JoinToken: replacementToken, RemoteAddrs: []string{d1.listenAddr}})
  125. c.Assert(err, checker.NotNil)
  126. c.Assert(err.Error(), checker.Contains, "remote CA does not match fingerprint")
  127. }
  128. func (s *DockerSwarmSuite) TestApiSwarmPromoteDemote(c *check.C) {
  129. d1 := s.AddDaemon(c, false, false)
  130. c.Assert(d1.Init(swarm.InitRequest{}), checker.IsNil)
  131. d2 := s.AddDaemon(c, true, false)
  132. info, err := d2.info()
  133. c.Assert(err, checker.IsNil)
  134. c.Assert(info.ControlAvailable, checker.False)
  135. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  136. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  137. n.Spec.Role = swarm.NodeRoleManager
  138. })
  139. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True)
  140. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  141. n.Spec.Role = swarm.NodeRoleWorker
  142. })
  143. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.False)
  144. // Demoting last node should fail
  145. node := d1.getNode(c, d1.NodeID)
  146. node.Spec.Role = swarm.NodeRoleWorker
  147. url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index)
  148. status, out, err := d1.SockRequest("POST", url, node.Spec)
  149. c.Assert(err, checker.IsNil)
  150. c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("output: %q", string(out)))
  151. c.Assert(string(out), checker.Contains, "last manager of the swarm")
  152. info, err = d1.info()
  153. c.Assert(err, checker.IsNil)
  154. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  155. c.Assert(info.ControlAvailable, checker.True)
  156. // Promote already demoted node
  157. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  158. n.Spec.Role = swarm.NodeRoleManager
  159. })
  160. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True)
  161. }
  162. func (s *DockerSwarmSuite) TestApiSwarmServicesEmptyList(c *check.C) {
  163. d := s.AddDaemon(c, true, true)
  164. services := d.listServices(c)
  165. c.Assert(services, checker.NotNil)
  166. c.Assert(len(services), checker.Equals, 0, check.Commentf("services: %#v", services))
  167. }
  168. func (s *DockerSwarmSuite) TestApiSwarmServicesCreate(c *check.C) {
  169. d := s.AddDaemon(c, true, true)
  170. instances := 2
  171. id := d.createService(c, simpleTestService, setInstances(instances))
  172. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  173. service := d.getService(c, id)
  174. instances = 5
  175. d.updateService(c, service, setInstances(instances))
  176. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  177. d.removeService(c, service.ID)
  178. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 0)
  179. }
  180. func (s *DockerSwarmSuite) TestApiSwarmServicesMultipleAgents(c *check.C) {
  181. d1 := s.AddDaemon(c, true, true)
  182. d2 := s.AddDaemon(c, true, false)
  183. d3 := s.AddDaemon(c, true, false)
  184. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
  185. instances := 9
  186. id := d1.createService(c, simpleTestService, setInstances(instances))
  187. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  188. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  189. waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.GreaterThan, 0)
  190. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  191. // reconciliation on d2 node down
  192. c.Assert(d2.Stop(), checker.IsNil)
  193. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  194. // test downscaling
  195. instances = 5
  196. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  197. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  198. }
  199. func (s *DockerSwarmSuite) TestApiSwarmServicesCreateGlobal(c *check.C) {
  200. d1 := s.AddDaemon(c, true, true)
  201. d2 := s.AddDaemon(c, true, false)
  202. d3 := s.AddDaemon(c, true, false)
  203. d1.createService(c, simpleTestService, setGlobalMode)
  204. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, 1)
  205. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
  206. waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.Equals, 1)
  207. d4 := s.AddDaemon(c, true, false)
  208. d5 := s.AddDaemon(c, true, false)
  209. waitAndAssert(c, defaultReconciliationTimeout, d4.checkActiveContainerCount, checker.Equals, 1)
  210. waitAndAssert(c, defaultReconciliationTimeout, d5.checkActiveContainerCount, checker.Equals, 1)
  211. }
  212. func (s *DockerSwarmSuite) TestApiSwarmServicesUpdate(c *check.C) {
  213. const nodeCount = 3
  214. var daemons [nodeCount]*SwarmDaemon
  215. for i := 0; i < nodeCount; i++ {
  216. daemons[i] = s.AddDaemon(c, true, i == 0)
  217. }
  218. // wait for nodes ready
  219. waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
  220. // service image at start
  221. image1 := "busybox:latest"
  222. // target image in update
  223. image2 := "busybox:test"
  224. // create a different tag
  225. for _, d := range daemons {
  226. out, err := d.Cmd("tag", image1, image2)
  227. c.Assert(err, checker.IsNil, check.Commentf(out))
  228. }
  229. // create service
  230. instances := 5
  231. parallelism := 2
  232. id := daemons[0].createService(c, serviceForUpdate, setInstances(instances))
  233. // wait for tasks ready
  234. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  235. map[string]int{image1: instances})
  236. // issue service update
  237. service := daemons[0].getService(c, id)
  238. daemons[0].updateService(c, service, setImage(image2))
  239. // first batch
  240. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  241. map[string]int{image1: instances - parallelism, image2: parallelism})
  242. // 2nd batch
  243. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  244. map[string]int{image1: instances - 2*parallelism, image2: 2 * parallelism})
  245. // 3nd batch
  246. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  247. map[string]int{image2: instances})
  248. }
  249. func (s *DockerSwarmSuite) TestApiSwarmServicesStateReporting(c *check.C) {
  250. testRequires(c, SameHostDaemon)
  251. testRequires(c, DaemonIsLinux)
  252. d1 := s.AddDaemon(c, true, true)
  253. d2 := s.AddDaemon(c, true, true)
  254. d3 := s.AddDaemon(c, true, false)
  255. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept
  256. instances := 9
  257. d1.createService(c, simpleTestService, setInstances(instances))
  258. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  259. getContainers := func() map[string]*SwarmDaemon {
  260. m := make(map[string]*SwarmDaemon)
  261. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  262. for _, id := range d.activeContainers() {
  263. m[id] = d
  264. }
  265. }
  266. return m
  267. }
  268. containers := getContainers()
  269. c.Assert(containers, checker.HasLen, instances)
  270. var toRemove string
  271. for i := range containers {
  272. toRemove = i
  273. }
  274. _, err := containers[toRemove].Cmd("stop", toRemove)
  275. c.Assert(err, checker.IsNil)
  276. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  277. containers2 := getContainers()
  278. c.Assert(containers2, checker.HasLen, instances)
  279. for i := range containers {
  280. if i == toRemove {
  281. c.Assert(containers2[i], checker.IsNil)
  282. } else {
  283. c.Assert(containers2[i], checker.NotNil)
  284. }
  285. }
  286. containers = containers2
  287. for i := range containers {
  288. toRemove = i
  289. }
  290. // try with killing process outside of docker
  291. pidStr, err := containers[toRemove].Cmd("inspect", "-f", "{{.State.Pid}}", toRemove)
  292. c.Assert(err, checker.IsNil)
  293. pid, err := strconv.Atoi(strings.TrimSpace(pidStr))
  294. c.Assert(err, checker.IsNil)
  295. c.Assert(syscall.Kill(pid, syscall.SIGKILL), checker.IsNil)
  296. time.Sleep(time.Second) // give some time to handle the signal
  297. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  298. containers2 = getContainers()
  299. c.Assert(containers2, checker.HasLen, instances)
  300. for i := range containers {
  301. if i == toRemove {
  302. c.Assert(containers2[i], checker.IsNil)
  303. } else {
  304. c.Assert(containers2[i], checker.NotNil)
  305. }
  306. }
  307. }
  308. func (s *DockerSwarmSuite) TestApiSwarmLeaderProxy(c *check.C) {
  309. // add three managers, one of these is leader
  310. d1 := s.AddDaemon(c, true, true)
  311. d2 := s.AddDaemon(c, true, true)
  312. d3 := s.AddDaemon(c, true, true)
  313. // start a service by hitting each of the 3 managers
  314. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  315. s.Spec.Name = "test1"
  316. })
  317. d2.createService(c, simpleTestService, func(s *swarm.Service) {
  318. s.Spec.Name = "test2"
  319. })
  320. d3.createService(c, simpleTestService, func(s *swarm.Service) {
  321. s.Spec.Name = "test3"
  322. })
  323. // 3 services should be started now, because the requests were proxied to leader
  324. // query each node and make sure it returns 3 services
  325. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  326. services := d.listServices(c)
  327. c.Assert(services, checker.HasLen, 3)
  328. }
  329. }
  330. func (s *DockerSwarmSuite) TestApiSwarmLeaderElection(c *check.C) {
  331. // Create 3 nodes
  332. d1 := s.AddDaemon(c, true, true)
  333. d2 := s.AddDaemon(c, true, true)
  334. d3 := s.AddDaemon(c, true, true)
  335. // assert that the first node we made is the leader, and the other two are followers
  336. c.Assert(d1.getNode(c, d1.NodeID).ManagerStatus.Leader, checker.True)
  337. c.Assert(d1.getNode(c, d2.NodeID).ManagerStatus.Leader, checker.False)
  338. c.Assert(d1.getNode(c, d3.NodeID).ManagerStatus.Leader, checker.False)
  339. d1.Stop() // stop the leader
  340. var (
  341. leader *SwarmDaemon // keep track of leader
  342. followers []*SwarmDaemon // keep track of followers
  343. )
  344. checkLeader := func(nodes ...*SwarmDaemon) checkF {
  345. return func(c *check.C) (interface{}, check.CommentInterface) {
  346. // clear these out before each run
  347. leader = nil
  348. followers = nil
  349. for _, d := range nodes {
  350. if d.getNode(c, d.NodeID).ManagerStatus.Leader {
  351. leader = d
  352. } else {
  353. followers = append(followers, d)
  354. }
  355. }
  356. if leader == nil {
  357. return false, check.Commentf("no leader elected")
  358. }
  359. return true, check.Commentf("elected %v", leader.id)
  360. }
  361. }
  362. // wait for an election to occur
  363. waitAndAssert(c, defaultReconciliationTimeout, checkLeader(d2, d3), checker.True)
  364. // assert that we have a new leader
  365. c.Assert(leader, checker.NotNil)
  366. // Keep track of the current leader, since we want that to be chosen.
  367. stableleader := leader
  368. // add the d1, the initial leader, back
  369. d1.Start()
  370. // TODO(stevvooe): may need to wait for rejoin here
  371. // wait for possible election
  372. waitAndAssert(c, defaultReconciliationTimeout, checkLeader(d1, d2, d3), checker.True)
  373. // pick out the leader and the followers again
  374. // verify that we still only have 1 leader and 2 followers
  375. c.Assert(leader, checker.NotNil)
  376. c.Assert(followers, checker.HasLen, 2)
  377. // and that after we added d1 back, the leader hasn't changed
  378. c.Assert(leader.NodeID, checker.Equals, stableleader.NodeID)
  379. }
  380. func (s *DockerSwarmSuite) TestApiSwarmRaftQuorum(c *check.C) {
  381. d1 := s.AddDaemon(c, true, true)
  382. d2 := s.AddDaemon(c, true, true)
  383. d3 := s.AddDaemon(c, true, true)
  384. d1.createService(c, simpleTestService)
  385. c.Assert(d2.Stop(), checker.IsNil)
  386. // make sure there is a leader
  387. waitAndAssert(c, defaultReconciliationTimeout, d1.checkLeader, checker.IsNil)
  388. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  389. s.Spec.Name = "top1"
  390. })
  391. c.Assert(d3.Stop(), checker.IsNil)
  392. // make sure there is a leader
  393. waitAndAssert(c, defaultReconciliationTimeout, d1.checkLeader, checker.IsNil)
  394. var service swarm.Service
  395. simpleTestService(&service)
  396. service.Spec.Name = "top2"
  397. status, out, err := d1.SockRequest("POST", "/services/create", service.Spec)
  398. c.Assert(err, checker.IsNil)
  399. c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("deadline exceeded", string(out)))
  400. c.Assert(d2.Start(), checker.IsNil)
  401. // make sure there is a leader
  402. waitAndAssert(c, defaultReconciliationTimeout, d1.checkLeader, checker.IsNil)
  403. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  404. s.Spec.Name = "top3"
  405. })
  406. }
  407. func (s *DockerSwarmSuite) TestApiSwarmListNodes(c *check.C) {
  408. d1 := s.AddDaemon(c, true, true)
  409. d2 := s.AddDaemon(c, true, false)
  410. d3 := s.AddDaemon(c, true, false)
  411. nodes := d1.listNodes(c)
  412. c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes))
  413. loop0:
  414. for _, n := range nodes {
  415. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  416. if n.ID == d.NodeID {
  417. continue loop0
  418. }
  419. }
  420. c.Errorf("unknown nodeID %v", n.ID)
  421. }
  422. }
  423. func (s *DockerSwarmSuite) TestApiSwarmNodeUpdate(c *check.C) {
  424. d := s.AddDaemon(c, true, true)
  425. nodes := d.listNodes(c)
  426. d.updateNode(c, nodes[0].ID, func(n *swarm.Node) {
  427. n.Spec.Availability = swarm.NodeAvailabilityPause
  428. })
  429. n := d.getNode(c, nodes[0].ID)
  430. c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityPause)
  431. }
  432. func (s *DockerSwarmSuite) TestApiSwarmNodeRemove(c *check.C) {
  433. testRequires(c, Network)
  434. d1 := s.AddDaemon(c, true, true)
  435. d2 := s.AddDaemon(c, true, false)
  436. _ = s.AddDaemon(c, true, false)
  437. nodes := d1.listNodes(c)
  438. c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes))
  439. // Getting the info so we can take the NodeID
  440. d2Info, err := d2.info()
  441. c.Assert(err, checker.IsNil)
  442. // forceful removal of d2 should work
  443. d1.removeNode(c, d2Info.NodeID, true)
  444. nodes = d1.listNodes(c)
  445. c.Assert(len(nodes), checker.Equals, 2, check.Commentf("nodes: %#v", nodes))
  446. // Restart the node that was removed
  447. err = d2.Restart()
  448. c.Assert(err, checker.IsNil)
  449. // Give some time for the node to rejoin
  450. time.Sleep(1 * time.Second)
  451. // Make sure the node didn't rejoin
  452. nodes = d1.listNodes(c)
  453. c.Assert(len(nodes), checker.Equals, 2, check.Commentf("nodes: %#v", nodes))
  454. }
  455. func (s *DockerSwarmSuite) TestApiSwarmNodeDrainPause(c *check.C) {
  456. d1 := s.AddDaemon(c, true, true)
  457. d2 := s.AddDaemon(c, true, false)
  458. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
  459. // start a service, expect balanced distribution
  460. instances := 8
  461. id := d1.createService(c, simpleTestService, setInstances(instances))
  462. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  463. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  464. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  465. // drain d2, all containers should move to d1
  466. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  467. n.Spec.Availability = swarm.NodeAvailabilityDrain
  468. })
  469. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
  470. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0)
  471. // set d2 back to active
  472. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  473. n.Spec.Availability = swarm.NodeAvailabilityActive
  474. })
  475. instances = 1
  476. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  477. waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  478. instances = 8
  479. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  480. // drained node first so we don't get any old containers
  481. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  482. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  483. waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  484. d2ContainerCount := len(d2.activeContainers())
  485. // set d2 to paused, scale service up, only d1 gets new tasks
  486. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  487. n.Spec.Availability = swarm.NodeAvailabilityPause
  488. })
  489. instances = 14
  490. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  491. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances-d2ContainerCount)
  492. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, d2ContainerCount)
  493. }
  494. func (s *DockerSwarmSuite) TestApiSwarmLeaveRemovesContainer(c *check.C) {
  495. d := s.AddDaemon(c, true, true)
  496. instances := 2
  497. d.createService(c, simpleTestService, setInstances(instances))
  498. id, err := d.Cmd("run", "-d", "busybox", "top")
  499. c.Assert(err, checker.IsNil)
  500. id = strings.TrimSpace(id)
  501. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances+1)
  502. c.Assert(d.Leave(false), checker.NotNil)
  503. c.Assert(d.Leave(true), checker.IsNil)
  504. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 1)
  505. id2, err := d.Cmd("ps", "-q")
  506. c.Assert(err, checker.IsNil)
  507. c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
  508. }
  509. // #23629
  510. func (s *DockerSwarmSuite) TestApiSwarmLeaveOnPendingJoin(c *check.C) {
  511. s.AddDaemon(c, true, true)
  512. d2 := s.AddDaemon(c, false, false)
  513. id, err := d2.Cmd("run", "-d", "busybox", "top")
  514. c.Assert(err, checker.IsNil)
  515. id = strings.TrimSpace(id)
  516. go d2.Join(swarm.JoinRequest{
  517. RemoteAddrs: []string{"nosuchhost:1234"},
  518. })
  519. waitAndAssert(c, defaultReconciliationTimeout, d2.checkLocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  520. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
  521. id2, err := d2.Cmd("ps", "-q")
  522. c.Assert(err, checker.IsNil)
  523. c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
  524. }
  525. // #23705
  526. func (s *DockerSwarmSuite) TestApiSwarmRestoreOnPendingJoin(c *check.C) {
  527. d := s.AddDaemon(c, false, false)
  528. go d.Join(swarm.JoinRequest{
  529. RemoteAddrs: []string{"nosuchhost:1234"},
  530. })
  531. waitAndAssert(c, defaultReconciliationTimeout, d.checkLocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  532. c.Assert(d.Stop(), checker.IsNil)
  533. c.Assert(d.Start(), checker.IsNil)
  534. info, err := d.info()
  535. c.Assert(err, checker.IsNil)
  536. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  537. }
  538. func (s *DockerSwarmSuite) TestApiSwarmManagerRestore(c *check.C) {
  539. d1 := s.AddDaemon(c, true, true)
  540. instances := 2
  541. id := d1.createService(c, simpleTestService, setInstances(instances))
  542. d1.getService(c, id)
  543. d1.Stop()
  544. d1.Start()
  545. d1.getService(c, id)
  546. d2 := s.AddDaemon(c, true, true)
  547. d2.getService(c, id)
  548. d2.Stop()
  549. d2.Start()
  550. d2.getService(c, id)
  551. d3 := s.AddDaemon(c, true, true)
  552. d3.getService(c, id)
  553. d3.Stop()
  554. d3.Start()
  555. d3.getService(c, id)
  556. d3.Kill()
  557. time.Sleep(1 * time.Second) // time to handle signal
  558. d3.Start()
  559. d3.getService(c, id)
  560. }
  561. func (s *DockerSwarmSuite) TestApiSwarmScaleNoRollingUpdate(c *check.C) {
  562. d := s.AddDaemon(c, true, true)
  563. instances := 2
  564. id := d.createService(c, simpleTestService, setInstances(instances))
  565. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  566. containers := d.activeContainers()
  567. instances = 4
  568. d.updateService(c, d.getService(c, id), setInstances(instances))
  569. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  570. containers2 := d.activeContainers()
  571. loop0:
  572. for _, c1 := range containers {
  573. for _, c2 := range containers2 {
  574. if c1 == c2 {
  575. continue loop0
  576. }
  577. }
  578. c.Errorf("container %v not found in new set %#v", c1, containers2)
  579. }
  580. }
  581. func (s *DockerSwarmSuite) TestApiSwarmInvalidAddress(c *check.C) {
  582. d := s.AddDaemon(c, false, false)
  583. req := swarm.InitRequest{
  584. ListenAddr: "",
  585. }
  586. status, _, err := d.SockRequest("POST", "/swarm/init", req)
  587. c.Assert(err, checker.IsNil)
  588. c.Assert(status, checker.Equals, http.StatusInternalServerError)
  589. req2 := swarm.JoinRequest{
  590. ListenAddr: "0.0.0.0:2377",
  591. RemoteAddrs: []string{""},
  592. }
  593. status, _, err = d.SockRequest("POST", "/swarm/join", req2)
  594. c.Assert(err, checker.IsNil)
  595. c.Assert(status, checker.Equals, http.StatusInternalServerError)
  596. }
  597. func (s *DockerSwarmSuite) TestApiSwarmForceNewCluster(c *check.C) {
  598. d1 := s.AddDaemon(c, true, true)
  599. d2 := s.AddDaemon(c, true, true)
  600. instances := 2
  601. id := d1.createService(c, simpleTestService, setInstances(instances))
  602. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  603. // drain d2, all containers should move to d1
  604. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  605. n.Spec.Availability = swarm.NodeAvailabilityDrain
  606. })
  607. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
  608. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0)
  609. c.Assert(d2.Stop(), checker.IsNil)
  610. c.Assert(d1.Init(swarm.InitRequest{
  611. ForceNewCluster: true,
  612. Spec: swarm.Spec{},
  613. }), checker.IsNil)
  614. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
  615. d3 := s.AddDaemon(c, true, true)
  616. info, err := d3.info()
  617. c.Assert(err, checker.IsNil)
  618. c.Assert(info.ControlAvailable, checker.True)
  619. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  620. instances = 4
  621. d3.updateService(c, d3.getService(c, id), setInstances(instances))
  622. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  623. }
  624. func simpleTestService(s *swarm.Service) {
  625. ureplicas := uint64(1)
  626. restartDelay := time.Duration(100 * time.Millisecond)
  627. s.Spec = swarm.ServiceSpec{
  628. TaskTemplate: swarm.TaskSpec{
  629. ContainerSpec: swarm.ContainerSpec{
  630. Image: "busybox:latest",
  631. Command: []string{"/bin/top"},
  632. },
  633. RestartPolicy: &swarm.RestartPolicy{
  634. Delay: &restartDelay,
  635. },
  636. },
  637. Mode: swarm.ServiceMode{
  638. Replicated: &swarm.ReplicatedService{
  639. Replicas: &ureplicas,
  640. },
  641. },
  642. }
  643. s.Spec.Name = "top"
  644. }
  645. func serviceForUpdate(s *swarm.Service) {
  646. ureplicas := uint64(1)
  647. restartDelay := time.Duration(100 * time.Millisecond)
  648. s.Spec = swarm.ServiceSpec{
  649. TaskTemplate: swarm.TaskSpec{
  650. ContainerSpec: swarm.ContainerSpec{
  651. Image: "busybox:latest",
  652. Command: []string{"/bin/top"},
  653. },
  654. RestartPolicy: &swarm.RestartPolicy{
  655. Delay: &restartDelay,
  656. },
  657. },
  658. Mode: swarm.ServiceMode{
  659. Replicated: &swarm.ReplicatedService{
  660. Replicas: &ureplicas,
  661. },
  662. },
  663. UpdateConfig: &swarm.UpdateConfig{
  664. Parallelism: 2,
  665. Delay: 4 * time.Second,
  666. FailureAction: swarm.UpdateFailureActionContinue,
  667. },
  668. }
  669. s.Spec.Name = "updatetest"
  670. }
  671. func setInstances(replicas int) serviceConstructor {
  672. ureplicas := uint64(replicas)
  673. return func(s *swarm.Service) {
  674. s.Spec.Mode = swarm.ServiceMode{
  675. Replicated: &swarm.ReplicatedService{
  676. Replicas: &ureplicas,
  677. },
  678. }
  679. }
  680. }
  681. func setImage(image string) serviceConstructor {
  682. return func(s *swarm.Service) {
  683. s.Spec.TaskTemplate.ContainerSpec.Image = image
  684. }
  685. }
  686. func setGlobalMode(s *swarm.Service) {
  687. s.Spec.Mode = swarm.ServiceMode{
  688. Global: &swarm.GlobalService{},
  689. }
  690. }
  691. func checkClusterHealth(c *check.C, cl []*SwarmDaemon, managerCount, workerCount int) {
  692. var totalMCount, totalWCount int
  693. for _, d := range cl {
  694. var (
  695. info swarm.Info
  696. err error
  697. )
  698. // check info in a waitAndAssert, because if the cluster doesn't have a leader, `info` will return an error
  699. checkInfo := func(c *check.C) (interface{}, check.CommentInterface) {
  700. info, err = d.info()
  701. return err, check.Commentf("cluster not ready in time")
  702. }
  703. waitAndAssert(c, defaultReconciliationTimeout, checkInfo, checker.IsNil)
  704. if !info.ControlAvailable {
  705. totalWCount++
  706. continue
  707. }
  708. var leaderFound bool
  709. totalMCount++
  710. var mCount, wCount int
  711. for _, n := range d.listNodes(c) {
  712. waitReady := func(c *check.C) (interface{}, check.CommentInterface) {
  713. if n.Status.State == swarm.NodeStateReady {
  714. return true, nil
  715. }
  716. nn := d.getNode(c, n.ID)
  717. n = *nn
  718. return n.Status.State == swarm.NodeStateReady, check.Commentf("state of node %s, reported by %s", n.ID, d.Info.NodeID)
  719. }
  720. waitAndAssert(c, defaultReconciliationTimeout, waitReady, checker.True)
  721. waitActive := func(c *check.C) (interface{}, check.CommentInterface) {
  722. if n.Spec.Availability == swarm.NodeAvailabilityActive {
  723. return true, nil
  724. }
  725. nn := d.getNode(c, n.ID)
  726. n = *nn
  727. return n.Spec.Availability == swarm.NodeAvailabilityActive, check.Commentf("availability of node %s, reported by %s", n.ID, d.Info.NodeID)
  728. }
  729. waitAndAssert(c, defaultReconciliationTimeout, waitActive, checker.True)
  730. if n.Spec.Role == swarm.NodeRoleManager {
  731. c.Assert(n.ManagerStatus, checker.NotNil, check.Commentf("manager status of node %s (manager), reported by %s", n.ID, d.Info.NodeID))
  732. if n.ManagerStatus.Leader {
  733. leaderFound = true
  734. }
  735. mCount++
  736. } else {
  737. c.Assert(n.ManagerStatus, checker.IsNil, check.Commentf("manager status of node %s (worker), reported by %s", n.ID, d.Info.NodeID))
  738. wCount++
  739. }
  740. }
  741. c.Assert(leaderFound, checker.True, check.Commentf("lack of leader reported by node %s", info.NodeID))
  742. c.Assert(mCount, checker.Equals, managerCount, check.Commentf("managers count reported by node %s", info.NodeID))
  743. c.Assert(wCount, checker.Equals, workerCount, check.Commentf("workers count reported by node %s", info.NodeID))
  744. }
  745. c.Assert(totalMCount, checker.Equals, managerCount)
  746. c.Assert(totalWCount, checker.Equals, workerCount)
  747. }
  748. func (s *DockerSwarmSuite) TestApiSwarmRestartCluster(c *check.C) {
  749. mCount, wCount := 5, 1
  750. var nodes []*SwarmDaemon
  751. for i := 0; i < mCount; i++ {
  752. manager := s.AddDaemon(c, true, true)
  753. info, err := manager.info()
  754. c.Assert(err, checker.IsNil)
  755. c.Assert(info.ControlAvailable, checker.True)
  756. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  757. nodes = append(nodes, manager)
  758. }
  759. for i := 0; i < wCount; i++ {
  760. worker := s.AddDaemon(c, true, false)
  761. info, err := worker.info()
  762. c.Assert(err, checker.IsNil)
  763. c.Assert(info.ControlAvailable, checker.False)
  764. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  765. nodes = append(nodes, worker)
  766. }
  767. // stop whole cluster
  768. {
  769. var wg sync.WaitGroup
  770. wg.Add(len(nodes))
  771. errs := make(chan error, len(nodes))
  772. for _, d := range nodes {
  773. go func(daemon *SwarmDaemon) {
  774. defer wg.Done()
  775. if err := daemon.Stop(); err != nil {
  776. errs <- err
  777. }
  778. if root := os.Getenv("DOCKER_REMAP_ROOT"); root != "" {
  779. daemon.root = filepath.Dir(daemon.root)
  780. }
  781. }(d)
  782. }
  783. wg.Wait()
  784. close(errs)
  785. for err := range errs {
  786. c.Assert(err, check.IsNil)
  787. }
  788. }
  789. // start whole cluster
  790. {
  791. var wg sync.WaitGroup
  792. wg.Add(len(nodes))
  793. errs := make(chan error, len(nodes))
  794. for _, d := range nodes {
  795. go func(daemon *SwarmDaemon) {
  796. defer wg.Done()
  797. if err := daemon.Start("--iptables=false"); err != nil {
  798. errs <- err
  799. }
  800. }(d)
  801. }
  802. wg.Wait()
  803. close(errs)
  804. for err := range errs {
  805. c.Assert(err, check.IsNil)
  806. }
  807. }
  808. checkClusterHealth(c, nodes, mCount, wCount)
  809. }