docker_api_swarm_test.go 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190
  1. // +build !windows
  2. package main
  3. import (
  4. "fmt"
  5. "net/http"
  6. "os"
  7. "path/filepath"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. "syscall"
  12. "time"
  13. "github.com/docker/docker/api/types/swarm"
  14. "github.com/docker/docker/pkg/integration/checker"
  15. "github.com/go-check/check"
  16. )
  // defaultReconciliationTimeout is the duration most tests in this file hand
  // to waitAndAssert as its timeout argument.
  17. var defaultReconciliationTimeout = 30 * time.Second
  18. func (s *DockerSwarmSuite) TestAPISwarmInit(c *check.C) {
  19. // todo: should find a better way to verify that components are running than /info
  20. d1 := s.AddDaemon(c, true, true)
  21. info, err := d1.info()
  22. c.Assert(err, checker.IsNil)
  23. c.Assert(info.ControlAvailable, checker.True)
  24. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  25. d2 := s.AddDaemon(c, true, false)
  26. info, err = d2.info()
  27. c.Assert(err, checker.IsNil)
  28. c.Assert(info.ControlAvailable, checker.False)
  29. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  30. // Leaving cluster
  31. c.Assert(d2.Leave(false), checker.IsNil)
  32. info, err = d2.info()
  33. c.Assert(err, checker.IsNil)
  34. c.Assert(info.ControlAvailable, checker.False)
  35. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  36. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: d1.joinTokens(c).Worker, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  37. info, err = d2.info()
  38. c.Assert(err, checker.IsNil)
  39. c.Assert(info.ControlAvailable, checker.False)
  40. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  41. // Current state restoring after restarts
  42. err = d1.Stop()
  43. c.Assert(err, checker.IsNil)
  44. err = d2.Stop()
  45. c.Assert(err, checker.IsNil)
  46. err = d1.Start()
  47. c.Assert(err, checker.IsNil)
  48. err = d2.Start()
  49. c.Assert(err, checker.IsNil)
  50. info, err = d1.info()
  51. c.Assert(err, checker.IsNil)
  52. c.Assert(info.ControlAvailable, checker.True)
  53. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  54. info, err = d2.info()
  55. c.Assert(err, checker.IsNil)
  56. c.Assert(info.ControlAvailable, checker.False)
  57. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  58. }
  59. func (s *DockerSwarmSuite) TestAPISwarmJoinToken(c *check.C) {
  60. d1 := s.AddDaemon(c, false, false)
  61. c.Assert(d1.Init(swarm.InitRequest{}), checker.IsNil)
  62. d2 := s.AddDaemon(c, false, false)
  63. err := d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
  64. c.Assert(err, checker.NotNil)
  65. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  66. info, err := d2.info()
  67. c.Assert(err, checker.IsNil)
  68. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  69. err = d2.Join(swarm.JoinRequest{JoinToken: "foobaz", RemoteAddrs: []string{d1.listenAddr}})
  70. c.Assert(err, checker.NotNil)
  71. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  72. info, err = d2.info()
  73. c.Assert(err, checker.IsNil)
  74. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  75. workerToken := d1.joinTokens(c).Worker
  76. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  77. info, err = d2.info()
  78. c.Assert(err, checker.IsNil)
  79. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  80. c.Assert(d2.Leave(false), checker.IsNil)
  81. info, err = d2.info()
  82. c.Assert(err, checker.IsNil)
  83. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  84. // change tokens
  85. d1.rotateTokens(c)
  86. err = d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}})
  87. c.Assert(err, checker.NotNil)
  88. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  89. info, err = d2.info()
  90. c.Assert(err, checker.IsNil)
  91. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  92. workerToken = d1.joinTokens(c).Worker
  93. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  94. info, err = d2.info()
  95. c.Assert(err, checker.IsNil)
  96. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  97. c.Assert(d2.Leave(false), checker.IsNil)
  98. info, err = d2.info()
  99. c.Assert(err, checker.IsNil)
  100. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  101. // change spec, don't change tokens
  102. d1.updateSwarm(c, func(s *swarm.Spec) {})
  103. err = d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
  104. c.Assert(err, checker.NotNil)
  105. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  106. info, err = d2.info()
  107. c.Assert(err, checker.IsNil)
  108. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  109. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  110. info, err = d2.info()
  111. c.Assert(err, checker.IsNil)
  112. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  113. c.Assert(d2.Leave(false), checker.IsNil)
  114. info, err = d2.info()
  115. c.Assert(err, checker.IsNil)
  116. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  117. }
  118. func (s *DockerSwarmSuite) TestAPISwarmCAHash(c *check.C) {
  119. d1 := s.AddDaemon(c, true, true)
  120. d2 := s.AddDaemon(c, false, false)
  121. splitToken := strings.Split(d1.joinTokens(c).Worker, "-")
  122. splitToken[2] = "1kxftv4ofnc6mt30lmgipg6ngf9luhwqopfk1tz6bdmnkubg0e"
  123. replacementToken := strings.Join(splitToken, "-")
  124. err := d2.Join(swarm.JoinRequest{JoinToken: replacementToken, RemoteAddrs: []string{d1.listenAddr}})
  125. c.Assert(err, checker.NotNil)
  126. c.Assert(err.Error(), checker.Contains, "remote CA does not match fingerprint")
  127. }
  128. func (s *DockerSwarmSuite) TestAPISwarmPromoteDemote(c *check.C) {
  129. d1 := s.AddDaemon(c, false, false)
  130. c.Assert(d1.Init(swarm.InitRequest{}), checker.IsNil)
  131. d2 := s.AddDaemon(c, true, false)
  132. info, err := d2.info()
  133. c.Assert(err, checker.IsNil)
  134. c.Assert(info.ControlAvailable, checker.False)
  135. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  136. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  137. n.Spec.Role = swarm.NodeRoleManager
  138. })
  139. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True)
  140. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  141. n.Spec.Role = swarm.NodeRoleWorker
  142. })
  143. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.False)
  144. // Demoting last node should fail
  145. node := d1.getNode(c, d1.NodeID)
  146. node.Spec.Role = swarm.NodeRoleWorker
  147. url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index)
  148. status, out, err := d1.SockRequest("POST", url, node.Spec)
  149. c.Assert(err, checker.IsNil)
  150. c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("output: %q", string(out)))
  151. c.Assert(string(out), checker.Contains, "last manager of the swarm")
  152. info, err = d1.info()
  153. c.Assert(err, checker.IsNil)
  154. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  155. c.Assert(info.ControlAvailable, checker.True)
  156. // Promote already demoted node
  157. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  158. n.Spec.Role = swarm.NodeRoleManager
  159. })
  160. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True)
  161. }
  162. func (s *DockerSwarmSuite) TestAPISwarmServicesEmptyList(c *check.C) {
  163. d := s.AddDaemon(c, true, true)
  164. services := d.listServices(c)
  165. c.Assert(services, checker.NotNil)
  166. c.Assert(len(services), checker.Equals, 0, check.Commentf("services: %#v", services))
  167. }
  168. func (s *DockerSwarmSuite) TestAPISwarmServicesCreate(c *check.C) {
  169. d := s.AddDaemon(c, true, true)
  170. instances := 2
  171. id := d.createService(c, simpleTestService, setInstances(instances))
  172. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  173. service := d.getService(c, id)
  174. instances = 5
  175. d.updateService(c, service, setInstances(instances))
  176. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  177. d.removeService(c, service.ID)
  178. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 0)
  179. }
  180. func (s *DockerSwarmSuite) TestAPISwarmServicesMultipleAgents(c *check.C) {
  181. d1 := s.AddDaemon(c, true, true)
  182. d2 := s.AddDaemon(c, true, false)
  183. d3 := s.AddDaemon(c, true, false)
  184. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
  185. instances := 9
  186. id := d1.createService(c, simpleTestService, setInstances(instances))
  187. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  188. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  189. waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.GreaterThan, 0)
  190. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  191. // reconciliation on d2 node down
  192. c.Assert(d2.Stop(), checker.IsNil)
  193. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  194. // test downscaling
  195. instances = 5
  196. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  197. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  198. }
  199. func (s *DockerSwarmSuite) TestAPISwarmServicesCreateGlobal(c *check.C) {
  200. d1 := s.AddDaemon(c, true, true)
  201. d2 := s.AddDaemon(c, true, false)
  202. d3 := s.AddDaemon(c, true, false)
  203. d1.createService(c, simpleTestService, setGlobalMode)
  204. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, 1)
  205. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
  206. waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.Equals, 1)
  207. d4 := s.AddDaemon(c, true, false)
  208. d5 := s.AddDaemon(c, true, false)
  209. waitAndAssert(c, defaultReconciliationTimeout, d4.checkActiveContainerCount, checker.Equals, 1)
  210. waitAndAssert(c, defaultReconciliationTimeout, d5.checkActiveContainerCount, checker.Equals, 1)
  211. }
  212. func (s *DockerSwarmSuite) TestAPISwarmServicesUpdate(c *check.C) {
  213. const nodeCount = 3
  214. var daemons [nodeCount]*SwarmDaemon
  215. for i := 0; i < nodeCount; i++ {
  216. daemons[i] = s.AddDaemon(c, true, i == 0)
  217. }
  218. // wait for nodes ready
  219. waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
  220. // service image at start
  221. image1 := "busybox:latest"
  222. // target image in update
  223. image2 := "busybox:test"
  224. // create a different tag
  225. for _, d := range daemons {
  226. out, err := d.Cmd("tag", image1, image2)
  227. c.Assert(err, checker.IsNil, check.Commentf(out))
  228. }
  229. // create service
  230. instances := 5
  231. parallelism := 2
  232. id := daemons[0].createService(c, serviceForUpdate, setInstances(instances))
  233. // wait for tasks ready
  234. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  235. map[string]int{image1: instances})
  236. // issue service update
  237. service := daemons[0].getService(c, id)
  238. daemons[0].updateService(c, service, setImage(image2))
  239. // first batch
  240. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  241. map[string]int{image1: instances - parallelism, image2: parallelism})
  242. // 2nd batch
  243. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  244. map[string]int{image1: instances - 2*parallelism, image2: 2 * parallelism})
  245. // 3nd batch
  246. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  247. map[string]int{image2: instances})
  248. }
  249. func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintRole(c *check.C) {
  250. const nodeCount = 3
  251. var daemons [nodeCount]*SwarmDaemon
  252. for i := 0; i < nodeCount; i++ {
  253. daemons[i] = s.AddDaemon(c, true, i == 0)
  254. }
  255. // wait for nodes ready
  256. waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
  257. // create service
  258. constraints := []string{"node.role==worker"}
  259. instances := 3
  260. id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
  261. // wait for tasks ready
  262. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
  263. // validate tasks are running on worker nodes
  264. tasks := daemons[0].getServiceTasks(c, id)
  265. for _, task := range tasks {
  266. node := daemons[0].getNode(c, task.NodeID)
  267. c.Assert(node.Spec.Role, checker.Equals, swarm.NodeRoleWorker)
  268. }
  269. //remove service
  270. daemons[0].removeService(c, id)
  271. // create service
  272. constraints = []string{"node.role!=worker"}
  273. id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
  274. // wait for tasks ready
  275. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
  276. tasks = daemons[0].getServiceTasks(c, id)
  277. // validate tasks are running on manager nodes
  278. for _, task := range tasks {
  279. node := daemons[0].getNode(c, task.NodeID)
  280. c.Assert(node.Spec.Role, checker.Equals, swarm.NodeRoleManager)
  281. }
  282. //remove service
  283. daemons[0].removeService(c, id)
  284. // create service
  285. constraints = []string{"node.role==nosuchrole"}
  286. id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
  287. // wait for tasks created
  288. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
  289. // let scheduler try
  290. time.Sleep(250 * time.Millisecond)
  291. // validate tasks are not assigned to any node
  292. tasks = daemons[0].getServiceTasks(c, id)
  293. for _, task := range tasks {
  294. c.Assert(task.NodeID, checker.Equals, "")
  295. }
  296. }
  297. func (s *DockerSwarmSuite) TestAPISwarmServiceConstraintLabel(c *check.C) {
  298. const nodeCount = 3
  299. var daemons [nodeCount]*SwarmDaemon
  300. for i := 0; i < nodeCount; i++ {
  301. daemons[i] = s.AddDaemon(c, true, i == 0)
  302. }
  303. // wait for nodes ready
  304. waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
  305. nodes := daemons[0].listNodes(c)
  306. c.Assert(len(nodes), checker.Equals, nodeCount)
  307. // add labels to nodes
  308. daemons[0].updateNode(c, nodes[0].ID, func(n *swarm.Node) {
  309. n.Spec.Annotations.Labels = map[string]string{
  310. "security": "high",
  311. }
  312. })
  313. for i := 1; i < nodeCount; i++ {
  314. daemons[0].updateNode(c, nodes[i].ID, func(n *swarm.Node) {
  315. n.Spec.Annotations.Labels = map[string]string{
  316. "security": "low",
  317. }
  318. })
  319. }
  320. // create service
  321. instances := 3
  322. constraints := []string{"node.labels.security==high"}
  323. id := daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
  324. // wait for tasks ready
  325. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
  326. tasks := daemons[0].getServiceTasks(c, id)
  327. // validate all tasks are running on nodes[0]
  328. for _, task := range tasks {
  329. c.Assert(task.NodeID, checker.Equals, nodes[0].ID)
  330. }
  331. //remove service
  332. daemons[0].removeService(c, id)
  333. // create service
  334. constraints = []string{"node.labels.security!=high"}
  335. id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
  336. // wait for tasks ready
  337. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
  338. tasks = daemons[0].getServiceTasks(c, id)
  339. // validate all tasks are NOT running on nodes[0]
  340. for _, task := range tasks {
  341. c.Assert(task.NodeID, checker.Not(checker.Equals), nodes[0].ID)
  342. }
  343. //remove service
  344. daemons[0].removeService(c, id)
  345. constraints = []string{"node.labels.security==medium"}
  346. id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
  347. // wait for tasks created
  348. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
  349. // let scheduler try
  350. time.Sleep(250 * time.Millisecond)
  351. tasks = daemons[0].getServiceTasks(c, id)
  352. // validate tasks are not assigned
  353. for _, task := range tasks {
  354. c.Assert(task.NodeID, checker.Equals, "")
  355. }
  356. //remove service
  357. daemons[0].removeService(c, id)
  358. // multiple constraints
  359. constraints = []string{
  360. "node.labels.security==high",
  361. fmt.Sprintf("node.id==%s", nodes[1].ID),
  362. }
  363. id = daemons[0].createService(c, simpleTestService, setConstraints(constraints), setInstances(instances))
  364. // wait for tasks created
  365. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceTasks(c, id), checker.Equals, instances)
  366. // let scheduler try
  367. time.Sleep(250 * time.Millisecond)
  368. tasks = daemons[0].getServiceTasks(c, id)
  369. // validate tasks are not assigned
  370. for _, task := range tasks {
  371. c.Assert(task.NodeID, checker.Equals, "")
  372. }
  373. // make nodes[1] fulfills the constraints
  374. daemons[0].updateNode(c, nodes[1].ID, func(n *swarm.Node) {
  375. n.Spec.Annotations.Labels = map[string]string{
  376. "security": "high",
  377. }
  378. })
  379. // wait for tasks ready
  380. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkServiceRunningTasks(c, id), checker.Equals, instances)
  381. tasks = daemons[0].getServiceTasks(c, id)
  382. for _, task := range tasks {
  383. c.Assert(task.NodeID, checker.Equals, nodes[1].ID)
  384. }
  385. }
  386. func (s *DockerSwarmSuite) TestAPISwarmServicesStateReporting(c *check.C) {
  387. testRequires(c, SameHostDaemon)
  388. testRequires(c, DaemonIsLinux)
  389. d1 := s.AddDaemon(c, true, true)
  390. d2 := s.AddDaemon(c, true, true)
  391. d3 := s.AddDaemon(c, true, false)
  392. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept
  393. instances := 9
  394. d1.createService(c, simpleTestService, setInstances(instances))
  395. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  396. getContainers := func() map[string]*SwarmDaemon {
  397. m := make(map[string]*SwarmDaemon)
  398. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  399. for _, id := range d.activeContainers() {
  400. m[id] = d
  401. }
  402. }
  403. return m
  404. }
  405. containers := getContainers()
  406. c.Assert(containers, checker.HasLen, instances)
  407. var toRemove string
  408. for i := range containers {
  409. toRemove = i
  410. }
  411. _, err := containers[toRemove].Cmd("stop", toRemove)
  412. c.Assert(err, checker.IsNil)
  413. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  414. containers2 := getContainers()
  415. c.Assert(containers2, checker.HasLen, instances)
  416. for i := range containers {
  417. if i == toRemove {
  418. c.Assert(containers2[i], checker.IsNil)
  419. } else {
  420. c.Assert(containers2[i], checker.NotNil)
  421. }
  422. }
  423. containers = containers2
  424. for i := range containers {
  425. toRemove = i
  426. }
  427. // try with killing process outside of docker
  428. pidStr, err := containers[toRemove].Cmd("inspect", "-f", "{{.State.Pid}}", toRemove)
  429. c.Assert(err, checker.IsNil)
  430. pid, err := strconv.Atoi(strings.TrimSpace(pidStr))
  431. c.Assert(err, checker.IsNil)
  432. c.Assert(syscall.Kill(pid, syscall.SIGKILL), checker.IsNil)
  433. time.Sleep(time.Second) // give some time to handle the signal
  434. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  435. containers2 = getContainers()
  436. c.Assert(containers2, checker.HasLen, instances)
  437. for i := range containers {
  438. if i == toRemove {
  439. c.Assert(containers2[i], checker.IsNil)
  440. } else {
  441. c.Assert(containers2[i], checker.NotNil)
  442. }
  443. }
  444. }
  445. func (s *DockerSwarmSuite) TestAPISwarmLeaderProxy(c *check.C) {
  446. // add three managers, one of these is leader
  447. d1 := s.AddDaemon(c, true, true)
  448. d2 := s.AddDaemon(c, true, true)
  449. d3 := s.AddDaemon(c, true, true)
  450. // start a service by hitting each of the 3 managers
  451. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  452. s.Spec.Name = "test1"
  453. })
  454. d2.createService(c, simpleTestService, func(s *swarm.Service) {
  455. s.Spec.Name = "test2"
  456. })
  457. d3.createService(c, simpleTestService, func(s *swarm.Service) {
  458. s.Spec.Name = "test3"
  459. })
  460. // 3 services should be started now, because the requests were proxied to leader
  461. // query each node and make sure it returns 3 services
  462. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  463. services := d.listServices(c)
  464. c.Assert(services, checker.HasLen, 3)
  465. }
  466. }
  467. func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *check.C) {
  468. // Create 3 nodes
  469. d1 := s.AddDaemon(c, true, true)
  470. d2 := s.AddDaemon(c, true, true)
  471. d3 := s.AddDaemon(c, true, true)
  472. // assert that the first node we made is the leader, and the other two are followers
  473. c.Assert(d1.getNode(c, d1.NodeID).ManagerStatus.Leader, checker.True)
  474. c.Assert(d1.getNode(c, d2.NodeID).ManagerStatus.Leader, checker.False)
  475. c.Assert(d1.getNode(c, d3.NodeID).ManagerStatus.Leader, checker.False)
  476. d1.Stop() // stop the leader
  477. var (
  478. leader *SwarmDaemon // keep track of leader
  479. followers []*SwarmDaemon // keep track of followers
  480. )
  481. checkLeader := func(nodes ...*SwarmDaemon) checkF {
  482. return func(c *check.C) (interface{}, check.CommentInterface) {
  483. // clear these out before each run
  484. leader = nil
  485. followers = nil
  486. for _, d := range nodes {
  487. if d.getNode(c, d.NodeID).ManagerStatus.Leader {
  488. leader = d
  489. } else {
  490. followers = append(followers, d)
  491. }
  492. }
  493. if leader == nil {
  494. return false, check.Commentf("no leader elected")
  495. }
  496. return true, check.Commentf("elected %v", leader.id)
  497. }
  498. }
  499. // wait for an election to occur
  500. waitAndAssert(c, defaultReconciliationTimeout, checkLeader(d2, d3), checker.True)
  501. // assert that we have a new leader
  502. c.Assert(leader, checker.NotNil)
  503. // Keep track of the current leader, since we want that to be chosen.
  504. stableleader := leader
  505. // add the d1, the initial leader, back
  506. d1.Start()
  507. // TODO(stevvooe): may need to wait for rejoin here
  508. // wait for possible election
  509. waitAndAssert(c, defaultReconciliationTimeout, checkLeader(d1, d2, d3), checker.True)
  510. // pick out the leader and the followers again
  511. // verify that we still only have 1 leader and 2 followers
  512. c.Assert(leader, checker.NotNil)
  513. c.Assert(followers, checker.HasLen, 2)
  514. // and that after we added d1 back, the leader hasn't changed
  515. c.Assert(leader.NodeID, checker.Equals, stableleader.NodeID)
  516. }
  517. func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *check.C) {
  518. d1 := s.AddDaemon(c, true, true)
  519. d2 := s.AddDaemon(c, true, true)
  520. d3 := s.AddDaemon(c, true, true)
  521. d1.createService(c, simpleTestService)
  522. c.Assert(d2.Stop(), checker.IsNil)
  523. // make sure there is a leader
  524. waitAndAssert(c, defaultReconciliationTimeout, d1.checkLeader, checker.IsNil)
  525. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  526. s.Spec.Name = "top1"
  527. })
  528. c.Assert(d3.Stop(), checker.IsNil)
  529. // make sure there is a leader
  530. waitAndAssert(c, defaultReconciliationTimeout, d1.checkLeader, checker.IsNil)
  531. var service swarm.Service
  532. simpleTestService(&service)
  533. service.Spec.Name = "top2"
  534. status, out, err := d1.SockRequest("POST", "/services/create", service.Spec)
  535. c.Assert(err, checker.IsNil)
  536. c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("deadline exceeded", string(out)))
  537. c.Assert(d2.Start(), checker.IsNil)
  538. // make sure there is a leader
  539. waitAndAssert(c, defaultReconciliationTimeout, d1.checkLeader, checker.IsNil)
  540. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  541. s.Spec.Name = "top3"
  542. })
  543. }
  544. func (s *DockerSwarmSuite) TestAPISwarmListNodes(c *check.C) {
  545. d1 := s.AddDaemon(c, true, true)
  546. d2 := s.AddDaemon(c, true, false)
  547. d3 := s.AddDaemon(c, true, false)
  548. nodes := d1.listNodes(c)
  549. c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes))
  550. loop0:
  551. for _, n := range nodes {
  552. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  553. if n.ID == d.NodeID {
  554. continue loop0
  555. }
  556. }
  557. c.Errorf("unknown nodeID %v", n.ID)
  558. }
  559. }
  560. func (s *DockerSwarmSuite) TestAPISwarmNodeUpdate(c *check.C) {
  561. d := s.AddDaemon(c, true, true)
  562. nodes := d.listNodes(c)
  563. d.updateNode(c, nodes[0].ID, func(n *swarm.Node) {
  564. n.Spec.Availability = swarm.NodeAvailabilityPause
  565. })
  566. n := d.getNode(c, nodes[0].ID)
  567. c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityPause)
  568. }
  569. func (s *DockerSwarmSuite) TestAPISwarmNodeRemove(c *check.C) {
  570. testRequires(c, Network)
  571. d1 := s.AddDaemon(c, true, true)
  572. d2 := s.AddDaemon(c, true, false)
  573. _ = s.AddDaemon(c, true, false)
  574. nodes := d1.listNodes(c)
  575. c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes))
  576. // Getting the info so we can take the NodeID
  577. d2Info, err := d2.info()
  578. c.Assert(err, checker.IsNil)
  579. // forceful removal of d2 should work
  580. d1.removeNode(c, d2Info.NodeID, true)
  581. nodes = d1.listNodes(c)
  582. c.Assert(len(nodes), checker.Equals, 2, check.Commentf("nodes: %#v", nodes))
  583. // Restart the node that was removed
  584. err = d2.Restart()
  585. c.Assert(err, checker.IsNil)
  586. // Give some time for the node to rejoin
  587. time.Sleep(1 * time.Second)
  588. // Make sure the node didn't rejoin
  589. nodes = d1.listNodes(c)
  590. c.Assert(len(nodes), checker.Equals, 2, check.Commentf("nodes: %#v", nodes))
  591. }
  592. func (s *DockerSwarmSuite) TestAPISwarmNodeDrainPause(c *check.C) {
  593. d1 := s.AddDaemon(c, true, true)
  594. d2 := s.AddDaemon(c, true, false)
  595. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
  596. // start a service, expect balanced distribution
  597. instances := 8
  598. id := d1.createService(c, simpleTestService, setInstances(instances))
  599. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  600. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  601. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  602. // drain d2, all containers should move to d1
  603. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  604. n.Spec.Availability = swarm.NodeAvailabilityDrain
  605. })
  606. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
  607. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0)
  608. // set d2 back to active
  609. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  610. n.Spec.Availability = swarm.NodeAvailabilityActive
  611. })
  612. instances = 1
  613. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  614. waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  615. instances = 8
  616. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  617. // drained node first so we don't get any old containers
  618. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  619. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  620. waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  621. d2ContainerCount := len(d2.activeContainers())
  622. // set d2 to paused, scale service up, only d1 gets new tasks
  623. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  624. n.Spec.Availability = swarm.NodeAvailabilityPause
  625. })
  626. instances = 14
  627. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  628. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances-d2ContainerCount)
  629. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, d2ContainerCount)
  630. }
  631. func (s *DockerSwarmSuite) TestAPISwarmLeaveRemovesContainer(c *check.C) {
  632. d := s.AddDaemon(c, true, true)
  633. instances := 2
  634. d.createService(c, simpleTestService, setInstances(instances))
  635. id, err := d.Cmd("run", "-d", "busybox", "top")
  636. c.Assert(err, checker.IsNil)
  637. id = strings.TrimSpace(id)
  638. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances+1)
  639. c.Assert(d.Leave(false), checker.NotNil)
  640. c.Assert(d.Leave(true), checker.IsNil)
  641. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 1)
  642. id2, err := d.Cmd("ps", "-q")
  643. c.Assert(err, checker.IsNil)
  644. c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
  645. }
  646. // #23629
  647. func (s *DockerSwarmSuite) TestAPISwarmLeaveOnPendingJoin(c *check.C) {
  648. testRequires(c, Network)
  649. s.AddDaemon(c, true, true)
  650. d2 := s.AddDaemon(c, false, false)
  651. id, err := d2.Cmd("run", "-d", "busybox", "top")
  652. c.Assert(err, checker.IsNil)
  653. id = strings.TrimSpace(id)
  654. err = d2.Join(swarm.JoinRequest{
  655. RemoteAddrs: []string{"123.123.123.123:1234"},
  656. })
  657. c.Assert(err, check.NotNil)
  658. c.Assert(err.Error(), checker.Contains, "Timeout was reached")
  659. info, err := d2.info()
  660. c.Assert(err, checker.IsNil)
  661. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStatePending)
  662. c.Assert(d2.Leave(true), checker.IsNil)
  663. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
  664. id2, err := d2.Cmd("ps", "-q")
  665. c.Assert(err, checker.IsNil)
  666. c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
  667. }
  668. // #23705
  669. func (s *DockerSwarmSuite) TestAPISwarmRestoreOnPendingJoin(c *check.C) {
  670. testRequires(c, Network)
  671. d := s.AddDaemon(c, false, false)
  672. err := d.Join(swarm.JoinRequest{
  673. RemoteAddrs: []string{"123.123.123.123:1234"},
  674. })
  675. c.Assert(err, check.NotNil)
  676. c.Assert(err.Error(), checker.Contains, "Timeout was reached")
  677. waitAndAssert(c, defaultReconciliationTimeout, d.checkLocalNodeState, checker.Equals, swarm.LocalNodeStatePending)
  678. c.Assert(d.Stop(), checker.IsNil)
  679. c.Assert(d.Start(), checker.IsNil)
  680. info, err := d.info()
  681. c.Assert(err, checker.IsNil)
  682. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  683. }
  684. func (s *DockerSwarmSuite) TestAPISwarmManagerRestore(c *check.C) {
  685. d1 := s.AddDaemon(c, true, true)
  686. instances := 2
  687. id := d1.createService(c, simpleTestService, setInstances(instances))
  688. d1.getService(c, id)
  689. d1.Stop()
  690. d1.Start()
  691. d1.getService(c, id)
  692. d2 := s.AddDaemon(c, true, true)
  693. d2.getService(c, id)
  694. d2.Stop()
  695. d2.Start()
  696. d2.getService(c, id)
  697. d3 := s.AddDaemon(c, true, true)
  698. d3.getService(c, id)
  699. d3.Stop()
  700. d3.Start()
  701. d3.getService(c, id)
  702. d3.Kill()
  703. time.Sleep(1 * time.Second) // time to handle signal
  704. d3.Start()
  705. d3.getService(c, id)
  706. }
  707. func (s *DockerSwarmSuite) TestAPISwarmScaleNoRollingUpdate(c *check.C) {
  708. d := s.AddDaemon(c, true, true)
  709. instances := 2
  710. id := d.createService(c, simpleTestService, setInstances(instances))
  711. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  712. containers := d.activeContainers()
  713. instances = 4
  714. d.updateService(c, d.getService(c, id), setInstances(instances))
  715. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  716. containers2 := d.activeContainers()
  717. loop0:
  718. for _, c1 := range containers {
  719. for _, c2 := range containers2 {
  720. if c1 == c2 {
  721. continue loop0
  722. }
  723. }
  724. c.Errorf("container %v not found in new set %#v", c1, containers2)
  725. }
  726. }
  727. func (s *DockerSwarmSuite) TestAPISwarmInvalidAddress(c *check.C) {
  728. d := s.AddDaemon(c, false, false)
  729. req := swarm.InitRequest{
  730. ListenAddr: "",
  731. }
  732. status, _, err := d.SockRequest("POST", "/swarm/init", req)
  733. c.Assert(err, checker.IsNil)
  734. c.Assert(status, checker.Equals, http.StatusInternalServerError)
  735. req2 := swarm.JoinRequest{
  736. ListenAddr: "0.0.0.0:2377",
  737. RemoteAddrs: []string{""},
  738. }
  739. status, _, err = d.SockRequest("POST", "/swarm/join", req2)
  740. c.Assert(err, checker.IsNil)
  741. c.Assert(status, checker.Equals, http.StatusInternalServerError)
  742. }
  743. func (s *DockerSwarmSuite) TestAPISwarmForceNewCluster(c *check.C) {
  744. d1 := s.AddDaemon(c, true, true)
  745. d2 := s.AddDaemon(c, true, true)
  746. instances := 2
  747. id := d1.createService(c, simpleTestService, setInstances(instances))
  748. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  749. // drain d2, all containers should move to d1
  750. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  751. n.Spec.Availability = swarm.NodeAvailabilityDrain
  752. })
  753. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
  754. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0)
  755. c.Assert(d2.Stop(), checker.IsNil)
  756. c.Assert(d1.Init(swarm.InitRequest{
  757. ForceNewCluster: true,
  758. Spec: swarm.Spec{},
  759. }), checker.IsNil)
  760. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
  761. d3 := s.AddDaemon(c, true, true)
  762. info, err := d3.info()
  763. c.Assert(err, checker.IsNil)
  764. c.Assert(info.ControlAvailable, checker.True)
  765. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  766. instances = 4
  767. d3.updateService(c, d3.getService(c, id), setInstances(instances))
  768. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  769. }
  770. func simpleTestService(s *swarm.Service) {
  771. ureplicas := uint64(1)
  772. restartDelay := time.Duration(100 * time.Millisecond)
  773. s.Spec = swarm.ServiceSpec{
  774. TaskTemplate: swarm.TaskSpec{
  775. ContainerSpec: swarm.ContainerSpec{
  776. Image: "busybox:latest",
  777. Command: []string{"/bin/top"},
  778. },
  779. RestartPolicy: &swarm.RestartPolicy{
  780. Delay: &restartDelay,
  781. },
  782. },
  783. Mode: swarm.ServiceMode{
  784. Replicated: &swarm.ReplicatedService{
  785. Replicas: &ureplicas,
  786. },
  787. },
  788. }
  789. s.Spec.Name = "top"
  790. }
  791. func serviceForUpdate(s *swarm.Service) {
  792. ureplicas := uint64(1)
  793. restartDelay := time.Duration(100 * time.Millisecond)
  794. s.Spec = swarm.ServiceSpec{
  795. TaskTemplate: swarm.TaskSpec{
  796. ContainerSpec: swarm.ContainerSpec{
  797. Image: "busybox:latest",
  798. Command: []string{"/bin/top"},
  799. },
  800. RestartPolicy: &swarm.RestartPolicy{
  801. Delay: &restartDelay,
  802. },
  803. },
  804. Mode: swarm.ServiceMode{
  805. Replicated: &swarm.ReplicatedService{
  806. Replicas: &ureplicas,
  807. },
  808. },
  809. UpdateConfig: &swarm.UpdateConfig{
  810. Parallelism: 2,
  811. Delay: 4 * time.Second,
  812. FailureAction: swarm.UpdateFailureActionContinue,
  813. },
  814. }
  815. s.Spec.Name = "updatetest"
  816. }
  817. func setInstances(replicas int) serviceConstructor {
  818. ureplicas := uint64(replicas)
  819. return func(s *swarm.Service) {
  820. s.Spec.Mode = swarm.ServiceMode{
  821. Replicated: &swarm.ReplicatedService{
  822. Replicas: &ureplicas,
  823. },
  824. }
  825. }
  826. }
  827. func setImage(image string) serviceConstructor {
  828. return func(s *swarm.Service) {
  829. s.Spec.TaskTemplate.ContainerSpec.Image = image
  830. }
  831. }
  832. func setConstraints(constraints []string) serviceConstructor {
  833. return func(s *swarm.Service) {
  834. if s.Spec.TaskTemplate.Placement == nil {
  835. s.Spec.TaskTemplate.Placement = &swarm.Placement{}
  836. }
  837. s.Spec.TaskTemplate.Placement.Constraints = constraints
  838. }
  839. }
  840. func setGlobalMode(s *swarm.Service) {
  841. s.Spec.Mode = swarm.ServiceMode{
  842. Global: &swarm.GlobalService{},
  843. }
  844. }
  845. func checkClusterHealth(c *check.C, cl []*SwarmDaemon, managerCount, workerCount int) {
  846. var totalMCount, totalWCount int
  847. for _, d := range cl {
  848. var (
  849. info swarm.Info
  850. err error
  851. )
  852. // check info in a waitAndAssert, because if the cluster doesn't have a leader, `info` will return an error
  853. checkInfo := func(c *check.C) (interface{}, check.CommentInterface) {
  854. info, err = d.info()
  855. return err, check.Commentf("cluster not ready in time")
  856. }
  857. waitAndAssert(c, defaultReconciliationTimeout, checkInfo, checker.IsNil)
  858. if !info.ControlAvailable {
  859. totalWCount++
  860. continue
  861. }
  862. var leaderFound bool
  863. totalMCount++
  864. var mCount, wCount int
  865. for _, n := range d.listNodes(c) {
  866. waitReady := func(c *check.C) (interface{}, check.CommentInterface) {
  867. if n.Status.State == swarm.NodeStateReady {
  868. return true, nil
  869. }
  870. nn := d.getNode(c, n.ID)
  871. n = *nn
  872. return n.Status.State == swarm.NodeStateReady, check.Commentf("state of node %s, reported by %s", n.ID, d.Info.NodeID)
  873. }
  874. waitAndAssert(c, defaultReconciliationTimeout, waitReady, checker.True)
  875. waitActive := func(c *check.C) (interface{}, check.CommentInterface) {
  876. if n.Spec.Availability == swarm.NodeAvailabilityActive {
  877. return true, nil
  878. }
  879. nn := d.getNode(c, n.ID)
  880. n = *nn
  881. return n.Spec.Availability == swarm.NodeAvailabilityActive, check.Commentf("availability of node %s, reported by %s", n.ID, d.Info.NodeID)
  882. }
  883. waitAndAssert(c, defaultReconciliationTimeout, waitActive, checker.True)
  884. if n.Spec.Role == swarm.NodeRoleManager {
  885. c.Assert(n.ManagerStatus, checker.NotNil, check.Commentf("manager status of node %s (manager), reported by %s", n.ID, d.Info.NodeID))
  886. if n.ManagerStatus.Leader {
  887. leaderFound = true
  888. }
  889. mCount++
  890. } else {
  891. c.Assert(n.ManagerStatus, checker.IsNil, check.Commentf("manager status of node %s (worker), reported by %s", n.ID, d.Info.NodeID))
  892. wCount++
  893. }
  894. }
  895. c.Assert(leaderFound, checker.True, check.Commentf("lack of leader reported by node %s", info.NodeID))
  896. c.Assert(mCount, checker.Equals, managerCount, check.Commentf("managers count reported by node %s", info.NodeID))
  897. c.Assert(wCount, checker.Equals, workerCount, check.Commentf("workers count reported by node %s", info.NodeID))
  898. }
  899. c.Assert(totalMCount, checker.Equals, managerCount)
  900. c.Assert(totalWCount, checker.Equals, workerCount)
  901. }
  902. func (s *DockerSwarmSuite) TestAPISwarmRestartCluster(c *check.C) {
  903. mCount, wCount := 5, 1
  904. var nodes []*SwarmDaemon
  905. for i := 0; i < mCount; i++ {
  906. manager := s.AddDaemon(c, true, true)
  907. info, err := manager.info()
  908. c.Assert(err, checker.IsNil)
  909. c.Assert(info.ControlAvailable, checker.True)
  910. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  911. nodes = append(nodes, manager)
  912. }
  913. for i := 0; i < wCount; i++ {
  914. worker := s.AddDaemon(c, true, false)
  915. info, err := worker.info()
  916. c.Assert(err, checker.IsNil)
  917. c.Assert(info.ControlAvailable, checker.False)
  918. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  919. nodes = append(nodes, worker)
  920. }
  921. // stop whole cluster
  922. {
  923. var wg sync.WaitGroup
  924. wg.Add(len(nodes))
  925. errs := make(chan error, len(nodes))
  926. for _, d := range nodes {
  927. go func(daemon *SwarmDaemon) {
  928. defer wg.Done()
  929. if err := daemon.Stop(); err != nil {
  930. errs <- err
  931. }
  932. if root := os.Getenv("DOCKER_REMAP_ROOT"); root != "" {
  933. daemon.root = filepath.Dir(daemon.root)
  934. }
  935. }(d)
  936. }
  937. wg.Wait()
  938. close(errs)
  939. for err := range errs {
  940. c.Assert(err, check.IsNil)
  941. }
  942. }
  943. // start whole cluster
  944. {
  945. var wg sync.WaitGroup
  946. wg.Add(len(nodes))
  947. errs := make(chan error, len(nodes))
  948. for _, d := range nodes {
  949. go func(daemon *SwarmDaemon) {
  950. defer wg.Done()
  951. if err := daemon.Start("--iptables=false"); err != nil {
  952. errs <- err
  953. }
  954. }(d)
  955. }
  956. wg.Wait()
  957. close(errs)
  958. for err := range errs {
  959. c.Assert(err, check.IsNil)
  960. }
  961. }
  962. checkClusterHealth(c, nodes, mCount, wCount)
  963. }
  964. func (s *DockerSwarmSuite) TestAPISwarmServicesUpdateWithName(c *check.C) {
  965. d := s.AddDaemon(c, true, true)
  966. instances := 2
  967. id := d.createService(c, simpleTestService, setInstances(instances))
  968. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  969. service := d.getService(c, id)
  970. instances = 5
  971. setInstances(instances)(service)
  972. url := fmt.Sprintf("/services/%s/update?version=%d", service.Spec.Name, service.Version.Index)
  973. status, out, err := d.SockRequest("POST", url, service.Spec)
  974. c.Assert(err, checker.IsNil)
  975. c.Assert(status, checker.Equals, http.StatusOK, check.Commentf("output: %q", string(out)))
  976. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  977. }