docker_api_swarm_test.go 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986
  1. // +build !windows
  2. package main
  3. import (
  4. "fmt"
  5. "net/http"
  6. "os"
  7. "path/filepath"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. "syscall"
  12. "time"
  13. "github.com/docker/docker/pkg/integration/checker"
  14. "github.com/docker/engine-api/types/swarm"
  15. "github.com/go-check/check"
  16. )
  // defaultReconciliationTimeout is the timeout handed to waitAndAssert by the
  // tests in this file while they poll for the swarm to reach an expected state.
  var defaultReconciliationTimeout = time.Second * 30
  18. func (s *DockerSwarmSuite) TestApiSwarmInit(c *check.C) {
  19. testRequires(c, Network)
  20. // todo: should find a better way to verify that components are running than /info
  21. d1 := s.AddDaemon(c, true, true)
  22. info, err := d1.info()
  23. c.Assert(err, checker.IsNil)
  24. c.Assert(info.ControlAvailable, checker.True)
  25. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  26. d2 := s.AddDaemon(c, true, false)
  27. info, err = d2.info()
  28. c.Assert(err, checker.IsNil)
  29. c.Assert(info.ControlAvailable, checker.False)
  30. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  31. // Leaving cluster
  32. c.Assert(d2.Leave(false), checker.IsNil)
  33. info, err = d2.info()
  34. c.Assert(err, checker.IsNil)
  35. c.Assert(info.ControlAvailable, checker.False)
  36. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  37. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: d1.joinTokens(c).Worker, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  38. info, err = d2.info()
  39. c.Assert(err, checker.IsNil)
  40. c.Assert(info.ControlAvailable, checker.False)
  41. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  42. // Current state restoring after restarts
  43. err = d1.Stop()
  44. c.Assert(err, checker.IsNil)
  45. err = d2.Stop()
  46. c.Assert(err, checker.IsNil)
  47. err = d1.Start()
  48. c.Assert(err, checker.IsNil)
  49. err = d2.Start()
  50. c.Assert(err, checker.IsNil)
  51. info, err = d1.info()
  52. c.Assert(err, checker.IsNil)
  53. c.Assert(info.ControlAvailable, checker.True)
  54. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  55. info, err = d2.info()
  56. c.Assert(err, checker.IsNil)
  57. c.Assert(info.ControlAvailable, checker.False)
  58. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  59. }
  60. func (s *DockerSwarmSuite) TestApiSwarmJoinToken(c *check.C) {
  61. testRequires(c, Network)
  62. d1 := s.AddDaemon(c, false, false)
  63. c.Assert(d1.Init(swarm.InitRequest{}), checker.IsNil)
  64. d2 := s.AddDaemon(c, false, false)
  65. err := d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
  66. c.Assert(err, checker.NotNil)
  67. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  68. info, err := d2.info()
  69. c.Assert(err, checker.IsNil)
  70. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  71. err = d2.Join(swarm.JoinRequest{JoinToken: "foobaz", RemoteAddrs: []string{d1.listenAddr}})
  72. c.Assert(err, checker.NotNil)
  73. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  74. info, err = d2.info()
  75. c.Assert(err, checker.IsNil)
  76. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  77. workerToken := d1.joinTokens(c).Worker
  78. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  79. info, err = d2.info()
  80. c.Assert(err, checker.IsNil)
  81. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  82. c.Assert(d2.Leave(false), checker.IsNil)
  83. info, err = d2.info()
  84. c.Assert(err, checker.IsNil)
  85. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  86. // change tokens
  87. d1.rotateTokens(c)
  88. err = d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}})
  89. c.Assert(err, checker.NotNil)
  90. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  91. info, err = d2.info()
  92. c.Assert(err, checker.IsNil)
  93. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  94. workerToken = d1.joinTokens(c).Worker
  95. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  96. info, err = d2.info()
  97. c.Assert(err, checker.IsNil)
  98. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  99. c.Assert(d2.Leave(false), checker.IsNil)
  100. info, err = d2.info()
  101. c.Assert(err, checker.IsNil)
  102. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  103. // change spec, don't change tokens
  104. d1.updateSwarm(c, func(s *swarm.Spec) {})
  105. err = d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
  106. c.Assert(err, checker.NotNil)
  107. c.Assert(err.Error(), checker.Contains, "join token is necessary")
  108. info, err = d2.info()
  109. c.Assert(err, checker.IsNil)
  110. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  111. c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  112. info, err = d2.info()
  113. c.Assert(err, checker.IsNil)
  114. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  115. c.Assert(d2.Leave(false), checker.IsNil)
  116. info, err = d2.info()
  117. c.Assert(err, checker.IsNil)
  118. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  119. }
  120. func (s *DockerSwarmSuite) TestApiSwarmCAHash(c *check.C) {
  121. testRequires(c, Network)
  122. d1 := s.AddDaemon(c, true, true)
  123. d2 := s.AddDaemon(c, false, false)
  124. splitToken := strings.Split(d1.joinTokens(c).Worker, "-")
  125. splitToken[2] = "1kxftv4ofnc6mt30lmgipg6ngf9luhwqopfk1tz6bdmnkubg0e"
  126. replacementToken := strings.Join(splitToken, "-")
  127. err := d2.Join(swarm.JoinRequest{JoinToken: replacementToken, RemoteAddrs: []string{d1.listenAddr}})
  128. c.Assert(err, checker.NotNil)
  129. c.Assert(err.Error(), checker.Contains, "remote CA does not match fingerprint")
  130. }
  131. func (s *DockerSwarmSuite) TestApiSwarmPromoteDemote(c *check.C) {
  132. testRequires(c, Network)
  133. d1 := s.AddDaemon(c, false, false)
  134. c.Assert(d1.Init(swarm.InitRequest{}), checker.IsNil)
  135. d2 := s.AddDaemon(c, true, false)
  136. info, err := d2.info()
  137. c.Assert(err, checker.IsNil)
  138. c.Assert(info.ControlAvailable, checker.False)
  139. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  140. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  141. n.Spec.Role = swarm.NodeRoleManager
  142. })
  143. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True)
  144. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  145. n.Spec.Role = swarm.NodeRoleWorker
  146. })
  147. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.False)
  148. // Demoting last node should fail
  149. node := d1.getNode(c, d1.NodeID)
  150. node.Spec.Role = swarm.NodeRoleWorker
  151. url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index)
  152. status, out, err := d1.SockRequest("POST", url, node.Spec)
  153. c.Assert(err, checker.IsNil)
  154. c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("output: %q", string(out)))
  155. c.Assert(string(out), checker.Contains, "last manager of the swarm")
  156. info, err = d1.info()
  157. c.Assert(err, checker.IsNil)
  158. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  159. c.Assert(info.ControlAvailable, checker.True)
  160. // Promote already demoted node
  161. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  162. n.Spec.Role = swarm.NodeRoleManager
  163. })
  164. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True)
  165. }
  166. func (s *DockerSwarmSuite) TestApiSwarmServicesEmptyList(c *check.C) {
  167. testRequires(c, Network)
  168. d := s.AddDaemon(c, true, true)
  169. services := d.listServices(c)
  170. c.Assert(services, checker.NotNil)
  171. c.Assert(len(services), checker.Equals, 0, check.Commentf("services: %#v", services))
  172. }
  173. func (s *DockerSwarmSuite) TestApiSwarmServicesCreate(c *check.C) {
  174. testRequires(c, Network)
  175. d := s.AddDaemon(c, true, true)
  176. instances := 2
  177. id := d.createService(c, simpleTestService, setInstances(instances))
  178. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  179. service := d.getService(c, id)
  180. instances = 5
  181. d.updateService(c, service, setInstances(instances))
  182. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  183. d.removeService(c, service.ID)
  184. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 0)
  185. }
  186. func (s *DockerSwarmSuite) TestApiSwarmServicesMultipleAgents(c *check.C) {
  187. testRequires(c, Network)
  188. d1 := s.AddDaemon(c, true, true)
  189. d2 := s.AddDaemon(c, true, false)
  190. d3 := s.AddDaemon(c, true, false)
  191. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
  192. instances := 9
  193. id := d1.createService(c, simpleTestService, setInstances(instances))
  194. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  195. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  196. waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.GreaterThan, 0)
  197. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  198. // reconciliation on d2 node down
  199. c.Assert(d2.Stop(), checker.IsNil)
  200. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  201. // test downscaling
  202. instances = 5
  203. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  204. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  205. }
  206. func (s *DockerSwarmSuite) TestApiSwarmServicesCreateGlobal(c *check.C) {
  207. testRequires(c, Network)
  208. d1 := s.AddDaemon(c, true, true)
  209. d2 := s.AddDaemon(c, true, false)
  210. d3 := s.AddDaemon(c, true, false)
  211. d1.createService(c, simpleTestService, setGlobalMode)
  212. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, 1)
  213. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
  214. waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.Equals, 1)
  215. d4 := s.AddDaemon(c, true, false)
  216. d5 := s.AddDaemon(c, true, false)
  217. waitAndAssert(c, defaultReconciliationTimeout, d4.checkActiveContainerCount, checker.Equals, 1)
  218. waitAndAssert(c, defaultReconciliationTimeout, d5.checkActiveContainerCount, checker.Equals, 1)
  219. }
  220. func (s *DockerSwarmSuite) TestApiSwarmServicesUpdate(c *check.C) {
  221. const nodeCount = 3
  222. var daemons [nodeCount]*SwarmDaemon
  223. for i := 0; i < nodeCount; i++ {
  224. daemons[i] = s.AddDaemon(c, true, i == 0)
  225. }
  226. // wait for nodes ready
  227. waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
  228. // service image at start
  229. image1 := "busybox:latest"
  230. // target image in update
  231. image2 := "busybox:test"
  232. // create a different tag
  233. for _, d := range daemons {
  234. out, err := d.Cmd("tag", image1, image2)
  235. c.Assert(err, checker.IsNil, check.Commentf(out))
  236. }
  237. // create service
  238. instances := 5
  239. parallelism := 2
  240. id := daemons[0].createService(c, serviceForUpdate, setInstances(instances))
  241. // wait for tasks ready
  242. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  243. map[string]int{image1: instances})
  244. // issue service update
  245. service := daemons[0].getService(c, id)
  246. daemons[0].updateService(c, service, setImage(image2))
  247. // first batch
  248. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  249. map[string]int{image1: instances - parallelism, image2: parallelism})
  250. // 2nd batch
  251. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  252. map[string]int{image1: instances - 2*parallelism, image2: 2 * parallelism})
  253. // 3nd batch
  254. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  255. map[string]int{image2: instances})
  256. }
  257. func (s *DockerSwarmSuite) TestApiSwarmServicesStateReporting(c *check.C) {
  258. testRequires(c, Network)
  259. testRequires(c, SameHostDaemon)
  260. testRequires(c, DaemonIsLinux)
  261. d1 := s.AddDaemon(c, true, true)
  262. d2 := s.AddDaemon(c, true, true)
  263. d3 := s.AddDaemon(c, true, false)
  264. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept
  265. instances := 9
  266. d1.createService(c, simpleTestService, setInstances(instances))
  267. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  268. getContainers := func() map[string]*SwarmDaemon {
  269. m := make(map[string]*SwarmDaemon)
  270. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  271. for _, id := range d.activeContainers() {
  272. m[id] = d
  273. }
  274. }
  275. return m
  276. }
  277. containers := getContainers()
  278. c.Assert(containers, checker.HasLen, instances)
  279. var toRemove string
  280. for i := range containers {
  281. toRemove = i
  282. }
  283. _, err := containers[toRemove].Cmd("stop", toRemove)
  284. c.Assert(err, checker.IsNil)
  285. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  286. containers2 := getContainers()
  287. c.Assert(containers2, checker.HasLen, instances)
  288. for i := range containers {
  289. if i == toRemove {
  290. c.Assert(containers2[i], checker.IsNil)
  291. } else {
  292. c.Assert(containers2[i], checker.NotNil)
  293. }
  294. }
  295. containers = containers2
  296. for i := range containers {
  297. toRemove = i
  298. }
  299. // try with killing process outside of docker
  300. pidStr, err := containers[toRemove].Cmd("inspect", "-f", "{{.State.Pid}}", toRemove)
  301. c.Assert(err, checker.IsNil)
  302. pid, err := strconv.Atoi(strings.TrimSpace(pidStr))
  303. c.Assert(err, checker.IsNil)
  304. c.Assert(syscall.Kill(pid, syscall.SIGKILL), checker.IsNil)
  305. time.Sleep(time.Second) // give some time to handle the signal
  306. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  307. containers2 = getContainers()
  308. c.Assert(containers2, checker.HasLen, instances)
  309. for i := range containers {
  310. if i == toRemove {
  311. c.Assert(containers2[i], checker.IsNil)
  312. } else {
  313. c.Assert(containers2[i], checker.NotNil)
  314. }
  315. }
  316. }
  317. func (s *DockerSwarmSuite) TestApiSwarmLeaderProxy(c *check.C) {
  318. // add three managers, one of these is leader
  319. d1 := s.AddDaemon(c, true, true)
  320. d2 := s.AddDaemon(c, true, true)
  321. d3 := s.AddDaemon(c, true, true)
  322. // start a service by hitting each of the 3 managers
  323. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  324. s.Spec.Name = "test1"
  325. })
  326. d2.createService(c, simpleTestService, func(s *swarm.Service) {
  327. s.Spec.Name = "test2"
  328. })
  329. d3.createService(c, simpleTestService, func(s *swarm.Service) {
  330. s.Spec.Name = "test3"
  331. })
  332. // 3 services should be started now, because the requests were proxied to leader
  333. // query each node and make sure it returns 3 services
  334. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  335. services := d.listServices(c)
  336. c.Assert(services, checker.HasLen, 3)
  337. }
  338. }
  339. func (s *DockerSwarmSuite) TestApiSwarmLeaderElection(c *check.C) {
  340. // Create 3 nodes
  341. d1 := s.AddDaemon(c, true, true)
  342. d2 := s.AddDaemon(c, true, true)
  343. d3 := s.AddDaemon(c, true, true)
  344. // assert that the first node we made is the leader, and the other two are followers
  345. c.Assert(d1.getNode(c, d1.NodeID).ManagerStatus.Leader, checker.True)
  346. c.Assert(d1.getNode(c, d2.NodeID).ManagerStatus.Leader, checker.False)
  347. c.Assert(d1.getNode(c, d3.NodeID).ManagerStatus.Leader, checker.False)
  348. d1.Stop() // stop the leader
  349. var (
  350. leader *SwarmDaemon // keep track of leader
  351. followers []*SwarmDaemon // keep track of followers
  352. )
  353. checkLeader := func(nodes ...*SwarmDaemon) checkF {
  354. return func(c *check.C) (interface{}, check.CommentInterface) {
  355. // clear these out before each run
  356. leader = nil
  357. followers = nil
  358. for _, d := range nodes {
  359. if d.getNode(c, d.NodeID).ManagerStatus.Leader {
  360. leader = d
  361. } else {
  362. followers = append(followers, d)
  363. }
  364. }
  365. if leader == nil {
  366. return false, check.Commentf("no leader elected")
  367. }
  368. return true, check.Commentf("elected %v", leader.id)
  369. }
  370. }
  371. // wait for an election to occur
  372. waitAndAssert(c, defaultReconciliationTimeout, checkLeader(d2, d3), checker.True)
  373. // assert that we have a new leader
  374. c.Assert(leader, checker.NotNil)
  375. // Keep track of the current leader, since we want that to be chosen.
  376. stableleader := leader
  377. // add the d1, the initial leader, back
  378. d1.Start()
  379. // TODO(stevvooe): may need to wait for rejoin here
  380. // wait for possible election
  381. waitAndAssert(c, defaultReconciliationTimeout, checkLeader(d1, d2, d3), checker.True)
  382. // pick out the leader and the followers again
  383. // verify that we still only have 1 leader and 2 followers
  384. c.Assert(leader, checker.NotNil)
  385. c.Assert(followers, checker.HasLen, 2)
  386. // and that after we added d1 back, the leader hasn't changed
  387. c.Assert(leader.NodeID, checker.Equals, stableleader.NodeID)
  388. }
  389. func (s *DockerSwarmSuite) TestApiSwarmRaftQuorum(c *check.C) {
  390. testRequires(c, Network)
  391. d1 := s.AddDaemon(c, true, true)
  392. d2 := s.AddDaemon(c, true, true)
  393. d3 := s.AddDaemon(c, true, true)
  394. d1.createService(c, simpleTestService)
  395. c.Assert(d2.Stop(), checker.IsNil)
  396. // make sure there is a leader
  397. waitAndAssert(c, defaultReconciliationTimeout, d1.checkLeader, checker.IsNil)
  398. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  399. s.Spec.Name = "top1"
  400. })
  401. c.Assert(d3.Stop(), checker.IsNil)
  402. // make sure there is a leader
  403. waitAndAssert(c, defaultReconciliationTimeout, d1.checkLeader, checker.IsNil)
  404. var service swarm.Service
  405. simpleTestService(&service)
  406. service.Spec.Name = "top2"
  407. status, out, err := d1.SockRequest("POST", "/services/create", service.Spec)
  408. c.Assert(err, checker.IsNil)
  409. c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("deadline exceeded", string(out)))
  410. c.Assert(d2.Start(), checker.IsNil)
  411. // make sure there is a leader
  412. waitAndAssert(c, defaultReconciliationTimeout, d1.checkLeader, checker.IsNil)
  413. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  414. s.Spec.Name = "top3"
  415. })
  416. }
  417. func (s *DockerSwarmSuite) TestApiSwarmListNodes(c *check.C) {
  418. testRequires(c, Network)
  419. d1 := s.AddDaemon(c, true, true)
  420. d2 := s.AddDaemon(c, true, false)
  421. d3 := s.AddDaemon(c, true, false)
  422. nodes := d1.listNodes(c)
  423. c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes))
  424. loop0:
  425. for _, n := range nodes {
  426. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  427. if n.ID == d.NodeID {
  428. continue loop0
  429. }
  430. }
  431. c.Errorf("unknown nodeID %v", n.ID)
  432. }
  433. }
  434. func (s *DockerSwarmSuite) TestApiSwarmNodeUpdate(c *check.C) {
  435. testRequires(c, Network)
  436. d := s.AddDaemon(c, true, true)
  437. nodes := d.listNodes(c)
  438. d.updateNode(c, nodes[0].ID, func(n *swarm.Node) {
  439. n.Spec.Availability = swarm.NodeAvailabilityPause
  440. })
  441. n := d.getNode(c, nodes[0].ID)
  442. c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityPause)
  443. }
  444. func (s *DockerSwarmSuite) TestApiSwarmNodeDrainPause(c *check.C) {
  445. testRequires(c, Network)
  446. d1 := s.AddDaemon(c, true, true)
  447. d2 := s.AddDaemon(c, true, false)
  448. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
  449. // start a service, expect balanced distribution
  450. instances := 8
  451. id := d1.createService(c, simpleTestService, setInstances(instances))
  452. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  453. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  454. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  455. // drain d2, all containers should move to d1
  456. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  457. n.Spec.Availability = swarm.NodeAvailabilityDrain
  458. })
  459. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
  460. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0)
  461. // set d2 back to active
  462. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  463. n.Spec.Availability = swarm.NodeAvailabilityActive
  464. })
  465. instances = 1
  466. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  467. waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  468. instances = 8
  469. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  470. // drained node first so we don't get any old containers
  471. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  472. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  473. waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  474. d2ContainerCount := len(d2.activeContainers())
  475. // set d2 to paused, scale service up, only d1 gets new tasks
  476. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  477. n.Spec.Availability = swarm.NodeAvailabilityPause
  478. })
  479. instances = 14
  480. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  481. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances-d2ContainerCount)
  482. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, d2ContainerCount)
  483. }
  484. func (s *DockerSwarmSuite) TestApiSwarmLeaveRemovesContainer(c *check.C) {
  485. testRequires(c, Network)
  486. d := s.AddDaemon(c, true, true)
  487. instances := 2
  488. d.createService(c, simpleTestService, setInstances(instances))
  489. id, err := d.Cmd("run", "-d", "busybox", "top")
  490. c.Assert(err, checker.IsNil)
  491. id = strings.TrimSpace(id)
  492. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances+1)
  493. c.Assert(d.Leave(false), checker.NotNil)
  494. c.Assert(d.Leave(true), checker.IsNil)
  495. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 1)
  496. id2, err := d.Cmd("ps", "-q")
  497. c.Assert(err, checker.IsNil)
  498. c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
  499. }
  500. // #23629
  501. func (s *DockerSwarmSuite) TestApiSwarmLeaveOnPendingJoin(c *check.C) {
  502. s.AddDaemon(c, true, true)
  503. d2 := s.AddDaemon(c, false, false)
  504. id, err := d2.Cmd("run", "-d", "busybox", "top")
  505. c.Assert(err, checker.IsNil)
  506. id = strings.TrimSpace(id)
  507. go d2.Join(swarm.JoinRequest{
  508. RemoteAddrs: []string{"nosuchhost:1234"},
  509. })
  510. waitAndAssert(c, defaultReconciliationTimeout, d2.checkLocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  511. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
  512. id2, err := d2.Cmd("ps", "-q")
  513. c.Assert(err, checker.IsNil)
  514. c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
  515. }
  516. // #23705
  517. func (s *DockerSwarmSuite) TestApiSwarmRestoreOnPendingJoin(c *check.C) {
  518. d := s.AddDaemon(c, false, false)
  519. go d.Join(swarm.JoinRequest{
  520. RemoteAddrs: []string{"nosuchhost:1234"},
  521. })
  522. waitAndAssert(c, defaultReconciliationTimeout, d.checkLocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  523. c.Assert(d.Stop(), checker.IsNil)
  524. c.Assert(d.Start(), checker.IsNil)
  525. info, err := d.info()
  526. c.Assert(err, checker.IsNil)
  527. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  528. }
  529. func (s *DockerSwarmSuite) TestApiSwarmManagerRestore(c *check.C) {
  530. testRequires(c, Network)
  531. d1 := s.AddDaemon(c, true, true)
  532. instances := 2
  533. id := d1.createService(c, simpleTestService, setInstances(instances))
  534. d1.getService(c, id)
  535. d1.Stop()
  536. d1.Start()
  537. d1.getService(c, id)
  538. d2 := s.AddDaemon(c, true, true)
  539. d2.getService(c, id)
  540. d2.Stop()
  541. d2.Start()
  542. d2.getService(c, id)
  543. d3 := s.AddDaemon(c, true, true)
  544. d3.getService(c, id)
  545. d3.Stop()
  546. d3.Start()
  547. d3.getService(c, id)
  548. d3.Kill()
  549. time.Sleep(1 * time.Second) // time to handle signal
  550. d3.Start()
  551. d3.getService(c, id)
  552. }
  553. func (s *DockerSwarmSuite) TestApiSwarmScaleNoRollingUpdate(c *check.C) {
  554. testRequires(c, Network)
  555. d := s.AddDaemon(c, true, true)
  556. instances := 2
  557. id := d.createService(c, simpleTestService, setInstances(instances))
  558. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  559. containers := d.activeContainers()
  560. instances = 4
  561. d.updateService(c, d.getService(c, id), setInstances(instances))
  562. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  563. containers2 := d.activeContainers()
  564. loop0:
  565. for _, c1 := range containers {
  566. for _, c2 := range containers2 {
  567. if c1 == c2 {
  568. continue loop0
  569. }
  570. }
  571. c.Errorf("container %v not found in new set %#v", c1, containers2)
  572. }
  573. }
  574. func (s *DockerSwarmSuite) TestApiSwarmInvalidAddress(c *check.C) {
  575. d := s.AddDaemon(c, false, false)
  576. req := swarm.InitRequest{
  577. ListenAddr: "",
  578. }
  579. status, _, err := d.SockRequest("POST", "/swarm/init", req)
  580. c.Assert(err, checker.IsNil)
  581. c.Assert(status, checker.Equals, http.StatusInternalServerError)
  582. req2 := swarm.JoinRequest{
  583. ListenAddr: "0.0.0.0:2377",
  584. RemoteAddrs: []string{""},
  585. }
  586. status, _, err = d.SockRequest("POST", "/swarm/join", req2)
  587. c.Assert(err, checker.IsNil)
  588. c.Assert(status, checker.Equals, http.StatusInternalServerError)
  589. }
  590. func (s *DockerSwarmSuite) TestApiSwarmForceNewCluster(c *check.C) {
  591. d1 := s.AddDaemon(c, true, true)
  592. d2 := s.AddDaemon(c, true, true)
  593. instances := 2
  594. id := d1.createService(c, simpleTestService, setInstances(instances))
  595. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  596. c.Assert(d2.Stop(), checker.IsNil)
  597. time.Sleep(5 * time.Second)
  598. c.Assert(d1.Init(swarm.InitRequest{
  599. ForceNewCluster: true,
  600. Spec: swarm.Spec{},
  601. }), checker.IsNil)
  602. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
  603. d3 := s.AddDaemon(c, true, true)
  604. info, err := d3.info()
  605. c.Assert(err, checker.IsNil)
  606. c.Assert(info.ControlAvailable, checker.True)
  607. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  608. instances = 4
  609. d3.updateService(c, d3.getService(c, id), setInstances(instances))
  610. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  611. }
  612. func simpleTestService(s *swarm.Service) {
  613. ureplicas := uint64(1)
  614. restartDelay := time.Duration(100 * time.Millisecond)
  615. s.Spec = swarm.ServiceSpec{
  616. TaskTemplate: swarm.TaskSpec{
  617. ContainerSpec: swarm.ContainerSpec{
  618. Image: "busybox:latest",
  619. Command: []string{"/bin/top"},
  620. },
  621. RestartPolicy: &swarm.RestartPolicy{
  622. Delay: &restartDelay,
  623. },
  624. },
  625. Mode: swarm.ServiceMode{
  626. Replicated: &swarm.ReplicatedService{
  627. Replicas: &ureplicas,
  628. },
  629. },
  630. }
  631. s.Spec.Name = "top"
  632. }
  633. func serviceForUpdate(s *swarm.Service) {
  634. ureplicas := uint64(1)
  635. restartDelay := time.Duration(100 * time.Millisecond)
  636. s.Spec = swarm.ServiceSpec{
  637. TaskTemplate: swarm.TaskSpec{
  638. ContainerSpec: swarm.ContainerSpec{
  639. Image: "busybox:latest",
  640. Command: []string{"/bin/top"},
  641. },
  642. RestartPolicy: &swarm.RestartPolicy{
  643. Delay: &restartDelay,
  644. },
  645. },
  646. Mode: swarm.ServiceMode{
  647. Replicated: &swarm.ReplicatedService{
  648. Replicas: &ureplicas,
  649. },
  650. },
  651. UpdateConfig: &swarm.UpdateConfig{
  652. Parallelism: 2,
  653. Delay: 4 * time.Second,
  654. FailureAction: swarm.UpdateFailureActionContinue,
  655. },
  656. }
  657. s.Spec.Name = "updatetest"
  658. }
  659. func setInstances(replicas int) serviceConstructor {
  660. ureplicas := uint64(replicas)
  661. return func(s *swarm.Service) {
  662. s.Spec.Mode = swarm.ServiceMode{
  663. Replicated: &swarm.ReplicatedService{
  664. Replicas: &ureplicas,
  665. },
  666. }
  667. }
  668. }
  669. func setImage(image string) serviceConstructor {
  670. return func(s *swarm.Service) {
  671. s.Spec.TaskTemplate.ContainerSpec.Image = image
  672. }
  673. }
  674. func setGlobalMode(s *swarm.Service) {
  675. s.Spec.Mode = swarm.ServiceMode{
  676. Global: &swarm.GlobalService{},
  677. }
  678. }
  679. func checkClusterHealth(c *check.C, cl []*SwarmDaemon, managerCount, workerCount int) {
  680. var totalMCount, totalWCount int
  681. for _, d := range cl {
  682. var (
  683. info swarm.Info
  684. err error
  685. )
  686. // check info in a waitAndAssert, because if the cluster doesn't have a leader, `info` will return an error
  687. checkInfo := func(c *check.C) (interface{}, check.CommentInterface) {
  688. info, err = d.info()
  689. return err, check.Commentf("cluster not ready in time")
  690. }
  691. waitAndAssert(c, defaultReconciliationTimeout, checkInfo, checker.IsNil)
  692. if !info.ControlAvailable {
  693. totalWCount++
  694. continue
  695. }
  696. var leaderFound bool
  697. totalMCount++
  698. var mCount, wCount int
  699. for _, n := range d.listNodes(c) {
  700. waitReady := func(c *check.C) (interface{}, check.CommentInterface) {
  701. if n.Status.State == swarm.NodeStateReady {
  702. return true, nil
  703. }
  704. nn := d.getNode(c, n.ID)
  705. n = *nn
  706. return n.Status.State == swarm.NodeStateReady, check.Commentf("state of node %s, reported by %s", n.ID, d.Info.NodeID)
  707. }
  708. waitAndAssert(c, defaultReconciliationTimeout, waitReady, checker.True)
  709. waitActive := func(c *check.C) (interface{}, check.CommentInterface) {
  710. if n.Spec.Availability == swarm.NodeAvailabilityActive {
  711. return true, nil
  712. }
  713. nn := d.getNode(c, n.ID)
  714. n = *nn
  715. return n.Spec.Availability == swarm.NodeAvailabilityActive, check.Commentf("availability of node %s, reported by %s", n.ID, d.Info.NodeID)
  716. }
  717. waitAndAssert(c, defaultReconciliationTimeout, waitActive, checker.True)
  718. if n.Spec.Role == swarm.NodeRoleManager {
  719. c.Assert(n.ManagerStatus, checker.NotNil, check.Commentf("manager status of node %s (manager), reported by %s", n.ID, d.Info.NodeID))
  720. if n.ManagerStatus.Leader {
  721. leaderFound = true
  722. }
  723. mCount++
  724. } else {
  725. c.Assert(n.ManagerStatus, checker.IsNil, check.Commentf("manager status of node %s (worker), reported by %s", n.ID, d.Info.NodeID))
  726. wCount++
  727. }
  728. }
  729. c.Assert(leaderFound, checker.True, check.Commentf("lack of leader reported by node %s", info.NodeID))
  730. c.Assert(mCount, checker.Equals, managerCount, check.Commentf("managers count reported by node %s", info.NodeID))
  731. c.Assert(wCount, checker.Equals, workerCount, check.Commentf("workers count reported by node %s", info.NodeID))
  732. }
  733. c.Assert(totalMCount, checker.Equals, managerCount)
  734. c.Assert(totalWCount, checker.Equals, workerCount)
  735. }
  736. func (s *DockerSwarmSuite) TestApiSwarmRestartCluster(c *check.C) {
  737. mCount, wCount := 5, 1
  738. var nodes []*SwarmDaemon
  739. for i := 0; i < mCount; i++ {
  740. manager := s.AddDaemon(c, true, true)
  741. info, err := manager.info()
  742. c.Assert(err, checker.IsNil)
  743. c.Assert(info.ControlAvailable, checker.True)
  744. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  745. nodes = append(nodes, manager)
  746. }
  747. for i := 0; i < wCount; i++ {
  748. worker := s.AddDaemon(c, true, false)
  749. info, err := worker.info()
  750. c.Assert(err, checker.IsNil)
  751. c.Assert(info.ControlAvailable, checker.False)
  752. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  753. nodes = append(nodes, worker)
  754. }
  755. // stop whole cluster
  756. {
  757. var wg sync.WaitGroup
  758. wg.Add(len(nodes))
  759. errs := make(chan error, len(nodes))
  760. for _, d := range nodes {
  761. go func(daemon *SwarmDaemon) {
  762. defer wg.Done()
  763. if err := daemon.Stop(); err != nil {
  764. errs <- err
  765. }
  766. if root := os.Getenv("DOCKER_REMAP_ROOT"); root != "" {
  767. daemon.root = filepath.Dir(daemon.root)
  768. }
  769. }(d)
  770. }
  771. wg.Wait()
  772. close(errs)
  773. for err := range errs {
  774. c.Assert(err, check.IsNil)
  775. }
  776. }
  777. // start whole cluster
  778. {
  779. var wg sync.WaitGroup
  780. wg.Add(len(nodes))
  781. errs := make(chan error, len(nodes))
  782. for _, d := range nodes {
  783. go func(daemon *SwarmDaemon) {
  784. defer wg.Done()
  785. if err := daemon.Start("--iptables=false"); err != nil {
  786. errs <- err
  787. }
  788. }(d)
  789. }
  790. wg.Wait()
  791. close(errs)
  792. for err := range errs {
  793. c.Assert(err, check.IsNil)
  794. }
  795. }
  796. checkClusterHealth(c, nodes, mCount, wCount)
  797. }