docker_api_swarm_test.go 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003
  1. // +build !windows
  2. package main
  3. import (
  4. "fmt"
  5. "net/http"
  6. "os"
  7. "path/filepath"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. "syscall"
  12. "time"
  13. "github.com/docker/docker/pkg/integration/checker"
  14. "github.com/docker/engine-api/types/swarm"
  15. "github.com/go-check/check"
  16. )
// defaultReconciliationTimeout is the timeout the tests in this file pass to
// waitAndAssert while waiting for the swarm to reach an expected state.
var defaultReconciliationTimeout = 30 * time.Second
  18. func (s *DockerSwarmSuite) TestApiSwarmInit(c *check.C) {
  19. testRequires(c, Network)
  20. // todo: should find a better way to verify that components are running than /info
  21. d1 := s.AddDaemon(c, true, true)
  22. info, err := d1.info()
  23. c.Assert(err, checker.IsNil)
  24. c.Assert(info.ControlAvailable, checker.True)
  25. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  26. d2 := s.AddDaemon(c, true, false)
  27. info, err = d2.info()
  28. c.Assert(err, checker.IsNil)
  29. c.Assert(info.ControlAvailable, checker.False)
  30. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  31. // Leaving cluster
  32. c.Assert(d2.Leave(false), checker.IsNil)
  33. info, err = d2.info()
  34. c.Assert(err, checker.IsNil)
  35. c.Assert(info.ControlAvailable, checker.False)
  36. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  37. c.Assert(d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  38. info, err = d2.info()
  39. c.Assert(err, checker.IsNil)
  40. c.Assert(info.ControlAvailable, checker.False)
  41. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  42. // Current state restoring after restarts
  43. err = d1.Stop()
  44. c.Assert(err, checker.IsNil)
  45. err = d2.Stop()
  46. c.Assert(err, checker.IsNil)
  47. err = d1.Start()
  48. c.Assert(err, checker.IsNil)
  49. err = d2.Start()
  50. c.Assert(err, checker.IsNil)
  51. info, err = d1.info()
  52. c.Assert(err, checker.IsNil)
  53. c.Assert(info.ControlAvailable, checker.True)
  54. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  55. info, err = d2.info()
  56. c.Assert(err, checker.IsNil)
  57. c.Assert(info.ControlAvailable, checker.False)
  58. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  59. }
  60. func (s *DockerSwarmSuite) TestApiSwarmManualAcceptance(c *check.C) {
  61. testRequires(c, Network)
  62. s.testAPISwarmManualAcceptance(c, "")
  63. }
  64. func (s *DockerSwarmSuite) TestApiSwarmManualAcceptanceSecret(c *check.C) {
  65. testRequires(c, Network)
  66. s.testAPISwarmManualAcceptance(c, "foobaz")
  67. }
  68. func (s *DockerSwarmSuite) testAPISwarmManualAcceptance(c *check.C, secret string) {
  69. d1 := s.AddDaemon(c, false, false)
  70. c.Assert(d1.Init(swarm.InitRequest{
  71. Spec: swarm.Spec{
  72. AcceptancePolicy: swarm.AcceptancePolicy{
  73. Policies: []swarm.Policy{
  74. {Role: swarm.NodeRoleWorker, Secret: &secret},
  75. {Role: swarm.NodeRoleManager, Secret: &secret},
  76. },
  77. },
  78. },
  79. }), checker.IsNil)
  80. d2 := s.AddDaemon(c, false, false)
  81. err := d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
  82. c.Assert(err, checker.NotNil)
  83. if secret == "" {
  84. c.Assert(err.Error(), checker.Contains, "needs to be accepted")
  85. info, err := d2.info()
  86. c.Assert(err, checker.IsNil)
  87. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStatePending)
  88. c.Assert(d2.Leave(false), checker.IsNil)
  89. info, err = d2.info()
  90. c.Assert(err, checker.IsNil)
  91. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  92. } else {
  93. c.Assert(err.Error(), checker.Contains, "valid secret token is necessary")
  94. info, err := d2.info()
  95. c.Assert(err, checker.IsNil)
  96. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  97. }
  98. d3 := s.AddDaemon(c, false, false)
  99. c.Assert(d3.Join(swarm.JoinRequest{Secret: secret, RemoteAddrs: []string{d1.listenAddr}}), checker.NotNil)
  100. info, err := d3.info()
  101. c.Assert(err, checker.IsNil)
  102. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStatePending)
  103. c.Assert(len(info.NodeID), checker.GreaterThan, 5)
  104. d1.updateNode(c, info.NodeID, func(n *swarm.Node) {
  105. n.Spec.Membership = swarm.NodeMembershipAccepted
  106. })
  107. waitAndAssert(c, defaultReconciliationTimeout, d3.checkLocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  108. }
  109. func (s *DockerSwarmSuite) TestApiSwarmSecretAcceptance(c *check.C) {
  110. testRequires(c, Network)
  111. d1 := s.AddDaemon(c, false, false)
  112. secret := "foobar"
  113. c.Assert(d1.Init(swarm.InitRequest{
  114. Spec: swarm.Spec{
  115. AcceptancePolicy: swarm.AcceptancePolicy{
  116. Policies: []swarm.Policy{
  117. {Role: swarm.NodeRoleWorker, Autoaccept: true, Secret: &secret},
  118. {Role: swarm.NodeRoleManager, Secret: &secret},
  119. },
  120. },
  121. },
  122. }), checker.IsNil)
  123. d2 := s.AddDaemon(c, false, false)
  124. err := d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
  125. c.Assert(err, checker.NotNil)
  126. c.Assert(err.Error(), checker.Contains, "secret token is necessary")
  127. info, err := d2.info()
  128. c.Assert(err, checker.IsNil)
  129. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  130. err = d2.Join(swarm.JoinRequest{Secret: "foobaz", RemoteAddrs: []string{d1.listenAddr}})
  131. c.Assert(err, checker.NotNil)
  132. c.Assert(err.Error(), checker.Contains, "secret token is necessary")
  133. info, err = d2.info()
  134. c.Assert(err, checker.IsNil)
  135. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  136. c.Assert(d2.Join(swarm.JoinRequest{Secret: "foobar", RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  137. info, err = d2.info()
  138. c.Assert(err, checker.IsNil)
  139. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  140. c.Assert(d2.Leave(false), checker.IsNil)
  141. info, err = d2.info()
  142. c.Assert(err, checker.IsNil)
  143. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  144. // change secret
  145. d1.updateSwarm(c, func(s *swarm.Spec) {
  146. for i := range s.AcceptancePolicy.Policies {
  147. p := "foobaz"
  148. s.AcceptancePolicy.Policies[i].Secret = &p
  149. }
  150. })
  151. err = d2.Join(swarm.JoinRequest{Secret: "foobar", RemoteAddrs: []string{d1.listenAddr}})
  152. c.Assert(err, checker.NotNil)
  153. c.Assert(err.Error(), checker.Contains, "secret token is necessary")
  154. info, err = d2.info()
  155. c.Assert(err, checker.IsNil)
  156. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  157. c.Assert(d2.Join(swarm.JoinRequest{Secret: "foobaz", RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  158. info, err = d2.info()
  159. c.Assert(err, checker.IsNil)
  160. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  161. c.Assert(d2.Leave(false), checker.IsNil)
  162. info, err = d2.info()
  163. c.Assert(err, checker.IsNil)
  164. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  165. // change policy, don't change secret
  166. d1.updateSwarm(c, func(s *swarm.Spec) {
  167. for i, p := range s.AcceptancePolicy.Policies {
  168. if p.Role == swarm.NodeRoleManager {
  169. s.AcceptancePolicy.Policies[i].Autoaccept = false
  170. }
  171. s.AcceptancePolicy.Policies[i].Secret = nil
  172. }
  173. })
  174. err = d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
  175. c.Assert(err, checker.NotNil)
  176. c.Assert(err.Error(), checker.Contains, "secret token is necessary")
  177. info, err = d2.info()
  178. c.Assert(err, checker.IsNil)
  179. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  180. c.Assert(d2.Join(swarm.JoinRequest{Secret: "foobaz", RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  181. info, err = d2.info()
  182. c.Assert(err, checker.IsNil)
  183. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  184. c.Assert(d2.Leave(false), checker.IsNil)
  185. info, err = d2.info()
  186. c.Assert(err, checker.IsNil)
  187. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  188. // clear secret
  189. d1.updateSwarm(c, func(s *swarm.Spec) {
  190. for i := range s.AcceptancePolicy.Policies {
  191. p := ""
  192. s.AcceptancePolicy.Policies[i].Secret = &p
  193. }
  194. })
  195. c.Assert(d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  196. info, err = d2.info()
  197. c.Assert(err, checker.IsNil)
  198. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  199. c.Assert(d2.Leave(false), checker.IsNil)
  200. info, err = d2.info()
  201. c.Assert(err, checker.IsNil)
  202. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  203. }
  204. func (s *DockerSwarmSuite) TestApiSwarmCAHash(c *check.C) {
  205. testRequires(c, Network)
  206. d1 := s.AddDaemon(c, true, true)
  207. d2 := s.AddDaemon(c, false, false)
  208. err := d2.Join(swarm.JoinRequest{CACertHash: "foobar", RemoteAddrs: []string{d1.listenAddr}})
  209. c.Assert(err, checker.NotNil)
  210. c.Assert(err.Error(), checker.Contains, "invalid checksum digest format")
  211. c.Assert(len(d1.CACertHash), checker.GreaterThan, 0)
  212. c.Assert(d2.Join(swarm.JoinRequest{CACertHash: d1.CACertHash, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
  213. }
  214. func (s *DockerSwarmSuite) TestApiSwarmPromoteDemote(c *check.C) {
  215. testRequires(c, Network)
  216. d1 := s.AddDaemon(c, false, false)
  217. c.Assert(d1.Init(swarm.InitRequest{
  218. Spec: swarm.Spec{
  219. AcceptancePolicy: swarm.AcceptancePolicy{
  220. Policies: []swarm.Policy{
  221. {Role: swarm.NodeRoleWorker, Autoaccept: true},
  222. {Role: swarm.NodeRoleManager},
  223. },
  224. },
  225. },
  226. }), checker.IsNil)
  227. d2 := s.AddDaemon(c, true, false)
  228. info, err := d2.info()
  229. c.Assert(err, checker.IsNil)
  230. c.Assert(info.ControlAvailable, checker.False)
  231. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  232. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  233. n.Spec.Role = swarm.NodeRoleManager
  234. })
  235. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True)
  236. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  237. n.Spec.Role = swarm.NodeRoleWorker
  238. })
  239. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.False)
  240. // Demoting last node should fail
  241. node := d1.getNode(c, d1.NodeID)
  242. node.Spec.Role = swarm.NodeRoleWorker
  243. url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index)
  244. status, out, err := d1.SockRequest("POST", url, node.Spec)
  245. c.Assert(err, checker.IsNil)
  246. c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("output: %q", string(out)))
  247. c.Assert(string(out), checker.Contains, "last manager of the swarm")
  248. info, err = d1.info()
  249. c.Assert(err, checker.IsNil)
  250. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  251. c.Assert(info.ControlAvailable, checker.True)
  252. // Promote already demoted node
  253. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  254. n.Spec.Role = swarm.NodeRoleManager
  255. })
  256. waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True)
  257. }
  258. func (s *DockerSwarmSuite) TestApiSwarmServicesEmptyList(c *check.C) {
  259. testRequires(c, Network)
  260. d := s.AddDaemon(c, true, true)
  261. services := d.listServices(c)
  262. c.Assert(services, checker.NotNil)
  263. c.Assert(len(services), checker.Equals, 0, check.Commentf("services: %#v", services))
  264. }
  265. func (s *DockerSwarmSuite) TestApiSwarmServicesCreate(c *check.C) {
  266. testRequires(c, Network)
  267. d := s.AddDaemon(c, true, true)
  268. instances := 2
  269. id := d.createService(c, simpleTestService, setInstances(instances))
  270. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  271. service := d.getService(c, id)
  272. instances = 5
  273. d.updateService(c, service, setInstances(instances))
  274. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  275. d.removeService(c, service.ID)
  276. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 0)
  277. }
  278. func (s *DockerSwarmSuite) TestApiSwarmServicesMultipleAgents(c *check.C) {
  279. testRequires(c, Network)
  280. d1 := s.AddDaemon(c, true, true)
  281. d2 := s.AddDaemon(c, true, false)
  282. d3 := s.AddDaemon(c, true, false)
  283. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
  284. instances := 9
  285. id := d1.createService(c, simpleTestService, setInstances(instances))
  286. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  287. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  288. waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.GreaterThan, 0)
  289. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  290. // reconciliation on d2 node down
  291. c.Assert(d2.Stop(), checker.IsNil)
  292. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  293. // test downscaling
  294. instances = 5
  295. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  296. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  297. }
  298. func (s *DockerSwarmSuite) TestApiSwarmServicesCreateGlobal(c *check.C) {
  299. testRequires(c, Network)
  300. d1 := s.AddDaemon(c, true, true)
  301. d2 := s.AddDaemon(c, true, false)
  302. d3 := s.AddDaemon(c, true, false)
  303. d1.createService(c, simpleTestService, setGlobalMode)
  304. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, 1)
  305. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
  306. waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.Equals, 1)
  307. d4 := s.AddDaemon(c, true, false)
  308. d5 := s.AddDaemon(c, true, false)
  309. waitAndAssert(c, defaultReconciliationTimeout, d4.checkActiveContainerCount, checker.Equals, 1)
  310. waitAndAssert(c, defaultReconciliationTimeout, d5.checkActiveContainerCount, checker.Equals, 1)
  311. }
  312. func (s *DockerSwarmSuite) TestApiSwarmServicesUpdate(c *check.C) {
  313. const nodeCount = 3
  314. var daemons [nodeCount]*SwarmDaemon
  315. for i := 0; i < nodeCount; i++ {
  316. daemons[i] = s.AddDaemon(c, true, i == 0)
  317. }
  318. // wait for nodes ready
  319. waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
  320. // service image at start
  321. image1 := "busybox:latest"
  322. // target image in update
  323. image2 := "busybox:test"
  324. // create a different tag
  325. for _, d := range daemons {
  326. out, err := d.Cmd("tag", image1, image2)
  327. c.Assert(err, checker.IsNil, check.Commentf(out))
  328. }
  329. // create service
  330. instances := 5
  331. parallelism := 2
  332. id := daemons[0].createService(c, serviceForUpdate, setInstances(instances))
  333. // wait for tasks ready
  334. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  335. map[string]int{image1: instances})
  336. // issue service update
  337. service := daemons[0].getService(c, id)
  338. daemons[0].updateService(c, service, setImage(image2))
  339. // first batch
  340. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  341. map[string]int{image1: instances - parallelism, image2: parallelism})
  342. // 2nd batch
  343. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  344. map[string]int{image1: instances - 2*parallelism, image2: 2 * parallelism})
  345. // 3nd batch
  346. waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
  347. map[string]int{image2: instances})
  348. }
  349. func (s *DockerSwarmSuite) TestApiSwarmServicesStateReporting(c *check.C) {
  350. testRequires(c, Network)
  351. testRequires(c, SameHostDaemon)
  352. testRequires(c, DaemonIsLinux)
  353. d1 := s.AddDaemon(c, true, true)
  354. d2 := s.AddDaemon(c, true, true)
  355. d3 := s.AddDaemon(c, true, false)
  356. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept
  357. instances := 9
  358. d1.createService(c, simpleTestService, setInstances(instances))
  359. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  360. getContainers := func() map[string]*SwarmDaemon {
  361. m := make(map[string]*SwarmDaemon)
  362. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  363. for _, id := range d.activeContainers() {
  364. m[id] = d
  365. }
  366. }
  367. return m
  368. }
  369. containers := getContainers()
  370. c.Assert(containers, checker.HasLen, instances)
  371. var toRemove string
  372. for i := range containers {
  373. toRemove = i
  374. }
  375. _, err := containers[toRemove].Cmd("stop", toRemove)
  376. c.Assert(err, checker.IsNil)
  377. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  378. containers2 := getContainers()
  379. c.Assert(containers2, checker.HasLen, instances)
  380. for i := range containers {
  381. if i == toRemove {
  382. c.Assert(containers2[i], checker.IsNil)
  383. } else {
  384. c.Assert(containers2[i], checker.NotNil)
  385. }
  386. }
  387. containers = containers2
  388. for i := range containers {
  389. toRemove = i
  390. }
  391. // try with killing process outside of docker
  392. pidStr, err := containers[toRemove].Cmd("inspect", "-f", "{{.State.Pid}}", toRemove)
  393. c.Assert(err, checker.IsNil)
  394. pid, err := strconv.Atoi(strings.TrimSpace(pidStr))
  395. c.Assert(err, checker.IsNil)
  396. c.Assert(syscall.Kill(pid, syscall.SIGKILL), checker.IsNil)
  397. time.Sleep(time.Second) // give some time to handle the signal
  398. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  399. containers2 = getContainers()
  400. c.Assert(containers2, checker.HasLen, instances)
  401. for i := range containers {
  402. if i == toRemove {
  403. c.Assert(containers2[i], checker.IsNil)
  404. } else {
  405. c.Assert(containers2[i], checker.NotNil)
  406. }
  407. }
  408. }
  409. func (s *DockerSwarmSuite) TestApiSwarmLeaderElection(c *check.C) {
  410. // Create 3 nodes
  411. d1 := s.AddDaemon(c, true, true)
  412. d2 := s.AddDaemon(c, true, true)
  413. d3 := s.AddDaemon(c, true, true)
  414. // assert that the first node we made is the leader, and the other two are followers
  415. c.Assert(d1.getNode(c, d1.NodeID).ManagerStatus.Leader, checker.True)
  416. c.Assert(d1.getNode(c, d2.NodeID).ManagerStatus.Leader, checker.False)
  417. c.Assert(d1.getNode(c, d3.NodeID).ManagerStatus.Leader, checker.False)
  418. leader := d1
  419. // stop the leader
  420. leader.Stop()
  421. // wait for an election to occur
  422. var newleader *SwarmDaemon
  423. for _, d := range []*SwarmDaemon{d2, d3} {
  424. if d.getNode(c, d.NodeID).ManagerStatus.Leader {
  425. newleader = d
  426. break
  427. }
  428. }
  429. // assert that we have a new leader
  430. c.Assert(newleader, checker.NotNil)
  431. // add the old leader back
  432. leader.Start()
  433. // clear leader and reinit the followers list
  434. followers := make([]*SwarmDaemon, 0, 3)
  435. // pick out the leader and the followers again
  436. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  437. if d1.getNode(c, d.NodeID).ManagerStatus.Leader {
  438. leader = d
  439. } else {
  440. followers = append(followers, d)
  441. }
  442. }
  443. // verify that we still only have 1 leader and 2 followers
  444. c.Assert(leader, checker.NotNil)
  445. c.Assert(followers, checker.HasLen, 2)
  446. // and that after we added d1 back, the leader hasn't changed
  447. c.Assert(leader.NodeID, checker.Equals, newleader.NodeID)
  448. }
  449. func (s *DockerSwarmSuite) TestApiSwarmRaftQuorum(c *check.C) {
  450. testRequires(c, Network)
  451. d1 := s.AddDaemon(c, true, true)
  452. d2 := s.AddDaemon(c, true, true)
  453. d3 := s.AddDaemon(c, true, true)
  454. d1.createService(c, simpleTestService)
  455. c.Assert(d2.Stop(), checker.IsNil)
  456. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  457. s.Spec.Name = "top1"
  458. })
  459. c.Assert(d3.Stop(), checker.IsNil)
  460. var service swarm.Service
  461. simpleTestService(&service)
  462. service.Spec.Name = "top2"
  463. status, out, err := d1.SockRequest("POST", "/services/create", service.Spec)
  464. c.Assert(err, checker.IsNil)
  465. c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("deadline exceeded", string(out)))
  466. c.Assert(d2.Start(), checker.IsNil)
  467. d1.createService(c, simpleTestService, func(s *swarm.Service) {
  468. s.Spec.Name = "top3"
  469. })
  470. }
  471. func (s *DockerSwarmSuite) TestApiSwarmListNodes(c *check.C) {
  472. testRequires(c, Network)
  473. d1 := s.AddDaemon(c, true, true)
  474. d2 := s.AddDaemon(c, true, false)
  475. d3 := s.AddDaemon(c, true, false)
  476. nodes := d1.listNodes(c)
  477. c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes))
  478. loop0:
  479. for _, n := range nodes {
  480. for _, d := range []*SwarmDaemon{d1, d2, d3} {
  481. if n.ID == d.NodeID {
  482. continue loop0
  483. }
  484. }
  485. c.Errorf("unknown nodeID %v", n.ID)
  486. }
  487. }
  488. func (s *DockerSwarmSuite) TestApiSwarmNodeUpdate(c *check.C) {
  489. testRequires(c, Network)
  490. d := s.AddDaemon(c, true, true)
  491. nodes := d.listNodes(c)
  492. d.updateNode(c, nodes[0].ID, func(n *swarm.Node) {
  493. n.Spec.Availability = swarm.NodeAvailabilityPause
  494. })
  495. n := d.getNode(c, nodes[0].ID)
  496. c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityPause)
  497. }
  498. func (s *DockerSwarmSuite) TestApiSwarmNodeDrainPause(c *check.C) {
  499. testRequires(c, Network)
  500. d1 := s.AddDaemon(c, true, true)
  501. d2 := s.AddDaemon(c, true, false)
  502. time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
  503. // start a service, expect balanced distribution
  504. instances := 8
  505. id := d1.createService(c, simpleTestService, setInstances(instances))
  506. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  507. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  508. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  509. // drain d2, all containers should move to d1
  510. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  511. n.Spec.Availability = swarm.NodeAvailabilityDrain
  512. })
  513. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
  514. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0)
  515. // set d2 back to active
  516. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  517. n.Spec.Availability = swarm.NodeAvailabilityActive
  518. })
  519. instances = 1
  520. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  521. waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  522. instances = 8
  523. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  524. // drained node first so we don't get any old containers
  525. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
  526. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
  527. waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  528. d2ContainerCount := len(d2.activeContainers())
  529. // set d2 to paused, scale service up, only d1 gets new tasks
  530. d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
  531. n.Spec.Availability = swarm.NodeAvailabilityPause
  532. })
  533. instances = 14
  534. d1.updateService(c, d1.getService(c, id), setInstances(instances))
  535. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances-d2ContainerCount)
  536. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, d2ContainerCount)
  537. }
  538. func (s *DockerSwarmSuite) TestApiSwarmLeaveRemovesContainer(c *check.C) {
  539. testRequires(c, Network)
  540. d := s.AddDaemon(c, true, true)
  541. instances := 2
  542. d.createService(c, simpleTestService, setInstances(instances))
  543. id, err := d.Cmd("run", "-d", "busybox", "top")
  544. c.Assert(err, checker.IsNil)
  545. id = strings.TrimSpace(id)
  546. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances+1)
  547. c.Assert(d.Leave(false), checker.NotNil)
  548. c.Assert(d.Leave(true), checker.IsNil)
  549. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 1)
  550. id2, err := d.Cmd("ps", "-q")
  551. c.Assert(err, checker.IsNil)
  552. c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
  553. }
  554. // #23629
  555. func (s *DockerSwarmSuite) TestApiSwarmLeaveOnPendingJoin(c *check.C) {
  556. s.AddDaemon(c, true, true)
  557. d2 := s.AddDaemon(c, false, false)
  558. id, err := d2.Cmd("run", "-d", "busybox", "top")
  559. c.Assert(err, checker.IsNil)
  560. id = strings.TrimSpace(id)
  561. go d2.Join(swarm.JoinRequest{
  562. RemoteAddrs: []string{"nosuchhost:1234"},
  563. }) // will block on pending state
  564. waitAndAssert(c, defaultReconciliationTimeout, d2.checkLocalNodeState, checker.Equals, swarm.LocalNodeStatePending)
  565. c.Assert(d2.Leave(true), checker.IsNil)
  566. waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
  567. id2, err := d2.Cmd("ps", "-q")
  568. c.Assert(err, checker.IsNil)
  569. c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
  570. }
  571. // #23705
  572. func (s *DockerSwarmSuite) TestApiSwarmRestoreOnPendingJoin(c *check.C) {
  573. d := s.AddDaemon(c, false, false)
  574. go d.Join(swarm.JoinRequest{
  575. RemoteAddrs: []string{"nosuchhost:1234"},
  576. }) // will block on pending state
  577. waitAndAssert(c, defaultReconciliationTimeout, d.checkLocalNodeState, checker.Equals, swarm.LocalNodeStatePending)
  578. c.Assert(d.Stop(), checker.IsNil)
  579. c.Assert(d.Start(), checker.IsNil)
  580. info, err := d.info()
  581. c.Assert(err, checker.IsNil)
  582. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
  583. }
  584. func (s *DockerSwarmSuite) TestApiSwarmManagerRestore(c *check.C) {
  585. testRequires(c, Network)
  586. d1 := s.AddDaemon(c, true, true)
  587. instances := 2
  588. id := d1.createService(c, simpleTestService, setInstances(instances))
  589. d1.getService(c, id)
  590. d1.Stop()
  591. d1.Start()
  592. d1.getService(c, id)
  593. d2 := s.AddDaemon(c, true, true)
  594. d2.getService(c, id)
  595. d2.Stop()
  596. d2.Start()
  597. d2.getService(c, id)
  598. d3 := s.AddDaemon(c, true, true)
  599. d3.getService(c, id)
  600. d3.Stop()
  601. d3.Start()
  602. d3.getService(c, id)
  603. d3.Kill()
  604. time.Sleep(1 * time.Second) // time to handle signal
  605. d3.Start()
  606. d3.getService(c, id)
  607. }
  608. func (s *DockerSwarmSuite) TestApiSwarmScaleNoRollingUpdate(c *check.C) {
  609. testRequires(c, Network)
  610. d := s.AddDaemon(c, true, true)
  611. instances := 2
  612. id := d.createService(c, simpleTestService, setInstances(instances))
  613. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  614. containers := d.activeContainers()
  615. instances = 4
  616. d.updateService(c, d.getService(c, id), setInstances(instances))
  617. waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
  618. containers2 := d.activeContainers()
  619. loop0:
  620. for _, c1 := range containers {
  621. for _, c2 := range containers2 {
  622. if c1 == c2 {
  623. continue loop0
  624. }
  625. }
  626. c.Errorf("container %v not found in new set %#v", c1, containers2)
  627. }
  628. }
  629. func (s *DockerSwarmSuite) TestApiSwarmInvalidAddress(c *check.C) {
  630. d := s.AddDaemon(c, false, false)
  631. req := swarm.InitRequest{
  632. ListenAddr: "",
  633. }
  634. status, _, err := d.SockRequest("POST", "/swarm/init", req)
  635. c.Assert(err, checker.IsNil)
  636. c.Assert(status, checker.Equals, http.StatusInternalServerError)
  637. req2 := swarm.JoinRequest{
  638. ListenAddr: "0.0.0.0:2377",
  639. RemoteAddrs: []string{""},
  640. }
  641. status, _, err = d.SockRequest("POST", "/swarm/join", req2)
  642. c.Assert(err, checker.IsNil)
  643. c.Assert(status, checker.Equals, http.StatusInternalServerError)
  644. }
  645. func (s *DockerSwarmSuite) TestApiSwarmForceNewCluster(c *check.C) {
  646. d1 := s.AddDaemon(c, true, true)
  647. d2 := s.AddDaemon(c, true, true)
  648. instances := 2
  649. id := d1.createService(c, simpleTestService, setInstances(instances))
  650. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
  651. c.Assert(d2.Stop(), checker.IsNil)
  652. time.Sleep(5 * time.Second)
  653. c.Assert(d1.Init(swarm.InitRequest{
  654. ForceNewCluster: true,
  655. Spec: swarm.Spec{
  656. AcceptancePolicy: autoAcceptPolicy,
  657. },
  658. }), checker.IsNil)
  659. waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
  660. d3 := s.AddDaemon(c, true, true)
  661. info, err := d3.info()
  662. c.Assert(err, checker.IsNil)
  663. c.Assert(info.ControlAvailable, checker.True)
  664. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  665. instances = 4
  666. d3.updateService(c, d3.getService(c, id), setInstances(instances))
  667. waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
  668. }
  669. func simpleTestService(s *swarm.Service) {
  670. var ureplicas uint64
  671. ureplicas = 1
  672. s.Spec = swarm.ServiceSpec{
  673. TaskTemplate: swarm.TaskSpec{
  674. ContainerSpec: swarm.ContainerSpec{
  675. Image: "busybox:latest",
  676. Command: []string{"/bin/top"},
  677. },
  678. },
  679. Mode: swarm.ServiceMode{
  680. Replicated: &swarm.ReplicatedService{
  681. Replicas: &ureplicas,
  682. },
  683. },
  684. }
  685. s.Spec.Name = "top"
  686. }
  687. func serviceForUpdate(s *swarm.Service) {
  688. var ureplicas uint64
  689. ureplicas = 1
  690. s.Spec = swarm.ServiceSpec{
  691. TaskTemplate: swarm.TaskSpec{
  692. ContainerSpec: swarm.ContainerSpec{
  693. Image: "busybox:latest",
  694. Command: []string{"/bin/top"},
  695. },
  696. },
  697. Mode: swarm.ServiceMode{
  698. Replicated: &swarm.ReplicatedService{
  699. Replicas: &ureplicas,
  700. },
  701. },
  702. UpdateConfig: &swarm.UpdateConfig{
  703. Parallelism: 2,
  704. Delay: 8 * time.Second,
  705. },
  706. }
  707. s.Spec.Name = "updatetest"
  708. }
  709. func setInstances(replicas int) serviceConstructor {
  710. ureplicas := uint64(replicas)
  711. return func(s *swarm.Service) {
  712. s.Spec.Mode = swarm.ServiceMode{
  713. Replicated: &swarm.ReplicatedService{
  714. Replicas: &ureplicas,
  715. },
  716. }
  717. }
  718. }
  719. func setImage(image string) serviceConstructor {
  720. return func(s *swarm.Service) {
  721. s.Spec.TaskTemplate.ContainerSpec.Image = image
  722. }
  723. }
  724. func setGlobalMode(s *swarm.Service) {
  725. s.Spec.Mode = swarm.ServiceMode{
  726. Global: &swarm.GlobalService{},
  727. }
  728. }
  729. func checkClusterHealth(c *check.C, cl []*SwarmDaemon, managerCount, workerCount int) {
  730. var totalMCount, totalWCount int
  731. for _, d := range cl {
  732. info, err := d.info()
  733. c.Assert(err, check.IsNil)
  734. if !info.ControlAvailable {
  735. totalWCount++
  736. continue
  737. }
  738. var leaderFound bool
  739. totalMCount++
  740. var mCount, wCount int
  741. for _, n := range d.listNodes(c) {
  742. c.Assert(n.Status.State, checker.Equals, swarm.NodeStateReady, check.Commentf("state of node %s, reported by %s", n.ID, d.Info.NodeID))
  743. c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityActive, check.Commentf("availability of node %s, reported by %s", n.ID, d.Info.NodeID))
  744. c.Assert(n.Spec.Membership, checker.Equals, swarm.NodeMembershipAccepted, check.Commentf("membership of node %s, reported by %s", n.ID, d.Info.NodeID))
  745. if n.Spec.Role == swarm.NodeRoleManager {
  746. c.Assert(n.ManagerStatus, checker.NotNil, check.Commentf("manager status of node %s (manager), reported by %s", n.ID, d.Info.NodeID))
  747. if n.ManagerStatus.Leader {
  748. leaderFound = true
  749. }
  750. mCount++
  751. } else {
  752. c.Assert(n.ManagerStatus, checker.IsNil, check.Commentf("manager status of node %s (worker), reported by %s", n.ID, d.Info.NodeID))
  753. wCount++
  754. }
  755. }
  756. c.Assert(leaderFound, checker.True, check.Commentf("lack of leader reported by node %s", info.NodeID))
  757. c.Assert(mCount, checker.Equals, managerCount, check.Commentf("managers count reported by node %s", info.NodeID))
  758. c.Assert(wCount, checker.Equals, workerCount, check.Commentf("workers count reported by node %s", info.NodeID))
  759. }
  760. c.Assert(totalMCount, checker.Equals, managerCount)
  761. c.Assert(totalWCount, checker.Equals, workerCount)
  762. }
  763. func (s *DockerSwarmSuite) TestApiSwarmRestartCluster(c *check.C) {
  764. mCount, wCount := 5, 1
  765. var nodes []*SwarmDaemon
  766. for i := 0; i < mCount; i++ {
  767. manager := s.AddDaemon(c, true, true)
  768. info, err := manager.info()
  769. c.Assert(err, checker.IsNil)
  770. c.Assert(info.ControlAvailable, checker.True)
  771. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  772. nodes = append(nodes, manager)
  773. }
  774. for i := 0; i < wCount; i++ {
  775. worker := s.AddDaemon(c, true, false)
  776. info, err := worker.info()
  777. c.Assert(err, checker.IsNil)
  778. c.Assert(info.ControlAvailable, checker.False)
  779. c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
  780. nodes = append(nodes, worker)
  781. }
  782. // stop whole cluster
  783. {
  784. var wg sync.WaitGroup
  785. wg.Add(len(nodes))
  786. errs := make(chan error, len(nodes))
  787. for _, d := range nodes {
  788. go func(daemon *SwarmDaemon) {
  789. defer wg.Done()
  790. if err := daemon.Stop(); err != nil {
  791. errs <- err
  792. }
  793. if root := os.Getenv("DOCKER_REMAP_ROOT"); root != "" {
  794. daemon.root = filepath.Dir(daemon.root)
  795. }
  796. }(d)
  797. }
  798. wg.Wait()
  799. close(errs)
  800. for err := range errs {
  801. c.Assert(err, check.IsNil)
  802. }
  803. }
  804. // start whole cluster
  805. {
  806. var wg sync.WaitGroup
  807. wg.Add(len(nodes))
  808. errs := make(chan error, len(nodes))
  809. for _, d := range nodes {
  810. go func(daemon *SwarmDaemon) {
  811. defer wg.Done()
  812. if err := daemon.Start("--iptables=false"); err != nil {
  813. errs <- err
  814. }
  815. }(d)
  816. }
  817. wg.Wait()
  818. close(errs)
  819. for err := range errs {
  820. c.Assert(err, check.IsNil)
  821. }
  822. }
  823. checkClusterHealth(c, nodes, mCount, wCount)
  824. }