ndbClient.go 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855
  1. package dbclient
  2. import (
  3. "context"
  4. "fmt"
  5. "io"
  6. "net"
  7. "net/http"
  8. "os"
  9. "regexp"
  10. "strconv"
  11. "strings"
  12. "time"
  13. "github.com/containerd/log"
  14. )
// servicePort is the TCP port of each node's diagnostic HTTP server;
// set once by Client from its arguments and read by every worker.
var servicePort string

// totalWrittenKeys is the key under which waitWriters stores the sum of
// all keys written by the workers in its result map.
const totalWrittenKeys string = "totalKeys"

// resultTuple carries the outcome of one asynchronous worker:
// the node's IP (id) and an integer result (a count, 0 for plain
// completion, or -1 on error).
type resultTuple struct {
	id     string
	result int
}
  21. func httpGetFatalError(ip, port, path string) {
  22. body, err := httpGet(ip, port, path)
  23. if err != nil || !strings.Contains(string(body), "OK") {
  24. log.G(context.TODO()).Fatalf("[%s] error %s %s", path, err, body)
  25. }
  26. }
  27. func httpGet(ip, port, path string) ([]byte, error) {
  28. resp, err := http.Get("http://" + ip + ":" + port + path)
  29. if err != nil {
  30. log.G(context.TODO()).Errorf("httpGet error:%s", err)
  31. return nil, err
  32. }
  33. defer resp.Body.Close()
  34. body, err := io.ReadAll(resp.Body)
  35. return body, err
  36. }
  37. func joinCluster(ip, port string, members []string, doneCh chan resultTuple) {
  38. httpGetFatalError(ip, port, "/join?members="+strings.Join(members, ","))
  39. if doneCh != nil {
  40. doneCh <- resultTuple{id: ip, result: 0}
  41. }
  42. }
  43. func joinNetwork(ip, port, network string, doneCh chan resultTuple) {
  44. httpGetFatalError(ip, port, "/joinnetwork?nid="+network)
  45. if doneCh != nil {
  46. doneCh <- resultTuple{id: ip, result: 0}
  47. }
  48. }
  49. func leaveNetwork(ip, port, network string, doneCh chan resultTuple) {
  50. httpGetFatalError(ip, port, "/leavenetwork?nid="+network)
  51. if doneCh != nil {
  52. doneCh <- resultTuple{id: ip, result: 0}
  53. }
  54. }
  55. func writeTableKey(ip, port, networkName, tableName, key string) {
  56. createPath := "/createentry?unsafe&nid=" + networkName + "&tname=" + tableName + "&value=v&key="
  57. httpGetFatalError(ip, port, createPath+key)
  58. }
  59. func deleteTableKey(ip, port, networkName, tableName, key string) {
  60. deletePath := "/deleteentry?nid=" + networkName + "&tname=" + tableName + "&key="
  61. httpGetFatalError(ip, port, deletePath+key)
  62. }
  63. func clusterPeersNumber(ip, port string, doneCh chan resultTuple) {
  64. body, err := httpGet(ip, port, "/clusterpeers")
  65. if err != nil {
  66. log.G(context.TODO()).Errorf("clusterPeers %s there was an error: %s", ip, err)
  67. doneCh <- resultTuple{id: ip, result: -1}
  68. return
  69. }
  70. peersRegexp := regexp.MustCompile(`total entries: ([0-9]+)`)
  71. peersNum, _ := strconv.Atoi(peersRegexp.FindStringSubmatch(string(body))[1])
  72. doneCh <- resultTuple{id: ip, result: peersNum}
  73. }
  74. func networkPeersNumber(ip, port, networkName string, doneCh chan resultTuple) {
  75. body, err := httpGet(ip, port, "/networkpeers?nid="+networkName)
  76. if err != nil {
  77. log.G(context.TODO()).Errorf("networkPeersNumber %s there was an error: %s", ip, err)
  78. doneCh <- resultTuple{id: ip, result: -1}
  79. return
  80. }
  81. peersRegexp := regexp.MustCompile(`total entries: ([0-9]+)`)
  82. peersNum, _ := strconv.Atoi(peersRegexp.FindStringSubmatch(string(body))[1])
  83. doneCh <- resultTuple{id: ip, result: peersNum}
  84. }
  85. func dbTableEntriesNumber(ip, port, networkName, tableName string, doneCh chan resultTuple) {
  86. body, err := httpGet(ip, port, "/gettable?nid="+networkName+"&tname="+tableName)
  87. if err != nil {
  88. log.G(context.TODO()).Errorf("tableEntriesNumber %s there was an error: %s", ip, err)
  89. doneCh <- resultTuple{id: ip, result: -1}
  90. return
  91. }
  92. elementsRegexp := regexp.MustCompile(`total entries: ([0-9]+)`)
  93. entriesNum, _ := strconv.Atoi(elementsRegexp.FindStringSubmatch(string(body))[1])
  94. doneCh <- resultTuple{id: ip, result: entriesNum}
  95. }
  96. func dbQueueLength(ip, port, networkName string, doneCh chan resultTuple) {
  97. body, err := httpGet(ip, port, "/networkstats?nid="+networkName)
  98. if err != nil {
  99. log.G(context.TODO()).Errorf("queueLength %s there was an error: %s", ip, err)
  100. doneCh <- resultTuple{id: ip, result: -1}
  101. return
  102. }
  103. elementsRegexp := regexp.MustCompile(`qlen: ([0-9]+)`)
  104. entriesNum, _ := strconv.Atoi(elementsRegexp.FindStringSubmatch(string(body))[1])
  105. doneCh <- resultTuple{id: ip, result: entriesNum}
  106. }
  107. func clientWatchTable(ip, port, networkName, tableName string, doneCh chan resultTuple) {
  108. httpGetFatalError(ip, port, "/watchtable?nid="+networkName+"&tname="+tableName)
  109. if doneCh != nil {
  110. doneCh <- resultTuple{id: ip, result: 0}
  111. }
  112. }
  113. func clientTableEntriesNumber(ip, port, networkName, tableName string, doneCh chan resultTuple) {
  114. body, err := httpGet(ip, port, "/watchedtableentries?nid="+networkName+"&tname="+tableName)
  115. if err != nil {
  116. log.G(context.TODO()).Errorf("clientTableEntriesNumber %s there was an error: %s", ip, err)
  117. doneCh <- resultTuple{id: ip, result: -1}
  118. return
  119. }
  120. elementsRegexp := regexp.MustCompile(`total elements: ([0-9]+)`)
  121. entriesNum, _ := strconv.Atoi(elementsRegexp.FindStringSubmatch(string(body))[1])
  122. doneCh <- resultTuple{id: ip, result: entriesNum}
  123. }
  124. func writeKeysNumber(ip, port, networkName, tableName, key string, number int, doneCh chan resultTuple) {
  125. x := 0
  126. for ; x < number; x++ {
  127. k := key + strconv.Itoa(x)
  128. // write key
  129. writeTableKey(ip, port, networkName, tableName, k)
  130. }
  131. doneCh <- resultTuple{id: ip, result: x}
  132. }
  133. func deleteKeysNumber(ip, port, networkName, tableName, key string, number int, doneCh chan resultTuple) {
  134. x := 0
  135. for ; x < number; x++ {
  136. k := key + strconv.Itoa(x)
  137. // write key
  138. deleteTableKey(ip, port, networkName, tableName, k)
  139. }
  140. doneCh <- resultTuple{id: ip, result: x}
  141. }
  142. func writeUniqueKeys(ctx context.Context, ip, port, networkName, tableName, key string, doneCh chan resultTuple) {
  143. for x := 0; ; x++ {
  144. select {
  145. case <-ctx.Done():
  146. doneCh <- resultTuple{id: ip, result: x}
  147. return
  148. default:
  149. k := key + strconv.Itoa(x)
  150. // write key
  151. writeTableKey(ip, port, networkName, tableName, k)
  152. // give time to send out key writes
  153. time.Sleep(100 * time.Millisecond)
  154. }
  155. }
  156. }
  157. func writeDeleteUniqueKeys(ctx context.Context, ip, port, networkName, tableName, key string, doneCh chan resultTuple) {
  158. for x := 0; ; x++ {
  159. select {
  160. case <-ctx.Done():
  161. doneCh <- resultTuple{id: ip, result: x}
  162. return
  163. default:
  164. k := key + strconv.Itoa(x)
  165. // write key
  166. writeTableKey(ip, port, networkName, tableName, k)
  167. // give time to send out key writes
  168. time.Sleep(100 * time.Millisecond)
  169. // delete key
  170. deleteTableKey(ip, port, networkName, tableName, k)
  171. }
  172. }
  173. }
  174. func writeDeleteLeaveJoin(ctx context.Context, ip, port, networkName, tableName, key string, doneCh chan resultTuple) {
  175. for x := 0; ; x++ {
  176. select {
  177. case <-ctx.Done():
  178. doneCh <- resultTuple{id: ip, result: x}
  179. return
  180. default:
  181. k := key + strconv.Itoa(x)
  182. // write key
  183. writeTableKey(ip, port, networkName, tableName, k)
  184. time.Sleep(100 * time.Millisecond)
  185. // delete key
  186. deleteTableKey(ip, port, networkName, tableName, k)
  187. // give some time
  188. time.Sleep(100 * time.Millisecond)
  189. // leave network
  190. leaveNetwork(ip, port, networkName, nil)
  191. // join network
  192. joinNetwork(ip, port, networkName, nil)
  193. }
  194. }
  195. }
  196. func ready(ip, port string, doneCh chan resultTuple) {
  197. for {
  198. body, err := httpGet(ip, port, "/ready")
  199. if err != nil || !strings.Contains(string(body), "OK") {
  200. time.Sleep(500 * time.Millisecond)
  201. continue
  202. }
  203. // success
  204. break
  205. }
  206. // notify the completion
  207. doneCh <- resultTuple{id: ip, result: 0}
  208. }
  209. func checkTable(ctx context.Context, ips []string, port, networkName, tableName string, expectedEntries int, fn func(string, string, string, string, chan resultTuple)) (opTime time.Duration) {
  210. startTime := time.Now().UnixNano()
  211. var successTime int64
  212. // Loop for 2 minutes to guarantee that the result is stable
  213. for {
  214. select {
  215. case <-ctx.Done():
  216. // Validate test success, if the time is set means that all the tables are empty
  217. if successTime != 0 {
  218. opTime = time.Duration(successTime-startTime) / time.Millisecond
  219. log.G(ctx).Infof("Check table passed, the cluster converged in %d msec", opTime)
  220. return
  221. }
  222. log.G(ctx).Fatal("Test failed, there is still entries in the tables of the nodes")
  223. default:
  224. log.G(ctx).Infof("Checking table %s expected %d", tableName, expectedEntries)
  225. doneCh := make(chan resultTuple, len(ips))
  226. for _, ip := range ips {
  227. go fn(ip, servicePort, networkName, tableName, doneCh)
  228. }
  229. nodesWithCorrectEntriesNum := 0
  230. for i := len(ips); i > 0; i-- {
  231. tableEntries := <-doneCh
  232. log.G(ctx).Infof("Node %s has %d entries", tableEntries.id, tableEntries.result)
  233. if tableEntries.result == expectedEntries {
  234. nodesWithCorrectEntriesNum++
  235. }
  236. }
  237. close(doneCh)
  238. if nodesWithCorrectEntriesNum == len(ips) {
  239. if successTime == 0 {
  240. successTime = time.Now().UnixNano()
  241. log.G(ctx).Infof("Success after %d msec", time.Duration(successTime-startTime)/time.Millisecond)
  242. }
  243. } else {
  244. successTime = 0
  245. }
  246. time.Sleep(10 * time.Second)
  247. }
  248. }
  249. }
  250. func waitWriters(parallelWriters int, mustWrite bool, doneCh chan resultTuple) map[string]int {
  251. var totalKeys int
  252. resultTable := make(map[string]int)
  253. for i := 0; i < parallelWriters; i++ {
  254. log.G(context.TODO()).Infof("Waiting for %d workers", parallelWriters-i)
  255. workerReturn := <-doneCh
  256. totalKeys += workerReturn.result
  257. if mustWrite && workerReturn.result == 0 {
  258. log.G(context.TODO()).Fatalf("The worker %s did not write any key %d == 0", workerReturn.id, workerReturn.result)
  259. }
  260. if !mustWrite && workerReturn.result != 0 {
  261. log.G(context.TODO()).Fatalf("The worker %s was supposed to return 0 instead %d != 0", workerReturn.id, workerReturn.result)
  262. }
  263. if mustWrite {
  264. resultTable[workerReturn.id] = workerReturn.result
  265. log.G(context.TODO()).Infof("The worker %s wrote %d keys", workerReturn.id, workerReturn.result)
  266. }
  267. }
  268. resultTable[totalWrittenKeys] = totalKeys
  269. return resultTable
  270. }
  271. // ready
  272. func doReady(ips []string) {
  273. doneCh := make(chan resultTuple, len(ips))
  274. // check all the nodes
  275. for _, ip := range ips {
  276. go ready(ip, servicePort, doneCh)
  277. }
  278. // wait for the readiness of all nodes
  279. for i := len(ips); i > 0; i-- {
  280. <-doneCh
  281. }
  282. close(doneCh)
  283. }
  284. // join
  285. func doJoin(ips []string) {
  286. doneCh := make(chan resultTuple, len(ips))
  287. // check all the nodes
  288. for i, ip := range ips {
  289. members := append([]string(nil), ips[:i]...)
  290. members = append(members, ips[i+1:]...)
  291. go joinCluster(ip, servicePort, members, doneCh)
  292. }
  293. // wait for the readiness of all nodes
  294. for i := len(ips); i > 0; i-- {
  295. <-doneCh
  296. }
  297. close(doneCh)
  298. }
  299. // cluster-peers expectedNumberPeers maxRetry
  300. func doClusterPeers(ips []string, args []string) {
  301. doneCh := make(chan resultTuple, len(ips))
  302. expectedPeers, _ := strconv.Atoi(args[0])
  303. maxRetry, _ := strconv.Atoi(args[1])
  304. for retry := 0; retry < maxRetry; retry++ {
  305. // check all the nodes
  306. for _, ip := range ips {
  307. go clusterPeersNumber(ip, servicePort, doneCh)
  308. }
  309. var failed bool
  310. // wait for the readiness of all nodes
  311. for i := len(ips); i > 0; i-- {
  312. node := <-doneCh
  313. if node.result != expectedPeers {
  314. failed = true
  315. if retry == maxRetry-1 {
  316. log.G(context.TODO()).Fatalf("Expected peers from %s mismatch %d != %d", node.id, expectedPeers, node.result)
  317. } else {
  318. log.G(context.TODO()).Warnf("Expected peers from %s mismatch %d != %d", node.id, expectedPeers, node.result)
  319. }
  320. time.Sleep(1 * time.Second)
  321. }
  322. }
  323. // check if needs retry
  324. if !failed {
  325. break
  326. }
  327. }
  328. close(doneCh)
  329. }
  330. // join-network networkName
  331. func doJoinNetwork(ips []string, args []string) {
  332. doneCh := make(chan resultTuple, len(ips))
  333. // check all the nodes
  334. for _, ip := range ips {
  335. go joinNetwork(ip, servicePort, args[0], doneCh)
  336. }
  337. // wait for the readiness of all nodes
  338. for i := len(ips); i > 0; i-- {
  339. <-doneCh
  340. }
  341. close(doneCh)
  342. }
  343. // leave-network networkName
  344. func doLeaveNetwork(ips []string, args []string) {
  345. doneCh := make(chan resultTuple, len(ips))
  346. // check all the nodes
  347. for _, ip := range ips {
  348. go leaveNetwork(ip, servicePort, args[0], doneCh)
  349. }
  350. // wait for the readiness of all nodes
  351. for i := len(ips); i > 0; i-- {
  352. <-doneCh
  353. }
  354. close(doneCh)
  355. }
  356. // network-peers networkName expectedNumberPeers maxRetry
  357. func doNetworkPeers(ips []string, args []string) {
  358. doneCh := make(chan resultTuple, len(ips))
  359. networkName := args[0]
  360. expectedPeers, _ := strconv.Atoi(args[1])
  361. maxRetry, _ := strconv.Atoi(args[2])
  362. for retry := 0; retry < maxRetry; retry++ {
  363. // check all the nodes
  364. for _, ip := range ips {
  365. go networkPeersNumber(ip, servicePort, networkName, doneCh)
  366. }
  367. var failed bool
  368. // wait for the readiness of all nodes
  369. for i := len(ips); i > 0; i-- {
  370. node := <-doneCh
  371. if node.result != expectedPeers {
  372. failed = true
  373. if retry == maxRetry-1 {
  374. log.G(context.TODO()).Fatalf("Expected peers from %s mismatch %d != %d", node.id, expectedPeers, node.result)
  375. } else {
  376. log.G(context.TODO()).Warnf("Expected peers from %s mismatch %d != %d", node.id, expectedPeers, node.result)
  377. }
  378. time.Sleep(1 * time.Second)
  379. }
  380. }
  381. // check if needs retry
  382. if !failed {
  383. break
  384. }
  385. }
  386. close(doneCh)
  387. }
  388. // network-stats-queue networkName <gt/lt> queueSize
  389. func doNetworkStatsQueue(ips []string, args []string) {
  390. doneCh := make(chan resultTuple, len(ips))
  391. networkName := args[0]
  392. comparison := args[1]
  393. size, _ := strconv.Atoi(args[2])
  394. // check all the nodes
  395. for _, ip := range ips {
  396. go dbQueueLength(ip, servicePort, networkName, doneCh)
  397. }
  398. var avgQueueSize int
  399. // wait for the readiness of all nodes
  400. for i := len(ips); i > 0; i-- {
  401. node := <-doneCh
  402. switch comparison {
  403. case "lt":
  404. if node.result > size {
  405. log.G(context.TODO()).Fatalf("Expected queue size from %s to be %d < %d", node.id, node.result, size)
  406. }
  407. case "gt":
  408. if node.result < size {
  409. log.G(context.TODO()).Fatalf("Expected queue size from %s to be %d > %d", node.id, node.result, size)
  410. }
  411. default:
  412. log.G(context.TODO()).Fatal("unknown comparison operator")
  413. }
  414. avgQueueSize += node.result
  415. }
  416. close(doneCh)
  417. avgQueueSize /= len(ips)
  418. fmt.Fprintf(os.Stderr, "doNetworkStatsQueue succeeded with avg queue:%d", avgQueueSize)
  419. }
  420. // write-keys networkName tableName parallelWriters numberOfKeysEach
  421. func doWriteKeys(ips []string, args []string) {
  422. networkName := args[0]
  423. tableName := args[1]
  424. parallelWriters, _ := strconv.Atoi(args[2])
  425. numberOfKeys, _ := strconv.Atoi(args[3])
  426. doneCh := make(chan resultTuple, parallelWriters)
  427. // Enable watch of tables from clients
  428. for i := 0; i < parallelWriters; i++ {
  429. go clientWatchTable(ips[i], servicePort, networkName, tableName, doneCh)
  430. }
  431. waitWriters(parallelWriters, false, doneCh)
  432. // Start parallel writers that will create and delete unique keys
  433. defer close(doneCh)
  434. for i := 0; i < parallelWriters; i++ {
  435. key := "key-" + strconv.Itoa(i) + "-"
  436. log.G(context.TODO()).Infof("Spawn worker: %d on IP:%s", i, ips[i])
  437. go writeKeysNumber(ips[i], servicePort, networkName, tableName, key, numberOfKeys, doneCh)
  438. }
  439. // Sync with all the writers
  440. keyMap := waitWriters(parallelWriters, true, doneCh)
  441. log.G(context.TODO()).Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
  442. // check table entries for 2 minutes
  443. ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
  444. opTime := checkTable(ctx, ips, servicePort, networkName, tableName, keyMap[totalWrittenKeys], dbTableEntriesNumber)
  445. cancel()
  446. fmt.Fprintf(os.Stderr, "doWriteKeys succeeded in %d msec", opTime)
  447. }
  448. // delete-keys networkName tableName parallelWriters numberOfKeysEach
  449. func doDeleteKeys(ips []string, args []string) {
  450. networkName := args[0]
  451. tableName := args[1]
  452. parallelWriters, _ := strconv.Atoi(args[2])
  453. numberOfKeys, _ := strconv.Atoi(args[3])
  454. doneCh := make(chan resultTuple, parallelWriters)
  455. // Enable watch of tables from clients
  456. for i := 0; i < parallelWriters; i++ {
  457. go clientWatchTable(ips[i], servicePort, networkName, tableName, doneCh)
  458. }
  459. waitWriters(parallelWriters, false, doneCh)
  460. // Start parallel writers that will create and delete unique keys
  461. defer close(doneCh)
  462. for i := 0; i < parallelWriters; i++ {
  463. key := "key-" + strconv.Itoa(i) + "-"
  464. log.G(context.TODO()).Infof("Spawn worker: %d on IP:%s", i, ips[i])
  465. go deleteKeysNumber(ips[i], servicePort, networkName, tableName, key, numberOfKeys, doneCh)
  466. }
  467. // Sync with all the writers
  468. keyMap := waitWriters(parallelWriters, true, doneCh)
  469. log.G(context.TODO()).Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
  470. // check table entries for 2 minutes
  471. ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
  472. opTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, dbTableEntriesNumber)
  473. cancel()
  474. fmt.Fprintf(os.Stderr, "doDeletekeys succeeded in %d msec", opTime)
  475. }
  476. // write-delete-unique-keys networkName tableName numParallelWriters writeTimeSec
  477. func doWriteDeleteUniqueKeys(ips []string, args []string) {
  478. networkName := args[0]
  479. tableName := args[1]
  480. parallelWriters, _ := strconv.Atoi(args[2])
  481. writeTimeSec, _ := strconv.Atoi(args[3])
  482. doneCh := make(chan resultTuple, parallelWriters)
  483. // Enable watch of tables from clients
  484. for i := 0; i < parallelWriters; i++ {
  485. go clientWatchTable(ips[i], servicePort, networkName, tableName, doneCh)
  486. }
  487. waitWriters(parallelWriters, false, doneCh)
  488. // Start parallel writers that will create and delete unique keys
  489. ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
  490. for i := 0; i < parallelWriters; i++ {
  491. key := "key-" + strconv.Itoa(i) + "-"
  492. log.G(ctx).Infof("Spawn worker: %d on IP:%s", i, ips[i])
  493. go writeDeleteUniqueKeys(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
  494. }
  495. // Sync with all the writers
  496. keyMap := waitWriters(parallelWriters, true, doneCh)
  497. cancel()
  498. log.G(ctx).Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
  499. // check table entries for 2 minutes
  500. ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
  501. opDBTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, dbTableEntriesNumber)
  502. cancel()
  503. ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second)
  504. opClientTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, clientTableEntriesNumber)
  505. cancel()
  506. fmt.Fprintf(os.Stderr, "doWriteDeleteUniqueKeys succeeded in %d msec and client %d msec", opDBTime, opClientTime)
  507. }
  508. // write-unique-keys networkName tableName numParallelWriters writeTimeSec
  509. func doWriteUniqueKeys(ips []string, args []string) {
  510. networkName := args[0]
  511. tableName := args[1]
  512. parallelWriters, _ := strconv.Atoi(args[2])
  513. writeTimeSec, _ := strconv.Atoi(args[3])
  514. doneCh := make(chan resultTuple, parallelWriters)
  515. // Enable watch of tables from clients
  516. for i := 0; i < parallelWriters; i++ {
  517. go clientWatchTable(ips[i], servicePort, networkName, tableName, doneCh)
  518. }
  519. waitWriters(parallelWriters, false, doneCh)
  520. // Start parallel writers that will create and delete unique keys
  521. defer close(doneCh)
  522. ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
  523. for i := 0; i < parallelWriters; i++ {
  524. key := "key-" + strconv.Itoa(i) + "-"
  525. log.G(ctx).Infof("Spawn worker: %d on IP:%s", i, ips[i])
  526. go writeUniqueKeys(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
  527. }
  528. // Sync with all the writers
  529. keyMap := waitWriters(parallelWriters, true, doneCh)
  530. cancel()
  531. log.G(ctx).Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
  532. // check table entries for 2 minutes
  533. ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
  534. opTime := checkTable(ctx, ips, servicePort, networkName, tableName, keyMap[totalWrittenKeys], dbTableEntriesNumber)
  535. cancel()
  536. fmt.Fprintf(os.Stderr, "doWriteUniqueKeys succeeded in %d msec", opTime)
  537. }
  538. // write-delete-leave-join networkName tableName numParallelWriters writeTimeSec
  539. func doWriteDeleteLeaveJoin(ips []string, args []string) {
  540. networkName := args[0]
  541. tableName := args[1]
  542. parallelWriters, _ := strconv.Atoi(args[2])
  543. writeTimeSec, _ := strconv.Atoi(args[3])
  544. // Start parallel writers that will create and delete unique keys
  545. doneCh := make(chan resultTuple, parallelWriters)
  546. defer close(doneCh)
  547. ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
  548. for i := 0; i < parallelWriters; i++ {
  549. key := "key-" + strconv.Itoa(i) + "-"
  550. log.G(ctx).Infof("Spawn worker: %d on IP:%s", i, ips[i])
  551. go writeDeleteLeaveJoin(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
  552. }
  553. // Sync with all the writers
  554. keyMap := waitWriters(parallelWriters, true, doneCh)
  555. cancel()
  556. log.G(ctx).Infof("Written a total of %d keys on the cluster", keyMap["totalKeys"])
  557. // check table entries for 2 minutes
  558. ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
  559. opTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, dbTableEntriesNumber)
  560. cancel()
  561. fmt.Fprintf(os.Stderr, "doWriteDeleteLeaveJoin succeeded in %d msec", opTime)
  562. }
  563. // write-delete-wait-leave-join networkName tableName numParallelWriters writeTimeSec
  564. func doWriteDeleteWaitLeaveJoin(ips []string, args []string) {
  565. networkName := args[0]
  566. tableName := args[1]
  567. parallelWriters, _ := strconv.Atoi(args[2])
  568. writeTimeSec, _ := strconv.Atoi(args[3])
  569. // Start parallel writers that will create and delete unique keys
  570. doneCh := make(chan resultTuple, parallelWriters)
  571. defer close(doneCh)
  572. ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
  573. for i := 0; i < parallelWriters; i++ {
  574. key := "key-" + strconv.Itoa(i) + "-"
  575. log.G(ctx).Infof("Spawn worker: %d on IP:%s", i, ips[i])
  576. go writeDeleteUniqueKeys(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
  577. }
  578. // Sync with all the writers
  579. keyMap := waitWriters(parallelWriters, true, doneCh)
  580. cancel()
  581. log.G(ctx).Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
  582. // The writers will leave the network
  583. for i := 0; i < parallelWriters; i++ {
  584. log.G(ctx).Infof("worker leaveNetwork: %d on IP:%s", i, ips[i])
  585. go leaveNetwork(ips[i], servicePort, networkName, doneCh)
  586. }
  587. waitWriters(parallelWriters, false, doneCh)
  588. // Give some time
  589. time.Sleep(100 * time.Millisecond)
  590. // The writers will join the network
  591. for i := 0; i < parallelWriters; i++ {
  592. log.G(ctx).Infof("worker joinNetwork: %d on IP:%s", i, ips[i])
  593. go joinNetwork(ips[i], servicePort, networkName, doneCh)
  594. }
  595. waitWriters(parallelWriters, false, doneCh)
  596. // check table entries for 2 minutes
  597. ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
  598. opTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, dbTableEntriesNumber)
  599. cancel()
  600. fmt.Fprintf(os.Stderr, "doWriteDeleteWaitLeaveJoin succeeded in %d msec", opTime)
  601. }
  602. // write-wait-leave networkName tableName numParallelWriters writeTimeSec
  603. func doWriteWaitLeave(ips []string, args []string) {
  604. networkName := args[0]
  605. tableName := args[1]
  606. parallelWriters, _ := strconv.Atoi(args[2])
  607. writeTimeSec, _ := strconv.Atoi(args[3])
  608. // Start parallel writers that will create and delete unique keys
  609. doneCh := make(chan resultTuple, parallelWriters)
  610. defer close(doneCh)
  611. ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
  612. for i := 0; i < parallelWriters; i++ {
  613. key := "key-" + strconv.Itoa(i) + "-"
  614. log.G(ctx).Infof("Spawn worker: %d on IP:%s", i, ips[i])
  615. go writeUniqueKeys(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
  616. }
  617. // Sync with all the writers
  618. keyMap := waitWriters(parallelWriters, true, doneCh)
  619. cancel()
  620. log.G(ctx).Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
  621. // The writers will leave the network
  622. for i := 0; i < parallelWriters; i++ {
  623. log.G(ctx).Infof("worker leaveNetwork: %d on IP:%s", i, ips[i])
  624. go leaveNetwork(ips[i], servicePort, networkName, doneCh)
  625. }
  626. waitWriters(parallelWriters, false, doneCh)
  627. // check table entries for 2 minutes
  628. ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
  629. opTime := checkTable(ctx, ips, servicePort, networkName, tableName, 0, dbTableEntriesNumber)
  630. cancel()
  631. fmt.Fprintf(os.Stderr, "doWriteLeaveJoin succeeded in %d msec", opTime)
  632. }
  633. // write-wait-leave-join networkName tableName numParallelWriters writeTimeSec numParallelLeaver
  634. func doWriteWaitLeaveJoin(ips []string, args []string) {
  635. networkName := args[0]
  636. tableName := args[1]
  637. parallelWriters, _ := strconv.Atoi(args[2])
  638. writeTimeSec, _ := strconv.Atoi(args[3])
  639. parallelLeaver, _ := strconv.Atoi(args[4])
  640. // Start parallel writers that will create and delete unique keys
  641. doneCh := make(chan resultTuple, parallelWriters)
  642. defer close(doneCh)
  643. ctx, cancel := context.WithTimeout(context.Background(), time.Duration(writeTimeSec)*time.Second)
  644. for i := 0; i < parallelWriters; i++ {
  645. key := "key-" + strconv.Itoa(i) + "-"
  646. log.G(ctx).Infof("Spawn worker: %d on IP:%s", i, ips[i])
  647. go writeUniqueKeys(ctx, ips[i], servicePort, networkName, tableName, key, doneCh)
  648. }
  649. // Sync with all the writers
  650. keyMap := waitWriters(parallelWriters, true, doneCh)
  651. cancel()
  652. log.G(ctx).Infof("Written a total of %d keys on the cluster", keyMap[totalWrittenKeys])
  653. keysExpected := keyMap[totalWrittenKeys]
  654. // The Leavers will leave the network
  655. for i := 0; i < parallelLeaver; i++ {
  656. log.G(ctx).Infof("worker leaveNetwork: %d on IP:%s", i, ips[i])
  657. go leaveNetwork(ips[i], servicePort, networkName, doneCh)
  658. // Once a node leave all the keys written previously will be deleted, so the expected keys will consider that as removed
  659. keysExpected -= keyMap[ips[i]]
  660. }
  661. waitWriters(parallelLeaver, false, doneCh)
  662. // Give some time
  663. time.Sleep(100 * time.Millisecond)
  664. // The writers will join the network
  665. for i := 0; i < parallelLeaver; i++ {
  666. log.G(ctx).Infof("worker joinNetwork: %d on IP:%s", i, ips[i])
  667. go joinNetwork(ips[i], servicePort, networkName, doneCh)
  668. }
  669. waitWriters(parallelLeaver, false, doneCh)
  670. // check table entries for 2 minutes
  671. ctx, cancel = context.WithTimeout(context.Background(), 2*time.Minute)
  672. opTime := checkTable(ctx, ips, servicePort, networkName, tableName, keysExpected, dbTableEntriesNumber)
  673. cancel()
  674. fmt.Fprintf(os.Stderr, "doWriteWaitLeaveJoin succeeded in %d msec", opTime)
  675. }
// cmdArgChec maps a command to the minimum length of the full argument
// vector it requires (command, serviceName, servicePort, plus any
// command-specific arguments). Commands not listed effectively require 0.
var cmdArgChec = map[string]int{
	"debug":                    0,
	"fail":                     0,
	"ready":                    2,
	"join":                     2,
	"leave":                    2,
	"join-network":             3,
	"leave-network":            3,
	"cluster-peers":            5,
	"network-peers":            5,
	"write-delete-unique-keys": 7,
}
// Client is the test-driver entry point. args is the raw argument vector:
// command, serviceName, servicePort, then command-specific arguments. It
// resolves every task of "tasks.<serviceName>" via DNS and dispatches the
// command against those nodes. "debug" and "fail" exit before any resolution.
func Client(args []string) {
	log.G(context.TODO()).Infof("[CLIENT] Starting with arguments %v", args)
	command := args[0]
	if len(args) < cmdArgChec[command] {
		log.G(context.TODO()).Fatalf("Command %s requires %d arguments, passed %d, aborting...", command, cmdArgChec[command], len(args))
	}
	switch command {
	case "debug":
		// Keep the process alive for manual inspection of the container.
		time.Sleep(1 * time.Hour)
		os.Exit(0)
	case "fail":
		// Deliberate failure path to exercise the harness' error handling.
		log.G(context.TODO()).Fatalf("Test error condition with message: error error error")
	}
	serviceName := args[1]
	// NOTE(review): the lookup error is discarded; the len(ips) check below is
	// the only failure handling for DNS problems.
	ips, _ := net.LookupHost("tasks." + serviceName)
	log.G(context.TODO()).Infof("got the ips %v", ips)
	if len(ips) == 0 {
		log.G(context.TODO()).Fatalf("Cannot resolve any IP for the service tasks.%s", serviceName)
	}
	servicePort = args[2]
	commandArgs := args[3:]
	log.G(context.TODO()).Infof("Executing %s with args:%v", command, commandArgs)
	switch command {
	case "ready":
		doReady(ips)
	case "join":
		doJoin(ips)
	case "leave":
		// Intentionally empty in the current implementation.
		// NOTE(review): confirm "leave" is meant to be a no-op.
	case "cluster-peers":
		// cluster-peers expectedNumberPeers maxRetry
		doClusterPeers(ips, commandArgs)
	case "join-network":
		// join-network networkName
		doJoinNetwork(ips, commandArgs)
	case "leave-network":
		// leave-network networkName
		doLeaveNetwork(ips, commandArgs)
	case "network-peers":
		// network-peers networkName expectedNumberPeers maxRetry
		doNetworkPeers(ips, commandArgs)
	// case "network-stats-entries":
	//	// network-stats-entries networkName maxRetry
	//	doNetworkPeers(ips, commandArgs)
	case "network-stats-queue":
		// network-stats-queue networkName <lt/gt> queueSize
		doNetworkStatsQueue(ips, commandArgs)
	case "write-keys":
		// write-keys networkName tableName parallelWriters numberOfKeysEach
		doWriteKeys(ips, commandArgs)
	case "delete-keys":
		// delete-keys networkName tableName parallelWriters numberOfKeysEach
		doDeleteKeys(ips, commandArgs)
	case "write-unique-keys":
		// write-unique-keys networkName tableName numParallelWriters writeTimeSec
		doWriteUniqueKeys(ips, commandArgs)
	case "write-delete-unique-keys":
		// write-delete-unique-keys networkName tableName numParallelWriters writeTimeSec
		doWriteDeleteUniqueKeys(ips, commandArgs)
	case "write-delete-leave-join":
		// write-delete-leave-join networkName tableName numParallelWriters writeTimeSec
		doWriteDeleteLeaveJoin(ips, commandArgs)
	case "write-delete-wait-leave-join":
		// write-delete-wait-leave-join networkName tableName numParallelWriters writeTimeSec
		doWriteDeleteWaitLeaveJoin(ips, commandArgs)
	case "write-wait-leave":
		// write-wait-leave networkName tableName numParallelWriters writeTimeSec
		doWriteWaitLeave(ips, commandArgs)
	case "write-wait-leave-join":
		// write-wait-leave-join networkName tableName numParallelWriters writeTimeSec numParallelLeaver
		doWriteWaitLeaveJoin(ips, commandArgs)
	default:
		log.G(context.TODO()).Fatalf("Command %s not recognized", command)
	}
}