btree_generic.go 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083
  1. // Copyright 2014-2022 Google Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. //go:build go1.18
  15. // +build go1.18
  16. // In Go 1.18 and beyond, a BTreeG generic is created, and BTree is a specific
  17. // instantiation of that generic for the Item interface, with a backwards-
  18. // compatible API. Before go1.18, generics are not supported,
  19. // and BTree is just an implementation based around the Item interface.
  20. // Package btree implements in-memory B-Trees of arbitrary degree.
  21. //
  22. // btree implements an in-memory B-Tree for use as an ordered data structure.
  23. // It is not meant for persistent storage solutions.
  24. //
  25. // It has a flatter structure than an equivalent red-black or other binary tree,
  26. // which in some cases yields better memory usage and/or performance.
  27. // See some discussion on the matter here:
  28. // http://google-opensource.blogspot.com/2013/01/c-containers-that-save-memory-and-time.html
  29. // Note, though, that this project is in no way related to the C++ B-Tree
  30. // implementation written about there.
  31. //
  32. // Within this tree, each node contains a slice of items and a (possibly nil)
  33. // slice of children. For basic numeric values or raw structs, this can cause
  34. // efficiency differences when compared to equivalent C++ template code that
  35. // stores values in arrays within the node:
  36. // * Due to the overhead of storing values as interfaces (each
  37. // value needs to be stored as the value itself, then 2 words for the
  38. // interface pointing to that value and its type), resulting in higher
  39. // memory use.
  40. // * Since interfaces can point to values anywhere in memory, values are
  41. // most likely not stored in contiguous blocks, resulting in a higher
  42. // number of cache misses.
  43. // These issues don't tend to matter, though, when working with strings or other
  44. // heap-allocated structures, since C++-equivalent structures also must store
  45. // pointers and also distribute their values across the heap.
  46. //
  47. // This implementation is designed to be a drop-in replacement to gollrb.LLRB
  48. // trees, (http://github.com/petar/gollrb), an excellent and probably the most
  49. // widely used ordered tree implementation in the Go ecosystem currently.
  50. // Its functions, therefore, exactly mirror those of
  51. // llrb.LLRB where possible. Unlike gollrb, though, we currently don't
  52. // support storing multiple equivalent values.
  53. //
  54. // There are two implementations; those suffixed with 'G' are generics, usable
  55. // for any type, and require a passed-in "less" function to define their ordering.
  56. // Those without this prefix are specific to the 'Item' interface, and use
  57. // its 'Less' function for ordering.
  58. package btree
  59. import (
  60. "fmt"
  61. "io"
  62. "sort"
  63. "strings"
  64. "sync"
  65. )
  66. // Item represents a single object in the tree.
  67. type Item interface {
  68. // Less tests whether the current item is less than the given argument.
  69. //
  70. // This must provide a strict weak ordering.
  71. // If !a.Less(b) && !b.Less(a), we treat this to mean a == b (i.e. we can only
  72. // hold one of either a or b in the tree).
  73. Less(than Item) bool
  74. }
  75. const (
  76. DefaultFreeListSize = 32
  77. )
  78. // FreeListG represents a free list of btree nodes. By default each
  79. // BTree has its own FreeList, but multiple BTrees can share the same
  80. // FreeList, in particular when they're created with Clone.
  81. // Two Btrees using the same freelist are safe for concurrent write access.
  82. type FreeListG[T any] struct {
  83. mu sync.Mutex
  84. freelist []*node[T]
  85. }
  86. // NewFreeListG creates a new free list.
  87. // size is the maximum size of the returned free list.
  88. func NewFreeListG[T any](size int) *FreeListG[T] {
  89. return &FreeListG[T]{freelist: make([]*node[T], 0, size)}
  90. }
  91. func (f *FreeListG[T]) newNode() (n *node[T]) {
  92. f.mu.Lock()
  93. index := len(f.freelist) - 1
  94. if index < 0 {
  95. f.mu.Unlock()
  96. return new(node[T])
  97. }
  98. n = f.freelist[index]
  99. f.freelist[index] = nil
  100. f.freelist = f.freelist[:index]
  101. f.mu.Unlock()
  102. return
  103. }
  104. func (f *FreeListG[T]) freeNode(n *node[T]) (out bool) {
  105. f.mu.Lock()
  106. if len(f.freelist) < cap(f.freelist) {
  107. f.freelist = append(f.freelist, n)
  108. out = true
  109. }
  110. f.mu.Unlock()
  111. return
  112. }
  113. // ItemIteratorG allows callers of {A/De}scend* to iterate in-order over portions of
  114. // the tree. When this function returns false, iteration will stop and the
  115. // associated Ascend* function will immediately return.
  116. type ItemIteratorG[T any] func(item T) bool
  117. // Ordered represents the set of types for which the '<' operator work.
  118. type Ordered interface {
  119. ~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~float32 | ~float64 | ~string
  120. }
  121. // Less[T] returns a default LessFunc that uses the '<' operator for types that support it.
  122. func Less[T Ordered]() LessFunc[T] {
  123. return func(a, b T) bool { return a < b }
  124. }
  125. // NewOrderedG creates a new B-Tree for ordered types.
  126. func NewOrderedG[T Ordered](degree int) *BTreeG[T] {
  127. return NewG[T](degree, Less[T]())
  128. }
  129. // NewG creates a new B-Tree with the given degree.
  130. //
  131. // NewG(2), for example, will create a 2-3-4 tree (each node contains 1-3 items
  132. // and 2-4 children).
  133. //
  134. // The passed-in LessFunc determines how objects of type T are ordered.
  135. func NewG[T any](degree int, less LessFunc[T]) *BTreeG[T] {
  136. return NewWithFreeListG(degree, less, NewFreeListG[T](DefaultFreeListSize))
  137. }
  138. // NewWithFreeListG creates a new B-Tree that uses the given node free list.
  139. func NewWithFreeListG[T any](degree int, less LessFunc[T], f *FreeListG[T]) *BTreeG[T] {
  140. if degree <= 1 {
  141. panic("bad degree")
  142. }
  143. return &BTreeG[T]{
  144. degree: degree,
  145. cow: &copyOnWriteContext[T]{freelist: f, less: less},
  146. }
  147. }
  148. // items stores items in a node.
  149. type items[T any] []T
  150. // insertAt inserts a value into the given index, pushing all subsequent values
  151. // forward.
  152. func (s *items[T]) insertAt(index int, item T) {
  153. var zero T
  154. *s = append(*s, zero)
  155. if index < len(*s) {
  156. copy((*s)[index+1:], (*s)[index:])
  157. }
  158. (*s)[index] = item
  159. }
  160. // removeAt removes a value at a given index, pulling all subsequent values
  161. // back.
  162. func (s *items[T]) removeAt(index int) T {
  163. item := (*s)[index]
  164. copy((*s)[index:], (*s)[index+1:])
  165. var zero T
  166. (*s)[len(*s)-1] = zero
  167. *s = (*s)[:len(*s)-1]
  168. return item
  169. }
  170. // pop removes and returns the last element in the list.
  171. func (s *items[T]) pop() (out T) {
  172. index := len(*s) - 1
  173. out = (*s)[index]
  174. var zero T
  175. (*s)[index] = zero
  176. *s = (*s)[:index]
  177. return
  178. }
  179. // truncate truncates this instance at index so that it contains only the
  180. // first index items. index must be less than or equal to length.
  181. func (s *items[T]) truncate(index int) {
  182. var toClear items[T]
  183. *s, toClear = (*s)[:index], (*s)[index:]
  184. var zero T
  185. for i := 0; i < len(toClear); i++ {
  186. toClear[i] = zero
  187. }
  188. }
  189. // find returns the index where the given item should be inserted into this
  190. // list. 'found' is true if the item already exists in the list at the given
  191. // index.
  192. func (s items[T]) find(item T, less func(T, T) bool) (index int, found bool) {
  193. i := sort.Search(len(s), func(i int) bool {
  194. return less(item, s[i])
  195. })
  196. if i > 0 && !less(s[i-1], item) {
  197. return i - 1, true
  198. }
  199. return i, false
  200. }
  201. // node is an internal node in a tree.
  202. //
  203. // It must at all times maintain the invariant that either
  204. // * len(children) == 0, len(items) unconstrained
  205. // * len(children) == len(items) + 1
  206. type node[T any] struct {
  207. items items[T]
  208. children items[*node[T]]
  209. cow *copyOnWriteContext[T]
  210. }
  211. func (n *node[T]) mutableFor(cow *copyOnWriteContext[T]) *node[T] {
  212. if n.cow == cow {
  213. return n
  214. }
  215. out := cow.newNode()
  216. if cap(out.items) >= len(n.items) {
  217. out.items = out.items[:len(n.items)]
  218. } else {
  219. out.items = make(items[T], len(n.items), cap(n.items))
  220. }
  221. copy(out.items, n.items)
  222. // Copy children
  223. if cap(out.children) >= len(n.children) {
  224. out.children = out.children[:len(n.children)]
  225. } else {
  226. out.children = make(items[*node[T]], len(n.children), cap(n.children))
  227. }
  228. copy(out.children, n.children)
  229. return out
  230. }
  231. func (n *node[T]) mutableChild(i int) *node[T] {
  232. c := n.children[i].mutableFor(n.cow)
  233. n.children[i] = c
  234. return c
  235. }
  236. // split splits the given node at the given index. The current node shrinks,
  237. // and this function returns the item that existed at that index and a new node
  238. // containing all items/children after it.
  239. func (n *node[T]) split(i int) (T, *node[T]) {
  240. item := n.items[i]
  241. next := n.cow.newNode()
  242. next.items = append(next.items, n.items[i+1:]...)
  243. n.items.truncate(i)
  244. if len(n.children) > 0 {
  245. next.children = append(next.children, n.children[i+1:]...)
  246. n.children.truncate(i + 1)
  247. }
  248. return item, next
  249. }
  250. // maybeSplitChild checks if a child should be split, and if so splits it.
  251. // Returns whether or not a split occurred.
  252. func (n *node[T]) maybeSplitChild(i, maxItems int) bool {
  253. if len(n.children[i].items) < maxItems {
  254. return false
  255. }
  256. first := n.mutableChild(i)
  257. item, second := first.split(maxItems / 2)
  258. n.items.insertAt(i, item)
  259. n.children.insertAt(i+1, second)
  260. return true
  261. }
  262. // insert inserts an item into the subtree rooted at this node, making sure
  263. // no nodes in the subtree exceed maxItems items. Should an equivalent item be
  264. // be found/replaced by insert, it will be returned.
  265. func (n *node[T]) insert(item T, maxItems int) (_ T, _ bool) {
  266. i, found := n.items.find(item, n.cow.less)
  267. if found {
  268. out := n.items[i]
  269. n.items[i] = item
  270. return out, true
  271. }
  272. if len(n.children) == 0 {
  273. n.items.insertAt(i, item)
  274. return
  275. }
  276. if n.maybeSplitChild(i, maxItems) {
  277. inTree := n.items[i]
  278. switch {
  279. case n.cow.less(item, inTree):
  280. // no change, we want first split node
  281. case n.cow.less(inTree, item):
  282. i++ // we want second split node
  283. default:
  284. out := n.items[i]
  285. n.items[i] = item
  286. return out, true
  287. }
  288. }
  289. return n.mutableChild(i).insert(item, maxItems)
  290. }
  291. // get finds the given key in the subtree and returns it.
  292. func (n *node[T]) get(key T) (_ T, _ bool) {
  293. i, found := n.items.find(key, n.cow.less)
  294. if found {
  295. return n.items[i], true
  296. } else if len(n.children) > 0 {
  297. return n.children[i].get(key)
  298. }
  299. return
  300. }
  301. // min returns the first item in the subtree.
  302. func min[T any](n *node[T]) (_ T, found bool) {
  303. if n == nil {
  304. return
  305. }
  306. for len(n.children) > 0 {
  307. n = n.children[0]
  308. }
  309. if len(n.items) == 0 {
  310. return
  311. }
  312. return n.items[0], true
  313. }
  314. // max returns the last item in the subtree.
  315. func max[T any](n *node[T]) (_ T, found bool) {
  316. if n == nil {
  317. return
  318. }
  319. for len(n.children) > 0 {
  320. n = n.children[len(n.children)-1]
  321. }
  322. if len(n.items) == 0 {
  323. return
  324. }
  325. return n.items[len(n.items)-1], true
  326. }
  327. // toRemove details what item to remove in a node.remove call.
  328. type toRemove int
  329. const (
  330. removeItem toRemove = iota // removes the given item
  331. removeMin // removes smallest item in the subtree
  332. removeMax // removes largest item in the subtree
  333. )
  334. // remove removes an item from the subtree rooted at this node.
  335. func (n *node[T]) remove(item T, minItems int, typ toRemove) (_ T, _ bool) {
  336. var i int
  337. var found bool
  338. switch typ {
  339. case removeMax:
  340. if len(n.children) == 0 {
  341. return n.items.pop(), true
  342. }
  343. i = len(n.items)
  344. case removeMin:
  345. if len(n.children) == 0 {
  346. return n.items.removeAt(0), true
  347. }
  348. i = 0
  349. case removeItem:
  350. i, found = n.items.find(item, n.cow.less)
  351. if len(n.children) == 0 {
  352. if found {
  353. return n.items.removeAt(i), true
  354. }
  355. return
  356. }
  357. default:
  358. panic("invalid type")
  359. }
  360. // If we get to here, we have children.
  361. if len(n.children[i].items) <= minItems {
  362. return n.growChildAndRemove(i, item, minItems, typ)
  363. }
  364. child := n.mutableChild(i)
  365. // Either we had enough items to begin with, or we've done some
  366. // merging/stealing, because we've got enough now and we're ready to return
  367. // stuff.
  368. if found {
  369. // The item exists at index 'i', and the child we've selected can give us a
  370. // predecessor, since if we've gotten here it's got > minItems items in it.
  371. out := n.items[i]
  372. // We use our special-case 'remove' call with typ=maxItem to pull the
  373. // predecessor of item i (the rightmost leaf of our immediate left child)
  374. // and set it into where we pulled the item from.
  375. var zero T
  376. n.items[i], _ = child.remove(zero, minItems, removeMax)
  377. return out, true
  378. }
  379. // Final recursive call. Once we're here, we know that the item isn't in this
  380. // node and that the child is big enough to remove from.
  381. return child.remove(item, minItems, typ)
  382. }
  383. // growChildAndRemove grows child 'i' to make sure it's possible to remove an
  384. // item from it while keeping it at minItems, then calls remove to actually
  385. // remove it.
  386. //
  387. // Most documentation says we have to do two sets of special casing:
  388. // 1) item is in this node
  389. // 2) item is in child
  390. // In both cases, we need to handle the two subcases:
  391. // A) node has enough values that it can spare one
  392. // B) node doesn't have enough values
  393. // For the latter, we have to check:
  394. // a) left sibling has node to spare
  395. // b) right sibling has node to spare
  396. // c) we must merge
  397. // To simplify our code here, we handle cases #1 and #2 the same:
  398. // If a node doesn't have enough items, we make sure it does (using a,b,c).
  399. // We then simply redo our remove call, and the second time (regardless of
  400. // whether we're in case 1 or 2), we'll have enough items and can guarantee
  401. // that we hit case A.
  402. func (n *node[T]) growChildAndRemove(i int, item T, minItems int, typ toRemove) (T, bool) {
  403. if i > 0 && len(n.children[i-1].items) > minItems {
  404. // Steal from left child
  405. child := n.mutableChild(i)
  406. stealFrom := n.mutableChild(i - 1)
  407. stolenItem := stealFrom.items.pop()
  408. child.items.insertAt(0, n.items[i-1])
  409. n.items[i-1] = stolenItem
  410. if len(stealFrom.children) > 0 {
  411. child.children.insertAt(0, stealFrom.children.pop())
  412. }
  413. } else if i < len(n.items) && len(n.children[i+1].items) > minItems {
  414. // steal from right child
  415. child := n.mutableChild(i)
  416. stealFrom := n.mutableChild(i + 1)
  417. stolenItem := stealFrom.items.removeAt(0)
  418. child.items = append(child.items, n.items[i])
  419. n.items[i] = stolenItem
  420. if len(stealFrom.children) > 0 {
  421. child.children = append(child.children, stealFrom.children.removeAt(0))
  422. }
  423. } else {
  424. if i >= len(n.items) {
  425. i--
  426. }
  427. child := n.mutableChild(i)
  428. // merge with right child
  429. mergeItem := n.items.removeAt(i)
  430. mergeChild := n.children.removeAt(i + 1)
  431. child.items = append(child.items, mergeItem)
  432. child.items = append(child.items, mergeChild.items...)
  433. child.children = append(child.children, mergeChild.children...)
  434. n.cow.freeNode(mergeChild)
  435. }
  436. return n.remove(item, minItems, typ)
  437. }
  438. type direction int
  439. const (
  440. descend = direction(-1)
  441. ascend = direction(+1)
  442. )
  443. type optionalItem[T any] struct {
  444. item T
  445. valid bool
  446. }
  447. func optional[T any](item T) optionalItem[T] {
  448. return optionalItem[T]{item: item, valid: true}
  449. }
  450. func empty[T any]() optionalItem[T] {
  451. return optionalItem[T]{}
  452. }
  453. // iterate provides a simple method for iterating over elements in the tree.
  454. //
  455. // When ascending, the 'start' should be less than 'stop' and when descending,
  456. // the 'start' should be greater than 'stop'. Setting 'includeStart' to true
  457. // will force the iterator to include the first item when it equals 'start',
  458. // thus creating a "greaterOrEqual" or "lessThanEqual" rather than just a
  459. // "greaterThan" or "lessThan" queries.
  460. func (n *node[T]) iterate(dir direction, start, stop optionalItem[T], includeStart bool, hit bool, iter ItemIteratorG[T]) (bool, bool) {
  461. var ok, found bool
  462. var index int
  463. switch dir {
  464. case ascend:
  465. if start.valid {
  466. index, _ = n.items.find(start.item, n.cow.less)
  467. }
  468. for i := index; i < len(n.items); i++ {
  469. if len(n.children) > 0 {
  470. if hit, ok = n.children[i].iterate(dir, start, stop, includeStart, hit, iter); !ok {
  471. return hit, false
  472. }
  473. }
  474. if !includeStart && !hit && start.valid && !n.cow.less(start.item, n.items[i]) {
  475. hit = true
  476. continue
  477. }
  478. hit = true
  479. if stop.valid && !n.cow.less(n.items[i], stop.item) {
  480. return hit, false
  481. }
  482. if !iter(n.items[i]) {
  483. return hit, false
  484. }
  485. }
  486. if len(n.children) > 0 {
  487. if hit, ok = n.children[len(n.children)-1].iterate(dir, start, stop, includeStart, hit, iter); !ok {
  488. return hit, false
  489. }
  490. }
  491. case descend:
  492. if start.valid {
  493. index, found = n.items.find(start.item, n.cow.less)
  494. if !found {
  495. index = index - 1
  496. }
  497. } else {
  498. index = len(n.items) - 1
  499. }
  500. for i := index; i >= 0; i-- {
  501. if start.valid && !n.cow.less(n.items[i], start.item) {
  502. if !includeStart || hit || n.cow.less(start.item, n.items[i]) {
  503. continue
  504. }
  505. }
  506. if len(n.children) > 0 {
  507. if hit, ok = n.children[i+1].iterate(dir, start, stop, includeStart, hit, iter); !ok {
  508. return hit, false
  509. }
  510. }
  511. if stop.valid && !n.cow.less(stop.item, n.items[i]) {
  512. return hit, false // continue
  513. }
  514. hit = true
  515. if !iter(n.items[i]) {
  516. return hit, false
  517. }
  518. }
  519. if len(n.children) > 0 {
  520. if hit, ok = n.children[0].iterate(dir, start, stop, includeStart, hit, iter); !ok {
  521. return hit, false
  522. }
  523. }
  524. }
  525. return hit, true
  526. }
  527. // print is used for testing/debugging purposes.
  528. func (n *node[T]) print(w io.Writer, level int) {
  529. fmt.Fprintf(w, "%sNODE:%v\n", strings.Repeat(" ", level), n.items)
  530. for _, c := range n.children {
  531. c.print(w, level+1)
  532. }
  533. }
  534. // BTreeG is a generic implementation of a B-Tree.
  535. //
  536. // BTreeG stores items of type T in an ordered structure, allowing easy insertion,
  537. // removal, and iteration.
  538. //
  539. // Write operations are not safe for concurrent mutation by multiple
  540. // goroutines, but Read operations are.
  541. type BTreeG[T any] struct {
  542. degree int
  543. length int
  544. root *node[T]
  545. cow *copyOnWriteContext[T]
  546. }
  547. // LessFunc[T] determines how to order a type 'T'. It should implement a strict
  548. // ordering, and should return true if within that ordering, 'a' < 'b'.
  549. type LessFunc[T any] func(a, b T) bool
  550. // copyOnWriteContext pointers determine node ownership... a tree with a write
  551. // context equivalent to a node's write context is allowed to modify that node.
  552. // A tree whose write context does not match a node's is not allowed to modify
  553. // it, and must create a new, writable copy (IE: it's a Clone).
  554. //
  555. // When doing any write operation, we maintain the invariant that the current
  556. // node's context is equal to the context of the tree that requested the write.
  557. // We do this by, before we descend into any node, creating a copy with the
  558. // correct context if the contexts don't match.
  559. //
  560. // Since the node we're currently visiting on any write has the requesting
  561. // tree's context, that node is modifiable in place. Children of that node may
  562. // not share context, but before we descend into them, we'll make a mutable
  563. // copy.
  564. type copyOnWriteContext[T any] struct {
  565. freelist *FreeListG[T]
  566. less LessFunc[T]
  567. }
  568. // Clone clones the btree, lazily. Clone should not be called concurrently,
  569. // but the original tree (t) and the new tree (t2) can be used concurrently
  570. // once the Clone call completes.
  571. //
  572. // The internal tree structure of b is marked read-only and shared between t and
  573. // t2. Writes to both t and t2 use copy-on-write logic, creating new nodes
  574. // whenever one of b's original nodes would have been modified. Read operations
  575. // should have no performance degredation. Write operations for both t and t2
  576. // will initially experience minor slow-downs caused by additional allocs and
  577. // copies due to the aforementioned copy-on-write logic, but should converge to
  578. // the original performance characteristics of the original tree.
  579. func (t *BTreeG[T]) Clone() (t2 *BTreeG[T]) {
  580. // Create two entirely new copy-on-write contexts.
  581. // This operation effectively creates three trees:
  582. // the original, shared nodes (old b.cow)
  583. // the new b.cow nodes
  584. // the new out.cow nodes
  585. cow1, cow2 := *t.cow, *t.cow
  586. out := *t
  587. t.cow = &cow1
  588. out.cow = &cow2
  589. return &out
  590. }
  591. // maxItems returns the max number of items to allow per node.
  592. func (t *BTreeG[T]) maxItems() int {
  593. return t.degree*2 - 1
  594. }
  595. // minItems returns the min number of items to allow per node (ignored for the
  596. // root node).
  597. func (t *BTreeG[T]) minItems() int {
  598. return t.degree - 1
  599. }
  600. func (c *copyOnWriteContext[T]) newNode() (n *node[T]) {
  601. n = c.freelist.newNode()
  602. n.cow = c
  603. return
  604. }
  605. type freeType int
  606. const (
  607. ftFreelistFull freeType = iota // node was freed (available for GC, not stored in freelist)
  608. ftStored // node was stored in the freelist for later use
  609. ftNotOwned // node was ignored by COW, since it's owned by another one
  610. )
  611. // freeNode frees a node within a given COW context, if it's owned by that
  612. // context. It returns what happened to the node (see freeType const
  613. // documentation).
  614. func (c *copyOnWriteContext[T]) freeNode(n *node[T]) freeType {
  615. if n.cow == c {
  616. // clear to allow GC
  617. n.items.truncate(0)
  618. n.children.truncate(0)
  619. n.cow = nil
  620. if c.freelist.freeNode(n) {
  621. return ftStored
  622. } else {
  623. return ftFreelistFull
  624. }
  625. } else {
  626. return ftNotOwned
  627. }
  628. }
  629. // ReplaceOrInsert adds the given item to the tree. If an item in the tree
  630. // already equals the given one, it is removed from the tree and returned,
  631. // and the second return value is true. Otherwise, (zeroValue, false)
  632. //
  633. // nil cannot be added to the tree (will panic).
  634. func (t *BTreeG[T]) ReplaceOrInsert(item T) (_ T, _ bool) {
  635. if t.root == nil {
  636. t.root = t.cow.newNode()
  637. t.root.items = append(t.root.items, item)
  638. t.length++
  639. return
  640. } else {
  641. t.root = t.root.mutableFor(t.cow)
  642. if len(t.root.items) >= t.maxItems() {
  643. item2, second := t.root.split(t.maxItems() / 2)
  644. oldroot := t.root
  645. t.root = t.cow.newNode()
  646. t.root.items = append(t.root.items, item2)
  647. t.root.children = append(t.root.children, oldroot, second)
  648. }
  649. }
  650. out, outb := t.root.insert(item, t.maxItems())
  651. if !outb {
  652. t.length++
  653. }
  654. return out, outb
  655. }
  656. // Delete removes an item equal to the passed in item from the tree, returning
  657. // it. If no such item exists, returns (zeroValue, false).
  658. func (t *BTreeG[T]) Delete(item T) (T, bool) {
  659. return t.deleteItem(item, removeItem)
  660. }
  661. // DeleteMin removes the smallest item in the tree and returns it.
  662. // If no such item exists, returns (zeroValue, false).
  663. func (t *BTreeG[T]) DeleteMin() (T, bool) {
  664. var zero T
  665. return t.deleteItem(zero, removeMin)
  666. }
  667. // DeleteMax removes the largest item in the tree and returns it.
  668. // If no such item exists, returns (zeroValue, false).
  669. func (t *BTreeG[T]) DeleteMax() (T, bool) {
  670. var zero T
  671. return t.deleteItem(zero, removeMax)
  672. }
  673. func (t *BTreeG[T]) deleteItem(item T, typ toRemove) (_ T, _ bool) {
  674. if t.root == nil || len(t.root.items) == 0 {
  675. return
  676. }
  677. t.root = t.root.mutableFor(t.cow)
  678. out, outb := t.root.remove(item, t.minItems(), typ)
  679. if len(t.root.items) == 0 && len(t.root.children) > 0 {
  680. oldroot := t.root
  681. t.root = t.root.children[0]
  682. t.cow.freeNode(oldroot)
  683. }
  684. if outb {
  685. t.length--
  686. }
  687. return out, outb
  688. }
  689. // AscendRange calls the iterator for every value in the tree within the range
  690. // [greaterOrEqual, lessThan), until iterator returns false.
  691. func (t *BTreeG[T]) AscendRange(greaterOrEqual, lessThan T, iterator ItemIteratorG[T]) {
  692. if t.root == nil {
  693. return
  694. }
  695. t.root.iterate(ascend, optional[T](greaterOrEqual), optional[T](lessThan), true, false, iterator)
  696. }
  697. // AscendLessThan calls the iterator for every value in the tree within the range
  698. // [first, pivot), until iterator returns false.
  699. func (t *BTreeG[T]) AscendLessThan(pivot T, iterator ItemIteratorG[T]) {
  700. if t.root == nil {
  701. return
  702. }
  703. t.root.iterate(ascend, empty[T](), optional(pivot), false, false, iterator)
  704. }
  705. // AscendGreaterOrEqual calls the iterator for every value in the tree within
  706. // the range [pivot, last], until iterator returns false.
  707. func (t *BTreeG[T]) AscendGreaterOrEqual(pivot T, iterator ItemIteratorG[T]) {
  708. if t.root == nil {
  709. return
  710. }
  711. t.root.iterate(ascend, optional[T](pivot), empty[T](), true, false, iterator)
  712. }
  713. // Ascend calls the iterator for every value in the tree within the range
  714. // [first, last], until iterator returns false.
  715. func (t *BTreeG[T]) Ascend(iterator ItemIteratorG[T]) {
  716. if t.root == nil {
  717. return
  718. }
  719. t.root.iterate(ascend, empty[T](), empty[T](), false, false, iterator)
  720. }
  721. // DescendRange calls the iterator for every value in the tree within the range
  722. // [lessOrEqual, greaterThan), until iterator returns false.
  723. func (t *BTreeG[T]) DescendRange(lessOrEqual, greaterThan T, iterator ItemIteratorG[T]) {
  724. if t.root == nil {
  725. return
  726. }
  727. t.root.iterate(descend, optional[T](lessOrEqual), optional[T](greaterThan), true, false, iterator)
  728. }
  729. // DescendLessOrEqual calls the iterator for every value in the tree within the range
  730. // [pivot, first], until iterator returns false.
  731. func (t *BTreeG[T]) DescendLessOrEqual(pivot T, iterator ItemIteratorG[T]) {
  732. if t.root == nil {
  733. return
  734. }
  735. t.root.iterate(descend, optional[T](pivot), empty[T](), true, false, iterator)
  736. }
  737. // DescendGreaterThan calls the iterator for every value in the tree within
  738. // the range [last, pivot), until iterator returns false.
  739. func (t *BTreeG[T]) DescendGreaterThan(pivot T, iterator ItemIteratorG[T]) {
  740. if t.root == nil {
  741. return
  742. }
  743. t.root.iterate(descend, empty[T](), optional[T](pivot), false, false, iterator)
  744. }
  745. // Descend calls the iterator for every value in the tree within the range
  746. // [last, first], until iterator returns false.
  747. func (t *BTreeG[T]) Descend(iterator ItemIteratorG[T]) {
  748. if t.root == nil {
  749. return
  750. }
  751. t.root.iterate(descend, empty[T](), empty[T](), false, false, iterator)
  752. }
  753. // Get looks for the key item in the tree, returning it. It returns
  754. // (zeroValue, false) if unable to find that item.
  755. func (t *BTreeG[T]) Get(key T) (_ T, _ bool) {
  756. if t.root == nil {
  757. return
  758. }
  759. return t.root.get(key)
  760. }
  761. // Min returns the smallest item in the tree, or (zeroValue, false) if the tree is empty.
  762. func (t *BTreeG[T]) Min() (_ T, _ bool) {
  763. return min(t.root)
  764. }
  765. // Max returns the largest item in the tree, or (zeroValue, false) if the tree is empty.
  766. func (t *BTreeG[T]) Max() (_ T, _ bool) {
  767. return max(t.root)
  768. }
  769. // Has returns true if the given key is in the tree.
  770. func (t *BTreeG[T]) Has(key T) bool {
  771. _, ok := t.Get(key)
  772. return ok
  773. }
  774. // Len returns the number of items currently in the tree.
  775. func (t *BTreeG[T]) Len() int {
  776. return t.length
  777. }
  778. // Clear removes all items from the btree. If addNodesToFreelist is true,
  779. // t's nodes are added to its freelist as part of this call, until the freelist
  780. // is full. Otherwise, the root node is simply dereferenced and the subtree
  781. // left to Go's normal GC processes.
  782. //
  783. // This can be much faster
  784. // than calling Delete on all elements, because that requires finding/removing
  785. // each element in the tree and updating the tree accordingly. It also is
  786. // somewhat faster than creating a new tree to replace the old one, because
  787. // nodes from the old tree are reclaimed into the freelist for use by the new
  788. // one, instead of being lost to the garbage collector.
  789. //
  790. // This call takes:
  791. // O(1): when addNodesToFreelist is false, this is a single operation.
  792. // O(1): when the freelist is already full, it breaks out immediately
  793. // O(freelist size): when the freelist is empty and the nodes are all owned
  794. // by this tree, nodes are added to the freelist until full.
  795. // O(tree size): when all nodes are owned by another tree, all nodes are
  796. // iterated over looking for nodes to add to the freelist, and due to
  797. // ownership, none are.
  798. func (t *BTreeG[T]) Clear(addNodesToFreelist bool) {
  799. if t.root != nil && addNodesToFreelist {
  800. t.root.reset(t.cow)
  801. }
  802. t.root, t.length = nil, 0
  803. }
  804. // reset returns a subtree to the freelist. It breaks out immediately if the
  805. // freelist is full, since the only benefit of iterating is to fill that
  806. // freelist up. Returns true if parent reset call should continue.
  807. func (n *node[T]) reset(c *copyOnWriteContext[T]) bool {
  808. for _, child := range n.children {
  809. if !child.reset(c) {
  810. return false
  811. }
  812. }
  813. return c.freeNode(n) != ftFreelistFull
  814. }
  815. // Int implements the Item interface for integers.
  816. type Int int
  817. // Less returns true if int(a) < int(b).
  818. func (a Int) Less(b Item) bool {
  819. return a < b.(Int)
  820. }
  821. // BTree is an implementation of a B-Tree.
  822. //
  823. // BTree stores Item instances in an ordered structure, allowing easy insertion,
  824. // removal, and iteration.
  825. //
  826. // Write operations are not safe for concurrent mutation by multiple
  827. // goroutines, but Read operations are.
  828. type BTree BTreeG[Item]
  829. var itemLess LessFunc[Item] = func(a, b Item) bool {
  830. return a.Less(b)
  831. }
  832. // New creates a new B-Tree with the given degree.
  833. //
  834. // New(2), for example, will create a 2-3-4 tree (each node contains 1-3 items
  835. // and 2-4 children).
  836. func New(degree int) *BTree {
  837. return (*BTree)(NewG[Item](degree, itemLess))
  838. }
  839. // FreeList represents a free list of btree nodes. By default each
  840. // BTree has its own FreeList, but multiple BTrees can share the same
  841. // FreeList.
  842. // Two Btrees using the same freelist are safe for concurrent write access.
  843. type FreeList FreeListG[Item]
  844. // NewFreeList creates a new free list.
  845. // size is the maximum size of the returned free list.
  846. func NewFreeList(size int) *FreeList {
  847. return (*FreeList)(NewFreeListG[Item](size))
  848. }
  849. // NewWithFreeList creates a new B-Tree that uses the given node free list.
  850. func NewWithFreeList(degree int, f *FreeList) *BTree {
  851. return (*BTree)(NewWithFreeListG[Item](degree, itemLess, (*FreeListG[Item])(f)))
  852. }
  853. // ItemIterator allows callers of Ascend* to iterate in-order over portions of
  854. // the tree. When this function returns false, iteration will stop and the
  855. // associated Ascend* function will immediately return.
  856. type ItemIterator ItemIteratorG[Item]
  857. // Clone clones the btree, lazily. Clone should not be called concurrently,
  858. // but the original tree (t) and the new tree (t2) can be used concurrently
  859. // once the Clone call completes.
  860. //
  861. // The internal tree structure of b is marked read-only and shared between t and
  862. // t2. Writes to both t and t2 use copy-on-write logic, creating new nodes
  863. // whenever one of b's original nodes would have been modified. Read operations
  864. // should have no performance degredation. Write operations for both t and t2
  865. // will initially experience minor slow-downs caused by additional allocs and
  866. // copies due to the aforementioned copy-on-write logic, but should converge to
  867. // the original performance characteristics of the original tree.
  868. func (t *BTree) Clone() (t2 *BTree) {
  869. return (*BTree)((*BTreeG[Item])(t).Clone())
  870. }
  871. // Delete removes an item equal to the passed in item from the tree, returning
  872. // it. If no such item exists, returns nil.
  873. func (t *BTree) Delete(item Item) Item {
  874. i, _ := (*BTreeG[Item])(t).Delete(item)
  875. return i
  876. }
  877. // DeleteMax removes the largest item in the tree and returns it.
  878. // If no such item exists, returns nil.
  879. func (t *BTree) DeleteMax() Item {
  880. i, _ := (*BTreeG[Item])(t).DeleteMax()
  881. return i
  882. }
  883. // DeleteMin removes the smallest item in the tree and returns it.
  884. // If no such item exists, returns nil.
  885. func (t *BTree) DeleteMin() Item {
  886. i, _ := (*BTreeG[Item])(t).DeleteMin()
  887. return i
  888. }
  889. // Get looks for the key item in the tree, returning it. It returns nil if
  890. // unable to find that item.
  891. func (t *BTree) Get(key Item) Item {
  892. i, _ := (*BTreeG[Item])(t).Get(key)
  893. return i
  894. }
  895. // Max returns the largest item in the tree, or nil if the tree is empty.
  896. func (t *BTree) Max() Item {
  897. i, _ := (*BTreeG[Item])(t).Max()
  898. return i
  899. }
  900. // Min returns the smallest item in the tree, or nil if the tree is empty.
  901. func (t *BTree) Min() Item {
  902. i, _ := (*BTreeG[Item])(t).Min()
  903. return i
  904. }
  905. // Has returns true if the given key is in the tree.
  906. func (t *BTree) Has(key Item) bool {
  907. return (*BTreeG[Item])(t).Has(key)
  908. }
  909. // ReplaceOrInsert adds the given item to the tree. If an item in the tree
  910. // already equals the given one, it is removed from the tree and returned.
  911. // Otherwise, nil is returned.
  912. //
  913. // nil cannot be added to the tree (will panic).
  914. func (t *BTree) ReplaceOrInsert(item Item) Item {
  915. i, _ := (*BTreeG[Item])(t).ReplaceOrInsert(item)
  916. return i
  917. }
  918. // AscendRange calls the iterator for every value in the tree within the range
  919. // [greaterOrEqual, lessThan), until iterator returns false.
  920. func (t *BTree) AscendRange(greaterOrEqual, lessThan Item, iterator ItemIterator) {
  921. (*BTreeG[Item])(t).AscendRange(greaterOrEqual, lessThan, (ItemIteratorG[Item])(iterator))
  922. }
  923. // AscendLessThan calls the iterator for every value in the tree within the range
  924. // [first, pivot), until iterator returns false.
  925. func (t *BTree) AscendLessThan(pivot Item, iterator ItemIterator) {
  926. (*BTreeG[Item])(t).AscendLessThan(pivot, (ItemIteratorG[Item])(iterator))
  927. }
  928. // AscendGreaterOrEqual calls the iterator for every value in the tree within
  929. // the range [pivot, last], until iterator returns false.
  930. func (t *BTree) AscendGreaterOrEqual(pivot Item, iterator ItemIterator) {
  931. (*BTreeG[Item])(t).AscendGreaterOrEqual(pivot, (ItemIteratorG[Item])(iterator))
  932. }
  933. // Ascend calls the iterator for every value in the tree within the range
  934. // [first, last], until iterator returns false.
  935. func (t *BTree) Ascend(iterator ItemIterator) {
  936. (*BTreeG[Item])(t).Ascend((ItemIteratorG[Item])(iterator))
  937. }
  938. // DescendRange calls the iterator for every value in the tree within the range
  939. // [lessOrEqual, greaterThan), until iterator returns false.
  940. func (t *BTree) DescendRange(lessOrEqual, greaterThan Item, iterator ItemIterator) {
  941. (*BTreeG[Item])(t).DescendRange(lessOrEqual, greaterThan, (ItemIteratorG[Item])(iterator))
  942. }
  943. // DescendLessOrEqual calls the iterator for every value in the tree within the range
  944. // [pivot, first], until iterator returns false.
  945. func (t *BTree) DescendLessOrEqual(pivot Item, iterator ItemIterator) {
  946. (*BTreeG[Item])(t).DescendLessOrEqual(pivot, (ItemIteratorG[Item])(iterator))
  947. }
  948. // DescendGreaterThan calls the iterator for every value in the tree within
  949. // the range [last, pivot), until iterator returns false.
  950. func (t *BTree) DescendGreaterThan(pivot Item, iterator ItemIterator) {
  951. (*BTreeG[Item])(t).DescendGreaterThan(pivot, (ItemIteratorG[Item])(iterator))
  952. }
  953. // Descend calls the iterator for every value in the tree within the range
  954. // [last, first], until iterator returns false.
  955. func (t *BTree) Descend(iterator ItemIterator) {
  956. (*BTreeG[Item])(t).Descend((ItemIteratorG[Item])(iterator))
  957. }
  958. // Len returns the number of items currently in the tree.
  959. func (t *BTree) Len() int {
  960. return (*BTreeG[Item])(t).Len()
  961. }
  962. // Clear removes all items from the btree. If addNodesToFreelist is true,
  963. // t's nodes are added to its freelist as part of this call, until the freelist
  964. // is full. Otherwise, the root node is simply dereferenced and the subtree
  965. // left to Go's normal GC processes.
  966. //
  967. // This can be much faster
  968. // than calling Delete on all elements, because that requires finding/removing
  969. // each element in the tree and updating the tree accordingly. It also is
  970. // somewhat faster than creating a new tree to replace the old one, because
  971. // nodes from the old tree are reclaimed into the freelist for use by the new
  972. // one, instead of being lost to the garbage collector.
  973. //
  974. // This call takes:
  975. // O(1): when addNodesToFreelist is false, this is a single operation.
  976. // O(1): when the freelist is already full, it breaks out immediately
  977. // O(freelist size): when the freelist is empty and the nodes are all owned
  978. // by this tree, nodes are added to the freelist until full.
  979. // O(tree size): when all nodes are owned by another tree, all nodes are
  980. // iterated over looking for nodes to add to the freelist, and due to
  981. // ownership, none are.
  982. func (t *BTree) Clear(addNodesToFreelist bool) {
  983. (*BTreeG[Item])(t).Clear(addNodesToFreelist)
  984. }