// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import (
	"fmt"
)

const (
	tableBits        = 15                               // Bits used in the table
	tableSize        = 1 << tableBits                   // Size of the table
	tableShardCnt    = 1 << (tableBits - dictShardBits) // Number of shards in the table
	tableShardSize   = tableSize / tableShardCnt        // Size of an individual shard
	tableFastHashLen = 6
	tableMask        = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
	maxMatchLength   = 131074
)
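
// tableEntry pairs the first four bytes seen at a position with the
// absolute offset of that position, so a table hit can be validated
// against val before src is ever dereferenced.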
type tableEntry struct {
	val    uint32
	offset int32
}

type fastEncoder struct {
	fastBase
	table [tableSize]tableEntry
}

type fastEncoderDict struct {
	fastEncoder
	dictTable       []tableEntry
	tableShardDirty [tableShardCnt]bool
	allDirty        bool
}

// Encode mimics functionality in zstd_fast.c
func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
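
	// cv caches the next 8 bytes of input; the search loop derives two
	// hashes from it, so each iteration can probe two table positions
	// from a single load.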
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks,
		// which we ensure by not using them for the first 3 matches.
		canRepeat := len(blk.sequences) > 2
		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
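
			// The repeat check starts at s+2 rather than s: if the bytes
			// there match, the backwards extension below can usually reclaim
			// the two skipped bytes.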
			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				length := 4 + e.matchlen(s+6, repIndex+4, src)
				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				break
			}
			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t < 0")
				}
				break
			}
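
			// No match was found: skip ahead. The step grows with the
			// distance since the last emit (controlled by kSearchStrength),
			// so incompressible regions are scanned progressively faster.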
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t
		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}
		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
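		// (offset codes 1-3 denote the repeat offsets, so a literal match
		// offset is stored with +3 added).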
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if debugEncoder {
		if len(src) > maxCompressedBlockSize {
			panic("src too big")
		}
	}

	// Protect against e.cur wraparound.
	if e.cur >= e.bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks,
		// which we ensure by not using them for the first 3 matches.
		for {
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				length := 4 + e.matchlen(s+6, repIndex+4, src)
				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
				}
				break
			}
			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t < 0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t
		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}
		if debugAsserts && t < 0 {
			panic(fmt.Sprintf("t (%d) < 0 ", t))
		}
		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// We do not store history, so we must offset e.cur to avoid false matches for next user.
	if e.cur < e.bufferReset {
		e.cur += int32(len(src))
	}
}

// Encode will encode the content, with a dictionary if initialized for it.
func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
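
	// Dictionary shard tracking only pays off for small inputs; for larger
	// blocks, or once everything is dirty anyway, fall back to the plain
	// encoder and mark the whole table dirty.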
	if e.allDirty || len(src) > 32<<10 {
		e.fastEncoder.Encode(blk, src)
		e.allDirty = true
		return
	}

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			e.table = [tableSize]tableEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks,
		// which we ensure by not using them for the first 3 matches.
		canRepeat := len(blk.sequences) > 2
		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
			e.markShardDirty(nextHash2)

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				length := 4 + e.matchlen(s+6, repIndex+4, src)
				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				break
			}
			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t < 0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t
		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}
		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset the encoder. fastEncoder does not support dictionaries;
// it panics if one is set.
func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("fastEncoder: Reset with dict")
	}
}

// Reset will reset the encoder and set a dictionary if not nil.
func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := e.maxMatchOff + int32(len(d.content)) - 8
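		// Hash three consecutive positions per step so the stride-3 loop
		// still seeds the table from every byte of the dictionary content.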
		for i := e.maxMatchOff; i < end; i += 3 {
			const hashLog = tableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hashLen(cv, hashLog, tableFastHashLen)      // 0 -> 5
			nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen)  // 1 -> 6
			nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	e.cur = e.maxMatchOff
	dirtyShardCnt := 0
	if !e.allDirty {
		for i := range e.tableShardDirty {
			if e.tableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	const shardCnt = tableShardCnt
	const shardSize = tableShardSize
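
	// If most shards are dirty, a single bulk copy of the whole table is
	// cheaper than copying the dirty shards one by one.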
	if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
		//copy(e.table[:], e.dictTable)
		e.table = *(*[tableSize]tableEntry)(e.dictTable)
		for i := range e.tableShardDirty {
			e.tableShardDirty[i] = false
		}
		e.allDirty = false
		return
	}
	for i := range e.tableShardDirty {
		if !e.tableShardDirty[i] {
			continue
		}

		//copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
		*(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
		e.tableShardDirty[i] = false
	}
	e.allDirty = false
}

func (e *fastEncoderDict) markAllShardsDirty() {
	e.allDirty = true
}

func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
	e.tableShardDirty[entryNum/tableShardSize] = true
}