encoder.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. // Copyright 2021 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package pkgbits
  5. import (
  6. "bytes"
  7. "crypto/md5"
  8. "encoding/binary"
  9. "go/constant"
  10. "io"
  11. "math/big"
  12. "runtime"
  13. )
  14. // currentVersion is the current version number.
  15. //
  16. // - v0: initial prototype
  17. //
  18. // - v1: adds the flags uint32 word
  19. const currentVersion uint32 = 1
  20. // A PkgEncoder provides methods for encoding a package's Unified IR
  21. // export data.
  22. type PkgEncoder struct {
  23. // elems holds the bitstream for previously encoded elements.
  24. elems [numRelocs][]string
  25. // stringsIdx maps previously encoded strings to their index within
  26. // the RelocString section, to allow deduplication. That is,
  27. // elems[RelocString][stringsIdx[s]] == s (if present).
  28. stringsIdx map[string]Index
  29. // syncFrames is the number of frames to write at each sync
  30. // marker. A negative value means sync markers are omitted.
  31. syncFrames int
  32. }
  33. // SyncMarkers reports whether pw uses sync markers.
  34. func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
  35. // NewPkgEncoder returns an initialized PkgEncoder.
  36. //
  37. // syncFrames is the number of caller frames that should be serialized
  38. // at Sync points. Serializing additional frames results in larger
  39. // export data files, but can help diagnosing desync errors in
  40. // higher-level Unified IR reader/writer code. If syncFrames is
  41. // negative, then sync markers are omitted entirely.
  42. func NewPkgEncoder(syncFrames int) PkgEncoder {
  43. return PkgEncoder{
  44. stringsIdx: make(map[string]Index),
  45. syncFrames: syncFrames,
  46. }
  47. }
  48. // DumpTo writes the package's encoded data to out0 and returns the
  49. // package fingerprint.
  50. func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
  51. h := md5.New()
  52. out := io.MultiWriter(out0, h)
  53. writeUint32 := func(x uint32) {
  54. assert(binary.Write(out, binary.LittleEndian, x) == nil)
  55. }
  56. writeUint32(currentVersion)
  57. var flags uint32
  58. if pw.SyncMarkers() {
  59. flags |= flagSyncMarkers
  60. }
  61. writeUint32(flags)
  62. // Write elemEndsEnds.
  63. var sum uint32
  64. for _, elems := range &pw.elems {
  65. sum += uint32(len(elems))
  66. writeUint32(sum)
  67. }
  68. // Write elemEnds.
  69. sum = 0
  70. for _, elems := range &pw.elems {
  71. for _, elem := range elems {
  72. sum += uint32(len(elem))
  73. writeUint32(sum)
  74. }
  75. }
  76. // Write elemData.
  77. for _, elems := range &pw.elems {
  78. for _, elem := range elems {
  79. _, err := io.WriteString(out, elem)
  80. assert(err == nil)
  81. }
  82. }
  83. // Write fingerprint.
  84. copy(fingerprint[:], h.Sum(nil))
  85. _, err := out0.Write(fingerprint[:])
  86. assert(err == nil)
  87. return
  88. }
  89. // StringIdx adds a string value to the strings section, if not
  90. // already present, and returns its index.
  91. func (pw *PkgEncoder) StringIdx(s string) Index {
  92. if idx, ok := pw.stringsIdx[s]; ok {
  93. assert(pw.elems[RelocString][idx] == s)
  94. return idx
  95. }
  96. idx := Index(len(pw.elems[RelocString]))
  97. pw.elems[RelocString] = append(pw.elems[RelocString], s)
  98. pw.stringsIdx[s] = idx
  99. return idx
  100. }
  101. // NewEncoder returns an Encoder for a new element within the given
  102. // section, and encodes the given SyncMarker as the start of the
  103. // element bitstream.
  104. func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
  105. e := pw.NewEncoderRaw(k)
  106. e.Sync(marker)
  107. return e
  108. }
  109. // NewEncoderRaw returns an Encoder for a new element within the given
  110. // section.
  111. //
  112. // Most callers should use NewEncoder instead.
  113. func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
  114. idx := Index(len(pw.elems[k]))
  115. pw.elems[k] = append(pw.elems[k], "") // placeholder
  116. return Encoder{
  117. p: pw,
  118. k: k,
  119. Idx: idx,
  120. }
  121. }
  122. // An Encoder provides methods for encoding an individual element's
  123. // bitstream data.
  124. type Encoder struct {
  125. p *PkgEncoder
  126. Relocs []RelocEnt
  127. RelocMap map[RelocEnt]uint32
  128. Data bytes.Buffer // accumulated element bitstream data
  129. encodingRelocHeader bool
  130. k RelocKind
  131. Idx Index // index within relocation section
  132. }
  133. // Flush finalizes the element's bitstream and returns its Index.
  134. func (w *Encoder) Flush() Index {
  135. var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved
  136. // Backup the data so we write the relocations at the front.
  137. var tmp bytes.Buffer
  138. io.Copy(&tmp, &w.Data)
  139. // TODO(mdempsky): Consider writing these out separately so they're
  140. // easier to strip, along with function bodies, so that we can prune
  141. // down to just the data that's relevant to go/types.
  142. if w.encodingRelocHeader {
  143. panic("encodingRelocHeader already true; recursive flush?")
  144. }
  145. w.encodingRelocHeader = true
  146. w.Sync(SyncRelocs)
  147. w.Len(len(w.Relocs))
  148. for _, rEnt := range w.Relocs {
  149. w.Sync(SyncReloc)
  150. w.Len(int(rEnt.Kind))
  151. w.Len(int(rEnt.Idx))
  152. }
  153. io.Copy(&sb, &w.Data)
  154. io.Copy(&sb, &tmp)
  155. w.p.elems[w.k][w.Idx] = sb.String()
  156. return w.Idx
  157. }
  158. func (w *Encoder) checkErr(err error) {
  159. if err != nil {
  160. errorf("unexpected encoding error: %v", err)
  161. }
  162. }
  163. func (w *Encoder) rawUvarint(x uint64) {
  164. var buf [binary.MaxVarintLen64]byte
  165. n := binary.PutUvarint(buf[:], x)
  166. _, err := w.Data.Write(buf[:n])
  167. w.checkErr(err)
  168. }
  169. func (w *Encoder) rawVarint(x int64) {
  170. // Zig-zag encode.
  171. ux := uint64(x) << 1
  172. if x < 0 {
  173. ux = ^ux
  174. }
  175. w.rawUvarint(ux)
  176. }
  177. func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
  178. e := RelocEnt{r, idx}
  179. if w.RelocMap != nil {
  180. if i, ok := w.RelocMap[e]; ok {
  181. return int(i)
  182. }
  183. } else {
  184. w.RelocMap = make(map[RelocEnt]uint32)
  185. }
  186. i := len(w.Relocs)
  187. w.RelocMap[e] = uint32(i)
  188. w.Relocs = append(w.Relocs, e)
  189. return i
  190. }
  191. func (w *Encoder) Sync(m SyncMarker) {
  192. if !w.p.SyncMarkers() {
  193. return
  194. }
  195. // Writing out stack frame string references requires working
  196. // relocations, but writing out the relocations themselves involves
  197. // sync markers. To prevent infinite recursion, we simply trim the
  198. // stack frame for sync markers within the relocation header.
  199. var frames []string
  200. if !w.encodingRelocHeader && w.p.syncFrames > 0 {
  201. pcs := make([]uintptr, w.p.syncFrames)
  202. n := runtime.Callers(2, pcs)
  203. frames = fmtFrames(pcs[:n]...)
  204. }
  205. // TODO(mdempsky): Save space by writing out stack frames as a
  206. // linked list so we can share common stack frames.
  207. w.rawUvarint(uint64(m))
  208. w.rawUvarint(uint64(len(frames)))
  209. for _, frame := range frames {
  210. w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
  211. }
  212. }
  213. // Bool encodes and writes a bool value into the element bitstream,
  214. // and then returns the bool value.
  215. //
  216. // For simple, 2-alternative encodings, the idiomatic way to call Bool
  217. // is something like:
  218. //
  219. // if w.Bool(x != 0) {
  220. // // alternative #1
  221. // } else {
  222. // // alternative #2
  223. // }
  224. //
  225. // For multi-alternative encodings, use Code instead.
  226. func (w *Encoder) Bool(b bool) bool {
  227. w.Sync(SyncBool)
  228. var x byte
  229. if b {
  230. x = 1
  231. }
  232. err := w.Data.WriteByte(x)
  233. w.checkErr(err)
  234. return b
  235. }
  236. // Int64 encodes and writes an int64 value into the element bitstream.
  237. func (w *Encoder) Int64(x int64) {
  238. w.Sync(SyncInt64)
  239. w.rawVarint(x)
  240. }
  241. // Uint64 encodes and writes a uint64 value into the element bitstream.
  242. func (w *Encoder) Uint64(x uint64) {
  243. w.Sync(SyncUint64)
  244. w.rawUvarint(x)
  245. }
  246. // Len encodes and writes a non-negative int value into the element bitstream.
  247. func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
  248. // Int encodes and writes an int value into the element bitstream.
  249. func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
  250. // Uint encodes and writes a uint value into the element bitstream.
  251. func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
  252. // Reloc encodes and writes a relocation for the given (section,
  253. // index) pair into the element bitstream.
  254. //
  255. // Note: Only the index is formally written into the element
  256. // bitstream, so bitstream decoders must know from context which
  257. // section an encoded relocation refers to.
  258. func (w *Encoder) Reloc(r RelocKind, idx Index) {
  259. w.Sync(SyncUseReloc)
  260. w.Len(w.rawReloc(r, idx))
  261. }
  262. // Code encodes and writes a Code value into the element bitstream.
  263. func (w *Encoder) Code(c Code) {
  264. w.Sync(c.Marker())
  265. w.Len(c.Value())
  266. }
  267. // String encodes and writes a string value into the element
  268. // bitstream.
  269. //
  270. // Internally, strings are deduplicated by adding them to the strings
  271. // section (if not already present), and then writing a relocation
  272. // into the element bitstream.
  273. func (w *Encoder) String(s string) {
  274. w.Sync(SyncString)
  275. w.Reloc(RelocString, w.p.StringIdx(s))
  276. }
  277. // Strings encodes and writes a variable-length slice of strings into
  278. // the element bitstream.
  279. func (w *Encoder) Strings(ss []string) {
  280. w.Len(len(ss))
  281. for _, s := range ss {
  282. w.String(s)
  283. }
  284. }
  285. // Value encodes and writes a constant.Value into the element
  286. // bitstream.
  287. func (w *Encoder) Value(val constant.Value) {
  288. w.Sync(SyncValue)
  289. if w.Bool(val.Kind() == constant.Complex) {
  290. w.scalar(constant.Real(val))
  291. w.scalar(constant.Imag(val))
  292. } else {
  293. w.scalar(val)
  294. }
  295. }
  296. func (w *Encoder) scalar(val constant.Value) {
  297. switch v := constant.Val(val).(type) {
  298. default:
  299. errorf("unhandled %v (%v)", val, val.Kind())
  300. case bool:
  301. w.Code(ValBool)
  302. w.Bool(v)
  303. case string:
  304. w.Code(ValString)
  305. w.String(v)
  306. case int64:
  307. w.Code(ValInt64)
  308. w.Int64(v)
  309. case *big.Int:
  310. w.Code(ValBigInt)
  311. w.bigInt(v)
  312. case *big.Rat:
  313. w.Code(ValBigRat)
  314. w.bigInt(v.Num())
  315. w.bigInt(v.Denom())
  316. case *big.Float:
  317. w.Code(ValBigFloat)
  318. w.bigFloat(v)
  319. }
  320. }
  321. func (w *Encoder) bigInt(v *big.Int) {
  322. b := v.Bytes()
  323. w.String(string(b)) // TODO: More efficient encoding.
  324. w.Bool(v.Sign() < 0)
  325. }
  326. func (w *Encoder) bigFloat(v *big.Float) {
  327. b := v.Append(nil, 'p', -1)
  328. w.String(string(b)) // TODO: More efficient encoding.
  329. }