123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383 |
- // Copyright 2021 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package pkgbits
- import (
- "bytes"
- "crypto/md5"
- "encoding/binary"
- "go/constant"
- "io"
- "math/big"
- "runtime"
- )
- // currentVersion is the current version number.
- //
- // - v0: initial prototype
- //
- // - v1: adds the flags uint32 word
- const currentVersion uint32 = 1
- // A PkgEncoder provides methods for encoding a package's Unified IR
- // export data.
- type PkgEncoder struct {
- // elems holds the bitstream for previously encoded elements.
- elems [numRelocs][]string
- // stringsIdx maps previously encoded strings to their index within
- // the RelocString section, to allow deduplication. That is,
- // elems[RelocString][stringsIdx[s]] == s (if present).
- stringsIdx map[string]Index
- // syncFrames is the number of frames to write at each sync
- // marker. A negative value means sync markers are omitted.
- syncFrames int
- }
- // SyncMarkers reports whether pw uses sync markers.
- func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
- // NewPkgEncoder returns an initialized PkgEncoder.
- //
- // syncFrames is the number of caller frames that should be serialized
- // at Sync points. Serializing additional frames results in larger
- // export data files, but can help diagnosing desync errors in
- // higher-level Unified IR reader/writer code. If syncFrames is
- // negative, then sync markers are omitted entirely.
- func NewPkgEncoder(syncFrames int) PkgEncoder {
- return PkgEncoder{
- stringsIdx: make(map[string]Index),
- syncFrames: syncFrames,
- }
- }
- // DumpTo writes the package's encoded data to out0 and returns the
- // package fingerprint.
- func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
- h := md5.New()
- out := io.MultiWriter(out0, h)
- writeUint32 := func(x uint32) {
- assert(binary.Write(out, binary.LittleEndian, x) == nil)
- }
- writeUint32(currentVersion)
- var flags uint32
- if pw.SyncMarkers() {
- flags |= flagSyncMarkers
- }
- writeUint32(flags)
- // Write elemEndsEnds.
- var sum uint32
- for _, elems := range &pw.elems {
- sum += uint32(len(elems))
- writeUint32(sum)
- }
- // Write elemEnds.
- sum = 0
- for _, elems := range &pw.elems {
- for _, elem := range elems {
- sum += uint32(len(elem))
- writeUint32(sum)
- }
- }
- // Write elemData.
- for _, elems := range &pw.elems {
- for _, elem := range elems {
- _, err := io.WriteString(out, elem)
- assert(err == nil)
- }
- }
- // Write fingerprint.
- copy(fingerprint[:], h.Sum(nil))
- _, err := out0.Write(fingerprint[:])
- assert(err == nil)
- return
- }
- // StringIdx adds a string value to the strings section, if not
- // already present, and returns its index.
- func (pw *PkgEncoder) StringIdx(s string) Index {
- if idx, ok := pw.stringsIdx[s]; ok {
- assert(pw.elems[RelocString][idx] == s)
- return idx
- }
- idx := Index(len(pw.elems[RelocString]))
- pw.elems[RelocString] = append(pw.elems[RelocString], s)
- pw.stringsIdx[s] = idx
- return idx
- }
- // NewEncoder returns an Encoder for a new element within the given
- // section, and encodes the given SyncMarker as the start of the
- // element bitstream.
- func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
- e := pw.NewEncoderRaw(k)
- e.Sync(marker)
- return e
- }
- // NewEncoderRaw returns an Encoder for a new element within the given
- // section.
- //
- // Most callers should use NewEncoder instead.
- func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
- idx := Index(len(pw.elems[k]))
- pw.elems[k] = append(pw.elems[k], "") // placeholder
- return Encoder{
- p: pw,
- k: k,
- Idx: idx,
- }
- }
- // An Encoder provides methods for encoding an individual element's
- // bitstream data.
- type Encoder struct {
- p *PkgEncoder
- Relocs []RelocEnt
- RelocMap map[RelocEnt]uint32
- Data bytes.Buffer // accumulated element bitstream data
- encodingRelocHeader bool
- k RelocKind
- Idx Index // index within relocation section
- }
- // Flush finalizes the element's bitstream and returns its Index.
- func (w *Encoder) Flush() Index {
- var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved
- // Backup the data so we write the relocations at the front.
- var tmp bytes.Buffer
- io.Copy(&tmp, &w.Data)
- // TODO(mdempsky): Consider writing these out separately so they're
- // easier to strip, along with function bodies, so that we can prune
- // down to just the data that's relevant to go/types.
- if w.encodingRelocHeader {
- panic("encodingRelocHeader already true; recursive flush?")
- }
- w.encodingRelocHeader = true
- w.Sync(SyncRelocs)
- w.Len(len(w.Relocs))
- for _, rEnt := range w.Relocs {
- w.Sync(SyncReloc)
- w.Len(int(rEnt.Kind))
- w.Len(int(rEnt.Idx))
- }
- io.Copy(&sb, &w.Data)
- io.Copy(&sb, &tmp)
- w.p.elems[w.k][w.Idx] = sb.String()
- return w.Idx
- }
- func (w *Encoder) checkErr(err error) {
- if err != nil {
- errorf("unexpected encoding error: %v", err)
- }
- }
- func (w *Encoder) rawUvarint(x uint64) {
- var buf [binary.MaxVarintLen64]byte
- n := binary.PutUvarint(buf[:], x)
- _, err := w.Data.Write(buf[:n])
- w.checkErr(err)
- }
- func (w *Encoder) rawVarint(x int64) {
- // Zig-zag encode.
- ux := uint64(x) << 1
- if x < 0 {
- ux = ^ux
- }
- w.rawUvarint(ux)
- }
- func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
- e := RelocEnt{r, idx}
- if w.RelocMap != nil {
- if i, ok := w.RelocMap[e]; ok {
- return int(i)
- }
- } else {
- w.RelocMap = make(map[RelocEnt]uint32)
- }
- i := len(w.Relocs)
- w.RelocMap[e] = uint32(i)
- w.Relocs = append(w.Relocs, e)
- return i
- }
- func (w *Encoder) Sync(m SyncMarker) {
- if !w.p.SyncMarkers() {
- return
- }
- // Writing out stack frame string references requires working
- // relocations, but writing out the relocations themselves involves
- // sync markers. To prevent infinite recursion, we simply trim the
- // stack frame for sync markers within the relocation header.
- var frames []string
- if !w.encodingRelocHeader && w.p.syncFrames > 0 {
- pcs := make([]uintptr, w.p.syncFrames)
- n := runtime.Callers(2, pcs)
- frames = fmtFrames(pcs[:n]...)
- }
- // TODO(mdempsky): Save space by writing out stack frames as a
- // linked list so we can share common stack frames.
- w.rawUvarint(uint64(m))
- w.rawUvarint(uint64(len(frames)))
- for _, frame := range frames {
- w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
- }
- }
- // Bool encodes and writes a bool value into the element bitstream,
- // and then returns the bool value.
- //
- // For simple, 2-alternative encodings, the idiomatic way to call Bool
- // is something like:
- //
- // if w.Bool(x != 0) {
- // // alternative #1
- // } else {
- // // alternative #2
- // }
- //
- // For multi-alternative encodings, use Code instead.
- func (w *Encoder) Bool(b bool) bool {
- w.Sync(SyncBool)
- var x byte
- if b {
- x = 1
- }
- err := w.Data.WriteByte(x)
- w.checkErr(err)
- return b
- }
- // Int64 encodes and writes an int64 value into the element bitstream.
- func (w *Encoder) Int64(x int64) {
- w.Sync(SyncInt64)
- w.rawVarint(x)
- }
- // Uint64 encodes and writes a uint64 value into the element bitstream.
- func (w *Encoder) Uint64(x uint64) {
- w.Sync(SyncUint64)
- w.rawUvarint(x)
- }
- // Len encodes and writes a non-negative int value into the element bitstream.
- func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
- // Int encodes and writes an int value into the element bitstream.
- func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
- // Uint encodes and writes a uint value into the element bitstream.
- func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
- // Reloc encodes and writes a relocation for the given (section,
- // index) pair into the element bitstream.
- //
- // Note: Only the index is formally written into the element
- // bitstream, so bitstream decoders must know from context which
- // section an encoded relocation refers to.
- func (w *Encoder) Reloc(r RelocKind, idx Index) {
- w.Sync(SyncUseReloc)
- w.Len(w.rawReloc(r, idx))
- }
- // Code encodes and writes a Code value into the element bitstream.
- func (w *Encoder) Code(c Code) {
- w.Sync(c.Marker())
- w.Len(c.Value())
- }
- // String encodes and writes a string value into the element
- // bitstream.
- //
- // Internally, strings are deduplicated by adding them to the strings
- // section (if not already present), and then writing a relocation
- // into the element bitstream.
- func (w *Encoder) String(s string) {
- w.Sync(SyncString)
- w.Reloc(RelocString, w.p.StringIdx(s))
- }
- // Strings encodes and writes a variable-length slice of strings into
- // the element bitstream.
- func (w *Encoder) Strings(ss []string) {
- w.Len(len(ss))
- for _, s := range ss {
- w.String(s)
- }
- }
- // Value encodes and writes a constant.Value into the element
- // bitstream.
- func (w *Encoder) Value(val constant.Value) {
- w.Sync(SyncValue)
- if w.Bool(val.Kind() == constant.Complex) {
- w.scalar(constant.Real(val))
- w.scalar(constant.Imag(val))
- } else {
- w.scalar(val)
- }
- }
- func (w *Encoder) scalar(val constant.Value) {
- switch v := constant.Val(val).(type) {
- default:
- errorf("unhandled %v (%v)", val, val.Kind())
- case bool:
- w.Code(ValBool)
- w.Bool(v)
- case string:
- w.Code(ValString)
- w.String(v)
- case int64:
- w.Code(ValInt64)
- w.Int64(v)
- case *big.Int:
- w.Code(ValBigInt)
- w.bigInt(v)
- case *big.Rat:
- w.Code(ValBigRat)
- w.bigInt(v.Num())
- w.bigInt(v.Denom())
- case *big.Float:
- w.Code(ValBigFloat)
- w.bigFloat(v)
- }
- }
- func (w *Encoder) bigInt(v *big.Int) {
- b := v.Bytes()
- w.String(string(b)) // TODO: More efficient encoding.
- w.Bool(v.Sign() < 0)
- }
- func (w *Encoder) bigFloat(v *big.Float) {
- b := v.Append(nil, 'p', -1)
- w.String(string(b)) // TODO: More efficient encoding.
- }
|