From 0b2eecdc8a8ef6fd9873bccfac255f17b01a84b3 Mon Sep 17 00:00:00 2001 From: Bjorn Neergaard Date: Tue, 10 Jan 2023 13:35:46 -0700 Subject: [PATCH] daemon/config: support alternate (common) unicode encodings using a BOM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a pragmatic but impure choice, in order to better support the default tools available on Windows Server, and reduce user confusion due to otherwise inscrutable-to-the-uninitiated errors like the following: > invalid character 'þ' looking for beginning of value > invalid character 'ÿ' looking for beginning of value While meaningful to those who are familiar with and are equipped to diagnose encoding issues, these characters will be hidden when the file is edited with a BOM-aware text editor, and further confuse the user. Signed-off-by: Bjorn Neergaard (cherry picked from commit d42495033e7a493204d613681e831ba650a03dd5) Signed-off-by: Sebastiaan van Stijn --- daemon/config/config.go | 37 +- daemon/config/config_test.go | 41 +- vendor.mod | 2 +- vendor/golang.org/x/text/encoding/encoding.go | 335 ++++ .../internal/identifier/identifier.go | 81 + .../text/encoding/internal/identifier/mib.go | 1627 +++++++++++++++++ .../x/text/encoding/internal/internal.go | 75 + .../x/text/encoding/unicode/override.go | 82 + .../x/text/encoding/unicode/unicode.go | 512 ++++++ .../internal/utf8internal/utf8internal.go | 87 + vendor/golang.org/x/text/runes/cond.go | 187 ++ vendor/golang.org/x/text/runes/runes.go | 355 ++++ vendor/modules.txt | 6 + 13 files changed, 3417 insertions(+), 10 deletions(-) create mode 100644 vendor/golang.org/x/text/encoding/encoding.go create mode 100644 vendor/golang.org/x/text/encoding/internal/identifier/identifier.go create mode 100644 vendor/golang.org/x/text/encoding/internal/identifier/mib.go create mode 100644 vendor/golang.org/x/text/encoding/internal/internal.go create mode 100644 vendor/golang.org/x/text/encoding/unicode/override.go create mode 100644 vendor/golang.org/x/text/encoding/unicode/unicode.go create mode 100644 vendor/golang.org/x/text/internal/utf8internal/utf8internal.go create mode 100644 vendor/golang.org/x/text/runes/cond.go create mode 100644 vendor/golang.org/x/text/runes/runes.go diff --git a/daemon/config/config.go b/daemon/config/config.go index 62ba2db6d1..cbc48cd9b8 100644 --- a/daemon/config/config.go +++ b/daemon/config/config.go @@ -11,6 +11,9 @@ import ( "strings" "sync" + "golang.org/x/text/encoding/unicode" + "golang.org/x/text/transform" + "github.com/containerd/containerd/runtime/v2/shim" "github.com/docker/docker/opts" "github.com/docker/docker/pkg/authorization" @@ -434,10 +437,36 @@ func getConflictFreeConfiguration(configFile string, flags *pflag.FlagSet) (*Con return nil, err } - // Strip the UTF-8 BOM if present ([RFC 8259] allows JSON implementations to optionally strip the BOM for - // interoperability; do so here as Notepad on older versions of Windows Server insists on a BOM). - // [RFC 8259]: https://tools.ietf.org/html/rfc8259#section-8.1 - b = bytes.TrimPrefix(b, []byte("\xef\xbb\xbf")) + // Decode the contents of the JSON file using a [byte order mark] if present, instead of assuming UTF-8 without BOM. + // The BOM, if present, will be used to determine the encoding. If no BOM is present, we will assume the default + // and preferred encoding for JSON as defined by [RFC 8259], UTF-8 without BOM. + // + // While JSON is normatively UTF-8 with no BOM, there are a couple of reasons to decode here: + // * UTF-8 with BOM is something that new implementations should avoid producing; however, [RFC 8259 Section 8.1] + // allows implementations to ignore the UTF-8 BOM when present for interoperability. Older versions of Notepad, + // the only text editor available out of the box on Windows Server, writes UTF-8 with a BOM by default. + // * The default encoding for [Windows PowerShell] is UTF-16 LE with BOM. While encodings in PowerShell can be a + // bit idiosyncratic, BOMs are still generally written. There is no support for selecting UTF-8 without a BOM as + // the encoding in Windows PowerShell, though some Cmdlets only write UTF-8 with no BOM. PowerShell Core + // introduces `utf8NoBOM` and makes it the default, but PowerShell Core is unlikely to be the implementation for + // a majority of Windows Server + PowerShell users. + // * While [RFC 8259 Section 8.1] asserts that software that is not part of a closed ecosystem or that crosses a + // network boundary should only support UTF-8, and should never write a BOM, it does acknowledge older versions + // of the standard, such as [RFC 7159 Section 8.1]. In the interest of pragmatism and easing pain for Windows + // users, we consider Windows tools such as Windows PowerShell and Notepad part of our ecosystem, and support + // the two most common encodings: UTF-16 LE with BOM, and UTF-8 with BOM, in addition to the standard UTF-8 + // without BOM. + // + // [byte order mark]: https://www.unicode.org/faq/utf_bom.html#BOM + // [RFC 8259]: https://www.rfc-editor.org/rfc/rfc8259 + // [RFC 8259 Section 8.1]: https://www.rfc-editor.org/rfc/rfc8259#section-8.1 + // [RFC 7159 Section 8.1]: https://www.rfc-editor.org/rfc/rfc7159#section-8.1 + // [Windows PowerShell]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_character_encoding?view=powershell-5.1 + decoder := unicode.BOMOverride(unicode.UTF8.NewDecoder()) + b, _, err = transform.Bytes(decoder, b) + if err != nil { + return nil, err + } // Trim whitespace so that an empty config can be detected for an early return. b = bytes.TrimSpace(b) diff --git a/daemon/config/config_test.go b/daemon/config/config_test.go index e1fbe08d71..55b3722ca1 100644 --- a/daemon/config/config_test.go +++ b/daemon/config/config_test.go @@ -13,6 +13,8 @@ import ( "github.com/google/go-cmp/cmp/cmpopts" "github.com/imdario/mergo" "github.com/spf13/pflag" + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/unicode" "gotest.tools/v3/assert" is "gotest.tools/v3/assert/cmp" "gotest.tools/v3/skip" @@ -38,12 +40,41 @@ func TestDaemonBrokenConfiguration(t *testing.T) { assert.ErrorContains(t, err, `invalid character ' ' in literal true`) } -// TestDaemonConfigurationWithBOM ensures that the UTF-8 byte order mark is ignored when reading the configuration file. -func TestDaemonConfigurationWithBOM(t *testing.T) { - configFile := makeConfigFile(t, "\xef\xbb\xbf{\"debug\": true}") +// TestDaemonConfigurationUnicodeVariations feeds various variations of Unicode into the JSON parser, ensuring that we +// respect a BOM and otherwise default to UTF-8. +func TestDaemonConfigurationUnicodeVariations(t *testing.T) { + jsonData := `{"debug": true}` - _, err := MergeDaemonConfigurations(&Config{}, nil, configFile) - assert.NilError(t, err) + testCases := []struct { + name string + encoding encoding.Encoding + }{ + { + name: "UTF-8", + encoding: unicode.UTF8, + }, + { + name: "UTF-8 (with BOM)", + encoding: unicode.UTF8BOM, + }, + { + name: "UTF-16 (BE with BOM)", + encoding: unicode.UTF16(unicode.BigEndian, unicode.UseBOM), + }, + { + name: "UTF-16 (LE with BOM)", + encoding: unicode.UTF16(unicode.LittleEndian, unicode.UseBOM), + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + encodedJson, err := tc.encoding.NewEncoder().String(jsonData) + assert.NilError(t, err) + configFile := makeConfigFile(t, encodedJson) + _, err = MergeDaemonConfigurations(&Config{}, nil, configFile) + assert.NilError(t, err) + }) + } } func TestFindConfigurationConflicts(t *testing.T) { diff --git a/vendor.mod b/vendor.mod index 9da449a480..c37c923225 100644 --- a/vendor.mod +++ b/vendor.mod @@ -84,6 +84,7 @@ require ( golang.org/x/net v0.4.0 golang.org/x/sync v0.1.0 golang.org/x/sys v0.3.0 + golang.org/x/text v0.5.0 golang.org/x/time v0.1.0 google.golang.org/genproto v0.0.0-20220706185917-7780775163c4 google.golang.org/grpc v1.48.0 @@ -159,7 +160,6 @@ require ( go.uber.org/zap v1.21.0 // indirect golang.org/x/crypto v0.2.0 // indirect golang.org/x/oauth2 v0.1.0 // indirect - golang.org/x/text v0.5.0 // indirect google.golang.org/api v0.93.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.28.1 // indirect diff --git a/vendor/golang.org/x/text/encoding/encoding.go b/vendor/golang.org/x/text/encoding/encoding.go new file mode 100644 index 0000000000..a0bd7cd4d0 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/encoding.go @@ -0,0 +1,335 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package encoding defines an interface for character encodings, such as Shift +// JIS and Windows 1252, that can convert to and from UTF-8. +// +// Encoding implementations are provided in other packages, such as +// golang.org/x/text/encoding/charmap and +// golang.org/x/text/encoding/japanese. +package encoding // import "golang.org/x/text/encoding" + +import ( + "errors" + "io" + "strconv" + "unicode/utf8" + + "golang.org/x/text/encoding/internal/identifier" + "golang.org/x/text/transform" +) + +// TODO: +// - There seems to be some inconsistency in when decoders return errors +// and when not. Also documentation seems to suggest they shouldn't return +// errors at all (except for UTF-16). +// - Encoders seem to rely on or at least benefit from the input being in NFC +// normal form. Perhaps add an example how users could prepare their output. + +// Encoding is a character set encoding that can be transformed to and from +// UTF-8. +type Encoding interface { + // NewDecoder returns a Decoder. + NewDecoder() *Decoder + + // NewEncoder returns an Encoder. + NewEncoder() *Encoder +} + +// A Decoder converts bytes to UTF-8. It implements transform.Transformer. +// +// Transforming source bytes that are not of that encoding will not result in an +// error per se. Each byte that cannot be transcoded will be represented in the +// output by the UTF-8 encoding of '\uFFFD', the replacement rune. +type Decoder struct { + transform.Transformer + + // This forces external creators of Decoders to use names in struct + // initializers, allowing for future extendibility without having to break + // code. + _ struct{} +} + +// Bytes converts the given encoded bytes to UTF-8. It returns the converted +// bytes or nil, err if any error occurred. +func (d *Decoder) Bytes(b []byte) ([]byte, error) { + b, _, err := transform.Bytes(d, b) + if err != nil { + return nil, err + } + return b, nil +} + +// String converts the given encoded string to UTF-8. It returns the converted +// string or "", err if any error occurred. +func (d *Decoder) String(s string) (string, error) { + s, _, err := transform.String(d, s) + if err != nil { + return "", err + } + return s, nil +} + +// Reader wraps another Reader to decode its bytes. +// +// The Decoder may not be used for any other operation as long as the returned +// Reader is in use. +func (d *Decoder) Reader(r io.Reader) io.Reader { + return transform.NewReader(r, d) +} + +// An Encoder converts bytes from UTF-8. It implements transform.Transformer. +// +// Each rune that cannot be transcoded will result in an error. In this case, +// the transform will consume all source byte up to, not including the offending +// rune. Transforming source bytes that are not valid UTF-8 will be replaced by +// `\uFFFD`. To return early with an error instead, use transform.Chain to +// preprocess the data with a UTF8Validator. +type Encoder struct { + transform.Transformer + + // This forces external creators of Encoders to use names in struct + // initializers, allowing for future extendibility without having to break + // code. + _ struct{} +} + +// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if +// any error occurred. +func (e *Encoder) Bytes(b []byte) ([]byte, error) { + b, _, err := transform.Bytes(e, b) + if err != nil { + return nil, err + } + return b, nil +} + +// String converts a string from UTF-8. It returns the converted string or +// "", err if any error occurred. +func (e *Encoder) String(s string) (string, error) { + s, _, err := transform.String(e, s) + if err != nil { + return "", err + } + return s, nil +} + +// Writer wraps another Writer to encode its UTF-8 output. +// +// The Encoder may not be used for any other operation as long as the returned +// Writer is in use. +func (e *Encoder) Writer(w io.Writer) io.Writer { + return transform.NewWriter(w, e) +} + +// ASCIISub is the ASCII substitute character, as recommended by +// https://unicode.org/reports/tr36/#Text_Comparison +const ASCIISub = '\x1a' + +// Nop is the nop encoding. Its transformed bytes are the same as the source +// bytes; it does not replace invalid UTF-8 sequences. +var Nop Encoding = nop{} + +type nop struct{} + +func (nop) NewDecoder() *Decoder { + return &Decoder{Transformer: transform.Nop} +} +func (nop) NewEncoder() *Encoder { + return &Encoder{Transformer: transform.Nop} +} + +// Replacement is the replacement encoding. Decoding from the replacement +// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to +// the replacement encoding yields the same as the source bytes except that +// invalid UTF-8 is converted to '\uFFFD'. +// +// It is defined at http://encoding.spec.whatwg.org/#replacement +var Replacement Encoding = replacement{} + +type replacement struct{} + +func (replacement) NewDecoder() *Decoder { + return &Decoder{Transformer: replacementDecoder{}} +} + +func (replacement) NewEncoder() *Encoder { + return &Encoder{Transformer: replacementEncoder{}} +} + +func (replacement) ID() (mib identifier.MIB, other string) { + return identifier.Replacement, "" +} + +type replacementDecoder struct{ transform.NopResetter } + +func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if len(dst) < 3 { + return 0, 0, transform.ErrShortDst + } + if atEOF { + const fffd = "\ufffd" + dst[0] = fffd[0] + dst[1] = fffd[1] + dst[2] = fffd[2] + nDst = 3 + } + return nDst, len(src), nil +} + +type replacementEncoder struct{ transform.NopResetter } + +func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + r, size := rune(0), 0 + + for ; nSrc < len(src); nSrc += size { + r = rune(src[nSrc]) + + // Decode a 1-byte rune. + if r < utf8.RuneSelf { + size = 1 + + } else { + // Decode a multi-byte rune. + r, size = utf8.DecodeRune(src[nSrc:]) + if size == 1 { + // All valid runes of size 1 (those below utf8.RuneSelf) were + // handled above. We have invalid UTF-8 or we haven't seen the + // full character yet. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + r = '\ufffd' + } + } + + if nDst+utf8.RuneLen(r) > len(dst) { + err = transform.ErrShortDst + break + } + nDst += utf8.EncodeRune(dst[nDst:], r) + } + return nDst, nSrc, err +} + +// HTMLEscapeUnsupported wraps encoders to replace source runes outside the +// repertoire of the destination encoding with HTML escape sequences. +// +// This wrapper exists to comply to URL and HTML forms requiring a +// non-terminating legacy encoder. The produced sequences may lead to data +// loss as they are indistinguishable from legitimate input. To avoid this +// issue, use UTF-8 encodings whenever possible. +func HTMLEscapeUnsupported(e *Encoder) *Encoder { + return &Encoder{Transformer: &errorHandler{e, errorToHTML}} +} + +// ReplaceUnsupported wraps encoders to replace source runes outside the +// repertoire of the destination encoding with an encoding-specific +// replacement. +// +// This wrapper is only provided for backwards compatibility and legacy +// handling. Its use is strongly discouraged. Use UTF-8 whenever possible. +func ReplaceUnsupported(e *Encoder) *Encoder { + return &Encoder{Transformer: &errorHandler{e, errorToReplacement}} +} + +type errorHandler struct { + *Encoder + handler func(dst []byte, r rune, err repertoireError) (n int, ok bool) +} + +// TODO: consider making this error public in some form. +type repertoireError interface { + Replacement() byte +} + +func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF) + for err != nil { + rerr, ok := err.(repertoireError) + if !ok { + return nDst, nSrc, err + } + r, sz := utf8.DecodeRune(src[nSrc:]) + n, ok := h.handler(dst[nDst:], r, rerr) + if !ok { + return nDst, nSrc, transform.ErrShortDst + } + err = nil + nDst += n + if nSrc += sz; nSrc < len(src) { + var dn, sn int + dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF) + nDst += dn + nSrc += sn + } + } + return nDst, nSrc, err +} + +func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) { + buf := [8]byte{} + b := strconv.AppendUint(buf[:0], uint64(r), 10) + if n = len(b) + len("&#;"); n >= len(dst) { + return 0, false + } + dst[0] = '&' + dst[1] = '#' + dst[copy(dst[2:], b)+2] = ';' + return n, true +} + +func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) { + if len(dst) == 0 { + return 0, false + } + dst[0] = err.Replacement() + return 1, true +} + +// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8. +var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8") + +// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first +// input byte that is not valid UTF-8. +var UTF8Validator transform.Transformer = utf8Validator{} + +type utf8Validator struct{ transform.NopResetter } + +func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + n := len(src) + if n > len(dst) { + n = len(dst) + } + for i := 0; i < n; { + if c := src[i]; c < utf8.RuneSelf { + dst[i] = c + i++ + continue + } + _, size := utf8.DecodeRune(src[i:]) + if size == 1 { + // All valid runes of size 1 (those below utf8.RuneSelf) were + // handled above. We have invalid UTF-8 or we haven't seen the + // full character yet. + err = ErrInvalidUTF8 + if !atEOF && !utf8.FullRune(src[i:]) { + err = transform.ErrShortSrc + } + return i, i, err + } + if i+size > len(dst) { + return i, i, transform.ErrShortDst + } + for ; size > 0; size-- { + dst[i] = src[i] + i++ + } + } + if len(src) > len(dst) { + err = transform.ErrShortDst + } + return n, n, err +} diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go b/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go new file mode 100644 index 0000000000..5c9b85c280 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go @@ -0,0 +1,81 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate go run gen.go + +// Package identifier defines the contract between implementations of Encoding +// and Index by defining identifiers that uniquely identify standardized coded +// character sets (CCS) and character encoding schemes (CES), which we will +// together refer to as encodings, for which Encoding implementations provide +// converters to and from UTF-8. This package is typically only of concern to +// implementers of Indexes and Encodings. +// +// One part of the identifier is the MIB code, which is defined by IANA and +// uniquely identifies a CCS or CES. Each code is associated with data that +// references authorities, official documentation as well as aliases and MIME +// names. +// +// Not all CESs are covered by the IANA registry. The "other" string that is +// returned by ID can be used to identify other character sets or versions of +// existing ones. +// +// It is recommended that each package that provides a set of Encodings provide +// the All and Common variables to reference all supported encodings and +// commonly used subset. This allows Index implementations to include all +// available encodings without explicitly referencing or knowing about them. +package identifier + +// Note: this package is internal, but could be made public if there is a need +// for writing third-party Indexes and Encodings. + +// References: +// - http://source.icu-project.org/repos/icu/icu/trunk/source/data/mappings/convrtrs.txt +// - http://www.iana.org/assignments/character-sets/character-sets.xhtml +// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib +// - http://www.ietf.org/rfc/rfc2978.txt +// - https://www.unicode.org/reports/tr22/ +// - http://www.w3.org/TR/encoding/ +// - https://encoding.spec.whatwg.org/ +// - https://encoding.spec.whatwg.org/encodings.json +// - https://tools.ietf.org/html/rfc6657#section-5 + +// Interface can be implemented by Encodings to define the CCS or CES for which +// it implements conversions. +type Interface interface { + // ID returns an encoding identifier. Exactly one of the mib and other + // values should be non-zero. + // + // In the usual case it is only necessary to indicate the MIB code. The + // other string can be used to specify encodings for which there is no MIB, + // such as "x-mac-dingbat". + // + // The other string may only contain the characters a-z, A-Z, 0-9, - and _. + ID() (mib MIB, other string) + + // NOTE: the restrictions on the encoding are to allow extending the syntax + // with additional information such as versions, vendors and other variants. +} + +// A MIB identifies an encoding. It is derived from the IANA MIB codes and adds +// some identifiers for some encodings that are not covered by the IANA +// standard. +// +// See http://www.iana.org/assignments/ianacharset-mib. +type MIB uint16 + +// These additional MIB types are not defined in IANA. They are added because +// they are common and defined within the text repo. +const ( + // Unofficial marks the start of encodings not registered by IANA. + Unofficial MIB = 10000 + iota + + // Replacement is the WhatWG replacement encoding. + Replacement + + // XUserDefined is the code for x-user-defined. + XUserDefined + + // MacintoshCyrillic is the code for x-mac-cyrillic. + MacintoshCyrillic +) diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/mib.go b/vendor/golang.org/x/text/encoding/internal/identifier/mib.go new file mode 100644 index 0000000000..351fb86e29 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/internal/identifier/mib.go @@ -0,0 +1,1627 @@ +// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT. + +package identifier + +const ( + // ASCII is the MIB identifier with IANA name US-ASCII (MIME: US-ASCII). + // + // ANSI X3.4-1986 + // Reference: RFC2046 + ASCII MIB = 3 + + // ISOLatin1 is the MIB identifier with IANA name ISO_8859-1:1987 (MIME: ISO-8859-1). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin1 MIB = 4 + + // ISOLatin2 is the MIB identifier with IANA name ISO_8859-2:1987 (MIME: ISO-8859-2). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin2 MIB = 5 + + // ISOLatin3 is the MIB identifier with IANA name ISO_8859-3:1988 (MIME: ISO-8859-3). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin3 MIB = 6 + + // ISOLatin4 is the MIB identifier with IANA name ISO_8859-4:1988 (MIME: ISO-8859-4). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin4 MIB = 7 + + // ISOLatinCyrillic is the MIB identifier with IANA name ISO_8859-5:1988 (MIME: ISO-8859-5). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatinCyrillic MIB = 8 + + // ISOLatinArabic is the MIB identifier with IANA name ISO_8859-6:1987 (MIME: ISO-8859-6). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatinArabic MIB = 9 + + // ISOLatinGreek is the MIB identifier with IANA name ISO_8859-7:1987 (MIME: ISO-8859-7). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1947 + // Reference: RFC1345 + ISOLatinGreek MIB = 10 + + // ISOLatinHebrew is the MIB identifier with IANA name ISO_8859-8:1988 (MIME: ISO-8859-8). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatinHebrew MIB = 11 + + // ISOLatin5 is the MIB identifier with IANA name ISO_8859-9:1989 (MIME: ISO-8859-9). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin5 MIB = 12 + + // ISOLatin6 is the MIB identifier with IANA name ISO-8859-10 (MIME: ISO-8859-10). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOLatin6 MIB = 13 + + // ISOTextComm is the MIB identifier with IANA name ISO_6937-2-add. + // + // ISO-IR: International Register of Escape Sequences and ISO 6937-2:1983 + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISOTextComm MIB = 14 + + // HalfWidthKatakana is the MIB identifier with IANA name JIS_X0201. + // + // JIS X 0201-1976. One byte only, this is equivalent to + // JIS/Roman (similar to ASCII) plus eight-bit half-width + // Katakana + // Reference: RFC1345 + HalfWidthKatakana MIB = 15 + + // JISEncoding is the MIB identifier with IANA name JIS_Encoding. + // + // JIS X 0202-1991. Uses ISO 2022 escape sequences to + // shift code sets as documented in JIS X 0202-1991. + JISEncoding MIB = 16 + + // ShiftJIS is the MIB identifier with IANA name Shift_JIS (MIME: Shift_JIS). + // + // This charset is an extension of csHalfWidthKatakana by + // adding graphic characters in JIS X 0208. The CCS's are + // JIS X0201:1997 and JIS X0208:1997. The + // complete definition is shown in Appendix 1 of JIS + // X0208:1997. + // This charset can be used for the top-level media type "text". + ShiftJIS MIB = 17 + + // EUCPkdFmtJapanese is the MIB identifier with IANA name Extended_UNIX_Code_Packed_Format_for_Japanese (MIME: EUC-JP). + // + // Standardized by OSF, UNIX International, and UNIX Systems + // Laboratories Pacific. Uses ISO 2022 rules to select + // code set 0: US-ASCII (a single 7-bit byte set) + // code set 1: JIS X0208-1990 (a double 8-bit byte set) + // restricted to A0-FF in both bytes + // code set 2: Half Width Katakana (a single 7-bit byte set) + // requiring SS2 as the character prefix + // code set 3: JIS X0212-1990 (a double 7-bit byte set) + // restricted to A0-FF in both bytes + // requiring SS3 as the character prefix + EUCPkdFmtJapanese MIB = 18 + + // EUCFixWidJapanese is the MIB identifier with IANA name Extended_UNIX_Code_Fixed_Width_for_Japanese. + // + // Used in Japan. Each character is 2 octets. + // code set 0: US-ASCII (a single 7-bit byte set) + // 1st byte = 00 + // 2nd byte = 20-7E + // code set 1: JIS X0208-1990 (a double 7-bit byte set) + // restricted to A0-FF in both bytes + // code set 2: Half Width Katakana (a single 7-bit byte set) + // 1st byte = 00 + // 2nd byte = A0-FF + // code set 3: JIS X0212-1990 (a double 7-bit byte set) + // restricted to A0-FF in + // the first byte + // and 21-7E in the second byte + EUCFixWidJapanese MIB = 19 + + // ISO4UnitedKingdom is the MIB identifier with IANA name BS_4730. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO4UnitedKingdom MIB = 20 + + // ISO11SwedishForNames is the MIB identifier with IANA name SEN_850200_C. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO11SwedishForNames MIB = 21 + + // ISO15Italian is the MIB identifier with IANA name IT. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO15Italian MIB = 22 + + // ISO17Spanish is the MIB identifier with IANA name ES. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO17Spanish MIB = 23 + + // ISO21German is the MIB identifier with IANA name DIN_66003. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO21German MIB = 24 + + // ISO60Norwegian1 is the MIB identifier with IANA name NS_4551-1. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO60Norwegian1 MIB = 25 + + // ISO69French is the MIB identifier with IANA name NF_Z_62-010. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO69French MIB = 26 + + // ISO10646UTF1 is the MIB identifier with IANA name ISO-10646-UTF-1. + // + // Universal Transfer Format (1), this is the multibyte + // encoding, that subsets ASCII-7. It does not have byte + // ordering issues. + ISO10646UTF1 MIB = 27 + + // ISO646basic1983 is the MIB identifier with IANA name ISO_646.basic:1983. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO646basic1983 MIB = 28 + + // INVARIANT is the MIB identifier with IANA name INVARIANT. + // + // Reference: RFC1345 + INVARIANT MIB = 29 + + // ISO2IntlRefVersion is the MIB identifier with IANA name ISO_646.irv:1983. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO2IntlRefVersion MIB = 30 + + // NATSSEFI is the MIB identifier with IANA name NATS-SEFI. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + NATSSEFI MIB = 31 + + // NATSSEFIADD is the MIB identifier with IANA name NATS-SEFI-ADD. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + NATSSEFIADD MIB = 32 + + // NATSDANO is the MIB identifier with IANA name NATS-DANO. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + NATSDANO MIB = 33 + + // NATSDANOADD is the MIB identifier with IANA name NATS-DANO-ADD. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + NATSDANOADD MIB = 34 + + // ISO10Swedish is the MIB identifier with IANA name SEN_850200_B. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO10Swedish MIB = 35 + + // KSC56011987 is the MIB identifier with IANA name KS_C_5601-1987. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + KSC56011987 MIB = 36 + + // ISO2022KR is the MIB identifier with IANA name ISO-2022-KR (MIME: ISO-2022-KR). + // + // rfc1557 (see also KS_C_5601-1987) + // Reference: RFC1557 + ISO2022KR MIB = 37 + + // EUCKR is the MIB identifier with IANA name EUC-KR (MIME: EUC-KR). + // + // rfc1557 (see also KS_C_5861-1992) + // Reference: RFC1557 + EUCKR MIB = 38 + + // ISO2022JP is the MIB identifier with IANA name ISO-2022-JP (MIME: ISO-2022-JP). + // + // rfc1468 (see also rfc2237 ) + // Reference: RFC1468 + ISO2022JP MIB = 39 + + // ISO2022JP2 is the MIB identifier with IANA name ISO-2022-JP-2 (MIME: ISO-2022-JP-2). + // + // rfc1554 + // Reference: RFC1554 + ISO2022JP2 MIB = 40 + + // ISO13JISC6220jp is the MIB identifier with IANA name JIS_C6220-1969-jp. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO13JISC6220jp MIB = 41 + + // ISO14JISC6220ro is the MIB identifier with IANA name JIS_C6220-1969-ro. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO14JISC6220ro MIB = 42 + + // ISO16Portuguese is the MIB identifier with IANA name PT. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO16Portuguese MIB = 43 + + // ISO18Greek7Old is the MIB identifier with IANA name greek7-old. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO18Greek7Old MIB = 44 + + // ISO19LatinGreek is the MIB identifier with IANA name latin-greek. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO19LatinGreek MIB = 45 + + // ISO25French is the MIB identifier with IANA name NF_Z_62-010_(1973). + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO25French MIB = 46 + + // ISO27LatinGreek1 is the MIB identifier with IANA name Latin-greek-1. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO27LatinGreek1 MIB = 47 + + // ISO5427Cyrillic is the MIB identifier with IANA name ISO_5427. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO5427Cyrillic MIB = 48 + + // ISO42JISC62261978 is the MIB identifier with IANA name JIS_C6226-1978. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO42JISC62261978 MIB = 49 + + // ISO47BSViewdata is the MIB identifier with IANA name BS_viewdata. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO47BSViewdata MIB = 50 + + // ISO49INIS is the MIB identifier with IANA name INIS. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO49INIS MIB = 51 + + // ISO50INIS8 is the MIB identifier with IANA name INIS-8. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO50INIS8 MIB = 52 + + // ISO51INISCyrillic is the MIB identifier with IANA name INIS-cyrillic. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO51INISCyrillic MIB = 53 + + // ISO54271981 is the MIB identifier with IANA name ISO_5427:1981. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO54271981 MIB = 54 + + // ISO5428Greek is the MIB identifier with IANA name ISO_5428:1980. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO5428Greek MIB = 55 + + // ISO57GB1988 is the MIB identifier with IANA name GB_1988-80. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO57GB1988 MIB = 56 + + // ISO58GB231280 is the MIB identifier with IANA name GB_2312-80. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO58GB231280 MIB = 57 + + // ISO61Norwegian2 is the MIB identifier with IANA name NS_4551-2. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO61Norwegian2 MIB = 58 + + // ISO70VideotexSupp1 is the MIB identifier with IANA name videotex-suppl. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO70VideotexSupp1 MIB = 59 + + // ISO84Portuguese2 is the MIB identifier with IANA name PT2. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO84Portuguese2 MIB = 60 + + // ISO85Spanish2 is the MIB identifier with IANA name ES2. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO85Spanish2 MIB = 61 + + // ISO86Hungarian is the MIB identifier with IANA name MSZ_7795.3. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO86Hungarian MIB = 62 + + // ISO87JISX0208 is the MIB identifier with IANA name JIS_C6226-1983. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO87JISX0208 MIB = 63 + + // ISO88Greek7 is the MIB identifier with IANA name greek7. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO88Greek7 MIB = 64 + + // ISO89ASMO449 is the MIB identifier with IANA name ASMO_449. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO89ASMO449 MIB = 65 + + // ISO90 is the MIB identifier with IANA name iso-ir-90. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO90 MIB = 66 + + // ISO91JISC62291984a is the MIB identifier with IANA name JIS_C6229-1984-a. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO91JISC62291984a MIB = 67 + + // ISO92JISC62991984b is the MIB identifier with IANA name JIS_C6229-1984-b. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO92JISC62991984b MIB = 68 + + // ISO93JIS62291984badd is the MIB identifier with IANA name JIS_C6229-1984-b-add. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO93JIS62291984badd MIB = 69 + + // ISO94JIS62291984hand is the MIB identifier with IANA name JIS_C6229-1984-hand. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO94JIS62291984hand MIB = 70 + + // ISO95JIS62291984handadd is the MIB identifier with IANA name JIS_C6229-1984-hand-add. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO95JIS62291984handadd MIB = 71 + + // ISO96JISC62291984kana is the MIB identifier with IANA name JIS_C6229-1984-kana. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO96JISC62291984kana MIB = 72 + + // ISO2033 is the MIB identifier with IANA name ISO_2033-1983. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO2033 MIB = 73 + + // ISO99NAPLPS is the MIB identifier with IANA name ANSI_X3.110-1983. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO99NAPLPS MIB = 74 + + // ISO102T617bit is the MIB identifier with IANA name T.61-7bit. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO102T617bit MIB = 75 + + // ISO103T618bit is the MIB identifier with IANA name T.61-8bit. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO103T618bit MIB = 76 + + // ISO111ECMACyrillic is the MIB identifier with IANA name ECMA-cyrillic. + // + // ISO registry + ISO111ECMACyrillic MIB = 77 + + // ISO121Canadian1 is the MIB identifier with IANA name CSA_Z243.4-1985-1. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO121Canadian1 MIB = 78 + + // ISO122Canadian2 is the MIB identifier with IANA name CSA_Z243.4-1985-2. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO122Canadian2 MIB = 79 + + // ISO123CSAZ24341985gr is the MIB identifier with IANA name CSA_Z243.4-1985-gr. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO123CSAZ24341985gr MIB = 80 + + // ISO88596E is the MIB identifier with IANA name ISO_8859-6-E (MIME: ISO-8859-6-E). + // + // rfc1556 + // Reference: RFC1556 + ISO88596E MIB = 81 + + // ISO88596I is the MIB identifier with IANA name ISO_8859-6-I (MIME: ISO-8859-6-I). + // + // rfc1556 + // Reference: RFC1556 + ISO88596I MIB = 82 + + // ISO128T101G2 is the MIB identifier with IANA name T.101-G2. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO128T101G2 MIB = 83 + + // ISO88598E is the MIB identifier with IANA name ISO_8859-8-E (MIME: ISO-8859-8-E). + // + // rfc1556 + // Reference: RFC1556 + ISO88598E MIB = 84 + + // ISO88598I is the MIB identifier with IANA name ISO_8859-8-I (MIME: ISO-8859-8-I). + // + // rfc1556 + // Reference: RFC1556 + ISO88598I MIB = 85 + + // ISO139CSN369103 is the MIB identifier with IANA name CSN_369103. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO139CSN369103 MIB = 86 + + // ISO141JUSIB1002 is the MIB identifier with IANA name JUS_I.B1.002. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO141JUSIB1002 MIB = 87 + + // ISO143IECP271 is the MIB identifier with IANA name IEC_P27-1. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO143IECP271 MIB = 88 + + // ISO146Serbian is the MIB identifier with IANA name JUS_I.B1.003-serb. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO146Serbian MIB = 89 + + // ISO147Macedonian is the MIB identifier with IANA name JUS_I.B1.003-mac. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO147Macedonian MIB = 90 + + // ISO150GreekCCITT is the MIB identifier with IANA name greek-ccitt. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO150GreekCCITT MIB = 91 + + // ISO151Cuba is the MIB identifier with IANA name NC_NC00-10:81. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO151Cuba MIB = 92 + + // ISO6937Add is the MIB identifier with IANA name ISO_6937-2-25. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO6937Add MIB = 93 + + // ISO153GOST1976874 is the MIB identifier with IANA name GOST_19768-74. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO153GOST1976874 MIB = 94 + + // ISO8859Supp is the MIB identifier with IANA name ISO_8859-supp. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO8859Supp MIB = 95 + + // ISO10367Box is the MIB identifier with IANA name ISO_10367-box. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO10367Box MIB = 96 + + // ISO158Lap is the MIB identifier with IANA name latin-lap. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO158Lap MIB = 97 + + // ISO159JISX02121990 is the MIB identifier with IANA name JIS_X0212-1990. + // + // ISO-IR: International Register of Escape Sequences + // Note: The current registration authority is IPSJ/ITSCJ, Japan. + // Reference: RFC1345 + ISO159JISX02121990 MIB = 98 + + // ISO646Danish is the MIB identifier with IANA name DS_2089. + // + // Danish Standard, DS 2089, February 1974 + // Reference: RFC1345 + ISO646Danish MIB = 99 + + // USDK is the MIB identifier with IANA name us-dk. + // + // Reference: RFC1345 + USDK MIB = 100 + + // DKUS is the MIB identifier with IANA name dk-us. + // + // Reference: RFC1345 + DKUS MIB = 101 + + // KSC5636 is the MIB identifier with IANA name KSC5636. + // + // Reference: RFC1345 + KSC5636 MIB = 102 + + // Unicode11UTF7 is the MIB identifier with IANA name UNICODE-1-1-UTF-7. + // + // rfc1642 + // Reference: RFC1642 + Unicode11UTF7 MIB = 103 + + // ISO2022CN is the MIB identifier with IANA name ISO-2022-CN. + // + // rfc1922 + // Reference: RFC1922 + ISO2022CN MIB = 104 + + // ISO2022CNEXT is the MIB identifier with IANA name ISO-2022-CN-EXT. + // + // rfc1922 + // Reference: RFC1922 + ISO2022CNEXT MIB = 105 + + // UTF8 is the MIB identifier with IANA name UTF-8. + // + // rfc3629 + // Reference: RFC3629 + UTF8 MIB = 106 + + // ISO885913 is the MIB identifier with IANA name ISO-8859-13. + // + // ISO See https://www.iana.org/assignments/charset-reg/ISO-8859-13 https://www.iana.org/assignments/charset-reg/ISO-8859-13 + ISO885913 MIB = 109 + + // ISO885914 is the MIB identifier with IANA name ISO-8859-14. + // + // ISO See https://www.iana.org/assignments/charset-reg/ISO-8859-14 + ISO885914 MIB = 110 + + // ISO885915 is the MIB identifier with IANA name ISO-8859-15. + // + // ISO + // Please see: https://www.iana.org/assignments/charset-reg/ISO-8859-15 + ISO885915 MIB = 111 + + // ISO885916 is the MIB identifier with IANA name ISO-8859-16. + // + // ISO + ISO885916 MIB = 112 + + // GBK is the MIB identifier with IANA name GBK. + // + // Chinese IT Standardization Technical Committee + // Please see: https://www.iana.org/assignments/charset-reg/GBK + GBK MIB = 113 + + // GB18030 is the MIB identifier with IANA name GB18030. + // + // Chinese IT Standardization Technical Committee + // Please see: https://www.iana.org/assignments/charset-reg/GB18030 + GB18030 MIB = 114 + + // OSDEBCDICDF0415 is the MIB identifier with IANA name OSD_EBCDIC_DF04_15. + // + // Fujitsu-Siemens standard mainframe EBCDIC encoding + // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15 + OSDEBCDICDF0415 MIB = 115 + + // OSDEBCDICDF03IRV is the MIB identifier with IANA name OSD_EBCDIC_DF03_IRV. + // + // Fujitsu-Siemens standard mainframe EBCDIC encoding + // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV + OSDEBCDICDF03IRV MIB = 116 + + // OSDEBCDICDF041 is the MIB identifier with IANA name OSD_EBCDIC_DF04_1. + // + // Fujitsu-Siemens standard mainframe EBCDIC encoding + // Please see: https://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1 + OSDEBCDICDF041 MIB = 117 + + // ISO115481 is the MIB identifier with IANA name ISO-11548-1. + // + // See https://www.iana.org/assignments/charset-reg/ISO-11548-1 + ISO115481 MIB = 118 + + // KZ1048 is the MIB identifier with IANA name KZ-1048. + // + // See https://www.iana.org/assignments/charset-reg/KZ-1048 + KZ1048 MIB = 119 + + // Unicode is the MIB identifier with IANA name ISO-10646-UCS-2. + // + // the 2-octet Basic Multilingual Plane, aka Unicode + // this needs to specify network byte order: the standard + // does not specify (it is a 16-bit integer space) + Unicode MIB = 1000 + + // UCS4 is the MIB identifier with IANA name ISO-10646-UCS-4. + // + // the full code space. (same comment about byte order, + // these are 31-bit numbers. + UCS4 MIB = 1001 + + // UnicodeASCII is the MIB identifier with IANA name ISO-10646-UCS-Basic. + // + // ASCII subset of Unicode. Basic Latin = collection 1 + // See ISO 10646, Appendix A + UnicodeASCII MIB = 1002 + + // UnicodeLatin1 is the MIB identifier with IANA name ISO-10646-Unicode-Latin1. + // + // ISO Latin-1 subset of Unicode. Basic Latin and Latin-1 + // Supplement = collections 1 and 2. See ISO 10646, + // Appendix A. See rfc1815 . + UnicodeLatin1 MIB = 1003 + + // UnicodeJapanese is the MIB identifier with IANA name ISO-10646-J-1. + // + // ISO 10646 Japanese, see rfc1815 . + UnicodeJapanese MIB = 1004 + + // UnicodeIBM1261 is the MIB identifier with IANA name ISO-Unicode-IBM-1261. + // + // IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261 + UnicodeIBM1261 MIB = 1005 + + // UnicodeIBM1268 is the MIB identifier with IANA name ISO-Unicode-IBM-1268. + // + // IBM Latin-4 Extended Presentation Set, GCSGID: 1268 + UnicodeIBM1268 MIB = 1006 + + // UnicodeIBM1276 is the MIB identifier with IANA name ISO-Unicode-IBM-1276. + // + // IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276 + UnicodeIBM1276 MIB = 1007 + + // UnicodeIBM1264 is the MIB identifier with IANA name ISO-Unicode-IBM-1264. + // + // IBM Arabic Presentation Set, GCSGID: 1264 + UnicodeIBM1264 MIB = 1008 + + // UnicodeIBM1265 is the MIB identifier with IANA name ISO-Unicode-IBM-1265. + // + // IBM Hebrew Presentation Set, GCSGID: 1265 + UnicodeIBM1265 MIB = 1009 + + // Unicode11 is the MIB identifier with IANA name UNICODE-1-1. + // + // rfc1641 + // Reference: RFC1641 + Unicode11 MIB = 1010 + + // SCSU is the MIB identifier with IANA name SCSU. + // + // SCSU See https://www.iana.org/assignments/charset-reg/SCSU + SCSU MIB = 1011 + + // UTF7 is the MIB identifier with IANA name UTF-7. + // + // rfc2152 + // Reference: RFC2152 + UTF7 MIB = 1012 + + // UTF16BE is the MIB identifier with IANA name UTF-16BE. + // + // rfc2781 + // Reference: RFC2781 + UTF16BE MIB = 1013 + + // UTF16LE is the MIB identifier with IANA name UTF-16LE. + // + // rfc2781 + // Reference: RFC2781 + UTF16LE MIB = 1014 + + // UTF16 is the MIB identifier with IANA name UTF-16. + // + // rfc2781 + // Reference: RFC2781 + UTF16 MIB = 1015 + + // CESU8 is the MIB identifier with IANA name CESU-8. + // + // https://www.unicode.org/reports/tr26 + CESU8 MIB = 1016 + + // UTF32 is the MIB identifier with IANA name UTF-32. + // + // https://www.unicode.org/reports/tr19/ + UTF32 MIB = 1017 + + // UTF32BE is the MIB identifier with IANA name UTF-32BE. + // + // https://www.unicode.org/reports/tr19/ + UTF32BE MIB = 1018 + + // UTF32LE is the MIB identifier with IANA name UTF-32LE. + // + // https://www.unicode.org/reports/tr19/ + UTF32LE MIB = 1019 + + // BOCU1 is the MIB identifier with IANA name BOCU-1. + // + // https://www.unicode.org/notes/tn6/ + BOCU1 MIB = 1020 + + // UTF7IMAP is the MIB identifier with IANA name UTF-7-IMAP. + // + // Note: This charset is used to encode Unicode in IMAP mailbox names; + // see section 5.1.3 of rfc3501 . It should never be used + // outside this context. A name has been assigned so that charset processing + // implementations can refer to it in a consistent way. + UTF7IMAP MIB = 1021 + + // Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1. + // + // Extended ISO 8859-1 Latin-1 for Windows 3.0. + // PCL Symbol Set id: 9U + Windows30Latin1 MIB = 2000 + + // Windows31Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.1-Latin-1. + // + // Extended ISO 8859-1 Latin-1 for Windows 3.1. + // PCL Symbol Set id: 19U + Windows31Latin1 MIB = 2001 + + // Windows31Latin2 is the MIB identifier with IANA name ISO-8859-2-Windows-Latin-2. + // + // Extended ISO 8859-2. Latin-2 for Windows 3.1. + // PCL Symbol Set id: 9E + Windows31Latin2 MIB = 2002 + + // Windows31Latin5 is the MIB identifier with IANA name ISO-8859-9-Windows-Latin-5. + // + // Extended ISO 8859-9. Latin-5 for Windows 3.1 + // PCL Symbol Set id: 5T + Windows31Latin5 MIB = 2003 + + // HPRoman8 is the MIB identifier with IANA name hp-roman8. + // + // LaserJet IIP Printer User's Manual, + // HP part no 33471-90901, Hewlet-Packard, June 1989. + // Reference: RFC1345 + HPRoman8 MIB = 2004 + + // AdobeStandardEncoding is the MIB identifier with IANA name Adobe-Standard-Encoding. + // + // PostScript Language Reference Manual + // PCL Symbol Set id: 10J + AdobeStandardEncoding MIB = 2005 + + // VenturaUS is the MIB identifier with IANA name Ventura-US. + // + // Ventura US. ASCII plus characters typically used in + // publishing, like pilcrow, copyright, registered, trade mark, + // section, dagger, and double dagger in the range A0 (hex) + // to FF (hex). + // PCL Symbol Set id: 14J + VenturaUS MIB = 2006 + + // VenturaInternational is the MIB identifier with IANA name Ventura-International. + // + // Ventura International. ASCII plus coded characters similar + // to Roman8. + // PCL Symbol Set id: 13J + VenturaInternational MIB = 2007 + + // DECMCS is the MIB identifier with IANA name DEC-MCS. + // + // VAX/VMS User's Manual, + // Order Number: AI-Y517A-TE, April 1986. + // Reference: RFC1345 + DECMCS MIB = 2008 + + // PC850Multilingual is the MIB identifier with IANA name IBM850. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + PC850Multilingual MIB = 2009 + + // PC8DanishNorwegian is the MIB identifier with IANA name PC8-Danish-Norwegian. + // + // PC Danish Norwegian + // 8-bit PC set for Danish Norwegian + // PCL Symbol Set id: 11U + PC8DanishNorwegian MIB = 2012 + + // PC862LatinHebrew is the MIB identifier with IANA name IBM862. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + PC862LatinHebrew MIB = 2013 + + // PC8Turkish is the MIB identifier with IANA name PC8-Turkish. + // + // PC Latin Turkish. PCL Symbol Set id: 9T + PC8Turkish MIB = 2014 + + // IBMSymbols is the MIB identifier with IANA name IBM-Symbols. + // + // Presentation Set, CPGID: 259 + IBMSymbols MIB = 2015 + + // IBMThai is the MIB identifier with IANA name IBM-Thai. + // + // Presentation Set, CPGID: 838 + IBMThai MIB = 2016 + + // HPLegal is the MIB identifier with IANA name HP-Legal. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 1U + HPLegal MIB = 2017 + + // HPPiFont is the MIB identifier with IANA name HP-Pi-font. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 15U + HPPiFont MIB = 2018 + + // HPMath8 is the MIB identifier with IANA name HP-Math8. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 8M + HPMath8 MIB = 2019 + + // HPPSMath is the MIB identifier with IANA name Adobe-Symbol-Encoding. + // + // PostScript Language Reference Manual + // PCL Symbol Set id: 5M + HPPSMath MIB = 2020 + + // HPDesktop is the MIB identifier with IANA name HP-DeskTop. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 7J + HPDesktop MIB = 2021 + + // VenturaMath is the MIB identifier with IANA name Ventura-Math. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 6M + VenturaMath MIB = 2022 + + // MicrosoftPublishing is the MIB identifier with IANA name Microsoft-Publishing. + // + // PCL 5 Comparison Guide, Hewlett-Packard, + // HP part number 5961-0510, October 1992 + // PCL Symbol Set id: 6J + MicrosoftPublishing MIB = 2023 + + // Windows31J is the MIB identifier with IANA name Windows-31J. + // + // Windows Japanese. A further extension of Shift_JIS + // to include NEC special characters (Row 13), NEC + // selection of IBM extensions (Rows 89 to 92), and IBM + // extensions (Rows 115 to 119). The CCS's are + // JIS X0201:1997, JIS X0208:1997, and these extensions. + // This charset can be used for the top-level media type "text", + // but it is of limited or specialized use (see rfc2278 ). + // PCL Symbol Set id: 19K + Windows31J MIB = 2024 + + // GB2312 is the MIB identifier with IANA name GB2312 (MIME: GB2312). + // + // Chinese for People's Republic of China (PRC) mixed one byte, + // two byte set: + // 20-7E = one byte ASCII + // A1-FE = two byte PRC Kanji + // See GB 2312-80 + // PCL Symbol Set Id: 18C + GB2312 MIB = 2025 + + // Big5 is the MIB identifier with IANA name Big5 (MIME: Big5). + // + // Chinese for Taiwan Multi-byte set. + // PCL Symbol Set Id: 18T + Big5 MIB = 2026 + + // Macintosh is the MIB identifier with IANA name macintosh. + // + // The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991 + // Reference: RFC1345 + Macintosh MIB = 2027 + + // IBM037 is the MIB identifier with IANA name IBM037. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM037 MIB = 2028 + + // IBM038 is the MIB identifier with IANA name IBM038. + // + // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 + // Reference: RFC1345 + IBM038 MIB = 2029 + + // IBM273 is the MIB identifier with IANA name IBM273. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM273 MIB = 2030 + + // IBM274 is the MIB identifier with IANA name IBM274. + // + // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 + // Reference: RFC1345 + IBM274 MIB = 2031 + + // IBM275 is the MIB identifier with IANA name IBM275. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM275 MIB = 2032 + + // IBM277 is the MIB identifier with IANA name IBM277. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM277 MIB = 2033 + + // IBM278 is the MIB identifier with IANA name IBM278. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM278 MIB = 2034 + + // IBM280 is the MIB identifier with IANA name IBM280. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM280 MIB = 2035 + + // IBM281 is the MIB identifier with IANA name IBM281. + // + // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 + // Reference: RFC1345 + IBM281 MIB = 2036 + + // IBM284 is the MIB identifier with IANA name IBM284. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM284 MIB = 2037 + + // IBM285 is the MIB identifier with IANA name IBM285. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM285 MIB = 2038 + + // IBM290 is the MIB identifier with IANA name IBM290. + // + // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 + // Reference: RFC1345 + IBM290 MIB = 2039 + + // IBM297 is the MIB identifier with IANA name IBM297. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM297 MIB = 2040 + + // IBM420 is the MIB identifier with IANA name IBM420. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990, + // IBM NLS RM p 11-11 + // Reference: RFC1345 + IBM420 MIB = 2041 + + // IBM423 is the MIB identifier with IANA name IBM423. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM423 MIB = 2042 + + // IBM424 is the MIB identifier with IANA name IBM424. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM424 MIB = 2043 + + // PC8CodePage437 is the MIB identifier with IANA name IBM437. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + PC8CodePage437 MIB = 2011 + + // IBM500 is the MIB identifier with IANA name IBM500. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM500 MIB = 2044 + + // IBM851 is the MIB identifier with IANA name IBM851. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM851 MIB = 2045 + + // PCp852 is the MIB identifier with IANA name IBM852. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + PCp852 MIB = 2010 + + // IBM855 is the MIB identifier with IANA name IBM855. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM855 MIB = 2046 + + // IBM857 is the MIB identifier with IANA name IBM857. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM857 MIB = 2047 + + // IBM860 is the MIB identifier with IANA name IBM860. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM860 MIB = 2048 + + // IBM861 is the MIB identifier with IANA name IBM861. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM861 MIB = 2049 + + // IBM863 is the MIB identifier with IANA name IBM863. + // + // IBM Keyboard layouts and code pages, PN 07G4586 June 1991 + // Reference: RFC1345 + IBM863 MIB = 2050 + + // IBM864 is the MIB identifier with IANA name IBM864. + // + // IBM Keyboard layouts and code pages, PN 07G4586 June 1991 + // Reference: RFC1345 + IBM864 MIB = 2051 + + // IBM865 is the MIB identifier with IANA name IBM865. + // + // IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987) + // Reference: RFC1345 + IBM865 MIB = 2052 + + // IBM868 is the MIB identifier with IANA name IBM868. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM868 MIB = 2053 + + // IBM869 is the MIB identifier with IANA name IBM869. + // + // IBM Keyboard layouts and code pages, PN 07G4586 June 1991 + // Reference: RFC1345 + IBM869 MIB = 2054 + + // IBM870 is the MIB identifier with IANA name IBM870. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM870 MIB = 2055 + + // IBM871 is the MIB identifier with IANA name IBM871. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM871 MIB = 2056 + + // IBM880 is the MIB identifier with IANA name IBM880. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM880 MIB = 2057 + + // IBM891 is the MIB identifier with IANA name IBM891. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM891 MIB = 2058 + + // IBM903 is the MIB identifier with IANA name IBM903. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM903 MIB = 2059 + + // IBBM904 is the MIB identifier with IANA name IBM904. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBBM904 MIB = 2060 + + // IBM905 is the MIB identifier with IANA name IBM905. + // + // IBM 3174 Character Set Ref, GA27-3831-02, March 1990 + // Reference: RFC1345 + IBM905 MIB = 2061 + + // IBM918 is the MIB identifier with IANA name IBM918. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM918 MIB = 2062 + + // IBM1026 is the MIB identifier with IANA name IBM1026. + // + // IBM NLS RM Vol2 SE09-8002-01, March 1990 + // Reference: RFC1345 + IBM1026 MIB = 2063 + + // IBMEBCDICATDE is the MIB identifier with IANA name EBCDIC-AT-DE. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + IBMEBCDICATDE MIB = 2064 + + // EBCDICATDEA is the MIB identifier with IANA name EBCDIC-AT-DE-A. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICATDEA MIB = 2065 + + // EBCDICCAFR is the MIB identifier with IANA name EBCDIC-CA-FR. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICCAFR MIB = 2066 + + // EBCDICDKNO is the MIB identifier with IANA name EBCDIC-DK-NO. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICDKNO MIB = 2067 + + // EBCDICDKNOA is the MIB identifier with IANA name EBCDIC-DK-NO-A. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICDKNOA MIB = 2068 + + // EBCDICFISE is the MIB identifier with IANA name EBCDIC-FI-SE. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICFISE MIB = 2069 + + // EBCDICFISEA is the MIB identifier with IANA name EBCDIC-FI-SE-A. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICFISEA MIB = 2070 + + // EBCDICFR is the MIB identifier with IANA name EBCDIC-FR. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICFR MIB = 2071 + + // EBCDICIT is the MIB identifier with IANA name EBCDIC-IT. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICIT MIB = 2072 + + // EBCDICPT is the MIB identifier with IANA name EBCDIC-PT. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICPT MIB = 2073 + + // EBCDICES is the MIB identifier with IANA name EBCDIC-ES. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICES MIB = 2074 + + // EBCDICESA is the MIB identifier with IANA name EBCDIC-ES-A. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICESA MIB = 2075 + + // EBCDICESS is the MIB identifier with IANA name EBCDIC-ES-S. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICESS MIB = 2076 + + // EBCDICUK is the MIB identifier with IANA name EBCDIC-UK. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICUK MIB = 2077 + + // EBCDICUS is the MIB identifier with IANA name EBCDIC-US. + // + // IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987 + // Reference: RFC1345 + EBCDICUS MIB = 2078 + + // Unknown8BiT is the MIB identifier with IANA name UNKNOWN-8BIT. + // + // Reference: RFC1428 + Unknown8BiT MIB = 2079 + + // Mnemonic is the MIB identifier with IANA name MNEMONIC. + // + // rfc1345 , also known as "mnemonic+ascii+38" + // Reference: RFC1345 + Mnemonic MIB = 2080 + + // Mnem is the MIB identifier with IANA name MNEM. + // + // rfc1345 , also known as "mnemonic+ascii+8200" + // Reference: RFC1345 + Mnem MIB = 2081 + + // VISCII is the MIB identifier with IANA name VISCII. + // + // rfc1456 + // Reference: RFC1456 + VISCII MIB = 2082 + + // VIQR is the MIB identifier with IANA name VIQR. + // + // rfc1456 + // Reference: RFC1456 + VIQR MIB = 2083 + + // KOI8R is the MIB identifier with IANA name KOI8-R (MIME: KOI8-R). + // + // rfc1489 , based on GOST-19768-74, ISO-6937/8, + // INIS-Cyrillic, ISO-5427. + // Reference: RFC1489 + KOI8R MIB = 2084 + + // HZGB2312 is the MIB identifier with IANA name HZ-GB-2312. + // + // rfc1842 , rfc1843 rfc1843 rfc1842 + HZGB2312 MIB = 2085 + + // IBM866 is the MIB identifier with IANA name IBM866. + // + // IBM NLDG Volume 2 (SE09-8002-03) August 1994 + IBM866 MIB = 2086 + + // PC775Baltic is the MIB identifier with IANA name IBM775. + // + // HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996 + PC775Baltic MIB = 2087 + + // KOI8U is the MIB identifier with IANA name KOI8-U. + // + // rfc2319 + // Reference: RFC2319 + KOI8U MIB = 2088 + + // IBM00858 is the MIB identifier with IANA name IBM00858. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM00858 + IBM00858 MIB = 2089 + + // IBM00924 is the MIB identifier with IANA name IBM00924. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM00924 + IBM00924 MIB = 2090 + + // IBM01140 is the MIB identifier with IANA name IBM01140. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM01140 + IBM01140 MIB = 2091 + + // IBM01141 is the MIB identifier with IANA name IBM01141. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM01141 + IBM01141 MIB = 2092 + + // IBM01142 is the MIB identifier with IANA name IBM01142. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM01142 + IBM01142 MIB = 2093 + + // IBM01143 is the MIB identifier with IANA name IBM01143. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM01143 + IBM01143 MIB = 2094 + + // IBM01144 is the MIB identifier with IANA name IBM01144. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM01144 + IBM01144 MIB = 2095 + + // IBM01145 is the MIB identifier with IANA name IBM01145. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM01145 + IBM01145 MIB = 2096 + + // IBM01146 is the MIB identifier with IANA name IBM01146. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM01146 + IBM01146 MIB = 2097 + + // IBM01147 is the MIB identifier with IANA name IBM01147. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM01147 + IBM01147 MIB = 2098 + + // IBM01148 is the MIB identifier with IANA name IBM01148. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM01148 + IBM01148 MIB = 2099 + + // IBM01149 is the MIB identifier with IANA name IBM01149. + // + // IBM See https://www.iana.org/assignments/charset-reg/IBM01149 + IBM01149 MIB = 2100 + + // Big5HKSCS is the MIB identifier with IANA name Big5-HKSCS. + // + // See https://www.iana.org/assignments/charset-reg/Big5-HKSCS + Big5HKSCS MIB = 2101 + + // IBM1047 is the MIB identifier with IANA name IBM1047. + // + // IBM1047 (EBCDIC Latin 1/Open Systems) https://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf + IBM1047 MIB = 2102 + + // PTCP154 is the MIB identifier with IANA name PTCP154. + // + // See https://www.iana.org/assignments/charset-reg/PTCP154 + PTCP154 MIB = 2103 + + // Amiga1251 is the MIB identifier with IANA name Amiga-1251. + // + // See https://www.amiga.ultranet.ru/Amiga-1251.html + Amiga1251 MIB = 2104 + + // KOI7switched is the MIB identifier with IANA name KOI7-switched. + // + // See https://www.iana.org/assignments/charset-reg/KOI7-switched + KOI7switched MIB = 2105 + + // BRF is the MIB identifier with IANA name BRF. + // + // See https://www.iana.org/assignments/charset-reg/BRF + BRF MIB = 2106 + + // TSCII is the MIB identifier with IANA name TSCII. + // + // See https://www.iana.org/assignments/charset-reg/TSCII + TSCII MIB = 2107 + + // CP51932 is the MIB identifier with IANA name CP51932. + // + // See https://www.iana.org/assignments/charset-reg/CP51932 + CP51932 MIB = 2108 + + // Windows874 is the MIB identifier with IANA name windows-874. + // + // See https://www.iana.org/assignments/charset-reg/windows-874 + Windows874 MIB = 2109 + + // Windows1250 is the MIB identifier with IANA name windows-1250. + // + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1250 + Windows1250 MIB = 2250 + + // Windows1251 is the MIB identifier with IANA name windows-1251. + // + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1251 + Windows1251 MIB = 2251 + + // Windows1252 is the MIB identifier with IANA name windows-1252. + // + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1252 + Windows1252 MIB = 2252 + + // Windows1253 is the MIB identifier with IANA name windows-1253. + // + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1253 + Windows1253 MIB = 2253 + + // Windows1254 is the MIB identifier with IANA name windows-1254. + // + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1254 + Windows1254 MIB = 2254 + + // Windows1255 is the MIB identifier with IANA name windows-1255. + // + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1255 + Windows1255 MIB = 2255 + + // Windows1256 is the MIB identifier with IANA name windows-1256. + // + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1256 + Windows1256 MIB = 2256 + + // Windows1257 is the MIB identifier with IANA name windows-1257. + // + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1257 + Windows1257 MIB = 2257 + + // Windows1258 is the MIB identifier with IANA name windows-1258. + // + // Microsoft https://www.iana.org/assignments/charset-reg/windows-1258 + Windows1258 MIB = 2258 + + // TIS620 is the MIB identifier with IANA name TIS-620. + // + // Thai Industrial Standards Institute (TISI) + TIS620 MIB = 2259 + + // CP50220 is the MIB identifier with IANA name CP50220. + // + // See https://www.iana.org/assignments/charset-reg/CP50220 + CP50220 MIB = 2260 +) diff --git a/vendor/golang.org/x/text/encoding/internal/internal.go b/vendor/golang.org/x/text/encoding/internal/internal.go new file mode 100644 index 0000000000..75a5fd1658 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/internal/internal.go @@ -0,0 +1,75 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package internal contains code that is shared among encoding implementations. +package internal + +import ( + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/internal/identifier" + "golang.org/x/text/transform" +) + +// Encoding is an implementation of the Encoding interface that adds the String +// and ID methods to an existing encoding. +type Encoding struct { + encoding.Encoding + Name string + MIB identifier.MIB +} + +// _ verifies that Encoding implements identifier.Interface. +var _ identifier.Interface = (*Encoding)(nil) + +func (e *Encoding) String() string { + return e.Name +} + +func (e *Encoding) ID() (mib identifier.MIB, other string) { + return e.MIB, "" +} + +// SimpleEncoding is an Encoding that combines two Transformers. +type SimpleEncoding struct { + Decoder transform.Transformer + Encoder transform.Transformer +} + +func (e *SimpleEncoding) NewDecoder() *encoding.Decoder { + return &encoding.Decoder{Transformer: e.Decoder} +} + +func (e *SimpleEncoding) NewEncoder() *encoding.Encoder { + return &encoding.Encoder{Transformer: e.Encoder} +} + +// FuncEncoding is an Encoding that combines two functions returning a new +// Transformer. +type FuncEncoding struct { + Decoder func() transform.Transformer + Encoder func() transform.Transformer +} + +func (e FuncEncoding) NewDecoder() *encoding.Decoder { + return &encoding.Decoder{Transformer: e.Decoder()} +} + +func (e FuncEncoding) NewEncoder() *encoding.Encoder { + return &encoding.Encoder{Transformer: e.Encoder()} +} + +// A RepertoireError indicates a rune is not in the repertoire of a destination +// encoding. It is associated with an encoding-specific suggested replacement +// byte. +type RepertoireError byte + +// Error implements the error interrface. +func (r RepertoireError) Error() string { + return "encoding: rune not supported by encoding." +} + +// Replacement returns the replacement string associated with this error. +func (r RepertoireError) Replacement() byte { return byte(r) } + +var ErrASCIIReplacement = RepertoireError(encoding.ASCIISub) diff --git a/vendor/golang.org/x/text/encoding/unicode/override.go b/vendor/golang.org/x/text/encoding/unicode/override.go new file mode 100644 index 0000000000..35d62fcc99 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/unicode/override.go @@ -0,0 +1,82 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unicode + +import ( + "golang.org/x/text/transform" +) + +// BOMOverride returns a new decoder transformer that is identical to fallback, +// except that the presence of a Byte Order Mark at the start of the input +// causes it to switch to the corresponding Unicode decoding. It will only +// consider BOMs for UTF-8, UTF-16BE, and UTF-16LE. +// +// This differs from using ExpectBOM by allowing a BOM to switch to UTF-8, not +// just UTF-16 variants, and allowing falling back to any encoding scheme. +// +// This technique is recommended by the W3C for use in HTML 5: "For +// compatibility with deployed content, the byte order mark (also known as BOM) +// is considered more authoritative than anything else." +// http://www.w3.org/TR/encoding/#specification-hooks +// +// Using BOMOverride is mostly intended for use cases where the first characters +// of a fallback encoding are known to not be a BOM, for example, for valid HTML +// and most encodings. +func BOMOverride(fallback transform.Transformer) transform.Transformer { + // TODO: possibly allow a variadic argument of unicode encodings to allow + // specifying details of which fallbacks are supported as well as + // specifying the details of the implementations. This would also allow for + // support for UTF-32, which should not be supported by default. + return &bomOverride{fallback: fallback} +} + +type bomOverride struct { + fallback transform.Transformer + current transform.Transformer +} + +func (d *bomOverride) Reset() { + d.current = nil + d.fallback.Reset() +} + +var ( + // TODO: we could use decode functions here, instead of allocating a new + // decoder on every NewDecoder as IgnoreBOM decoders can be stateless. + utf16le = UTF16(LittleEndian, IgnoreBOM) + utf16be = UTF16(BigEndian, IgnoreBOM) +) + +const utf8BOM = "\ufeff" + +func (d *bomOverride) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if d.current != nil { + return d.current.Transform(dst, src, atEOF) + } + if len(src) < 3 && !atEOF { + return 0, 0, transform.ErrShortSrc + } + d.current = d.fallback + bomSize := 0 + if len(src) >= 2 { + if src[0] == 0xFF && src[1] == 0xFE { + d.current = utf16le.NewDecoder() + bomSize = 2 + } else if src[0] == 0xFE && src[1] == 0xFF { + d.current = utf16be.NewDecoder() + bomSize = 2 + } else if len(src) >= 3 && + src[0] == utf8BOM[0] && + src[1] == utf8BOM[1] && + src[2] == utf8BOM[2] { + d.current = transform.Nop + bomSize = 3 + } + } + if bomSize < len(src) { + nDst, nSrc, err = d.current.Transform(dst, src[bomSize:], atEOF) + } + return nDst, nSrc + bomSize, err +} diff --git a/vendor/golang.org/x/text/encoding/unicode/unicode.go b/vendor/golang.org/x/text/encoding/unicode/unicode.go new file mode 100644 index 0000000000..dd99ad14d3 --- /dev/null +++ b/vendor/golang.org/x/text/encoding/unicode/unicode.go @@ -0,0 +1,512 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package unicode provides Unicode encodings such as UTF-16. +package unicode // import "golang.org/x/text/encoding/unicode" + +import ( + "bytes" + "errors" + "unicode/utf16" + "unicode/utf8" + + "golang.org/x/text/encoding" + "golang.org/x/text/encoding/internal" + "golang.org/x/text/encoding/internal/identifier" + "golang.org/x/text/internal/utf8internal" + "golang.org/x/text/runes" + "golang.org/x/text/transform" +) + +// TODO: I think the Transformers really should return errors on unmatched +// surrogate pairs and odd numbers of bytes. This is not required by RFC 2781, +// which leaves it open, but is suggested by WhatWG. It will allow for all error +// modes as defined by WhatWG: fatal, HTML and Replacement. This would require +// the introduction of some kind of error type for conveying the erroneous code +// point. + +// UTF8 is the UTF-8 encoding. It neither removes nor adds byte order marks. +var UTF8 encoding.Encoding = utf8enc + +// UTF8BOM is an UTF-8 encoding where the decoder strips a leading byte order +// mark while the encoder adds one. +// +// Some editors add a byte order mark as a signature to UTF-8 files. Although +// the byte order mark is not useful for detecting byte order in UTF-8, it is +// sometimes used as a convention to mark UTF-8-encoded files. This relies on +// the observation that the UTF-8 byte order mark is either an illegal or at +// least very unlikely sequence in any other character encoding. +var UTF8BOM encoding.Encoding = utf8bomEncoding{} + +type utf8bomEncoding struct{} + +func (utf8bomEncoding) String() string { + return "UTF-8-BOM" +} + +func (utf8bomEncoding) ID() (identifier.MIB, string) { + return identifier.Unofficial, "x-utf8bom" +} + +func (utf8bomEncoding) NewEncoder() *encoding.Encoder { + return &encoding.Encoder{ + Transformer: &utf8bomEncoder{t: runes.ReplaceIllFormed()}, + } +} + +func (utf8bomEncoding) NewDecoder() *encoding.Decoder { + return &encoding.Decoder{Transformer: &utf8bomDecoder{}} +} + +var utf8enc = &internal.Encoding{ + &internal.SimpleEncoding{utf8Decoder{}, runes.ReplaceIllFormed()}, + "UTF-8", + identifier.UTF8, +} + +type utf8bomDecoder struct { + checked bool +} + +func (t *utf8bomDecoder) Reset() { + t.checked = false +} + +func (t *utf8bomDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if !t.checked { + if !atEOF && len(src) < len(utf8BOM) { + if len(src) == 0 { + return 0, 0, nil + } + return 0, 0, transform.ErrShortSrc + } + if bytes.HasPrefix(src, []byte(utf8BOM)) { + nSrc += len(utf8BOM) + src = src[len(utf8BOM):] + } + t.checked = true + } + nDst, n, err := utf8Decoder.Transform(utf8Decoder{}, dst[nDst:], src, atEOF) + nSrc += n + return nDst, nSrc, err +} + +type utf8bomEncoder struct { + written bool + t transform.Transformer +} + +func (t *utf8bomEncoder) Reset() { + t.written = false + t.t.Reset() +} + +func (t *utf8bomEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if !t.written { + if len(dst) < len(utf8BOM) { + return nDst, 0, transform.ErrShortDst + } + nDst = copy(dst, utf8BOM) + t.written = true + } + n, nSrc, err := utf8Decoder.Transform(utf8Decoder{}, dst[nDst:], src, atEOF) + nDst += n + return nDst, nSrc, err +} + +type utf8Decoder struct{ transform.NopResetter } + +func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + var pSrc int // point from which to start copy in src + var accept utf8internal.AcceptRange + + // The decoder can only make the input larger, not smaller. + n := len(src) + if len(dst) < n { + err = transform.ErrShortDst + n = len(dst) + atEOF = false + } + for nSrc < n { + c := src[nSrc] + if c < utf8.RuneSelf { + nSrc++ + continue + } + first := utf8internal.First[c] + size := int(first & utf8internal.SizeMask) + if first == utf8internal.FirstInvalid { + goto handleInvalid // invalid starter byte + } + accept = utf8internal.AcceptRanges[first>>utf8internal.AcceptShift] + if nSrc+size > n { + if !atEOF { + // We may stop earlier than necessary here if the short sequence + // has invalid bytes. Not checking for this simplifies the code + // and may avoid duplicate computations in certain conditions. + if err == nil { + err = transform.ErrShortSrc + } + break + } + // Determine the maximal subpart of an ill-formed subsequence. + switch { + case nSrc+1 >= n || src[nSrc+1] < accept.Lo || accept.Hi < src[nSrc+1]: + size = 1 + case nSrc+2 >= n || src[nSrc+2] < utf8internal.LoCB || utf8internal.HiCB < src[nSrc+2]: + size = 2 + default: + size = 3 // As we are short, the maximum is 3. + } + goto handleInvalid + } + if c = src[nSrc+1]; c < accept.Lo || accept.Hi < c { + size = 1 + goto handleInvalid // invalid continuation byte + } else if size == 2 { + } else if c = src[nSrc+2]; c < utf8internal.LoCB || utf8internal.HiCB < c { + size = 2 + goto handleInvalid // invalid continuation byte + } else if size == 3 { + } else if c = src[nSrc+3]; c < utf8internal.LoCB || utf8internal.HiCB < c { + size = 3 + goto handleInvalid // invalid continuation byte + } + nSrc += size + continue + + handleInvalid: + // Copy the scanned input so far. + nDst += copy(dst[nDst:], src[pSrc:nSrc]) + + // Append RuneError to the destination. + const runeError = "\ufffd" + if nDst+len(runeError) > len(dst) { + return nDst, nSrc, transform.ErrShortDst + } + nDst += copy(dst[nDst:], runeError) + + // Skip the maximal subpart of an ill-formed subsequence according to + // the W3C standard way instead of the Go way. This Transform is + // probably the only place in the text repo where it is warranted. + nSrc += size + pSrc = nSrc + + // Recompute the maximum source length. + if sz := len(dst) - nDst; sz < len(src)-nSrc { + err = transform.ErrShortDst + n = nSrc + sz + atEOF = false + } + } + return nDst + copy(dst[nDst:], src[pSrc:nSrc]), nSrc, err +} + +// UTF16 returns a UTF-16 Encoding for the given default endianness and byte +// order mark (BOM) policy. +// +// When decoding from UTF-16 to UTF-8, if the BOMPolicy is IgnoreBOM then +// neither BOMs U+FEFF nor noncharacters U+FFFE in the input stream will affect +// the endianness used for decoding, and will instead be output as their +// standard UTF-8 encodings: "\xef\xbb\xbf" and "\xef\xbf\xbe". If the BOMPolicy +// is UseBOM or ExpectBOM a staring BOM is not written to the UTF-8 output. +// Instead, it overrides the default endianness e for the remainder of the +// transformation. Any subsequent BOMs U+FEFF or noncharacters U+FFFE will not +// affect the endianness used, and will instead be output as their standard +// UTF-8 encodings. For UseBOM, if there is no starting BOM, it will proceed +// with the default Endianness. For ExpectBOM, in that case, the transformation +// will return early with an ErrMissingBOM error. +// +// When encoding from UTF-8 to UTF-16, a BOM will be inserted at the start of +// the output if the BOMPolicy is UseBOM or ExpectBOM. Otherwise, a BOM will not +// be inserted. The UTF-8 input does not need to contain a BOM. +// +// There is no concept of a 'native' endianness. If the UTF-16 data is produced +// and consumed in a greater context that implies a certain endianness, use +// IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM. +// +// In the language of https://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM +// corresponds to "Where the precise type of the data stream is known... the +// BOM should not be used" and ExpectBOM corresponds to "A particular +// protocol... may require use of the BOM". +func UTF16(e Endianness, b BOMPolicy) encoding.Encoding { + return utf16Encoding{config{e, b}, mibValue[e][b&bomMask]} +} + +// mibValue maps Endianness and BOMPolicy settings to MIB constants. Note that +// some configurations map to the same MIB identifier. RFC 2781 has requirements +// and recommendations. Some of the "configurations" are merely recommendations, +// so multiple configurations could match. +var mibValue = map[Endianness][numBOMValues]identifier.MIB{ + BigEndian: [numBOMValues]identifier.MIB{ + IgnoreBOM: identifier.UTF16BE, + UseBOM: identifier.UTF16, // BigEnding default is preferred by RFC 2781. + // TODO: acceptBOM | strictBOM would map to UTF16BE as well. + }, + LittleEndian: [numBOMValues]identifier.MIB{ + IgnoreBOM: identifier.UTF16LE, + UseBOM: identifier.UTF16, // LittleEndian default is allowed and preferred on Windows. + // TODO: acceptBOM | strictBOM would map to UTF16LE as well. + }, + // ExpectBOM is not widely used and has no valid MIB identifier. +} + +// All lists a configuration for each IANA-defined UTF-16 variant. +var All = []encoding.Encoding{ + UTF8, + UTF16(BigEndian, UseBOM), + UTF16(BigEndian, IgnoreBOM), + UTF16(LittleEndian, IgnoreBOM), +} + +// BOMPolicy is a UTF-16 encoding's byte order mark policy. +type BOMPolicy uint8 + +const ( + writeBOM BOMPolicy = 0x01 + acceptBOM BOMPolicy = 0x02 + requireBOM BOMPolicy = 0x04 + bomMask BOMPolicy = 0x07 + + // HACK: numBOMValues == 8 triggers a bug in the 1.4 compiler (cannot have a + // map of an array of length 8 of a type that is also used as a key or value + // in another map). See golang.org/issue/11354. + // TODO: consider changing this value back to 8 if the use of 1.4.* has + // been minimized. + numBOMValues = 8 + 1 + + // IgnoreBOM means to ignore any byte order marks. + IgnoreBOM BOMPolicy = 0 + // Common and RFC 2781-compliant interpretation for UTF-16BE/LE. + + // UseBOM means that the UTF-16 form may start with a byte order mark, which + // will be used to override the default encoding. + UseBOM BOMPolicy = writeBOM | acceptBOM + // Common and RFC 2781-compliant interpretation for UTF-16. + + // ExpectBOM means that the UTF-16 form must start with a byte order mark, + // which will be used to override the default encoding. + ExpectBOM BOMPolicy = writeBOM | acceptBOM | requireBOM + // Used in Java as Unicode (not to be confused with Java's UTF-16) and + // ICU's UTF-16,version=1. Not compliant with RFC 2781. + + // TODO (maybe): strictBOM: BOM must match Endianness. This would allow: + // - UTF-16(B|L)E,version=1: writeBOM | acceptBOM | requireBOM | strictBOM + // (UnicodeBig and UnicodeLittle in Java) + // - RFC 2781-compliant, but less common interpretation for UTF-16(B|L)E: + // acceptBOM | strictBOM (e.g. assigned to CheckBOM). + // This addition would be consistent with supporting ExpectBOM. +) + +// Endianness is a UTF-16 encoding's default endianness. +type Endianness bool + +const ( + // BigEndian is UTF-16BE. + BigEndian Endianness = false + // LittleEndian is UTF-16LE. + LittleEndian Endianness = true +) + +// ErrMissingBOM means that decoding UTF-16 input with ExpectBOM did not find a +// starting byte order mark. +var ErrMissingBOM = errors.New("encoding: missing byte order mark") + +type utf16Encoding struct { + config + mib identifier.MIB +} + +type config struct { + endianness Endianness + bomPolicy BOMPolicy +} + +func (u utf16Encoding) NewDecoder() *encoding.Decoder { + return &encoding.Decoder{Transformer: &utf16Decoder{ + initial: u.config, + current: u.config, + }} +} + +func (u utf16Encoding) NewEncoder() *encoding.Encoder { + return &encoding.Encoder{Transformer: &utf16Encoder{ + endianness: u.endianness, + initialBOMPolicy: u.bomPolicy, + currentBOMPolicy: u.bomPolicy, + }} +} + +func (u utf16Encoding) ID() (mib identifier.MIB, other string) { + return u.mib, "" +} + +func (u utf16Encoding) String() string { + e, b := "B", "" + if u.endianness == LittleEndian { + e = "L" + } + switch u.bomPolicy { + case ExpectBOM: + b = "Expect" + case UseBOM: + b = "Use" + case IgnoreBOM: + b = "Ignore" + } + return "UTF-16" + e + "E (" + b + " BOM)" +} + +type utf16Decoder struct { + initial config + current config +} + +func (u *utf16Decoder) Reset() { + u.current = u.initial +} + +func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if len(src) < 2 && atEOF && u.current.bomPolicy&requireBOM != 0 { + return 0, 0, ErrMissingBOM + } + if len(src) == 0 { + return 0, 0, nil + } + if len(src) >= 2 && u.current.bomPolicy&acceptBOM != 0 { + switch { + case src[0] == 0xfe && src[1] == 0xff: + u.current.endianness = BigEndian + nSrc = 2 + case src[0] == 0xff && src[1] == 0xfe: + u.current.endianness = LittleEndian + nSrc = 2 + default: + if u.current.bomPolicy&requireBOM != 0 { + return 0, 0, ErrMissingBOM + } + } + u.current.bomPolicy = IgnoreBOM + } + + var r rune + var dSize, sSize int + for nSrc < len(src) { + if nSrc+1 < len(src) { + x := uint16(src[nSrc+0])<<8 | uint16(src[nSrc+1]) + if u.current.endianness == LittleEndian { + x = x>>8 | x<<8 + } + r, sSize = rune(x), 2 + if utf16.IsSurrogate(r) { + if nSrc+3 < len(src) { + x = uint16(src[nSrc+2])<<8 | uint16(src[nSrc+3]) + if u.current.endianness == LittleEndian { + x = x>>8 | x<<8 + } + // Save for next iteration if it is not a high surrogate. + if isHighSurrogate(rune(x)) { + r, sSize = utf16.DecodeRune(r, rune(x)), 4 + } + } else if !atEOF { + err = transform.ErrShortSrc + break + } + } + if dSize = utf8.RuneLen(r); dSize < 0 { + r, dSize = utf8.RuneError, 3 + } + } else if atEOF { + // Single trailing byte. + r, dSize, sSize = utf8.RuneError, 3, 1 + } else { + err = transform.ErrShortSrc + break + } + if nDst+dSize > len(dst) { + err = transform.ErrShortDst + break + } + nDst += utf8.EncodeRune(dst[nDst:], r) + nSrc += sSize + } + return nDst, nSrc, err +} + +func isHighSurrogate(r rune) bool { + return 0xDC00 <= r && r <= 0xDFFF +} + +type utf16Encoder struct { + endianness Endianness + initialBOMPolicy BOMPolicy + currentBOMPolicy BOMPolicy +} + +func (u *utf16Encoder) Reset() { + u.currentBOMPolicy = u.initialBOMPolicy +} + +func (u *utf16Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + if u.currentBOMPolicy&writeBOM != 0 { + if len(dst) < 2 { + return 0, 0, transform.ErrShortDst + } + dst[0], dst[1] = 0xfe, 0xff + u.currentBOMPolicy = IgnoreBOM + nDst = 2 + } + + r, size := rune(0), 0 + for nSrc < len(src) { + r = rune(src[nSrc]) + + // Decode a 1-byte rune. + if r < utf8.RuneSelf { + size = 1 + + } else { + // Decode a multi-byte rune. + r, size = utf8.DecodeRune(src[nSrc:]) + if size == 1 { + // All valid runes of size 1 (those below utf8.RuneSelf) were + // handled above. We have invalid UTF-8 or we haven't seen the + // full character yet. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + } + } + + if r <= 0xffff { + if nDst+2 > len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst+0] = uint8(r >> 8) + dst[nDst+1] = uint8(r) + nDst += 2 + } else { + if nDst+4 > len(dst) { + err = transform.ErrShortDst + break + } + r1, r2 := utf16.EncodeRune(r) + dst[nDst+0] = uint8(r1 >> 8) + dst[nDst+1] = uint8(r1) + dst[nDst+2] = uint8(r2 >> 8) + dst[nDst+3] = uint8(r2) + nDst += 4 + } + nSrc += size + } + + if u.endianness == LittleEndian { + for i := 0; i < nDst; i += 2 { + dst[i], dst[i+1] = dst[i+1], dst[i] + } + } + return nDst, nSrc, err +} diff --git a/vendor/golang.org/x/text/internal/utf8internal/utf8internal.go b/vendor/golang.org/x/text/internal/utf8internal/utf8internal.go new file mode 100644 index 0000000000..e5c53b1b3e --- /dev/null +++ b/vendor/golang.org/x/text/internal/utf8internal/utf8internal.go @@ -0,0 +1,87 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package utf8internal contains low-level utf8-related constants, tables, etc. +// that are used internally by the text package. +package utf8internal + +// The default lowest and highest continuation byte. +const ( + LoCB = 0x80 // 1000 0000 + HiCB = 0xBF // 1011 1111 +) + +// Constants related to getting information of first bytes of UTF-8 sequences. +const ( + // ASCII identifies a UTF-8 byte as ASCII. + ASCII = as + + // FirstInvalid indicates a byte is invalid as a first byte of a UTF-8 + // sequence. + FirstInvalid = xx + + // SizeMask is a mask for the size bits. Use use x&SizeMask to get the size. + SizeMask = 7 + + // AcceptShift is the right-shift count for the first byte info byte to get + // the index into the AcceptRanges table. See AcceptRanges. + AcceptShift = 4 + + // The names of these constants are chosen to give nice alignment in the + // table below. The first nibble is an index into acceptRanges or F for + // special one-byte cases. The second nibble is the Rune length or the + // Status for the special one-byte case. + xx = 0xF1 // invalid: size 1 + as = 0xF0 // ASCII: size 1 + s1 = 0x02 // accept 0, size 2 + s2 = 0x13 // accept 1, size 3 + s3 = 0x03 // accept 0, size 3 + s4 = 0x23 // accept 2, size 3 + s5 = 0x34 // accept 3, size 4 + s6 = 0x04 // accept 0, size 4 + s7 = 0x44 // accept 4, size 4 +) + +// First is information about the first byte in a UTF-8 sequence. +var First = [256]uint8{ + // 1 2 3 4 5 6 7 8 9 A B C D E F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F + as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F + // 1 2 3 4 5 6 7 8 9 A B C D E F + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF + xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF + xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF + s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF + s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF + s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF +} + +// AcceptRange gives the range of valid values for the second byte in a UTF-8 +// sequence for any value for First that is not ASCII or FirstInvalid. +type AcceptRange struct { + Lo uint8 // lowest value for second byte. + Hi uint8 // highest value for second byte. +} + +// AcceptRanges is a slice of AcceptRange values. For a given byte sequence b +// +// AcceptRanges[First[b[0]]>>AcceptShift] +// +// will give the value of AcceptRange for the multi-byte UTF-8 sequence starting +// at b[0]. +var AcceptRanges = [...]AcceptRange{ + 0: {LoCB, HiCB}, + 1: {0xA0, HiCB}, + 2: {LoCB, 0x9F}, + 3: {0x90, HiCB}, + 4: {LoCB, 0x8F}, +} diff --git a/vendor/golang.org/x/text/runes/cond.go b/vendor/golang.org/x/text/runes/cond.go new file mode 100644 index 0000000000..df7aa02db6 --- /dev/null +++ b/vendor/golang.org/x/text/runes/cond.go @@ -0,0 +1,187 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runes + +import ( + "unicode/utf8" + + "golang.org/x/text/transform" +) + +// Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is. +// This is done for various reasons: +// - To retain the semantics of the Nop transformer: if input is passed to a Nop +// one would expect it to be unchanged. +// - It would be very expensive to pass a converted RuneError to a transformer: +// a transformer might need more source bytes after RuneError, meaning that +// the only way to pass it safely is to create a new buffer and manage the +// intermingling of RuneErrors and normal input. +// - Many transformers leave ill-formed UTF-8 as is, so this is not +// inconsistent. Generally ill-formed UTF-8 is only replaced if it is a +// logical consequence of the operation (as for Map) or if it otherwise would +// pose security concerns (as for Remove). +// - An alternative would be to return an error on ill-formed UTF-8, but this +// would be inconsistent with other operations. + +// If returns a transformer that applies tIn to consecutive runes for which +// s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset +// is called on tIn and tNotIn at the start of each run. A Nop transformer will +// substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated +// to RuneError to determine which transformer to apply, but is passed as is to +// the respective transformer. +func If(s Set, tIn, tNotIn transform.Transformer) Transformer { + if tIn == nil && tNotIn == nil { + return Transformer{transform.Nop} + } + if tIn == nil { + tIn = transform.Nop + } + if tNotIn == nil { + tNotIn = transform.Nop + } + sIn, ok := tIn.(transform.SpanningTransformer) + if !ok { + sIn = dummySpan{tIn} + } + sNotIn, ok := tNotIn.(transform.SpanningTransformer) + if !ok { + sNotIn = dummySpan{tNotIn} + } + + a := &cond{ + tIn: sIn, + tNotIn: sNotIn, + f: s.Contains, + } + a.Reset() + return Transformer{a} +} + +type dummySpan struct{ transform.Transformer } + +func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) { + return 0, transform.ErrEndOfSpan +} + +type cond struct { + tIn, tNotIn transform.SpanningTransformer + f func(rune) bool + check func(rune) bool // current check to perform + t transform.SpanningTransformer // current transformer to use +} + +// Reset implements transform.Transformer. +func (t *cond) Reset() { + t.check = t.is + t.t = t.tIn + t.t.Reset() // notIn will be reset on first usage. +} + +func (t *cond) is(r rune) bool { + if t.f(r) { + return true + } + t.check = t.isNot + t.t = t.tNotIn + t.tNotIn.Reset() + return false +} + +func (t *cond) isNot(r rune) bool { + if !t.f(r) { + return true + } + t.check = t.is + t.t = t.tIn + t.tIn.Reset() + return false +} + +// This implementation of Span doesn't help all too much, but it needs to be +// there to satisfy this package's Transformer interface. +// TODO: there are certainly room for improvements, though. For example, if +// t.t == transform.Nop (which will a common occurrence) it will save a bundle +// to special-case that loop. +func (t *cond) Span(src []byte, atEOF bool) (n int, err error) { + p := 0 + for n < len(src) && err == nil { + // Don't process too much at a time as the Spanner that will be + // called on this block may terminate early. + const maxChunk = 4096 + max := len(src) + if v := n + maxChunk; v < max { + max = v + } + atEnd := false + size := 0 + current := t.t + for ; p < max; p += size { + r := rune(src[p]) + if r < utf8.RuneSelf { + size = 1 + } else if r, size = utf8.DecodeRune(src[p:]); size == 1 { + if !atEOF && !utf8.FullRune(src[p:]) { + err = transform.ErrShortSrc + break + } + } + if !t.check(r) { + // The next rune will be the start of a new run. + atEnd = true + break + } + } + n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src))) + n += n2 + if err2 != nil { + return n, err2 + } + // At this point either err != nil or t.check will pass for the rune at p. + p = n + size + } + return n, err +} + +func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + p := 0 + for nSrc < len(src) && err == nil { + // Don't process too much at a time, as the work might be wasted if the + // destination buffer isn't large enough to hold the result or a + // transform returns an error early. + const maxChunk = 4096 + max := len(src) + if n := nSrc + maxChunk; n < len(src) { + max = n + } + atEnd := false + size := 0 + current := t.t + for ; p < max; p += size { + r := rune(src[p]) + if r < utf8.RuneSelf { + size = 1 + } else if r, size = utf8.DecodeRune(src[p:]); size == 1 { + if !atEOF && !utf8.FullRune(src[p:]) { + err = transform.ErrShortSrc + break + } + } + if !t.check(r) { + // The next rune will be the start of a new run. + atEnd = true + break + } + } + nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src))) + nDst += nDst2 + nSrc += nSrc2 + if err2 != nil { + return nDst, nSrc, err2 + } + // At this point either err != nil or t.check will pass for the rune at p. + p = nSrc + size + } + return nDst, nSrc, err +} diff --git a/vendor/golang.org/x/text/runes/runes.go b/vendor/golang.org/x/text/runes/runes.go new file mode 100644 index 0000000000..930e87fedb --- /dev/null +++ b/vendor/golang.org/x/text/runes/runes.go @@ -0,0 +1,355 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package runes provide transforms for UTF-8 encoded text. +package runes // import "golang.org/x/text/runes" + +import ( + "unicode" + "unicode/utf8" + + "golang.org/x/text/transform" +) + +// A Set is a collection of runes. +type Set interface { + // Contains returns true if r is contained in the set. + Contains(r rune) bool +} + +type setFunc func(rune) bool + +func (s setFunc) Contains(r rune) bool { + return s(r) +} + +// Note: using funcs here instead of wrapping types result in cleaner +// documentation and a smaller API. + +// In creates a Set with a Contains method that returns true for all runes in +// the given RangeTable. +func In(rt *unicode.RangeTable) Set { + return setFunc(func(r rune) bool { return unicode.Is(rt, r) }) +} + +// NotIn creates a Set with a Contains method that returns true for all runes not +// in the given RangeTable. +func NotIn(rt *unicode.RangeTable) Set { + return setFunc(func(r rune) bool { return !unicode.Is(rt, r) }) +} + +// Predicate creates a Set with a Contains method that returns f(r). +func Predicate(f func(rune) bool) Set { + return setFunc(f) +} + +// Transformer implements the transform.Transformer interface. +type Transformer struct { + t transform.SpanningTransformer +} + +func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + return t.t.Transform(dst, src, atEOF) +} + +func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) { + return t.t.Span(b, atEOF) +} + +func (t Transformer) Reset() { t.t.Reset() } + +// Bytes returns a new byte slice with the result of converting b using t. It +// calls Reset on t. It returns nil if any error was found. This can only happen +// if an error-producing Transformer is passed to If. +func (t Transformer) Bytes(b []byte) []byte { + b, _, err := transform.Bytes(t, b) + if err != nil { + return nil + } + return b +} + +// String returns a string with the result of converting s using t. It calls +// Reset on t. It returns the empty string if any error was found. This can only +// happen if an error-producing Transformer is passed to If. +func (t Transformer) String(s string) string { + s, _, err := transform.String(t, s) + if err != nil { + return "" + } + return s +} + +// TODO: +// - Copy: copying strings and bytes in whole-rune units. +// - Validation (maybe) +// - Well-formed-ness (maybe) + +const runeErrorString = string(utf8.RuneError) + +// Remove returns a Transformer that removes runes r for which s.Contains(r). +// Illegal input bytes are replaced by RuneError before being passed to f. +func Remove(s Set) Transformer { + if f, ok := s.(setFunc); ok { + // This little trick cuts the running time of BenchmarkRemove for sets + // created by Predicate roughly in half. + // TODO: special-case RangeTables as well. + return Transformer{remove(f)} + } + return Transformer{remove(s.Contains)} +} + +// TODO: remove transform.RemoveFunc. + +type remove func(r rune) bool + +func (remove) Reset() {} + +// Span implements transform.Spanner. +func (t remove) Span(src []byte, atEOF bool) (n int, err error) { + for r, size := rune(0), 0; n < len(src); { + if r = rune(src[n]); r < utf8.RuneSelf { + size = 1 + } else if r, size = utf8.DecodeRune(src[n:]); size == 1 { + // Invalid rune. + if !atEOF && !utf8.FullRune(src[n:]) { + err = transform.ErrShortSrc + } else { + err = transform.ErrEndOfSpan + } + break + } + if t(r) { + err = transform.ErrEndOfSpan + break + } + n += size + } + return +} + +// Transform implements transform.Transformer. +func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + for r, size := rune(0), 0; nSrc < len(src); { + if r = rune(src[nSrc]); r < utf8.RuneSelf { + size = 1 + } else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 { + // Invalid rune. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + // We replace illegal bytes with RuneError. Not doing so might + // otherwise turn a sequence of invalid UTF-8 into valid UTF-8. + // The resulting byte sequence may subsequently contain runes + // for which t(r) is true that were passed unnoticed. + if !t(utf8.RuneError) { + if nDst+3 > len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst+0] = runeErrorString[0] + dst[nDst+1] = runeErrorString[1] + dst[nDst+2] = runeErrorString[2] + nDst += 3 + } + nSrc++ + continue + } + if t(r) { + nSrc += size + continue + } + if nDst+size > len(dst) { + err = transform.ErrShortDst + break + } + for i := 0; i < size; i++ { + dst[nDst] = src[nSrc] + nDst++ + nSrc++ + } + } + return +} + +// Map returns a Transformer that maps the runes in the input using the given +// mapping. Illegal bytes in the input are converted to utf8.RuneError before +// being passed to the mapping func. +func Map(mapping func(rune) rune) Transformer { + return Transformer{mapper(mapping)} +} + +type mapper func(rune) rune + +func (mapper) Reset() {} + +// Span implements transform.Spanner. +func (t mapper) Span(src []byte, atEOF bool) (n int, err error) { + for r, size := rune(0), 0; n < len(src); n += size { + if r = rune(src[n]); r < utf8.RuneSelf { + size = 1 + } else if r, size = utf8.DecodeRune(src[n:]); size == 1 { + // Invalid rune. + if !atEOF && !utf8.FullRune(src[n:]) { + err = transform.ErrShortSrc + } else { + err = transform.ErrEndOfSpan + } + break + } + if t(r) != r { + err = transform.ErrEndOfSpan + break + } + } + return n, err +} + +// Transform implements transform.Transformer. +func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + var replacement rune + var b [utf8.UTFMax]byte + + for r, size := rune(0), 0; nSrc < len(src); { + if r = rune(src[nSrc]); r < utf8.RuneSelf { + if replacement = t(r); replacement < utf8.RuneSelf { + if nDst == len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst] = byte(replacement) + nDst++ + nSrc++ + continue + } + size = 1 + } else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 { + // Invalid rune. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + + if replacement = t(utf8.RuneError); replacement == utf8.RuneError { + if nDst+3 > len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst+0] = runeErrorString[0] + dst[nDst+1] = runeErrorString[1] + dst[nDst+2] = runeErrorString[2] + nDst += 3 + nSrc++ + continue + } + } else if replacement = t(r); replacement == r { + if nDst+size > len(dst) { + err = transform.ErrShortDst + break + } + for i := 0; i < size; i++ { + dst[nDst] = src[nSrc] + nDst++ + nSrc++ + } + continue + } + + n := utf8.EncodeRune(b[:], replacement) + + if nDst+n > len(dst) { + err = transform.ErrShortDst + break + } + for i := 0; i < n; i++ { + dst[nDst] = b[i] + nDst++ + } + nSrc += size + } + return +} + +// ReplaceIllFormed returns a transformer that replaces all input bytes that are +// not part of a well-formed UTF-8 code sequence with utf8.RuneError. +func ReplaceIllFormed() Transformer { + return Transformer{&replaceIllFormed{}} +} + +type replaceIllFormed struct{ transform.NopResetter } + +func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) { + for n < len(src) { + // ASCII fast path. + if src[n] < utf8.RuneSelf { + n++ + continue + } + + r, size := utf8.DecodeRune(src[n:]) + + // Look for a valid non-ASCII rune. + if r != utf8.RuneError || size != 1 { + n += size + continue + } + + // Look for short source data. + if !atEOF && !utf8.FullRune(src[n:]) { + err = transform.ErrShortSrc + break + } + + // We have an invalid rune. + err = transform.ErrEndOfSpan + break + } + return n, err +} + +func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + for nSrc < len(src) { + // ASCII fast path. + if r := src[nSrc]; r < utf8.RuneSelf { + if nDst == len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst] = r + nDst++ + nSrc++ + continue + } + + // Look for a valid non-ASCII rune. + if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 { + if size != copy(dst[nDst:], src[nSrc:nSrc+size]) { + err = transform.ErrShortDst + break + } + nDst += size + nSrc += size + continue + } + + // Look for short source data. + if !atEOF && !utf8.FullRune(src[nSrc:]) { + err = transform.ErrShortSrc + break + } + + // We have an invalid rune. + if nDst+3 > len(dst) { + err = transform.ErrShortDst + break + } + dst[nDst+0] = runeErrorString[0] + dst[nDst+1] = runeErrorString[1] + dst[nDst+2] = runeErrorString[2] + nDst += 3 + nSrc++ + } + return nDst, nSrc, err +} diff --git a/vendor/modules.txt b/vendor/modules.txt index f18ebdbdfd..4839165dd8 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -975,6 +975,12 @@ golang.org/x/sys/windows/svc/eventlog golang.org/x/sys/windows/svc/mgr # golang.org/x/text v0.5.0 ## explicit; go 1.17 +golang.org/x/text/encoding +golang.org/x/text/encoding/internal +golang.org/x/text/encoding/internal/identifier +golang.org/x/text/encoding/unicode +golang.org/x/text/internal/utf8internal +golang.org/x/text/runes golang.org/x/text/secure/bidirule golang.org/x/text/transform golang.org/x/text/unicode/bidi