Merge pull request #44832 from thaJeztah/23.0_backport_wide_json

[23.0 backport] daemon/config: support alternate (common) unicode encodings using a BOM
This commit is contained in:
Bjorn Neergaard 2023-01-17 22:00:54 -07:00 committed by GitHub
commit 110a9eaac1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 3471 additions and 105 deletions

View file

@ -11,6 +11,10 @@ import (
"strings"
"sync"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
"github.com/containerd/containerd/runtime/v2/shim"
"github.com/docker/docker/opts"
"github.com/docker/docker/pkg/authorization"
@ -434,10 +438,35 @@ func getConflictFreeConfiguration(configFile string, flags *pflag.FlagSet) (*Con
return nil, err
}
// Strip the UTF-8 BOM if present ([RFC 8259] allows JSON implementations to optionally strip the BOM for
// interoperability; do so here as Notepad on older versions of Windows Server insists on a BOM).
// [RFC 8259]: https://tools.ietf.org/html/rfc8259#section-8.1
b = bytes.TrimPrefix(b, []byte("\xef\xbb\xbf"))
// Decode the contents of the JSON file using a [byte order mark] if present, instead of assuming UTF-8 without BOM.
// The BOM, if present, will be used to determine the encoding. If no BOM is present, we will assume the default
// and preferred encoding for JSON as defined by [RFC 8259], UTF-8 without BOM.
//
// While JSON is normatively UTF-8 with no BOM, there are a couple of reasons to decode here:
// * UTF-8 with BOM is something that new implementations should avoid producing; however, [RFC 8259 Section 8.1]
// allows implementations to ignore the UTF-8 BOM when present for interoperability. Older versions of Notepad,
// the only text editor available out of the box on Windows Server, writes UTF-8 with a BOM by default.
// * The default encoding for [Windows PowerShell] is UTF-16 LE with BOM. While encodings in PowerShell can be a
// bit idiosyncratic, BOMs are still generally written. There is no support for selecting UTF-8 without a BOM as
// the encoding in Windows PowerShell, though some Cmdlets only write UTF-8 with no BOM. PowerShell Core
// introduces `utf8NoBOM` and makes it the default, but PowerShell Core is unlikely to be the implementation for
// a majority of Windows Server + PowerShell users.
// * While [RFC 8259 Section 8.1] asserts that software that is not part of a closed ecosystem or that crosses a
// network boundary should only support UTF-8, and should never write a BOM, it does acknowledge older versions
// of the standard, such as [RFC 7159 Section 8.1]. In the interest of pragmatism and easing pain for Windows
// users, we consider Windows tools such as Windows PowerShell and Notepad part of our ecosystem, and support
// the two most common encodings: UTF-16 LE with BOM, and UTF-8 with BOM, in addition to the standard UTF-8
// without BOM.
//
// [byte order mark]: https://www.unicode.org/faq/utf_bom.html#BOM
// [RFC 8259]: https://www.rfc-editor.org/rfc/rfc8259
// [RFC 8259 Section 8.1]: https://www.rfc-editor.org/rfc/rfc8259#section-8.1
// [RFC 7159 Section 8.1]: https://www.rfc-editor.org/rfc/rfc7159#section-8.1
// [Windows PowerShell]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_character_encoding?view=powershell-5.1
b, n, err := transform.Bytes(transform.Chain(unicode.BOMOverride(transform.Nop), encoding.UTF8Validator), b)
if err != nil {
return nil, errors.Wrapf(err, "failed to decode configuration JSON at offset %d", n)
}
// Trim whitespace so that an empty config can be detected for an early return.
b = bytes.TrimSpace(b)

View file

@ -9,11 +9,10 @@ import (
"github.com/spf13/pflag"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
"gotest.tools/v3/fs"
)
func TestGetConflictFreeConfiguration(t *testing.T) {
configFileData := `
configFile := makeConfigFile(t, `
{
"debug": true,
"default-ulimits": {
@ -26,10 +25,7 @@ func TestGetConflictFreeConfiguration(t *testing.T) {
"log-opts": {
"tag": "test_tag"
}
}`
file := fs.NewFile(t, "docker-config", fs.WithContent(configFileData))
defer file.Remove()
}`)
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
var debug bool
@ -37,7 +33,7 @@ func TestGetConflictFreeConfiguration(t *testing.T) {
flags.Var(opts.NewNamedUlimitOpt("default-ulimits", nil), "default-ulimit", "")
flags.Var(opts.NewNamedMapOpts("log-opts", nil, nil), "log-opt", "")
cc, err := getConflictFreeConfiguration(file.Path(), flags)
cc, err := getConflictFreeConfiguration(configFile, flags)
assert.NilError(t, err)
assert.Check(t, cc.Debug)
@ -54,7 +50,7 @@ func TestGetConflictFreeConfiguration(t *testing.T) {
}
func TestDaemonConfigurationMerge(t *testing.T) {
configFileData := `
configFile := makeConfigFile(t, `
{
"debug": true,
"default-ulimits": {
@ -64,10 +60,7 @@ func TestDaemonConfigurationMerge(t *testing.T) {
"Soft": 1024
}
}
}`
file := fs.NewFile(t, "docker-config", fs.WithContent(configFileData))
defer file.Remove()
}`)
conf, err := New()
assert.NilError(t, err)
@ -82,7 +75,7 @@ func TestDaemonConfigurationMerge(t *testing.T) {
assert.Check(t, flags.Set("log-driver", "syslog"))
assert.Check(t, flags.Set("log-opt", "tag=from_flag"))
cc, err := MergeDaemonConfigurations(conf, flags, file.Path())
cc, err := MergeDaemonConfigurations(conf, flags, configFile)
assert.NilError(t, err)
assert.Check(t, cc.Debug)
@ -107,10 +100,7 @@ func TestDaemonConfigurationMerge(t *testing.T) {
}
func TestDaemonConfigurationMergeShmSize(t *testing.T) {
data := `{"default-shm-size": "1g"}`
file := fs.NewFile(t, "docker-config", fs.WithContent(data))
defer file.Remove()
configFile := makeConfigFile(t, `{"default-shm-size": "1g"}`)
c := &Config{}
@ -118,7 +108,7 @@ func TestDaemonConfigurationMergeShmSize(t *testing.T) {
shmSize := opts.MemBytes(DefaultShmSize)
flags.Var(&shmSize, "default-shm-size", "")
cc, err := MergeDaemonConfigurations(c, flags, file.Path())
cc, err := MergeDaemonConfigurations(c, flags, configFile)
assert.NilError(t, err)
expectedValue := 1 * 1024 * 1024 * 1024

View file

@ -13,41 +13,80 @@ import (
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/imdario/mergo"
"github.com/spf13/pflag"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/unicode"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
"gotest.tools/v3/fs"
"gotest.tools/v3/skip"
)
func makeConfigFile(t *testing.T, content string) string {
t.Helper()
name := filepath.Join(t.TempDir(), "daemon.json")
err := os.WriteFile(name, []byte(content), 0666)
assert.NilError(t, err)
return name
}
func TestDaemonConfigurationNotFound(t *testing.T) {
_, err := MergeDaemonConfigurations(&Config{}, nil, "/tmp/foo-bar-baz-docker")
assert.Check(t, os.IsNotExist(err), "got: %[1]T: %[1]v", err)
}
func TestDaemonBrokenConfiguration(t *testing.T) {
f, err := os.CreateTemp("", "docker-config-")
assert.NilError(t, err)
configFile := makeConfigFile(t, `{"Debug": tru`)
configFile := f.Name()
f.Write([]byte(`{"Debug": tru`))
f.Close()
_, err = MergeDaemonConfigurations(&Config{}, nil, configFile)
_, err := MergeDaemonConfigurations(&Config{}, nil, configFile)
assert.ErrorContains(t, err, `invalid character ' ' in literal true`)
}
// TestDaemonConfigurationWithBOM ensures that the UTF-8 byte order mark is ignored when reading the configuration file.
func TestDaemonConfigurationWithBOM(t *testing.T) {
configFile := filepath.Join(t.TempDir(), "daemon.json")
// TestDaemonConfigurationUnicodeVariations feeds various variations of Unicode into the JSON parser, ensuring that we
// respect a BOM and otherwise default to UTF-8.
func TestDaemonConfigurationUnicodeVariations(t *testing.T) {
jsonData := `{"debug": true}`
f, err := os.Create(configFile)
assert.NilError(t, err)
testCases := []struct {
name string
encoding encoding.Encoding
}{
{
name: "UTF-8",
encoding: unicode.UTF8,
},
{
name: "UTF-8 (with BOM)",
encoding: unicode.UTF8BOM,
},
{
name: "UTF-16 (BE with BOM)",
encoding: unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
},
{
name: "UTF-16 (LE with BOM)",
encoding: unicode.UTF16(unicode.LittleEndian, unicode.UseBOM),
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
encodedJson, err := tc.encoding.NewEncoder().String(jsonData)
assert.NilError(t, err)
configFile := makeConfigFile(t, encodedJson)
_, err = MergeDaemonConfigurations(&Config{}, nil, configFile)
assert.NilError(t, err)
})
}
}
f.Write([]byte("\xef\xbb\xbf{\"debug\": true}"))
f.Close()
// TestDaemonConfigurationInvalidUnicode ensures that the JSON parser returns a useful error message if malformed UTF-8
// is provided.
func TestDaemonConfigurationInvalidUnicode(t *testing.T) {
configFileBOM := makeConfigFile(t, "\xef\xbb\xbf{\"debug\": true}\xff")
_, err := MergeDaemonConfigurations(&Config{}, nil, configFileBOM)
assert.ErrorIs(t, err, encoding.ErrInvalidUTF8)
_, err = MergeDaemonConfigurations(&Config{}, nil, configFile)
assert.NilError(t, err)
configFileNoBOM := makeConfigFile(t, "{\"debug\": true}\xff")
_, err = MergeDaemonConfigurations(&Config{}, nil, configFileNoBOM)
assert.ErrorIs(t, err, encoding.ErrInvalidUTF8)
}
func TestFindConfigurationConflicts(t *testing.T) {
@ -71,18 +110,13 @@ func TestFindConfigurationConflictsWithNamedOptions(t *testing.T) {
}
func TestDaemonConfigurationMergeConflicts(t *testing.T) {
f, err := os.CreateTemp("", "docker-config-")
assert.NilError(t, err)
configFile := f.Name()
f.Write([]byte(`{"debug": true}`))
f.Close()
configFile := makeConfigFile(t, `{"debug": true}`)
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
flags.Bool("debug", false, "")
assert.Check(t, flags.Set("debug", "false"))
_, err = MergeDaemonConfigurations(&Config{}, flags, configFile)
_, err := MergeDaemonConfigurations(&Config{}, flags, configFile)
if err == nil {
t.Fatal("expected error, got nil")
}
@ -92,51 +126,34 @@ func TestDaemonConfigurationMergeConflicts(t *testing.T) {
}
func TestDaemonConfigurationMergeConcurrent(t *testing.T) {
f, err := os.CreateTemp("", "docker-config-")
assert.NilError(t, err)
configFile := makeConfigFile(t, `{"max-concurrent-downloads": 1}`)
configFile := f.Name()
f.Write([]byte(`{"max-concurrent-downloads": 1}`))
f.Close()
_, err = MergeDaemonConfigurations(&Config{}, nil, configFile)
_, err := MergeDaemonConfigurations(&Config{}, nil, configFile)
assert.NilError(t, err)
}
func TestDaemonConfigurationMergeConcurrentError(t *testing.T) {
f, err := os.CreateTemp("", "docker-config-")
assert.NilError(t, err)
configFile := makeConfigFile(t, `{"max-concurrent-downloads": -1}`)
configFile := f.Name()
f.Write([]byte(`{"max-concurrent-downloads": -1}`))
f.Close()
_, err = MergeDaemonConfigurations(&Config{}, nil, configFile)
_, err := MergeDaemonConfigurations(&Config{}, nil, configFile)
assert.ErrorContains(t, err, `invalid max concurrent downloads: -1`)
}
func TestDaemonConfigurationMergeConflictsWithInnerStructs(t *testing.T) {
f, err := os.CreateTemp("", "docker-config-")
assert.NilError(t, err)
configFile := f.Name()
f.Write([]byte(`{"tlscacert": "/etc/certificates/ca.pem"}`))
f.Close()
configFile := makeConfigFile(t, `{"tlscacert": "/etc/certificates/ca.pem"}`)
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
flags.String("tlscacert", "", "")
assert.Check(t, flags.Set("tlscacert", "~/.docker/ca.pem"))
_, err = MergeDaemonConfigurations(&Config{}, flags, configFile)
_, err := MergeDaemonConfigurations(&Config{}, flags, configFile)
assert.ErrorContains(t, err, `the following directives are specified both as a flag and in the configuration file: tlscacert`)
}
// Test for #40711
// TestDaemonConfigurationMergeDefaultAddressPools is a regression test for #40711.
func TestDaemonConfigurationMergeDefaultAddressPools(t *testing.T) {
emptyConfigFile := fs.NewFile(t, "config", fs.WithContent(`{}`))
defer emptyConfigFile.Remove()
configFile := fs.NewFile(t, "config", fs.WithContent(`{"default-address-pools":[{"base": "10.123.0.0/16", "size": 24 }]}`))
defer configFile.Remove()
emptyConfigFile := makeConfigFile(t, `{}`)
configFile := makeConfigFile(t, `{"default-address-pools":[{"base": "10.123.0.0/16", "size": 24 }]}`)
expected := []*ipamutils.NetworkToSplit{{Base: "10.123.0.0/16", Size: 24}}
@ -146,7 +163,7 @@ func TestDaemonConfigurationMergeDefaultAddressPools(t *testing.T) {
flags.Var(&conf.NetworkConfig.DefaultAddressPools, "default-address-pool", "")
assert.Check(t, flags.Set("default-address-pool", "base=10.123.0.0/16,size=24"))
config, err := MergeDaemonConfigurations(&conf, flags, emptyConfigFile.Path())
config, err := MergeDaemonConfigurations(&conf, flags, emptyConfigFile)
assert.NilError(t, err)
assert.DeepEqual(t, config.DefaultAddressPools.Value(), expected)
})
@ -156,7 +173,7 @@ func TestDaemonConfigurationMergeDefaultAddressPools(t *testing.T) {
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
flags.Var(&conf.NetworkConfig.DefaultAddressPools, "default-address-pool", "")
config, err := MergeDaemonConfigurations(&conf, flags, configFile.Path())
config, err := MergeDaemonConfigurations(&conf, flags, configFile)
assert.NilError(t, err)
assert.DeepEqual(t, config.DefaultAddressPools.Value(), expected)
})
@ -167,7 +184,7 @@ func TestDaemonConfigurationMergeDefaultAddressPools(t *testing.T) {
flags.Var(&conf.NetworkConfig.DefaultAddressPools, "default-address-pool", "")
assert.Check(t, flags.Set("default-address-pool", "base=10.123.0.0/16,size=24"))
_, err := MergeDaemonConfigurations(&conf, flags, configFile.Path())
_, err := MergeDaemonConfigurations(&conf, flags, configFile)
assert.ErrorContains(t, err, "the following directives are specified both as a flag and in the configuration file")
assert.ErrorContains(t, err, "default-address-pools")
})
@ -520,8 +537,8 @@ func field(field string) cmp.Option {
return cmpopts.IgnoreFields(Config{}, ignoreFields...)
}
// TestReloadSetConfigFileNotExist tests that when `--config-file` is set
// and it doesn't exist the `Reload` function returns an error.
// TestReloadSetConfigFileNotExist tests that when `--config-file` is set, and it doesn't exist the `Reload` function
// returns an error.
func TestReloadSetConfigFileNotExist(t *testing.T) {
configFile := "/tmp/blabla/not/exists/config.json"
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
@ -532,8 +549,8 @@ func TestReloadSetConfigFileNotExist(t *testing.T) {
assert.Check(t, is.ErrorContains(err, "unable to configure the Docker daemon with file"))
}
// TestReloadDefaultConfigNotExist tests that if the default configuration file
// doesn't exist the daemon still will be reloaded.
// TestReloadDefaultConfigNotExist tests that if the default configuration file doesn't exist the daemon still will
// still be reloaded.
func TestReloadDefaultConfigNotExist(t *testing.T) {
skip.If(t, os.Getuid() != 0, "skipping test that requires root")
defaultConfigFile := "/tmp/blabla/not/exists/daemon.json"
@ -547,20 +564,15 @@ func TestReloadDefaultConfigNotExist(t *testing.T) {
assert.Check(t, reloaded)
}
// TestReloadBadDefaultConfig tests that when `--config-file` is not set
// and the default configuration file exists and is bad return an error
// TestReloadBadDefaultConfig tests that when `--config-file` is not set and the default configuration file exists and
// is bad, an error is returned.
func TestReloadBadDefaultConfig(t *testing.T) {
f, err := os.CreateTemp("", "docker-config-")
assert.NilError(t, err)
configFile := f.Name()
f.Write([]byte(`{wrong: "configuration"}`))
f.Close()
configFile := makeConfigFile(t, `{wrong: "configuration"}`)
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
flags.String("config-file", configFile, "")
reloaded := false
err = Reload(configFile, flags, func(c *Config) {
err := Reload(configFile, flags, func(c *Config) {
reloaded = true
})
assert.Check(t, is.ErrorContains(err, "unable to configure the Docker daemon with file"))
@ -568,9 +580,7 @@ func TestReloadBadDefaultConfig(t *testing.T) {
}
func TestReloadWithConflictingLabels(t *testing.T) {
tempFile := fs.NewFile(t, "config", fs.WithContent(`{"labels":["foo=bar","foo=baz"]}`))
defer tempFile.Remove()
configFile := tempFile.Path()
configFile := makeConfigFile(t, `{"labels": ["foo=bar", "foo=baz"]}`)
var lbls []string
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
@ -585,9 +595,7 @@ func TestReloadWithConflictingLabels(t *testing.T) {
}
func TestReloadWithDuplicateLabels(t *testing.T) {
tempFile := fs.NewFile(t, "config", fs.WithContent(`{"labels":["foo=the-same","foo=the-same"]}`))
defer tempFile.Remove()
configFile := tempFile.Path()
configFile := makeConfigFile(t, `{"labels": ["foo=the-same", "foo=the-same"]}`)
var lbls []string
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)

View file

@ -1,7 +1,6 @@
package config // import "github.com/docker/docker/daemon/config"
import (
"os"
"testing"
"github.com/docker/docker/opts"
@ -11,17 +10,10 @@ import (
)
func TestDaemonConfigurationMerge(t *testing.T) {
f, err := os.CreateTemp("", "docker-config-")
assert.NilError(t, err)
configFile := f.Name()
f.Write([]byte(`
configFile := makeConfigFile(t, `
{
"debug": true
}`))
f.Close()
}`)
conf, err := New()
assert.NilError(t, err)

View file

@ -84,6 +84,7 @@ require (
golang.org/x/net v0.4.0
golang.org/x/sync v0.1.0
golang.org/x/sys v0.3.0
golang.org/x/text v0.5.0
golang.org/x/time v0.1.0
google.golang.org/genproto v0.0.0-20220706185917-7780775163c4
google.golang.org/grpc v1.48.0
@ -159,7 +160,6 @@ require (
go.uber.org/zap v1.21.0 // indirect
golang.org/x/crypto v0.2.0 // indirect
golang.org/x/oauth2 v0.1.0 // indirect
golang.org/x/text v0.5.0 // indirect
google.golang.org/api v0.93.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.28.1 // indirect

335
vendor/golang.org/x/text/encoding/encoding.go generated vendored Normal file
View file

@ -0,0 +1,335 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package encoding defines an interface for character encodings, such as Shift
// JIS and Windows 1252, that can convert to and from UTF-8.
//
// Encoding implementations are provided in other packages, such as
// golang.org/x/text/encoding/charmap and
// golang.org/x/text/encoding/japanese.
package encoding // import "golang.org/x/text/encoding"
import (
"errors"
"io"
"strconv"
"unicode/utf8"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
// TODO:
// - There seems to be some inconsistency in when decoders return errors
// and when not. Also documentation seems to suggest they shouldn't return
// errors at all (except for UTF-16).
// - Encoders seem to rely on or at least benefit from the input being in NFC
// normal form. Perhaps add an example how users could prepare their output.
// Encoding is a character set encoding that can be transformed to and from
// UTF-8.
type Encoding interface {
// NewDecoder returns a Decoder.
NewDecoder() *Decoder
// NewEncoder returns an Encoder.
NewEncoder() *Encoder
}
// A Decoder converts bytes to UTF-8. It implements transform.Transformer.
//
// Transforming source bytes that are not of that encoding will not result in an
// error per se. Each byte that cannot be transcoded will be represented in the
// output by the UTF-8 encoding of '\uFFFD', the replacement rune.
type Decoder struct {
transform.Transformer
// This forces external creators of Decoders to use names in struct
// initializers, allowing for future extendibility without having to break
// code.
_ struct{}
}
// Bytes converts the given encoded bytes to UTF-8. It returns the converted
// bytes or nil, err if any error occurred.
func (d *Decoder) Bytes(b []byte) ([]byte, error) {
b, _, err := transform.Bytes(d, b)
if err != nil {
return nil, err
}
return b, nil
}
// String converts the given encoded string to UTF-8. It returns the converted
// string or "", err if any error occurred.
func (d *Decoder) String(s string) (string, error) {
s, _, err := transform.String(d, s)
if err != nil {
return "", err
}
return s, nil
}
// Reader wraps another Reader to decode its bytes.
//
// The Decoder may not be used for any other operation as long as the returned
// Reader is in use.
func (d *Decoder) Reader(r io.Reader) io.Reader {
return transform.NewReader(r, d)
}
// An Encoder converts bytes from UTF-8. It implements transform.Transformer.
//
// Each rune that cannot be transcoded will result in an error. In this case,
// the transform will consume all source byte up to, not including the offending
// rune. Transforming source bytes that are not valid UTF-8 will be replaced by
// `\uFFFD`. To return early with an error instead, use transform.Chain to
// preprocess the data with a UTF8Validator.
type Encoder struct {
transform.Transformer
// This forces external creators of Encoders to use names in struct
// initializers, allowing for future extendibility without having to break
// code.
_ struct{}
}
// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if
// any error occurred.
func (e *Encoder) Bytes(b []byte) ([]byte, error) {
b, _, err := transform.Bytes(e, b)
if err != nil {
return nil, err
}
return b, nil
}
// String converts a string from UTF-8. It returns the converted string or
// "", err if any error occurred.
func (e *Encoder) String(s string) (string, error) {
s, _, err := transform.String(e, s)
if err != nil {
return "", err
}
return s, nil
}
// Writer wraps another Writer to encode its UTF-8 output.
//
// The Encoder may not be used for any other operation as long as the returned
// Writer is in use.
func (e *Encoder) Writer(w io.Writer) io.Writer {
return transform.NewWriter(w, e)
}
// ASCIISub is the ASCII substitute character, as recommended by
// https://unicode.org/reports/tr36/#Text_Comparison
const ASCIISub = '\x1a'
// Nop is the nop encoding. Its transformed bytes are the same as the source
// bytes; it does not replace invalid UTF-8 sequences.
var Nop Encoding = nop{}
type nop struct{}
func (nop) NewDecoder() *Decoder {
return &Decoder{Transformer: transform.Nop}
}
func (nop) NewEncoder() *Encoder {
return &Encoder{Transformer: transform.Nop}
}
// Replacement is the replacement encoding. Decoding from the replacement
// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to
// the replacement encoding yields the same as the source bytes except that
// invalid UTF-8 is converted to '\uFFFD'.
//
// It is defined at http://encoding.spec.whatwg.org/#replacement
var Replacement Encoding = replacement{}
type replacement struct{}
func (replacement) NewDecoder() *Decoder {
return &Decoder{Transformer: replacementDecoder{}}
}
func (replacement) NewEncoder() *Encoder {
return &Encoder{Transformer: replacementEncoder{}}
}
func (replacement) ID() (mib identifier.MIB, other string) {
return identifier.Replacement, ""
}
type replacementDecoder struct{ transform.NopResetter }
func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if len(dst) < 3 {
return 0, 0, transform.ErrShortDst
}
if atEOF {
const fffd = "\ufffd"
dst[0] = fffd[0]
dst[1] = fffd[1]
dst[2] = fffd[2]
nDst = 3
}
return nDst, len(src), nil
}
type replacementEncoder struct{ transform.NopResetter }
func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
for ; nSrc < len(src); nSrc += size {
r = rune(src[nSrc])
// Decode a 1-byte rune.
if r < utf8.RuneSelf {
size = 1
} else {
// Decode a multi-byte rune.
r, size = utf8.DecodeRune(src[nSrc:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
r = '\ufffd'
}
}
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
return nDst, nSrc, err
}
// HTMLEscapeUnsupported wraps encoders to replace source runes outside the
// repertoire of the destination encoding with HTML escape sequences.
//
// This wrapper exists to comply to URL and HTML forms requiring a
// non-terminating legacy encoder. The produced sequences may lead to data
// loss as they are indistinguishable from legitimate input. To avoid this
// issue, use UTF-8 encodings whenever possible.
func HTMLEscapeUnsupported(e *Encoder) *Encoder {
return &Encoder{Transformer: &errorHandler{e, errorToHTML}}
}
// ReplaceUnsupported wraps encoders to replace source runes outside the
// repertoire of the destination encoding with an encoding-specific
// replacement.
//
// This wrapper is only provided for backwards compatibility and legacy
// handling. Its use is strongly discouraged. Use UTF-8 whenever possible.
func ReplaceUnsupported(e *Encoder) *Encoder {
return &Encoder{Transformer: &errorHandler{e, errorToReplacement}}
}
type errorHandler struct {
*Encoder
handler func(dst []byte, r rune, err repertoireError) (n int, ok bool)
}
// TODO: consider making this error public in some form.
type repertoireError interface {
Replacement() byte
}
func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF)
for err != nil {
rerr, ok := err.(repertoireError)
if !ok {
return nDst, nSrc, err
}
r, sz := utf8.DecodeRune(src[nSrc:])
n, ok := h.handler(dst[nDst:], r, rerr)
if !ok {
return nDst, nSrc, transform.ErrShortDst
}
err = nil
nDst += n
if nSrc += sz; nSrc < len(src) {
var dn, sn int
dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF)
nDst += dn
nSrc += sn
}
}
return nDst, nSrc, err
}
func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) {
buf := [8]byte{}
b := strconv.AppendUint(buf[:0], uint64(r), 10)
if n = len(b) + len("&#;"); n >= len(dst) {
return 0, false
}
dst[0] = '&'
dst[1] = '#'
dst[copy(dst[2:], b)+2] = ';'
return n, true
}
func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) {
if len(dst) == 0 {
return 0, false
}
dst[0] = err.Replacement()
return 1, true
}
// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8.
var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")
// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first
// input byte that is not valid UTF-8.
var UTF8Validator transform.Transformer = utf8Validator{}
type utf8Validator struct{ transform.NopResetter }
func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
n := len(src)
if n > len(dst) {
n = len(dst)
}
for i := 0; i < n; {
if c := src[i]; c < utf8.RuneSelf {
dst[i] = c
i++
continue
}
_, size := utf8.DecodeRune(src[i:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
err = ErrInvalidUTF8
if !atEOF && !utf8.FullRune(src[i:]) {
err = transform.ErrShortSrc
}
return i, i, err
}
if i+size > len(dst) {
return i, i, transform.ErrShortDst
}
for ; size > 0; size-- {
dst[i] = src[i]
i++
}
}
if len(src) > len(dst) {
err = transform.ErrShortDst
}
return n, n, err
}

View file

@ -0,0 +1,81 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go
// Package identifier defines the contract between implementations of Encoding
// and Index by defining identifiers that uniquely identify standardized coded
// character sets (CCS) and character encoding schemes (CES), which we will
// together refer to as encodings, for which Encoding implementations provide
// converters to and from UTF-8. This package is typically only of concern to
// implementers of Indexes and Encodings.
//
// One part of the identifier is the MIB code, which is defined by IANA and
// uniquely identifies a CCS or CES. Each code is associated with data that
// references authorities, official documentation as well as aliases and MIME
// names.
//
// Not all CESs are covered by the IANA registry. The "other" string that is
// returned by ID can be used to identify other character sets or versions of
// existing ones.
//
// It is recommended that each package that provides a set of Encodings provide
// the All and Common variables to reference all supported encodings and
// commonly used subset. This allows Index implementations to include all
// available encodings without explicitly referencing or knowing about them.
package identifier
// Note: this package is internal, but could be made public if there is a need
// for writing third-party Indexes and Encodings.
// References:
// - http://source.icu-project.org/repos/icu/icu/trunk/source/data/mappings/convrtrs.txt
// - http://www.iana.org/assignments/character-sets/character-sets.xhtml
// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib
// - http://www.ietf.org/rfc/rfc2978.txt
// - https://www.unicode.org/reports/tr22/
// - http://www.w3.org/TR/encoding/
// - https://encoding.spec.whatwg.org/
// - https://encoding.spec.whatwg.org/encodings.json
// - https://tools.ietf.org/html/rfc6657#section-5
// Interface can be implemented by Encodings to define the CCS or CES for which
// it implements conversions.
type Interface interface {
// ID returns an encoding identifier. Exactly one of the mib and other
// values should be non-zero.
//
// In the usual case it is only necessary to indicate the MIB code. The
// other string can be used to specify encodings for which there is no MIB,
// such as "x-mac-dingbat".
//
// The other string may only contain the characters a-z, A-Z, 0-9, - and _.
ID() (mib MIB, other string)
// NOTE: the restrictions on the encoding are to allow extending the syntax
// with additional information such as versions, vendors and other variants.
}
// A MIB identifies an encoding. It is derived from the IANA MIB codes and adds
// some identifiers for some encodings that are not covered by the IANA
// standard.
//
// See http://www.iana.org/assignments/ianacharset-mib.
type MIB uint16
// These additional MIB types are not defined in IANA. They are added because
// they are common and defined within the text repo.
const (
// Unofficial marks the start of encodings not registered by IANA.
Unofficial MIB = 10000 + iota
// Replacement is the WhatWG replacement encoding.
Replacement
// XUserDefined is the code for x-user-defined.
XUserDefined
// MacintoshCyrillic is the code for x-mac-cyrillic.
MacintoshCyrillic
)

File diff suppressed because it is too large Load diff

75
vendor/golang.org/x/text/encoding/internal/internal.go generated vendored Normal file
View file

@ -0,0 +1,75 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package internal contains code that is shared among encoding implementations.
package internal
import (
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/transform"
)
// Encoding is an implementation of the Encoding interface that adds the String
// and ID methods to an existing encoding.
type Encoding struct {
encoding.Encoding
Name string
MIB identifier.MIB
}
// _ verifies that Encoding implements identifier.Interface.
var _ identifier.Interface = (*Encoding)(nil)
func (e *Encoding) String() string {
return e.Name
}
func (e *Encoding) ID() (mib identifier.MIB, other string) {
return e.MIB, ""
}
// SimpleEncoding is an Encoding that combines two Transformers.
type SimpleEncoding struct {
Decoder transform.Transformer
Encoder transform.Transformer
}
func (e *SimpleEncoding) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: e.Decoder}
}
func (e *SimpleEncoding) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{Transformer: e.Encoder}
}
// FuncEncoding is an Encoding that combines two functions returning a new
// Transformer.
type FuncEncoding struct {
Decoder func() transform.Transformer
Encoder func() transform.Transformer
}
func (e FuncEncoding) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: e.Decoder()}
}
func (e FuncEncoding) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{Transformer: e.Encoder()}
}
// A RepertoireError indicates a rune is not in the repertoire of a destination
// encoding. It is associated with an encoding-specific suggested replacement
// byte.
type RepertoireError byte
// Error implements the error interrface.
func (r RepertoireError) Error() string {
return "encoding: rune not supported by encoding."
}
// Replacement returns the replacement string associated with this error.
func (r RepertoireError) Replacement() byte { return byte(r) }
var ErrASCIIReplacement = RepertoireError(encoding.ASCIISub)

82
vendor/golang.org/x/text/encoding/unicode/override.go generated vendored Normal file
View file

@ -0,0 +1,82 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unicode
import (
"golang.org/x/text/transform"
)
// BOMOverride returns a new decoder transformer that is identical to fallback,
// except that the presence of a Byte Order Mark at the start of the input
// causes it to switch to the corresponding Unicode decoding. It will only
// consider BOMs for UTF-8, UTF-16BE, and UTF-16LE.
//
// This differs from using ExpectBOM by allowing a BOM to switch to UTF-8, not
// just UTF-16 variants, and allowing falling back to any encoding scheme.
//
// This technique is recommended by the W3C for use in HTML 5: "For
// compatibility with deployed content, the byte order mark (also known as BOM)
// is considered more authoritative than anything else."
// http://www.w3.org/TR/encoding/#specification-hooks
//
// Using BOMOverride is mostly intended for use cases where the first characters
// of a fallback encoding are known to not be a BOM, for example, for valid HTML
// and most encodings.
func BOMOverride(fallback transform.Transformer) transform.Transformer {
// TODO: possibly allow a variadic argument of unicode encodings to allow
// specifying details of which fallbacks are supported as well as
// specifying the details of the implementations. This would also allow for
// support for UTF-32, which should not be supported by default.
return &bomOverride{fallback: fallback}
}
type bomOverride struct {
fallback transform.Transformer
current transform.Transformer
}
func (d *bomOverride) Reset() {
d.current = nil
d.fallback.Reset()
}
var (
// TODO: we could use decode functions here, instead of allocating a new
// decoder on every NewDecoder as IgnoreBOM decoders can be stateless.
utf16le = UTF16(LittleEndian, IgnoreBOM)
utf16be = UTF16(BigEndian, IgnoreBOM)
)
const utf8BOM = "\ufeff"
func (d *bomOverride) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if d.current != nil {
return d.current.Transform(dst, src, atEOF)
}
if len(src) < 3 && !atEOF {
return 0, 0, transform.ErrShortSrc
}
d.current = d.fallback
bomSize := 0
if len(src) >= 2 {
if src[0] == 0xFF && src[1] == 0xFE {
d.current = utf16le.NewDecoder()
bomSize = 2
} else if src[0] == 0xFE && src[1] == 0xFF {
d.current = utf16be.NewDecoder()
bomSize = 2
} else if len(src) >= 3 &&
src[0] == utf8BOM[0] &&
src[1] == utf8BOM[1] &&
src[2] == utf8BOM[2] {
d.current = transform.Nop
bomSize = 3
}
}
if bomSize < len(src) {
nDst, nSrc, err = d.current.Transform(dst, src[bomSize:], atEOF)
}
return nDst, nSrc + bomSize, err
}

512
vendor/golang.org/x/text/encoding/unicode/unicode.go generated vendored Normal file
View file

@ -0,0 +1,512 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package unicode provides Unicode encodings such as UTF-16.
package unicode // import "golang.org/x/text/encoding/unicode"
import (
"bytes"
"errors"
"unicode/utf16"
"unicode/utf8"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/internal/utf8internal"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
)
// TODO: I think the Transformers really should return errors on unmatched
// surrogate pairs and odd numbers of bytes. This is not required by RFC 2781,
// which leaves it open, but is suggested by WhatWG. It will allow for all error
// modes as defined by WhatWG: fatal, HTML and Replacement. This would require
// the introduction of some kind of error type for conveying the erroneous code
// point.
// UTF8 is the UTF-8 encoding. It neither removes nor adds byte order marks.
var UTF8 encoding.Encoding = utf8enc
// UTF8BOM is an UTF-8 encoding where the decoder strips a leading byte order
// mark while the encoder adds one.
//
// Some editors add a byte order mark as a signature to UTF-8 files. Although
// the byte order mark is not useful for detecting byte order in UTF-8, it is
// sometimes used as a convention to mark UTF-8-encoded files. This relies on
// the observation that the UTF-8 byte order mark is either an illegal or at
// least very unlikely sequence in any other character encoding.
var UTF8BOM encoding.Encoding = utf8bomEncoding{}
type utf8bomEncoding struct{}
func (utf8bomEncoding) String() string {
return "UTF-8-BOM"
}
func (utf8bomEncoding) ID() (identifier.MIB, string) {
return identifier.Unofficial, "x-utf8bom"
}
func (utf8bomEncoding) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{
Transformer: &utf8bomEncoder{t: runes.ReplaceIllFormed()},
}
}
func (utf8bomEncoding) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: &utf8bomDecoder{}}
}
var utf8enc = &internal.Encoding{
&internal.SimpleEncoding{utf8Decoder{}, runes.ReplaceIllFormed()},
"UTF-8",
identifier.UTF8,
}
type utf8bomDecoder struct {
checked bool
}
func (t *utf8bomDecoder) Reset() {
t.checked = false
}
func (t *utf8bomDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if !t.checked {
if !atEOF && len(src) < len(utf8BOM) {
if len(src) == 0 {
return 0, 0, nil
}
return 0, 0, transform.ErrShortSrc
}
if bytes.HasPrefix(src, []byte(utf8BOM)) {
nSrc += len(utf8BOM)
src = src[len(utf8BOM):]
}
t.checked = true
}
nDst, n, err := utf8Decoder.Transform(utf8Decoder{}, dst[nDst:], src, atEOF)
nSrc += n
return nDst, nSrc, err
}
type utf8bomEncoder struct {
written bool
t transform.Transformer
}
func (t *utf8bomEncoder) Reset() {
t.written = false
t.t.Reset()
}
func (t *utf8bomEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if !t.written {
if len(dst) < len(utf8BOM) {
return nDst, 0, transform.ErrShortDst
}
nDst = copy(dst, utf8BOM)
t.written = true
}
n, nSrc, err := utf8Decoder.Transform(utf8Decoder{}, dst[nDst:], src, atEOF)
nDst += n
return nDst, nSrc, err
}
type utf8Decoder struct{ transform.NopResetter }
func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
var pSrc int // point from which to start copy in src
var accept utf8internal.AcceptRange
// The decoder can only make the input larger, not smaller.
n := len(src)
if len(dst) < n {
err = transform.ErrShortDst
n = len(dst)
atEOF = false
}
for nSrc < n {
c := src[nSrc]
if c < utf8.RuneSelf {
nSrc++
continue
}
first := utf8internal.First[c]
size := int(first & utf8internal.SizeMask)
if first == utf8internal.FirstInvalid {
goto handleInvalid // invalid starter byte
}
accept = utf8internal.AcceptRanges[first>>utf8internal.AcceptShift]
if nSrc+size > n {
if !atEOF {
// We may stop earlier than necessary here if the short sequence
// has invalid bytes. Not checking for this simplifies the code
// and may avoid duplicate computations in certain conditions.
if err == nil {
err = transform.ErrShortSrc
}
break
}
// Determine the maximal subpart of an ill-formed subsequence.
switch {
case nSrc+1 >= n || src[nSrc+1] < accept.Lo || accept.Hi < src[nSrc+1]:
size = 1
case nSrc+2 >= n || src[nSrc+2] < utf8internal.LoCB || utf8internal.HiCB < src[nSrc+2]:
size = 2
default:
size = 3 // As we are short, the maximum is 3.
}
goto handleInvalid
}
if c = src[nSrc+1]; c < accept.Lo || accept.Hi < c {
size = 1
goto handleInvalid // invalid continuation byte
} else if size == 2 {
} else if c = src[nSrc+2]; c < utf8internal.LoCB || utf8internal.HiCB < c {
size = 2
goto handleInvalid // invalid continuation byte
} else if size == 3 {
} else if c = src[nSrc+3]; c < utf8internal.LoCB || utf8internal.HiCB < c {
size = 3
goto handleInvalid // invalid continuation byte
}
nSrc += size
continue
handleInvalid:
// Copy the scanned input so far.
nDst += copy(dst[nDst:], src[pSrc:nSrc])
// Append RuneError to the destination.
const runeError = "\ufffd"
if nDst+len(runeError) > len(dst) {
return nDst, nSrc, transform.ErrShortDst
}
nDst += copy(dst[nDst:], runeError)
// Skip the maximal subpart of an ill-formed subsequence according to
// the W3C standard way instead of the Go way. This Transform is
// probably the only place in the text repo where it is warranted.
nSrc += size
pSrc = nSrc
// Recompute the maximum source length.
if sz := len(dst) - nDst; sz < len(src)-nSrc {
err = transform.ErrShortDst
n = nSrc + sz
atEOF = false
}
}
return nDst + copy(dst[nDst:], src[pSrc:nSrc]), nSrc, err
}
// UTF16 returns a UTF-16 Encoding for the given default endianness and byte
// order mark (BOM) policy.
//
// When decoding from UTF-16 to UTF-8, if the BOMPolicy is IgnoreBOM then
// neither BOMs U+FEFF nor noncharacters U+FFFE in the input stream will affect
// the endianness used for decoding, and will instead be output as their
// standard UTF-8 encodings: "\xef\xbb\xbf" and "\xef\xbf\xbe". If the BOMPolicy
// is UseBOM or ExpectBOM a staring BOM is not written to the UTF-8 output.
// Instead, it overrides the default endianness e for the remainder of the
// transformation. Any subsequent BOMs U+FEFF or noncharacters U+FFFE will not
// affect the endianness used, and will instead be output as their standard
// UTF-8 encodings. For UseBOM, if there is no starting BOM, it will proceed
// with the default Endianness. For ExpectBOM, in that case, the transformation
// will return early with an ErrMissingBOM error.
//
// When encoding from UTF-8 to UTF-16, a BOM will be inserted at the start of
// the output if the BOMPolicy is UseBOM or ExpectBOM. Otherwise, a BOM will not
// be inserted. The UTF-8 input does not need to contain a BOM.
//
// There is no concept of a 'native' endianness. If the UTF-16 data is produced
// and consumed in a greater context that implies a certain endianness, use
// IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM.
//
// In the language of https://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
// corresponds to "Where the precise type of the data stream is known... the
// BOM should not be used" and ExpectBOM corresponds to "A particular
// protocol... may require use of the BOM".
func UTF16(e Endianness, b BOMPolicy) encoding.Encoding {
return utf16Encoding{config{e, b}, mibValue[e][b&bomMask]}
}
// mibValue maps Endianness and BOMPolicy settings to MIB constants. Note that
// some configurations map to the same MIB identifier. RFC 2781 has requirements
// and recommendations. Some of the "configurations" are merely recommendations,
// so multiple configurations could match.
var mibValue = map[Endianness][numBOMValues]identifier.MIB{
BigEndian: [numBOMValues]identifier.MIB{
IgnoreBOM: identifier.UTF16BE,
UseBOM: identifier.UTF16, // BigEnding default is preferred by RFC 2781.
// TODO: acceptBOM | strictBOM would map to UTF16BE as well.
},
LittleEndian: [numBOMValues]identifier.MIB{
IgnoreBOM: identifier.UTF16LE,
UseBOM: identifier.UTF16, // LittleEndian default is allowed and preferred on Windows.
// TODO: acceptBOM | strictBOM would map to UTF16LE as well.
},
// ExpectBOM is not widely used and has no valid MIB identifier.
}
// All lists a configuration for each IANA-defined UTF-16 variant.
var All = []encoding.Encoding{
UTF8,
UTF16(BigEndian, UseBOM),
UTF16(BigEndian, IgnoreBOM),
UTF16(LittleEndian, IgnoreBOM),
}
// BOMPolicy is a UTF-16 encoding's byte order mark policy.
type BOMPolicy uint8
const (
writeBOM BOMPolicy = 0x01
acceptBOM BOMPolicy = 0x02
requireBOM BOMPolicy = 0x04
bomMask BOMPolicy = 0x07
// HACK: numBOMValues == 8 triggers a bug in the 1.4 compiler (cannot have a
// map of an array of length 8 of a type that is also used as a key or value
// in another map). See golang.org/issue/11354.
// TODO: consider changing this value back to 8 if the use of 1.4.* has
// been minimized.
numBOMValues = 8 + 1
// IgnoreBOM means to ignore any byte order marks.
IgnoreBOM BOMPolicy = 0
// Common and RFC 2781-compliant interpretation for UTF-16BE/LE.
// UseBOM means that the UTF-16 form may start with a byte order mark, which
// will be used to override the default encoding.
UseBOM BOMPolicy = writeBOM | acceptBOM
// Common and RFC 2781-compliant interpretation for UTF-16.
// ExpectBOM means that the UTF-16 form must start with a byte order mark,
// which will be used to override the default encoding.
ExpectBOM BOMPolicy = writeBOM | acceptBOM | requireBOM
// Used in Java as Unicode (not to be confused with Java's UTF-16) and
// ICU's UTF-16,version=1. Not compliant with RFC 2781.
// TODO (maybe): strictBOM: BOM must match Endianness. This would allow:
// - UTF-16(B|L)E,version=1: writeBOM | acceptBOM | requireBOM | strictBOM
// (UnicodeBig and UnicodeLittle in Java)
// - RFC 2781-compliant, but less common interpretation for UTF-16(B|L)E:
// acceptBOM | strictBOM (e.g. assigned to CheckBOM).
// This addition would be consistent with supporting ExpectBOM.
)
// Endianness is a UTF-16 encoding's default endianness.
type Endianness bool
const (
// BigEndian is UTF-16BE.
BigEndian Endianness = false
// LittleEndian is UTF-16LE.
LittleEndian Endianness = true
)
// ErrMissingBOM means that decoding UTF-16 input with ExpectBOM did not find a
// starting byte order mark.
var ErrMissingBOM = errors.New("encoding: missing byte order mark")
type utf16Encoding struct {
config
mib identifier.MIB
}
type config struct {
endianness Endianness
bomPolicy BOMPolicy
}
func (u utf16Encoding) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: &utf16Decoder{
initial: u.config,
current: u.config,
}}
}
func (u utf16Encoding) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{Transformer: &utf16Encoder{
endianness: u.endianness,
initialBOMPolicy: u.bomPolicy,
currentBOMPolicy: u.bomPolicy,
}}
}
func (u utf16Encoding) ID() (mib identifier.MIB, other string) {
return u.mib, ""
}
func (u utf16Encoding) String() string {
e, b := "B", ""
if u.endianness == LittleEndian {
e = "L"
}
switch u.bomPolicy {
case ExpectBOM:
b = "Expect"
case UseBOM:
b = "Use"
case IgnoreBOM:
b = "Ignore"
}
return "UTF-16" + e + "E (" + b + " BOM)"
}
type utf16Decoder struct {
initial config
current config
}
func (u *utf16Decoder) Reset() {
u.current = u.initial
}
func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if len(src) < 2 && atEOF && u.current.bomPolicy&requireBOM != 0 {
return 0, 0, ErrMissingBOM
}
if len(src) == 0 {
return 0, 0, nil
}
if len(src) >= 2 && u.current.bomPolicy&acceptBOM != 0 {
switch {
case src[0] == 0xfe && src[1] == 0xff:
u.current.endianness = BigEndian
nSrc = 2
case src[0] == 0xff && src[1] == 0xfe:
u.current.endianness = LittleEndian
nSrc = 2
default:
if u.current.bomPolicy&requireBOM != 0 {
return 0, 0, ErrMissingBOM
}
}
u.current.bomPolicy = IgnoreBOM
}
var r rune
var dSize, sSize int
for nSrc < len(src) {
if nSrc+1 < len(src) {
x := uint16(src[nSrc+0])<<8 | uint16(src[nSrc+1])
if u.current.endianness == LittleEndian {
x = x>>8 | x<<8
}
r, sSize = rune(x), 2
if utf16.IsSurrogate(r) {
if nSrc+3 < len(src) {
x = uint16(src[nSrc+2])<<8 | uint16(src[nSrc+3])
if u.current.endianness == LittleEndian {
x = x>>8 | x<<8
}
// Save for next iteration if it is not a high surrogate.
if isHighSurrogate(rune(x)) {
r, sSize = utf16.DecodeRune(r, rune(x)), 4
}
} else if !atEOF {
err = transform.ErrShortSrc
break
}
}
if dSize = utf8.RuneLen(r); dSize < 0 {
r, dSize = utf8.RuneError, 3
}
} else if atEOF {
// Single trailing byte.
r, dSize, sSize = utf8.RuneError, 3, 1
} else {
err = transform.ErrShortSrc
break
}
if nDst+dSize > len(dst) {
err = transform.ErrShortDst
break
}
nDst += utf8.EncodeRune(dst[nDst:], r)
nSrc += sSize
}
return nDst, nSrc, err
}
func isHighSurrogate(r rune) bool {
return 0xDC00 <= r && r <= 0xDFFF
}
type utf16Encoder struct {
endianness Endianness
initialBOMPolicy BOMPolicy
currentBOMPolicy BOMPolicy
}
func (u *utf16Encoder) Reset() {
u.currentBOMPolicy = u.initialBOMPolicy
}
func (u *utf16Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if u.currentBOMPolicy&writeBOM != 0 {
if len(dst) < 2 {
return 0, 0, transform.ErrShortDst
}
dst[0], dst[1] = 0xfe, 0xff
u.currentBOMPolicy = IgnoreBOM
nDst = 2
}
r, size := rune(0), 0
for nSrc < len(src) {
r = rune(src[nSrc])
// Decode a 1-byte rune.
if r < utf8.RuneSelf {
size = 1
} else {
// Decode a multi-byte rune.
r, size = utf8.DecodeRune(src[nSrc:])
if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8 or we haven't seen the
// full character yet.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
}
}
if r <= 0xffff {
if nDst+2 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = uint8(r >> 8)
dst[nDst+1] = uint8(r)
nDst += 2
} else {
if nDst+4 > len(dst) {
err = transform.ErrShortDst
break
}
r1, r2 := utf16.EncodeRune(r)
dst[nDst+0] = uint8(r1 >> 8)
dst[nDst+1] = uint8(r1)
dst[nDst+2] = uint8(r2 >> 8)
dst[nDst+3] = uint8(r2)
nDst += 4
}
nSrc += size
}
if u.endianness == LittleEndian {
for i := 0; i < nDst; i += 2 {
dst[i], dst[i+1] = dst[i+1], dst[i]
}
}
return nDst, nSrc, err
}

View file

@ -0,0 +1,87 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package utf8internal contains low-level utf8-related constants, tables, etc.
// that are used internally by the text package.
package utf8internal
// The default lowest and highest continuation byte.
const (
LoCB = 0x80 // 1000 0000
HiCB = 0xBF // 1011 1111
)
// Constants related to getting information of first bytes of UTF-8 sequences.
const (
// ASCII identifies a UTF-8 byte as ASCII.
ASCII = as
// FirstInvalid indicates a byte is invalid as a first byte of a UTF-8
// sequence.
FirstInvalid = xx
// SizeMask is a mask for the size bits. Use use x&SizeMask to get the size.
SizeMask = 7
// AcceptShift is the right-shift count for the first byte info byte to get
// the index into the AcceptRanges table. See AcceptRanges.
AcceptShift = 4
// The names of these constants are chosen to give nice alignment in the
// table below. The first nibble is an index into acceptRanges or F for
// special one-byte cases. The second nibble is the Rune length or the
// Status for the special one-byte case.
xx = 0xF1 // invalid: size 1
as = 0xF0 // ASCII: size 1
s1 = 0x02 // accept 0, size 2
s2 = 0x13 // accept 1, size 3
s3 = 0x03 // accept 0, size 3
s4 = 0x23 // accept 2, size 3
s5 = 0x34 // accept 3, size 4
s6 = 0x04 // accept 0, size 4
s7 = 0x44 // accept 4, size 4
)
// First is information about the first byte in a UTF-8 sequence.
var First = [256]uint8{
// 1 2 3 4 5 6 7 8 9 A B C D E F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F
// 1 2 3 4 5 6 7 8 9 A B C D E F
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF
xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF
s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF
s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF
s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF
}
// AcceptRange gives the range of valid values for the second byte in a UTF-8
// sequence for any value for First that is not ASCII or FirstInvalid.
type AcceptRange struct {
Lo uint8 // lowest value for second byte.
Hi uint8 // highest value for second byte.
}
// AcceptRanges is a slice of AcceptRange values. For a given byte sequence b
//
// AcceptRanges[First[b[0]]>>AcceptShift]
//
// will give the value of AcceptRange for the multi-byte UTF-8 sequence starting
// at b[0].
var AcceptRanges = [...]AcceptRange{
0: {LoCB, HiCB},
1: {0xA0, HiCB},
2: {LoCB, 0x9F},
3: {0x90, HiCB},
4: {LoCB, 0x8F},
}

187
vendor/golang.org/x/text/runes/cond.go generated vendored Normal file
View file

@ -0,0 +1,187 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runes
import (
"unicode/utf8"
"golang.org/x/text/transform"
)
// Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
// This is done for various reasons:
// - To retain the semantics of the Nop transformer: if input is passed to a Nop
// one would expect it to be unchanged.
// - It would be very expensive to pass a converted RuneError to a transformer:
// a transformer might need more source bytes after RuneError, meaning that
// the only way to pass it safely is to create a new buffer and manage the
// intermingling of RuneErrors and normal input.
// - Many transformers leave ill-formed UTF-8 as is, so this is not
// inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
// logical consequence of the operation (as for Map) or if it otherwise would
// pose security concerns (as for Remove).
// - An alternative would be to return an error on ill-formed UTF-8, but this
// would be inconsistent with other operations.
// If returns a transformer that applies tIn to consecutive runes for which
// s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset
// is called on tIn and tNotIn at the start of each run. A Nop transformer will
// substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated
// to RuneError to determine which transformer to apply, but is passed as is to
// the respective transformer.
func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
if tIn == nil && tNotIn == nil {
return Transformer{transform.Nop}
}
if tIn == nil {
tIn = transform.Nop
}
if tNotIn == nil {
tNotIn = transform.Nop
}
sIn, ok := tIn.(transform.SpanningTransformer)
if !ok {
sIn = dummySpan{tIn}
}
sNotIn, ok := tNotIn.(transform.SpanningTransformer)
if !ok {
sNotIn = dummySpan{tNotIn}
}
a := &cond{
tIn: sIn,
tNotIn: sNotIn,
f: s.Contains,
}
a.Reset()
return Transformer{a}
}
type dummySpan struct{ transform.Transformer }
func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) {
return 0, transform.ErrEndOfSpan
}
type cond struct {
tIn, tNotIn transform.SpanningTransformer
f func(rune) bool
check func(rune) bool // current check to perform
t transform.SpanningTransformer // current transformer to use
}
// Reset implements transform.Transformer.
func (t *cond) Reset() {
t.check = t.is
t.t = t.tIn
t.t.Reset() // notIn will be reset on first usage.
}
func (t *cond) is(r rune) bool {
if t.f(r) {
return true
}
t.check = t.isNot
t.t = t.tNotIn
t.tNotIn.Reset()
return false
}
func (t *cond) isNot(r rune) bool {
if !t.f(r) {
return true
}
t.check = t.is
t.t = t.tIn
t.tIn.Reset()
return false
}
// This implementation of Span doesn't help all too much, but it needs to be
// there to satisfy this package's Transformer interface.
// TODO: there are certainly room for improvements, though. For example, if
// t.t == transform.Nop (which will a common occurrence) it will save a bundle
// to special-case that loop.
func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
p := 0
for n < len(src) && err == nil {
// Don't process too much at a time as the Spanner that will be
// called on this block may terminate early.
const maxChunk = 4096
max := len(src)
if v := n + maxChunk; v < max {
max = v
}
atEnd := false
size := 0
current := t.t
for ; p < max; p += size {
r := rune(src[p])
if r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
if !atEOF && !utf8.FullRune(src[p:]) {
err = transform.ErrShortSrc
break
}
}
if !t.check(r) {
// The next rune will be the start of a new run.
atEnd = true
break
}
}
n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
n += n2
if err2 != nil {
return n, err2
}
// At this point either err != nil or t.check will pass for the rune at p.
p = n + size
}
return n, err
}
func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
p := 0
for nSrc < len(src) && err == nil {
// Don't process too much at a time, as the work might be wasted if the
// destination buffer isn't large enough to hold the result or a
// transform returns an error early.
const maxChunk = 4096
max := len(src)
if n := nSrc + maxChunk; n < len(src) {
max = n
}
atEnd := false
size := 0
current := t.t
for ; p < max; p += size {
r := rune(src[p])
if r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
if !atEOF && !utf8.FullRune(src[p:]) {
err = transform.ErrShortSrc
break
}
}
if !t.check(r) {
// The next rune will be the start of a new run.
atEnd = true
break
}
}
nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src)))
nDst += nDst2
nSrc += nSrc2
if err2 != nil {
return nDst, nSrc, err2
}
// At this point either err != nil or t.check will pass for the rune at p.
p = nSrc + size
}
return nDst, nSrc, err
}

355
vendor/golang.org/x/text/runes/runes.go generated vendored Normal file
View file

@ -0,0 +1,355 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package runes provide transforms for UTF-8 encoded text.
package runes // import "golang.org/x/text/runes"
import (
"unicode"
"unicode/utf8"
"golang.org/x/text/transform"
)
// A Set is a collection of runes.
type Set interface {
// Contains returns true if r is contained in the set.
Contains(r rune) bool
}
type setFunc func(rune) bool
func (s setFunc) Contains(r rune) bool {
return s(r)
}
// Note: using funcs here instead of wrapping types result in cleaner
// documentation and a smaller API.
// In creates a Set with a Contains method that returns true for all runes in
// the given RangeTable.
func In(rt *unicode.RangeTable) Set {
return setFunc(func(r rune) bool { return unicode.Is(rt, r) })
}
// NotIn creates a Set with a Contains method that returns true for all runes not
// in the given RangeTable.
func NotIn(rt *unicode.RangeTable) Set {
return setFunc(func(r rune) bool { return !unicode.Is(rt, r) })
}
// Predicate creates a Set with a Contains method that returns f(r).
func Predicate(f func(rune) bool) Set {
return setFunc(f)
}
// Transformer implements the transform.Transformer interface.
type Transformer struct {
t transform.SpanningTransformer
}
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return t.t.Transform(dst, src, atEOF)
}
func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) {
return t.t.Span(b, atEOF)
}
func (t Transformer) Reset() { t.t.Reset() }
// Bytes returns a new byte slice with the result of converting b using t. It
// calls Reset on t. It returns nil if any error was found. This can only happen
// if an error-producing Transformer is passed to If.
func (t Transformer) Bytes(b []byte) []byte {
b, _, err := transform.Bytes(t, b)
if err != nil {
return nil
}
return b
}
// String returns a string with the result of converting s using t. It calls
// Reset on t. It returns the empty string if any error was found. This can only
// happen if an error-producing Transformer is passed to If.
func (t Transformer) String(s string) string {
s, _, err := transform.String(t, s)
if err != nil {
return ""
}
return s
}
// TODO:
// - Copy: copying strings and bytes in whole-rune units.
// - Validation (maybe)
// - Well-formed-ness (maybe)
const runeErrorString = string(utf8.RuneError)
// Remove returns a Transformer that removes runes r for which s.Contains(r).
// Illegal input bytes are replaced by RuneError before being passed to f.
func Remove(s Set) Transformer {
if f, ok := s.(setFunc); ok {
// This little trick cuts the running time of BenchmarkRemove for sets
// created by Predicate roughly in half.
// TODO: special-case RangeTables as well.
return Transformer{remove(f)}
}
return Transformer{remove(s.Contains)}
}
// TODO: remove transform.RemoveFunc.
type remove func(r rune) bool
func (remove) Reset() {}
// Span implements transform.Spanner.
func (t remove) Span(src []byte, atEOF bool) (n int, err error) {
for r, size := rune(0), 0; n < len(src); {
if r = rune(src[n]); r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[n:]) {
err = transform.ErrShortSrc
} else {
err = transform.ErrEndOfSpan
}
break
}
if t(r) {
err = transform.ErrEndOfSpan
break
}
n += size
}
return
}
// Transform implements transform.Transformer.
func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for r, size := rune(0), 0; nSrc < len(src); {
if r = rune(src[nSrc]); r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
// We replace illegal bytes with RuneError. Not doing so might
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
// The resulting byte sequence may subsequently contain runes
// for which t(r) is true that were passed unnoticed.
if !t(utf8.RuneError) {
if nDst+3 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = runeErrorString[0]
dst[nDst+1] = runeErrorString[1]
dst[nDst+2] = runeErrorString[2]
nDst += 3
}
nSrc++
continue
}
if t(r) {
nSrc += size
continue
}
if nDst+size > len(dst) {
err = transform.ErrShortDst
break
}
for i := 0; i < size; i++ {
dst[nDst] = src[nSrc]
nDst++
nSrc++
}
}
return
}
// Map returns a Transformer that maps the runes in the input using the given
// mapping. Illegal bytes in the input are converted to utf8.RuneError before
// being passed to the mapping func.
func Map(mapping func(rune) rune) Transformer {
return Transformer{mapper(mapping)}
}
type mapper func(rune) rune
func (mapper) Reset() {}
// Span implements transform.Spanner.
func (t mapper) Span(src []byte, atEOF bool) (n int, err error) {
for r, size := rune(0), 0; n < len(src); n += size {
if r = rune(src[n]); r < utf8.RuneSelf {
size = 1
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[n:]) {
err = transform.ErrShortSrc
} else {
err = transform.ErrEndOfSpan
}
break
}
if t(r) != r {
err = transform.ErrEndOfSpan
break
}
}
return n, err
}
// Transform implements transform.Transformer.
func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
var replacement rune
var b [utf8.UTFMax]byte
for r, size := rune(0), 0; nSrc < len(src); {
if r = rune(src[nSrc]); r < utf8.RuneSelf {
if replacement = t(r); replacement < utf8.RuneSelf {
if nDst == len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst] = byte(replacement)
nDst++
nSrc++
continue
}
size = 1
} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
if replacement = t(utf8.RuneError); replacement == utf8.RuneError {
if nDst+3 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = runeErrorString[0]
dst[nDst+1] = runeErrorString[1]
dst[nDst+2] = runeErrorString[2]
nDst += 3
nSrc++
continue
}
} else if replacement = t(r); replacement == r {
if nDst+size > len(dst) {
err = transform.ErrShortDst
break
}
for i := 0; i < size; i++ {
dst[nDst] = src[nSrc]
nDst++
nSrc++
}
continue
}
n := utf8.EncodeRune(b[:], replacement)
if nDst+n > len(dst) {
err = transform.ErrShortDst
break
}
for i := 0; i < n; i++ {
dst[nDst] = b[i]
nDst++
}
nSrc += size
}
return
}
// ReplaceIllFormed returns a transformer that replaces all input bytes that are
// not part of a well-formed UTF-8 code sequence with utf8.RuneError.
func ReplaceIllFormed() Transformer {
return Transformer{&replaceIllFormed{}}
}
type replaceIllFormed struct{ transform.NopResetter }
func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) {
for n < len(src) {
// ASCII fast path.
if src[n] < utf8.RuneSelf {
n++
continue
}
r, size := utf8.DecodeRune(src[n:])
// Look for a valid non-ASCII rune.
if r != utf8.RuneError || size != 1 {
n += size
continue
}
// Look for short source data.
if !atEOF && !utf8.FullRune(src[n:]) {
err = transform.ErrShortSrc
break
}
// We have an invalid rune.
err = transform.ErrEndOfSpan
break
}
return n, err
}
func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for nSrc < len(src) {
// ASCII fast path.
if r := src[nSrc]; r < utf8.RuneSelf {
if nDst == len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst] = r
nDst++
nSrc++
continue
}
// Look for a valid non-ASCII rune.
if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 {
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
err = transform.ErrShortDst
break
}
nDst += size
nSrc += size
continue
}
// Look for short source data.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = transform.ErrShortSrc
break
}
// We have an invalid rune.
if nDst+3 > len(dst) {
err = transform.ErrShortDst
break
}
dst[nDst+0] = runeErrorString[0]
dst[nDst+1] = runeErrorString[1]
dst[nDst+2] = runeErrorString[2]
nDst += 3
nSrc++
}
return nDst, nSrc, err
}

6
vendor/modules.txt vendored
View file

@ -975,6 +975,12 @@ golang.org/x/sys/windows/svc/eventlog
golang.org/x/sys/windows/svc/mgr
# golang.org/x/text v0.5.0
## explicit; go 1.17
golang.org/x/text/encoding
golang.org/x/text/encoding/internal
golang.org/x/text/encoding/internal/identifier
golang.org/x/text/encoding/unicode
golang.org/x/text/internal/utf8internal
golang.org/x/text/runes
golang.org/x/text/secure/bidirule
golang.org/x/text/transform
golang.org/x/text/unicode/bidi