Merge pull request #23372 from GaretJax/23369-unicode-parse-words
Support Unicode characters in parseWords
This commit is contained in:
commit
a63e95423a
2 changed files with 22 additions and 9 deletions
|
@@ -12,6 +12,7 @@ import (
|
|||
"fmt"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@@ -58,10 +59,11 @@ func parseWords(rest string) []string {
|
|||
quote := '\000'
|
||||
blankOK := false
|
||||
var ch rune
|
||||
var chWidth int
|
||||
|
||||
for pos := 0; pos <= len(rest); pos++ {
|
||||
for pos := 0; pos <= len(rest); pos += chWidth {
|
||||
if pos != len(rest) {
|
||||
ch = rune(rest[pos])
|
||||
ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
|
||||
}
|
||||
|
||||
if phase == inSpaces { // Looking for start of word
|
||||
|
@@ -95,15 +97,15 @@ func parseWords(rest string) []string {
|
|||
phase = inQuote
|
||||
}
|
||||
if ch == tokenEscape {
|
||||
if pos+1 == len(rest) {
|
||||
if pos+chWidth == len(rest) {
|
||||
continue // just skip an escape token at end of line
|
||||
}
|
||||
// If we're not quoted and we see an escape token, then always just
|
||||
// add the escape token plus the char to the word, even if the char
|
||||
// is a quote.
|
||||
word += string(ch)
|
||||
pos++
|
||||
ch = rune(rest[pos])
|
||||
pos += chWidth
|
||||
ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
|
||||
}
|
||||
word += string(ch)
|
||||
continue
|
||||
|
@@ -114,14 +116,13 @@ func parseWords(rest string) []string {
|
|||
}
|
||||
// The escape token is special except for ' quotes - can't escape anything for '
|
||||
if ch == tokenEscape && quote != '\'' {
|
||||
if pos+1 == len(rest) {
|
||||
if pos+chWidth == len(rest) {
|
||||
phase = inWord
|
||||
continue // just skip the escape token at end
|
||||
}
|
||||
pos++
|
||||
nextCh := rune(rest[pos])
|
||||
pos += chWidth
|
||||
word += string(ch)
|
||||
ch = nextCh
|
||||
ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
|
||||
}
|
||||
word += string(ch)
|
||||
}
|
||||
|
|
|
@@ -92,6 +92,10 @@ func TestParseWords(t *testing.T) {
|
|||
"input": {"foo bar"},
|
||||
"expect": {"foo", "bar"},
|
||||
},
|
||||
{
|
||||
"input": {"foo\\ bar"},
|
||||
"expect": {"foo\\ bar"},
|
||||
},
|
||||
{
|
||||
"input": {"foo=bar"},
|
||||
"expect": {"foo=bar"},
|
||||
|
@@ -104,6 +108,14 @@ func TestParseWords(t *testing.T) {
|
|||
"input": {`foo bar "abc xyz"`},
|
||||
"expect": {"foo", "bar", `"abc xyz"`},
|
||||
},
|
||||
{
|
||||
"input": {"àöû"},
|
||||
"expect": {"àöû"},
|
||||
},
|
||||
{
|
||||
"input": {`föo bàr "âbc xÿz"`},
|
||||
"expect": {"föo", "bàr", `"âbc xÿz"`},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
|
Loading…
Reference in a new issue