Merge pull request #23372 from GaretJax/23369-unicode-parse-words

Support Unicode characters in parseWords
2016-06-08 17:05:02 +02:00 · 2016-06-08 17:05:02 +02:00 · a63e95423a
commit a63e95423a
parent a4422e6ebc 6284f04a6b
2 changed files with 22 additions and 9 deletions
--- a/builder/dockerfile/parser/line_parsers.go
+++ b/builder/dockerfile/parser/line_parsers.go
@ -12,6 +12,7 @@ import (
 	"fmt"
 	"strings"
 	"unicode"
+	"unicode/utf8"
 )

 var (
@ -58,10 +59,11 @@ func parseWords(rest string) []string {
 	quote := '\000'
 	blankOK := false
 	var ch rune
+	var chWidth int

-	for pos := 0; pos <= len(rest); pos++ {
+	for pos := 0; pos <= len(rest); pos += chWidth {
 		if pos != len(rest) {
-			ch = rune(rest[pos])
+			ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
 		}

 		if phase == inSpaces { // Looking for start of word
@ -95,15 +97,15 @@ func parseWords(rest string) []string {
 				phase = inQuote
 			}
 			if ch == tokenEscape {
-				if pos+1 == len(rest) {
+				if pos+chWidth == len(rest) {
 					continue // just skip an escape token at end of line
 				}
 				// If we're not quoted and we see an escape token, then always just
 				// add the escape token plus the char to the word, even if the char
 				// is a quote.
 				word += string(ch)
-				pos++
-				ch = rune(rest[pos])
+				pos += chWidth
+				ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
 			}
 			word += string(ch)
 			continue
@ -114,14 +116,13 @@ func parseWords(rest string) []string {
 			}
 			// The escape token is special except for ' quotes - can't escape anything for '
 			if ch == tokenEscape && quote != '\'' {
-				if pos+1 == len(rest) {
+				if pos+chWidth == len(rest) {
 					phase = inWord
 					continue // just skip the escape token at end
 				}
-				pos++
-				nextCh := rune(rest[pos])
+				pos += chWidth
 				word += string(ch)
-				ch = nextCh
+				ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
 			}
 			word += string(ch)
 		}
--- a/builder/dockerfile/parser/parser_test.go
+++ b/builder/dockerfile/parser/parser_test.go
@ -92,6 +92,10 @@ func TestParseWords(t *testing.T) {
 			"input":  {"foo bar"},
 			"expect": {"foo", "bar"},
 		},
+		{
+			"input":  {"foo\\ bar"},
+			"expect": {"foo\\ bar"},
+		},
 		{
 			"input":  {"foo=bar"},
 			"expect": {"foo=bar"},
@ -104,6 +108,14 @@ func TestParseWords(t *testing.T) {
 			"input":  {`foo bar "abc xyz"`},
 			"expect": {"foo", "bar", `"abc xyz"`},
 		},
+		{
+			"input":  {"àöû"},
+			"expect": {"àöû"},
+		},
+		{
+			"input":  {`föo bàr "âbc xÿz"`},
+			"expect": {"föo", "bàr", `"âbc xÿz"`},
+		},
 	}

 	for _, test := range tests {