فهرست منبع

Refactor dockerfile parser

Signed-off-by: Daniel Nephin <dnephin@docker.com>
Daniel Nephin 8 سال پیش
والد
کامیت
4cbc953a5d

+ 1 - 1
builder/dockerfile/parser/line_parsers.go

@@ -42,7 +42,7 @@ func parseSubCommand(rest string, d *Directive) (*Node, map[string]bool, error)
 		return nil, nil, nil
 		return nil, nil, nil
 	}
 	}
 
 
-	_, child, err := ParseLine(rest, d, false)
+	child, err := newNodeFromLine(rest, d)
 	if err != nil {
 	if err != nil {
 		return nil, nil, err
 		return nil, nil, err
 	}
 	}

+ 78 - 84
builder/dockerfile/parser/parser.go

@@ -63,6 +63,21 @@ func (node *Node) Dump() string {
 	return strings.TrimSpace(str)
 	return strings.TrimSpace(str)
 }
 }
 
 
+func (node *Node) lines(start, end int) {
+	node.StartLine = start
+	node.endLine = end
+}
+
+// AddChild adds a new child node, and updates line information
+func (node *Node) AddChild(child *Node, startLine, endLine int) {
+	child.lines(startLine, endLine)
+	if node.StartLine < 0 {
+		node.StartLine = startLine
+	}
+	node.endLine = endLine
+	node.Children = append(node.Children, child)
+}
+
 var (
 var (
 	dispatch           map[string]func(string, *Directive) (*Node, map[string]bool, error)
 	dispatch           map[string]func(string, *Directive) (*Node, map[string]bool, error)
 	tokenWhitespace    = regexp.MustCompile(`[\t\v\f\r ]+`)
 	tokenWhitespace    = regexp.MustCompile(`[\t\v\f\r ]+`)
@@ -154,25 +169,6 @@ func init() {
 	}
 	}
 }
 }
 
 
-// ParseLine parses a line and returns the remainder.
-func ParseLine(line string, d *Directive, ignoreCont bool) (string, *Node, error) {
-	if err := d.processLine(line); err != nil {
-		return "", nil, err
-	}
-
-	if line = stripComments(line); line == "" {
-		return "", nil, nil
-	}
-
-	if !ignoreCont && d.lineContinuationRegex.MatchString(line) {
-		line = d.lineContinuationRegex.ReplaceAllString(line, "")
-		return line, nil, nil
-	}
-
-	node, err := newNodeFromLine(line, d)
-	return "", node, err
-}
-
 // newNodeFromLine splits the line into parts, and dispatches to a function
 // newNodeFromLine splits the line into parts, and dispatches to a function
 // based on the command and command arguments. A Node is created from the
 // based on the command and command arguments. A Node is created from the
 // result of the dispatch.
 // result of the dispatch.
@@ -207,13 +203,6 @@ type Result struct {
 	EscapeToken rune
 	EscapeToken rune
 }
 }
 
 
-// scanLines is a split function for bufio.Scanner. that augments the default
-// line scanner by supporting newline escapes.
-func scanLines(data []byte, atEOF bool) (int, []byte, error) {
-	advance, token, err := bufio.ScanLines(data, atEOF)
-	return advance, token, err
-}
-
 // Parse reads lines from a Reader, parses the lines into an AST and returns
 // Parse reads lines from a Reader, parses the lines into an AST and returns
 // the AST and escape token
 // the AST and escape token
 func Parse(rwc io.Reader) (*Result, error) {
 func Parse(rwc io.Reader) (*Result, error) {
@@ -221,80 +210,85 @@ func Parse(rwc io.Reader) (*Result, error) {
 	currentLine := 0
 	currentLine := 0
 	root := &Node{StartLine: -1}
 	root := &Node{StartLine: -1}
 	scanner := bufio.NewScanner(rwc)
 	scanner := bufio.NewScanner(rwc)
-	scanner.Split(scanLines)
 
 
-	utf8bom := []byte{0xEF, 0xBB, 0xBF}
+	var err error
 	for scanner.Scan() {
 	for scanner.Scan() {
-		scannedBytes := scanner.Bytes()
-		// We trim UTF8 BOM
-		if currentLine == 0 {
-			scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
+		bytes := scanner.Bytes()
+		switch currentLine {
+		case 0:
+			bytes, err = processFirstLine(d, bytes)
+			if err != nil {
+				return nil, err
+			}
+		default:
+			bytes = processLine(bytes, true)
 		}
 		}
-
-		scannedLine := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
 		currentLine++
 		currentLine++
-		line, child, err := ParseLine(scannedLine, d, false)
-		if err != nil {
-			return nil, err
-		}
-		startLine := currentLine
-
-		if line != "" && child == nil {
-			for scanner.Scan() {
-				newline := scanner.Text()
-				currentLine++
 
 
-				if stripComments(strings.TrimSpace(newline)) == "" {
-					continue
-				}
+		startLine := currentLine
+		line, isEndOfLine := trimContinuationCharacter(string(bytes), d)
+		if isEndOfLine && line == "" {
+			continue
+		}
 
 
-				line, child, err = ParseLine(line+newline, d, false)
-				if err != nil {
-					return nil, err
-				}
+		for !isEndOfLine && scanner.Scan() {
+			bytes := processLine(scanner.Bytes(), false)
+			currentLine++
 
 
-				if child != nil {
-					break
-				}
-			}
-			if child == nil && line != "" {
-				// When we call ParseLine we'll pass in 'true' for
-				// the ignoreCont param if we're at the EOF. This will
-				// prevent the func from returning immediately w/o
-				// parsing the line thinking that there's more input
-				// to come.
-
-				_, child, err = ParseLine(line, d, scanner.Err() == nil)
-				if err != nil {
-					return nil, err
-				}
+			// TODO: warn this is being deprecated/removed
+			if isEmptyContinuationLine(bytes) {
+				continue
 			}
 			}
+
+			continuationLine := string(bytes)
+			continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d)
+			line += continuationLine
 		}
 		}
 
 
-		if child != nil {
-			// Update the line information for the current child.
-			child.StartLine = startLine
-			child.endLine = currentLine
-			// Update the line information for the root. The starting line of the root is always the
-			// starting line of the first child and the ending line is the ending line of the last child.
-			if root.StartLine < 0 {
-				root.StartLine = currentLine
-			}
-			root.endLine = currentLine
-			root.Children = append(root.Children, child)
+		child, err := newNodeFromLine(line, d)
+		if err != nil {
+			return nil, err
 		}
 		}
+		root.AddChild(child, startLine, currentLine)
 	}
 	}
 
 
 	return &Result{AST: root, EscapeToken: d.escapeToken}, nil
 	return &Result{AST: root, EscapeToken: d.escapeToken}, nil
 }
 }
 
 
-// covers comments and empty lines. Lines should be trimmed before passing to
-// this function.
-func stripComments(line string) string {
-	// string is already trimmed at this point
-	if tokenComment.MatchString(line) {
-		return tokenComment.ReplaceAllString(line, "")
+func trimComments(src []byte) []byte {
+	return tokenComment.ReplaceAll(src, []byte{})
+}
+
+func trimWhitespace(src []byte) []byte {
+	return bytes.TrimLeftFunc(src, unicode.IsSpace)
+}
+
+func isEmptyContinuationLine(line []byte) bool {
+	return len(trimComments(trimWhitespace(line))) == 0
+}
+
+var utf8bom = []byte{0xEF, 0xBB, 0xBF}
+
+func trimContinuationCharacter(line string, d *Directive) (string, bool) {
+	if d.lineContinuationRegex.MatchString(line) {
+		line = d.lineContinuationRegex.ReplaceAllString(line, "")
+		return line, false
 	}
 	}
+	return line, true
+}
+
+// TODO: remove stripLeftWhitespace after deprecation period. It seems silly
+// to preserve whitespace on continuation lines. Why is that done?
+func processLine(token []byte, stripLeftWhitespace bool) []byte {
+	if stripLeftWhitespace {
+		token = trimWhitespace(token)
+	}
+	return trimComments(token)
+}
 
 
-	return line
+func processFirstLine(d *Directive, token []byte) ([]byte, error) {
+	token = bytes.TrimPrefix(token, utf8bom)
+	token = trimWhitespace(token)
+	err := d.processLine(string(token))
+	return trimComments(token), err
 }
 }

+ 3 - 0
builder/dockerfile/parser/testfiles/continue-at-eof/Dockerfile

@@ -0,0 +1,3 @@
+FROM alpine:3.5
+
+RUN something \

+ 2 - 0
builder/dockerfile/parser/testfiles/continue-at-eof/result

@@ -0,0 +1,2 @@
+(from "alpine:3.5")
+(run "something")