123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- // Copyright 2009 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // Copied and modified from Go 1.14 stdlib's encoding/xml
- package xml
- import (
- "unicode/utf8"
- )
// Copied from Go 1.14 stdlib's encoding/xml
//
// Each value is the literal character-reference text written to the output in
// place of the corresponding rune. The strings must remain spelled out as
// entity text (for example the five bytes of "&#34;"), never as the character
// the entity stands for.
var (
	escQuot = []byte("&#34;") // shorter than "&quot;"
	escApos = []byte("&#39;") // shorter than "&apos;"
	escAmp  = []byte("&amp;")
	escLT   = []byte("&lt;")
	escGT   = []byte("&gt;")
	escTab  = []byte("&#x9;")
	escNL   = []byte("&#xA;")
	escCR   = []byte("&#xD;")
	escFFFD = []byte("\uFFFD") // Unicode replacement character

	// Additional Escapes
	escNextLine = []byte("&#x85;")   // U+0085 NEXT LINE
	escLS       = []byte("&#x2028;") // U+2028 LINE SEPARATOR
)
// isInCharacterRange reports whether r is a code point allowed by the Char
// production of the XML specification (Section 2.2, Characters; see
// https://www.xml.com/axml/testaxml.htm): tab, line feed, carriage return,
// or any code point in U+0020..U+D7FF, U+E000..U+FFFD or U+10000..U+10FFFF.
func isInCharacterRange(r rune) (inrange bool) {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		// The only permitted control characters.
		inrange = true
	case 0x20 <= r && r <= 0xD7FF:
		// Basic Multilingual Plane below the surrogate block.
		inrange = true
	case 0xE000 <= r && r <= 0xFFFD:
		// Above the surrogates, excluding U+FFFE and U+FFFF.
		inrange = true
	case 0x10000 <= r && r <= 0x10FFFF:
		// Supplementary planes.
		inrange = true
	}
	return inrange
}
- // TODO: When do we need to escape the string?
- // Based on encoding/xml escapeString from the Go Standard Library.
- // https://golang.org/src/encoding/xml/xml.go
- func escapeString(e writer, s string) {
- var esc []byte
- last := 0
- for i := 0; i < len(s); {
- r, width := utf8.DecodeRuneInString(s[i:])
- i += width
- switch r {
- case '"':
- esc = escQuot
- case '\'':
- esc = escApos
- case '&':
- esc = escAmp
- case '<':
- esc = escLT
- case '>':
- esc = escGT
- case '\t':
- esc = escTab
- case '\n':
- esc = escNL
- case '\r':
- esc = escCR
- case '\u0085':
- // Not escaped by stdlib
- esc = escNextLine
- case '\u2028':
- // Not escaped by stdlib
- esc = escLS
- default:
- if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
- esc = escFFFD
- break
- }
- continue
- }
- e.WriteString(s[last : i-width])
- e.Write(esc)
- last = i
- }
- e.WriteString(s[last:])
- }
- // escapeText writes to w the properly escaped XML equivalent
- // of the plain text data s. If escapeNewline is true, newline
- // characters will be escaped.
- //
- // Based on encoding/xml escapeText from the Go Standard Library.
- // https://golang.org/src/encoding/xml/xml.go
- func escapeText(e writer, s []byte) {
- var esc []byte
- last := 0
- for i := 0; i < len(s); {
- r, width := utf8.DecodeRune(s[i:])
- i += width
- switch r {
- case '"':
- esc = escQuot
- case '\'':
- esc = escApos
- case '&':
- esc = escAmp
- case '<':
- esc = escLT
- case '>':
- esc = escGT
- case '\t':
- esc = escTab
- case '\n':
- // This always escapes newline, which is different than stdlib's optional
- // escape of new line.
- esc = escNL
- case '\r':
- esc = escCR
- case '\u0085':
- // Not escaped by stdlib
- esc = escNextLine
- case '\u2028':
- // Not escaped by stdlib
- esc = escLS
- default:
- if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
- esc = escFFFD
- break
- }
- continue
- }
- e.Write(s[last : i-width])
- e.Write(esc)
- last = i
- }
- e.Write(s[last:])
- }
|