escape.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Copied and modified from Go 1.14 stdlib's encoding/xml
  5. package xml
  6. import (
  7. "unicode/utf8"
  8. )
  9. // Copied from Go 1.14 stdlib's encoding/xml
  10. var (
  11. escQuot = []byte(""") // shorter than """
  12. escApos = []byte("'") // shorter than "'"
  13. escAmp = []byte("&")
  14. escLT = []byte("<")
  15. escGT = []byte(">")
  16. escTab = []byte("	")
  17. escNL = []byte("
")
  18. escCR = []byte("
")
  19. escFFFD = []byte("\uFFFD") // Unicode replacement character
  20. // Additional Escapes
  21. escNextLine = []byte("…")
  22. escLS = []byte("
")
  23. )
  24. // Decide whether the given rune is in the XML Character Range, per
  25. // the Char production of https://www.xml.com/axml/testaxml.htm,
  26. // Section 2.2 Characters.
  27. func isInCharacterRange(r rune) (inrange bool) {
  28. return r == 0x09 ||
  29. r == 0x0A ||
  30. r == 0x0D ||
  31. r >= 0x20 && r <= 0xD7FF ||
  32. r >= 0xE000 && r <= 0xFFFD ||
  33. r >= 0x10000 && r <= 0x10FFFF
  34. }
  35. // TODO: When do we need to escape the string?
  36. // Based on encoding/xml escapeString from the Go Standard Library.
  37. // https://golang.org/src/encoding/xml/xml.go
  38. func escapeString(e writer, s string) {
  39. var esc []byte
  40. last := 0
  41. for i := 0; i < len(s); {
  42. r, width := utf8.DecodeRuneInString(s[i:])
  43. i += width
  44. switch r {
  45. case '"':
  46. esc = escQuot
  47. case '\'':
  48. esc = escApos
  49. case '&':
  50. esc = escAmp
  51. case '<':
  52. esc = escLT
  53. case '>':
  54. esc = escGT
  55. case '\t':
  56. esc = escTab
  57. case '\n':
  58. esc = escNL
  59. case '\r':
  60. esc = escCR
  61. case '\u0085':
  62. // Not escaped by stdlib
  63. esc = escNextLine
  64. case '\u2028':
  65. // Not escaped by stdlib
  66. esc = escLS
  67. default:
  68. if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
  69. esc = escFFFD
  70. break
  71. }
  72. continue
  73. }
  74. e.WriteString(s[last : i-width])
  75. e.Write(esc)
  76. last = i
  77. }
  78. e.WriteString(s[last:])
  79. }
  80. // escapeText writes to w the properly escaped XML equivalent
  81. // of the plain text data s. If escapeNewline is true, newline
  82. // characters will be escaped.
  83. //
  84. // Based on encoding/xml escapeText from the Go Standard Library.
  85. // https://golang.org/src/encoding/xml/xml.go
  86. func escapeText(e writer, s []byte) {
  87. var esc []byte
  88. last := 0
  89. for i := 0; i < len(s); {
  90. r, width := utf8.DecodeRune(s[i:])
  91. i += width
  92. switch r {
  93. case '"':
  94. esc = escQuot
  95. case '\'':
  96. esc = escApos
  97. case '&':
  98. esc = escAmp
  99. case '<':
  100. esc = escLT
  101. case '>':
  102. esc = escGT
  103. case '\t':
  104. esc = escTab
  105. case '\n':
  106. // This always escapes newline, which is different than stdlib's optional
  107. // escape of new line.
  108. esc = escNL
  109. case '\r':
  110. esc = escCR
  111. case '\u0085':
  112. // Not escaped by stdlib
  113. esc = escNextLine
  114. case '\u2028':
  115. // Not escaped by stdlib
  116. esc = escLS
  117. default:
  118. if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
  119. esc = escFFFD
  120. break
  121. }
  122. continue
  123. }
  124. e.Write(s[last : i-width])
  125. e.Write(esc)
  126. last = i
  127. }
  128. e.Write(s[last:])
  129. }