TestHTMLTokenizerSwift.swift 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. /*
  2. * Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. import AK
  7. import Testing
  8. import Web
  9. @Suite
  10. struct TestHTMLTokenizerSwift {
  11. @Test func tokenTypes() {
  12. let default_token = HTMLToken()
  13. default_token.type = .Character(codePoint: "a")
  14. #expect(default_token.isCharacter())
  15. #expect("\(default_token)" == "HTMLToken(type: Character(codePoint: a))")
  16. }
  17. @Test func parserWhitespace() {
  18. for codePoint: Character in ["\t", "\n", "\r", "\u{000C}", " "] {
  19. let token = HTMLToken(type: .Character(codePoint: codePoint))
  20. #expect(token.isParserWhitespace())
  21. }
  22. for codePoint: Character in ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] {
  23. let token = HTMLToken(type: .Character(codePoint: codePoint))
  24. #expect(!token.isParserWhitespace())
  25. }
  26. }
  27. @Test func dataStateNoInput() {
  28. let tokenizer = HTMLTokenizer()
  29. #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
  30. let token = tokenizer.nextToken()
  31. #expect(token?.type == .EndOfFile)
  32. let token2 = tokenizer.nextToken()
  33. #expect(token2 == nil)
  34. #expect(tokenizer.state == HTMLTokenizer.State.Data)
  35. }
  36. @Test func dataStateSingleChar() {
  37. guard let tokenizer = HTMLTokenizer(input: "X") else {
  38. Issue.record("Failed to create tokenizer for 'X'")
  39. return
  40. }
  41. #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
  42. let token = tokenizer.nextToken()
  43. #expect(token?.type == .Character(codePoint: "X"))
  44. let token2 = tokenizer.nextToken()
  45. #expect(token2?.type == .EndOfFile)
  46. let token3 = tokenizer.nextToken()
  47. #expect(token3 == nil)
  48. #expect(tokenizer.state == HTMLTokenizer.State.Data)
  49. }
  50. @Test func dataStateAmpersand() {
  51. guard let tokenizer = HTMLTokenizer(input: "&") else {
  52. Issue.record("Failed to create tokenizer for '&'")
  53. return
  54. }
  55. #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
  56. let token = tokenizer.nextToken()
  57. #expect(token?.type == .EndOfFile)
  58. #expect(tokenizer.state == HTMLTokenizer.State.CharacterReference)
  59. let token2 = tokenizer.nextToken()
  60. #expect(token2 == nil)
  61. }
  62. @Test func tagOpenOnly() {
  63. guard let tokenizer = HTMLTokenizer(input: "<") else {
  64. Issue.record("Failed to create tokenizer for '<'")
  65. return
  66. }
  67. #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
  68. let token = tokenizer.nextToken()
  69. #expect(token?.type == .Character(codePoint: "<"))
  70. let token2 = tokenizer.nextToken()
  71. #expect(token2?.type == .EndOfFile)
  72. #expect(tokenizer.state == HTMLTokenizer.State.TagOpen)
  73. let token3 = tokenizer.nextToken()
  74. #expect(token3 == nil)
  75. }
  76. @Test func dataStateNulChar() {
  77. guard let tokenizer = HTMLTokenizer(input: "H\0I") else {
  78. Issue.record("Failed to create tokenizer for 'H\\0I'")
  79. return
  80. }
  81. #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
  82. let token = tokenizer.nextToken()
  83. #expect(token?.type == .Character(codePoint: "H"))
  84. let token2 = tokenizer.nextToken()
  85. #expect(token2?.type == .Character(codePoint: "\u{FFFD}"))
  86. let token3 = tokenizer.nextToken()
  87. #expect(token3?.type == .Character(codePoint: "I"))
  88. let token4 = tokenizer.nextToken()
  89. #expect(token4?.type == .EndOfFile)
  90. #expect(tokenizer.state == HTMLTokenizer.State.Data)
  91. }
  92. @Test func scriptTagWithAttributes() {
  93. guard let tokenizer = HTMLTokenizer(input: "<script type=\"text/javascript\">") else {
  94. Issue.record("Failed to create tokenizer for '<script type=\"text/javascript\">'")
  95. return
  96. }
  97. #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
  98. let token = tokenizer.nextToken()
  99. #expect(token?.type == .StartTag(tagName: "script", attributes: [HTMLToken.Attribute(localName: "type", value: "text/javascript")]))
  100. let token2 = tokenizer.nextToken()
  101. #expect(token2?.type == .EndOfFile)
  102. #expect(tokenizer.state == HTMLTokenizer.State.Data)
  103. }
  104. @Test func scriptWithContent() {
  105. guard let tokenizer = HTMLTokenizer(input: "<script>var x = 1;</script>") else {
  106. Issue.record("Failed to create tokenizer for '<script>var x = 1;</script>'")
  107. return
  108. }
  109. let token = tokenizer.nextToken()
  110. #expect(token?.type == .StartTag(tagName: "script", attributes: []))
  111. for codePoint in "var x = 1;" {
  112. let token = tokenizer.nextToken()
  113. #expect(token?.type == .Character(codePoint: codePoint))
  114. }
  115. let token2 = tokenizer.nextToken()
  116. #expect(token2?.type == .EndTag(tagName: "script"))
  117. let token3 = tokenizer.nextToken()
  118. #expect(token3?.type == .EndOfFile)
  119. }
  120. @Test func simpleDivWithContent() {
  121. guard let tokenizer = HTMLTokenizer(input: "<div>hi</div>") else {
  122. Issue.record("Failed to create tokenizer for '<div>hi</div>'")
  123. return
  124. }
  125. #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
  126. let token = tokenizer.nextToken()
  127. #expect(token?.type == .StartTag(tagName: "div", attributes: []))
  128. let token2 = tokenizer.nextToken()
  129. #expect(token2?.type == .Character(codePoint: "h"))
  130. let token3 = tokenizer.nextToken()
  131. #expect(token3?.type == .Character(codePoint: "i"))
  132. let token4 = tokenizer.nextToken()
  133. #expect(token4?.type == .EndTag(tagName: "div"))
  134. let token5 = tokenizer.nextToken()
  135. #expect(token5?.type == .EndOfFile)
  136. }
  137. @Test func simpleDivWithContentAndAttributes() {
  138. guard let tokenizer = HTMLTokenizer(input: "<div class=\"foo\">hi</div>") else {
  139. Issue.record("Failed to create tokenizer for '<div class=\"foo\">hi</div>'")
  140. return
  141. }
  142. #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
  143. let token = tokenizer.nextToken()
  144. #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo")]))
  145. let token2 = tokenizer.nextToken()
  146. #expect(token2?.type == .Character(codePoint: "h"))
  147. let token3 = tokenizer.nextToken()
  148. #expect(token3?.type == .Character(codePoint: "i"))
  149. let token4 = tokenizer.nextToken()
  150. #expect(token4?.type == .EndTag(tagName: "div"))
  151. let token5 = tokenizer.nextToken()
  152. #expect(token5?.type == .EndOfFile)
  153. }
  154. @Test func severalDivsWithAttributesAndContent() {
  155. // Explicitly use unquoted and single quotes for attribute values
  156. guard let tokenizer = HTMLTokenizer(input: "<div class=foo>hi</div><div class='bar'>bye</div>") else {
  157. Issue.record("Failed to create tokenizer for '<div class=\"foo\">hi</div><div class=\"bar\">bye</div>'")
  158. return
  159. }
  160. let token = tokenizer.nextToken()
  161. #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo")]))
  162. for codePoint in "hi" {
  163. let token = tokenizer.nextToken()
  164. #expect(token?.type == .Character(codePoint: codePoint))
  165. }
  166. let token2 = tokenizer.nextToken()
  167. #expect(token2?.type == .EndTag(tagName: "div"))
  168. let token3 = tokenizer.nextToken()
  169. #expect(token3?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "bar")]))
  170. for codePoint in "bye" {
  171. let token = tokenizer.nextToken()
  172. #expect(token?.type == .Character(codePoint: codePoint))
  173. }
  174. let token4 = tokenizer.nextToken()
  175. #expect(token4?.type == .EndTag(tagName: "div"))
  176. let token5 = tokenizer.nextToken()
  177. #expect(token5?.type == .EndOfFile)
  178. }
  179. @Test func startTagWithMultipleAttributes() {
  180. guard let tokenizer = HTMLTokenizer(input: "<div class=\"foo\" id=\"bar\">hi</div attr=endTagAttributeWhee>") else {
  181. Issue.record("Failed to create tokenizer for '<div class=\"foo\" id=\"bar\">hi</div>'")
  182. return
  183. }
  184. let token = tokenizer.nextToken()
  185. #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo"), HTMLToken.Attribute(localName: "id", value: "bar")]))
  186. for codePoint in "hi" {
  187. let token = tokenizer.nextToken()
  188. #expect(token?.type == .Character(codePoint: codePoint))
  189. }
  190. let token2 = tokenizer.nextToken()
  191. #expect(token2?.type == .EndTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "attr", value: "endTagAttributeWhee")]))
  192. let token3 = tokenizer.nextToken()
  193. #expect(token3?.type == .EndOfFile)
  194. }
  195. }