HTMLToken.swift 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. /*
  2. * Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. @_exported import WebCxx
  7. public class HTMLToken {
  8. public struct Position: Equatable {
  9. var line = UInt()
  10. var column = UInt()
  11. var byteOffset = UInt()
  12. }
  13. public struct Attribute: Equatable {
  14. public var prefix: Swift.String? = nil
  15. public var localName: Swift.String
  16. public var namespace_: Swift.String? = nil
  17. public var value: Swift.String
  18. public var nameStartPosition = Position()
  19. public var nameEndPosition = Position()
  20. public var valueStartPosition = Position()
  21. public var valueEndPosition = Position()
  22. public init(localName: Swift.String, value: Swift.String) {
  23. self.localName = localName
  24. self.value = value
  25. }
  26. }
  27. public enum TokenType: Equatable {
  28. case Invalid
  29. case DOCTYPE(
  30. name: Swift.String?,
  31. publicIdentifier: Swift.String?,
  32. systemIdentifier: Swift.String?,
  33. forceQuirksMode: Bool)
  34. case StartTag(
  35. tagName: Swift.String,
  36. selfClosing: Bool = false,
  37. selfClosingAcknowledged: Bool = false,
  38. attributes: [Attribute] = [])
  39. case EndTag(
  40. tagName: Swift.String,
  41. selfClosing: Bool = false,
  42. selfClosingAcknowledged: Bool = false,
  43. attributes: [Attribute] = [])
  44. case Comment(data: Swift.String)
  45. case Character(codePoint: Character)
  46. case EndOfFile
  47. }
  48. public func isCharacter() -> Bool {
  49. if case .Character(_) = self.type {
  50. return true
  51. }
  52. return false
  53. }
  54. public func isEndTag() -> Bool {
  55. if case .EndTag(_, _, _, _) = self.type {
  56. return true
  57. }
  58. return false
  59. }
  60. public func isStartTag() -> Bool {
  61. if case .StartTag(_, _, _, _) = self.type {
  62. return true
  63. }
  64. return false
  65. }
  66. public func isTag() -> Bool {
  67. return isStartTag() || isEndTag()
  68. }
  69. public func isParserWhitespace() -> Bool {
  70. precondition(isCharacter(), "isParserWhitespace() called on non-character token")
  71. // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
  72. switch self.type {
  73. case .Character(codePoint: "\t"),
  74. .Character(codePoint: "\n"),
  75. .Character(codePoint: "\u{000C}"), // \f
  76. .Character(codePoint: "\r"),
  77. .Character(codePoint: " "):
  78. return true
  79. default:
  80. return false
  81. }
  82. }
  83. public var type = TokenType.Invalid
  84. public var startPosition = Position()
  85. public var endPosition = Position()
  86. // Is in-place mutating enums a thing? Seems not https://forums.swift.org/t/in-place-mutation-of-an-enum-associated-value/11747
  87. public var attributes: [Attribute] {
  88. get {
  89. switch self.type {
  90. case .StartTag(_, _, _, let attributes):
  91. return attributes
  92. case .EndTag(_, _, _, let attributes):
  93. return attributes
  94. default:
  95. preconditionFailure("attributes called on non-tag token")
  96. }
  97. }
  98. set {
  99. switch self.type {
  100. case .StartTag(let tagName, let selfClosing, let selfClosingAcknowledged, attributes: _):
  101. self.type = .StartTag(tagName: tagName, selfClosing: selfClosing, selfClosingAcknowledged: selfClosingAcknowledged, attributes: newValue)
  102. case .EndTag(let tagName, let selfClosing, let selfClosingAcknowledged, attributes: _):
  103. self.type = .EndTag(tagName: tagName, selfClosing: selfClosing, selfClosingAcknowledged: selfClosingAcknowledged, attributes: newValue)
  104. default:
  105. preconditionFailure("attributes= called on non-tag token")
  106. }
  107. }
  108. }
  109. public var tagName: Swift.String {
  110. get {
  111. switch self.type {
  112. case .StartTag(let tagName, _, _, _):
  113. return tagName
  114. case .EndTag(let tagName, _, _, _):
  115. return tagName
  116. default:
  117. preconditionFailure("tagName called on non-tag token")
  118. }
  119. }
  120. set {
  121. switch self.type {
  122. case .StartTag(tagName: _, let selfClosing, let selfClosingAcknowledged, let attributes):
  123. self.type = .StartTag(tagName: newValue, selfClosing: selfClosing, selfClosingAcknowledged: selfClosingAcknowledged, attributes: attributes)
  124. case .EndTag(tagName: _, let selfClosing, let selfClosingAcknowledged, let attributes):
  125. self.type = .EndTag(tagName: newValue, selfClosing: selfClosing, selfClosingAcknowledged: selfClosingAcknowledged, attributes: attributes)
  126. default:
  127. preconditionFailure("tagName= called on non-tag token")
  128. }
  129. }
  130. }
  131. public var selfClosing: Bool {
  132. get {
  133. switch self.type {
  134. case .StartTag(_, let selfClosing, _, _):
  135. return selfClosing
  136. case .EndTag(_, let selfClosing, _, _):
  137. return selfClosing
  138. default:
  139. preconditionFailure("selfClosing called on non-tag token")
  140. }
  141. }
  142. set {
  143. switch self.type {
  144. case .StartTag(let tagName, _, let selfClosingAcknowledged, let attributes):
  145. self.type = .StartTag(tagName: tagName, selfClosing: newValue, selfClosingAcknowledged: selfClosingAcknowledged, attributes: attributes)
  146. case .EndTag(let tagName, _, let selfClosingAcknowledged, let attributes):
  147. self.type = .EndTag(tagName: tagName, selfClosing: newValue, selfClosingAcknowledged: selfClosingAcknowledged, attributes: attributes)
  148. default:
  149. preconditionFailure("selfClosing= called on non-tag token")
  150. }
  151. }
  152. }
  153. public var name: Swift.String? {
  154. get {
  155. switch self.type {
  156. case .DOCTYPE(let name, _, _, _):
  157. return name
  158. default:
  159. preconditionFailure("doctypeName called on non-doctype token")
  160. }
  161. }
  162. set {
  163. switch self.type {
  164. case .DOCTYPE(_, let publicIdentifier, let systemIdentifier, let forceQuirksMode):
  165. self.type = .DOCTYPE(name: newValue, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier, forceQuirksMode: forceQuirksMode)
  166. default:
  167. preconditionFailure("doctypeName= called on non-doctype token")
  168. }
  169. }
  170. }
  171. public var forceQuirks: Bool {
  172. get {
  173. switch self.type {
  174. case .DOCTYPE(_, _, _, let forceQuirksMode):
  175. return forceQuirksMode
  176. default:
  177. preconditionFailure("forceQuirks called on non-doctype token")
  178. }
  179. }
  180. set {
  181. switch self.type {
  182. case .DOCTYPE(let name, let publicIdentifier, let systemIdentifier, _):
  183. self.type = .DOCTYPE(name: name, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier, forceQuirksMode: newValue)
  184. default:
  185. preconditionFailure("forceQuirks= called on non-doctype token")
  186. }
  187. }
  188. }
  189. public var publicIdentifier: Swift.String? {
  190. get {
  191. switch self.type {
  192. case .DOCTYPE(_, let publicIdentifier, _, _):
  193. return publicIdentifier
  194. default:
  195. preconditionFailure("publicIdentifier called on non-doctype token")
  196. }
  197. }
  198. set {
  199. switch self.type {
  200. case .DOCTYPE(let name, _, let systemIdentifier, let forceQuirksMode):
  201. self.type = .DOCTYPE(name: name, publicIdentifier: newValue, systemIdentifier: systemIdentifier, forceQuirksMode: forceQuirksMode)
  202. default:
  203. preconditionFailure("publicIdentifier= called on non-doctype token")
  204. }
  205. }
  206. }
  207. public var systemIdentifier: Swift.String? {
  208. get {
  209. switch self.type {
  210. case .DOCTYPE(_, _, let systemIdentifier, _):
  211. return systemIdentifier
  212. default:
  213. preconditionFailure("systemIdentifier called on non-doctype token")
  214. }
  215. }
  216. set {
  217. switch self.type {
  218. case .DOCTYPE(let name, let publicIdentifier, _, let forceQuirksMode):
  219. self.type = .DOCTYPE(name: name, publicIdentifier: publicIdentifier, systemIdentifier: newValue, forceQuirksMode: forceQuirksMode)
  220. default:
  221. preconditionFailure("systemIdentifier= called on non-doctype token")
  222. }
  223. }
  224. }
  225. public init() {}
  226. public init(type: TokenType) {
  227. self.type = type
  228. }
  229. }
  230. extension HTMLToken.Position: CustomStringConvertible {
  231. public var description: Swift.String {
  232. return "\(self.line):\(self.column)"
  233. }
  234. }
  235. extension HTMLToken.TokenType: CustomStringConvertible {
  236. // FIXME: Print attributes for start/end tags
  237. public var description: Swift.String {
  238. switch self {
  239. case .Invalid:
  240. return "Invalid"
  241. case .DOCTYPE(let name, let publicIdentifier, let systemIdentifier, let forceQuirksMode):
  242. return "DOCTYPE(name: \(name ?? "nil"), publicIdentifier: \(publicIdentifier ?? "nil"), systemIdentifier: \(systemIdentifier ?? "nil"), forceQuirksMode: \(forceQuirksMode))"
  243. case .StartTag(let tagName, let selfClosing, let selfClosingAcknowledged, let attributes):
  244. return "StartTag(tagName: \(tagName), selfClosing: \(selfClosing), selfClosingAcknowledged: \(selfClosingAcknowledged), attributes: \(attributes))"
  245. case .EndTag(let tagName, let selfClosing, let selfClosingAcknowledged, let attributes):
  246. return "EndTag(tagName: \(tagName), selfClosing: \(selfClosing), selfClosingAcknowledged: \(selfClosingAcknowledged), attributes: \(attributes))"
  247. case .Comment(let data):
  248. return "Comment(data: \(data))"
  249. case .Character(let codePoint):
  250. return "Character(codePoint: \(codePoint))"
  251. case .EndOfFile:
  252. return "EndOfFile"
  253. }
  254. }
  255. }
  256. extension HTMLToken: CustomStringConvertible {
  257. public var description: Swift.String {
  258. if self.startPosition == Position() {
  259. return "HTMLToken(type: \(self.type))"
  260. } else if self.endPosition == Position() {
  261. return "HTMLToken(type: \(self.type))@\(self.startPosition)"
  262. } else {
  263. return "HTMLToken(type: \(self.type))@\(self.startPosition)-\(self.endPosition)"
  264. }
  265. }
  266. }