HTMLToken.swift 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. /*
  2. * Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. @_exported import WebCxx
  /// A token value: a `TokenType` payload together with a start and an end `Position`.
  public class HTMLToken {
      /// A location expressed as a line, a column and a byte offset.
      /// A default-constructed position has all three fields set to zero.
      public struct Position: Equatable {
          var line: UInt = 0
          var column: UInt = 0
          var byteOffset: UInt = 0
      }

      /// One attribute carried by a start or end tag, with the positions
      /// recorded for the beginning and end of its name and of its value.
      public struct Attribute: Equatable {
          var prefix: Swift.String?
          var localName: Swift.String
          var namespace_: Swift.String?
          var value: Swift.String
          var nameStartPosition: Position
          var nameEndPosition: Position
          var valueStartPosition: Position
          var valueEndPosition: Position
      }

      /// The kind of a token, including the data specific to that kind.
      public enum TokenType: Equatable {
          /// The state of a token that has not been given a type yet.
          case Invalid
          case DOCTYPE(
              name: Swift.String?,
              publicIdentifier: Swift.String?,
              systemIdentifier: Swift.String?,
              forceQuirksMode: Bool)
          case StartTag(
              tagName: Swift.String,
              selfClosing: Bool,
              selfClosingAcknowledged: Bool,
              attributes: [Attribute])
          case EndTag(
              tagName: Swift.String,
              selfClosing: Bool,
              selfClosingAcknowledged: Bool,
              attributes: [Attribute])
          case Comment(data: Swift.String)
          case Character(codePoint: Character)
          case EndOfFile
      }

      // Stored state. A freshly created token is `.Invalid` and has
      // default (all-zero) positions.
      public var type = TokenType.Invalid
      public var startPosition = Position()
      public var endPosition = Position()

      /// Creates an `.Invalid` token with default positions.
      public init() {}

      /// Creates a token of the given type with default positions.
      public init(type: TokenType) {
          self.type = type
      }

      /// Returns `true` when this token's type is `.Character`.
      public func isCharacter() -> Bool {
          guard case .Character = type else {
              return false
          }
          return true
      }

      /// Returns `true` when this character token holds tab, line feed,
      /// form feed, carriage return or space.
      ///
      /// Calling this on a token that is not a character token violates a
      /// precondition.
      public func isParserWhitespace() -> Bool {
          precondition(isCharacter(), "isParserWhitespace() called on non-character token")

          guard case .Character(let codePoint) = type else {
              return false
          }

          // NOTE: '\r' is on this list because the parser treats it as
          //       whitespace, even though the tokenizer does not.
          switch codePoint {
          case "\t", "\n", "\u{000C}" /* \f */, "\r", " ":
              return true
          default:
              return false
          }
      }
  }
  extension HTMLToken.Position: CustomStringConvertible {
      /// Text form `line:column`. The byte offset is not part of the output.
      public var description: Swift.String {
          "\(line):\(column)"
      }
  }
  extension HTMLToken.TokenType: CustomStringConvertible {
      /// Text form of the token type: the case name, followed by its labelled
      /// associated values in parentheses for the cases that carry any.
      /// Optional DOCTYPE strings that are absent are rendered as `nil`.
      // FIXME: Print attributes for start/end tags in a dedicated format
      //        (they are currently interpolated as the raw array).
      public var description: Swift.String {
          switch self {
          case .Invalid:
              return "Invalid"
          case .EndOfFile:
              return "EndOfFile"
          case let .Comment(data):
              return "Comment(data: \(data))"
          case let .Character(codePoint):
              return "Character(codePoint: \(codePoint))"
          case let .DOCTYPE(name, publicIdentifier, systemIdentifier, forceQuirksMode):
              let shownName = name ?? "nil"
              let shownPublic = publicIdentifier ?? "nil"
              let shownSystem = systemIdentifier ?? "nil"
              return "DOCTYPE(name: \(shownName), publicIdentifier: \(shownPublic), systemIdentifier: \(shownSystem), forceQuirksMode: \(forceQuirksMode))"
          case let .StartTag(tagName, selfClosing, selfClosingAcknowledged, attributes):
              return Self.tagDescription(
                  "StartTag",
                  tagName: tagName,
                  selfClosing: selfClosing,
                  selfClosingAcknowledged: selfClosingAcknowledged,
                  attributes: attributes)
          case let .EndTag(tagName, selfClosing, selfClosingAcknowledged, attributes):
              return Self.tagDescription(
                  "EndTag",
                  tagName: tagName,
                  selfClosing: selfClosing,
                  selfClosingAcknowledged: selfClosingAcknowledged,
                  attributes: attributes)
          }
      }

      /// Shared formatter for the two tag cases, which differ only in the leading case name.
      private static func tagDescription(
          _ caseName: Swift.String,
          tagName: Swift.String,
          selfClosing: Bool,
          selfClosingAcknowledged: Bool,
          attributes: [HTMLToken.Attribute]
      ) -> Swift.String {
          "\(caseName)(tagName: \(tagName), selfClosing: \(selfClosing), selfClosingAcknowledged: \(selfClosingAcknowledged), attributes: \(attributes))"
      }
  }
  extension HTMLToken: CustomStringConvertible {
      /// Text form of the token: its type, then `@start` or `@start-end`.
      ///
      /// A position equal to a default-constructed `Position` is treated as
      /// absent. With a default start position nothing is appended; with only
      /// a default end position just the start is appended.
      public var description: Swift.String {
          let unset = Position()
          let prefix = "HTMLToken(type: \(type))"

          guard startPosition != unset else {
              return prefix
          }
          guard endPosition != unset else {
              return "\(prefix)@\(startPosition)"
          }
          return "\(prefix)@\(startPosition)-\(endPosition)"
      }
  }