HTMLToken.swift 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. /*
  2. * Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  /// A single token, carrying its kind (`type`) and the start/end positions
  /// recorded for it.
  public class HTMLToken {
      /// A location described by a line, a column and a byte offset.
      ///
      /// All three components default to zero.
      public struct Position {
          public var line: UInt = 0
          public var column: UInt = 0
          public var byteOffset: UInt = 0

          /// Creates a position; every component defaults to zero.
          ///
          /// Mirrors the memberwise initializer the compiler would synthesize,
          /// but is `public` so the type is usable from other modules.
          public init(line: UInt = 0, column: UInt = 0, byteOffset: UInt = 0) {
              self.line = line
              self.column = column
              self.byteOffset = byteOffset
          }
      }

      /// An attribute attached to a tag token, together with the positions of
      /// its name and value.
      public struct Attribute {
          public var prefix: String?
          public var localName: String
          public var namespace_: String?
          public var value: String
          public var nameStartPosition: Position
          public var nameEndPosition: Position
          public var valueStartPosition: Position
          public var valueEndPosition: Position

          /// Creates an attribute.
          ///
          /// Labels, order and defaults match the synthesized memberwise
          /// initializer (`prefix` and `namespace_` default to `nil`), so
          /// existing call sites keep compiling.
          public init(
              prefix: String? = nil,
              localName: String,
              namespace_: String? = nil,
              value: String,
              nameStartPosition: Position,
              nameEndPosition: Position,
              valueStartPosition: Position,
              valueEndPosition: Position
          ) {
              self.prefix = prefix
              self.localName = localName
              self.namespace_ = namespace_
              self.value = value
              self.nameStartPosition = nameStartPosition
              self.nameEndPosition = nameEndPosition
              self.valueStartPosition = valueStartPosition
              self.valueEndPosition = valueEndPosition
          }
      }

      /// The kind of a token along with the payload specific to that kind.
      public enum TokenType {
          /// The state of a token that has not been given a type yet.
          case Invalid
          case DOCTYPE(
              name: String?,
              publicIdentifier: String?,
              systemIdentifier: String?,
              forceQuirksMode: Bool)
          case StartTag(
              tagName: String,
              selfClosing: Bool,
              selfClosingAcknowledged: Bool,
              attributes: [Attribute])
          case EndTag(
              tagName: String,
              selfClosing: Bool,
              selfClosingAcknowledged: Bool,
              attributes: [Attribute])
          case Comment(data: String)
          case Character(codePoint: Character)
          case EndOfFile
      }

      /// The token's kind and payload; `.Invalid` until assigned.
      public var type = TokenType.Invalid
      /// Position recorded for the start of the token.
      public var startPosition = Position()
      /// Position recorded for the end of the token.
      public var endPosition = Position()

      /// Creates an `.Invalid` token with zeroed positions.
      public init() {}

      /// Creates a token of the given type with zeroed positions.
      public init(type: TokenType) {
          self.type = type
      }

      /// Returns `true` when this token is a `.Character` token.
      public func isCharacter() -> Bool {
          if case .Character = type {
              return true
          }
          return false
      }

      /// Returns `true` when this character token holds tab, line feed,
      /// form feed, carriage return or space.
      ///
      /// - Precondition: The token is a `.Character` token.
      public func isParserWhitespace() -> Bool {
          precondition(isCharacter(), "isParserWhitespace() called on non-character token")

          // The precondition above guarantees the binding succeeds; the
          // fallback only exists to satisfy the compiler.
          guard case .Character(let codePoint) = type else {
              return false
          }

          // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
          switch codePoint {
          case "\t", "\n", "\u{000C}", "\r", " ":  // "\u{000C}" is form feed (\f)
              return true
          default:
              return false
          }
      }
  }