123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286 |
/*
 * Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
- @_exported import WebCxx
/// A single HTML token together with its start and end source positions.
///
/// The payload lives in `type`, an enum with associated values. The computed
/// properties below (`tagName`, `attributes`, `name`, ...) are convenience
/// accessors that read or rebuild that enum value. Each one traps with
/// `preconditionFailure` when the token is not of the matching kind.
public class HTMLToken {
    /// A location expressed as line, column and byte offset.
    public struct Position: Equatable {
        var line = UInt()
        var column = UInt()
        var byteOffset = UInt()
    }

    /// One attribute carried by a start or end tag token.
    public struct Attribute: Equatable {
        public var prefix: Swift.String? = nil
        public var localName: Swift.String
        public var namespace_: Swift.String? = nil
        public var value: Swift.String
        public var nameStartPosition = Position()
        public var nameEndPosition = Position()
        public var valueStartPosition = Position()
        public var valueEndPosition = Position()

        /// Creates an attribute with the given name and value; every other
        /// field keeps its default.
        public init(localName: Swift.String, value: Swift.String) {
            self.localName = localName
            self.value = value
        }
    }

    /// The kind of a token plus the data specific to that kind.
    public enum TokenType: Equatable {
        case Invalid
        case DOCTYPE(
            name: Swift.String?,
            publicIdentifier: Swift.String?,
            systemIdentifier: Swift.String?,
            forceQuirksMode: Bool)
        case StartTag(
            tagName: Swift.String,
            selfClosing: Bool = false,
            selfClosingAcknowledged: Bool = false,
            attributes: [Attribute] = [])
        case EndTag(
            tagName: Swift.String,
            selfClosing: Bool = false,
            selfClosingAcknowledged: Bool = false,
            attributes: [Attribute] = [])
        case Comment(data: Swift.String)
        case Character(codePoint: Swift.Character)
        case EndOfFile
    }

    // MARK: - Stored state

    /// The token's kind and payload. Defaults to `.Invalid`.
    public var type = TokenType.Invalid
    public var startPosition = Position()
    public var endPosition = Position()

    // MARK: - Initializers

    /// Creates an `.Invalid` token with default positions.
    public init() {}

    /// Creates a token of the given type with default positions.
    public init(type: TokenType) {
        self.type = type
    }

    // MARK: - Kind queries

    /// Returns `true` when this token is a `.Character` token.
    public func isCharacter() -> Bool {
        if case .Character = type { return true }
        return false
    }

    /// Returns `true` when this token is an `.EndTag` token.
    public func isEndTag() -> Bool {
        if case .EndTag = type { return true }
        return false
    }

    /// Returns `true` when this token is a `.StartTag` token.
    public func isStartTag() -> Bool {
        if case .StartTag = type { return true }
        return false
    }

    /// Returns `true` when this token is either a start tag or an end tag.
    public func isTag() -> Bool {
        return isStartTag() || isEndTag()
    }

    /// Returns `true` when this character token holds tab, line feed, form
    /// feed, carriage return or space.
    ///
    /// - Precondition: The token is a `.Character` token.
    public func isParserWhitespace() -> Bool {
        precondition(isCharacter(), "isParserWhitespace() called on non-character token")
        guard case .Character(let codePoint) = type else { return false }

        // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
        switch codePoint {
        case "\t", "\n", "\u{000C}" /* \f */, "\r", " ":
            return true
        default:
            return false
        }
    }

    // MARK: - Tag accessors

    // Is in-place mutating enums a thing? Seems not:
    // https://forums.swift.org/t/in-place-mutation-of-an-enum-associated-value/11747
    // Every setter below therefore rebuilds the whole enum value, copying the
    // untouched fields across.

    /// The attributes of a start or end tag token.
    ///
    /// - Precondition: The token is a `.StartTag` or `.EndTag` token.
    public var attributes: [Attribute] {
        get {
            switch type {
            case .StartTag(_, _, _, let attributes), .EndTag(_, _, _, let attributes):
                return attributes
            default:
                preconditionFailure("attributes called on non-tag token")
            }
        }
        set {
            switch type {
            case .StartTag(let tagName, let selfClosing, let acknowledged, _):
                type = .StartTag(tagName: tagName, selfClosing: selfClosing, selfClosingAcknowledged: acknowledged, attributes: newValue)
            case .EndTag(let tagName, let selfClosing, let acknowledged, _):
                type = .EndTag(tagName: tagName, selfClosing: selfClosing, selfClosingAcknowledged: acknowledged, attributes: newValue)
            default:
                preconditionFailure("attributes= called on non-tag token")
            }
        }
    }

    /// The tag name of a start or end tag token.
    ///
    /// - Precondition: The token is a `.StartTag` or `.EndTag` token.
    public var tagName: Swift.String {
        get {
            switch type {
            case .StartTag(let tagName, _, _, _), .EndTag(let tagName, _, _, _):
                return tagName
            default:
                preconditionFailure("tagName called on non-tag token")
            }
        }
        set {
            switch type {
            case .StartTag(_, let selfClosing, let acknowledged, let attributes):
                type = .StartTag(tagName: newValue, selfClosing: selfClosing, selfClosingAcknowledged: acknowledged, attributes: attributes)
            case .EndTag(_, let selfClosing, let acknowledged, let attributes):
                type = .EndTag(tagName: newValue, selfClosing: selfClosing, selfClosingAcknowledged: acknowledged, attributes: attributes)
            default:
                preconditionFailure("tagName= called on non-tag token")
            }
        }
    }

    /// The self-closing flag of a start or end tag token.
    ///
    /// - Precondition: The token is a `.StartTag` or `.EndTag` token.
    public var selfClosing: Bool {
        get {
            switch type {
            case .StartTag(_, let selfClosing, _, _), .EndTag(_, let selfClosing, _, _):
                return selfClosing
            default:
                preconditionFailure("selfClosing called on non-tag token")
            }
        }
        set {
            switch type {
            case .StartTag(let tagName, _, let acknowledged, let attributes):
                type = .StartTag(tagName: tagName, selfClosing: newValue, selfClosingAcknowledged: acknowledged, attributes: attributes)
            case .EndTag(let tagName, _, let acknowledged, let attributes):
                type = .EndTag(tagName: tagName, selfClosing: newValue, selfClosingAcknowledged: acknowledged, attributes: attributes)
            default:
                preconditionFailure("selfClosing= called on non-tag token")
            }
        }
    }

    /// The `selfClosingAcknowledged` flag of a start or end tag token.
    ///
    /// Provided for parity with the other tag accessors so callers do not
    /// have to rebuild the enum value by hand to change this one field.
    ///
    /// - Precondition: The token is a `.StartTag` or `.EndTag` token.
    public var selfClosingAcknowledged: Bool {
        get {
            switch type {
            case .StartTag(_, _, let acknowledged, _), .EndTag(_, _, let acknowledged, _):
                return acknowledged
            default:
                preconditionFailure("selfClosingAcknowledged called on non-tag token")
            }
        }
        set {
            switch type {
            case .StartTag(let tagName, let selfClosing, _, let attributes):
                type = .StartTag(tagName: tagName, selfClosing: selfClosing, selfClosingAcknowledged: newValue, attributes: attributes)
            case .EndTag(let tagName, let selfClosing, _, let attributes):
                type = .EndTag(tagName: tagName, selfClosing: selfClosing, selfClosingAcknowledged: newValue, attributes: attributes)
            default:
                preconditionFailure("selfClosingAcknowledged= called on non-tag token")
            }
        }
    }

    // MARK: - DOCTYPE accessors

    /// The name of a DOCTYPE token, or `nil` when it has none.
    ///
    /// - Precondition: The token is a `.DOCTYPE` token.
    public var name: Swift.String? {
        get {
            guard case .DOCTYPE(let name, _, _, _) = type else {
                // The message names this property (`name`), matching the
                // convention used by every other accessor in this class.
                preconditionFailure("name called on non-doctype token")
            }
            return name
        }
        set {
            guard case .DOCTYPE(_, let publicIdentifier, let systemIdentifier, let forceQuirksMode) = type else {
                preconditionFailure("name= called on non-doctype token")
            }
            type = .DOCTYPE(name: newValue, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier, forceQuirksMode: forceQuirksMode)
        }
    }

    /// The `forceQuirksMode` flag of a DOCTYPE token.
    ///
    /// - Precondition: The token is a `.DOCTYPE` token.
    public var forceQuirks: Bool {
        get {
            guard case .DOCTYPE(_, _, _, let forceQuirksMode) = type else {
                preconditionFailure("forceQuirks called on non-doctype token")
            }
            return forceQuirksMode
        }
        set {
            guard case .DOCTYPE(let name, let publicIdentifier, let systemIdentifier, _) = type else {
                preconditionFailure("forceQuirks= called on non-doctype token")
            }
            type = .DOCTYPE(name: name, publicIdentifier: publicIdentifier, systemIdentifier: systemIdentifier, forceQuirksMode: newValue)
        }
    }

    /// The public identifier of a DOCTYPE token, or `nil` when it has none.
    ///
    /// - Precondition: The token is a `.DOCTYPE` token.
    public var publicIdentifier: Swift.String? {
        get {
            guard case .DOCTYPE(_, let publicIdentifier, _, _) = type else {
                preconditionFailure("publicIdentifier called on non-doctype token")
            }
            return publicIdentifier
        }
        set {
            guard case .DOCTYPE(let name, _, let systemIdentifier, let forceQuirksMode) = type else {
                preconditionFailure("publicIdentifier= called on non-doctype token")
            }
            type = .DOCTYPE(name: name, publicIdentifier: newValue, systemIdentifier: systemIdentifier, forceQuirksMode: forceQuirksMode)
        }
    }

    /// The system identifier of a DOCTYPE token, or `nil` when it has none.
    ///
    /// - Precondition: The token is a `.DOCTYPE` token.
    public var systemIdentifier: Swift.String? {
        get {
            guard case .DOCTYPE(_, _, let systemIdentifier, _) = type else {
                preconditionFailure("systemIdentifier called on non-doctype token")
            }
            return systemIdentifier
        }
        set {
            guard case .DOCTYPE(let name, let publicIdentifier, _, let forceQuirksMode) = type else {
                preconditionFailure("systemIdentifier= called on non-doctype token")
            }
            type = .DOCTYPE(name: name, publicIdentifier: publicIdentifier, systemIdentifier: newValue, forceQuirksMode: forceQuirksMode)
        }
    }
}
// MARK: - CustomStringConvertible

extension HTMLToken.Position: CustomStringConvertible {
    /// Renders the position as `line:column`; the byte offset is not included.
    public var description: Swift.String {
        "\(line):\(column)"
    }
}
extension HTMLToken.TokenType: CustomStringConvertible {
    // FIXME (from the original author): Print attributes for start/end tags.
    //        The attribute array is currently interpolated as-is.

    /// A textual rendering of the token type: the case name followed by its
    /// associated values, labelled, in declaration order.
    public var description: Swift.String {
        // Start and end tags share one layout and differ only in the leading
        // case name, so both go through this local formatter.
        func describeTag(
            _ caseName: Swift.String,
            _ tagName: Swift.String,
            _ selfClosing: Bool,
            _ acknowledged: Bool,
            _ attributes: [HTMLToken.Attribute]
        ) -> Swift.String {
            return "\(caseName)(tagName: \(tagName), selfClosing: \(selfClosing), selfClosingAcknowledged: \(acknowledged), attributes: \(attributes))"
        }

        switch self {
        case .Invalid:
            return "Invalid"
        case .EndOfFile:
            return "EndOfFile"
        case .Comment(let data):
            return "Comment(data: \(data))"
        case .Character(let codePoint):
            return "Character(codePoint: \(codePoint))"
        case .StartTag(let tagName, let selfClosing, let acknowledged, let attributes):
            return describeTag("StartTag", tagName, selfClosing, acknowledged, attributes)
        case .EndTag(let tagName, let selfClosing, let acknowledged, let attributes):
            return describeTag("EndTag", tagName, selfClosing, acknowledged, attributes)
        case .DOCTYPE(let name, let publicID, let systemID, let quirks):
            // Missing optional fields are spelled out as the literal text "nil".
            let fields: [Swift.String] = [
                "name: \(name ?? "nil")",
                "publicIdentifier: \(publicID ?? "nil")",
                "systemIdentifier: \(systemID ?? "nil")",
                "forceQuirksMode: \(quirks)",
            ]
            return "DOCTYPE(" + fields.joined(separator: ", ") + ")"
        }
    }
}
extension HTMLToken: CustomStringConvertible {
    /// A textual rendering of the token: its type, followed by `@start` or
    /// `@start-end` when those positions differ from a default `Position()`.
    ///
    /// A default start position suppresses the whole suffix, even when the
    /// end position has been set.
    public var description: Swift.String {
        let unset = Position()
        let base = "HTMLToken(type: \(type))"

        if startPosition == unset {
            return base
        }
        if endPosition == unset {
            return base + "@\(startPosition)"
        }
        return base + "@\(startPosition)-\(endPosition)"
    }
}
|