
At the same time, simplify CMakeLists magic for libraries that want to include Swift code in the library. The Lib-less name of the library is now always the module name for the library with any Swift additions, extensions, etc. All vfs overlays now live in a common location to make finding them easier from CMake functions. A new pattern is needed for the Lib-less modules to re-export their Cxx counterparts.
133 lines
3.8 KiB
Swift
133 lines
3.8 KiB
Swift
/*
|
|
* Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
import AK
|
|
import Collections
|
|
import Foundation
|
|
@_exported import WebCxx
|
|
|
|
extension Swift.String {
|
|
public init?(decoding: AK.StringView, as: AK.StringView) {
|
|
let maybe_decoded = Web.HTML.decode_to_utf8(decoding, `as`)
|
|
if maybe_decoded.hasValue {
|
|
self.init(maybe_decoded.value!)
|
|
} else {
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
class HTMLTokenizer {
|
|
|
|
enum State {
|
|
case Data
|
|
case RCDATA
|
|
case RAWTEXT
|
|
case ScriptData
|
|
case PLAINTEXT
|
|
case TagOpen
|
|
case EndTagOpen
|
|
case TagName
|
|
case RCDATALessThanSign
|
|
case RCDATAEndTagOpen
|
|
case RCDATAEndTagName
|
|
case RAWTEXTLessThanSign
|
|
case RAWTEXTEndTagOpen
|
|
case RAWTEXTEndTagName
|
|
case ScriptDataLessThanSign
|
|
case ScriptDataEndTagOpen
|
|
case ScriptDataEndTagName
|
|
case ScriptDataEscapeStart
|
|
case ScriptDataEscapeStartDash
|
|
case ScriptDataEscaped
|
|
case ScriptDataEscapedDash
|
|
case ScriptDataEscapedDashDash
|
|
case ScriptDataEscapedLessThanSign
|
|
case ScriptDataEscapedEndTagOpen
|
|
case ScriptDataEscapedEndTagName
|
|
case ScriptDataDoubleEscapeStart
|
|
case ScriptDataDoubleEscaped
|
|
case ScriptDataDoubleEscapedDash
|
|
case ScriptDataDoubleEscapedDashDash
|
|
case ScriptDataDoubleEscapedLessThanSign
|
|
case ScriptDataDoubleEscapeEnd
|
|
case BeforeAttributeName
|
|
case AttributeName
|
|
case AfterAttributeName
|
|
case BeforeAttributeValue
|
|
case AttributeValueDoubleQuoted
|
|
case AttributeValueSingleQuoted
|
|
case AttributeValueUnquoted
|
|
case AfterAttributeValueQuoted
|
|
case SelfClosingStartTag
|
|
case BogusComment
|
|
case MarkupDeclarationOpen
|
|
case CommentStart
|
|
case CommentStartDash
|
|
case Comment
|
|
case CommentLessThanSign
|
|
case CommentLessThanSignBang
|
|
case CommentLessThanSignBangDash
|
|
case CommentLessThanSignBangDashDash
|
|
case CommentEndDash
|
|
case CommentEnd
|
|
case CommentEndBang
|
|
case DOCTYPE
|
|
case BeforeDOCTYPEName
|
|
case DOCTYPEName
|
|
case AfterDOCTYPEName
|
|
case AfterDOCTYPEPublicKeyword
|
|
case BeforeDOCTYPEPublicIdentifier
|
|
case DOCTYPEPublicIdentifierDoubleQuoted
|
|
case DOCTYPEPublicIdentifierSingleQuoted
|
|
case AfterDOCTYPEPublicIdentifier
|
|
case BetweenDOCTYPEPublicAndSystemIdentifiers
|
|
case AfterDOCTYPESystemKeyword
|
|
case BeforeDOCTYPESystemIdentifier
|
|
case DOCTYPESystemIdentifierDoubleQuoted
|
|
case DOCTYPESystemIdentifierSingleQuoted
|
|
case AfterDOCTYPESystemIdentifier
|
|
case BogusDOCTYPE
|
|
case CDATASection
|
|
case CDATASectionBracket
|
|
case CDATASectionEnd
|
|
case CharacterReference
|
|
case NamedCharacterReference
|
|
case AmbiguousAmpersand
|
|
case NumericCharacterReference
|
|
case HexadecimalCharacterReferenceStart
|
|
case DecimalCharacterReferenceStart
|
|
case HexadecimalCharacterReference
|
|
case DecimalCharacterReference
|
|
case NumericCharacterReferenceEnd
|
|
}
|
|
|
|
var input = Swift.String()
|
|
var state = State.Data
|
|
var returnState = State.Data
|
|
|
|
var currentToken = HTMLToken()
|
|
var queuedTokens = Deque<HTMLToken>()
|
|
|
|
public init() {}
|
|
public init?(input: AK.StringView, encoding: AK.StringView) {
|
|
if let string = Swift.String(decoding: input, as: encoding) {
|
|
self.input = string
|
|
} else {
|
|
return nil
|
|
}
|
|
}
|
|
|
|
public func nextToken(stopAtInsertionPoint: Bool = false) -> HTMLToken? {
|
|
|
|
while !queuedTokens.isEmpty {
|
|
return queuedTokens.popFirst()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
}
|