LibWeb: Add start of HTML Tokenizer in Swift
Currently it's just a Token class.
This commit is contained in:
parent
d0bc266c55
commit
fb074f9d0c
Notes:
github-actions[bot]
2024-08-24 01:18:22 +00:00
Author: https://github.com/ADKaster Commit: https://github.com/LadybirdBrowser/ladybird/commit/fb074f9d0c8 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1130 Reviewed-by: https://github.com/AtkinsSJ Reviewed-by: https://github.com/alimpfard Reviewed-by: https://github.com/dzfrias
5 changed files with 169 additions and 1 deletions
Tests/LibWeb
Userland/Libraries/LibWeb
|
@ -20,4 +20,9 @@ if (ENABLE_SWIFT)
|
|||
target_link_libraries(TestLibWebSwiftBindings PRIVATE AK LibWeb)
|
||||
target_compile_options(TestLibWebSwiftBindings PRIVATE -parse-as-library)
|
||||
add_test(NAME TestLibWebSwiftBindings COMMAND TestLibWebSwiftBindings)
|
||||
|
||||
add_executable(TestHTMLTokenizerSwift TestHTMLTokenizerSwift.swift)
|
||||
target_link_libraries(TestHTMLTokenizerSwift PRIVATE AK LibWeb)
|
||||
target_compile_options(TestHTMLTokenizerSwift PRIVATE -parse-as-library)
|
||||
add_test(NAME TestHTMLTokenizerSwift COMMAND TestHTMLTokenizerSwift)
|
||||
endif()
|
||||
|
|
58
Tests/LibWeb/TestHTMLTokenizerSwift.swift
Normal file
58
Tests/LibWeb/TestHTMLTokenizerSwift.swift
Normal file
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
import AK
|
||||
import LibWeb
|
||||
import SwiftLibWeb
|
||||
import Foundation
|
||||
|
||||
/// Text output stream that forwards everything written to it to the
/// process's standard error handle.
class StandardError: TextOutputStream {
    /// Encodes `string` as UTF-8 and writes the bytes to standard error.
    /// A failed write traps, because the call is made with `try!`.
    func write(_ string: Swift.String) {
        let utf8Bytes = Data(string.utf8)
        let stderrHandle = FileHandle.standardError
        try! stderrHandle.write(contentsOf: utf8Bytes)
    }
}
|
||||
|
||||
@main
struct TestHTMLTokenizerSwift {

    /// Prints one progress line to standard error.
    private static func report(_ message: Swift.String) {
        var errorStream = StandardError()
        print(message, to: &errorStream)
    }

    /// Checks that a token whose type is set to a character payload
    /// reports itself as a character token.
    static func testTokenTypes() {
        report("Testing HTMLToken types...")

        let token = HTMLToken()
        token.type = .Character(codePoint: "a")
        precondition(token.isCharacter())

        report("HTMLToken types pass")
    }

    /// Checks `isParserWhitespace()` against the five code points expected
    /// to count as whitespace and a handful of letters that must not.
    static func testParserWhitespace() {
        report("Testing HTMLToken parser whitespace...")

        let whitespaceCodePoints: [Character] = ["\t", "\n", "\r", "\u{000C}", " "]
        for candidate in whitespaceCodePoints {
            let whitespaceToken = HTMLToken(type: .Character(codePoint: candidate))
            precondition(whitespaceToken.isParserWhitespace())
        }

        let letterCodePoints: [Character] = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
        for candidate in letterCodePoints {
            let letterToken = HTMLToken(type: .Character(codePoint: candidate))
            precondition(!letterToken.isParserWhitespace())
        }

        report("HTMLToken parser whitespace pass")
    }

    /// Entry point: runs every test in order; a failed precondition traps.
    static func main() {
        report("Starting test suite...")

        testTokenTypes()
        testParserWhitespace()

        report("All tests pass")
    }
}
|
|
@ -44,4 +44,4 @@ struct TestLibWebSwiftBindings {
|
|||
|
||||
print("All tests pass", to: &standardError)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -794,4 +794,31 @@ if (ENABLE_SWIFT)
|
|||
list(APPEND LIBWEB_ALL_GENERATED_HEADERS ${generated_headers})
|
||||
|
||||
generate_clang_module_map(LibWeb GENERATED_FILES ${LIBWEB_ALL_GENERATED_HEADERS})
|
||||
|
||||
target_compile_features(LibWeb PUBLIC cxx_std_23)
|
||||
|
||||
target_sources(LibWeb PRIVATE
|
||||
HTML/Parser/HTMLToken.swift
|
||||
)
|
||||
target_compile_definitions(LibWeb PRIVATE LIBWEB_USE_SWIFT)
|
||||
set_target_properties(LibWeb PROPERTIES Swift_MODULE_NAME "SwiftLibWeb")
|
||||
|
||||
# FIXME: These should be pulled automatically from interface compile options for the target
|
||||
set(VFS_OVERLAY_OPTIONS
|
||||
-Xcc -ivfsoverlay${CMAKE_CURRENT_BINARY_DIR}/vfs_overlay.yaml
|
||||
-Xcc -ivfsoverlay${CMAKE_CURRENT_BINARY_DIR}/../LibGfx/vfs_overlay.yaml
|
||||
-Xcc -ivfsoverlay${Lagom_BINARY_DIR}/AK/vfs_overlay.yaml
|
||||
)
|
||||
get_target_property(LIBWEB_NATIVE_DIRS LibWeb INCLUDE_DIRECTORIES)
|
||||
_swift_generate_cxx_header(LibWeb "LibWeb-Swift.h"
|
||||
SEARCH_PATHS ${LIBWEB_NATIVE_DIRS}
|
||||
COMPILE_OPTIONS ${VFS_OVERLAY_OPTIONS}
|
||||
)
|
||||
|
||||
# FIXME: https://gitlab.kitware.com/cmake/cmake/-/issues/26175
|
||||
if (APPLE)
|
||||
add_custom_command(TARGET LibWeb POST_BUILD
|
||||
COMMAND install_name_tool -id @rpath/liblagom-web.0.dylib "$<TARGET_FILE:LibWeb>"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
|
78
Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.swift
Normal file
78
Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.swift
Normal file
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
/// A single HTML token: a `TokenType` payload plus start and end positions.
///
/// The nested `Position` and `Attribute` types expose public fields and
/// public initializers so that code outside this module can inspect
/// positions and build tag tokens that carry attributes.
public class HTMLToken {
    /// A line / column / byte-offset triple. Every field defaults to zero.
    public struct Position {
        public var line: UInt
        public var column: UInt
        public var byteOffset: UInt

        /// Creates a position; any field that is omitted is zero.
        public init(line: UInt = 0, column: UInt = 0, byteOffset: UInt = 0) {
            self.line = line
            self.column = column
            self.byteOffset = byteOffset
        }
    }

    /// One attribute carried by a `StartTag` or `EndTag` token.
    public struct Attribute {
        public var prefix: String?
        public var localName: String
        public var namespace_: String?
        public var value: String
        public var nameStartPosition: Position
        public var nameEndPosition: Position
        public var valueStartPosition: Position
        public var valueEndPosition: Position

        /// Creates an attribute.
        ///
        /// Labels and order match the implicit memberwise initializer this
        /// replaces, so existing call sites keep compiling. `prefix` and
        /// `namespace_` default to `nil`; the positions default to zero.
        public init(
            prefix: String? = nil,
            localName: String,
            namespace_: String? = nil,
            value: String,
            nameStartPosition: Position = Position(),
            nameEndPosition: Position = Position(),
            valueStartPosition: Position = Position(),
            valueEndPosition: Position = Position()
        ) {
            self.prefix = prefix
            self.localName = localName
            self.namespace_ = namespace_
            self.value = value
            self.nameStartPosition = nameStartPosition
            self.nameEndPosition = nameEndPosition
            self.valueStartPosition = valueStartPosition
            self.valueEndPosition = valueEndPosition
        }
    }

    /// The kind of token together with the data specific to that kind.
    public enum TokenType {
        case Invalid
        case DOCTYPE(
            name: String?,
            publicIdentifier: String?,
            systemIdentifier: String?,
            forceQuirksMode: Bool)
        case StartTag(
            tagName: String,
            selfClosing: Bool,
            selfClosingAcknowledged: Bool,
            attributes: [Attribute])
        case EndTag(
            tagName: String,
            selfClosing: Bool,
            selfClosingAcknowledged: Bool,
            attributes: [Attribute])
        case Comment(data: String)
        case Character(codePoint: Character)
        case EndOfFile
    }

    /// Returns `true` when this token's type is `.Character`.
    public func isCharacter() -> Bool {
        if case .Character = type {
            return true
        }
        return false
    }

    /// Returns `true` when this character token holds tab, line feed,
    /// form feed, carriage return or space.
    ///
    /// - Precondition: the token is a character token; calling this on any
    ///   other token type traps.
    public func isParserWhitespace() -> Bool {
        precondition(isCharacter(), "isParserWhitespace() called on non-character token")

        // The precondition above guarantees a character payload.
        guard case .Character(let codePoint) = type else {
            return false
        }

        // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
        switch codePoint {
        case "\t", "\n", "\u{000C}" /* \f */, "\r", " ":
            return true
        default:
            return false
        }
    }

    public var type = TokenType.Invalid
    public var startPosition = Position()
    public var endPosition = Position()

    /// Creates a token of type `.Invalid` with zeroed positions.
    public init() {}

    /// Creates a token of the given type with zeroed positions.
    public init(type: TokenType) {
        self.type = type
    }
}
|
Loading…
Add table
Reference in a new issue