/*
 * Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
import AK
import Testing
import Web
- @Suite
- struct TestHTMLTokenizerSwift {
- @Test func tokenTypes() {
- let default_token = HTMLToken()
- default_token.type = .Character(codePoint: "a")
- #expect(default_token.isCharacter())
- #expect("\(default_token)" == "HTMLToken(type: Character(codePoint: a))")
- }
- @Test func parserWhitespace() {
- for codePoint: Character in ["\t", "\n", "\r", "\u{000C}", " "] {
- let token = HTMLToken(type: .Character(codePoint: codePoint))
- #expect(token.isParserWhitespace())
- }
- for codePoint: Character in ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] {
- let token = HTMLToken(type: .Character(codePoint: codePoint))
- #expect(!token.isParserWhitespace())
- }
- }
- @Test func dataStateNoInput() {
- let tokenizer = HTMLTokenizer()
- #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
- let token = tokenizer.nextToken()
- #expect(token?.type == .EndOfFile)
- let token2 = tokenizer.nextToken()
- #expect(token2 == nil)
- #expect(tokenizer.state == HTMLTokenizer.State.Data)
- }
- @Test func dataStateSingleChar() {
- guard let tokenizer = HTMLTokenizer(input: "X") else {
- Issue.record("Failed to create tokenizer for 'X'")
- return
- }
- #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
- let token = tokenizer.nextToken()
- #expect(token?.type == .Character(codePoint: "X"))
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndOfFile)
- let token3 = tokenizer.nextToken()
- #expect(token3 == nil)
- #expect(tokenizer.state == HTMLTokenizer.State.Data)
- }
- @Test func dataStateAmpersand() {
- guard let tokenizer = HTMLTokenizer(input: "&") else {
- Issue.record("Failed to create tokenizer for '&'")
- return
- }
- #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
- let token = tokenizer.nextToken()
- #expect(token?.type == .Character(codePoint: "&"))
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndOfFile)
- let token3 = tokenizer.nextToken()
- #expect(token3 == nil)
- }
- @Test func tagOpenOnly() {
- guard let tokenizer = HTMLTokenizer(input: "<") else {
- Issue.record("Failed to create tokenizer for '<'")
- return
- }
- #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
- let token = tokenizer.nextToken()
- #expect(token?.type == .Character(codePoint: "<"))
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndOfFile)
- #expect(tokenizer.state == HTMLTokenizer.State.TagOpen)
- let token3 = tokenizer.nextToken()
- #expect(token3 == nil)
- }
- @Test func dataStateNulChar() {
- guard let tokenizer = HTMLTokenizer(input: "H\0I") else {
- Issue.record("Failed to create tokenizer for 'H\\0I'")
- return
- }
- #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
- let token = tokenizer.nextToken()
- #expect(token?.type == .Character(codePoint: "H"))
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .Character(codePoint: "\u{FFFD}"))
- let token3 = tokenizer.nextToken()
- #expect(token3?.type == .Character(codePoint: "I"))
- let token4 = tokenizer.nextToken()
- #expect(token4?.type == .EndOfFile)
- #expect(tokenizer.state == HTMLTokenizer.State.Data)
- }
- @Test func scriptTagWithAttributes() {
- guard let tokenizer = HTMLTokenizer(input: "<script type=\"text/javascript\">") else {
- Issue.record("Failed to create tokenizer for '<script type=\"text/javascript\">'")
- return
- }
- #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
- let token = tokenizer.nextToken()
- #expect(token?.type == .StartTag(tagName: "script", attributes: [HTMLToken.Attribute(localName: "type", value: "text/javascript")]))
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndOfFile)
- #expect(tokenizer.state == HTMLTokenizer.State.Data)
- }
- @Test func scriptWithContent() {
- guard let tokenizer = HTMLTokenizer(input: "<script>var x = 1;</script>") else {
- Issue.record("Failed to create tokenizer for '<script>var x = 1;</script>'")
- return
- }
- let token = tokenizer.nextToken()
- #expect(token?.type == .StartTag(tagName: "script", attributes: []))
- for codePoint in "var x = 1;" {
- let token = tokenizer.nextToken()
- #expect(token?.type == .Character(codePoint: codePoint))
- }
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndTag(tagName: "script"))
- let token3 = tokenizer.nextToken()
- #expect(token3?.type == .EndOfFile)
- }
- @Test func simpleDivWithContent() {
- guard let tokenizer = HTMLTokenizer(input: "<div>hi</div>") else {
- Issue.record("Failed to create tokenizer for '<div>hi</div>'")
- return
- }
- #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
- let token = tokenizer.nextToken()
- #expect(token?.type == .StartTag(tagName: "div", attributes: []))
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .Character(codePoint: "h"))
- let token3 = tokenizer.nextToken()
- #expect(token3?.type == .Character(codePoint: "i"))
- let token4 = tokenizer.nextToken()
- #expect(token4?.type == .EndTag(tagName: "div"))
- let token5 = tokenizer.nextToken()
- #expect(token5?.type == .EndOfFile)
- }
- @Test func simpleDivWithContentAndAttributes() {
- guard let tokenizer = HTMLTokenizer(input: "<div class=\"foo\">hi</div>") else {
- Issue.record("Failed to create tokenizer for '<div class=\"foo\">hi</div>'")
- return
- }
- #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state
- let token = tokenizer.nextToken()
- #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo")]))
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .Character(codePoint: "h"))
- let token3 = tokenizer.nextToken()
- #expect(token3?.type == .Character(codePoint: "i"))
- let token4 = tokenizer.nextToken()
- #expect(token4?.type == .EndTag(tagName: "div"))
- let token5 = tokenizer.nextToken()
- #expect(token5?.type == .EndOfFile)
- }
- @Test func severalDivsWithAttributesAndContent() {
- // Explicitly use unquoted and single quotes for attribute values
- guard let tokenizer = HTMLTokenizer(input: "<div class=foo>hi</div><div class='bar'>bye</div>") else {
- Issue.record("Failed to create tokenizer for '<div class=\"foo\">hi</div><div class=\"bar\">bye</div>'")
- return
- }
- let token = tokenizer.nextToken()
- #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo")]))
- for codePoint in "hi" {
- let token = tokenizer.nextToken()
- #expect(token?.type == .Character(codePoint: codePoint))
- }
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndTag(tagName: "div"))
- let token3 = tokenizer.nextToken()
- #expect(token3?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "bar")]))
- for codePoint in "bye" {
- let token = tokenizer.nextToken()
- #expect(token?.type == .Character(codePoint: codePoint))
- }
- let token4 = tokenizer.nextToken()
- #expect(token4?.type == .EndTag(tagName: "div"))
- let token5 = tokenizer.nextToken()
- #expect(token5?.type == .EndOfFile)
- }
- @Test func startTagWithMultipleAttributes() {
- guard let tokenizer = HTMLTokenizer(input: "<div class=\"foo\" id=\"bar\">hi</div attr=endTagAttributeWhee>") else {
- Issue.record("Failed to create tokenizer for '<div class=\"foo\" id=\"bar\">hi</div>'")
- return
- }
- let token = tokenizer.nextToken()
- #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo"), HTMLToken.Attribute(localName: "id", value: "bar")]))
- for codePoint in "hi" {
- let token = tokenizer.nextToken()
- #expect(token?.type == .Character(codePoint: codePoint))
- }
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "attr", value: "endTagAttributeWhee")]))
- let token3 = tokenizer.nextToken()
- #expect(token3?.type == .EndOfFile)
- }
- @Test func xmlDeclaration() {
- guard let tokenizer = HTMLTokenizer(input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>") else {
- Issue.record("Failed to create tokenizer for '<?xml version=\"1.0\" encoding=\"UTF-8\"?>'")
- return
- }
- let token = tokenizer.nextToken()
- #expect(token?.type == .Comment(data: "?xml version=\"1.0\" encoding=\"UTF-8\"?"))
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndOfFile)
- }
- @Test func simpleComment() {
- guard let tokenizer = HTMLTokenizer(input: "<!-- comment -->") else {
- Issue.record("Failed to create tokenizer for '<!-- comment -->'")
- return
- }
- let token = tokenizer.nextToken()
- #expect(token?.type == .Comment(data: " comment "))
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndOfFile)
- }
- @Test func nestedComment() {
- guard let tokenizer = HTMLTokenizer(input: "<!-- <!-- nested --> -->") else {
- Issue.record("Failed to create tokenizer for '<!-- <!-- nested --> -->'")
- return
- }
- let token = tokenizer.nextToken()
- #expect(token?.type == .Comment(data: " <!-- nested "))
- for codePoint in " -->" {
- let token = tokenizer.nextToken()
- #expect(token?.type == .Character(codePoint: codePoint))
- }
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndOfFile)
- }
- @Test func commentWithScriptTagInside() {
- guard let tokenizer = HTMLTokenizer(input: "<!-- <script>var x = 1;</script> -->") else {
- Issue.record("Failed to create tokenizer for '<!-- <script>var x = 1;</script> -->'")
- return
- }
- let token = tokenizer.nextToken()
- #expect(token?.type == .Comment(data: " <script>var x = 1;</script> "))
- let token2 = tokenizer.nextToken()
- #expect(token2?.type == .EndOfFile)
- }
- }