/*
 * Copyright (c) 2024, Andrew Kaster
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

import AK
import Testing
import Web

/// Tests for `HTMLToken` and `HTMLTokenizer`.
///
/// Each tokenizer test feeds a small input string to `HTMLTokenizer` and checks the
/// sequence of tokens returned by successive `nextToken()` calls, and in some cases the
/// tokenizer state afterwards.
@Suite
struct TestHTMLTokenizerSwift {

    /// A token whose type is set to a character reports `isCharacter()` and has the
    /// expected string description.
    @Test func tokenTypes() {
        let defaultToken = HTMLToken()
        defaultToken.type = .Character(codePoint: "a")
        #expect(defaultToken.isCharacter())

        #expect("\(defaultToken)" == "HTMLToken(type: Character(codePoint: a))")
    }

    /// Tab, line feed, carriage return, form feed and space are parser whitespace;
    /// the letters `a` through `j` are not.
    @Test func parserWhitespace() {
        let whitespace: [Character] = ["\t", "\n", "\r", "\u{000C}", " "]
        for codePoint in whitespace {
            let token = HTMLToken(type: .Character(codePoint: codePoint))
            #expect(token.isParserWhitespace())
        }

        let letters: [Character] = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
        for codePoint in letters {
            let token = HTMLToken(type: .Character(codePoint: codePoint))
            #expect(!token.isParserWhitespace())
        }
    }

    /// A tokenizer created without input yields a single `EndOfFile` token, then `nil`,
    /// and is in the `Data` state both before and after.
    @Test func dataStateNoInput() {
        let tokenizer = HTMLTokenizer()
        #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .EndOfFile)

        let token2 = tokenizer.nextToken()
        #expect(token2 == nil)
        #expect(tokenizer.state == HTMLTokenizer.State.Data)
    }

    /// A single ordinary character yields that character, then `EndOfFile`, then `nil`.
    @Test func dataStateSingleChar() {
        guard let tokenizer = HTMLTokenizer(input: "X") else {
            Issue.record("Failed to create tokenizer for 'X'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .Character(codePoint: "X"))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)

        let token3 = tokenizer.nextToken()
        #expect(token3 == nil)
        #expect(tokenizer.state == HTMLTokenizer.State.Data)
    }

    /// A lone `&` yields `EndOfFile` and leaves the tokenizer in the
    /// `CharacterReference` state.
    @Test func dataStateAmpersand() {
        guard let tokenizer = HTMLTokenizer(input: "&") else {
            Issue.record("Failed to create tokenizer for '&'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .EndOfFile)
        #expect(tokenizer.state == HTMLTokenizer.State.CharacterReference)

        let token2 = tokenizer.nextToken()
        #expect(token2 == nil)
    }

    /// A lone `<` is emitted as a character token followed by `EndOfFile`, with the
    /// tokenizer left in the `TagOpen` state.
    @Test func tagOpenOnly() {
        guard let tokenizer = HTMLTokenizer(input: "<") else {
            Issue.record("Failed to create tokenizer for '<'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .Character(codePoint: "<"))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)
        #expect(tokenizer.state == HTMLTokenizer.State.TagOpen)

        let token3 = tokenizer.nextToken()
        #expect(token3 == nil)
    }

    /// A NUL character in the data state is emitted as U+FFFD; the characters around
    /// it are emitted unchanged.
    @Test func dataStateNulChar() {
        guard let tokenizer = HTMLTokenizer(input: "H\0I") else {
            Issue.record("Failed to create tokenizer for 'H\\0I'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .Character(codePoint: "H"))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .Character(codePoint: "\u{FFFD}"))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .Character(codePoint: "I"))

        let token4 = tokenizer.nextToken()
        #expect(token4?.type == .EndOfFile)
        #expect(tokenizer.state == HTMLTokenizer.State.Data)
    }

    /// A script element is tokenized as a start tag, one character token per character
    /// of its content, an end tag, and `EndOfFile`.
    @Test func scriptTagWithAttributes() {
        // NOTE(review): input reconstructed from the expected tokens below (empty
        // attribute list); the test name suggests the original input may have carried
        // attributes on the start tag. Confirm against upstream.
        guard let tokenizer = HTMLTokenizer(input: "<script>var x = 1;</script>") else {
            Issue.record("Failed to create tokenizer for '<script>var x = 1;</script>'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "script", attributes: []))

        for codePoint in "var x = 1;" {
            let characterToken = tokenizer.nextToken()
            #expect(characterToken?.type == .Character(codePoint: codePoint))
        }

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndTag(tagName: "script"))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .EndOfFile)
    }

    /// A `div` without attributes around the text `hi`.
    @Test func simpleDivWithContent() {
        guard let tokenizer = HTMLTokenizer(input: "<div>hi</div>") else {
            Issue.record("Failed to create tokenizer for '<div>hi</div>'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "div", attributes: []))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .Character(codePoint: "h"))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .Character(codePoint: "i"))

        let token4 = tokenizer.nextToken()
        #expect(token4?.type == .EndTag(tagName: "div"))

        let token5 = tokenizer.nextToken()
        #expect(token5?.type == .EndOfFile)
    }

    /// A `div` with a single `class` attribute around the text `hi`.
    @Test func simpleDivWithContentAndAttributes() {
        guard let tokenizer = HTMLTokenizer(input: "<div class=\"foo\">hi</div>") else {
            Issue.record("Failed to create tokenizer for '<div class=\"foo\">hi</div>'")
            return
        }
        #expect(tokenizer.state == HTMLTokenizer.State.Data) // initial state

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo")]))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .Character(codePoint: "h"))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .Character(codePoint: "i"))

        let token4 = tokenizer.nextToken()
        #expect(token4?.type == .EndTag(tagName: "div"))

        let token5 = tokenizer.nextToken()
        #expect(token5?.type == .EndOfFile)
    }

    /// Two consecutive `div` elements, each with a `class` attribute and text content.
    @Test func severalDivsWithAttributesAndContent() {
        // Explicitly use unquoted and single quotes for attribute values
        guard let tokenizer = HTMLTokenizer(input: "<div class=foo>hi</div><div class='bar'>bye</div>") else {
            Issue.record("Failed to create tokenizer for '<div class=foo>hi</div><div class='bar'>bye</div>'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo")]))

        for codePoint in "hi" {
            let characterToken = tokenizer.nextToken()
            #expect(characterToken?.type == .Character(codePoint: codePoint))
        }

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndTag(tagName: "div"))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "bar")]))

        for codePoint in "bye" {
            let characterToken = tokenizer.nextToken()
            #expect(characterToken?.type == .Character(codePoint: codePoint))
        }

        let token4 = tokenizer.nextToken()
        #expect(token4?.type == .EndTag(tagName: "div"))

        let token5 = tokenizer.nextToken()
        #expect(token5?.type == .EndOfFile)
    }

    /// A start tag with two attributes, and an end tag that also carries an attribute.
    @Test func startTagWithMultipleAttributes() {
        // NOTE(review): attribute names and values come from the expectations below;
        // the quoting style used in the original input could not be recovered.
        guard let tokenizer = HTMLTokenizer(input: "<div class=\"foo\" id=\"bar\">hi</div attr=endTagAttributeWhee>") else {
            Issue.record("Failed to create tokenizer for '<div class=\"foo\" id=\"bar\">hi</div attr=endTagAttributeWhee>'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .StartTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "class", value: "foo"), HTMLToken.Attribute(localName: "id", value: "bar")]))

        for codePoint in "hi" {
            let characterToken = tokenizer.nextToken()
            #expect(characterToken?.type == .Character(codePoint: codePoint))
        }

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndTag(tagName: "div", attributes: [HTMLToken.Attribute(localName: "attr", value: "endTagAttributeWhee")]))

        let token3 = tokenizer.nextToken()
        #expect(token3?.type == .EndOfFile)
    }

    /// An XML declaration is expected to come out as a single comment token whose data
    /// is everything between `<` and `>`.
    @Test func xmlDeclaration() {
        guard let tokenizer = HTMLTokenizer(input: "<?xml version=\"1.0\" encoding=\"UTF-8\"?>") else {
            Issue.record("Failed to create tokenizer for '<?xml version=\"1.0\" encoding=\"UTF-8\"?>'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .Comment(data: "?xml version=\"1.0\" encoding=\"UTF-8\"?"))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)
    }

    /// A plain comment yields one comment token containing the text between the
    /// delimiters.
    @Test func simpleComment() {
        guard let tokenizer = HTMLTokenizer(input: "<!-- comment -->") else {
            Issue.record("Failed to create tokenizer for '<!-- comment -->'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .Comment(data: " comment "))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)
    }

    /// The first `-->` is expected to end the comment, so the trailing ` -->` comes
    /// out as individual character tokens.
    @Test func nestedComment() {
        guard let tokenizer = HTMLTokenizer(input: "<!-- <!-- nested --> -->") else {
            Issue.record("Failed to create tokenizer for '<!-- <!-- nested --> -->'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .Comment(data: " <!-- nested "))

        for codePoint in " -->" {
            let characterToken = tokenizer.nextToken()
            #expect(characterToken?.type == .Character(codePoint: codePoint))
        }

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)
    }

    /// Markup inside a comment is expected to be kept verbatim as comment data rather
    /// than tokenized as tags.
    @Test func commentWithScriptTagInside() {
        // NOTE(review): the script element inside the comment was reconstructed; only
        // the surrounding spaces of the expected comment data were recoverable.
        guard let tokenizer = HTMLTokenizer(input: "<!-- <script>var x = 1;</script> -->") else {
            Issue.record("Failed to create tokenizer for '<!-- <script>var x = 1;</script> -->'")
            return
        }

        let token = tokenizer.nextToken()
        #expect(token?.type == .Comment(data: " <script>var x = 1;</script> "))

        let token2 = tokenizer.nextToken()
        #expect(token2?.type == .EndOfFile)
    }
}