LibWeb: Add a super basic HTML syntax highlighter
This can currently highlight tag names and attribute names/values.
This commit is contained in:
parent
aa7939bc6c
commit
97a230e4ef
Notes:
sideshowbarker
2024-07-18 17:41:59 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/97a230e4efe Pull-request: https://github.com/SerenityOS/serenity/pull/7322
6 changed files with 194 additions and 4 deletions
|
@ -15,13 +15,14 @@
|
|||
namespace Syntax {
|
||||
|
||||
// Languages for which a syntax highlighter exists.
// Enumerators are kept in ASCII-sorted order; each name appears exactly once.
enum class Language {
    Cpp,
    GML,
    HTML,
    INI,
    JavaScript,
    PlainText,
    SQL,
    Shell,
};
|
||||
|
||||
struct TextStyle {
|
||||
|
|
|
@ -153,6 +153,7 @@ set(SOURCES
|
|||
HTML/Parser/ListOfActiveFormattingElements.cpp
|
||||
HTML/Parser/StackOfOpenElements.cpp
|
||||
HTML/SubmitEvent.cpp
|
||||
HTML/SyntaxHighlighter/SyntaxHighlighter.cpp
|
||||
HTML/TagNames.cpp
|
||||
HTML/WebSocket.cpp
|
||||
HighResolutionTime/Performance.cpp
|
||||
|
|
|
@ -229,6 +229,11 @@ Optional<u32> HTMLTokenizer::peek_code_point(size_t offset) const
|
|||
|
||||
Optional<HTMLToken> HTMLTokenizer::next_token()
|
||||
{
|
||||
{
|
||||
auto last_position = m_source_positions.last();
|
||||
m_source_positions.clear();
|
||||
m_source_positions.append(move(last_position));
|
||||
}
|
||||
_StartOfFunction:
|
||||
if (!m_queued_tokens.is_empty())
|
||||
return m_queued_tokens.dequeue();
|
||||
|
|
|
@ -110,6 +110,10 @@ public:
|
|||
Optional<HTMLToken> next_token();
|
||||
|
||||
void switch_to(Badge<HTMLDocumentParser>, State new_state);
|
||||
// Badge-less overload: sets the tokenizer state directly, so it can be called
// by code other than HTMLDocumentParser.
void switch_to(State new_state) { m_state = new_state; }
|
||||
|
||||
void set_blocked(bool b) { m_blocked = b; }
|
||||
bool is_blocked() const { return m_blocked; }
|
||||
|
|
|
@ -0,0 +1,147 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
|
||||
#include <LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Kind tag attached to every span produced by SyntaxHighlighter::rehighlight().
// The value is cast to void* and stored as the span's opaque data, so the
// enumerator values themselves are what later gets compared.
enum class AugmentedTokenKind : u32 {
    AttributeName,  // span covering an attribute name inside a tag
    AttributeValue, // span covering an attribute value inside a tag
    OpenTag,        // span covering the tag name of a start tag
    CloseTag,       // span covering the tag name of an end tag
    Comment,        // span emitted for a comment token
    Doctype,        // span emitted for a doctype token
};
|
||||
|
||||
bool SyntaxHighlighter::is_identifier(void* token) const
|
||||
{
|
||||
if (!token)
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SyntaxHighlighter::is_navigatable(void*) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void SyntaxHighlighter::rehighlight(const Palette& palette)
|
||||
{
|
||||
(void)palette;
|
||||
auto text = m_client->get_text();
|
||||
|
||||
Vector<GUI::TextDocumentSpan> spans;
|
||||
auto highlight = [&](auto start_line, auto start_column, auto end_line, auto end_column, Gfx::TextAttributes attributes, AugmentedTokenKind kind) {
|
||||
spans.empend(
|
||||
GUI::TextRange {
|
||||
{ start_line, start_column },
|
||||
{ end_line, end_column },
|
||||
},
|
||||
move(attributes),
|
||||
(void*)kind,
|
||||
false);
|
||||
};
|
||||
|
||||
HTMLTokenizer tokenizer { text, "utf-8" };
|
||||
[[maybe_unused]] enum class State {
|
||||
HTML,
|
||||
Javascript,
|
||||
CSS,
|
||||
} state { State::HTML };
|
||||
for (;;) {
|
||||
auto token = tokenizer.next_token();
|
||||
if (!token.has_value())
|
||||
break;
|
||||
|
||||
if (token->is_start_tag()) {
|
||||
if (token->tag_name() == "script"sv) {
|
||||
tokenizer.switch_to(HTMLTokenizer::State::ScriptData);
|
||||
state = State::Javascript;
|
||||
} else if (token->tag_name() == "style"sv) {
|
||||
tokenizer.switch_to(HTMLTokenizer::State::RAWTEXT);
|
||||
state = State::CSS;
|
||||
}
|
||||
} else if (token->is_end_tag()) {
|
||||
if (token->tag_name().is_one_of("script"sv, "style"sv)) {
|
||||
if (state == State::Javascript) {
|
||||
// FIXME: Highlight javascript code here instead.
|
||||
} else if (state == State::CSS) {
|
||||
// FIXME: Highlight CSS code here instead.
|
||||
}
|
||||
state = State::HTML;
|
||||
}
|
||||
}
|
||||
|
||||
size_t token_start_offset = token->is_end_tag() ? 1 : 0;
|
||||
|
||||
if (token->is_comment()) {
|
||||
highlight(
|
||||
token->start_position().line,
|
||||
token->start_position().column,
|
||||
token->start_position().line,
|
||||
token->start_position().column,
|
||||
{ palette.syntax_comment(), {} },
|
||||
AugmentedTokenKind::Comment);
|
||||
} else if (token->is_start_tag() || token->is_end_tag()) {
|
||||
// FIXME: This breaks with single-character tag names.
|
||||
highlight(
|
||||
token->start_position().line,
|
||||
token->start_position().column + token_start_offset,
|
||||
token->start_position().line,
|
||||
token->start_position().column + token->tag_name().length() + token_start_offset - 1,
|
||||
{ palette.syntax_keyword(), {} },
|
||||
token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag);
|
||||
|
||||
for (auto& attribute : token->attributes()) {
|
||||
highlight(
|
||||
attribute.name_start_position.line,
|
||||
attribute.name_start_position.column + token_start_offset,
|
||||
attribute.name_end_position.line,
|
||||
attribute.name_end_position.column + token_start_offset,
|
||||
{ palette.syntax_identifier(), {} },
|
||||
AugmentedTokenKind::AttributeName);
|
||||
highlight(
|
||||
attribute.value_start_position.line,
|
||||
attribute.value_start_position.column + token_start_offset,
|
||||
attribute.value_end_position.line,
|
||||
attribute.value_end_position.column + token_start_offset,
|
||||
{ palette.syntax_string(), {} },
|
||||
AugmentedTokenKind::AttributeValue);
|
||||
}
|
||||
} else if (token->is_doctype()) {
|
||||
highlight(
|
||||
token->start_position().line,
|
||||
token->start_position().column,
|
||||
token->start_position().line,
|
||||
token->start_position().column,
|
||||
{ palette.syntax_preprocessor_statement(), {} },
|
||||
AugmentedTokenKind::Doctype);
|
||||
}
|
||||
}
|
||||
|
||||
m_client->do_set_spans(move(spans));
|
||||
m_has_brace_buddies = false;
|
||||
highlight_matching_token_pair();
|
||||
m_client->do_update();
|
||||
}
|
||||
|
||||
// Returns the span kinds that are treated as a matching pair: an open tag
// paired with a close tag. The list is built on the first call and a copy of
// it is returned on every call.
Vector<Syntax::Highlighter::MatchingTokenPair> SyntaxHighlighter::matching_token_pairs() const
{
    static Vector<MatchingTokenPair> s_tag_pairs;
    if (!s_tag_pairs.is_empty())
        return s_tag_pairs;

    s_tag_pairs.append({ (void*)AugmentedTokenKind::OpenTag, (void*)AugmentedTokenKind::CloseTag });
    return s_tag_pairs;
}
|
||||
|
||||
bool SyntaxHighlighter::token_types_equal(void* token0, void* token1) const
|
||||
{
|
||||
return token0 == token1;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Ali Mohammad Pur <mpfard@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <LibSyntax/Highlighter.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
class SyntaxHighlighter : public Syntax::Highlighter {
|
||||
public:
|
||||
SyntaxHighlighter() = default;
|
||||
virtual ~SyntaxHighlighter() override = default;
|
||||
|
||||
virtual bool is_identifier(void*) const override;
|
||||
virtual bool is_navigatable(void*) const override;
|
||||
|
||||
virtual Syntax::Language language() const override { return Syntax::Language::HTML; }
|
||||
virtual void rehighlight(const Palette&) override;
|
||||
|
||||
protected:
|
||||
virtual Vector<MatchingTokenPair> matching_token_pairs() const override;
|
||||
virtual bool token_types_equal(void*, void*) const override;
|
||||
|
||||
size_t m_line { 1 };
|
||||
size_t m_column { 0 };
|
||||
};
|
||||
|
||||
}
|
Loading…
Add table
Reference in a new issue