LibGUI: Lex INI files as Utf8
Iterating byte by byte meant that the column positions assigned to INI tokens were off whenever the input contained multi-byte code points. Using a Utf8View means these positions count whole code points instead, so the column positions match what GUI::TextEditor expects. :^) Fixes #12706.
This commit is contained in:
parent
95df712c2e
commit
ae6a84c261
Notes:
sideshowbarker
2024-07-17 01:50:18 +09:00
Author: https://github.com/AtkinsSJ Commit: https://github.com/SerenityOS/serenity/commit/ae6a84c261 Pull-request: https://github.com/SerenityOS/serenity/pull/16953 Issue: https://github.com/SerenityOS/serenity/issues/12706 Reviewed-by: https://github.com/gmta ✅
2 changed files with 14 additions and 16 deletions
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Hüseyin Aslıtürk <asliturk@hotmail.com>
|
||||
* Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -12,20 +13,20 @@ namespace GUI {
|
|||
|
||||
IniLexer::IniLexer(StringView input)
|
||||
: m_input(input)
|
||||
, m_iterator(m_input.begin())
|
||||
{
|
||||
}
|
||||
|
||||
char IniLexer::peek(size_t offset) const
|
||||
u32 IniLexer::peek(size_t offset) const
|
||||
{
|
||||
if ((m_index + offset) >= m_input.length())
|
||||
return 0;
|
||||
return m_input[m_index + offset];
|
||||
return m_iterator.peek(offset).value_or(0);
|
||||
}
|
||||
|
||||
char IniLexer::consume()
|
||||
u32 IniLexer::consume()
|
||||
{
|
||||
VERIFY(m_index < m_input.length());
|
||||
char ch = m_input[m_index++];
|
||||
VERIFY(m_iterator != m_input.end());
|
||||
u32 ch = *m_iterator;
|
||||
++m_iterator;
|
||||
if (ch == '\n') {
|
||||
m_position.line++;
|
||||
m_position.column = 0;
|
||||
|
@ -38,8 +39,6 @@ char IniLexer::consume()
|
|||
Vector<IniToken> IniLexer::lex()
|
||||
{
|
||||
Vector<IniToken> tokens;
|
||||
|
||||
size_t token_start_index = 0;
|
||||
IniPosition token_start_position;
|
||||
|
||||
auto emit_token = [&](auto type) {
|
||||
|
@ -52,7 +51,6 @@ Vector<IniToken> IniLexer::lex()
|
|||
};
|
||||
|
||||
auto begin_token = [&] {
|
||||
token_start_index = m_index;
|
||||
token_start_position = m_position;
|
||||
};
|
||||
|
||||
|
@ -64,7 +62,7 @@ Vector<IniToken> IniLexer::lex()
|
|||
tokens.append(token);
|
||||
};
|
||||
|
||||
while (m_index < m_input.length()) {
|
||||
while (m_iterator != m_input.end()) {
|
||||
auto ch = peek();
|
||||
|
||||
if (is_ascii_space(ch)) {
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Utf8View.h>
|
||||
|
||||
namespace GUI {
|
||||
|
||||
|
@ -57,11 +57,11 @@ public:
|
|||
Vector<IniToken> lex();
|
||||
|
||||
private:
|
||||
char peek(size_t offset = 0) const;
|
||||
char consume();
|
||||
u32 peek(size_t offset = 0) const;
|
||||
u32 consume();
|
||||
|
||||
StringView m_input;
|
||||
size_t m_index { 0 };
|
||||
Utf8View m_input;
|
||||
Utf8CodePointIterator m_iterator;
|
||||
IniPosition m_position { 0, 0 };
|
||||
};
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue