mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-12 09:20:36 +00:00
AK: Enhance GenericLexer's string consumption
`consume_quoted_string()` can now take an escape character. This allows the quoted string to contain (for example) its own enclosing quote character, as long as it is escaped. The escape character is optional: by default, none is used. You can also consume and unescape a quoted string in one step with the new `consume_and_unescape_string()` method. It takes the escape character as a parameter (backslash by default) and builds a String in which common escape sequences (e.g. \n, \r, \t...) get... unescaped :^)
This commit is contained in:
parent
1ab6dd67e9
commit
8f34b493e4
Notes:
sideshowbarker
2024-07-19 02:13:02 +09:00
Author: https://github.com/benit8 Commit: https://github.com/SerenityOS/serenity/commit/8f34b493e4e Pull-request: https://github.com/SerenityOS/serenity/pull/3610
2 changed files with 44 additions and 8 deletions
|
@ -26,6 +26,7 @@
|
|||
|
||||
#include <AK/Assertions.h>
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
|
@ -211,29 +212,63 @@ StringView GenericLexer::consume_until(Condition condition)
|
|||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
// Consume a string surrounded by single or double quotes
|
||||
// The returned StringView does not include the quotes
|
||||
StringView GenericLexer::consume_quoted_string()
|
||||
/*
|
||||
* Consume a string surrounded by single or double quotes. The returned
|
||||
* StringView does not include the quotes. An escape character can be provided
|
||||
* to capture the enclosing quotes. Please note that the escape character will
|
||||
* still be in the resulting StringView
|
||||
*/
|
||||
StringView GenericLexer::consume_quoted_string(char escape_char)
|
||||
{
|
||||
if (!is_quote(peek()))
|
||||
return {};
|
||||
|
||||
char quote_char = consume();
|
||||
size_t start = m_index;
|
||||
while (!is_eof() && peek() != quote_char)
|
||||
while (!is_eof()) {
|
||||
if (next_is(escape_char))
|
||||
m_index++;
|
||||
else if (next_is(quote_char))
|
||||
break;
|
||||
m_index++;
|
||||
}
|
||||
size_t length = m_index - start;
|
||||
|
||||
if (peek() != quote_char) {
|
||||
m_index = start - 1; // Restore the index in case the string is unterminated
|
||||
// Restore the index in case the string is unterminated
|
||||
m_index = start - 1;
|
||||
return {};
|
||||
}
|
||||
|
||||
// Ignore closing quote
|
||||
ignore();
|
||||
|
||||
return m_input.substring_view(start, length);
|
||||
}
|
||||
|
||||
/*
 * Consume a quoted string (see consume_quoted_string()) and return a copy of
 * its contents in which each escape sequence is replaced by the character it
 * stands for. The sequences n, r, t, b and f map to the usual control
 * characters; any other escaped character is kept as is, with the escape
 * character dropped.
 *
 * Returns a default-constructed String when no quoted string could be
 * consumed.
 */
String GenericLexer::consume_and_unescape_string(char escape_char)
{
    const auto quoted = consume_quoted_string(escape_char);
    if (quoted.is_null())
        return {};

    // Translate the character following an escape; unknown ones stand for themselves
    auto resolve_escape = [](char escaped) -> char {
        switch (escaped) {
        case 'n':
            return '\n';
        case 'r':
            return '\r';
        case 't':
            return '\t';
        case 'b':
            return '\b';
        case 'f':
            return '\f';
        default:
            return escaped;
        }
    };

    StringBuilder builder;
    size_t position = 0;
    while (position < quoted.length()) {
        char current = quoted[position++];
        // consume_quoted_string() only ends the view on an unescaped quote, so
        // every escape character in it is followed by at least one character
        if (current == escape_char)
            current = resolve_escape(quoted[position++]);
        builder.append(current);
    }
    return builder.to_string();
}
|
||||
|
||||
// Ignore a number of characters (1 by default)
|
||||
void GenericLexer::ignore(size_t count)
|
||||
{
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <AK/Function.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringView.h>
|
||||
|
||||
namespace AK {
|
||||
|
@ -36,7 +37,7 @@ public:
|
|||
explicit GenericLexer(const StringView& input);
|
||||
virtual ~GenericLexer();
|
||||
|
||||
// A lambda/function can be used to match characters as the user pleases
|
||||
// A lambda/function can be used to match characters as the user pleases
|
||||
using Condition = Function<bool(char)>;
|
||||
|
||||
size_t tell() const { return m_index; }
|
||||
|
@ -64,8 +65,8 @@ public:
|
|||
StringView consume_until(char);
|
||||
StringView consume_until(const char*);
|
||||
StringView consume_until(Condition);
|
||||
// FIXME: provide an escape character
|
||||
StringView consume_quoted_string();
|
||||
StringView consume_quoted_string(char escape_char = 0);
|
||||
String consume_and_unescape_string(char escape_char = '\\');
|
||||
|
||||
void ignore(size_t count = 1);
|
||||
void ignore_while(Condition);
|
||||
|
|
Loading…
Reference in a new issue