AK: Enhance GenericLexer's string consumption

`consume_quoted_string()` can now take an escape character. This allows the
returned view to contain (for example) escaped quote characters instead of
stopping at them. The escape character is optional and disabled by default.

You can also consume and unescape a quoted string with the eponymous method
`consume_and_unescape_string()`. It takes an escape character as parameter
(backslash by default). It builds a String in which common escape sequences
get... unescaped :^) (e.g. \n, \r, \t...).
This commit is contained in:
Benoit Lormeau 2020-09-26 12:21:14 +02:00 committed by Andreas Kling
parent 1ab6dd67e9
commit 8f34b493e4
Notes: sideshowbarker 2024-07-19 02:13:02 +09:00
2 changed files with 44 additions and 8 deletions

View file

@ -26,6 +26,7 @@
#include <AK/Assertions.h>
#include <AK/GenericLexer.h>
#include <AK/StringBuilder.h>
namespace AK {
@ -211,29 +212,63 @@ StringView GenericLexer::consume_until(Condition condition)
return m_input.substring_view(start, length);
}
// Consume a string surrounded by single or double quotes
// The returned StringView does not include the quotes
StringView GenericLexer::consume_quoted_string()
/*
 * Consume a string surrounded by single or double quotes and return its
 * contents, without the enclosing quotes.
 *
 * If an escape character is provided (anything other than 0), the character
 * that follows each escape character is skipped without being inspected, so an
 * escaped quote does not terminate the string. Please note that the escape
 * characters are NOT removed: they are still in the resulting StringView.
 *
 * Returns a default-constructed StringView if the next character is not a
 * quote, or if the string is unterminated (in which case the index is restored
 * to the opening quote).
 */
StringView GenericLexer::consume_quoted_string(char escape_char)
{
    if (!is_quote(peek()))
        return {};

    char quote_char = consume();
    size_t start = m_index;

    // 0 is the "no escape character" default; never let it match a NUL byte
    // that happens to be part of the input.
    bool has_escape_char = escape_char != '\0';

    while (!is_eof()) {
        if (has_escape_char && next_is(escape_char)) {
            // Step over the escape character; the escaped character itself is
            // stepped over by the increment at the bottom of the loop.
            m_index++;
            // A dangling escape at the very end of the input: stop here so the
            // index never runs past the end.
            if (is_eof())
                break;
        } else if (next_is(quote_char)) {
            break;
        }
        m_index++;
    }
    size_t length = m_index - start;

    if (peek() != quote_char) {
        // Restore the index in case the string is unterminated
        m_index = start - 1;
        return {};
    }

    // Ignore closing quote
    ignore();

    return m_input.substring_view(start, length);
}
/*
 * Consume a quoted string with consume_quoted_string() and build a String
 * from it in which every escape sequence is replaced by the character it
 * denotes. The common sequences n, r, t, b and f are translated to the
 * matching control character; any other escaped character is kept as is
 * (so an escaped quote or an escaped escape character yields that character).
 *
 * Returns a default-constructed String if consume_quoted_string() returned a
 * null view.
 */
String GenericLexer::consume_and_unescape_string(char escape_char)
{
    auto view = consume_quoted_string(escape_char);
    if (view.is_null())
        return {};

    // Transform common escape sequences; unknown ones map to themselves
    auto unescape_character = [](char c) -> char {
        switch (c) {
        case 'n':
            return '\n';
        case 'r':
            return '\r';
        case 't':
            return '\t';
        case 'b':
            return '\b';
        case 'f':
            return '\f';
        default:
            return c;
        }
    };

    StringBuilder builder;
    for (size_t i = 0; i < view.length(); ++i) {
        char c = view[i];
        // Only treat it as an escape sequence when a character actually
        // follows, so the lookahead can never index past the end of the view.
        if (c == escape_char && i + 1 < view.length())
            c = unescape_character(view[++i]);
        builder.append(c);
    }
    return builder.to_string();
}
// Ignore a number of characters (1 by default)
void GenericLexer::ignore(size_t count)
{

View file

@ -27,6 +27,7 @@
#pragma once
#include <AK/Function.h>
#include <AK/String.h>
#include <AK/StringView.h>
namespace AK {
@ -36,7 +37,7 @@ public:
explicit GenericLexer(const StringView& input);
virtual ~GenericLexer();
// A lambda/function can be used to match characters as the user pleases
// A lambda/function can be used to match characters as the user pleases
using Condition = Function<bool(char)>;
// Return the lexer's current index (m_index)
size_t tell() const { return m_index; }
@ -64,8 +65,8 @@ public:
StringView consume_until(char);
StringView consume_until(const char*);
StringView consume_until(Condition);
// FIXME: provide an escape character
StringView consume_quoted_string();
// Consume a single- or double-quoted string and return its contents without
// the enclosing quotes. The character following each `escape_char` is skipped,
// and the escape characters themselves stay in the returned view.
StringView consume_quoted_string(char escape_char = 0);
// Same as consume_quoted_string(), but returns a String in which the escape
// sequences have been replaced (e.g. \n, \r, \t, \b, \f).
String consume_and_unescape_string(char escape_char = '\\');
void ignore(size_t count = 1);
void ignore_while(Condition);