mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 15:40:19 +00:00
02e3633b7f
This is primarily to be able to remove the GenericLexer include out of Format.h as well. A subsequent commit will add AK::Result to GenericLexer, which will cause naming conflicts with other structures named Result. This can be avoided (for now) by preventing nearly every file in the system from implicitly including GenericLexer. Other changes in this commit are to add the GenericLexer include to files where it is missing.
287 lines
10 KiB
C++
287 lines
10 KiB
C++
/*
|
|
* Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "RegexByteCode.h"
|
|
#include "RegexMatch.h"
|
|
#include "RegexOptions.h"
|
|
#include "RegexParser.h"
|
|
|
|
#include <AK/Forward.h>
|
|
#include <AK/GenericLexer.h>
|
|
#include <AK/HashMap.h>
|
|
#include <AK/NonnullOwnPtrVector.h>
|
|
#include <AK/Types.h>
|
|
#include <AK/Utf32View.h>
|
|
#include <AK/Vector.h>
|
|
#include <ctype.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
namespace regex {
|
|
|
|
// Matcher tuning constants. Their consumers are not in this header.
// `constexpr` already implies `const`, so the extra qualifier is omitted.
static constexpr size_t c_max_recursion { 5000 };
static constexpr size_t c_match_preallocation_count { 0 };
|
|
|
|
struct RegexResult final {
|
|
bool success { false };
|
|
size_t count { 0 };
|
|
Vector<Match> matches;
|
|
Vector<Vector<Match>> capture_group_matches;
|
|
size_t n_operations { 0 };
|
|
size_t n_capture_groups { 0 };
|
|
size_t n_named_capture_groups { 0 };
|
|
};
|
|
|
|
template<class Parser>
|
|
class Regex;
|
|
|
|
template<class Parser>
|
|
class Matcher final {
|
|
|
|
public:
|
|
Matcher(Regex<Parser> const* pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
: m_pattern(pattern)
|
|
, m_regex_options(regex_options.value_or({}))
|
|
{
|
|
}
|
|
~Matcher() = default;
|
|
|
|
RegexResult match(RegexStringView const&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
|
RegexResult match(Vector<RegexStringView> const&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
|
|
|
typename ParserTraits<Parser>::OptionsType options() const
|
|
{
|
|
return m_regex_options;
|
|
}
|
|
|
|
void reset_pattern(Badge<Regex<Parser>>, Regex<Parser> const* pattern)
|
|
{
|
|
m_pattern = pattern;
|
|
}
|
|
|
|
private:
|
|
Optional<bool> execute(MatchInput const& input, MatchState& state, size_t& operations) const;
|
|
|
|
Regex<Parser> const* m_pattern;
|
|
typename ParserTraits<Parser>::OptionsType const m_regex_options;
|
|
};
|
|
|
|
template<class Parser>
|
|
class Regex final {
|
|
public:
|
|
String pattern_value;
|
|
regex::Parser::Result parser_result;
|
|
OwnPtr<Matcher<Parser>> matcher { nullptr };
|
|
mutable size_t start_offset { 0 };
|
|
|
|
static regex::Parser::Result parse_pattern(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
|
|
|
|
explicit Regex(String pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
|
|
Regex(regex::Parser::Result parse_result, String pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
|
|
~Regex() = default;
|
|
Regex(Regex&&);
|
|
Regex& operator=(Regex&&);
|
|
|
|
typename ParserTraits<Parser>::OptionsType options() const;
|
|
void print_bytecode(FILE* f = stdout) const;
|
|
String error_string(Optional<String> message = {}) const;
|
|
|
|
RegexResult match(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
return matcher->match(view, regex_options);
|
|
}
|
|
|
|
RegexResult match(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
return matcher->match(views, regex_options);
|
|
}
|
|
|
|
String replace(RegexStringView const view, StringView const& replacement_pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
StringBuilder builder;
|
|
size_t start_offset = 0;
|
|
RegexResult result = matcher->match(view, regex_options);
|
|
if (!result.success)
|
|
return view.to_string();
|
|
|
|
for (size_t i = 0; i < result.matches.size(); ++i) {
|
|
auto& match = result.matches[i];
|
|
builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_string());
|
|
start_offset = match.global_offset + match.view.length();
|
|
GenericLexer lexer(replacement_pattern);
|
|
while (!lexer.is_eof()) {
|
|
if (lexer.consume_specific('\\')) {
|
|
if (lexer.consume_specific('\\')) {
|
|
builder.append('\\');
|
|
continue;
|
|
}
|
|
auto number = lexer.consume_while(isdigit);
|
|
if (auto index = number.to_uint(); index.has_value() && result.n_capture_groups >= index.value()) {
|
|
builder.append(result.capture_group_matches[i][index.value() - 1].view.to_string());
|
|
} else {
|
|
builder.appendff("\\{}", number);
|
|
}
|
|
} else {
|
|
builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; }));
|
|
}
|
|
}
|
|
}
|
|
|
|
builder.append(view.substring_view(start_offset, view.length() - start_offset).to_string());
|
|
|
|
return builder.to_string();
|
|
}
|
|
|
|
// FIXME: replace(Vector<RegexStringView> const , ...)
|
|
|
|
RegexResult search(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
AllOptions options = (AllOptions)regex_options.value_or({});
|
|
if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
|
|
options.reset_flag(AllFlags::MatchNotEndOfLine);
|
|
options.reset_flag(AllFlags::MatchNotBeginOfLine);
|
|
}
|
|
options.reset_flag(AllFlags::Internal_Stateful);
|
|
options |= AllFlags::Global;
|
|
|
|
return matcher->match(view, options);
|
|
}
|
|
|
|
RegexResult search(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
AllOptions options = (AllOptions)regex_options.value_or({});
|
|
if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
|
|
options.reset_flag(AllFlags::MatchNotEndOfLine);
|
|
options.reset_flag(AllFlags::MatchNotBeginOfLine);
|
|
}
|
|
options.reset_flag(AllFlags::Internal_Stateful);
|
|
options |= AllFlags::Global;
|
|
|
|
return matcher->match(views, options);
|
|
}
|
|
|
|
bool match(RegexStringView const view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = match(view, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool match(Vector<RegexStringView> const views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = match(views, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool search(RegexStringView const view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = search(view, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool search(Vector<RegexStringView> const views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = search(views, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool has_match(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return false;
|
|
RegexResult result = matcher->match(view, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
|
|
return result.success;
|
|
}
|
|
|
|
bool has_match(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return false;
|
|
RegexResult result = matcher->match(views, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
|
|
return result.success;
|
|
}
|
|
};
|
|
|
|
// Free-standing convenience wrappers around Regex::match, Regex::search and Regex::has_match.
|
|
template<class Parser>
|
|
RegexResult match(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult match(Vector<RegexStringView> const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool match(RegexStringView const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool match(Vector<RegexStringView> const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult search(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult search(Vector<RegexStringView> const views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(views, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool search(RegexStringView const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool search(Vector<RegexStringView> const views, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(views, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool has_match(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.has_match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool has_match(Vector<RegexStringView> const views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.has_match(views, regex_options);
|
|
}
|
|
}
|
|
|
|
// Make the commonly used regex names available without the `regex::` prefix.
// `regex::search` is not re-exported here.
using regex::has_match;
using regex::match;
using regex::Regex;
using regex::RegexResult;
|