LibWeb: Add a basic content filter (ad blocking!) :^)

This patch adds a global (per-process) filter list to LibWeb that is
used to filter all outgoing resource load requests.

Basically we check the URL against a list of filter patterns and if
it's a match for any one of them, we immediately fail the load.

The filter list is a simple text file:

    ~/.config/BrowserContentFilters.txt

It's one filter per line and they are simple glob filters for now,
with implicit asterisks (*) at the start and end of the line.
This commit is contained in:
Andreas Kling 2021-01-05 18:12:29 +01:00
parent 1c8eaf28cd
commit a6d52e0c97
Notes: sideshowbarker 2024-07-19 00:05:36 +09:00
5 changed files with 141 additions and 1 deletions

View file

@ -43,6 +43,7 @@
#include <LibGUI/TabWidget.h>
#include <LibGUI/Window.h>
#include <LibGfx/Bitmap.h>
#include <LibWeb/Loader/ContentFilter.h>
#include <LibWeb/Loader/ResourceLoader.h>
#include <stdio.h>
#include <stdlib.h>
@ -132,6 +133,17 @@ int main(int argc, char** argv)
auto m_config = Core::ConfigFile::get_for_app("Browser");
Browser::g_home_url = m_config->read_entry("Preferences", "Home", "about:blank");
// Load the user's content filter patterns from "BrowserContentFilters.txt" in the
// standard config directory. If the file cannot be opened, no patterns are added
// and startup continues.
auto ad_filter_list_or_error = Core::File::open(String::formatted("{}/BrowserContentFilters.txt", Core::StandardPaths::config_directory()), Core::IODevice::ReadOnly);
if (!ad_filter_list_or_error.is_error()) {
auto& ad_filter_list = *ad_filter_list_or_error.value();
// One pattern per line, read until end of file.
while (!ad_filter_list.eof()) {
auto line = ad_filter_list.read_line();
// Skip empty lines so they are never registered as patterns.
if (line.is_empty())
continue;
Web::ContentFilter::the().add_pattern(line);
}
}
bool bookmarksbar_enabled = true;
auto bookmarks_bar = Browser::BookmarksBarWidget::construct(Browser::bookmarks_file_path(), bookmarksbar_enabled);

View file

@ -177,6 +177,7 @@ set(SOURCES
Layout/TreeBuilder.cpp
Layout/WidgetBox.cpp
LayoutTreeModel.cpp
Loader/ContentFilter.cpp
Loader/FrameLoader.cpp
Loader/ImageLoader.cpp
Loader/ImageResource.cpp

View file

@ -0,0 +1,68 @@
/*
* Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <AK/StringBuilder.h>
#include <LibWeb/Loader/ContentFilter.h>
namespace Web {
// Returns the shared ContentFilter instance, creating it on the first call.
ContentFilter& ContentFilter::the()
{
    // Allocated once and not deleted in this file, so the returned reference
    // stays usable for every later caller.
    static auto* s_the = new ContentFilter;
    return *s_the;
}
// Nothing to set up: the pattern list starts out empty.
ContentFilter::ContentFilter() = default;
// No manual cleanup is needed; members release their own storage.
ContentFilter::~ContentFilter() = default;
bool ContentFilter::is_filtered(const URL& url) const
{
auto url_string = url.to_string();
for (auto& pattern : m_patterns) {
if (url_string.matches(pattern.text, CaseSensitivity::CaseSensitive))
return true;
}
return false;
}
void ContentFilter::add_pattern(const String& pattern)
{
StringBuilder builder;
if (!pattern.starts_with('*'))
builder.append('*');
builder.append(pattern);
if (!pattern.ends_with('*'))
builder.append('*');
m_patterns.empend(builder.to_string());
}
}

View file

@ -0,0 +1,51 @@
/*
* Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/URL.h>
#include <AK/Vector.h>
namespace Web {
// Holds a list of glob patterns and answers whether a URL matches any of them.
// A single shared instance is obtained through the(); construction and
// destruction are private.
class ContentFilter {
public:
    // Returns the shared filter instance.
    static ContentFilter& the();

    // Adds a glob pattern to the list consulted by is_filtered().
    void add_pattern(const String&);

    // Returns true if the given URL matches at least one added pattern.
    bool is_filtered(const URL&) const;

private:
    // A single filter entry; `text` is the glob mask used for matching.
    struct Pattern {
        String text;
    };

    ContentFilter();
    ~ContentFilter();

    // All registered patterns, in the order they were added.
    Vector<Pattern> m_patterns;
};
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -31,6 +31,7 @@
#include <LibCore/File.h>
#include <LibProtocol/Client.h>
#include <LibProtocol/Download.h>
#include <LibWeb/Loader/ContentFilter.h>
#include <LibWeb/Loader/LoadRequest.h>
#include <LibWeb/Loader/Resource.h>
#include <LibWeb/Loader/ResourceLoader.h>
@ -110,11 +111,18 @@ RefPtr<Resource> ResourceLoader::load_resource(Resource::Type type, const LoadRe
void ResourceLoader::load(const LoadRequest& request, Function<void(ReadonlyBytes, const HashMap<String, String, CaseInsensitiveStringTraits>& response_headers)> success_callback, Function<void(const String&)> error_callback)
{
auto& url = request.url();
if (is_port_blocked(url.port())) {
dbg() << "ResourceLoader::load: Error: blocked port " << url.port() << " for URL: " << url;
return;
}
if (ContentFilter::the().is_filtered(url)) {
dbgln("\033[32;1mResourceLoader::load: URL was filtered! {}\033[0m", url);
error_callback("URL was filtered");
return;
}
if (url.protocol() == "about") {
dbg() << "Loading about: URL " << url;
deferred_invoke([success_callback = move(success_callback)](auto&) {