Lagom: Add a tool to verify correctness of the LibJS GC

This is implemented as a Clang frontend tool, and currently does two
things:
  - Ensure for all fields wrapped in {Nonnull,}GCPtr<T>, T inherits from
    JS::Cell
  - Ensure for all fields not wrapped in {Nonnull,}GCPtr, that the type
    does not inherit from JS::Cell (otherwise it should be wrapped in a
    Ptr class).

In the future, this tool could be extended further. For example, we may
consider validating all implementations of Cell::visit_impl.
This commit is contained in:
Matthew Olsson 2023-03-03 17:15:59 -07:00 committed by Linus Groh
parent 176beeb08e
commit b33b950e45
Notes: sideshowbarker 2024-07-16 23:21:16 +09:00
7 changed files with 341 additions and 0 deletions

View file

@ -0,0 +1,3 @@
build/
cmake-build-debug/
venv/

View file

@ -0,0 +1,19 @@
cmake_minimum_required(VERSION 3.24)
project(LibJSGCVerifier C CXX)
find_package(Clang CONFIG REQUIRED HINTS "../../../../Toolchain/Local/clang")
add_executable(LibJSGCVerifier src/main.cpp src/CellsHandler.cpp)
target_include_directories(LibJSGCVerifier SYSTEM PRIVATE ${CLANG_INCLUDE_DIRS} ${LLVM_INCLUDE_DIRS})
target_compile_features(LibJSGCVerifier PRIVATE cxx_std_20)
target_link_libraries(LibJSGCVerifier PRIVATE clangTooling)
target_compile_options(LibJSGCVerifier PRIVATE
-Wall
-Wextra
-Werror
-Wno-unused
-fno-rtti
)

View file

@ -0,0 +1,15 @@
### LibJSGCVerifier
This is a simple Clang tool to validate certain behavior relating to LibJS's GC. It currently validates
two things:
- For all types wrapped by `GCPtr` or `NonnullGCPtr`, that the wrapped type inherits from `Cell`
- For all types not wrapped by `GCPtr` or `NonnullGCPtr`, that the wrapped type does not inherit from `Cell`
(otherwise it should be wrapped).
Usage:
```
cmake -GNinja -B build
cmake --build build
src/main.py -b <path to serenity>/Build
```

View file

@ -0,0 +1,172 @@
/*
* Copyright (c) 2023, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "CellsHandler.h"
#include <clang/AST/DeclCXX.h>
#include <clang/AST/Type.h>
#include <clang/ASTMatchers/ASTMatchFinder.h>
#include <clang/ASTMatchers/ASTMatchers.h>
#include <clang/Basic/Diagnostic.h>
#include <clang/Basic/Specifiers.h>
#include <clang/Frontend/CompilerInstance.h>
#include <filesystem>
#include <llvm/Support/Casting.h>
#include <llvm/Support/raw_ostream.h>
#include <unordered_set>
#include <vector>
CollectCellsHandler::CollectCellsHandler()
{
using namespace clang::ast_matchers;
m_finder.addMatcher(
traverse(
clang::TK_IgnoreUnlessSpelledInSource,
cxxRecordDecl(decl().bind("record-decl"))),
this);
}
bool CollectCellsHandler::handleBeginSource(clang::CompilerInstance& ci)
{
auto const& source_manager = ci.getSourceManager();
ci.getFileManager().getNumUniqueRealFiles();
auto file_id = source_manager.getMainFileID();
auto const* file_entry = source_manager.getFileEntryForID(file_id);
if (!file_entry)
return false;
auto current_filepath = std::filesystem::canonical(file_entry->getName().str());
llvm::outs() << "Processing " << current_filepath.string() << "\n";
return true;
}
bool record_inherits_from_cell(clang::CXXRecordDecl const& record)
{
if (!record.isCompleteDefinition())
return false;
bool inherits_from_cell = record.getQualifiedNameAsString() == "JS::Cell";
record.forallBases([&](clang::CXXRecordDecl const* base) -> bool {
if (base->getQualifiedNameAsString() == "JS::Cell") {
inherits_from_cell = true;
return false;
}
return true;
});
return inherits_from_cell;
}
std::vector<clang::QualType> get_all_qualified_types(clang::QualType const& type)
{
std::vector<clang::QualType> qualified_types;
if (auto const* template_specialization = type->getAs<clang::TemplateSpecializationType>()) {
auto specialization_name = template_specialization->getTemplateName().getAsTemplateDecl()->getQualifiedNameAsString();
// Do not unwrap GCPtr/NonnullGCPtr
if (specialization_name == "JS::GCPtr" || specialization_name == "JS::NonnullGCPtr") {
qualified_types.push_back(type);
} else {
for (size_t i = 0; i < template_specialization->getNumArgs(); i++) {
auto const& template_arg = template_specialization->getArg(i);
if (template_arg.getKind() == clang::TemplateArgument::Type) {
auto template_qualified_types = get_all_qualified_types(template_arg.getAsType());
std::move(template_qualified_types.begin(), template_qualified_types.end(), std::back_inserter(qualified_types));
}
}
}
} else {
qualified_types.push_back(type);
}
return qualified_types;
}
struct FieldValidationResult {
bool is_valid { false };
bool is_wrapped_in_gcptr { false };
};
FieldValidationResult validate_field(clang::FieldDecl const* field_decl)
{
auto type = field_decl->getType();
if (auto const* elaborated_type = llvm::dyn_cast<clang::ElaboratedType>(type.getTypePtr()))
type = elaborated_type->desugar();
FieldValidationResult result { .is_valid = true };
for (auto const& qualified_type : get_all_qualified_types(type)) {
if (auto const* pointer_decl = qualified_type->getAs<clang::PointerType>()) {
if (auto const* pointee = pointer_decl->getPointeeCXXRecordDecl()) {
if (record_inherits_from_cell(*pointee)) {
result.is_valid = false;
result.is_wrapped_in_gcptr = false;
return result;
}
}
} else if (auto const* reference_decl = qualified_type->getAs<clang::ReferenceType>()) {
if (auto const* pointee = reference_decl->getPointeeCXXRecordDecl()) {
if (record_inherits_from_cell(*pointee)) {
result.is_valid = false;
result.is_wrapped_in_gcptr = false;
return result;
}
}
} else if (auto const* specialization = qualified_type->getAs<clang::TemplateSpecializationType>()) {
auto template_type_name = specialization->getTemplateName().getAsTemplateDecl()->getName();
if (template_type_name != "GCPtr" && template_type_name != "NonnullGCPtr")
return result;
if (specialization->getNumArgs() != 1)
return result; // Not really valid, but will produce a compilation error anyway
auto const& type_arg = specialization->getArg(0);
auto const* record_type = type_arg.getAsType()->getAs<clang::RecordType>();
if (!record_type)
return result;
auto const* record_decl = record_type->getAsCXXRecordDecl();
if (!record_decl->hasDefinition())
return result;
result.is_wrapped_in_gcptr = true;
result.is_valid = record_inherits_from_cell(*record_decl);
}
}
return result;
}
void CollectCellsHandler::run(clang::ast_matchers::MatchFinder::MatchResult const& result)
{
clang::CXXRecordDecl const* record = result.Nodes.getNodeAs<clang::CXXRecordDecl>("record-decl");
if (!record || !record->isCompleteDefinition() || (!record->isClass() && !record->isStruct()))
return;
auto& diag_engine = result.Context->getDiagnostics();
for (clang::FieldDecl const* field : record->fields()) {
auto const& type = field->getType();
auto validation_results = validate_field(field);
if (!validation_results.is_valid) {
if (validation_results.is_wrapped_in_gcptr) {
auto diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Specialization type must inherit from JS::Cell");
diag_engine.Report(field->getLocation(), diag_id);
} else {
auto diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Warning, "%0 to JS::Cell type should be wrapped in %1");
auto builder = diag_engine.Report(field->getLocation(), diag_id);
if (type->isReferenceType()) {
builder << "reference"
<< "JS::NonnullGCPtr";
} else {
builder << "pointer"
<< "JS::GCPtr";
}
}
}
}
}

View file

@ -0,0 +1,30 @@
/*
* Copyright (c) 2023, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <clang/ASTMatchers/ASTMatchFinder.h>
#include <clang/ASTMatchers/ASTMatchers.h>
#include <clang/Tooling/Tooling.h>
#include <unordered_set>
class CollectCellsHandler
: public clang::tooling::SourceFileCallbacks
, public clang::ast_matchers::MatchFinder::MatchCallback {
public:
CollectCellsHandler();
virtual ~CollectCellsHandler() override = default;
virtual bool handleBeginSource(clang::CompilerInstance&) override;
virtual void run(clang::ast_matchers::MatchFinder::MatchResult const& result) override;
clang::ast_matchers::MatchFinder& finder() { return m_finder; }
private:
std::unordered_set<std::string> m_visited_classes;
clang::ast_matchers::MatchFinder m_finder;
};

View file

@ -0,0 +1,27 @@
/*
* Copyright (c) 2023, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "CellsHandler.h"
#include <clang/Tooling/CommonOptionsParser.h>
#include <clang/Tooling/Tooling.h>
#include <llvm/Support/CommandLine.h>
int main(int argc, char const** argv)
{
llvm::cl::OptionCategory s_tool_category("LibJSGCVerifier options");
auto maybe_parser = clang::tooling::CommonOptionsParser::create(argc, argv, s_tool_category);
if (!maybe_parser) {
llvm::errs() << maybe_parser.takeError();
return 1;
}
auto& parser = maybe_parser.get();
clang::tooling::ClangTool tool(parser.getCompilations(), parser.getSourcePathList());
CollectCellsHandler collect_handler;
auto collect_action = clang::tooling::newFrontendActionFactory(&collect_handler.finder(), &collect_handler);
return tool.run(collect_action.get());
}

View file

@ -0,0 +1,75 @@
#!/bin/python3
import argparse
import multiprocessing
import os
from pathlib import Path
import signal
import subprocess
import sys
# Relative to Userland directory
PATHS_TO_SEARCH = [
'Applications/Assistant',
'Applications/Browser',
'Applications/Spreadsheet',
'Applications/TextEditor',
'DevTools/HackStudio',
'Libraries/LibJS',
'Libraries/LibMarkdown',
'Libraries/LibWeb',
'Services/WebContent',
]
parser = argparse.ArgumentParser('LibJSGCVerifier', description='A Clang tool to validate usage of the LibJS GC')
parser.add_argument('-b', '--build-path', required=True, help='Path to the project Build folder')
args = parser.parse_args()
build_path = Path(args.build_path).resolve()
userland_path = build_path / '..' / 'Userland'
include_path = build_path / 'x86_64clang' / 'Root' / 'usr' / 'include'
compile_commands_path = build_path / 'x86_64clang' / 'compile_commands.json'
if not compile_commands_path.exists():
print(f'Could not find compile_commands.json in {compile_commands_path.parent}')
exit(1)
paths = []
for containing_path in PATHS_TO_SEARCH:
for root, dirs, files in os.walk(userland_path / containing_path):
for file in files:
# FIXME: The dollar sign in $262Object.cpp gets corrupted in compile_commands.json somehow, and
# ends up as "\\$$262Object.cpp". This results in a clang error, so we just ignore the
# file here
if file.endswith('.cpp') and not file.endswith('262Object.cpp'):
paths.append(Path(root) / file)
# paths = ['/home/matthew/code/serenity/Userland/Libraries/LibJS/Runtime/Set.h']
def thread_init():
signal.signal(signal.SIGINT, signal.SIG_IGN)
def thread_execute(file_path):
clang_args = [
'./build/LibJSGCVerifier',
'--extra-arg',
'-DUSING_AK_GLOBALLY=1', # To avoid errors about USING_AK_GLOBALLY not being defined at all
'-p',
compile_commands_path,
file_path
]
proc = subprocess.Popen(clang_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
sys.stdout.buffer.write(proc.communicate()[0])
sys.stdout.buffer.flush()
with multiprocessing.Pool(processes=multiprocessing.cpu_count() - 2, initializer=thread_init) as pool:
try:
pool.map(thread_execute, paths)
except KeyboardInterrupt:
pool.terminate()
pool.join()