Преглед на файлове

LibJS/Bytecode: Add peephole optimization pass and fuse compare+jump

This patch adds a new "Peephole" pass for performing small, local
optimizations to bytecode.

We also introduce the first such optimization: fusing a comparison
instruction (FooCompare) that is immediately followed by a JumpIf into
a single new JumpFooCompare instruction.

This gives a ~50% speed-up on the following microbenchmark:

    for (let i = 0; i < 10_000_000; ++i) {
    }

But more traditional benchmarks see a pretty sizable speed-up as well,
for example 15% on Kraken/ai-astar.js and 16% on Kraken/audio-dft.js :^)
Andreas Kling преди 1 година
родител
ревизия
4438ec481c

+ 3 - 3
Userland/Libraries/LibJS/Bytecode/BasicBlock.h

@@ -82,15 +82,15 @@ public:
         op->set_source_record({ start_offset, end_offset });
     }
 
-private:
-    explicit BasicBlock(String name);
-
     void terminate(size_t slot_offset)
     {
         m_terminated = true;
         m_terminator_offset = slot_offset;
     }
 
+private:
+    explicit BasicBlock(String name);
+
     Vector<u8> m_buffer;
     BasicBlock const* m_handler { nullptr };
     BasicBlock const* m_finalizer { nullptr };

+ 8 - 0
Userland/Libraries/LibJS/Bytecode/Instruction.h

@@ -69,6 +69,14 @@
     O(IteratorToArray)                 \
     O(Jump)                            \
     O(JumpIf)                          \
+    O(JumpGreaterThan)                 \
+    O(JumpGreaterThanEquals)           \
+    O(JumpLessThan)                    \
+    O(JumpLessThanEquals)              \
+    O(JumpLooselyEquals)               \
+    O(JumpLooselyInequals)             \
+    O(JumpStrictlyEquals)              \
+    O(JumpStrictlyInequals)            \
     O(JumpNullish)                     \
     O(JumpUndefined)                   \
     O(LeaveLexicalEnvironment)         \

+ 62 - 21
Userland/Libraries/LibJS/Bytecode/Interpreter.cpp

@@ -119,6 +119,26 @@ NonnullOwnPtr<CallFrame> CallFrame::create(size_t register_count)
     return call_frame;
 }
 
+// Slow case for the fused JumpLooselyInequals instruction: wraps the negated
+// result of is_loosely_equal() in a Value. Any error is propagated by TRY().
+static ThrowCompletionOr<Value> loosely_inequals(VM& vm, Value src1, Value src2)
+{
+    auto const are_equal = TRY(is_loosely_equal(vm, src1, src2));
+    return Value(!are_equal);
+}
+
+// Slow case for the fused JumpLooselyEquals instruction: wraps the result of
+// is_loosely_equal() in a Value. Any error is propagated by TRY().
+static ThrowCompletionOr<Value> loosely_equals(VM& vm, Value src1, Value src2)
+{
+    auto const are_equal = TRY(is_loosely_equal(vm, src1, src2));
+    return Value(are_equal);
+}
+
+// Slow case for the fused JumpStrictlyInequals instruction: wraps the negated
+// result of is_strictly_equal() in a Value. The VM argument is unused; it is
+// accepted so that all slow-case helpers share one signature.
+static ThrowCompletionOr<Value> strict_inequals(VM&, Value src1, Value src2)
+{
+    auto const are_equal = is_strictly_equal(src1, src2);
+    return Value(!are_equal);
+}
+
+// Slow case for the fused JumpStrictlyEquals instruction: wraps the result of
+// is_strictly_equal() in a Value. The VM argument is unused; it is accepted so
+// that all slow-case helpers share one signature.
+static ThrowCompletionOr<Value> strict_equals(VM&, Value src1, Value src2)
+{
+    auto const are_equal = is_strictly_equal(src1, src2);
+    return Value(are_equal);
+}
+
 Interpreter::Interpreter(VM& vm)
     : m_vm(vm)
 {
@@ -324,6 +344,33 @@ void Interpreter::run_bytecode()
                 else
                     m_current_block = &static_cast<Op::JumpIf const&>(instruction).false_target()->block();
                 goto start;
+
+// Expands to the interpreter-loop case for one fused compare-and-jump instruction.
+//   PreOp          - comparison name; the handled instruction type is Jump##PreOp.
+//   int32_operator - raw C++ operator applied when both operands are Int32.
+//   slow_case      - function called for every other operand combination.
+// If slow_case reports an error, it is stored in `result` and the case is left
+// via `break`. Otherwise the current block becomes the true or false target and
+// dispatch restarts at `start`.
+#define JS_HANDLE_FUSABLE_BINARY_JUMP(PreOp, int32_operator, slow_case) \
+    case Instruction::Type::Jump##PreOp: {                              \
+        auto& jump = static_cast<Op::Jump##PreOp const&>(instruction);  \
+        auto lhs = get(jump.lhs());                                     \
+        auto rhs = get(jump.rhs());                                     \
+        bool condition = false;                                         \
+        if (lhs.is_int32() && rhs.is_int32()) {                         \
+            condition = lhs.as_i32() int32_operator rhs.as_i32();       \
+        } else {                                                        \
+            auto condition_or_error = slow_case(vm(), lhs, rhs);        \
+            if (condition_or_error.is_error()) {                        \
+                result = condition_or_error.release_error();            \
+                break;                                                  \
+            }                                                           \
+            condition = condition_or_error.value().to_boolean();        \
+        }                                                               \
+                                                                        \
+        if (condition)                                                  \
+            m_current_block = &jump.true_target()->block();             \
+        else                                                            \
+            m_current_block = &jump.false_target()->block();            \
+        goto start;                                                     \
+    }
+
+                JS_ENUMERATE_FUSABLE_BINARY_OPS(JS_HANDLE_FUSABLE_BINARY_JUMP)
+// The helper macro is only meaningful inside this switch; do not let it leak
+// into the rest of the translation unit.
+#undef JS_HANDLE_FUSABLE_BINARY_JUMP
+
             case Instruction::Type::JumpNullish:
                 if (get(static_cast<Op::JumpNullish const&>(instruction).condition()).is_nullish())
                     m_current_block = &static_cast<Op::Jump const&>(instruction).true_target()->block();
@@ -545,6 +592,7 @@ static PassManager& optimization_pipeline()
         pm->add<Passes::UnifySameBlocks>();
         pm->add<Passes::GenerateCFG>();
         pm->add<Passes::MergeBlocks>();
+        pm->add<Passes::Peephole>();
         pm->add<Passes::GenerateCFG>();
         pm->add<Passes::PlaceBlocks>();
         return pm;
@@ -587,7 +635,6 @@ Variant<NonnullOwnPtr<CallFrame>, CallFrame*> Interpreter::pop_call_frame()
     m_current_call_frame = m_call_frames.is_empty() ? Span<Value> {} : this->call_frame().registers();
     return frame;
 }
-
 }
 
 namespace JS::Bytecode {
@@ -642,26 +689,6 @@ ThrowCompletionOr<void> End::execute_impl(Bytecode::Interpreter&) const
     __builtin_unreachable();
 }
 
-static ThrowCompletionOr<Value> loosely_inequals(VM& vm, Value src1, Value src2)
-{
-    return Value(!TRY(is_loosely_equal(vm, src1, src2)));
-}
-
-static ThrowCompletionOr<Value> loosely_equals(VM& vm, Value src1, Value src2)
-{
-    return Value(TRY(is_loosely_equal(vm, src1, src2)));
-}
-
-static ThrowCompletionOr<Value> strict_inequals(VM&, Value src1, Value src2)
-{
-    return Value(!is_strictly_equal(src1, src2));
-}
-
-static ThrowCompletionOr<Value> strict_equals(VM&, Value src1, Value src2)
-{
-    return Value(is_strictly_equal(src1, src2));
-}
-
 #define JS_DEFINE_EXECUTE_FOR_COMMON_BINARY_OP(OpTitleCase, op_snake_case)                      \
     ThrowCompletionOr<void> OpTitleCase::execute_impl(Bytecode::Interpreter& interpreter) const \
     {                                                                                           \
@@ -1225,6 +1252,20 @@ ThrowCompletionOr<void> JumpIf::execute_impl(Bytecode::Interpreter&) const
     __builtin_unreachable();
 }
 
+// Defines execute_impl() and to_byte_string_impl() for every fused Jump##PreOp
+// instruction. execute_impl() is marked unreachable; presumably, as for JumpIf,
+// these instructions are only ever executed by the interpreter loop.
+// to_byte_string_impl() prints the mnemonic, both operands and the true/false
+// targets.
+#define JS_DEFINE_FUSABLE_BINARY_OP(PreOp, ...)                                                                  \
+    ThrowCompletionOr<void> Jump##PreOp::execute_impl(Bytecode::Interpreter&) const { __builtin_unreachable(); } \
+                                                                                                                 \
+    ByteString Jump##PreOp::to_byte_string_impl(Bytecode::Executable const& executable) const                    \
+    {                                                                                                            \
+        return ByteString::formatted("Jump" #PreOp " {}, {}, \033[32mtrue\033[0m:{} \033[32mfalse\033[0m:{}",    \
+            format_operand("lhs"sv, m_lhs, executable),                                                          \
+            format_operand("rhs"sv, m_rhs, executable),                                                          \
+            *m_true_target,                                                                                      \
+            *m_false_target);                                                                                    \
+    }
+
+JS_ENUMERATE_FUSABLE_BINARY_OPS(JS_DEFINE_FUSABLE_BINARY_OP)
+// The helper macro has served its purpose; keep it out of the rest of the file.
+#undef JS_DEFINE_FUSABLE_BINARY_OP
+
 ThrowCompletionOr<void> JumpUndefined::execute_impl(Bytecode::Interpreter&) const
 {
     // Handled in the interpreter loop.

+ 34 - 0
Userland/Libraries/LibJS/Bytecode/Op.h

@@ -1123,6 +1123,40 @@ private:
     Operand m_condition;
 };
 
+// X-macro listing the comparisons that can be fused with a following JumpIf.
+// Each entry is X(OpName, int32_operator, slow_case): OpName is the comparison
+// instruction (its fused form is Jump##OpName), and slow_case is the function
+// the interpreter calls when the operands are not both Int32.
+// NOTE: The raw operator is used for comparing two Int32 values.
+#define JS_ENUMERATE_FUSABLE_BINARY_OPS(X)        \
+    X(GreaterThan, >, greater_than)               \
+    X(GreaterThanEquals, >=, greater_than_equals) \
+    X(LessThan, <, less_than)                     \
+    X(LessThanEquals, <=, less_than_equals)       \
+    X(LooselyEquals, ==, loosely_equals)          \
+    X(LooselyInequals, !=, loosely_inequals)      \
+    X(StrictlyEquals, ==, strict_equals)          \
+    X(StrictlyInequals, !=, strict_inequals)
+
+// Declares one Jump##PreOp instruction class per fusable comparison.
+// Each class derives from Jump, forwards the true/false target labels and its
+// own size to the Jump constructor, and stores the two comparison operands,
+// exposed through lhs() and rhs(). Only the first macro argument is used here;
+// the operator and slow-case function are consumed elsewhere.
+#define JS_DECLARE_FUSED_JUMP(PreOp, ...)                                                     \
+    class Jump##PreOp final : public Jump {                                                   \
+    public:                                                                                   \
+        explicit Jump##PreOp(Operand lhs, Operand rhs, Label true_target, Label false_target) \
+            : Jump(Type::Jump##PreOp, move(true_target), move(false_target), sizeof(*this))   \
+            , m_lhs(lhs)                                                                      \
+            , m_rhs(rhs)                                                                      \
+        {                                                                                     \
+        }                                                                                     \
+        ThrowCompletionOr<void> execute_impl(Bytecode::Interpreter&) const;                   \
+        ByteString to_byte_string_impl(Bytecode::Executable const&) const;                    \
+                                                                                              \
+        Operand lhs() const { return m_lhs; }                                                 \
+        Operand rhs() const { return m_rhs; }                                                 \
+                                                                                              \
+    private:                                                                                  \
+        Operand m_lhs;                                                                        \
+        Operand m_rhs;                                                                        \
+    };
+
+JS_ENUMERATE_FUSABLE_BINARY_OPS(JS_DECLARE_FUSED_JUMP)
+#undef JS_DECLARE_FUSED_JUMP
+
 class JumpNullish final : public Jump {
 public:
     explicit JumpNullish(Operand condition, Label true_target, Label false_target)

+ 7 - 0
Userland/Libraries/LibJS/Bytecode/Pass/GenerateCFG.cpp

@@ -63,6 +63,13 @@ static void generate_cfg_for_block(BasicBlock const& current_block, PassPipeline
             enter_label(true_target, current_block);
             return;
         }
+
+// Emit one `case` label per fused compare-and-jump instruction so that they
+// fall through into the handling shared with the jump cases that follow.
+#define JS_FUSED_JUMP_CASE_LABEL(PreOp, ...) \
+    case Jump##PreOp:
+
+            JS_ENUMERATE_FUSABLE_BINARY_OPS(JS_FUSED_JUMP_CASE_LABEL)
+#undef JS_FUSED_JUMP_CASE_LABEL
+
         case JumpIf:
         case JumpNullish:
         case JumpUndefined: {

+ 90 - 0
Userland/Libraries/LibJS/Bytecode/Pass/Peephole.cpp

@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2024, Andreas Kling <kling@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibJS/Bytecode/PassManager.h>
+
+namespace JS::Bytecode::Passes {
+
+// Rebuilds every basic block of the executable, fusing a comparison that is
+// immediately followed by a JumpIf on the comparison's result into a single
+// compare-and-jump instruction (for example, LessThan + JumpIf => JumpLessThan).
+// All other instructions are copied over unchanged. Once every block has been
+// rebuilt, references to the old blocks (handlers, finalizers and references
+// held by instructions) are redirected to their replacements.
+void Peephole::perform(PassPipelineExecutable& executable)
+{
+    started();
+
+    // Fuse compare-followed-by-jump into a single compare-and-jump
+    // This is a very common pattern in bytecode, and it's nice to have it as a single instruction
+    // For example, LessThan + JumpIf => JumpLessThan
+
+    // Maps each original block to the block that replaces it.
+    HashMap<BasicBlock const*, BasicBlock const*> replacement_blocks;
+    // Owns the original blocks until this function returns; their addresses are
+    // still needed as keys when references are rewritten below.
+    Vector<NonnullOwnPtr<BasicBlock>> replaced_blocks;
+
+    for (size_t i = 0; i < executable.executable.basic_blocks.size(); ++i) {
+        auto& block = executable.executable.basic_blocks[i];
+        auto new_block = BasicBlock::create(block->name());
+        // The handler/finalizer may still point at an original block here; that
+        // is fixed up by update_block_references() below.
+        if (block->handler())
+            new_block->set_handler(*block->handler());
+        if (block->finalizer())
+            new_block->set_finalizer(*block->finalizer());
+        replacement_blocks.set(block.ptr(), new_block.ptr());
+
+        InstructionStreamIterator it { block->instruction_stream() };
+        while (!it.at_end()) {
+            auto const& instruction = *it;
+            ++it;
+
+            if (!it.at_end()) {
+                auto const& next_instruction = *it;
+                if (next_instruction.type() == Instruction::Type::JumpIf) {
+                    auto const& jump = static_cast<Op::JumpIf const&>(next_instruction);
+
+// Only fuse when the JumpIf actually tests the comparison's result. A comparison
+// that merely happens to precede a JumpIf on some other operand must be left
+// alone (it is copied verbatim below) instead of aborting the process.
+// NOTE(review): the fused instruction no longer writes compare.dst(). That is
+// only safe if nothing reads that operand afterwards - TODO confirm that the
+// generator guarantees this for every compare+JumpIf pair that reaches here.
+#define DO_FUSE_JUMP(PreOp, ...)                                              \
+    if (instruction.type() == Instruction::Type::PreOp) {                     \
+        auto const& compare = static_cast<Op::PreOp const&>(instruction);     \
+        if (jump.condition() == compare.dst()) {                              \
+            new_block->append<Op::Jump##PreOp>(                               \
+                compare.source_record().source_start_offset,                  \
+                compare.source_record().source_end_offset,                    \
+                compare.lhs(),                                                \
+                compare.rhs(),                                                \
+                *jump.true_target(),                                          \
+                *jump.false_target());                                        \
+            ++it;                                                             \
+            VERIFY(it.at_end());                                              \
+            continue;                                                         \
+        }                                                                     \
+    }
+                    JS_ENUMERATE_FUSABLE_BINARY_OPS(DO_FUSE_JUMP)
+#undef DO_FUSE_JUMP
+                }
+            }
+
+            // Not fusable: copy the instruction's raw bytes into the new block.
+            auto slot_offset = new_block->size();
+            new_block->grow(instruction.length());
+            memcpy(new_block->data() + slot_offset, &instruction, instruction.length());
+            if (instruction.is_terminator())
+                new_block->terminate(slot_offset);
+        }
+        replaced_blocks.append(move(executable.executable.basic_blocks[i]));
+        executable.executable.basic_blocks[i] = move(new_block);
+    }
+
+    // Redirects every reference to `original` (handler, finalizer, or a block
+    // reference inside an instruction) in the rebuilt blocks to `replacement`.
+    auto update_block_references = [&](BasicBlock const& original, BasicBlock const& replacement) {
+        for (auto& block : executable.executable.basic_blocks) {
+            InstructionStreamIterator it { block->instruction_stream() };
+            if (block->handler() == &original)
+                block->set_handler(replacement);
+            if (block->finalizer() == &original)
+                block->set_finalizer(replacement);
+            while (!it.at_end()) {
+                auto const& instruction = *it;
+                ++it;
+                // The iterator only hands out const references, but the
+                // instruction lives in the block's own buffer and is patched in place.
+                const_cast<Instruction&>(instruction).replace_references(original, replacement);
+            }
+        }
+    };
+    for (auto& entry : replacement_blocks)
+        update_block_references(*entry.key, *entry.value);
+
+    finished();
+}
+
+}

+ 9 - 0
Userland/Libraries/LibJS/Bytecode/PassManager.h

@@ -107,6 +107,15 @@ private:
     virtual void perform(PassPipelineExecutable&) override;
 };
 
+// Optimization pass that performs small, local rewrites on the bytecode of
+// each basic block (see Peephole::perform()).
+class Peephole : public Pass {
+public:
+    Peephole() = default;
+    virtual ~Peephole() override = default;
+
+private:
+    virtual void perform(PassPipelineExecutable&) override;
+};
+
 class DumpCFG : public Pass {
 public:
     DumpCFG(FILE* file)

+ 1 - 0
Userland/Libraries/LibJS/CMakeLists.txt

@@ -12,6 +12,7 @@ set(SOURCES
     Bytecode/Pass/DumpCFG.cpp
     Bytecode/Pass/GenerateCFG.cpp
     Bytecode/Pass/MergeBlocks.cpp
+    Bytecode/Pass/Peephole.cpp
     Bytecode/Pass/PlaceBlocks.cpp
     Bytecode/Pass/UnifySameBlocks.cpp
     Bytecode/RegexTable.cpp