Browse Source

Kernel: Add polling support to NVMe

Add polling support to NVMe so that it does not use an interrupt to
complete an IO but instead actively polls for completion. This is
probably not very efficient in terms of CPU usage, but not using
interrupts to complete an IO is beneficial at the moment, as there
is no MSI(X) support, and it can reduce the latency of an IO on a very
fast NVMe device.

The NVMeQueue class has been made the base class for NVMeInterruptQueue
and NVMePollQueue. The factory function `NVMeQueue::try_create` will
return the appropriate queue to the controller based on the polling
boot parameter.

The polling mode can be enabled by adding an extra boot parameter:
`nvme_poll`.
Pankaj Raghav 3 years ago
parent
commit
d234e6b801

+ 2 - 0
Kernel/CMakeLists.txt

@@ -103,6 +103,8 @@ set(KERNEL_SOURCES
     Storage/Partition/PartitionTable.cpp
     Storage/NVMe/NVMeController.cpp
     Storage/NVMe/NVMeNameSpace.cpp
+    Storage/NVMe/NVMeInterruptQueue.cpp
+    Storage/NVMe/NVMePollQueue.cpp
     Storage/NVMe/NVMeQueue.cpp
     Storage/StorageDevice.cpp
     Storage/RamdiskController.cpp

+ 13 - 13
Kernel/Storage/NVMe/NVMeController.cpp

@@ -12,6 +12,7 @@
 #include <Kernel/Arch/x86/Processor.h>
 #include <Kernel/Arch/x86/SafeMem.h>
 #include <Kernel/Bus/PCI/API.h>
+#include <Kernel/CommandLine.h>
 #include <Kernel/Devices/Device.h>
 #include <Kernel/FileSystem/ProcFS.h>
 #include <Kernel/Sections.h>
@@ -19,10 +20,10 @@
 namespace Kernel {
 Atomic<u8> NVMeController::controller_id {};
 
-UNMAP_AFTER_INIT ErrorOr<NonnullRefPtr<NVMeController>> NVMeController::try_initialize(const Kernel::PCI::DeviceIdentifier& device_identifier)
+UNMAP_AFTER_INIT ErrorOr<NonnullRefPtr<NVMeController>> NVMeController::try_initialize(const Kernel::PCI::DeviceIdentifier& device_identifier, bool is_queue_polled)
 {
     auto controller = TRY(adopt_nonnull_ref_or_enomem(new NVMeController(device_identifier)));
-    TRY(controller->initialize());
+    TRY(controller->initialize(is_queue_polled));
     NVMeController::controller_id++;
     return controller;
 }
@@ -33,11 +34,11 @@ UNMAP_AFTER_INIT NVMeController::NVMeController(const PCI::DeviceIdentifier& dev
 {
 }
 
-UNMAP_AFTER_INIT ErrorOr<void> NVMeController::initialize()
+UNMAP_AFTER_INIT ErrorOr<void> NVMeController::initialize(bool is_queue_polled)
 {
     // Nr of queues = one queue per core
     auto nr_of_queues = Processor::count();
-    auto irq = m_pci_device_id.interrupt_line().value();
+    auto irq = is_queue_polled ? Optional<u8> {} : m_pci_device_id.interrupt_line().value();
 
     PCI::enable_memory_space(m_pci_device_id.address());
     PCI::enable_bus_mastering(m_pci_device_id.address());
@@ -62,7 +63,7 @@ UNMAP_AFTER_INIT ErrorOr<void> NVMeController::initialize()
     // Create an IO queue per core
     for (u32 cpuid = 0; cpuid < nr_of_queues; ++cpuid) {
         // qid is zero is used for admin queue
-        TRY(create_io_queue(irq, cpuid + 1));
+        TRY(create_io_queue(cpuid + 1, irq));
     }
     TRY(identify_and_init_namespaces());
     return {};
@@ -253,7 +254,7 @@ void NVMeController::complete_current_request([[maybe_unused]] AsyncDeviceReques
     VERIFY_NOT_REACHED();
 }
 
-UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_admin_queue(u8 irq)
+UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_admin_queue(Optional<u8> irq)
 {
     auto qdepth = get_admin_q_dept();
     OwnPtr<Memory::Region> cq_dma_region;
@@ -281,8 +282,6 @@ UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_admin_queue(u8 irq)
     }
     auto doorbell_regs = TRY(Memory::map_typed_writable<volatile DoorbellRegister>(PhysicalAddress(m_bar + REG_SQ0TDBL_START)));
 
-    m_admin_queue = TRY(NVMeQueue::try_create(0, irq, qdepth, move(cq_dma_region), cq_dma_pages, move(sq_dma_region), sq_dma_pages, move(doorbell_regs)));
-
     m_controller_regs->acq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(cq_dma_pages.first().paddr().as_ptr()));
     m_controller_regs->asq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(sq_dma_pages.first().paddr().as_ptr()));
 
@@ -291,12 +290,13 @@ UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_admin_queue(u8 irq)
         return EFAULT;
     }
     set_admin_queue_ready_flag();
-    m_admin_queue->enable_interrupts();
+    m_admin_queue = TRY(NVMeQueue::try_create(0, irq, qdepth, move(cq_dma_region), cq_dma_pages, move(sq_dma_region), sq_dma_pages, move(doorbell_regs)));
+
     dbgln_if(NVME_DEBUG, "NVMe: Admin queue created");
     return {};
 }
 
-UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_io_queue(u8 irq, u8 qid)
+UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_io_queue(u8 qid, Optional<u8> irq)
 {
     OwnPtr<Memory::Region> cq_dma_region;
     NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_pages;
@@ -326,7 +326,8 @@ UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_io_queue(u8 irq, u8 qid)
         sub.create_cq.cqid = qid;
         // The queue size is 0 based
         sub.create_cq.qsize = AK::convert_between_host_and_little_endian(IO_QUEUE_SIZE - 1);
-        auto flags = QUEUE_IRQ_ENABLED | QUEUE_PHY_CONTIGUOUS;
+        auto flags = irq.has_value() ? QUEUE_IRQ_ENABLED : QUEUE_IRQ_DISABLED;
+        flags |= QUEUE_PHY_CONTIGUOUS;
         // TODO: Eventually move to MSI.
         // For now using pin based interrupts. Clear the first 16 bits
         // to use pin-based interrupts.
@@ -340,7 +341,7 @@ UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_io_queue(u8 irq, u8 qid)
         sub.create_sq.sqid = qid;
         // The queue size is 0 based
         sub.create_sq.qsize = AK::convert_between_host_and_little_endian(IO_QUEUE_SIZE - 1);
-        auto flags = QUEUE_IRQ_ENABLED | QUEUE_PHY_CONTIGUOUS;
+        auto flags = QUEUE_PHY_CONTIGUOUS;
         sub.create_sq.cqid = qid;
         sub.create_sq.sq_flags = AK::convert_between_host_and_little_endian(flags);
         submit_admin_command(sub, true);
@@ -350,7 +351,6 @@ UNMAP_AFTER_INIT ErrorOr<void> NVMeController::create_io_queue(u8 irq, u8 qid)
     auto doorbell_regs = TRY(Memory::map_typed_writable<volatile DoorbellRegister>(PhysicalAddress(m_bar + queue_doorbell_offset)));
 
     m_queues.append(TRY(NVMeQueue::try_create(qid, irq, IO_QUEUE_SIZE, move(cq_dma_region), cq_dma_pages, move(sq_dma_region), sq_dma_pages, move(doorbell_regs))));
-    m_queues.last().enable_interrupts();
     dbgln_if(NVME_DEBUG, "NVMe: Created IO Queue with QID{}", m_queues.size());
     return {};
 }

+ 4 - 4
Kernel/Storage/NVMe/NVMeController.h

@@ -26,8 +26,8 @@ namespace Kernel {
 class NVMeController : public PCI::Device
     , public StorageController {
 public:
-    static ErrorOr<NonnullRefPtr<NVMeController>> try_initialize(PCI::DeviceIdentifier const&);
-    ErrorOr<void> initialize();
+    static ErrorOr<NonnullRefPtr<NVMeController>> try_initialize(PCI::DeviceIdentifier const&, bool is_queue_polled);
+    ErrorOr<void> initialize(bool is_queue_polled);
     explicit NVMeController(PCI::DeviceIdentifier const&);
     RefPtr<StorageDevice> device(u32 index) const override;
     size_t devices_count() const override;
@@ -58,8 +58,8 @@ public:
 private:
     ErrorOr<void> identify_and_init_namespaces();
     Tuple<u64, u8> get_ns_features(IdentifyNamespace& identify_data_struct);
-    ErrorOr<void> create_admin_queue(u8 irq);
-    ErrorOr<void> create_io_queue(u8 irq, u8 qid);
+    ErrorOr<void> create_admin_queue(Optional<u8> irq);
+    ErrorOr<void> create_io_queue(u8 qid, Optional<u8> irq);
     void calculate_doorbell_stride()
     {
         m_dbl_stride = (m_controller_regs->cap >> CAP_DBL_SHIFT) & CAP_DBL_MASK;

+ 1 - 0
Kernel/Storage/NVMe/NVMeDefinitions.h

@@ -125,6 +125,7 @@ enum IOCommandOpcode {
 // FLAGS
 static constexpr u8 QUEUE_PHY_CONTIGUOUS = (1 << 0);
 static constexpr u8 QUEUE_IRQ_ENABLED = (1 << 1);
+static constexpr u8 QUEUE_IRQ_DISABLED = (0 << 1);
 
 struct [[gnu::packed]] NVMeCompletion {
     LittleEndian<u32> cmd_spec;

+ 57 - 0
Kernel/Storage/NVMe/NVMeInterruptQueue.cpp

@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "NVMeInterruptQueue.h"
+#include "Kernel/Devices/BlockDevice.h"
+#include "NVMeDefinitions.h"
+#include <Kernel/WorkQueue.h>
+
+namespace Kernel {
+
+UNMAP_AFTER_INIT NVMeInterruptQueue::NVMeInterruptQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs)
+    : NVMeQueue(move(rw_dma_region), rw_dma_page, qid, q_depth, move(cq_dma_region), cq_dma_page, move(sq_dma_region), sq_dma_page, move(db_regs))
+    , IRQHandler(irq)
+{
+    enable_irq();
+}
+
+bool NVMeInterruptQueue::handle_irq(const RegisterState&)
+{
+    SpinlockLocker lock(m_request_lock);
+    return process_cq() ? true : false;
+}
+
+void NVMeInterruptQueue::submit_sqe(NVMeSubmission& sub)
+{
+    NVMeQueue::submit_sqe(sub);
+}
+
+void NVMeInterruptQueue::complete_current_request(u16 status)
+{
+    VERIFY(m_request_lock.is_locked());
+
+    g_io_work->queue([this, status]() {
+        SpinlockLocker lock(m_request_lock);
+        auto current_request = m_current_request;
+        m_current_request.clear();
+        if (status) {
+            lock.unlock();
+            current_request->complete(AsyncBlockDeviceRequest::Failure);
+            return;
+        }
+        if (current_request->request_type() == AsyncBlockDeviceRequest::RequestType::Read) {
+            if (auto result = current_request->write_to_buffer(current_request->buffer(), m_rw_dma_region->vaddr().as_ptr(), 512 * current_request->block_count()); result.is_error()) {
+                lock.unlock();
+                current_request->complete(AsyncDeviceRequest::MemoryFault);
+                return;
+            }
+        }
+        lock.unlock();
+        current_request->complete(AsyncDeviceRequest::Success);
+        return;
+    });
+}
+}

+ 24 - 0
Kernel/Storage/NVMe/NVMeInterruptQueue.h

@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <Kernel/Storage/NVMe/NVMeQueue.h>
+
+namespace Kernel {
+
+class NVMeInterruptQueue : public NVMeQueue
+    , public IRQHandler {
+public:
+    NVMeInterruptQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs);
+    void submit_sqe(NVMeSubmission& submission) override;
+    virtual ~NVMeInterruptQueue() override {};
+
+private:
+    virtual void complete_current_request(u16 status) override;
+    bool handle_irq(RegisterState const&) override;
+};
+}

+ 44 - 0
Kernel/Storage/NVMe/NVMePollQueue.cpp

@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "NVMePollQueue.h"
+#include "Kernel/Arch/x86/IO.h"
+#include "Kernel/Devices/BlockDevice.h"
+#include "NVMeDefinitions.h"
+
+namespace Kernel {
+UNMAP_AFTER_INIT NVMePollQueue::NVMePollQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs)
+    : NVMeQueue(move(rw_dma_region), rw_dma_page, qid, q_depth, move(cq_dma_region), cq_dma_page, move(sq_dma_region), sq_dma_page, move(db_regs))
+{
+}
+
+void NVMePollQueue::submit_sqe(NVMeSubmission& sub)
+{
+    NVMeQueue::submit_sqe(sub);
+    SpinlockLocker lock_cq(m_cq_lock);
+    while (!process_cq()) {
+        IO::delay(1);
+    }
+}
+
+void NVMePollQueue::complete_current_request(u16 status)
+{
+    auto current_request = m_current_request;
+    m_current_request.clear();
+    if (status) {
+        current_request->complete(AsyncBlockDeviceRequest::Failure);
+        return;
+    }
+    if (current_request->request_type() == AsyncBlockDeviceRequest::RequestType::Read) {
+        if (auto result = current_request->write_to_buffer(current_request->buffer(), m_rw_dma_region->vaddr().as_ptr(), 512 * current_request->block_count()); result.is_error()) {
+            current_request->complete(AsyncDeviceRequest::MemoryFault);
+            return;
+        }
+    }
+    current_request->complete(AsyncDeviceRequest::Success);
+    return;
+}
+}

+ 22 - 0
Kernel/Storage/NVMe/NVMePollQueue.h

@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2022, Pankaj R <pankydev8@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <Kernel/Storage/NVMe/NVMeQueue.h>
+
+namespace Kernel {
+
+class NVMePollQueue : public NVMeQueue {
+public:
+    NVMePollQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs);
+    void submit_sqe(NVMeSubmission& submission) override;
+    virtual ~NVMePollQueue() override {};
+
+private:
+    virtual void complete_current_request(u16 status) override;
+};
+}

+ 16 - 37
Kernel/Storage/NVMe/NVMeQueue.cpp

@@ -6,36 +6,38 @@
 
 #include "NVMeQueue.h"
 #include "Kernel/StdLib.h"
+#include "NVMeQueue.h"
 #include <Kernel/Arch/x86/IO.h>
-#include <Kernel/Scheduler.h>
 #include <Kernel/Storage/NVMe/NVMeController.h>
-#include <Kernel/WorkQueue.h>
+#include <Kernel/Storage/NVMe/NVMeInterruptQueue.h>
+#include <Kernel/Storage/NVMe/NVMePollQueue.h>
 
 namespace Kernel {
-
-ErrorOr<NonnullRefPtr<NVMeQueue>> NVMeQueue::try_create(u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs)
+ErrorOr<NonnullRefPtr<NVMeQueue>> NVMeQueue::try_create(u16 qid, Optional<u8> irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs)
 {
     // Note: Allocate DMA region for RW operation. For now the requests don't exceed more than 4096 bytes (Storage device takes care of it)
     RefPtr<Memory::PhysicalPage> rw_dma_page;
     auto rw_dma_region = TRY(MM.allocate_dma_buffer_page("NVMe Queue Read/Write DMA"sv, Memory::Region::Access::ReadWrite, rw_dma_page));
-    auto queue = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) NVMeQueue(move(rw_dma_region), *rw_dma_page, qid, irq, q_depth, move(cq_dma_region), cq_dma_page, move(sq_dma_region), sq_dma_page, move(db_regs))));
+    if (!irq.has_value()) {
+        auto queue = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) NVMePollQueue(move(rw_dma_region), *rw_dma_page, qid, q_depth, move(cq_dma_region), cq_dma_page, move(sq_dma_region), sq_dma_page, move(db_regs))));
+        return queue;
+    }
+    auto queue = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) NVMeInterruptQueue(move(rw_dma_region), *rw_dma_page, qid, irq.value(), q_depth, move(cq_dma_region), cq_dma_page, move(sq_dma_region), sq_dma_page, move(db_regs))));
     return queue;
 }
 
-UNMAP_AFTER_INIT NVMeQueue::NVMeQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs)
-    : IRQHandler(irq)
+UNMAP_AFTER_INIT NVMeQueue::NVMeQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs)
+    : m_current_request(nullptr)
+    , m_rw_dma_region(move(rw_dma_region))
     , m_qid(qid)
     , m_admin_queue(qid == 0)
-    , m_irq(irq)
     , m_qdepth(q_depth)
     , m_cq_dma_region(move(cq_dma_region))
     , m_cq_dma_page(cq_dma_page)
     , m_sq_dma_region(move(sq_dma_region))
     , m_sq_dma_page(sq_dma_page)
-    , m_rw_dma_region(move(rw_dma_region))
     , m_db_regs(move(db_regs))
     , m_rw_dma_page(rw_dma_page)
-    , m_current_request(nullptr)
 
 {
     m_sqe_array = { reinterpret_cast<NVMeSubmission*>(m_sq_dma_region->vaddr().as_ptr()), m_qdepth };
@@ -59,7 +61,7 @@ void NVMeQueue::update_cqe_head()
     }
 }
 
-bool NVMeQueue::handle_irq(const RegisterState&)
+u32 NVMeQueue::process_cq()
 {
     u32 nr_of_processed_cqes = 0;
     while (cqe_available()) {
@@ -76,7 +78,6 @@ bool NVMeQueue::handle_irq(const RegisterState&)
             // everything is operated on a single request similar to BMIDE driver.
             // TODO: Remove this constraint eventually.
             VERIFY(cmdid == m_prev_sq_tail);
-            SpinlockLocker lock(m_request_lock);
             if (m_current_request) {
                 complete_current_request(status);
             }
@@ -86,7 +87,7 @@ bool NVMeQueue::handle_irq(const RegisterState&)
     if (nr_of_processed_cqes) {
         update_cq_doorbell();
     }
-    return nr_of_processed_cqes ? true : false;
+    return nr_of_processed_cqes;
 }
 
 void NVMeQueue::submit_sqe(NVMeSubmission& sub)
@@ -126,7 +127,7 @@ u16 NVMeQueue::submit_sync_sqe(NVMeSubmission& sub)
                 index = m_qdepth - 1;
         }
         cqe_cid = m_cqe_array[index].command_id;
-        Scheduler::yield();
+        IO::delay(1);
     } while (cid != cqe_cid);
 
     auto status = CQ_STATUS_FIELD(m_cqe_array[m_cq_head].status);
@@ -171,29 +172,7 @@ void NVMeQueue::write(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32
     submit_sqe(sub);
 }
 
-void NVMeQueue::complete_current_request(u16 status)
+UNMAP_AFTER_INIT NVMeQueue::~NVMeQueue()
 {
-    VERIFY(m_request_lock.is_locked());
-
-    g_io_work->queue([this, status]() {
-        SpinlockLocker lock(m_request_lock);
-        auto current_request = m_current_request;
-        m_current_request.clear();
-        if (status) {
-            lock.unlock();
-            current_request->complete(AsyncBlockDeviceRequest::Failure);
-            return;
-        }
-        if (current_request->request_type() == AsyncBlockDeviceRequest::RequestType::Read) {
-            if (auto result = current_request->write_to_buffer(current_request->buffer(), m_rw_dma_region->vaddr().as_ptr(), 512 * current_request->block_count()); result.is_error()) {
-                lock.unlock();
-                current_request->complete(AsyncDeviceRequest::MemoryFault);
-                return;
-            }
-        }
-        lock.unlock();
-        current_request->complete(AsyncDeviceRequest::Success);
-        return;
-    });
 }
 }

+ 19 - 20
Kernel/Storage/NVMe/NVMeQueue.h

@@ -27,45 +27,47 @@ struct DoorbellRegister {
 };
 
 class AsyncBlockDeviceRequest;
-class NVMeQueue : public IRQHandler
-    , public RefCounted<NVMeQueue> {
+class NVMeQueue : public RefCounted<NVMeQueue> {
 public:
-    static ErrorOr<NonnullRefPtr<NVMeQueue>> try_create(u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs);
+    static ErrorOr<NonnullRefPtr<NVMeQueue>> try_create(u16 qid, Optional<u8> irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs);
     bool is_admin_queue() { return m_admin_queue; };
-    void submit_sqe(NVMeSubmission&);
     u16 submit_sync_sqe(NVMeSubmission&);
     void read(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count);
     void write(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count);
-    void enable_interrupts() { enable_irq(); };
-    void disable_interrupts() { disable_irq(); };
+    virtual void submit_sqe(NVMeSubmission&);
+    virtual ~NVMeQueue();
 
-private:
-    NVMeQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs);
-
-    virtual bool handle_irq(const RegisterState&) override;
+protected:
+    u32 process_cq();
+    void update_sq_doorbell()
+    {
+        m_db_regs->sq_tail = m_sq_tail;
+    }
+    NVMeQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs);
 
+private:
     bool cqe_available();
     void update_cqe_head();
-    void complete_current_request(u16 status);
+    virtual void complete_current_request(u16 status) = 0;
     void update_cq_doorbell()
     {
         m_db_regs->cq_head = m_cq_head;
     }
 
-    void update_sq_doorbell()
-    {
-        m_db_regs->sq_tail = m_sq_tail;
-    }
+protected:
+    Spinlock m_cq_lock { LockRank::Interrupts };
+    RefPtr<AsyncBlockDeviceRequest> m_current_request;
+    NonnullOwnPtr<Memory::Region> m_rw_dma_region;
+    Spinlock m_request_lock;
 
+private:
     u16 m_qid {};
     u8 m_cq_valid_phase { 1 };
     u16 m_sq_tail {};
     u16 m_prev_sq_tail {};
     u16 m_cq_head {};
     bool m_admin_queue { false };
-    u8 m_irq {};
     u32 m_qdepth {};
-    Spinlock m_cq_lock { LockRank::Interrupts };
     Spinlock m_sq_lock { LockRank::Interrupts };
     OwnPtr<Memory::Region> m_cq_dma_region;
     NonnullRefPtrVector<Memory::PhysicalPage> m_cq_dma_page;
@@ -73,10 +75,7 @@ private:
     OwnPtr<Memory::Region> m_sq_dma_region;
     NonnullRefPtrVector<Memory::PhysicalPage> m_sq_dma_page;
     Span<NVMeCompletion> m_cqe_array;
-    NonnullOwnPtr<Memory::Region> m_rw_dma_region;
     Memory::TypedMapping<volatile DoorbellRegister> m_db_regs;
     NonnullRefPtr<Memory::PhysicalPage> m_rw_dma_page;
-    Spinlock m_request_lock;
-    RefPtr<AsyncBlockDeviceRequest> m_current_request;
 };
 }

+ 6 - 6
Kernel/Storage/StorageManagement.cpp

@@ -44,7 +44,7 @@ bool StorageManagement::boot_argument_contains_partition_uuid()
     return m_boot_argument.starts_with(partition_uuid_prefix);
 }
 
-UNMAP_AFTER_INIT void StorageManagement::enumerate_controllers(bool force_pio)
+UNMAP_AFTER_INIT void StorageManagement::enumerate_controllers(bool force_pio, bool nvme_poll)
 {
     VERIFY(m_controllers.is_empty());
 
@@ -60,10 +60,10 @@ UNMAP_AFTER_INIT void StorageManagement::enumerate_controllers(bool force_pio)
                 static constexpr PCI::HardwareID vmd_device = { 0x8086, 0x9a0b };
                 if (device_identifier.hardware_id() == vmd_device) {
                     auto controller = PCI::VolumeManagementDevice::must_create(device_identifier);
-                    PCI::Access::the().add_host_controller_and_enumerate_attached_devices(move(controller), [this](PCI::DeviceIdentifier const& device_identifier) -> void {
+                    PCI::Access::the().add_host_controller_and_enumerate_attached_devices(move(controller), [this, nvme_poll](PCI::DeviceIdentifier const& device_identifier) -> void {
                         auto subclass_code = static_cast<SubclassID>(device_identifier.subclass_code().value());
                         if (subclass_code == SubclassID::NVMeController) {
-                            auto controller = NVMeController::try_initialize(device_identifier);
+                            auto controller = NVMeController::try_initialize(device_identifier, nvme_poll);
                             if (controller.is_error()) {
                                 dmesgln("Unable to initialize NVMe controller: {}", controller.error());
                             } else {
@@ -84,7 +84,7 @@ UNMAP_AFTER_INIT void StorageManagement::enumerate_controllers(bool force_pio)
                 m_controllers.append(AHCIController::initialize(device_identifier));
             }
             if (subclass_code == SubclassID::NVMeController) {
-                auto controller = NVMeController::try_initialize(device_identifier);
+                auto controller = NVMeController::try_initialize(device_identifier, nvme_poll);
                 if (controller.is_error()) {
                     dmesgln("Unable to initialize NVMe controller: {}", controller.error());
                 } else {
@@ -274,11 +274,11 @@ NonnullRefPtr<FileSystem> StorageManagement::root_filesystem() const
     return file_system;
 }
 
-UNMAP_AFTER_INIT void StorageManagement::initialize(StringView root_device, bool force_pio)
+UNMAP_AFTER_INIT void StorageManagement::initialize(StringView root_device, bool force_pio, bool poll)
 {
     VERIFY(s_device_minor_number == 0);
     m_boot_argument = root_device;
-    enumerate_controllers(force_pio);
+    enumerate_controllers(force_pio, poll);
     enumerate_storage_devices();
     enumerate_disk_partitions();
     if (!boot_argument_contains_partition_uuid()) {

+ 2 - 2
Kernel/Storage/StorageManagement.h

@@ -23,7 +23,7 @@ class StorageManagement {
 public:
     StorageManagement();
     static bool initialized();
-    void initialize(StringView boot_argument, bool force_pio);
+    void initialize(StringView boot_argument, bool force_pio, bool nvme_poll);
     static StorageManagement& the();
 
     NonnullRefPtr<FileSystem> root_filesystem() const;
@@ -36,7 +36,7 @@ public:
 private:
     bool boot_argument_contains_partition_uuid();
 
-    void enumerate_controllers(bool force_pio);
+    void enumerate_controllers(bool force_pio, bool nvme_poll);
     void enumerate_storage_devices();
     void enumerate_disk_partitions();
 

+ 1 - 1
Kernel/init.cpp

@@ -330,7 +330,7 @@ void init_stage2(void*)
     (void)SB16::try_detect_and_create();
     AC97::detect();
 
-    StorageManagement::the().initialize(kernel_command_line().root_device(), kernel_command_line().is_force_pio());
+    StorageManagement::the().initialize(kernel_command_line().root_device(), kernel_command_line().is_force_pio(), kernel_command_line().is_nvme_polling_enabled());
     if (VirtualFileSystem::the().mount_root(StorageManagement::the().root_filesystem()).is_error()) {
         PANIC("VirtualFileSystem::mount_root failed");
     }