diff --git a/Kernel/Bus/PCI/Definitions.h b/Kernel/Bus/PCI/Definitions.h
index e4af0afdb6c..1b0faf44113 100644
--- a/Kernel/Bus/PCI/Definitions.h
+++ b/Kernel/Bus/PCI/Definitions.h
@@ -75,6 +75,7 @@ namespace MassStorage {
 enum class SubclassID {
     IDEController = 0x1,
     SATAController = 0x6,
+    NVMeController = 0x8,
 };
 enum class SATAProgIF {
     AHCI = 0x1,
diff --git a/Kernel/CMakeLists.txt b/Kernel/CMakeLists.txt
index f98f5dd1e74..64265bc558b 100644
--- a/Kernel/CMakeLists.txt
+++ b/Kernel/CMakeLists.txt
@@ -97,6 +97,9 @@ set(KERNEL_SOURCES
     Storage/Partition/GUIDPartitionTable.cpp
     Storage/Partition/MBRPartitionTable.cpp
     Storage/Partition/PartitionTable.cpp
+    Storage/NVMe/NVMeController.cpp
+    Storage/NVMe/NVMeNameSpace.cpp
+    Storage/NVMe/NVMeQueue.cpp
     Storage/StorageDevice.cpp
     Storage/RamdiskController.cpp
     Storage/RamdiskDevice.cpp
diff --git a/Kernel/Debug.h.in b/Kernel/Debug.h.in
index 252db7e4916..bbd09d2f870 100644
--- a/Kernel/Debug.h.in
+++ b/Kernel/Debug.h.in
@@ -210,6 +210,10 @@
 #cmakedefine01 NETWORK_TASK_DEBUG
 #endif
 
+#ifndef NVME_DEBUG
+#cmakedefine01 NVME_DEBUG
+#endif
+
 #ifndef OFFD_DEBUG
 #cmakedefine01 OFFD_DEBUG
 #endif
diff --git a/Kernel/Devices/BlockDevice.h b/Kernel/Devices/BlockDevice.h
index 1fdcf211971..a3c3197eb9a 100644
--- a/Kernel/Devices/BlockDevice.h
+++ b/Kernel/Devices/BlockDevice.h
@@ -68,6 +68,8 @@ protected:
         : Device(major, minor)
         , m_block_size(block_size)
     {
+        // 512 is the minimum sector size in most block devices
+        VERIFY(m_block_size >= 512);
     }
 
 private:
diff --git a/Kernel/Storage/NVMe/NVMeController.cpp b/Kernel/Storage/NVMe/NVMeController.cpp
new file mode 100644
index 00000000000..51c78449375
--- /dev/null
+++ b/Kernel/Storage/NVMe/NVMeController.cpp
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2021, Pankaj R
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "NVMeController.h"
+#include "AK/Format.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace Kernel {
+Atomic<u8> NVMeController::controller_id {};
+
+ErrorOr<NonnullRefPtr<NVMeController>> NVMeController::try_initialize(const Kernel::PCI::DeviceIdentifier& device_identifier)
+{
+    auto controller = TRY(adopt_nonnull_ref_or_enomem(new NVMeController(device_identifier)));
+    TRY(controller->initialize());
+    NVMeController::controller_id++;
+    return controller;
+}
+
+NVMeController::NVMeController(const PCI::DeviceIdentifier& device_identifier)
+    : PCI::Device(device_identifier.address())
+    , m_pci_device_id(device_identifier)
+{
+}
+
+ErrorOr<void> NVMeController::initialize()
+{
+    // Number of queues: one queue per core
+    auto nr_of_queues = Processor::count();
+    auto irq = m_pci_device_id.interrupt_line().value();
+
+    PCI::enable_memory_space(m_pci_device_id.address());
+    PCI::enable_bus_mastering(m_pci_device_id.address());
+    m_bar = PCI::get_BAR0(m_pci_device_id.address()) & BAR_ADDR_MASK;
+    static_assert(sizeof(ControllerRegister) == REG_SQ0TDBL_START);
+
+    // Map only up to the doorbell registers for the controller.
+    // Queues will map their respective doorbell registers individually.
+    m_controller_regs = Memory::map_typed_writable<volatile ControllerRegister>(PhysicalAddress(m_bar));
+
+    calculate_doorbell_stride();
+    TRY(create_admin_queue(irq));
+    VERIFY(m_admin_queue_ready == true);
+
+    VERIFY(IO_QUEUE_SIZE < MQES(m_controller_regs->cap));
+    dbgln_if(NVME_DEBUG, "NVMe: IO queue depth is: {}", IO_QUEUE_SIZE);
+
+    // Create an IO queue per core
+    for (u32 cpuid = 0; cpuid < nr_of_queues; ++cpuid) {
+        // qid 0 is reserved for the admin queue
+        TRY(create_io_queue(irq, cpuid + 1));
+    }
+    TRY(identify_and_init_namespaces());
+    return {};
+}
+
+bool NVMeController::reset_controller()
+{
+    volatile u32 cc, csts;
+    csts = m_controller_regs->csts;
+    if ((csts & (1 << CSTS_RDY_BIT)) != 0x1)
+        return false;
+
+    cc = m_controller_regs->cc;
+    cc = cc & ~(1 << CC_EN_BIT);
+
+    m_controller_regs->cc = cc;
+
+    IO::delay(10);
+    full_memory_barrier();
+
+    csts = m_controller_regs->csts;
+    if ((csts & (1 << CSTS_RDY_BIT)) != 0x0)
+        return false;
+
+    return true;
+}
+
+bool NVMeController::start_controller()
+{
+    volatile u32 cc, csts;
+    csts = m_controller_regs->csts;
+    if ((csts & (1 << CSTS_RDY_BIT)) != 0x0)
+        return false;
+
+    cc = m_controller_regs->cc;
+
+    cc = cc | (1 << CC_EN_BIT);
+    cc = cc | (CQ_WIDTH << CC_IOCQES_BIT);
+    cc = cc | (SQ_WIDTH << CC_IOSQES_BIT);
+
+    m_controller_regs->cc = cc;
+
+    IO::delay(10);
+    full_memory_barrier();
+    csts = m_controller_regs->csts;
+    if ((csts & (1 << CSTS_RDY_BIT)) != 0x1)
+        return false;
+
+    return true;
+}
+
+u32 NVMeController::get_admin_q_dept()
+{
+    u32 aqa = m_controller_regs->aqa;
+    // Queue depth is 0-based
+    u32 q_depth = min(ACQ_SIZE(aqa), ASQ_SIZE(aqa)) + 1;
+    dbgln_if(NVME_DEBUG, "NVMe: Admin queue depth is {}", q_depth);
+    return q_depth;
+}
+
+ErrorOr<void> NVMeController::identify_and_init_namespaces()
+{
+    RefPtr<Memory::PhysicalPage> prp_dma_buffer;
+    OwnPtr<Memory::Region> prp_dma_region;
+    auto namespace_data_struct = ByteBuffer::create_zeroed(NVMe_IDENTIFY_SIZE).release_value();
+    u32 active_namespace_list[NVMe_IDENTIFY_SIZE / sizeof(u32)];
+
+    {
+        auto buffer = TRY(MM.allocate_dma_buffer_page("Identify PRP", Memory::Region::Access::ReadWrite, prp_dma_buffer));
+        prp_dma_region = move(buffer);
+    }
+
+    // Get the active namespace list
+    {
+        NVMeSubmission sub {};
+        u16 status = 0;
+        sub.op = OP_ADMIN_IDENTIFY;
+        sub.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(prp_dma_buffer->paddr().as_ptr()));
+        sub.cdw10 = NVMe_CNS_ID_ACTIVE_NS & 0xff;
+        status = submit_admin_command(sub, true);
+        if (status) {
+            dmesgln("Failed to identify active namespaces");
+            return EFAULT;
+        }
+        if (void* fault_at; !safe_memcpy(active_namespace_list, prp_dma_region->vaddr().as_ptr(), NVMe_IDENTIFY_SIZE, fault_at)) {
+            return EFAULT;
+        }
+    }
+    // Get the namespace attributes
+    {
+        NVMeSubmission sub {};
+        IdentifyNamespace id_ns {};
+        u16 status = 0;
+        for (auto nsid : active_namespace_list) {
+            memset(prp_dma_region->vaddr().as_ptr(), 0, NVMe_IDENTIFY_SIZE);
+            // Invalid NS
+            if (nsid == 0)
+                break;
+            sub.op = OP_ADMIN_IDENTIFY;
+            sub.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(prp_dma_buffer->paddr().as_ptr()));
+            sub.cdw10 = NVMe_CNS_ID_NS & 0xff;
+            sub.nsid = nsid;
+            status = submit_admin_command(sub, true);
+            if (status) {
+                dmesgln("Failed to identify namespace with nsid {}", nsid);
+                return EFAULT;
+            }
+            static_assert(sizeof(IdentifyNamespace) == NVMe_IDENTIFY_SIZE);
+            if (void* fault_at; !safe_memcpy(&id_ns, prp_dma_region->vaddr().as_ptr(), NVMe_IDENTIFY_SIZE, fault_at)) {
+                return EFAULT;
+            }
+            auto val = get_ns_features(id_ns);
+            auto block_counts = val.get<0>();
+            auto block_size = 1 << val.get<1>();
+
+            dbgln_if(NVME_DEBUG, "NVMe: Block count is {} and block size is {}", block_counts, block_size);
+
+            m_namespaces.append(TRY(NVMeNameSpace::try_create(m_queues, controller_id.load(), nsid, block_counts, block_size)));
+            m_device_count++;
+            dbgln_if(NVME_DEBUG, "NVMe: Initialized namespace with NSID: {}", nsid);
+        }
+    }
+    return {};
+}
+
+Tuple<u64, u8> NVMeController::get_ns_features(IdentifyNamespace& identify_data_struct)
+{
+    auto flbas = identify_data_struct.flbas & FLBA_SIZE_MASK;
+    auto namespace_size = identify_data_struct.nsze;
+    auto lba_format = identify_data_struct.lbaf[flbas];
+
+    auto lba_size = (lba_format & LBA_SIZE_MASK) >> 16;
+    return Tuple<u64, u8>(namespace_size, lba_size);
+}
+
+RefPtr<StorageDevice> NVMeController::device(u32 index) const
+{
+    return m_namespaces.at(index);
+}
+
+size_t NVMeController::devices_count() const
+{
+    return m_device_count;
+}
+
+bool NVMeController::reset()
+{
+    if (!reset_controller())
+        return false;
+    if (!start_controller())
+        return false;
+    return true;
+}
+
+bool NVMeController::shutdown()
+{
+    TODO();
+    return false;
+}
+
+void NVMeController::complete_current_request([[maybe_unused]] AsyncDeviceRequest::RequestResult result)
+{
+    VERIFY_NOT_REACHED();
+}
+
+ErrorOr<void> NVMeController::create_admin_queue(u8 irq)
+{
+    auto qdepth = get_admin_q_dept();
+    OwnPtr<Memory::Region> cq_dma_region;
+    NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_pages;
+    OwnPtr<Memory::Region> sq_dma_region;
+    NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_pages;
+    auto cq_size = round_up_to_power_of_two(CQ_SIZE(qdepth), 4096);
+    auto sq_size = round_up_to_power_of_two(SQ_SIZE(qdepth), 4096);
+    if (!reset_controller()) {
+        dmesgln("Failed to reset the NVMe controller");
+        return EFAULT;
+    }
+    {
+        auto buffer = TRY(MM.allocate_dma_buffer_pages(cq_size, "Admin CQ queue", Memory::Region::Access::ReadWrite, cq_dma_pages));
+        cq_dma_region = move(buffer);
+    }
+
+    // The phase bit is important to determine completion, so zero out the space
+    // so that we don't get any garbage phase bit value
+    memset(cq_dma_region->vaddr().as_ptr(), 0, cq_size);
+
+    {
+        auto buffer = TRY(MM.allocate_dma_buffer_pages(sq_size, "Admin SQ queue", Memory::Region::Access::ReadWrite, sq_dma_pages));
+        sq_dma_region = move(buffer);
+    }
+    auto doorbell_regs = Memory::map_typed_writable<volatile DoorbellRegister>(PhysicalAddress(m_bar + REG_SQ0TDBL_START));
+
+    m_admin_queue = TRY(NVMeQueue::try_create(0, irq, qdepth, move(cq_dma_region), cq_dma_pages, move(sq_dma_region), sq_dma_pages, move(doorbell_regs)));
+
+    m_controller_regs->acq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(cq_dma_pages.first().paddr().as_ptr()));
+    m_controller_regs->asq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(sq_dma_pages.first().paddr().as_ptr()));
+
+    if (!start_controller()) {
+        dmesgln("Failed to restart the NVMe controller");
+        return EFAULT;
+    }
+    set_admin_queue_ready_flag();
+    m_admin_queue->enable_interrupts();
+    dbgln_if(NVME_DEBUG, "NVMe: Admin queue created");
+    return {};
+}
+
+ErrorOr<void> NVMeController::create_io_queue(u8 irq, u8 qid)
+{
+    NVMeSubmission sub {};
+    OwnPtr<Memory::Region> cq_dma_region;
+    NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_pages;
+    OwnPtr<Memory::Region> sq_dma_region;
+    NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_pages;
+    auto cq_size = round_up_to_power_of_two(CQ_SIZE(IO_QUEUE_SIZE), 4096);
+    auto sq_size = round_up_to_power_of_two(SQ_SIZE(IO_QUEUE_SIZE), 4096);
+
+    static_assert(sizeof(NVMeSubmission) == (1 << SQ_WIDTH));
+
+    {
+        auto buffer = TRY(MM.allocate_dma_buffer_pages(cq_size, "IO CQ queue", Memory::Region::Access::ReadWrite, cq_dma_pages));
+        cq_dma_region = move(buffer);
+    }
+
+    // The phase bit is important to determine completion, so zero out the space
+    // so that we don't get any garbage phase bit value
+    memset(cq_dma_region->vaddr().as_ptr(), 0, cq_size);
+
+    {
+        auto buffer = TRY(MM.allocate_dma_buffer_pages(sq_size, "IO SQ queue", Memory::Region::Access::ReadWrite, sq_dma_pages));
+        sq_dma_region = move(buffer);
+    }
+
+    {
+        sub.op = OP_ADMIN_CREATE_COMPLETION_QUEUE;
+        sub.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(cq_dma_pages.first().paddr().as_ptr()));
+        // The queue size is 0-based
+        sub.cdw10 = AK::convert_between_host_and_little_endian(((IO_QUEUE_SIZE - 1) << 16 | qid));
+        auto flags = QUEUE_IRQ_ENABLED | QUEUE_PHY_CONTIGUOUS;
+        // TODO: Eventually move to MSI.
+        // For now we use pin-based interrupts. Clear the upper 16 bits
+        // (the interrupt vector) to use pin-based interrupts.
+        sub.cdw11 = AK::convert_between_host_and_little_endian(flags & 0xFFFF);
+        submit_admin_command(sub, true);
+    }
+    {
+        sub.op = OP_ADMIN_CREATE_SUBMISSION_QUEUE;
+        sub.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(sq_dma_pages.first().paddr().as_ptr()));
+        // The queue size is 0-based
+        sub.cdw10 = AK::convert_between_host_and_little_endian(((IO_QUEUE_SIZE - 1) << 16 | qid));
+        auto flags = QUEUE_IRQ_ENABLED | QUEUE_PHY_CONTIGUOUS;
+        // The qid used below points to the completion queue qid
+        sub.cdw11 = AK::convert_between_host_and_little_endian(qid << 16 | flags);
+        submit_admin_command(sub, true);
+    }
+
+    // Each queue owns a pair of doorbells (SQ tail and CQ head); consecutive
+    // queues' doorbells are spaced (4 << CAP.DSTRD) bytes apart.
+    auto queue_doorbell_offset = REG_SQ0TDBL_START + ((2 * qid) * (4 << m_dbl_stride));
+    auto doorbell_regs = Memory::map_typed_writable<volatile DoorbellRegister>(PhysicalAddress(m_bar + queue_doorbell_offset));
+
+    m_queues.append(TRY(NVMeQueue::try_create(qid, irq, IO_QUEUE_SIZE, move(cq_dma_region), cq_dma_pages, move(sq_dma_region), sq_dma_pages, move(doorbell_regs))));
+    m_queues.last().enable_interrupts();
+    dbgln_if(NVME_DEBUG, "NVMe: Created IO Queue with QID {}", m_queues.size());
+    return {};
+}
+}
diff --git a/Kernel/Storage/NVMe/NVMeController.h b/Kernel/Storage/NVMe/NVMeController.h
new file mode 100644
index 00000000000..af41444b1ec
--- /dev/null
+++ b/Kernel/Storage/NVMe/NVMeController.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2021, Pankaj R
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace Kernel {
+
+class NVMeController : public PCI::Device
+    , public StorageController {
+public:
+    static ErrorOr<NonnullRefPtr<NVMeController>> try_initialize(PCI::DeviceIdentifier const&);
+    ErrorOr<void> initialize();
+    explicit NVMeController(PCI::DeviceIdentifier const&);
+    RefPtr<StorageDevice> device(u32 index) const override;
+    size_t devices_count() const override;
+
+protected:
+    bool reset() override;
+    bool shutdown() override;
+    void complete_current_request(AsyncDeviceRequest::RequestResult result) override;
+
+public:
+    bool reset_controller();
+    bool start_controller();
+    u32 get_admin_q_dept();
+
+    u16 submit_admin_command(struct NVMeSubmission& sub, bool sync = false)
+    {
+        // First queue is always the admin queue
+        if (sync) {
+            return m_admin_queue->submit_sync_sqe(sub);
+        }
+        m_admin_queue->submit_sqe(sub);
+        return 0;
+    }
+
+    bool is_admin_queue_ready() { return m_admin_queue_ready; };
+    void set_admin_queue_ready_flag() { m_admin_queue_ready = true; };
+
+private:
+    ErrorOr<void> identify_and_init_namespaces();
+    Tuple<u64, u8> get_ns_features(IdentifyNamespace& identify_data_struct);
+    ErrorOr<void> create_admin_queue(u8 irq);
+    ErrorOr<void> create_io_queue(u8 irq, u8 qid);
+    void calculate_doorbell_stride()
+    {
+        // CAP.DSTRD encodes the doorbell stride; the stride in bytes is (4 << DSTRD)
+        m_dbl_stride = (m_controller_regs->cap >> CAP_DBL_SHIFT) & CAP_DBL_MASK;
+    }
+
+private:
+    PCI::DeviceIdentifier m_pci_device_id;
+    RefPtr<NVMeQueue> m_admin_queue;
+    NonnullRefPtrVector<NVMeQueue> m_queues;
+    NonnullRefPtrVector<NVMeNameSpace> m_namespaces;
+    Memory::TypedMapping<volatile ControllerRegister> m_controller_regs;
+    bool m_admin_queue_ready { false };
+    size_t m_device_count {};
+    u32 m_bar;
+    u8 m_dbl_stride;
+    static Atomic<u8> controller_id;
+};
+}
diff --git a/Kernel/Storage/NVMe/NVMeDefinitions.h b/Kernel/Storage/NVMe/NVMeDefinitions.h
new file mode 100644
index 00000000000..5a5aece11c2
--- /dev/null
+++ b/Kernel/Storage/NVMe/NVMeDefinitions.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2021, Pankaj R
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+
+struct NVMeCompletion;
+struct NVMeSubmission;
+
+struct ControllerRegister {
+    u64 cap;
+    u32 vs;
+    u32 intms;
+    u32 intmc;
+    u32 cc;
+    u32 rsvd1;
+    u32 csts;
+    u32 nssr;
+    u32 aqa;
+    u64 asq;
+    u64 acq;
+    u64 rsvd2[505];
+};
+
+struct IdentifyNamespace {
+    u64 nsze;
+    u64 ncap;
+    u8 rsdv1[10];
+    u8 flbas;
+    u8 rsvd2[100];
+    u32 lbaf[16];
+    u64 rsvd3[488];
+};
+
+// BAR
+static constexpr u32 BAR_ADDR_MASK = 0xFFFFFFF0;
+// DOORBELL
+static constexpr u32 REG_SQ0TDBL_START = 0x1000;
+static constexpr u32 REG_SQ0TDBL_END = 0x1003;
+static constexpr u8 DBL_REG_SIZE = 8;
+// CAP
+static constexpr u8 CAP_DBL_SHIFT = 32;
+static constexpr u8 CAP_DBL_MASK = 0xf;
+static constexpr u16 MQES(u64 cap)
+{
+    return (cap & 0xffff) + 1;
+}
+
+// CC - Controller Configuration
+static constexpr u8 CC_EN_BIT = 0x0;
+static constexpr u8 CSTS_RDY_BIT = 0x0;
+static constexpr u8 CC_IOSQES_BIT = 16;
+static constexpr u8 CC_IOCQES_BIT = 20;
+
+static constexpr u16 CC_AQA_MASK = (0xfff);
+static constexpr u16 ACQ_SIZE(u32 x)
+{
+    return (x >> 16) & CC_AQA_MASK;
+}
+static constexpr u16 ASQ_SIZE(u32 x)
+{
+    return x & CC_AQA_MASK;
+}
+static constexpr u8 CQ_WIDTH = 4; // CQ entry is 16 bytes (2^4) in size
+static constexpr u8 SQ_WIDTH = 6; // SQ entry is 64 bytes (2^6) in size
+static constexpr u16 CQ_SIZE(u16 q_depth)
+{
+    return q_depth << CQ_WIDTH;
+}
+static constexpr u16 SQ_SIZE(u16 q_depth)
+{
+    return q_depth << SQ_WIDTH;
+}
+static constexpr u8 PHASE_TAG(u16 x)
+{
+    return x & 0x1;
+}
+static constexpr u16 CQ_STATUS_FIELD_MASK = 0xfffe;
+static constexpr u16 CQ_STATUS_FIELD(u16 x)
+{
+    return (x & CQ_STATUS_FIELD_MASK) >> 1;
+}
+
+static constexpr u16 IO_QUEUE_SIZE = 64; // TODO: Needs to be configurable
+
+// IDENTIFY
+static constexpr u16 NVMe_IDENTIFY_SIZE = 4096;
+static constexpr u8 NVMe_CNS_ID_ACTIVE_NS = 0x2;
+static constexpr u8 NVMe_CNS_ID_NS = 0x0;
+static constexpr u8 FLBA_SIZE_INDEX = 26;
+static constexpr u8 FLBA_SIZE_MASK = 0xf;
+static constexpr u8 LBA_FORMAT_SUPPORT_INDEX = 128;
+static constexpr u32 LBA_SIZE_MASK = 0x00ff0000;
+
+// OPCODES
+// ADMIN COMMAND SET
+enum AdminCommandOpCode {
+    OP_ADMIN_CREATE_COMPLETION_QUEUE = 0x5,
+    OP_ADMIN_CREATE_SUBMISSION_QUEUE = 0x1,
+    OP_ADMIN_IDENTIFY = 0x6,
+};
+
+// IO opcodes
+enum IOCommandOpcode {
+    OP_NVME_WRITE = 0x1,
+    OP_NVME_READ = 0x2
+};
+
+// FLAGS
+static constexpr u8 QUEUE_PHY_CONTIGUOUS = (1 << 0);
+static constexpr u8 QUEUE_IRQ_ENABLED = (1 << 1);
+
+struct NVMeCompletion {
+    LittleEndian<u32> cmd_spec;
+    LittleEndian<u32> res;
+
+    LittleEndian<u16> sq_head; /* how much of this queue may be reclaimed */
+    LittleEndian<u16> sq_id;   /* submission queue that generated this entry */
+
+    u16 command_id;           /* of the command which completed */
+    LittleEndian<u16> status; /* did the command fail, and if so, why? */
+};
+
+struct DataPtr {
+    LittleEndian<u64> prp1;
+    LittleEndian<u64> prp2;
+};
+
+struct NVMeSubmission {
+    LittleEndian<u8> op;
+    LittleEndian<u8> flags;
+    LittleEndian<u16> cmdid;
+    LittleEndian<u32> nsid;
+    LittleEndian<u64> rsvd;
+    LittleEndian<u64> meta_ptr;
+    struct DataPtr data_ptr;
+    LittleEndian<u32> cdw10;
+    LittleEndian<u32> cdw11;
+    LittleEndian<u32> cdw12;
+    LittleEndian<u32> cdw13;
+    LittleEndian<u32> cdw14;
+    LittleEndian<u32> cdw15;
+};
diff --git a/Kernel/Storage/NVMe/NVMeNameSpace.cpp b/Kernel/Storage/NVMe/NVMeNameSpace.cpp
new file mode 100644
index 00000000000..7df86434ae4
--- /dev/null
+++ b/Kernel/Storage/NVMe/NVMeNameSpace.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021, Pankaj R
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "NVMeNameSpace.h"
+#include
+#include
+#include
+#include
+
+namespace Kernel {
+
+ErrorOr<NonnullRefPtr<NVMeNameSpace>> NVMeNameSpace::try_create(NonnullRefPtrVector<NVMeQueue> queues, u8 controller_id, u16 nsid, size_t storage_size, size_t lba_size)
+{
+    auto minor_number = StorageManagement::generate_storage_minor_number();
+    auto major_number = StorageManagement::storage_type_major_number();
+    auto device_name = String::formatted("nvme{:d}n{:d}", controller_id, nsid);
+    auto device_name_kstring = KString::must_create(device_name.view());
+    auto device = TRY(DeviceManagement::try_create_device<NVMeNameSpace>(queues, storage_size, lba_size, major_number.value(), minor_number.value(), nsid, move(device_name_kstring)));
+
+    return device;
+}
+
+NVMeNameSpace::NVMeNameSpace(NonnullRefPtrVector<NVMeQueue> queues, size_t max_addressable_block, size_t lba_size, size_t major_number, size_t minor_number, u16 nsid, NonnullOwnPtr<KString> dev_name)
+    : StorageDevice(major_number, minor_number, lba_size, max_addressable_block, move(dev_name))
+    , m_nsid(nsid)
+    , m_queues(queues)
+{
+}
+
+void NVMeNameSpace::start_request(AsyncBlockDeviceRequest& request)
+{
+    auto index = Processor::current_id();
+    auto& queue = m_queues.at(index);
+    // TODO: For now we support only IO transfers of size PAGE_SIZE (going along with the current constraint in the block layer).
+    // Eventually remove this constraint by using the PRP2 field in the submission struct and remove the block layer constraint for the NVMe driver.
+    VERIFY(request.block_count() <= (PAGE_SIZE / block_size()));
+
+    if (request.request_type() == AsyncBlockDeviceRequest::Read) {
+        queue.read(request, m_nsid, request.block_index(), request.block_count());
+    } else {
+        queue.write(request, m_nsid, request.block_index(), request.block_count());
+    }
+}
+}
diff --git a/Kernel/Storage/NVMe/NVMeNameSpace.h b/Kernel/Storage/NVMe/NVMeNameSpace.h
new file mode 100644
index 00000000000..b36cf2aa61f
--- /dev/null
+++ b/Kernel/Storage/NVMe/NVMeNameSpace.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021, Pankaj R
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include "AK/kmalloc.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace Kernel {
+class NVMeNameSpace : public StorageDevice {
+
+public:
+    static ErrorOr<NonnullRefPtr<NVMeNameSpace>> try_create(NonnullRefPtrVector<NVMeQueue> queues, u8 controller_id, u16 nsid, size_t storage_size, size_t lba_size);
+    explicit NVMeNameSpace(NonnullRefPtrVector<NVMeQueue> queues, size_t storage_size, size_t lba_size, size_t major_number, size_t minor_number, u16 nsid, NonnullOwnPtr<KString> early_device_name);
+
+    CommandSet command_set() const override { return CommandSet::NVMe; };
+    void start_request(AsyncBlockDeviceRequest& request) override;
+
+private:
+    u16 m_nsid;
+    NonnullRefPtrVector<NVMeQueue> m_queues;
+};
+
+}
diff --git a/Kernel/Storage/NVMe/NVMeQueue.cpp b/Kernel/Storage/NVMe/NVMeQueue.cpp
new file mode 100644
index 00000000000..0c7dd1e99b3
--- /dev/null
+++ b/Kernel/Storage/NVMe/NVMeQueue.cpp
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2021, Pankaj R
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "NVMeQueue.h"
+#include "Kernel/StdLib.h"
+#include
+#include
+#include
+#include
+
+namespace Kernel {
+
+ErrorOr<NonnullRefPtr<NVMeQueue>> NVMeQueue::try_create(u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs)
+{
+    auto queue = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) NVMeQueue(qid, irq, q_depth, move(cq_dma_region), cq_dma_page, move(sq_dma_region), sq_dma_page, move(db_regs))));
+    TRY(queue->create());
+    return queue;
+}
+
+NVMeQueue::NVMeQueue(u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs)
+    : IRQHandler(irq)
+    , m_qid(qid)
+    , m_admin_queue(qid == 0)
+    , m_irq(irq)
+    , m_qdepth(q_depth)
+    , m_cq_dma_region(move(cq_dma_region))
+    , m_cq_dma_page(cq_dma_page)
+    , m_sq_dma_region(move(sq_dma_region))
+    , m_sq_dma_page(sq_dma_page)
+    , m_db_regs(move(db_regs))
+    , m_current_request(nullptr)
+{
+    m_sqe_array = { reinterpret_cast<NVMeSubmission*>(m_sq_dma_region->vaddr().as_ptr()), m_qdepth };
+    m_cqe_array = { reinterpret_cast<NVMeCompletion*>(m_cq_dma_region->vaddr().as_ptr()), m_qdepth };
+}
+
+ErrorOr<void> NVMeQueue::create()
+{
+    // DMA region for the RW operation. For now the requests don't exceed 4096 bytes (the storage device layer takes care of that).
+    auto buffer = TRY(MM.allocate_dma_buffer_page("Admin CQ queue", Memory::Region::Access::ReadWrite, m_rw_dma_page));
+    m_rw_dma_region = move(buffer);
+    return {};
+}
+
+bool NVMeQueue::cqe_available()
+{
+    return PHASE_TAG(m_cqe_array[m_cq_head].status) == m_cq_valid_phase;
+}
+
+void NVMeQueue::update_cqe_head()
+{
+    // To prevent overflow, use a temp variable
+    u32 temp_cq_head = m_cq_head + 1;
+    if (temp_cq_head == m_qdepth) {
+        m_cq_head = 0;
+        m_cq_valid_phase ^= 1;
+    } else {
+        m_cq_head = temp_cq_head;
+    }
+}
+
+bool NVMeQueue::handle_irq(const RegisterState&)
+{
+    u32 nr_of_processed_cqes = 0;
+    while (cqe_available()) {
+        u16 status;
+        u16 cmdid;
+        ++nr_of_processed_cqes;
+        status = CQ_STATUS_FIELD(m_cqe_array[m_cq_head].status);
+        cmdid = m_cqe_array[m_cq_head].command_id;
+        dbgln_if(NVME_DEBUG, "NVMe: Completion with status {:x} and command identifier {}. CQ_HEAD: {}", status, cmdid, m_cq_head);
+        // TODO: We don't use AsyncBlockDevice requests for the admin queue, as they are only applicable to a block device (NVMe namespace),
+        // but admin commands precede namespace creation. Unify requests to avoid special conditions.
+        if (m_admin_queue == false) {
+            // As the block layer calls are now sync (we wait on each request),
+            // everything operates on a single request at a time, similar to the BMIDE driver.
+            // TODO: Remove this constraint eventually.
+            VERIFY(cmdid == m_prev_sq_tail);
+            SpinlockLocker lock(m_request_lock);
+            if (m_current_request) {
+                complete_current_request(status);
+            }
+        }
+        update_cqe_head();
+    }
+    if (nr_of_processed_cqes) {
+        update_cq_doorbell();
+    }
+    return nr_of_processed_cqes ? true : false;
+}
+
+void NVMeQueue::submit_sqe(struct NVMeSubmission& sub)
+{
+    SpinlockLocker lock(m_sq_lock);
+    // For now let's use the sq tail as a unique command id.
+    sub.cmdid = m_sq_tail;
+    m_prev_sq_tail = m_sq_tail;
+
+    memcpy(&m_sqe_array[m_sq_tail], &sub, sizeof(NVMeSubmission));
+    {
+        u32 temp_sq_tail = m_sq_tail + 1;
+        if (temp_sq_tail == m_qdepth)
+            m_sq_tail = 0;
+        else
+            m_sq_tail = temp_sq_tail;
+    }
+
+    dbgln_if(NVME_DEBUG, "NVMe: Submission with command identifier {}. SQ_TAIL: {}", sub.cmdid, m_sq_tail);
+    full_memory_barrier();
+    update_sq_doorbell();
+}
+
+u16 NVMeQueue::submit_sync_sqe(NVMeSubmission& sub)
+{
+    // For now let's use the sq tail as a unique command id.
+    u16 cqe_cid;
+    u16 cid = m_sq_tail;
+
+    submit_sqe(sub);
+    do {
+        int index;
+        {
+            SpinlockLocker lock(m_cq_lock);
+            index = m_cq_head - 1;
+            if (index < 0)
+                index = IO_QUEUE_SIZE - 1;
+        }
+        cqe_cid = m_cqe_array[index].command_id;
+        Scheduler::yield();
+    } while (cid != cqe_cid);
+
+    auto status = CQ_STATUS_FIELD(m_cqe_array[m_cq_head].status);
+    return status;
+}
+
+void NVMeQueue::read(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count)
+{
+    NVMeSubmission sub {};
+    SpinlockLocker m_lock(m_request_lock);
+    m_current_request = request;
+
+    sub.op = OP_NVME_READ;
+    sub.nsid = nsid;
+    sub.cdw10 = AK::convert_between_host_and_little_endian(index & 0xFFFFFFFF);
+    sub.cdw11 = AK::convert_between_host_and_little_endian(index >> 32);
+    // Number of LBAs is 0-based
+    sub.cdw12 = AK::convert_between_host_and_little_endian((count - 1) & 0xFFFF);
+    sub.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(m_rw_dma_page->paddr().as_ptr()));
+
+    full_memory_barrier();
+    submit_sqe(sub);
+}
+
+void NVMeQueue::write(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count)
+{
+    NVMeSubmission sub {};
+    SpinlockLocker m_lock(m_request_lock);
+    m_current_request = request;
+
+    if (auto result = m_current_request->read_from_buffer(m_current_request->buffer(), m_rw_dma_region->vaddr().as_ptr(), 512 * m_current_request->block_count()); result.is_error()) {
+        complete_current_request(AsyncDeviceRequest::MemoryFault);
+        return;
+    }
+    sub.op = OP_NVME_WRITE;
+    sub.nsid = nsid;
+    sub.cdw10 = AK::convert_between_host_and_little_endian(index & 0xFFFFFFFF);
+    sub.cdw11 = AK::convert_between_host_and_little_endian(index >> 32);
+    // Number of LBAs is 0-based
+    sub.cdw12 = AK::convert_between_host_and_little_endian((count - 1) & 0xFFFF);
+    sub.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(m_rw_dma_page->paddr().as_ptr()));
+
+    full_memory_barrier();
+    submit_sqe(sub);
+}
+
+void NVMeQueue::complete_current_request(u16 status)
+{
+    VERIFY(m_request_lock.is_locked());
+
+    g_io_work->queue([this, status]() {
+        SpinlockLocker lock(m_request_lock);
+        auto current_request = m_current_request;
+        m_current_request.clear();
+        if (status) {
+            lock.unlock();
+            current_request->complete(AsyncBlockDeviceRequest::Failure);
+            return;
+        }
+        if (current_request->request_type() == AsyncBlockDeviceRequest::RequestType::Read) {
+            if (auto result = current_request->write_to_buffer(current_request->buffer(), m_rw_dma_region->vaddr().as_ptr(), 512 * current_request->block_count()); result.is_error()) {
+                lock.unlock();
+                current_request->complete(AsyncDeviceRequest::MemoryFault);
+                return;
+            }
+        }
+        lock.unlock();
+        current_request->complete(AsyncDeviceRequest::Success);
+        return;
+    });
+}
+}
diff --git a/Kernel/Storage/NVMe/NVMeQueue.h b/Kernel/Storage/NVMe/NVMeQueue.h
new file mode 100644
index 00000000000..40ba9ffd836
--- /dev/null
+++ b/Kernel/Storage/NVMe/NVMeQueue.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2021, Pankaj R
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace Kernel {
+
+struct DoorbellRegister {
+    u32 sq_tail;
+    u32 cq_head;
+};
+
+class AsyncBlockDeviceRequest;
+class NVMeQueue : public IRQHandler
+    , public RefCounted<NVMeQueue> {
+public:
+    static ErrorOr<NonnullRefPtr<NVMeQueue>> try_create(u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs);
+    ErrorOr<void> create();
+    explicit NVMeQueue(u16 qid, u8 irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_page, Memory::TypedMapping<volatile DoorbellRegister> db_regs);
+    bool is_admin_queue() { return m_admin_queue; };
+    bool handle_irq(const RegisterState&) override;
+    void submit_sqe(struct NVMeSubmission&);
+    u16 submit_sync_sqe(struct NVMeSubmission&);
+    void read(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count);
+    void write(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count);
+    void enable_interrupts() { enable_irq(); };
+    void disable_interrupts() { disable_irq(); };
+
+private:
+    bool cqe_available();
+    void update_cqe_head();
+    void complete_current_request(u16 status);
+    void update_cq_doorbell()
+    {
+        m_db_regs->cq_head = m_cq_head;
+    }
+
+    void update_sq_doorbell()
+    {
+        m_db_regs->sq_tail = m_sq_tail;
+    }
+
+private:
+    u16 m_qid {};
+    u8 m_cq_valid_phase { 1 };
+    u16 m_sq_tail {};
+    u16 m_prev_sq_tail {};
+    u16 m_cq_head {};
+    bool m_admin_queue { false };
+    u8 m_irq {};
+    u32 m_qdepth {};
+    Spinlock m_cq_lock { LockRank::Interrupts };
+    Spinlock m_sq_lock { LockRank::Interrupts };
+    OwnPtr<Memory::Region> m_cq_dma_region;
+    NonnullRefPtrVector<Memory::PhysicalPage> m_cq_dma_page;
+    Span<NVMeSubmission> m_sqe_array;
+    OwnPtr<Memory::Region> m_sq_dma_region;
+    NonnullRefPtrVector<Memory::PhysicalPage> m_sq_dma_page;
+    Span<NVMeCompletion> m_cqe_array;
+    OwnPtr<Memory::Region> m_rw_dma_region;
+    Memory::TypedMapping<volatile DoorbellRegister> m_db_regs;
+    RefPtr<Memory::PhysicalPage> m_rw_dma_page;
+    Spinlock m_request_lock;
+    RefPtr<AsyncBlockDeviceRequest> m_current_request;
+};
+}
diff --git a/Kernel/Storage/StorageManagement.cpp b/Kernel/Storage/StorageManagement.cpp
index c64571846ee..c9215ff8397 100644
--- a/Kernel/Storage/StorageManagement.cpp
+++ b/Kernel/Storage/StorageManagement.cpp
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include <Kernel/Storage/NVMe/NVMeController.h>
 #include
 #include
 #include
@@ -61,6 +62,17 @@ UNMAP_AFTER_INIT void StorageManagement::enumerate_controllers(bool force_pio)
                 m_controllers.append(AHCIController::initialize(device_identifier));
             }
         });
+        PCI::enumerate([&](PCI::DeviceIdentifier const& device_identifier) {
+            if (device_identifier.class_code().value() == to_underlying(PCI::ClassID::MassStorage)
+                && device_identifier.subclass_code().value() == to_underlying(PCI::MassStorage::SubclassID::NVMeController)) {
+                auto controller = NVMeController::try_initialize(device_identifier);
+                if (controller.is_error()) {
+                    dmesgln("Unable to initialize NVMe controller");
+                } else {
+                    m_controllers.append(controller.release_value());
+                }
+            }
+        });
     }
     m_controllers.append(RamdiskController::initialize());
 }
diff --git a/Meta/CMake/all_the_debug_macros.cmake b/Meta/CMake/all_the_debug_macros.cmake
index f26686ffecf..bd2e5f11829 100644
--- a/Meta/CMake/all_the_debug_macros.cmake
+++ b/Meta/CMake/all_the_debug_macros.cmake
@@ -123,6 +123,7 @@ set(MULTIPROCESSOR_DEBUG ON)
 set(NE2000_DEBUG ON)
 set(NETWORK_TASK_DEBUG ON)
 set(NT_DEBUG ON)
+set(NVME_DEBUG ON)
 set(OCCLUSIONS_DEBUG ON)
 set(OFFD_DEBUG ON)
 set(PAGE_FAULT_DEBUG ON)