NVMeController.cpp

/*
 * Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include "NVMeController.h"
#include <AK/Format.h>
#include <AK/RefPtr.h>
#include <AK/Types.h>
#include <Kernel/Arch/x86/IO.h>
#include <Kernel/Arch/x86/Processor.h>
#include <Kernel/Bus/PCI/API.h>
#include <Kernel/Devices/Device.h>
#include <Kernel/FileSystem/ProcFS.h>
#include <Kernel/Sections.h>

namespace Kernel {
Atomic<u8> NVMeController::controller_id {};

ErrorOr<NonnullRefPtr<NVMeController>> NVMeController::try_initialize(const Kernel::PCI::DeviceIdentifier& device_identifier)
{
    auto controller = TRY(adopt_nonnull_ref_or_enomem(new NVMeController(device_identifier)));
    TRY(controller->initialize());
    NVMeController::controller_id++;
    return controller;
}

NVMeController::NVMeController(const PCI::DeviceIdentifier& device_identifier)
    : PCI::Device(device_identifier.address())
    , m_pci_device_id(device_identifier)
{
}
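
// Bring-up sequence implemented by initialize() below: enable the PCI memory space and
// bus mastering, map the controller registers from BAR0, set up the admin queue (which
// resets and restarts the controller), create one IO queue per CPU and finally
// enumerate the active namespaces.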
ErrorOr<void> NVMeController::initialize()
{
    // Number of queues = one IO queue per core
    auto nr_of_queues = Processor::count();
    auto irq = m_pci_device_id.interrupt_line().value();

    PCI::enable_memory_space(m_pci_device_id.address());
    PCI::enable_bus_mastering(m_pci_device_id.address());
    m_bar = PCI::get_BAR0(m_pci_device_id.address()) & BAR_ADDR_MASK;
    static_assert(sizeof(ControllerRegister) == REG_SQ0TDBL_START);

    // Map only up to the doorbell registers for the controller;
    // each queue maps its own doorbell register separately.
    m_controller_regs = Memory::map_typed_writable<volatile ControllerRegister>(PhysicalAddress(m_bar));

    calculate_doorbell_stride();
    TRY(create_admin_queue(irq));
    VERIFY(m_admin_queue_ready == true);

    VERIFY(IO_QUEUE_SIZE < MQES(m_controller_regs->cap));
    dbgln_if(NVME_DEBUG, "NVMe: IO queue depth is: {}", IO_QUEUE_SIZE);

    // Create an IO queue per core
    for (u32 cpuid = 0; cpuid < nr_of_queues; ++cpuid) {
        // qid 0 is reserved for the admin queue
        TRY(create_io_queue(irq, cpuid + 1));
    }
    TRY(identify_and_init_namespaces());
    return {};
}
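
// Controller enable/ready handshake: clearing CC.EN asks the controller to stop
// processing, and CSTS.RDY must read 0 before the admin queue registers may be changed;
// setting CC.EN (together with the IO completion/submission queue entry sizes in
// CC.IOCQES/CC.IOSQES, given as log2 of the entry size) starts the controller, and
// CSTS.RDY reads 1 once it accepts commands. reset_controller() and start_controller()
// below implement the two halves of this handshake with a single fixed delay instead
// of polling up to the CAP.TO timeout.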
bool NVMeController::reset_controller()
{
    volatile u32 cc, csts;
    csts = m_controller_regs->csts;
    if ((csts & (1 << CSTS_RDY_BIT)) != 0x1)
        return false;

    cc = m_controller_regs->cc;
    cc = cc & ~(1 << CC_EN_BIT);

    m_controller_regs->cc = cc;

    IO::delay(10);
    full_memory_barrier();

    csts = m_controller_regs->csts;
    if ((csts & (1 << CSTS_RDY_BIT)) != 0x0)
        return false;

    return true;
}

bool NVMeController::start_controller()
{
    volatile u32 cc, csts;
    csts = m_controller_regs->csts;
    if ((csts & (1 << CSTS_RDY_BIT)) != 0x0)
        return false;

    cc = m_controller_regs->cc;

    cc = cc | (1 << CC_EN_BIT);
    cc = cc | (CQ_WIDTH << CC_IOCQES_BIT);
    cc = cc | (SQ_WIDTH << CC_IOSQES_BIT);

    m_controller_regs->cc = cc;

    IO::delay(10);
    full_memory_barrier();

    csts = m_controller_regs->csts;
    if ((csts & (1 << CSTS_RDY_BIT)) != 0x1)
        return false;

    return true;
}
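
// The AQA register holds the admin submission and completion queue sizes as 0-based
// fields; the effective admin queue depth used here is the smaller of the two plus one.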
u32 NVMeController::get_admin_q_dept()
{
    u32 aqa = m_controller_regs->aqa;
    // Queue depth is 0-based
    u32 q_depth = min(ACQ_SIZE(aqa), ASQ_SIZE(aqa)) + 1;
    dbgln_if(NVME_DEBUG, "NVMe: Admin queue depth is {}", q_depth);
    return q_depth;
}
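
// Namespace enumeration uses two flavours of the Identify admin command:
// CNS = NVMe_CNS_ID_ACTIVE_NS returns the list of active namespace IDs, and
// CNS = NVMe_CNS_ID_NS (with sub.nsid set) returns that namespace's Identify data
// structure. Both results are DMA'd into the same single PRP page, which is reused
// for every command.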
ErrorOr<void> NVMeController::identify_and_init_namespaces()
{
    RefPtr<Memory::PhysicalPage> prp_dma_buffer;
    OwnPtr<Memory::Region> prp_dma_region;
    auto namespace_data_struct = ByteBuffer::create_zeroed(NVMe_IDENTIFY_SIZE).release_value();
    u32 active_namespace_list[NVMe_IDENTIFY_SIZE / sizeof(u32)];

    {
        auto buffer = TRY(MM.allocate_dma_buffer_page("Identify PRP", Memory::Region::Access::ReadWrite, prp_dma_buffer));
        prp_dma_region = move(buffer);
    }

    // Get the list of active namespaces
    {
        NVMeSubmission sub {};
        u16 status = 0;
        sub.op = OP_ADMIN_IDENTIFY;
        sub.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(prp_dma_buffer->paddr().as_ptr()));
        sub.cdw10 = NVMe_CNS_ID_ACTIVE_NS & 0xff;
        status = submit_admin_command(sub, true);
        if (status) {
            dmesgln("Failed to identify active namespaces");
            return EFAULT;
        }
        if (void* fault_at; !safe_memcpy(active_namespace_list, prp_dma_region->vaddr().as_ptr(), NVMe_IDENTIFY_SIZE, fault_at)) {
            return EFAULT;
        }
    }

    // Get the attributes of each active namespace
    {
        NVMeSubmission sub {};
        IdentifyNamespace id_ns {};
        u16 status = 0;
        for (auto nsid : active_namespace_list) {
            memset(prp_dma_region->vaddr().as_ptr(), 0, NVMe_IDENTIFY_SIZE);
            // An nsid of 0 is invalid; it marks the end of the active namespace list
            if (nsid == 0)
                break;
            sub.op = OP_ADMIN_IDENTIFY;
            sub.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(prp_dma_buffer->paddr().as_ptr()));
            sub.cdw10 = NVMe_CNS_ID_NS & 0xff;
            sub.nsid = nsid;

            status = submit_admin_command(sub, true);
            if (status) {
                dmesgln("Failed to identify namespace with nsid {}", nsid);
                return EFAULT;
            }
            static_assert(sizeof(IdentifyNamespace) == NVMe_IDENTIFY_SIZE);
            if (void* fault_at; !safe_memcpy(&id_ns, prp_dma_region->vaddr().as_ptr(), NVMe_IDENTIFY_SIZE, fault_at)) {
                return EFAULT;
            }
            auto val = get_ns_features(id_ns);
            auto block_counts = val.get<0>();
            auto block_size = 1 << val.get<1>();
            dbgln_if(NVME_DEBUG, "NVMe: Block count is {} and Block size is {}", block_counts, block_size);

            m_namespaces.append(TRY(NVMeNameSpace::try_create(m_queues, controller_id.load(), nsid, block_counts, block_size)));
            m_device_count++;
            dbgln_if(NVME_DEBUG, "NVMe: Initialized namespace with NSID: {}", nsid);
        }
    }
    return {};
}
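
// FLBAS selects which entry of the LBA format (lbaf) array is currently in use; bits
// 23:16 of that entry hold the LBA data size as a power of two, which is why the shift
// by 16 below yields a log2 size and the caller computes the block size as 1 << lba_size.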
Tuple<u64, u8> NVMeController::get_ns_features(IdentifyNamespace& identify_data_struct)
{
    auto flbas = identify_data_struct.flbas & FLBA_SIZE_MASK;
    auto namespace_size = identify_data_struct.nsze;
    auto lba_format = identify_data_struct.lbaf[flbas];

    auto lba_size = (lba_format & LBA_SIZE_MASK) >> 16;
    return Tuple<u64, u8>(namespace_size, lba_size);
}
RefPtr<StorageDevice> NVMeController::device(u32 index) const
{
    return m_namespaces.at(index);
}

size_t NVMeController::devices_count() const
{
    return m_device_count;
}

bool NVMeController::reset()
{
    if (!reset_controller())
        return false;
    if (!start_controller())
        return false;
    return true;
}

bool NVMeController::shutdown()
{
    TODO();
    return false;
}

void NVMeController::complete_current_request([[maybe_unused]] AsyncDeviceRequest::RequestResult result)
{
    VERIFY_NOT_REACHED();
}
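
// Unlike IO queues, the admin queue is not created with a command: the controller has
// to be disabled first, the queue base addresses programmed into the ACQ/ASQ registers,
// and the controller re-enabled. The admin queue depth is taken from whatever AQA
// already holds (see get_admin_q_dept() above); this function does not program AQA itself.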
ErrorOr<void> NVMeController::create_admin_queue(u8 irq)
{
    auto qdepth = get_admin_q_dept();
    OwnPtr<Memory::Region> cq_dma_region;
    NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_pages;
    OwnPtr<Memory::Region> sq_dma_region;
    NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_pages;
    auto cq_size = round_up_to_power_of_two(CQ_SIZE(qdepth), 4096);
    auto sq_size = round_up_to_power_of_two(SQ_SIZE(qdepth), 4096);
    if (!reset_controller()) {
        dmesgln("Failed to reset the NVMe controller");
        return EFAULT;
    }
    {
        auto buffer = TRY(MM.allocate_dma_buffer_pages(cq_size, "Admin CQ queue", Memory::Region::Access::ReadWrite, cq_dma_pages));
        cq_dma_region = move(buffer);
    }

    // The phase bit is used to detect completion, so zero out the space
    // so that we don't read a garbage phase bit value.
    memset(cq_dma_region->vaddr().as_ptr(), 0, cq_size);

    {
        auto buffer = TRY(MM.allocate_dma_buffer_pages(sq_size, "Admin SQ queue", Memory::Region::Access::ReadWrite, sq_dma_pages));
        sq_dma_region = move(buffer);
    }

    auto doorbell_regs = Memory::map_typed_writable<volatile DoorbellRegister>(PhysicalAddress(m_bar + REG_SQ0TDBL_START));

    m_admin_queue = TRY(NVMeQueue::try_create(0, irq, qdepth, move(cq_dma_region), cq_dma_pages, move(sq_dma_region), sq_dma_pages, move(doorbell_regs)));

    m_controller_regs->acq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(cq_dma_pages.first().paddr().as_ptr()));
    m_controller_regs->asq = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(sq_dma_pages.first().paddr().as_ptr()));

    if (!start_controller()) {
        dmesgln("Failed to restart the NVMe controller");
        return EFAULT;
    }
    set_admin_queue_ready_flag();
    m_admin_queue->enable_interrupts();
    dbgln_if(NVME_DEBUG, "NVMe: Admin queue created");
    return {};
}
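
// IO queues are created with admin commands: the completion queue must exist before
// the submission queue that posts to it, so OP_ADMIN_CREATE_COMPLETION_QUEUE is issued
// first and the SQ is then associated with that CQ's qid. The doorbell pair for queue
// `qid` lives at offset REG_SQ0TDBL_START + (2 * qid) * (4 << CAP.DSTRD) from BAR0
// (SQ tail doorbell, immediately followed by the CQ head doorbell), which is the offset
// computed from m_dbl_stride below.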
ErrorOr<void> NVMeController::create_io_queue(u8 irq, u8 qid)
{
    NVMeSubmission sub {};
    OwnPtr<Memory::Region> cq_dma_region;
    NonnullRefPtrVector<Memory::PhysicalPage> cq_dma_pages;
    OwnPtr<Memory::Region> sq_dma_region;
    NonnullRefPtrVector<Memory::PhysicalPage> sq_dma_pages;
    auto cq_size = round_up_to_power_of_two(CQ_SIZE(IO_QUEUE_SIZE), 4096);
    auto sq_size = round_up_to_power_of_two(SQ_SIZE(IO_QUEUE_SIZE), 4096);

    static_assert(sizeof(NVMeSubmission) == (1 << SQ_WIDTH));

    {
        auto buffer = TRY(MM.allocate_dma_buffer_pages(cq_size, "IO CQ queue", Memory::Region::Access::ReadWrite, cq_dma_pages));
        cq_dma_region = move(buffer);
    }

    // The phase bit is used to detect completion, so zero out the space
    // so that we don't read a garbage phase bit value.
    memset(cq_dma_region->vaddr().as_ptr(), 0, cq_size);

    {
        auto buffer = TRY(MM.allocate_dma_buffer_pages(sq_size, "IO SQ queue", Memory::Region::Access::ReadWrite, sq_dma_pages));
        sq_dma_region = move(buffer);
    }

    {
        sub.op = OP_ADMIN_CREATE_COMPLETION_QUEUE;
        sub.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(cq_dma_pages.first().paddr().as_ptr()));
        // The queue size is 0-based
        sub.cdw10 = AK::convert_between_host_and_little_endian(((IO_QUEUE_SIZE - 1) << 16 | qid));
        auto flags = QUEUE_IRQ_ENABLED | QUEUE_PHY_CONTIGUOUS;
        // TODO: Eventually move to MSI.
        // For now we use pin-based interrupts, so clear the upper 16 bits
        // (the interrupt vector) of cdw11.
        sub.cdw11 = AK::convert_between_host_and_little_endian(flags & 0xFFFF);
        submit_admin_command(sub, true);
    }
    {
        sub.op = OP_ADMIN_CREATE_SUBMISSION_QUEUE;
        sub.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(sq_dma_pages.first().paddr().as_ptr()));
        // The queue size is 0-based
        sub.cdw10 = AK::convert_between_host_and_little_endian(((IO_QUEUE_SIZE - 1) << 16 | qid));
        auto flags = QUEUE_IRQ_ENABLED | QUEUE_PHY_CONTIGUOUS;
        // The qid in the upper 16 bits of cdw11 is the completion queue to associate
        // with this submission queue.
        sub.cdw11 = AK::convert_between_host_and_little_endian(qid << 16 | flags);
        submit_admin_command(sub, true);
    }

    auto queue_doorbell_offset = REG_SQ0TDBL_START + ((2 * qid) * (4 << m_dbl_stride));
    auto doorbell_regs = Memory::map_typed_writable<volatile DoorbellRegister>(PhysicalAddress(m_bar + queue_doorbell_offset));

    m_queues.append(TRY(NVMeQueue::try_create(qid, irq, IO_QUEUE_SIZE, move(cq_dma_region), cq_dma_pages, move(sq_dma_region), sq_dma_pages, move(doorbell_regs))));
    m_queues.last().enable_interrupts();
    dbgln_if(NVME_DEBUG, "NVMe: Created IO Queue with QID {}", m_queues.size());
    return {};
}
}