ladybird/Kernel/Storage/StorageDevice.cpp
Liav A 2c84466ad8 Kernel/Storage: Introduce new boot device addressing modes
Before of this patch, we supported two methods to address a boot device:
1. Specifying root=/dev/hdXY, where X is a-z letter which corresponds to
a boot device, and Y as number from 1 to 16, to indicate the partition
number, which can be omitted to instruct the kernel to use a raw device
rather than a partition on a raw device.
2. Specifying root=PARTUUID: with a GUID string of a GUID partition. In
case of existing storage device with GPT partitions, this is most likely
the safest option to ensure booting from persistent storage.

While option 2 is more advanced and reliable, the first option has 2
caveats:
1. The string prefix "/dev/hd" doesn't mean anything beside a convention
on Linux installations, that was taken into use in Serenity. In Serenity
we don't mount DevTmpFS before we mount the boot device on /, so the
kernel doesn't really access /dev anyway, so this convention is only a
big misleading relic that can easily make the user to assume we access
/dev early on boot.
2. This convention although resemble the simple linux convention, is
quite limited in specifying a correct boot device across hardware setup
changes, so option 2 was recommended to ensure the system is always
bootable.

With these caveats in mind, this commit tries to fix the problem with
adding more addressing options as well as to remove the first option
being mentioned above of addressing.
To sum it up, there are 4 addressing options:
1. Hardware relative address - Each instance of StorageController is
assigned with a index number relative to the type of hardware it handles
which makes it possible to address storage devices with a prefix of the
commandset ("ata" for ATA, "nvme" for NVMe, "ramdisk" for Plain memory),
and then the number for the parent controller relative hardware index,
another number LUN target_id, and a third number for LUN disk_id.
2. LUN address - Similar to the previous option, but instead we rely on
the parent controller absolute index for the first number.
3. Block device major and minor numbers - by specifying the major and
minor numbers, the kernel can simply try to get the corresponding block
device and use it as the boot device.
4. GUID string, in the same fashion like before, so the user use the
"PARTUUID:" string prefix and add the GUID of the GPT partition.

For the new address modes 1 and 2, the user can choose to also specify a
partition out of the selected boot device. To do that, the user needs to
append the semicolon character and then add the string "partX" where X
is to be changed for the partition number. We start counting from 0, and
therefore the first partition number is 0 and not 1 in the kernel boot
argument.
2022-08-30 00:50:15 +01:00

262 lines
9.9 KiB
C++

/*
* Copyright (c) 2020, Liav A. <liavalb@hotmail.co.il>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Memory.h>
#include <AK/StringView.h>
#include <Kernel/Debug.h>
#include <Kernel/Devices/DeviceManagement.h>
#include <Kernel/FileSystem/OpenFileDescription.h>
#include <Kernel/FileSystem/SysFS/Subsystems/DeviceIdentifiers/BlockDevicesDirectory.h>
#include <Kernel/FileSystem/SysFS/Subsystems/DeviceIdentifiers/SymbolicLinkDeviceComponent.h>
#include <Kernel/FileSystem/SysFS/Subsystems/Devices/Storage/DeviceDirectory.h>
#include <Kernel/FileSystem/SysFS/Subsystems/Devices/Storage/Directory.h>
#include <Kernel/Storage/StorageDevice.h>
#include <Kernel/Storage/StorageManagement.h>
#include <LibC/sys/ioctl_numbers.h>
namespace Kernel {
StorageDevice::StorageDevice(LUNAddress logical_unit_number_address, u32 hardware_relative_controller_id, size_t sector_size, u64 max_addressable_block)
: BlockDevice(StorageManagement::storage_type_major_number(), StorageManagement::generate_storage_minor_number(), sector_size)
, m_logical_unit_number_address(logical_unit_number_address)
, m_hardware_relative_controller_id(hardware_relative_controller_id)
, m_max_addressable_block(max_addressable_block)
, m_blocks_per_page(PAGE_SIZE / block_size())
{
}
StorageDevice::StorageDevice(Badge<RamdiskDevice>, LUNAddress logical_unit_number_address, u32 hardware_relative_controller_id, MajorNumber major, MinorNumber minor, size_t sector_size, u64 max_addressable_block)
: BlockDevice(major, minor, sector_size)
, m_logical_unit_number_address(logical_unit_number_address)
, m_hardware_relative_controller_id(hardware_relative_controller_id)
, m_max_addressable_block(max_addressable_block)
, m_blocks_per_page(PAGE_SIZE / block_size())
{
}
void StorageDevice::after_inserting()
{
after_inserting_add_to_device_management();
auto sysfs_storage_device_directory = StorageDeviceSysFSDirectory::create(SysFSStorageDirectory::the(), *this);
m_sysfs_device_directory = sysfs_storage_device_directory;
SysFSStorageDirectory::the().plug({}, *sysfs_storage_device_directory);
VERIFY(!m_symlink_sysfs_component);
auto sys_fs_component = MUST(SysFSSymbolicLinkDeviceComponent::try_create(SysFSBlockDevicesDirectory::the(), *this, *m_sysfs_device_directory));
m_symlink_sysfs_component = sys_fs_component;
after_inserting_add_symlink_to_device_identifier_directory();
}
void StorageDevice::will_be_destroyed()
{
VERIFY(m_symlink_sysfs_component);
before_will_be_destroyed_remove_symlink_from_device_identifier_directory();
m_symlink_sysfs_component.clear();
SysFSStorageDirectory::the().unplug({}, *m_sysfs_device_directory);
before_will_be_destroyed_remove_from_device_management();
}
StringView StorageDevice::class_name() const
{
return "StorageDevice"sv;
}
StringView StorageDevice::command_set_to_string_view() const
{
switch (command_set()) {
case CommandSet::PlainMemory:
return "memory"sv;
case CommandSet::SCSI:
return "scsi"sv;
case CommandSet::ATA:
return "ata"sv;
case CommandSet::NVMe:
return "nvme"sv;
default:
break;
}
VERIFY_NOT_REACHED();
}
ErrorOr<size_t> StorageDevice::read(OpenFileDescription&, u64 offset, UserOrKernelBuffer& outbuf, size_t len)
{
u64 index = offset >> block_size_log();
off_t offset_within_block = 0;
size_t whole_blocks = len >> block_size_log();
size_t remaining = len - (whole_blocks << block_size_log());
// PATAChannel will chuck a wobbly if we try to read more than PAGE_SIZE
// at a time, because it uses a single page for its DMA buffer.
if (whole_blocks >= m_blocks_per_page) {
whole_blocks = m_blocks_per_page;
remaining = 0;
}
if (len < block_size())
offset_within_block = offset - (index << block_size_log());
dbgln_if(STORAGE_DEVICE_DEBUG, "StorageDevice::read() index={}, whole_blocks={}, remaining={}", index, whole_blocks, remaining);
if (whole_blocks > 0) {
auto read_request = TRY(try_make_request<AsyncBlockDeviceRequest>(AsyncBlockDeviceRequest::Read, index, whole_blocks, outbuf, whole_blocks * block_size()));
auto result = read_request->wait();
if (result.wait_result().was_interrupted())
return EINTR;
switch (result.request_result()) {
case AsyncDeviceRequest::Failure:
case AsyncDeviceRequest::Cancelled:
return EIO;
case AsyncDeviceRequest::MemoryFault:
return EFAULT;
default:
break;
}
}
off_t pos = whole_blocks * block_size();
if (remaining > 0) {
auto data = TRY(ByteBuffer::create_uninitialized(block_size()));
auto data_buffer = UserOrKernelBuffer::for_kernel_buffer(data.data());
auto read_request = TRY(try_make_request<AsyncBlockDeviceRequest>(AsyncBlockDeviceRequest::Read, index + whole_blocks, 1, data_buffer, block_size()));
auto result = read_request->wait();
if (result.wait_result().was_interrupted())
return EINTR;
switch (result.request_result()) {
case AsyncDeviceRequest::Failure:
return pos;
case AsyncDeviceRequest::Cancelled:
return EIO;
case AsyncDeviceRequest::MemoryFault:
// This should never happen, we're writing to a kernel buffer!
VERIFY_NOT_REACHED();
default:
break;
}
TRY(outbuf.write(data.offset_pointer(offset_within_block), pos, remaining));
}
return pos + remaining;
}
bool StorageDevice::can_read(OpenFileDescription const&, u64 offset) const
{
return offset < (max_addressable_block() * block_size());
}
ErrorOr<size_t> StorageDevice::write(OpenFileDescription&, u64 offset, UserOrKernelBuffer const& inbuf, size_t len)
{
u64 index = offset >> block_size_log();
off_t offset_within_block = 0;
size_t whole_blocks = len >> block_size_log();
size_t remaining = len - (whole_blocks << block_size_log());
// PATAChannel will chuck a wobbly if we try to write more than PAGE_SIZE
// at a time, because it uses a single page for its DMA buffer.
if (whole_blocks >= m_blocks_per_page) {
whole_blocks = m_blocks_per_page;
remaining = 0;
}
if (len < block_size())
offset_within_block = offset - (index << block_size_log());
// We try to allocate the temporary block buffer for partial writes *before* we start any full block writes,
// to try and prevent partial writes
Optional<ByteBuffer> partial_write_block;
if (remaining > 0)
partial_write_block = TRY(ByteBuffer::create_zeroed(block_size()));
dbgln_if(STORAGE_DEVICE_DEBUG, "StorageDevice::write() index={}, whole_blocks={}, remaining={}", index, whole_blocks, remaining);
if (whole_blocks > 0) {
auto write_request = TRY(try_make_request<AsyncBlockDeviceRequest>(AsyncBlockDeviceRequest::Write, index, whole_blocks, inbuf, whole_blocks * block_size()));
auto result = write_request->wait();
if (result.wait_result().was_interrupted())
return EINTR;
switch (result.request_result()) {
case AsyncDeviceRequest::Failure:
case AsyncDeviceRequest::Cancelled:
return EIO;
case AsyncDeviceRequest::MemoryFault:
return EFAULT;
default:
break;
}
}
off_t pos = whole_blocks * block_size();
// since we can only write in block_size() increments, if we want to do a
// partial write, we have to read the block's content first, modify it,
// then write the whole block back to the disk.
if (remaining > 0) {
auto data_buffer = UserOrKernelBuffer::for_kernel_buffer(partial_write_block->data());
{
auto read_request = TRY(try_make_request<AsyncBlockDeviceRequest>(AsyncBlockDeviceRequest::Read, index + whole_blocks, 1, data_buffer, block_size()));
auto result = read_request->wait();
if (result.wait_result().was_interrupted())
return EINTR;
switch (result.request_result()) {
case AsyncDeviceRequest::Failure:
return pos;
case AsyncDeviceRequest::Cancelled:
return EIO;
case AsyncDeviceRequest::MemoryFault:
// This should never happen, we're writing to a kernel buffer!
VERIFY_NOT_REACHED();
default:
break;
}
}
TRY(inbuf.read(partial_write_block->offset_pointer(offset_within_block), pos, remaining));
{
auto write_request = TRY(try_make_request<AsyncBlockDeviceRequest>(AsyncBlockDeviceRequest::Write, index + whole_blocks, 1, data_buffer, block_size()));
auto result = write_request->wait();
if (result.wait_result().was_interrupted())
return EINTR;
switch (result.request_result()) {
case AsyncDeviceRequest::Failure:
return pos;
case AsyncDeviceRequest::Cancelled:
return EIO;
case AsyncDeviceRequest::MemoryFault:
// This should never happen, we're writing to a kernel buffer!
VERIFY_NOT_REACHED();
default:
break;
}
}
}
return pos + remaining;
}
bool StorageDevice::can_write(OpenFileDescription const&, u64 offset) const
{
return offset < (max_addressable_block() * block_size());
}
ErrorOr<void> StorageDevice::ioctl(OpenFileDescription&, unsigned request, Userspace<void*> arg)
{
switch (request) {
case STORAGE_DEVICE_GET_SIZE: {
u64 disk_size = m_max_addressable_block * block_size();
return copy_to_user(static_ptr_cast<u64*>(arg), &disk_size);
break;
}
case STORAGE_DEVICE_GET_BLOCK_SIZE: {
size_t size = block_size();
return copy_to_user(static_ptr_cast<size_t*>(arg), &size);
break;
}
default:
return EINVAL;
}
}
}