Kernel: Introduce support for using FileSystem object in multiple mounts

The idea is to enable mounting FileSystem objects across multiple mounts
in contrast to what happened until now - each mount has its own unique
FileSystem object being attached to it.

Considering a situation of mounting a block device at 2 different mount
points at in system, there were a couple of critical flaws due to how
the previous "design" worked:
1. BlockBasedFileSystem(s) that pointed to the same actual device had a
separate DiskCache object being attached to them. Because both instances
were not synchronized by any means, corruption of the filesystem is most
likely achieveable by a simple cache flush of either of the instances.
2. For superblock-oriented filesystems (such as the ext2 filesystem),
lack of synchronization between both instances can lead to severe
corruption in the superblock, which could render the entire filesystem
unusable.
3. Flags of a specific filesystem implementation (for example, with xfs
on Linux, one can instruct to mount it with the discard option) must be
honored across multiple mounts, to ensure expected behavior against a
particular filesystem.

This patch put the foundations to start fix the issues mentioned above.
However, there are still major issues to solve, so this is only a start.
This commit is contained in:
Liav A 2022-08-20 00:03:24 +03:00 committed by Andrew Kaster
parent 965afba320
commit 0fd7b688af
Notes: sideshowbarker 2024-07-17 08:43:11 +09:00
17 changed files with 168 additions and 33 deletions

View file

@ -116,8 +116,10 @@ BlockBasedFileSystem::BlockBasedFileSystem(OpenFileDescription& file_description
BlockBasedFileSystem::~BlockBasedFileSystem() = default;
ErrorOr<void> BlockBasedFileSystem::initialize()
ErrorOr<void> BlockBasedFileSystem::initialize_while_locked()
{
VERIFY(m_lock.is_locked());
VERIFY(!is_initialized_while_locked());
VERIFY(block_size() != 0);
auto cached_block_data = TRY(KBuffer::try_create_with_size("BlockBasedFS: Cache blocks"sv, DiskCache::EntryCount * block_size()));
auto entries_data = TRY(KBuffer::try_create_with_size("BlockBasedFS: Cache entries"sv, DiskCache::EntryCount * sizeof(CacheEntry)));

View file

@ -16,7 +16,6 @@ public:
AK_TYPEDEF_DISTINCT_ORDERED_ID(u64, BlockIndex);
virtual ~BlockBasedFileSystem() override;
virtual ErrorOr<void> initialize() override;
u64 logical_block_size() const { return m_logical_block_size; };
@ -26,6 +25,8 @@ public:
protected:
explicit BlockBasedFileSystem(OpenFileDescription&);
virtual ErrorOr<void> initialize_while_locked() override;
ErrorOr<void> read_block(BlockIndex, UserOrKernelBuffer*, size_t count, u64 offset = 0, bool allow_cache = true) const;
ErrorOr<void> read_blocks(BlockIndex, unsigned count, UserOrKernelBuffer&, bool allow_cache = true) const;

View file

@ -77,9 +77,16 @@ ext2_group_desc const& Ext2FS::group_descriptor(GroupIndex group_index) const
return block_group_descriptors()[group_index.value() - 1];
}
ErrorOr<void> Ext2FS::initialize()
bool Ext2FS::is_initialized_while_locked()
{
MutexLocker locker(m_lock);
VERIFY(m_lock.is_locked());
return !m_root_inode.is_null();
}
ErrorOr<void> Ext2FS::initialize_while_locked()
{
VERIFY(m_lock.is_locked());
VERIFY(!is_initialized_while_locked());
VERIFY((sizeof(ext2_super_block) % logical_block_size()) == 0);
auto super_block_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&m_super_block);
@ -109,7 +116,7 @@ ErrorOr<void> Ext2FS::initialize()
set_fragment_size(EXT2_FRAG_SIZE(&super_block));
// Note: This depends on the block size being available.
TRY(BlockBasedFileSystem::initialize());
TRY(BlockBasedFileSystem::initialize_while_locked());
VERIFY(block_size() <= (int)max_block_size);
@ -1680,7 +1687,7 @@ unsigned Ext2FS::free_inode_count() const
return super_block().s_free_inodes_count;
}
ErrorOr<void> Ext2FS::prepare_to_unmount()
ErrorOr<void> Ext2FS::prepare_to_clear_last_mount()
{
MutexLocker locker(m_lock);

View file

@ -91,15 +91,12 @@ public:
static ErrorOr<NonnullLockRefPtr<FileSystem>> try_create(OpenFileDescription&);
virtual ~Ext2FS() override;
virtual ErrorOr<void> initialize() override;
virtual unsigned total_block_count() const override;
virtual unsigned free_block_count() const override;
virtual unsigned total_inode_count() const override;
virtual unsigned free_inode_count() const override;
virtual ErrorOr<void> prepare_to_unmount() override;
virtual bool supports_watchers() const override { return true; }
virtual u8 internal_file_type_to_directory_entry_type(DirectoryEntryView const& entry) const override;
@ -126,6 +123,10 @@ private:
ErrorOr<void> flush_super_block();
virtual ErrorOr<void> initialize_while_locked() override;
virtual bool is_initialized_while_locked() override;
virtual ErrorOr<void> prepare_to_clear_last_mount() override;
virtual StringView class_name() const override { return "Ext2FS"sv; }
virtual Ext2FSInode& root_inode() override;
ErrorOr<NonnullLockRefPtr<Inode>> get_inode(InodeIdentifier) const;

View file

@ -22,9 +22,16 @@ FATFS::FATFS(OpenFileDescription& file_description)
{
}
ErrorOr<void> FATFS::initialize()
bool FATFS::is_initialized_while_locked()
{
MutexLocker locker(m_lock);
VERIFY(m_lock.is_locked());
return !m_root_inode.is_null();
}
ErrorOr<void> FATFS::initialize_while_locked()
{
VERIFY(m_lock.is_locked());
VERIFY(!is_initialized_while_locked());
m_boot_record = TRY(KBuffer::try_create_with_size("FATFS: Boot Record"sv, m_logical_block_size));
auto boot_record_buffer = UserOrKernelBuffer::for_kernel_buffer(m_boot_record->data());
@ -63,7 +70,7 @@ ErrorOr<void> FATFS::initialize()
u32 root_directory_sectors = ((boot_record()->root_directory_entry_count * sizeof(FATEntry)) + (m_logical_block_size - 1)) / m_logical_block_size;
m_first_data_sector = boot_record()->reserved_sector_count + (boot_record()->fat_count * boot_record()->sectors_per_fat) + root_directory_sectors;
TRY(BlockBasedFileSystem::initialize());
TRY(BlockBasedFileSystem::initialize_while_locked());
FATEntry root_entry {};

View file

@ -116,11 +116,16 @@ public:
static ErrorOr<NonnullLockRefPtr<FileSystem>> try_create(OpenFileDescription&);
virtual ~FATFS() override = default;
virtual ErrorOr<void> initialize() override;
virtual StringView class_name() const override { return "FATFS"sv; }
virtual Inode& root_inode() override;
private:
virtual ErrorOr<void> initialize_while_locked() override;
virtual bool is_initialized_while_locked() override;
// FIXME: This is not a proper way to clear last mount of a FAT filesystem,
// but for now we simply have no other way to properly do it.
virtual ErrorOr<void> prepare_to_clear_last_mount() override { return {}; }
FATFS(OpenFileDescription&);
static constexpr u8 signature_1 = 0x28;

View file

@ -15,4 +15,12 @@ FileBackedFileSystem::FileBackedFileSystem(OpenFileDescription& file_description
FileBackedFileSystem::~FileBackedFileSystem() = default;
ErrorOr<void> FileBackedFileSystem::initialize()
{
MutexLocker locker(m_lock);
if (is_initialized_while_locked())
return {};
return initialize_while_locked();
}
}

View file

@ -12,6 +12,8 @@
namespace Kernel {
class FileBackedFileSystem : public FileSystem {
friend class VirtualFileSystem;
public:
virtual ~FileBackedFileSystem() override;
@ -23,10 +25,19 @@ public:
protected:
explicit FileBackedFileSystem(OpenFileDescription&);
// Note: We require all FileBackedFileSystem to implement something that actually
// takes into account the fact that we will clean the last mount of the filesystem,
// therefore, removing the file system with it from the Kernel memory.
virtual ErrorOr<void> prepare_to_clear_last_mount() override = 0;
virtual ErrorOr<void> initialize_while_locked() = 0;
virtual bool is_initialized_while_locked() = 0;
private:
virtual ErrorOr<void> initialize() override final;
virtual bool is_file_backed() const override { return true; }
IntrusiveListNode<FileBackedFileSystem> m_file_backed_file_system_node;
mutable NonnullLockRefPtr<OpenFileDescription> m_file_description;
};
}

View file

@ -42,6 +42,15 @@ FileSystem* FileSystem::from_fsid(FileSystemID id)
return nullptr;
}
ErrorOr<void> FileSystem::prepare_to_unmount()
{
return m_attach_count.with([&](auto& attach_count) -> ErrorOr<void> {
if (attach_count == 1)
return prepare_to_clear_last_mount();
return {};
});
}
FileSystem::DirectoryEntryView::DirectoryEntryView(StringView n, InodeIdentifier i, u8 ft)
: name(n)
, inode(i)

View file

@ -41,7 +41,7 @@ public:
virtual unsigned total_inode_count() const { return 0; }
virtual unsigned free_inode_count() const { return 0; }
virtual ErrorOr<void> prepare_to_unmount() { return {}; }
ErrorOr<void> prepare_to_unmount();
struct DirectoryEntryView {
DirectoryEntryView(StringView name, InodeIdentifier, u8 file_type);
@ -61,12 +61,16 @@ public:
// Converts file types that are used internally by the filesystem to DT_* types
virtual u8 internal_file_type_to_directory_entry_type(DirectoryEntryView const& entry) const { return entry.file_type; }
SpinlockProtected<size_t>& mounted_count(Badge<VirtualFileSystem>) { return m_attach_count; }
protected:
FileSystem();
void set_block_size(u64 size) { m_block_size = size; }
void set_fragment_size(size_t size) { m_fragment_size = size; }
virtual ErrorOr<void> prepare_to_clear_last_mount() { return {}; }
mutable Mutex m_lock { "FS"sv };
private:
@ -74,6 +78,8 @@ private:
u64 m_block_size { 0 };
size_t m_fragment_size { 0 };
bool m_readonly { false };
SpinlockProtected<size_t> m_attach_count { LockRank::FileSystem, 0 };
};
inline FileSystem* InodeIdentifier::fs() // NOLINT(readability-make-member-function-const) const InodeIdentifiers should not be able to modify the FileSystem

View file

@ -182,9 +182,18 @@ ISO9660FS::ISO9660FS(OpenFileDescription& description)
ISO9660FS::~ISO9660FS() = default;
ErrorOr<void> ISO9660FS::initialize()
bool ISO9660FS::is_initialized_while_locked()
{
TRY(BlockBasedFileSystem::initialize());
VERIFY(m_lock.is_locked());
return !m_root_inode.is_null();
}
ErrorOr<void> ISO9660FS::initialize_while_locked()
{
VERIFY(m_lock.is_locked());
VERIFY(!is_initialized_while_locked());
TRY(BlockBasedFileSystem::initialize_while_locked());
TRY(parse_volume_set());
TRY(create_root_inode());
return {};
@ -223,6 +232,12 @@ u8 ISO9660FS::internal_file_type_to_directory_entry_type(DirectoryEntryView cons
return DT_REG;
}
ErrorOr<void> ISO9660FS::prepare_to_clear_last_mount()
{
// FIXME: Do proper cleaning here.
return {};
}
ErrorOr<void> ISO9660FS::parse_volume_set()
{
VERIFY(!m_primary_volume);

View file

@ -308,7 +308,6 @@ public:
static ErrorOr<NonnullLockRefPtr<FileSystem>> try_create(OpenFileDescription&);
virtual ~ISO9660FS() override;
virtual ErrorOr<void> initialize() override;
virtual StringView class_name() const override { return "ISO9660FS"sv; }
virtual Inode& root_inode() override;
@ -322,6 +321,11 @@ public:
private:
ISO9660FS(OpenFileDescription&);
virtual ErrorOr<void> prepare_to_clear_last_mount() override;
virtual bool is_initialized_while_locked() override;
virtual ErrorOr<void> initialize_while_locked() override;
ErrorOr<void> parse_volume_set();
ErrorOr<void> create_root_inode();
ErrorOr<void> calculate_inode_count() const;

View file

@ -20,6 +20,12 @@ Plan9FS::Plan9FS(OpenFileDescription& file_description)
{
}
ErrorOr<void> Plan9FS::prepare_to_clear_last_mount()
{
// FIXME: Do proper cleaning here.
return {};
}
Plan9FS::~Plan9FS()
{
// Make sure to destroy the root inode before the FS gets destroyed.
@ -196,8 +202,17 @@ private:
bool m_have_been_built { false };
};
ErrorOr<void> Plan9FS::initialize()
bool Plan9FS::is_initialized_while_locked()
{
VERIFY(m_lock.is_locked());
return !m_root_inode.is_null();
}
ErrorOr<void> Plan9FS::initialize_while_locked()
{
VERIFY(m_lock.is_locked());
VERIFY(!is_initialized_while_locked());
ensure_thread();
Message version_message { *this, Message::Type::Tversion };

View file

@ -22,8 +22,6 @@ public:
virtual ~Plan9FS() override;
static ErrorOr<NonnullLockRefPtr<FileSystem>> try_create(OpenFileDescription&);
virtual ErrorOr<void> initialize() override;
virtual bool supports_watchers() const override { return false; }
virtual Inode& root_inode() override;
@ -48,6 +46,11 @@ public:
private:
Plan9FS(OpenFileDescription&);
virtual ErrorOr<void> prepare_to_clear_last_mount() override;
virtual bool is_initialized_while_locked() override;
virtual ErrorOr<void> initialize_while_locked() override;
class Blocker;
class Plan9FSBlockerSet final : public Thread::BlockerSet {

View file

@ -65,7 +65,8 @@ ErrorOr<void> VirtualFileSystem::mount(FileSystem& fs, Custody& mount_point, int
auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(fs, &mount_point, flags)));
return m_mounts.with([&](auto& mounts) -> ErrorOr<void> {
auto& inode = mount_point.inode();
dbgln("VirtualFileSystem: Mounting {} at inode {} with flags {}",
dbgln("VirtualFileSystem: FileSystemID {}, Mounting {} at inode {} with flags {}",
fs.fsid(),
fs.class_name(),
inode.identifier(),
flags);
@ -73,6 +74,10 @@ ErrorOr<void> VirtualFileSystem::mount(FileSystem& fs, Custody& mount_point, int
dbgln("VirtualFileSystem: Mounting unsuccessful - inode {} is already a mount-point.", inode.identifier());
return EBUSY;
}
// Note: Actually add a mount for the filesystem and increment the filesystem mounted count
new_mount->guest_fs().mounted_count({}).with([&](auto& mounted_count) {
mounted_count++;
});
mounts.append(move(new_mount));
return {};
});
@ -106,16 +111,24 @@ ErrorOr<void> VirtualFileSystem::remount(Custody& mount_point, int new_flags)
return {};
}
ErrorOr<void> VirtualFileSystem::unmount(Inode& guest_inode)
ErrorOr<void> VirtualFileSystem::unmount(Custody& mountpoint_custody)
{
dbgln("VirtualFileSystem: unmount called with inode {}", guest_inode.identifier());
auto& guest_inode = mountpoint_custody.inode();
auto custody_path = TRY(mountpoint_custody.try_serialize_absolute_path());
dbgln("VirtualFileSystem: unmount called with inode {} on mountpoint {}", guest_inode.identifier(), custody_path->view());
return m_mounts.with([&](auto& mounts) -> ErrorOr<void> {
for (size_t i = 0; i < mounts.size(); ++i) {
auto& mount = mounts[i];
if (&mount->guest() != &guest_inode)
continue;
auto mountpoint_path = TRY(mount->absolute_path());
if (custody_path->view() != mountpoint_path->view())
continue;
TRY(mount->guest_fs().prepare_to_unmount());
mount->guest_fs().mounted_count({}).with([&](auto& mounted_count) {
mounted_count--;
});
dbgln("VirtualFileSystem: Unmounting file system {}...", mount->guest_fs().fsid());
(void)mounts.unstable_take(i);
return {};
@ -142,9 +155,13 @@ ErrorOr<void> VirtualFileSystem::mount_root(FileSystem& fs)
m_root_inode = root_inode;
auto pseudo_path = TRY(static_cast<FileBackedFileSystem&>(fs).file_description().pseudo_path());
dmesgln("VirtualFileSystem: mounted root from {} ({})", fs.class_name(), pseudo_path);
dmesgln("VirtualFileSystem: mounted root({}) from {} ({})", fs.fsid(), fs.class_name(), pseudo_path);
// Note: Actually add a mount for the filesystem and increment the filesystem mounted count
m_mounts.with([&](auto& mounts) {
new_mount->guest_fs().mounted_count({}).with([&](auto& mounted_count) {
mounted_count++;
});
mounts.append(move(new_mount));
});
@ -241,6 +258,24 @@ ErrorOr<InodeMetadata> VirtualFileSystem::lookup_metadata(Credentials const& cre
return custody->inode().metadata();
}
ErrorOr<NonnullLockRefPtr<FileBackedFileSystem>> VirtualFileSystem::find_already_existing_or_create_file_backed_file_system(OpenFileDescription& description, Function<ErrorOr<NonnullLockRefPtr<FileSystem>>(OpenFileDescription&)> callback)
{
return TRY(m_file_backed_file_systems_list.with([&](auto& list) -> ErrorOr<NonnullLockRefPtr<FileBackedFileSystem>> {
for (auto& node : list) {
if (&node.file_description() == &description) {
return node;
}
if (&node.file() == &description.file()) {
return node;
}
}
auto fs = TRY(callback(description));
VERIFY(fs->is_file_backed());
list.append(static_cast<FileBackedFileSystem&>(*fs));
return static_ptr_cast<FileBackedFileSystem>(fs);
}));
}
ErrorOr<NonnullLockRefPtr<OpenFileDescription>> VirtualFileSystem::open(Credentials const& credentials, StringView path, int options, mode_t mode, Custody& base, Optional<UidAndGid> owner)
{
if ((options & O_CREAT) && (options & O_DIRECTORY))

View file

@ -12,6 +12,8 @@
#include <AK/HashMap.h>
#include <AK/NonnullOwnPtrVector.h>
#include <AK/OwnPtr.h>
#include <AK/RefPtr.h>
#include <Kernel/FileSystem/FileBackedFileSystem.h>
#include <Kernel/FileSystem/FileSystem.h>
#include <Kernel/FileSystem/InodeIdentifier.h>
#include <Kernel/FileSystem/InodeMetadata.h>
@ -48,7 +50,7 @@ public:
ErrorOr<void> mount(FileSystem&, Custody& mount_point, int flags);
ErrorOr<void> bind_mount(Custody& source, Custody& mount_point, int flags);
ErrorOr<void> remount(Custody& mount_point, int new_flags);
ErrorOr<void> unmount(Inode& guest_inode);
ErrorOr<void> unmount(Custody& mount_point);
ErrorOr<NonnullLockRefPtr<OpenFileDescription>> open(Credentials const&, StringView path, int options, mode_t mode, Custody& base, Optional<UidAndGid> = {});
ErrorOr<NonnullLockRefPtr<OpenFileDescription>> create(Credentials const&, StringView path, int options, mode_t mode, Custody& parent_custody, Optional<UidAndGid> = {});
@ -71,6 +73,8 @@ public:
ErrorOr<void> for_each_mount(Function<ErrorOr<void>(Mount const&)>) const;
ErrorOr<NonnullLockRefPtr<FileBackedFileSystem>> find_already_existing_or_create_file_backed_file_system(OpenFileDescription& description, Function<ErrorOr<NonnullLockRefPtr<FileSystem>>(OpenFileDescription&)> callback);
InodeIdentifier root_inode_id() const;
static void sync();
@ -101,6 +105,7 @@ private:
SpinlockProtected<RefPtr<Custody>> m_root_custody;
SpinlockProtected<Vector<NonnullOwnPtr<Mount>, 16>> m_mounts { LockRank::None };
SpinlockProtected<IntrusiveList<&FileBackedFileSystem::m_file_backed_file_system_node>> m_file_backed_file_systems_list { LockRank::None };
};
}

View file

@ -39,7 +39,7 @@ static constexpr FileSystemInitializer s_initializers[] = {
{ "fat"sv, "FATFS"sv, true, true, true, FATFS::try_create, {} },
};
static ErrorOr<NonnullLockRefPtr<FileSystem>> create_filesystem_instance(StringView fs_type, OpenFileDescription* possible_description)
static ErrorOr<NonnullLockRefPtr<FileSystem>> find_or_create_filesystem_instance(StringView fs_type, OpenFileDescription* possible_description)
{
for (auto& initializer_entry : s_initializers) {
if (fs_type != initializer_entry.short_name && fs_type != initializer_entry.name)
@ -49,6 +49,9 @@ static ErrorOr<NonnullLockRefPtr<FileSystem>> create_filesystem_instance(StringV
NonnullLockRefPtr<FileSystem> fs = TRY(initializer_entry.create());
return fs;
}
// Note: If there's an associated file description with the filesystem, we could
// try to first find it from the VirtualFileSystem filesystem list and if it was not found,
// then create it and add it.
VERIFY(initializer_entry.create_with_fd);
if (!possible_description)
return EBADF;
@ -60,8 +63,7 @@ static ErrorOr<NonnullLockRefPtr<FileSystem>> create_filesystem_instance(StringV
dbgln("mount: this is not a seekable file");
return ENODEV;
}
NonnullLockRefPtr<FileSystem> fs = TRY(initializer_entry.create_with_fd(description));
return fs;
return TRY(VirtualFileSystem::the().find_already_existing_or_create_file_backed_file_system(description, initializer_entry.create_with_fd));
}
return ENODEV;
}
@ -112,11 +114,11 @@ ErrorOr<FlatPtr> Process::sys$mount(Userspace<Syscall::SC_mount_params const*> u
if (!description_or_error.is_error()) {
auto description = description_or_error.release_value();
fs = TRY(create_filesystem_instance(fs_type, description.ptr()));
fs = TRY(find_or_create_filesystem_instance(fs_type, description.ptr()));
auto source_pseudo_path = TRY(description->pseudo_path());
dbgln("mount: attempting to mount {} on {}", source_pseudo_path, target);
} else {
fs = TRY(create_filesystem_instance(fs_type, {}));
fs = TRY(find_or_create_filesystem_instance(fs_type, {}));
}
TRY(fs->initialize());
@ -135,8 +137,7 @@ ErrorOr<FlatPtr> Process::sys$umount(Userspace<char const*> user_mountpoint, siz
auto mountpoint = TRY(get_syscall_path_argument(user_mountpoint, mountpoint_length));
auto custody = TRY(VirtualFileSystem::the().resolve_path(credentials, mountpoint->view(), current_directory()));
auto& guest_inode = custody->inode();
TRY(VirtualFileSystem::the().unmount(guest_inode));
TRY(VirtualFileSystem::the().unmount(*custody));
return 0;
}