Kernel+SystemServer: Defer creation of device nodes to userspace

Don't create these device nodes in the Kernel; this essentially forces
userspace (SystemServer) to take control of this operation and to decide
how to create these device nodes.

This makes DevFS resemble Linux's devtmpfs, and allows us to remove
a bunch of unneeded overriding implementations of device name creation
in the Kernel.
This commit is contained in:
Liav A 2021-08-14 05:04:56 +03:00 committed by Andreas Kling
parent fcc046047f
commit e0d712c6f7
Notes: sideshowbarker 2024-07-18 04:29:12 +09:00
7 changed files with 144 additions and 45 deletions

View file

@ -19,15 +19,6 @@ DevFS::DevFS()
{
}
// Wraps `device` in a new DevFSDeviceInode, named after device.device_name(),
// and appends it to the root directory's node list.
void DevFS::notify_new_device(Device& device)
{
    // The name is built before m_lock is taken.
    // FIXME: Handle KString allocation failure.
    auto node_name = KString::try_create(device.device_name()).release_value();

    MutexLocker locker(m_lock);
    auto device_inode = adopt_ref(*new DevFSDeviceInode(*this, device, move(node_name)));
    m_root_inode->m_nodes.append(device_inode);
}
size_t DevFS::allocate_inode_index()
{
MutexLocker locker(m_lock);
@ -36,11 +27,6 @@ size_t DevFS::allocate_inode_index()
return 1 + m_next_inode_index.value();
}
// Device removal is not implemented yet: the body consists solely of the
// TODO() marker and the Device argument is unused.
void DevFS::notify_device_removal(Device&)
{
TODO();
}
// Intentionally empty: the destructor performs no explicit work of its own.
DevFS::~DevFS()
{
}
@ -48,12 +34,6 @@ DevFS::~DevFS()
// Allocates the root directory inode (the allocation error is propagated via
// TRY) and then adds a device inode for every device visited by
// Device::for_each(), skipping the PTY devices.
KResult DevFS::initialize()
{
    m_root_inode = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) DevFSRootDirectoryInode(*this)));

    Device::for_each([&](Device& device) {
        // FIXME: Find a better way to not add MasterPTYs or SlavePTYs!
        bool const is_pty_device = device.is_master_pty() || (device.is_character_device() && device.major() == 201);
        if (!is_pty_device)
            notify_new_device(device);
    });

    return KSuccess;
}
@ -214,7 +194,7 @@ KResultOr<NonnullRefPtr<Inode>> DevFSRootDirectoryInode::lookup(StringView name)
}
return ENOENT;
}
KResultOr<NonnullRefPtr<Inode>> DevFSRootDirectoryInode::create_child(StringView name, mode_t mode, dev_t, UserID, GroupID)
KResultOr<NonnullRefPtr<Inode>> DevFSRootDirectoryInode::create_child(StringView name, mode_t mode, dev_t device_mode, UserID, GroupID)
{
MutexLocker locker(fs().m_lock);
@ -232,6 +212,15 @@ KResultOr<NonnullRefPtr<Inode>> DevFSRootDirectoryInode::create_child(StringView
m_nodes.append(*new_directory_inode);
return new_directory_inode;
}
if (metadata.is_device()) {
auto name_kstring = TRY(KString::try_create(name));
unsigned major = major_from_encoded_device(device_mode);
unsigned minor = minor_from_encoded_device(device_mode);
auto new_device_inode = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) DevFSDeviceInode(fs(), major, minor, is_block_device(mode), move(name_kstring))));
TRY(new_device_inode->chmod(mode));
m_nodes.append(*new_device_inode);
return new_device_inode;
}
if (metadata.is_symlink()) {
auto name_kstring = TRY(KString::try_create(name));
auto new_link_inode = TRY(adopt_nonnull_ref_or_enomem(new (nothrow) DevFSLinkInode(fs(), move(name_kstring))));
@ -256,10 +245,12 @@ InodeMetadata DevFSRootDirectoryInode::metadata() const
return metadata;
}
DevFSDeviceInode::DevFSDeviceInode(DevFS& fs, Device const& device, NonnullOwnPtr<KString> name)
DevFSDeviceInode::DevFSDeviceInode(DevFS& fs, unsigned major_number, unsigned minor_number, bool block_device, NonnullOwnPtr<KString> name)
: DevFSInode(fs)
, m_attached_device(device)
, m_name(move(name))
, m_major_number(major_number)
, m_minor_number(minor_number)
, m_block_device(block_device)
{
}
@ -275,6 +266,16 @@ KResult DevFSDeviceInode::chown(UserID uid, GroupID gid)
return KSuccess;
}
// Records the permission bits for this device node.
// Only the low nine bits (0777) of `mode` are kept; the file-type bits are
// supplied separately by metadata(). Always returns KSuccess.
KResult DevFSDeviceInode::chmod(mode_t mode)
{
    MutexLocker locker(m_inode_lock);

    mode_t const permission_bits = mode & 0777;
    // Skip the store when the stored mode already matches.
    if (m_required_mode != permission_bits)
        m_required_mode = permission_bits;

    return KSuccess;
}
StringView DevFSDeviceInode::name() const
{
return m_name->view();
@ -284,12 +285,15 @@ KResultOr<size_t> DevFSDeviceInode::read_bytes(off_t offset, size_t count, UserO
{
MutexLocker locker(m_inode_lock);
VERIFY(!!description);
if (!m_attached_device->can_read(*description, offset))
return 0;
auto nread = const_cast<Device&>(*m_attached_device).read(*description, offset, buffer, count);
if (nread.is_error())
return EIO;
return nread.value();
RefPtr<Device> device = Device::get_device(m_major_number, m_minor_number);
if (!device)
return KResult(ENODEV);
if (!device->can_read(*description, offset))
return KResult(ENOTIMPL);
auto result = const_cast<Device&>(*device).read(*description, offset, buffer, count);
if (result.is_error())
return result;
return result.value();
}
InodeMetadata DevFSDeviceInode::metadata() const
@ -297,25 +301,28 @@ InodeMetadata DevFSDeviceInode::metadata() const
MutexLocker locker(m_inode_lock);
InodeMetadata metadata;
metadata.inode = { fsid(), index() };
metadata.mode = (m_attached_device->is_block_device() ? S_IFBLK : S_IFCHR) | m_attached_device->required_mode();
metadata.mode = (m_block_device ? S_IFBLK : S_IFCHR) | m_required_mode;
metadata.uid = m_uid;
metadata.gid = m_gid;
metadata.size = 0;
metadata.mtime = mepoch;
metadata.major_device = m_attached_device->major();
metadata.minor_device = m_attached_device->minor();
metadata.major_device = m_major_number;
metadata.minor_device = m_minor_number;
return metadata;
}
KResultOr<size_t> DevFSDeviceInode::write_bytes(off_t offset, size_t count, const UserOrKernelBuffer& buffer, OpenFileDescription* description)
{
MutexLocker locker(m_inode_lock);
VERIFY(!!description);
if (!m_attached_device->can_write(*description, offset))
return 0;
auto nread = const_cast<Device&>(*m_attached_device).write(*description, offset, buffer, count);
if (nread.is_error())
return EIO;
return nread.value();
RefPtr<Device> device = Device::get_device(m_major_number, m_minor_number);
if (!device)
return KResult(ENODEV);
if (!device->can_write(*description, offset))
return KResult(ENOTIMPL);
auto result = const_cast<Device&>(*device).write(*description, offset, buffer, count);
if (result.is_error())
return result;
return result.value();
}
DevFSPtsDirectoryInode::DevFSPtsDirectoryInode(DevFS& fs)

View file

@ -24,10 +24,6 @@ public:
virtual KResult initialize() override;
virtual StringView class_name() const override { return "DevFS"sv; }
void notify_new_device(Device&);
void notify_device_removal(Device&);
virtual Inode& root_inode() override;
private:
@ -76,16 +72,19 @@ public:
virtual ~DevFSDeviceInode() override;
private:
DevFSDeviceInode(DevFS&, Device const&, NonnullOwnPtr<KString> name);
DevFSDeviceInode(DevFS&, unsigned, unsigned, bool, NonnullOwnPtr<KString> name);
// ^Inode
virtual KResultOr<size_t> read_bytes(off_t, size_t, UserOrKernelBuffer& buffer, OpenFileDescription*) const override;
virtual InodeMetadata metadata() const override;
virtual KResultOr<size_t> write_bytes(off_t, size_t, const UserOrKernelBuffer& buffer, OpenFileDescription*) override;
virtual KResult chown(UserID, GroupID) override;
virtual KResult chmod(mode_t) override;
NonnullRefPtr<Device> m_attached_device;
NonnullOwnPtr<KString> m_name;
const unsigned m_major_number;
const unsigned m_minor_number;
const bool m_block_device;
mode_t m_required_mode;
UserID m_uid { 0 };
GroupID m_gid { 0 };
};

View file

@ -19,6 +19,8 @@ constexpr u32 encoded_device(unsigned major, unsigned minor)
{
return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12);
}
// Decoders for values produced by encoded_device().
// Layout of an encoded value: bits 0-7 hold the low byte of the minor number,
// bits 8-19 hold the major number, and bits 20 and up hold the remaining
// (upper) bits of the minor number.
// Both are constexpr, like encoded_device(), so they can also be evaluated at
// compile time.
constexpr unsigned int major_from_encoded_device(dev_t dev) { return (dev & 0xfff00u) >> 8u; }
constexpr unsigned int minor_from_encoded_device(dev_t dev) { return (dev & 0xffu) | ((dev >> 12u) & 0xfff00u); }
// File-type predicates: each masks `mode` with S_IFMT and compares the result
// against one file-type constant.
// True when the file-type bits of `mode` equal S_IFDIR.
inline bool is_directory(mode_t mode) { return (mode & S_IFMT) == S_IFDIR; }
// True when the file-type bits of `mode` equal S_IFCHR.
inline bool is_character_device(mode_t mode) { return (mode & S_IFMT) == S_IFCHR; }

View file

@ -71,6 +71,11 @@ KResult OpenFileDescription::attach()
return m_file->attach(*this);
}
// Stores `custody` in m_custody.
// NOTE(review): the Badge<VirtualFileSystem> parameter presumably limits
// callers to VirtualFileSystem — confirm against the Badge definition.
void OpenFileDescription::set_original_custody(Badge<VirtualFileSystem>, Custody& custody)
{
m_custody = custody;
}
Thread::FileBlocker::BlockFlags OpenFileDescription::should_unblock(Thread::FileBlocker::BlockFlags block_flags) const
{
using BlockFlags = Thread::FileBlocker::BlockFlags;

View file

@ -119,6 +119,7 @@ public:
OwnPtr<OpenFileDescriptionData>& data() { return m_data; }
void set_original_inode(Badge<VirtualFileSystem>, NonnullRefPtr<Inode>&& inode) { m_inode = move(inode); }
void set_original_custody(Badge<VirtualFileSystem>, Custody& custody);
KResult truncate(u64);

View file

@ -276,6 +276,7 @@ KResultOr<NonnullRefPtr<OpenFileDescription>> VirtualFileSystem::open(StringView
}
auto description = TRY(device->open(options));
description->set_original_inode({}, inode);
description->set_original_custody({}, custody);
return description;
}

View file

@ -14,6 +14,7 @@
#include <LibCore/EventLoop.h>
#include <LibCore/File.h>
#include <errno.h>
#include <fcntl.h>
#include <grp.h>
#include <signal.h>
#include <stdio.h>
@ -102,6 +103,61 @@ static void chown_all_matching_device_nodes(group* group, unsigned major_number)
}
}
// Packs a (major, minor) pair into a single device number:
//   bits 0-7   low byte of the minor number
//   bits 8+    major number
//   bits 20+   remaining upper bits of the minor number
constexpr unsigned encoded_device(unsigned major, unsigned minor)
{
    unsigned const minor_low_byte = minor & 0xff;
    unsigned const major_field = major << 8;
    unsigned const minor_upper_bits = (minor & ~0xff) << 12;
    return minor_low_byte | major_field | minor_upper_bits;
}
// Creates the initial set of device nodes under /dev with mknod().
// The umask is set to 0 while the nodes are created, so each node gets exactly
// the permission bits listed in the table, and the previous umask is restored
// afterwards. A failing mknod() is treated as fatal (VERIFY_NOT_REACHED).
static void populate_devfs()
{
    struct DeviceNode {
        char const* path;
        mode_t mode;
        unsigned major;
        unsigned minor;
    };

    // Each (major, minor) pair must be unique: DevFS resolves a node to its
    // device purely through Device::get_device(major, minor).
    static constexpr DeviceNode device_nodes[] = {
        { "/dev/audio", 0220 | S_IFCHR, 42, 42 },
        { "/dev/console", 0666 | S_IFCHR, 5, 1 },
        { "/dev/fb0", 0666 | S_IFBLK, 29, 0 },
        // /dev/full is (1, 7), alongside the other major-1 nodes (mem, null,
        // zero, random). It must not share (29, 0) with /dev/fb0.
        { "/dev/full", 0660 | S_IFCHR, 1, 7 },
        { "/dev/hda", 0600 | S_IFBLK, 3, 0 },
        { "/dev/hvc0p0", 0666 | S_IFCHR, 229, 0 },
        { "/dev/hwrng", 0666 | S_IFCHR, 10, 183 },
        { "/dev/keyboard0", 0660 | S_IFCHR, 85, 0 },
        { "/dev/mem", 0660 | S_IFCHR, 1, 1 },
        { "/dev/mouse0", 0660 | S_IFCHR, 10, 0 },
        { "/dev/null", 0666 | S_IFCHR, 1, 3 },
        { "/dev/ptmx", 0666 | S_IFCHR, 5, 2 },
        { "/dev/random", 0666 | S_IFCHR, 1, 8 },
        { "/dev/tty0", 0620 | S_IFCHR, 4, 0 },
        { "/dev/tty1", 0620 | S_IFCHR, 4, 1 },
        { "/dev/tty2", 0620 | S_IFCHR, 4, 2 },
        { "/dev/tty3", 0620 | S_IFCHR, 4, 3 },
        { "/dev/ttyS0", 0620 | S_IFCHR, 4, 64 },
        { "/dev/ttyS1", 0620 | S_IFCHR, 4, 65 },
        { "/dev/ttyS2", 0620 | S_IFCHR, 4, 66 },
        // NOTE(review): ttyS3 is 0666 while ttyS0-2 are 0620 — confirm this is intended.
        { "/dev/ttyS3", 0666 | S_IFCHR, 4, 67 },
        { "/dev/zero", 0666 | S_IFCHR, 1, 5 },
    };

    mode_t old_mask = umask(0);
    for (auto const& node : device_nodes) {
        if (mknod(node.path, node.mode, encoded_device(node.major, node.minor)) < 0)
            VERIFY_NOT_REACHED();
    }
    umask(old_mask);
}
static void prepare_devfs()
{
// FIXME: Find a better way to do all of this stuff, without hardcoding all of this!
@ -111,6 +167,8 @@ static void prepare_devfs()
VERIFY_NOT_REACHED();
}
populate_devfs();
rc = mount(-1, "/sys", "sys", 0);
if (rc != 0) {
VERIFY_NOT_REACHED();
@ -162,6 +220,32 @@ static void prepare_devfs()
VERIFY_NOT_REACHED();
}
// Note: We open the /dev/null device and set file descriptors 0, 1, 2 to it
// because otherwise these file descriptors won't have a custody, which makes
// the ProcFS file descriptor links (at /proc/PID/fd/{0,1,2}) have an
// absolute path of "device:1,3" instead of something like "/dev/null".
// This also affects every other process that inherits the file descriptors
// from SystemServer, so it is important for other things as well (including
// the ProcFS tests that run in CI mode).
int stdin_new_fd = open("/dev/null", O_NONBLOCK);
if (stdin_new_fd < 0) {
VERIFY_NOT_REACHED();
}
rc = dup2(stdin_new_fd, 0);
if (rc < 0) {
VERIFY_NOT_REACHED();
}
rc = dup2(stdin_new_fd, 1);
if (rc < 0) {
VERIFY_NOT_REACHED();
}
rc = dup2(stdin_new_fd, 2);
if (rc < 0) {
VERIFY_NOT_REACHED();
}
endgrent();
}