Inode.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2021, sin-ack <sin-ack@protonmail.com>
  4. * Copyright (c) 2022, Idan Horowitz <idan.horowitz@serenityos.org>
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #include <AK/Singleton.h>
  9. #include <AK/StringView.h>
  10. #include <Kernel/API/InodeWatcherEvent.h>
  11. #include <Kernel/FileSystem/Custody.h>
  12. #include <Kernel/FileSystem/Inode.h>
  13. #include <Kernel/FileSystem/InodeWatcher.h>
  14. #include <Kernel/FileSystem/OpenFileDescription.h>
  15. #include <Kernel/FileSystem/VirtualFileSystem.h>
  16. #include <Kernel/KBufferBuilder.h>
  17. #include <Kernel/Memory/SharedInodeVMObject.h>
  18. #include <Kernel/Net/LocalSocket.h>
  19. #include <Kernel/Process.h>
  20. namespace Kernel {
  21. static Singleton<SpinlockProtected<Inode::AllInstancesList, LockRank::None>> s_all_instances;
  22. SpinlockProtected<Inode::AllInstancesList, LockRank::None>& Inode::all_instances()
  23. {
  24. return s_all_instances;
  25. }
  26. void Inode::sync_all()
  27. {
  28. Vector<NonnullRefPtr<Inode>, 32> inodes;
  29. Inode::all_instances().with([&](auto& all_inodes) {
  30. for (auto& inode : all_inodes) {
  31. if (inode.is_metadata_dirty())
  32. inodes.append(inode);
  33. }
  34. });
  35. for (auto& inode : inodes) {
  36. VERIFY(inode->is_metadata_dirty());
  37. (void)inode->flush_metadata();
  38. }
  39. }
  40. void Inode::sync()
  41. {
  42. if (is_metadata_dirty())
  43. (void)flush_metadata();
  44. fs().flush_writes();
  45. }
  46. ErrorOr<NonnullOwnPtr<KBuffer>> Inode::read_entire(OpenFileDescription* description) const
  47. {
  48. auto builder = TRY(KBufferBuilder::try_create());
  49. u8 buffer[4096];
  50. off_t offset = 0;
  51. for (;;) {
  52. auto buf = UserOrKernelBuffer::for_kernel_buffer(buffer);
  53. auto nread = TRY(read_bytes(offset, sizeof(buffer), buf, description));
  54. VERIFY(nread <= sizeof(buffer));
  55. if (nread == 0)
  56. break;
  57. TRY(builder.append((char const*)buffer, nread));
  58. offset += nread;
  59. if (nread < sizeof(buffer))
  60. break;
  61. }
  62. auto entire_file = builder.build();
  63. if (!entire_file)
  64. return ENOMEM;
  65. return entire_file.release_nonnull();
  66. }
  67. ErrorOr<NonnullRefPtr<Custody>> Inode::resolve_as_link(Credentials const& credentials, Custody& base, RefPtr<Custody>* out_parent, int options, int symlink_recursion_level) const
  68. {
  69. // The default implementation simply treats the stored
  70. // contents as a path and resolves that. That is, it
  71. // behaves exactly how you would expect a symlink to work.
  72. // Make sure that our assumptions about the path length hold up.
  73. // Note that this doesn't mean that the reported size can be trusted, some inodes just report zero.
  74. VERIFY(size() <= MAXPATHLEN);
  75. Array<u8, MAXPATHLEN> contents;
  76. auto read_bytes = TRY(read_until_filled_or_end(0, contents.size(), UserOrKernelBuffer::for_kernel_buffer(contents.data()), nullptr));
  77. return VirtualFileSystem::the().resolve_path(credentials, StringView { contents.span().trim(read_bytes) }, base, out_parent, options, symlink_recursion_level);
  78. }
  79. Inode::Inode(FileSystem& fs, InodeIndex index)
  80. : m_file_system(fs)
  81. , m_index(index)
  82. {
  83. Inode::all_instances().with([&](auto& all_inodes) { all_inodes.append(*this); });
  84. }
  85. Inode::~Inode()
  86. {
  87. m_watchers.for_each([&](auto& watcher) {
  88. watcher->unregister_by_inode({}, identifier());
  89. });
  90. }
  91. void Inode::will_be_destroyed()
  92. {
  93. MutexLocker locker(m_inode_lock);
  94. if (m_metadata_dirty)
  95. (void)flush_metadata();
  96. }
  97. ErrorOr<size_t> Inode::write_bytes(off_t offset, size_t length, UserOrKernelBuffer const& target_buffer, OpenFileDescription* open_description)
  98. {
  99. MutexLocker locker(m_inode_lock);
  100. TRY(prepare_to_write_data());
  101. return write_bytes_locked(offset, length, target_buffer, open_description);
  102. }
  103. ErrorOr<size_t> Inode::read_bytes(off_t offset, size_t length, UserOrKernelBuffer& buffer, OpenFileDescription* open_description) const
  104. {
  105. MutexLocker locker(m_inode_lock, Mutex::Mode::Shared);
  106. return read_bytes_locked(offset, length, buffer, open_description);
  107. }
  108. ErrorOr<size_t> Inode::read_until_filled_or_end(off_t offset, size_t length, UserOrKernelBuffer buffer, OpenFileDescription* open_description) const
  109. {
  110. auto remaining_length = length;
  111. while (remaining_length > 0) {
  112. auto filled_bytes = TRY(read_bytes(offset, remaining_length, buffer, open_description));
  113. if (filled_bytes == 0)
  114. break;
  115. offset += filled_bytes;
  116. remaining_length -= filled_bytes;
  117. }
  118. return length - remaining_length;
  119. }
  120. ErrorOr<void> Inode::update_timestamps([[maybe_unused]] Optional<Time> atime, [[maybe_unused]] Optional<Time> ctime, [[maybe_unused]] Optional<Time> mtime)
  121. {
  122. return ENOTIMPL;
  123. }
  124. ErrorOr<void> Inode::increment_link_count()
  125. {
  126. return ENOTIMPL;
  127. }
  128. ErrorOr<void> Inode::decrement_link_count()
  129. {
  130. return ENOTIMPL;
  131. }
  132. ErrorOr<void> Inode::set_shared_vmobject(Memory::SharedInodeVMObject& vmobject)
  133. {
  134. MutexLocker locker(m_inode_lock);
  135. m_shared_vmobject = TRY(vmobject.try_make_weak_ptr<Memory::SharedInodeVMObject>());
  136. return {};
  137. }
  138. LockRefPtr<LocalSocket> Inode::bound_socket() const
  139. {
  140. return m_bound_socket.strong_ref();
  141. }
  142. bool Inode::bind_socket(LocalSocket& socket)
  143. {
  144. MutexLocker locker(m_inode_lock);
  145. if (m_bound_socket)
  146. return false;
  147. m_bound_socket = socket;
  148. return true;
  149. }
  150. bool Inode::unbind_socket()
  151. {
  152. MutexLocker locker(m_inode_lock);
  153. if (!m_bound_socket)
  154. return false;
  155. m_bound_socket = nullptr;
  156. return true;
  157. }
  158. ErrorOr<void> Inode::register_watcher(Badge<InodeWatcher>, InodeWatcher& watcher)
  159. {
  160. return m_watchers.with([&](auto& watchers) -> ErrorOr<void> {
  161. VERIFY(!watchers.contains(&watcher));
  162. TRY(watchers.try_set(&watcher));
  163. return {};
  164. });
  165. }
  166. void Inode::unregister_watcher(Badge<InodeWatcher>, InodeWatcher& watcher)
  167. {
  168. m_watchers.with([&](auto& watchers) {
  169. VERIFY(watchers.contains(&watcher));
  170. watchers.remove(&watcher);
  171. });
  172. }
  173. ErrorOr<NonnullRefPtr<FIFO>> Inode::fifo()
  174. {
  175. MutexLocker locker(m_inode_lock);
  176. VERIFY(metadata().is_fifo());
  177. // FIXME: Release m_fifo when it is closed by all readers and writers
  178. if (!m_fifo)
  179. m_fifo = TRY(FIFO::try_create(metadata().uid));
  180. return NonnullRefPtr { *m_fifo };
  181. }
  182. void Inode::set_metadata_dirty(bool metadata_dirty)
  183. {
  184. MutexLocker locker(m_inode_lock);
  185. if (metadata_dirty) {
  186. // Sanity check.
  187. VERIFY(!fs().is_readonly());
  188. }
  189. if (m_metadata_dirty == metadata_dirty)
  190. return;
  191. m_metadata_dirty = metadata_dirty;
  192. if (m_metadata_dirty) {
  193. // FIXME: Maybe we should hook into modification events somewhere else, I'm not sure where.
  194. // We don't always end up on this particular code path, for instance when writing to an ext2fs file.
  195. m_watchers.for_each([&](auto& watcher) {
  196. watcher->notify_inode_event({}, identifier(), InodeWatcherEvent::Type::MetadataModified);
  197. });
  198. }
  199. }
  200. void Inode::did_add_child(InodeIdentifier, StringView name)
  201. {
  202. m_watchers.for_each([&](auto& watcher) {
  203. watcher->notify_inode_event({}, identifier(), InodeWatcherEvent::Type::ChildCreated, name);
  204. });
  205. }
  206. void Inode::did_remove_child(InodeIdentifier, StringView name)
  207. {
  208. if (name == "." || name == "..") {
  209. // These are just aliases and are not interesting to userspace.
  210. return;
  211. }
  212. m_watchers.for_each([&](auto& watcher) {
  213. watcher->notify_inode_event({}, identifier(), InodeWatcherEvent::Type::ChildDeleted, name);
  214. });
  215. }
  216. void Inode::did_modify_contents()
  217. {
  218. // FIXME: What happens if this fails?
  219. // ENOTIMPL would be a meaningless error to return here
  220. auto now = kgettimeofday();
  221. (void)update_timestamps({}, now, now);
  222. m_watchers.for_each([&](auto& watcher) {
  223. watcher->notify_inode_event({}, identifier(), InodeWatcherEvent::Type::ContentModified);
  224. });
  225. }
  226. void Inode::did_delete_self()
  227. {
  228. m_watchers.for_each([&](auto& watcher) {
  229. watcher->notify_inode_event({}, identifier(), InodeWatcherEvent::Type::Deleted);
  230. });
  231. }
  232. ErrorOr<void> Inode::prepare_to_write_data()
  233. {
  234. VERIFY(m_inode_lock.is_locked());
  235. if (fs().is_readonly())
  236. return EROFS;
  237. auto metadata = this->metadata();
  238. if (metadata.is_setuid() || metadata.is_setgid()) {
  239. dbgln("Inode::prepare_to_write_data(): Stripping SUID/SGID bits from {}", identifier());
  240. return chmod(metadata.mode & ~(04000 | 02000));
  241. }
  242. return {};
  243. }
  244. LockRefPtr<Memory::SharedInodeVMObject> Inode::shared_vmobject() const
  245. {
  246. MutexLocker locker(m_inode_lock);
  247. return m_shared_vmobject.strong_ref();
  248. }
  249. template<typename T>
  250. static inline bool range_overlap(T start1, T len1, T start2, T len2)
  251. {
  252. return ((start1 < start2 + len2) || len2 == 0) && ((start2 < start1 + len1) || len1 == 0);
  253. }
  254. static inline ErrorOr<void> normalize_flock(OpenFileDescription const& description, flock& lock)
  255. {
  256. off_t start;
  257. switch (lock.l_whence) {
  258. case SEEK_SET:
  259. start = lock.l_start;
  260. break;
  261. case SEEK_CUR:
  262. start = description.offset() + lock.l_start;
  263. break;
  264. case SEEK_END:
  265. // FIXME: Implement SEEK_END and negative lengths.
  266. return ENOTSUP;
  267. default:
  268. return EINVAL;
  269. }
  270. lock = { lock.l_type, SEEK_SET, start, lock.l_len, 0 };
  271. return {};
  272. }
  273. bool Inode::can_apply_flock(flock const& new_lock, Optional<OpenFileDescription const&> description) const
  274. {
  275. VERIFY(new_lock.l_whence == SEEK_SET);
  276. if (new_lock.l_type == F_UNLCK)
  277. return true;
  278. return m_flocks.with([&](auto& flocks) {
  279. for (auto const& lock : flocks) {
  280. if (!range_overlap(lock.start, lock.len, new_lock.l_start, new_lock.l_len))
  281. continue;
  282. // There are two cases where we can attempt downgrade:
  283. //
  284. // 1) We're the owner of this lock. The downgrade will immediately
  285. // succeed.
  286. // 2) We're not the owner of this lock. Our downgrade attempt will
  287. // fail, and the thread will start blocking on an FlockBlocker.
  288. //
  289. // For the first case, we get the description from try_apply_flock
  290. // below. For the second case, the check below would always be
  291. // false, so there is no need to store the description in the
  292. // blocker in the first place.
  293. if (new_lock.l_type == F_RDLCK && lock.type == F_WRLCK)
  294. return description.has_value() && lock.owner == &description.value() && lock.start == new_lock.l_start && lock.len == new_lock.l_len;
  295. if (new_lock.l_type == F_WRLCK)
  296. return false;
  297. }
  298. return true;
  299. });
  300. }
  301. ErrorOr<bool> Inode::try_apply_flock(Process const& process, OpenFileDescription const& description, flock const& new_lock)
  302. {
  303. return m_flocks.with([&](auto& flocks) -> ErrorOr<bool> {
  304. if (!can_apply_flock(new_lock, description))
  305. return false;
  306. bool did_manipulate_lock = false;
  307. for (size_t i = 0; i < flocks.size(); ++i) {
  308. auto const& lock = flocks[i];
  309. bool is_potential_downgrade = new_lock.l_type == F_RDLCK && lock.type == F_WRLCK;
  310. bool is_potential_unlock = new_lock.l_type == F_UNLCK;
  311. bool is_lock_owner = &description == lock.owner;
  312. bool lock_range_exactly_matches = lock.start == new_lock.l_start && lock.len == new_lock.l_len;
  313. bool can_manage_this_lock = is_lock_owner && lock_range_exactly_matches;
  314. if ((is_potential_downgrade || is_potential_unlock) && can_manage_this_lock) {
  315. flocks.remove(i);
  316. did_manipulate_lock = true;
  317. break;
  318. }
  319. }
  320. if (new_lock.l_type != F_UNLCK)
  321. TRY(flocks.try_append(Flock { new_lock.l_start, new_lock.l_len, &description, process.pid().value(), new_lock.l_type }));
  322. if (did_manipulate_lock)
  323. m_flock_blocker_set.unblock_all_blockers_whose_conditions_are_met();
  324. // Judging by the Linux implementation, unlocking a non-existent lock
  325. // also works.
  326. return true;
  327. });
  328. }
  329. ErrorOr<void> Inode::apply_flock(Process const& process, OpenFileDescription const& description, Userspace<flock const*> input_lock, ShouldBlock should_block)
  330. {
  331. auto new_lock = TRY(copy_typed_from_user(input_lock));
  332. TRY(normalize_flock(description, new_lock));
  333. while (true) {
  334. auto success = TRY(try_apply_flock(process, description, new_lock));
  335. if (success)
  336. return {};
  337. if (should_block == ShouldBlock::No)
  338. return EAGAIN;
  339. if (Thread::current()->block<Thread::FlockBlocker>({}, *this, new_lock).was_interrupted())
  340. return EINTR;
  341. }
  342. }
  343. ErrorOr<void> Inode::get_flock(OpenFileDescription const& description, Userspace<flock*> reference_lock) const
  344. {
  345. flock lookup = {};
  346. TRY(copy_from_user(&lookup, reference_lock));
  347. TRY(normalize_flock(description, lookup));
  348. return m_flocks.with([&](auto& flocks) {
  349. for (auto const& lock : flocks) {
  350. if (!range_overlap(lock.start, lock.len, lookup.l_start, lookup.l_len))
  351. continue;
  352. // Locks with the same owner can't conflict with each other.
  353. if (lock.pid == Process::current().pid())
  354. continue;
  355. if ((lookup.l_type == F_RDLCK && lock.type == F_WRLCK) || lookup.l_type == F_WRLCK) {
  356. lookup = { lock.type, SEEK_SET, lock.start, lock.len, lock.pid };
  357. return copy_to_user(reference_lock, &lookup);
  358. }
  359. }
  360. lookup.l_type = F_UNLCK;
  361. return copy_to_user(reference_lock, &lookup);
  362. });
  363. }
  364. void Inode::remove_flocks_for_description(OpenFileDescription const& description)
  365. {
  366. m_flocks.with([&](auto& flocks) {
  367. flocks.remove_all_matching([&](auto& entry) { return entry.owner == &description; });
  368. });
  369. }
  370. bool Inode::has_watchers() const
  371. {
  372. return !m_watchers.with([&](auto& watchers) { return watchers.is_empty(); });
  373. }
  374. }