TCPSocket.cpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Singleton.h>
  7. #include <AK/Time.h>
  8. #include <Kernel/Debug.h>
  9. #include <Kernel/Devices/RandomDevice.h>
  10. #include <Kernel/FileSystem/OpenFileDescription.h>
  11. #include <Kernel/Locking/MutexProtected.h>
  12. #include <Kernel/Net/EthernetFrameHeader.h>
  13. #include <Kernel/Net/IPv4.h>
  14. #include <Kernel/Net/NetworkAdapter.h>
  15. #include <Kernel/Net/NetworkingManagement.h>
  16. #include <Kernel/Net/Routing.h>
  17. #include <Kernel/Net/TCP.h>
  18. #include <Kernel/Net/TCPSocket.h>
  19. #include <Kernel/Process.h>
  20. #include <Kernel/Random.h>
  21. namespace Kernel {
  22. void TCPSocket::for_each(Function<void(TCPSocket const&)> callback)
  23. {
  24. sockets_by_tuple().for_each_shared([&](auto const& it) {
  25. callback(*it.value);
  26. });
  27. }
  28. ErrorOr<void> TCPSocket::try_for_each(Function<ErrorOr<void>(TCPSocket const&)> callback)
  29. {
  30. return sockets_by_tuple().with_shared([&](auto const& sockets) -> ErrorOr<void> {
  31. for (auto& it : sockets)
  32. TRY(callback(*it.value));
  33. return {};
  34. });
  35. }
  36. bool TCPSocket::unref() const
  37. {
  38. bool did_hit_zero = sockets_by_tuple().with_exclusive([&](auto& table) {
  39. if (deref_base())
  40. return false;
  41. table.remove(tuple());
  42. const_cast<TCPSocket&>(*this).revoke_weak_ptrs();
  43. return true;
  44. });
  45. if (did_hit_zero) {
  46. const_cast<TCPSocket&>(*this).will_be_destroyed();
  47. delete this;
  48. }
  49. return did_hit_zero;
  50. }
  51. void TCPSocket::set_state(State new_state)
  52. {
  53. dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket({}) state moving from {} to {}", this, to_string(m_state), to_string(new_state));
  54. auto was_disconnected = protocol_is_disconnected();
  55. auto previous_role = m_role;
  56. m_state = new_state;
  57. if (new_state == State::Established && m_direction == Direction::Outgoing) {
  58. set_role(Role::Connected);
  59. clear_so_error();
  60. }
  61. if (new_state == State::TimeWait) {
  62. // Once we hit TimeWait, we are only holding the socket in case there
  63. // are packets on the way which we wouldn't want a new socket to get hit
  64. // with, so there's no point in keeping the receive buffer around.
  65. drop_receive_buffer();
  66. }
  67. if (new_state == State::Closed) {
  68. closing_sockets().with_exclusive([&](auto& table) {
  69. table.remove(tuple());
  70. });
  71. if (m_originator)
  72. release_to_originator();
  73. }
  74. if (previous_role != m_role || was_disconnected != protocol_is_disconnected())
  75. evaluate_block_conditions();
  76. }
  77. static Singleton<MutexProtected<HashMap<IPv4SocketTuple, RefPtr<TCPSocket>>>> s_socket_closing;
  78. MutexProtected<HashMap<IPv4SocketTuple, RefPtr<TCPSocket>>>& TCPSocket::closing_sockets()
  79. {
  80. return *s_socket_closing;
  81. }
  82. static Singleton<MutexProtected<HashMap<IPv4SocketTuple, TCPSocket*>>> s_socket_tuples;
  83. MutexProtected<HashMap<IPv4SocketTuple, TCPSocket*>>& TCPSocket::sockets_by_tuple()
  84. {
  85. return *s_socket_tuples;
  86. }
  87. RefPtr<TCPSocket> TCPSocket::from_tuple(IPv4SocketTuple const& tuple)
  88. {
  89. return sockets_by_tuple().with_shared([&](auto const& table) -> RefPtr<TCPSocket> {
  90. auto exact_match = table.get(tuple);
  91. if (exact_match.has_value())
  92. return { *exact_match.value() };
  93. auto address_tuple = IPv4SocketTuple(tuple.local_address(), tuple.local_port(), IPv4Address(), 0);
  94. auto address_match = table.get(address_tuple);
  95. if (address_match.has_value())
  96. return { *address_match.value() };
  97. auto wildcard_tuple = IPv4SocketTuple(IPv4Address(), tuple.local_port(), IPv4Address(), 0);
  98. auto wildcard_match = table.get(wildcard_tuple);
  99. if (wildcard_match.has_value())
  100. return { *wildcard_match.value() };
  101. return {};
  102. });
  103. }
  104. ErrorOr<NonnullRefPtr<TCPSocket>> TCPSocket::try_create_client(IPv4Address const& new_local_address, u16 new_local_port, IPv4Address const& new_peer_address, u16 new_peer_port)
  105. {
  106. auto tuple = IPv4SocketTuple(new_local_address, new_local_port, new_peer_address, new_peer_port);
  107. return sockets_by_tuple().with_exclusive([&](auto& table) -> ErrorOr<NonnullRefPtr<TCPSocket>> {
  108. if (table.contains(tuple))
  109. return EEXIST;
  110. auto receive_buffer = TRY(try_create_receive_buffer());
  111. auto client = TRY(TCPSocket::try_create(protocol(), move(receive_buffer)));
  112. client->set_setup_state(SetupState::InProgress);
  113. client->set_local_address(new_local_address);
  114. client->set_local_port(new_local_port);
  115. client->set_peer_address(new_peer_address);
  116. client->set_peer_port(new_peer_port);
  117. client->set_direction(Direction::Incoming);
  118. client->set_originator(*this);
  119. m_pending_release_for_accept.set(tuple, client);
  120. table.set(tuple, client);
  121. return { move(client) };
  122. });
  123. }
  124. void TCPSocket::release_to_originator()
  125. {
  126. VERIFY(!!m_originator);
  127. m_originator.strong_ref()->release_for_accept(*this);
  128. m_originator.clear();
  129. }
  130. void TCPSocket::release_for_accept(NonnullRefPtr<TCPSocket> socket)
  131. {
  132. VERIFY(m_pending_release_for_accept.contains(socket->tuple()));
  133. m_pending_release_for_accept.remove(socket->tuple());
  134. // FIXME: Should we observe this error somehow?
  135. [[maybe_unused]] auto rc = queue_connection_from(move(socket));
  136. }
  137. TCPSocket::TCPSocket(int protocol, NonnullOwnPtr<DoubleBuffer> receive_buffer, NonnullOwnPtr<KBuffer> scratch_buffer)
  138. : IPv4Socket(SOCK_STREAM, protocol, move(receive_buffer), move(scratch_buffer))
  139. {
  140. m_last_retransmit_time = kgettimeofday();
  141. }
  142. TCPSocket::~TCPSocket()
  143. {
  144. dequeue_for_retransmit();
  145. dbgln_if(TCP_SOCKET_DEBUG, "~TCPSocket in state {}", to_string(state()));
  146. }
  147. ErrorOr<NonnullRefPtr<TCPSocket>> TCPSocket::try_create(int protocol, NonnullOwnPtr<DoubleBuffer> receive_buffer)
  148. {
  149. // Note: Scratch buffer is only used for SOCK_STREAM sockets.
  150. auto scratch_buffer = TRY(KBuffer::try_create_with_size("TCPSocket: Scratch buffer"sv, 65536));
  151. return adopt_nonnull_ref_or_enomem(new (nothrow) TCPSocket(protocol, move(receive_buffer), move(scratch_buffer)));
  152. }
  153. ErrorOr<size_t> TCPSocket::protocol_size(ReadonlyBytes raw_ipv4_packet)
  154. {
  155. auto& ipv4_packet = *reinterpret_cast<IPv4Packet const*>(raw_ipv4_packet.data());
  156. auto& tcp_packet = *static_cast<TCPPacket const*>(ipv4_packet.payload());
  157. return raw_ipv4_packet.size() - sizeof(IPv4Packet) - tcp_packet.header_size();
  158. }
  159. ErrorOr<size_t> TCPSocket::protocol_receive(ReadonlyBytes raw_ipv4_packet, UserOrKernelBuffer& buffer, size_t buffer_size, [[maybe_unused]] int flags)
  160. {
  161. auto& ipv4_packet = *reinterpret_cast<IPv4Packet const*>(raw_ipv4_packet.data());
  162. auto& tcp_packet = *static_cast<TCPPacket const*>(ipv4_packet.payload());
  163. size_t payload_size = raw_ipv4_packet.size() - sizeof(IPv4Packet) - tcp_packet.header_size();
  164. dbgln_if(TCP_SOCKET_DEBUG, "payload_size {}, will it fit in {}?", payload_size, buffer_size);
  165. VERIFY(buffer_size >= payload_size);
  166. SOCKET_TRY(buffer.write(tcp_packet.payload(), payload_size));
  167. return payload_size;
  168. }
  169. ErrorOr<size_t> TCPSocket::protocol_send(UserOrKernelBuffer const& data, size_t data_length)
  170. {
  171. auto adapter = bound_interface().with([](auto& bound_device) -> RefPtr<NetworkAdapter> { return bound_device; });
  172. RoutingDecision routing_decision = route_to(peer_address(), local_address(), adapter);
  173. if (routing_decision.is_zero())
  174. return set_so_error(EHOSTUNREACH);
  175. size_t mss = routing_decision.adapter->mtu() - sizeof(IPv4Packet) - sizeof(TCPPacket);
  176. data_length = min(data_length, mss);
  177. TRY(send_tcp_packet(TCPFlags::PSH | TCPFlags::ACK, &data, data_length, &routing_decision));
  178. return data_length;
  179. }
  180. ErrorOr<void> TCPSocket::send_ack(bool allow_duplicate)
  181. {
  182. if (!allow_duplicate && m_last_ack_number_sent == m_ack_number)
  183. return {};
  184. return send_tcp_packet(TCPFlags::ACK);
  185. }
  186. ErrorOr<void> TCPSocket::send_tcp_packet(u16 flags, UserOrKernelBuffer const* payload, size_t payload_size, RoutingDecision* user_routing_decision)
  187. {
  188. auto adapter = bound_interface().with([](auto& bound_device) -> RefPtr<NetworkAdapter> { return bound_device; });
  189. RoutingDecision routing_decision = user_routing_decision ? *user_routing_decision : route_to(peer_address(), local_address(), adapter);
  190. if (routing_decision.is_zero())
  191. return set_so_error(EHOSTUNREACH);
  192. auto ipv4_payload_offset = routing_decision.adapter->ipv4_payload_offset();
  193. bool const has_mss_option = flags == TCPFlags::SYN;
  194. const size_t options_size = has_mss_option ? sizeof(TCPOptionMSS) : 0;
  195. const size_t tcp_header_size = sizeof(TCPPacket) + options_size;
  196. const size_t buffer_size = ipv4_payload_offset + tcp_header_size + payload_size;
  197. auto packet = routing_decision.adapter->acquire_packet_buffer(buffer_size);
  198. if (!packet)
  199. return set_so_error(ENOMEM);
  200. routing_decision.adapter->fill_in_ipv4_header(*packet, local_address(),
  201. routing_decision.next_hop, peer_address(), IPv4Protocol::TCP,
  202. buffer_size - ipv4_payload_offset, type_of_service(), ttl());
  203. memset(packet->buffer->data() + ipv4_payload_offset, 0, sizeof(TCPPacket));
  204. auto& tcp_packet = *(TCPPacket*)(packet->buffer->data() + ipv4_payload_offset);
  205. VERIFY(local_port());
  206. tcp_packet.set_source_port(local_port());
  207. tcp_packet.set_destination_port(peer_port());
  208. tcp_packet.set_window_size(NumericLimits<u16>::max());
  209. tcp_packet.set_sequence_number(m_sequence_number);
  210. tcp_packet.set_data_offset(tcp_header_size / sizeof(u32));
  211. tcp_packet.set_flags(flags);
  212. if (payload) {
  213. if (auto result = payload->read(tcp_packet.payload(), payload_size); result.is_error()) {
  214. routing_decision.adapter->release_packet_buffer(*packet);
  215. return set_so_error(result.release_error());
  216. }
  217. }
  218. if (flags & TCPFlags::ACK) {
  219. m_last_ack_number_sent = m_ack_number;
  220. m_last_ack_sent_time = kgettimeofday();
  221. tcp_packet.set_ack_number(m_ack_number);
  222. }
  223. if (flags & TCPFlags::SYN) {
  224. ++m_sequence_number;
  225. } else {
  226. m_sequence_number += payload_size;
  227. }
  228. if (has_mss_option) {
  229. u16 mss = routing_decision.adapter->mtu() - sizeof(IPv4Packet) - sizeof(TCPPacket);
  230. TCPOptionMSS mss_option { mss };
  231. VERIFY(packet->buffer->size() >= ipv4_payload_offset + sizeof(TCPPacket) + sizeof(mss_option));
  232. memcpy(packet->buffer->data() + ipv4_payload_offset + sizeof(TCPPacket), &mss_option, sizeof(mss_option));
  233. }
  234. tcp_packet.set_checksum(compute_tcp_checksum(local_address(), peer_address(), tcp_packet, payload_size));
  235. bool expect_ack { tcp_packet.has_syn() || payload_size > 0 };
  236. if (expect_ack) {
  237. bool append_failed { false };
  238. m_unacked_packets.with_exclusive([&](auto& unacked_packets) {
  239. auto result = unacked_packets.packets.try_append({ m_sequence_number, packet, ipv4_payload_offset, *routing_decision.adapter });
  240. if (result.is_error()) {
  241. dbgln("TCPSocket: Dropped outbound packet because try_append() failed");
  242. append_failed = true;
  243. return;
  244. }
  245. unacked_packets.size += payload_size;
  246. enqueue_for_retransmit();
  247. });
  248. if (append_failed)
  249. return set_so_error(ENOMEM);
  250. }
  251. m_packets_out++;
  252. m_bytes_out += buffer_size;
  253. routing_decision.adapter->send_packet(packet->bytes());
  254. if (!expect_ack)
  255. routing_decision.adapter->release_packet_buffer(*packet);
  256. return {};
  257. }
  258. void TCPSocket::receive_tcp_packet(TCPPacket const& packet, u16 size)
  259. {
  260. if (packet.has_ack()) {
  261. u32 ack_number = packet.ack_number();
  262. dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket: receive_tcp_packet: {}", ack_number);
  263. int removed = 0;
  264. m_unacked_packets.with_exclusive([&](auto& unacked_packets) {
  265. while (!unacked_packets.packets.is_empty()) {
  266. auto& packet = unacked_packets.packets.first();
  267. dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket: iterate: {}", packet.ack_number);
  268. if (packet.ack_number <= ack_number) {
  269. auto old_adapter = packet.adapter.strong_ref();
  270. if (old_adapter)
  271. old_adapter->release_packet_buffer(*packet.buffer);
  272. TCPPacket& tcp_packet = *(TCPPacket*)(packet.buffer->buffer->data() + packet.ipv4_payload_offset);
  273. auto payload_size = packet.buffer->buffer->data() + packet.buffer->buffer->size() - (u8*)tcp_packet.payload();
  274. unacked_packets.size -= payload_size;
  275. evaluate_block_conditions();
  276. unacked_packets.packets.take_first();
  277. removed++;
  278. } else {
  279. break;
  280. }
  281. }
  282. if (unacked_packets.packets.is_empty()) {
  283. m_retransmit_attempts = 0;
  284. dequeue_for_retransmit();
  285. }
  286. dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket: receive_tcp_packet acknowledged {} packets", removed);
  287. });
  288. }
  289. m_packets_in++;
  290. m_bytes_in += packet.header_size() + size;
  291. }
  292. bool TCPSocket::should_delay_next_ack() const
  293. {
  294. // FIXME: We don't know the MSS here so make a reasonable guess.
  295. const size_t mss = 1500;
  296. // RFC 1122 says we should send an ACK for every two full-sized segments.
  297. if (m_ack_number >= m_last_ack_number_sent + 2 * mss)
  298. return false;
  299. // RFC 1122 says we should not delay ACKs for more than 500 milliseconds.
  300. if (kgettimeofday() >= m_last_ack_sent_time + Time::from_milliseconds(500))
  301. return false;
  302. return true;
  303. }
  304. NetworkOrdered<u16> TCPSocket::compute_tcp_checksum(IPv4Address const& source, IPv4Address const& destination, TCPPacket const& packet, u16 payload_size)
  305. {
  306. union PseudoHeader {
  307. struct [[gnu::packed]] {
  308. IPv4Address source;
  309. IPv4Address destination;
  310. u8 zero;
  311. u8 protocol;
  312. NetworkOrdered<u16> payload_size;
  313. } header;
  314. u16 raw[6];
  315. };
  316. static_assert(sizeof(PseudoHeader) == 12);
  317. Checked<u16> packet_size = packet.header_size();
  318. packet_size += payload_size;
  319. VERIFY(!packet_size.has_overflow());
  320. PseudoHeader pseudo_header { .header = { source, destination, 0, (u8)IPv4Protocol::TCP, packet_size.value() } };
  321. u32 checksum = 0;
  322. auto* raw_pseudo_header = pseudo_header.raw;
  323. for (size_t i = 0; i < sizeof(pseudo_header) / sizeof(u16); ++i) {
  324. checksum += AK::convert_between_host_and_network_endian(raw_pseudo_header[i]);
  325. if (checksum > 0xffff)
  326. checksum = (checksum >> 16) + (checksum & 0xffff);
  327. }
  328. auto* raw_packet = bit_cast<u16*>(&packet);
  329. for (size_t i = 0; i < packet.header_size() / sizeof(u16); ++i) {
  330. checksum += AK::convert_between_host_and_network_endian(raw_packet[i]);
  331. if (checksum > 0xffff)
  332. checksum = (checksum >> 16) + (checksum & 0xffff);
  333. }
  334. VERIFY(packet.data_offset() * 4 == packet.header_size());
  335. auto* raw_payload = bit_cast<u16*>(packet.payload());
  336. for (size_t i = 0; i < payload_size / sizeof(u16); ++i) {
  337. checksum += AK::convert_between_host_and_network_endian(raw_payload[i]);
  338. if (checksum > 0xffff)
  339. checksum = (checksum >> 16) + (checksum & 0xffff);
  340. }
  341. if (payload_size & 1) {
  342. u16 expanded_byte = ((u8 const*)packet.payload())[payload_size - 1] << 8;
  343. checksum += expanded_byte;
  344. if (checksum > 0xffff)
  345. checksum = (checksum >> 16) + (checksum & 0xffff);
  346. }
  347. return ~(checksum & 0xffff);
  348. }
  349. ErrorOr<void> TCPSocket::protocol_bind()
  350. {
  351. return m_adapter.with([this](auto& adapter) -> ErrorOr<void> {
  352. if (has_specific_local_address() && !adapter) {
  353. adapter = NetworkingManagement::the().from_ipv4_address(local_address());
  354. if (!adapter)
  355. return set_so_error(EADDRNOTAVAIL);
  356. }
  357. return {};
  358. });
  359. }
  360. ErrorOr<void> TCPSocket::protocol_listen(bool did_allocate_port)
  361. {
  362. if (!did_allocate_port) {
  363. bool ok = sockets_by_tuple().with_exclusive([&](auto& table) -> bool {
  364. if (table.contains(tuple()))
  365. return false;
  366. table.set(tuple(), this);
  367. return true;
  368. });
  369. if (!ok)
  370. return set_so_error(EADDRINUSE);
  371. }
  372. set_direction(Direction::Passive);
  373. set_state(State::Listen);
  374. set_setup_state(SetupState::Completed);
  375. return {};
  376. }
  377. ErrorOr<void> TCPSocket::protocol_connect(OpenFileDescription& description)
  378. {
  379. MutexLocker locker(mutex());
  380. auto routing_decision = route_to(peer_address(), local_address());
  381. if (routing_decision.is_zero())
  382. return set_so_error(EHOSTUNREACH);
  383. if (!has_specific_local_address())
  384. set_local_address(routing_decision.adapter->ipv4_address());
  385. if (auto result = allocate_local_port_if_needed(); result.error_or_port.is_error())
  386. return result.error_or_port.release_error();
  387. m_sequence_number = get_good_random<u32>();
  388. m_ack_number = 0;
  389. set_setup_state(SetupState::InProgress);
  390. TRY(send_tcp_packet(TCPFlags::SYN));
  391. m_state = State::SynSent;
  392. set_role(Role::Connecting);
  393. m_direction = Direction::Outgoing;
  394. evaluate_block_conditions();
  395. if (description.is_blocking()) {
  396. locker.unlock();
  397. auto unblock_flags = Thread::FileBlocker::BlockFlags::None;
  398. if (Thread::current()->block<Thread::ConnectBlocker>({}, description, unblock_flags).was_interrupted())
  399. return set_so_error(EINTR);
  400. locker.lock();
  401. VERIFY(setup_state() == SetupState::Completed);
  402. if (has_error()) { // TODO: check unblock_flags
  403. set_role(Role::None);
  404. if (error() == TCPSocket::Error::RetransmitTimeout)
  405. return set_so_error(ETIMEDOUT);
  406. else
  407. return set_so_error(ECONNREFUSED);
  408. }
  409. return {};
  410. }
  411. return set_so_error(EINPROGRESS);
  412. }
  413. ErrorOr<u16> TCPSocket::protocol_allocate_local_port()
  414. {
  415. constexpr u16 first_ephemeral_port = 32768;
  416. constexpr u16 last_ephemeral_port = 60999;
  417. constexpr u16 ephemeral_port_range_size = last_ephemeral_port - first_ephemeral_port;
  418. u16 first_scan_port = first_ephemeral_port + get_good_random<u16>() % ephemeral_port_range_size;
  419. return sockets_by_tuple().with_exclusive([&](auto& table) -> ErrorOr<u16> {
  420. for (u16 port = first_scan_port;;) {
  421. IPv4SocketTuple proposed_tuple(local_address(), port, peer_address(), peer_port());
  422. auto it = table.find(proposed_tuple);
  423. if (it == table.end()) {
  424. set_local_port(port);
  425. table.set(proposed_tuple, this);
  426. return port;
  427. }
  428. ++port;
  429. if (port > last_ephemeral_port)
  430. port = first_ephemeral_port;
  431. if (port == first_scan_port)
  432. break;
  433. }
  434. return set_so_error(EADDRINUSE);
  435. });
  436. }
  437. bool TCPSocket::protocol_is_disconnected() const
  438. {
  439. switch (m_state) {
  440. case State::Closed:
  441. case State::CloseWait:
  442. case State::LastAck:
  443. case State::FinWait1:
  444. case State::FinWait2:
  445. case State::Closing:
  446. case State::TimeWait:
  447. return true;
  448. default:
  449. return false;
  450. }
  451. }
  452. void TCPSocket::shut_down_for_writing()
  453. {
  454. if (state() == State::Established) {
  455. dbgln_if(TCP_SOCKET_DEBUG, " Sending FIN from Established and moving into FinWait1");
  456. (void)send_tcp_packet(TCPFlags::FIN);
  457. set_state(State::FinWait1);
  458. } else {
  459. dbgln(" Shutting down TCPSocket for writing but not moving to FinWait1 since state is {}", to_string(state()));
  460. }
  461. }
  462. ErrorOr<void> TCPSocket::close()
  463. {
  464. MutexLocker locker(mutex());
  465. auto result = IPv4Socket::close();
  466. if (state() == State::CloseWait) {
  467. dbgln_if(TCP_SOCKET_DEBUG, " Sending FIN from CloseWait and moving into LastAck");
  468. [[maybe_unused]] auto rc = send_tcp_packet(TCPFlags::FIN | TCPFlags::ACK);
  469. set_state(State::LastAck);
  470. }
  471. if (state() != State::Closed && state() != State::Listen)
  472. closing_sockets().with_exclusive([&](auto& table) {
  473. table.set(tuple(), *this);
  474. });
  475. return result;
  476. }
  477. static Singleton<MutexProtected<TCPSocket::RetransmitList>> s_sockets_for_retransmit;
  478. MutexProtected<TCPSocket::RetransmitList>& TCPSocket::sockets_for_retransmit()
  479. {
  480. return *s_sockets_for_retransmit;
  481. }
  482. void TCPSocket::enqueue_for_retransmit()
  483. {
  484. sockets_for_retransmit().with_exclusive([&](auto& list) {
  485. list.append(*this);
  486. });
  487. }
  488. void TCPSocket::dequeue_for_retransmit()
  489. {
  490. sockets_for_retransmit().with_exclusive([&](auto& list) {
  491. list.remove(*this);
  492. });
  493. }
  494. void TCPSocket::retransmit_packets()
  495. {
  496. auto now = kgettimeofday();
  497. // RFC6298 says we should have at least one second between retransmits. According to
  498. // RFC1122 we must do exponential backoff - even for SYN packets.
  499. i64 retransmit_interval = 1;
  500. for (decltype(m_retransmit_attempts) i = 0; i < m_retransmit_attempts; i++)
  501. retransmit_interval *= 2;
  502. if (m_last_retransmit_time > now - Time::from_seconds(retransmit_interval))
  503. return;
  504. dbgln_if(TCP_SOCKET_DEBUG, "TCPSocket({}) handling retransmit", this);
  505. m_last_retransmit_time = now;
  506. ++m_retransmit_attempts;
  507. if (m_retransmit_attempts > maximum_retransmits) {
  508. set_state(TCPSocket::State::Closed);
  509. set_error(TCPSocket::Error::RetransmitTimeout);
  510. set_setup_state(Socket::SetupState::Completed);
  511. return;
  512. }
  513. auto adapter = bound_interface().with([](auto& bound_device) -> RefPtr<NetworkAdapter> { return bound_device; });
  514. auto routing_decision = route_to(peer_address(), local_address(), adapter);
  515. if (routing_decision.is_zero())
  516. return;
  517. m_unacked_packets.with_exclusive([&](auto& unacked_packets) {
  518. for (auto& packet : unacked_packets.packets) {
  519. packet.tx_counter++;
  520. if constexpr (TCP_SOCKET_DEBUG) {
  521. auto& tcp_packet = *(const TCPPacket*)(packet.buffer->buffer->data() + packet.ipv4_payload_offset);
  522. dbgln("Sending TCP packet from {}:{} to {}:{} with ({}{}{}{}) seq_no={}, ack_no={}, tx_counter={}",
  523. local_address(), local_port(),
  524. peer_address(), peer_port(),
  525. (tcp_packet.has_syn() ? "SYN " : ""),
  526. (tcp_packet.has_ack() ? "ACK " : ""),
  527. (tcp_packet.has_fin() ? "FIN " : ""),
  528. (tcp_packet.has_rst() ? "RST " : ""),
  529. tcp_packet.sequence_number(),
  530. tcp_packet.ack_number(),
  531. packet.tx_counter);
  532. }
  533. size_t ipv4_payload_offset = routing_decision.adapter->ipv4_payload_offset();
  534. if (ipv4_payload_offset != packet.ipv4_payload_offset) {
  535. // FIXME: Add support for this. This can happen if after a route change
  536. // we ended up on another adapter which doesn't have the same layer 2 type
  537. // like the previous adapter.
  538. VERIFY_NOT_REACHED();
  539. }
  540. auto packet_buffer = packet.buffer->bytes();
  541. routing_decision.adapter->fill_in_ipv4_header(*packet.buffer,
  542. local_address(), routing_decision.next_hop, peer_address(),
  543. IPv4Protocol::TCP, packet_buffer.size() - ipv4_payload_offset, type_of_service(), ttl());
  544. routing_decision.adapter->send_packet(packet_buffer);
  545. m_packets_out++;
  546. m_bytes_out += packet_buffer.size();
  547. }
  548. });
  549. }
  550. bool TCPSocket::can_write(OpenFileDescription const& file_description, u64 size) const
  551. {
  552. if (!IPv4Socket::can_write(file_description, size))
  553. return false;
  554. if (m_state == State::SynSent || m_state == State::SynReceived)
  555. return false;
  556. if (!file_description.is_blocking())
  557. return true;
  558. return m_unacked_packets.with_shared([&](auto& unacked_packets) {
  559. return unacked_packets.size + size <= m_send_window_size;
  560. });
  561. }
  562. }