NetworkTask.cpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <Kernel/Debug.h>
  7. #include <Kernel/Locking/Mutex.h>
  8. #include <Kernel/Locking/MutexProtected.h>
  9. #include <Kernel/Net/ARP.h>
  10. #include <Kernel/Net/EtherType.h>
  11. #include <Kernel/Net/EthernetFrameHeader.h>
  12. #include <Kernel/Net/ICMP.h>
  13. #include <Kernel/Net/IPv4.h>
  14. #include <Kernel/Net/IPv4Socket.h>
  15. #include <Kernel/Net/LoopbackAdapter.h>
  16. #include <Kernel/Net/NetworkTask.h>
  17. #include <Kernel/Net/NetworkingManagement.h>
  18. #include <Kernel/Net/Routing.h>
  19. #include <Kernel/Net/TCP.h>
  20. #include <Kernel/Net/TCPSocket.h>
  21. #include <Kernel/Net/UDP.h>
  22. #include <Kernel/Net/UDPSocket.h>
  23. #include <Kernel/Process.h>
  24. namespace Kernel {
  25. static void handle_arp(EthernetFrameHeader const&, size_t frame_size);
  26. static void handle_ipv4(EthernetFrameHeader const&, size_t frame_size, Time const& packet_timestamp);
  27. static void handle_icmp(EthernetFrameHeader const&, IPv4Packet const&, Time const& packet_timestamp);
  28. static void handle_udp(IPv4Packet const&, Time const& packet_timestamp);
  29. static void handle_tcp(IPv4Packet const&, Time const& packet_timestamp);
  30. static void send_delayed_tcp_ack(RefPtr<TCPSocket> socket);
  31. static void send_tcp_rst(IPv4Packet const& ipv4_packet, TCPPacket const& tcp_packet, RefPtr<NetworkAdapter> adapter);
  32. static void flush_delayed_tcp_acks();
  33. static void retransmit_tcp_packets();
  34. static Thread* network_task = nullptr;
  35. static HashTable<RefPtr<TCPSocket>>* delayed_ack_sockets;
  36. [[noreturn]] static void NetworkTask_main(void*);
  37. void NetworkTask::spawn()
  38. {
  39. RefPtr<Thread> thread;
  40. auto name = KString::try_create("NetworkTask");
  41. if (name.is_error())
  42. TODO();
  43. (void)Process::create_kernel_process(thread, name.release_value(), NetworkTask_main, nullptr);
  44. network_task = thread;
  45. }
  46. bool NetworkTask::is_current()
  47. {
  48. return Thread::current() == network_task;
  49. }
  50. void NetworkTask_main(void*)
  51. {
  52. delayed_ack_sockets = new HashTable<RefPtr<TCPSocket>>;
  53. WaitQueue packet_wait_queue;
  54. int pending_packets = 0;
  55. NetworkingManagement::the().for_each([&](auto& adapter) {
  56. dmesgln("NetworkTask: {} network adapter found: hw={}", adapter.class_name(), adapter.mac_address().to_string());
  57. if (adapter.class_name() == "LoopbackAdapter"sv) {
  58. adapter.set_ipv4_address({ 127, 0, 0, 1 });
  59. adapter.set_ipv4_netmask({ 255, 0, 0, 0 });
  60. }
  61. adapter.on_receive = [&]() {
  62. pending_packets++;
  63. packet_wait_queue.wake_all();
  64. };
  65. });
  66. auto dequeue_packet = [&pending_packets](u8* buffer, size_t buffer_size, Time& packet_timestamp) -> size_t {
  67. if (pending_packets == 0)
  68. return 0;
  69. size_t packet_size = 0;
  70. NetworkingManagement::the().for_each([&](auto& adapter) {
  71. if (packet_size || !adapter.has_queued_packets())
  72. return;
  73. packet_size = adapter.dequeue_packet(buffer, buffer_size, packet_timestamp);
  74. pending_packets--;
  75. dbgln_if(NETWORK_TASK_DEBUG, "NetworkTask: Dequeued packet from {} ({} bytes)", adapter.name(), packet_size);
  76. });
  77. return packet_size;
  78. };
  79. size_t buffer_size = 64 * KiB;
  80. auto region_or_error = MM.allocate_kernel_region(buffer_size, "Kernel Packet Buffer", Memory::Region::Access::ReadWrite);
  81. if (region_or_error.is_error())
  82. TODO();
  83. auto buffer_region = region_or_error.release_value();
  84. auto buffer = (u8*)buffer_region->vaddr().get();
  85. Time packet_timestamp;
  86. for (;;) {
  87. flush_delayed_tcp_acks();
  88. retransmit_tcp_packets();
  89. size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp);
  90. if (!packet_size) {
  91. auto timeout_time = Time::from_milliseconds(500);
  92. auto timeout = Thread::BlockTimeout { false, &timeout_time };
  93. [[maybe_unused]] auto result = packet_wait_queue.wait_on(timeout, "NetworkTask");
  94. continue;
  95. }
  96. if (packet_size < sizeof(EthernetFrameHeader)) {
  97. dbgln("NetworkTask: Packet is too small to be an Ethernet packet! ({})", packet_size);
  98. continue;
  99. }
  100. auto& eth = *(EthernetFrameHeader const*)buffer;
  101. dbgln_if(ETHERNET_DEBUG, "NetworkTask: From {} to {}, ether_type={:#04x}, packet_size={}", eth.source().to_string(), eth.destination().to_string(), eth.ether_type(), packet_size);
  102. switch (eth.ether_type()) {
  103. case EtherType::ARP:
  104. handle_arp(eth, packet_size);
  105. break;
  106. case EtherType::IPv4:
  107. handle_ipv4(eth, packet_size, packet_timestamp);
  108. break;
  109. case EtherType::IPv6:
  110. // ignore
  111. break;
  112. default:
  113. dbgln_if(ETHERNET_DEBUG, "NetworkTask: Unknown ethernet type {:#04x}", eth.ether_type());
  114. }
  115. }
  116. }
  117. void handle_arp(EthernetFrameHeader const& eth, size_t frame_size)
  118. {
  119. constexpr size_t minimum_arp_frame_size = sizeof(EthernetFrameHeader) + sizeof(ARPPacket);
  120. if (frame_size < minimum_arp_frame_size) {
  121. dbgln("handle_arp: Frame too small ({}, need {})", frame_size, minimum_arp_frame_size);
  122. return;
  123. }
  124. auto& packet = *static_cast<ARPPacket const*>(eth.payload());
  125. if (packet.hardware_type() != 1 || packet.hardware_address_length() != sizeof(MACAddress)) {
  126. dbgln("handle_arp: Hardware type not ethernet ({:#04x}, len={})", packet.hardware_type(), packet.hardware_address_length());
  127. return;
  128. }
  129. if (packet.protocol_type() != EtherType::IPv4 || packet.protocol_address_length() != sizeof(IPv4Address)) {
  130. dbgln("handle_arp: Protocol type not IPv4 ({:#04x}, len={})", packet.protocol_type(), packet.protocol_address_length());
  131. return;
  132. }
  133. dbgln_if(ARP_DEBUG, "handle_arp: operation={:#04x}, sender={}/{}, target={}/{}",
  134. packet.operation(),
  135. packet.sender_hardware_address().to_string(),
  136. packet.sender_protocol_address().to_string(),
  137. packet.target_hardware_address().to_string(),
  138. packet.target_protocol_address().to_string());
  139. if (!packet.sender_hardware_address().is_zero() && !packet.sender_protocol_address().is_zero()) {
  140. // Someone has this IPv4 address. I guess we can try to remember that.
  141. // FIXME: Protect against ARP spamming.
  142. update_arp_table(packet.sender_protocol_address(), packet.sender_hardware_address(), UpdateTable::Set);
  143. }
  144. if (packet.operation() == ARPOperation::Request) {
  145. // Who has this IP address?
  146. if (auto adapter = NetworkingManagement::the().from_ipv4_address(packet.target_protocol_address())) {
  147. // We do!
  148. dbgln("handle_arp: Responding to ARP request for my IPv4 address ({})", adapter->ipv4_address());
  149. ARPPacket response;
  150. response.set_operation(ARPOperation::Response);
  151. response.set_target_hardware_address(packet.sender_hardware_address());
  152. response.set_target_protocol_address(packet.sender_protocol_address());
  153. response.set_sender_hardware_address(adapter->mac_address());
  154. response.set_sender_protocol_address(adapter->ipv4_address());
  155. adapter->send(packet.sender_hardware_address(), response);
  156. }
  157. return;
  158. }
  159. }
  160. void handle_ipv4(EthernetFrameHeader const& eth, size_t frame_size, Time const& packet_timestamp)
  161. {
  162. constexpr size_t minimum_ipv4_frame_size = sizeof(EthernetFrameHeader) + sizeof(IPv4Packet);
  163. if (frame_size < minimum_ipv4_frame_size) {
  164. dbgln("handle_ipv4: Frame too small ({}, need {})", frame_size, minimum_ipv4_frame_size);
  165. return;
  166. }
  167. auto& packet = *static_cast<IPv4Packet const*>(eth.payload());
  168. if (packet.length() < sizeof(IPv4Packet)) {
  169. dbgln("handle_ipv4: IPv4 packet too short ({}, need {})", packet.length(), sizeof(IPv4Packet));
  170. return;
  171. }
  172. size_t actual_ipv4_packet_length = frame_size - sizeof(EthernetFrameHeader);
  173. if (packet.length() > actual_ipv4_packet_length) {
  174. dbgln("handle_ipv4: IPv4 packet claims to be longer than it is ({}, actually {})", packet.length(), actual_ipv4_packet_length);
  175. return;
  176. }
  177. dbgln_if(IPV4_DEBUG, "handle_ipv4: source={}, destination={}", packet.source(), packet.destination());
  178. NetworkingManagement::the().for_each([&](auto& adapter) {
  179. if (adapter.link_up()) {
  180. auto my_net = adapter.ipv4_address().to_u32() & adapter.ipv4_netmask().to_u32();
  181. auto their_net = packet.source().to_u32() & adapter.ipv4_netmask().to_u32();
  182. if (my_net == their_net)
  183. update_arp_table(packet.source(), eth.source(), UpdateTable::Set);
  184. }
  185. });
  186. switch ((IPv4Protocol)packet.protocol()) {
  187. case IPv4Protocol::ICMP:
  188. return handle_icmp(eth, packet, packet_timestamp);
  189. case IPv4Protocol::UDP:
  190. return handle_udp(packet, packet_timestamp);
  191. case IPv4Protocol::TCP:
  192. return handle_tcp(packet, packet_timestamp);
  193. default:
  194. dbgln_if(IPV4_DEBUG, "handle_ipv4: Unhandled protocol {:#02x}", packet.protocol());
  195. break;
  196. }
  197. }
  198. void handle_icmp(EthernetFrameHeader const& eth, IPv4Packet const& ipv4_packet, Time const& packet_timestamp)
  199. {
  200. auto& icmp_header = *static_cast<ICMPHeader const*>(ipv4_packet.payload());
  201. dbgln_if(ICMP_DEBUG, "handle_icmp: source={}, destination={}, type={:#02x}, code={:#02x}", ipv4_packet.source().to_string(), ipv4_packet.destination().to_string(), icmp_header.type(), icmp_header.code());
  202. {
  203. NonnullRefPtrVector<IPv4Socket> icmp_sockets;
  204. IPv4Socket::all_sockets().with_exclusive([&](auto& sockets) {
  205. for (auto& socket : sockets) {
  206. if (socket.protocol() == (unsigned)IPv4Protocol::ICMP)
  207. icmp_sockets.append(socket);
  208. }
  209. });
  210. for (auto& socket : icmp_sockets)
  211. socket.did_receive(ipv4_packet.source(), 0, { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
  212. }
  213. auto adapter = NetworkingManagement::the().from_ipv4_address(ipv4_packet.destination());
  214. if (!adapter)
  215. return;
  216. if (icmp_header.type() == ICMPType::EchoRequest) {
  217. auto& request = reinterpret_cast<ICMPEchoPacket const&>(icmp_header);
  218. dbgln("handle_icmp: EchoRequest from {}: id={}, seq={}", ipv4_packet.source(), (u16)request.identifier, (u16)request.sequence_number);
  219. size_t icmp_packet_size = ipv4_packet.payload_size();
  220. if (icmp_packet_size < sizeof(ICMPEchoPacket)) {
  221. dbgln("handle_icmp: EchoRequest packet is too small, ignoring.");
  222. return;
  223. }
  224. auto ipv4_payload_offset = adapter->ipv4_payload_offset();
  225. auto packet = adapter->acquire_packet_buffer(ipv4_payload_offset + icmp_packet_size);
  226. if (!packet) {
  227. dbgln("Could not allocate packet buffer while sending ICMP packet");
  228. return;
  229. }
  230. adapter->fill_in_ipv4_header(*packet, adapter->ipv4_address(), eth.source(), ipv4_packet.source(), IPv4Protocol::ICMP, icmp_packet_size, 0, 64);
  231. memset(packet->buffer->data() + ipv4_payload_offset, 0, sizeof(ICMPEchoPacket));
  232. auto& response = *(ICMPEchoPacket*)(packet->buffer->data() + ipv4_payload_offset);
  233. response.header.set_type(ICMPType::EchoReply);
  234. response.header.set_code(0);
  235. response.identifier = request.identifier;
  236. response.sequence_number = request.sequence_number;
  237. if (size_t icmp_payload_size = icmp_packet_size - sizeof(ICMPEchoPacket))
  238. memcpy(response.payload(), request.payload(), icmp_payload_size);
  239. response.header.set_checksum(internet_checksum(&response, icmp_packet_size));
  240. // FIXME: What is the right TTL value here? Is 64 ok? Should we use the same TTL as the echo request?
  241. adapter->send_packet(packet->bytes());
  242. adapter->release_packet_buffer(*packet);
  243. }
  244. }
  245. void handle_udp(IPv4Packet const& ipv4_packet, Time const& packet_timestamp)
  246. {
  247. if (ipv4_packet.payload_size() < sizeof(UDPPacket)) {
  248. dbgln("handle_udp: Packet too small ({}, need {})", ipv4_packet.payload_size(), sizeof(UDPPacket));
  249. return;
  250. }
  251. auto& udp_packet = *static_cast<UDPPacket const*>(ipv4_packet.payload());
  252. dbgln_if(UDP_DEBUG, "handle_udp: source={}:{}, destination={}:{}, length={}",
  253. ipv4_packet.source(), udp_packet.source_port(),
  254. ipv4_packet.destination(), udp_packet.destination_port(),
  255. udp_packet.length());
  256. auto socket = UDPSocket::from_port(udp_packet.destination_port());
  257. if (!socket) {
  258. dbgln_if(UDP_DEBUG, "handle_udp: No local UDP socket for {}:{}", ipv4_packet.destination(), udp_packet.destination_port());
  259. return;
  260. }
  261. VERIFY(socket->type() == SOCK_DGRAM);
  262. VERIFY(socket->local_port() == udp_packet.destination_port());
  263. auto& destination = ipv4_packet.destination();
  264. if (destination == IPv4Address(255, 255, 255, 255) || NetworkingManagement::the().from_ipv4_address(destination) || socket->multicast_memberships().contains_slow(destination))
  265. socket->did_receive(ipv4_packet.source(), udp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
  266. }
  267. void send_delayed_tcp_ack(RefPtr<TCPSocket> socket)
  268. {
  269. VERIFY(socket->mutex().is_locked());
  270. if (!socket->should_delay_next_ack()) {
  271. [[maybe_unused]] auto result = socket->send_ack();
  272. return;
  273. }
  274. delayed_ack_sockets->set(move(socket));
  275. }
  276. void flush_delayed_tcp_acks()
  277. {
  278. Vector<RefPtr<TCPSocket>, 32> remaining_sockets;
  279. for (auto& socket : *delayed_ack_sockets) {
  280. MutexLocker locker(socket->mutex());
  281. if (socket->should_delay_next_ack()) {
  282. MUST(remaining_sockets.try_append(socket));
  283. continue;
  284. }
  285. [[maybe_unused]] auto result = socket->send_ack();
  286. }
  287. if (remaining_sockets.size() != delayed_ack_sockets->size()) {
  288. delayed_ack_sockets->clear();
  289. if (remaining_sockets.size() > 0)
  290. dbgln("flush_delayed_tcp_acks: {} sockets remaining", remaining_sockets.size());
  291. for (auto&& socket : remaining_sockets)
  292. delayed_ack_sockets->set(move(socket));
  293. }
  294. }
  295. void send_tcp_rst(IPv4Packet const& ipv4_packet, TCPPacket const& tcp_packet, RefPtr<NetworkAdapter> adapter)
  296. {
  297. auto routing_decision = route_to(ipv4_packet.source(), ipv4_packet.destination(), adapter);
  298. if (routing_decision.is_zero())
  299. return;
  300. auto ipv4_payload_offset = routing_decision.adapter->ipv4_payload_offset();
  301. const size_t options_size = 0;
  302. const size_t tcp_header_size = sizeof(TCPPacket) + options_size;
  303. const size_t buffer_size = ipv4_payload_offset + tcp_header_size;
  304. auto packet = routing_decision.adapter->acquire_packet_buffer(buffer_size);
  305. if (!packet)
  306. return;
  307. routing_decision.adapter->fill_in_ipv4_header(*packet, ipv4_packet.destination(),
  308. routing_decision.next_hop, ipv4_packet.source(), IPv4Protocol::TCP,
  309. buffer_size - ipv4_payload_offset, 0, 64);
  310. auto& rst_packet = *(TCPPacket*)(packet->buffer->data() + ipv4_payload_offset);
  311. rst_packet = {};
  312. rst_packet.set_source_port(tcp_packet.destination_port());
  313. rst_packet.set_destination_port(tcp_packet.source_port());
  314. rst_packet.set_window_size(0);
  315. rst_packet.set_sequence_number(0);
  316. rst_packet.set_ack_number(tcp_packet.sequence_number() + 1);
  317. rst_packet.set_data_offset(tcp_header_size / sizeof(u32));
  318. rst_packet.set_flags(TCPFlags::RST | TCPFlags::ACK);
  319. rst_packet.set_checksum(TCPSocket::compute_tcp_checksum(ipv4_packet.source(), ipv4_packet.destination(), rst_packet, 0));
  320. routing_decision.adapter->send_packet(packet->bytes());
  321. routing_decision.adapter->release_packet_buffer(*packet);
  322. }
  323. void handle_tcp(IPv4Packet const& ipv4_packet, Time const& packet_timestamp)
  324. {
  325. if (ipv4_packet.payload_size() < sizeof(TCPPacket)) {
  326. dbgln("handle_tcp: IPv4 payload is too small to be a TCP packet ({}, need {})", ipv4_packet.payload_size(), sizeof(TCPPacket));
  327. return;
  328. }
  329. auto& tcp_packet = *static_cast<TCPPacket const*>(ipv4_packet.payload());
  330. size_t minimum_tcp_header_size = 5 * sizeof(u32);
  331. size_t maximum_tcp_header_size = 15 * sizeof(u32);
  332. if (tcp_packet.header_size() < minimum_tcp_header_size || tcp_packet.header_size() > maximum_tcp_header_size) {
  333. dbgln("handle_tcp: TCP packet header has invalid size {}", tcp_packet.header_size());
  334. }
  335. if (ipv4_packet.payload_size() < tcp_packet.header_size()) {
  336. dbgln("handle_tcp: IPv4 payload is smaller than TCP header claims ({}, supposedly {})", ipv4_packet.payload_size(), tcp_packet.header_size());
  337. return;
  338. }
  339. size_t payload_size = ipv4_packet.payload_size() - tcp_packet.header_size();
  340. dbgln_if(TCP_DEBUG, "handle_tcp: source={}:{}, destination={}:{}, seq_no={}, ack_no={}, flags={:#04x} ({}{}{}{}), window_size={}, payload_size={}",
  341. ipv4_packet.source().to_string(),
  342. tcp_packet.source_port(),
  343. ipv4_packet.destination().to_string(),
  344. tcp_packet.destination_port(),
  345. tcp_packet.sequence_number(),
  346. tcp_packet.ack_number(),
  347. tcp_packet.flags(),
  348. tcp_packet.has_syn() ? "SYN " : "",
  349. tcp_packet.has_ack() ? "ACK " : "",
  350. tcp_packet.has_fin() ? "FIN " : "",
  351. tcp_packet.has_rst() ? "RST " : "",
  352. tcp_packet.window_size(),
  353. payload_size);
  354. auto adapter = NetworkingManagement::the().from_ipv4_address(ipv4_packet.destination());
  355. if (!adapter) {
  356. dbgln("handle_tcp: this packet is not for me, it's for {}", ipv4_packet.destination());
  357. return;
  358. }
  359. IPv4SocketTuple tuple(ipv4_packet.destination(), tcp_packet.destination_port(), ipv4_packet.source(), tcp_packet.source_port());
  360. dbgln_if(TCP_DEBUG, "handle_tcp: looking for socket; tuple={}", tuple.to_string());
  361. auto socket = TCPSocket::from_tuple(tuple);
  362. if (!socket) {
  363. if (!tcp_packet.has_rst()) {
  364. dbgln("handle_tcp: No TCP socket for tuple {}. Sending RST.", tuple.to_string());
  365. send_tcp_rst(ipv4_packet, tcp_packet, adapter);
  366. }
  367. return;
  368. }
  369. MutexLocker locker(socket->mutex());
  370. VERIFY(socket->type() == SOCK_STREAM);
  371. VERIFY(socket->local_port() == tcp_packet.destination_port());
  372. dbgln_if(TCP_DEBUG, "handle_tcp: got socket {}; state={}", socket->tuple().to_string(), TCPSocket::to_string(socket->state()));
  373. socket->receive_tcp_packet(tcp_packet, ipv4_packet.payload_size());
  374. switch (socket->state()) {
  375. case TCPSocket::State::Closed:
  376. dbgln("handle_tcp: unexpected flags in Closed state ({:x})", tcp_packet.flags());
  377. // TODO: we may want to send an RST here, maybe as a configurable option
  378. return;
  379. case TCPSocket::State::TimeWait:
  380. dbgln("handle_tcp: unexpected flags in TimeWait state ({:x})", tcp_packet.flags());
  381. (void)socket->send_tcp_packet(TCPFlags::RST);
  382. socket->set_state(TCPSocket::State::Closed);
  383. return;
  384. case TCPSocket::State::Listen:
  385. switch (tcp_packet.flags()) {
  386. case TCPFlags::SYN: {
  387. dbgln_if(TCP_DEBUG, "handle_tcp: incoming connection");
  388. auto& local_address = ipv4_packet.destination();
  389. auto& peer_address = ipv4_packet.source();
  390. auto client_or_error = socket->try_create_client(local_address, tcp_packet.destination_port(), peer_address, tcp_packet.source_port());
  391. if (client_or_error.is_error()) {
  392. dmesgln("handle_tcp: couldn't create client socket: {}", client_or_error.error());
  393. return;
  394. }
  395. auto client = client_or_error.release_value();
  396. MutexLocker locker(client->mutex());
  397. dbgln_if(TCP_DEBUG, "handle_tcp: created new client socket with tuple {}", client->tuple().to_string());
  398. client->set_sequence_number(1000);
  399. client->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  400. [[maybe_unused]] auto rc2 = client->send_tcp_packet(TCPFlags::SYN | TCPFlags::ACK);
  401. client->set_state(TCPSocket::State::SynReceived);
  402. return;
  403. }
  404. default:
  405. dbgln("handle_tcp: unexpected flags in Listen state ({:x})", tcp_packet.flags());
  406. // socket->send_tcp_packet(TCPFlags::RST);
  407. return;
  408. }
  409. case TCPSocket::State::SynSent:
  410. switch (tcp_packet.flags()) {
  411. case TCPFlags::SYN:
  412. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  413. (void)socket->send_tcp_packet(TCPFlags::SYN | TCPFlags::ACK);
  414. socket->set_state(TCPSocket::State::SynReceived);
  415. return;
  416. case TCPFlags::ACK | TCPFlags::SYN:
  417. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  418. (void)socket->send_ack(true);
  419. socket->set_state(TCPSocket::State::Established);
  420. socket->set_setup_state(Socket::SetupState::Completed);
  421. socket->set_connected(true);
  422. return;
  423. case TCPFlags::ACK | TCPFlags::FIN:
  424. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  425. send_delayed_tcp_ack(socket);
  426. socket->set_state(TCPSocket::State::Closed);
  427. socket->set_error(TCPSocket::Error::FINDuringConnect);
  428. socket->set_setup_state(Socket::SetupState::Completed);
  429. return;
  430. case TCPFlags::ACK | TCPFlags::RST:
  431. socket->set_state(TCPSocket::State::Closed);
  432. socket->set_error(TCPSocket::Error::RSTDuringConnect);
  433. socket->set_setup_state(Socket::SetupState::Completed);
  434. return;
  435. default:
  436. dbgln("handle_tcp: unexpected flags in SynSent state ({:x})", tcp_packet.flags());
  437. (void)socket->send_tcp_packet(TCPFlags::RST);
  438. socket->set_state(TCPSocket::State::Closed);
  439. socket->set_error(TCPSocket::Error::UnexpectedFlagsDuringConnect);
  440. socket->set_setup_state(Socket::SetupState::Completed);
  441. return;
  442. }
  443. case TCPSocket::State::SynReceived:
  444. switch (tcp_packet.flags()) {
  445. case TCPFlags::ACK:
  446. socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
  447. switch (socket->direction()) {
  448. case TCPSocket::Direction::Incoming:
  449. if (!socket->has_originator()) {
  450. dbgln("handle_tcp: connection doesn't have an originating socket; maybe it went away?");
  451. (void)socket->send_tcp_packet(TCPFlags::RST);
  452. socket->set_state(TCPSocket::State::Closed);
  453. return;
  454. }
  455. socket->set_state(TCPSocket::State::Established);
  456. socket->set_setup_state(Socket::SetupState::Completed);
  457. socket->release_to_originator();
  458. return;
  459. case TCPSocket::Direction::Outgoing:
  460. socket->set_state(TCPSocket::State::Established);
  461. socket->set_setup_state(Socket::SetupState::Completed);
  462. socket->set_connected(true);
  463. return;
  464. default:
  465. dbgln("handle_tcp: got ACK in SynReceived state but direction is invalid ({})", TCPSocket::to_string(socket->direction()));
  466. (void)socket->send_tcp_packet(TCPFlags::RST);
  467. socket->set_state(TCPSocket::State::Closed);
  468. return;
  469. }
  470. VERIFY_NOT_REACHED();
  471. case TCPFlags::SYN:
  472. dbgln("handle_tcp: ignoring SYN for partially established connection");
  473. return;
  474. default:
  475. dbgln("handle_tcp: unexpected flags in SynReceived state ({:x})", tcp_packet.flags());
  476. (void)socket->send_tcp_packet(TCPFlags::RST);
  477. socket->set_state(TCPSocket::State::Closed);
  478. return;
  479. }
  480. case TCPSocket::State::CloseWait:
  481. switch (tcp_packet.flags()) {
  482. default:
  483. dbgln("handle_tcp: unexpected flags in CloseWait state ({:x})", tcp_packet.flags());
  484. (void)socket->send_tcp_packet(TCPFlags::RST);
  485. socket->set_state(TCPSocket::State::Closed);
  486. return;
  487. }
  488. case TCPSocket::State::LastAck:
  489. switch (tcp_packet.flags()) {
  490. case TCPFlags::ACK:
  491. socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
  492. socket->set_state(TCPSocket::State::Closed);
  493. return;
  494. default:
  495. dbgln("handle_tcp: unexpected flags in LastAck state ({:x})", tcp_packet.flags());
  496. (void)socket->send_tcp_packet(TCPFlags::RST);
  497. socket->set_state(TCPSocket::State::Closed);
  498. return;
  499. }
  500. case TCPSocket::State::FinWait1:
  501. switch (tcp_packet.flags()) {
  502. case TCPFlags::ACK:
  503. socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
  504. socket->set_state(TCPSocket::State::FinWait2);
  505. return;
  506. case TCPFlags::FIN:
  507. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  508. socket->set_state(TCPSocket::State::Closing);
  509. (void)socket->send_ack(true);
  510. return;
  511. case TCPFlags::FIN | TCPFlags::ACK:
  512. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  513. socket->set_state(TCPSocket::State::TimeWait);
  514. (void)socket->send_ack(true);
  515. return;
  516. default:
  517. dbgln("handle_tcp: unexpected flags in FinWait1 state ({:x})", tcp_packet.flags());
  518. (void)socket->send_tcp_packet(TCPFlags::RST);
  519. socket->set_state(TCPSocket::State::Closed);
  520. return;
  521. }
  522. case TCPSocket::State::FinWait2:
  523. switch (tcp_packet.flags()) {
  524. case TCPFlags::FIN:
  525. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  526. socket->set_state(TCPSocket::State::TimeWait);
  527. (void)socket->send_ack(true);
  528. return;
  529. case TCPFlags::ACK | TCPFlags::RST:
  530. // FIXME: Verify that this transition is legitimate.
  531. socket->set_state(TCPSocket::State::Closed);
  532. return;
  533. default:
  534. dbgln("handle_tcp: unexpected flags in FinWait2 state ({:x})", tcp_packet.flags());
  535. (void)socket->send_tcp_packet(TCPFlags::RST);
  536. socket->set_state(TCPSocket::State::Closed);
  537. return;
  538. }
  539. case TCPSocket::State::Closing:
  540. switch (tcp_packet.flags()) {
  541. case TCPFlags::ACK:
  542. socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
  543. socket->set_state(TCPSocket::State::TimeWait);
  544. return;
  545. default:
  546. dbgln("handle_tcp: unexpected flags in Closing state ({:x})", tcp_packet.flags());
  547. (void)socket->send_tcp_packet(TCPFlags::RST);
  548. socket->set_state(TCPSocket::State::Closed);
  549. return;
  550. }
  551. case TCPSocket::State::Established:
  552. if (tcp_packet.has_rst()) {
  553. socket->set_state(TCPSocket::State::Closed);
  554. return;
  555. }
  556. if (tcp_packet.sequence_number() != socket->ack_number()) {
  557. dbgln_if(TCP_DEBUG, "Discarding out of order packet: seq {} vs. ack {}", tcp_packet.sequence_number(), socket->ack_number());
  558. if (socket->duplicate_acks() < TCPSocket::maximum_duplicate_acks) {
  559. dbgln_if(TCP_DEBUG, "Sending ACK with same ack number to trigger fast retransmission");
  560. socket->set_duplicate_acks(socket->duplicate_acks() + 1);
  561. [[maybe_unused]] auto result = socket->send_ack(true);
  562. }
  563. return;
  564. }
  565. socket->set_duplicate_acks(0);
  566. if (tcp_packet.has_fin()) {
  567. if (payload_size != 0)
  568. socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
  569. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  570. send_delayed_tcp_ack(socket);
  571. socket->set_state(TCPSocket::State::CloseWait);
  572. socket->set_connected(false);
  573. return;
  574. }
  575. if (payload_size) {
  576. if (socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp)) {
  577. socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
  578. dbgln_if(TCP_DEBUG, "Got packet with ack_no={}, seq_no={}, payload_size={}, acking it with new ack_no={}, seq_no={}",
  579. tcp_packet.ack_number(), tcp_packet.sequence_number(), payload_size, socket->ack_number(), socket->sequence_number());
  580. send_delayed_tcp_ack(socket);
  581. }
  582. }
  583. }
  584. }
  585. void retransmit_tcp_packets()
  586. {
  587. // We must keep the sockets alive until after we've unlocked the hash table
  588. // in case retransmit_packets() realizes that it wants to close the socket.
  589. NonnullRefPtrVector<TCPSocket, 16> sockets;
  590. TCPSocket::sockets_for_retransmit().for_each_shared([&](auto const& socket) {
  591. // We ignore allocation failures above the first 16 guaranteed socket slots, as
  592. // we will just retransmit their packets the next time around
  593. (void)sockets.try_append(socket);
  594. });
  595. for (auto& socket : sockets) {
  596. MutexLocker socket_locker(socket.mutex());
  597. socket.retransmit_packets();
  598. }
  599. }
  600. }