NetworkTask.cpp 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <Kernel/Debug.h>
  7. #include <Kernel/Locking/Mutex.h>
  8. #include <Kernel/Locking/MutexProtected.h>
  9. #include <Kernel/Net/ARP.h>
  10. #include <Kernel/Net/EtherType.h>
  11. #include <Kernel/Net/EthernetFrameHeader.h>
  12. #include <Kernel/Net/ICMP.h>
  13. #include <Kernel/Net/IPv4.h>
  14. #include <Kernel/Net/IPv4Socket.h>
  15. #include <Kernel/Net/LoopbackAdapter.h>
  16. #include <Kernel/Net/NetworkTask.h>
  17. #include <Kernel/Net/NetworkingManagement.h>
  18. #include <Kernel/Net/Routing.h>
  19. #include <Kernel/Net/TCP.h>
  20. #include <Kernel/Net/TCPSocket.h>
  21. #include <Kernel/Net/UDP.h>
  22. #include <Kernel/Net/UDPSocket.h>
  23. #include <Kernel/Tasks/Process.h>
  24. namespace Kernel {
  25. static void handle_arp(EthernetFrameHeader const&, size_t frame_size);
  26. static void handle_ipv4(EthernetFrameHeader const&, size_t frame_size, UnixDateTime const& packet_timestamp);
  27. static void handle_icmp(EthernetFrameHeader const&, IPv4Packet const&, UnixDateTime const& packet_timestamp);
  28. static void handle_udp(IPv4Packet const&, UnixDateTime const& packet_timestamp);
  29. static void handle_tcp(IPv4Packet const&, UnixDateTime const& packet_timestamp);
  30. static void send_delayed_tcp_ack(TCPSocket& socket);
  31. static void send_tcp_rst(IPv4Packet const& ipv4_packet, TCPPacket const& tcp_packet, RefPtr<NetworkAdapter> adapter);
  32. static void flush_delayed_tcp_acks();
  33. static void retransmit_tcp_packets();
  34. static Thread* network_task = nullptr;
  35. static HashTable<NonnullRefPtr<TCPSocket>>* delayed_ack_sockets;
  36. [[noreturn]] static void NetworkTask_main(void*);
  37. void NetworkTask::spawn()
  38. {
  39. auto [_, first_thread] = MUST(Process::create_kernel_process("Network Task"sv, NetworkTask_main, nullptr));
  40. network_task = first_thread;
  41. }
  42. bool NetworkTask::is_current()
  43. {
  44. return Thread::current() == network_task;
  45. }
  46. void NetworkTask_main(void*)
  47. {
  48. delayed_ack_sockets = new HashTable<NonnullRefPtr<TCPSocket>>;
  49. WaitQueue packet_wait_queue;
  50. int pending_packets = 0;
  51. NetworkingManagement::the().for_each([&](auto& adapter) {
  52. dmesgln("NetworkTask: {} network adapter found: hw={}", adapter.class_name(), adapter.mac_address().to_string());
  53. if (adapter.class_name() == "LoopbackAdapter"sv) {
  54. adapter.set_ipv4_address({ 127, 0, 0, 1 });
  55. adapter.set_ipv4_netmask({ 255, 0, 0, 0 });
  56. }
  57. adapter.on_receive = [&]() {
  58. pending_packets++;
  59. packet_wait_queue.wake_all();
  60. };
  61. });
  62. auto dequeue_packet = [&pending_packets](u8* buffer, size_t buffer_size, UnixDateTime& packet_timestamp) -> size_t {
  63. if (pending_packets == 0)
  64. return 0;
  65. size_t packet_size = 0;
  66. NetworkingManagement::the().for_each([&](auto& adapter) {
  67. if (packet_size || !adapter.has_queued_packets())
  68. return;
  69. packet_size = adapter.dequeue_packet(buffer, buffer_size, packet_timestamp);
  70. pending_packets--;
  71. dbgln_if(NETWORK_TASK_DEBUG, "NetworkTask: Dequeued packet from {} ({} bytes)", adapter.name(), packet_size);
  72. });
  73. return packet_size;
  74. };
  75. size_t buffer_size = 64 * KiB;
  76. auto region_or_error = MM.allocate_kernel_region(buffer_size, "Kernel Packet Buffer"sv, Memory::Region::Access::ReadWrite);
  77. if (region_or_error.is_error())
  78. TODO();
  79. auto buffer_region = region_or_error.release_value();
  80. auto buffer = (u8*)buffer_region->vaddr().get();
  81. UnixDateTime packet_timestamp;
  82. while (!Process::current().is_dying()) {
  83. flush_delayed_tcp_acks();
  84. retransmit_tcp_packets();
  85. size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp);
  86. if (!packet_size) {
  87. auto timeout_time = Duration::from_milliseconds(500);
  88. auto timeout = Thread::BlockTimeout { false, &timeout_time };
  89. [[maybe_unused]] auto result = packet_wait_queue.wait_on(timeout, "NetworkTask"sv);
  90. continue;
  91. }
  92. if (packet_size < sizeof(EthernetFrameHeader)) {
  93. dbgln("NetworkTask: Packet is too small to be an Ethernet packet! ({})", packet_size);
  94. continue;
  95. }
  96. auto& eth = *(EthernetFrameHeader const*)buffer;
  97. dbgln_if(ETHERNET_DEBUG, "NetworkTask: From {} to {}, ether_type={:#04x}, packet_size={}", eth.source().to_string(), eth.destination().to_string(), eth.ether_type(), packet_size);
  98. switch (eth.ether_type()) {
  99. case EtherType::ARP:
  100. handle_arp(eth, packet_size);
  101. break;
  102. case EtherType::IPv4:
  103. handle_ipv4(eth, packet_size, packet_timestamp);
  104. break;
  105. case EtherType::IPv6:
  106. // ignore
  107. break;
  108. default:
  109. dbgln_if(ETHERNET_DEBUG, "NetworkTask: Unknown ethernet type {:#04x}", eth.ether_type());
  110. }
  111. }
  112. Process::current().sys$exit(0);
  113. VERIFY_NOT_REACHED();
  114. }
  115. void handle_arp(EthernetFrameHeader const& eth, size_t frame_size)
  116. {
  117. constexpr size_t minimum_arp_frame_size = sizeof(EthernetFrameHeader) + sizeof(ARPPacket);
  118. if (frame_size < minimum_arp_frame_size) {
  119. dbgln("handle_arp: Frame too small ({}, need {})", frame_size, minimum_arp_frame_size);
  120. return;
  121. }
  122. auto& packet = *static_cast<ARPPacket const*>(eth.payload());
  123. if (packet.hardware_type() != 1 || packet.hardware_address_length() != sizeof(MACAddress)) {
  124. dbgln("handle_arp: Hardware type not ethernet ({:#04x}, len={})", packet.hardware_type(), packet.hardware_address_length());
  125. return;
  126. }
  127. if (packet.protocol_type() != EtherType::IPv4 || packet.protocol_address_length() != sizeof(IPv4Address)) {
  128. dbgln("handle_arp: Protocol type not IPv4 ({:#04x}, len={})", packet.protocol_type(), packet.protocol_address_length());
  129. return;
  130. }
  131. dbgln_if(ARP_DEBUG, "handle_arp: operation={:#04x}, sender={}/{}, target={}/{}",
  132. packet.operation(),
  133. packet.sender_hardware_address().to_string(),
  134. packet.sender_protocol_address().to_string(),
  135. packet.target_hardware_address().to_string(),
  136. packet.target_protocol_address().to_string());
  137. if (!packet.sender_hardware_address().is_zero() && !packet.sender_protocol_address().is_zero()) {
  138. // Someone has this IPv4 address. I guess we can try to remember that.
  139. // FIXME: Protect against ARP spamming.
  140. update_arp_table(packet.sender_protocol_address(), packet.sender_hardware_address(), UpdateTable::Set);
  141. }
  142. if (packet.operation() == ARPOperation::Request) {
  143. // Who has this IP address?
  144. if (auto adapter = NetworkingManagement::the().from_ipv4_address(packet.target_protocol_address())) {
  145. // We do!
  146. dbgln("handle_arp: Responding to ARP request for my IPv4 address ({})", adapter->ipv4_address());
  147. ARPPacket response;
  148. response.set_operation(ARPOperation::Response);
  149. response.set_target_hardware_address(packet.sender_hardware_address());
  150. response.set_target_protocol_address(packet.sender_protocol_address());
  151. response.set_sender_hardware_address(adapter->mac_address());
  152. response.set_sender_protocol_address(adapter->ipv4_address());
  153. adapter->send(packet.sender_hardware_address(), response);
  154. }
  155. return;
  156. }
  157. }
  158. void handle_ipv4(EthernetFrameHeader const& eth, size_t frame_size, UnixDateTime const& packet_timestamp)
  159. {
  160. constexpr size_t minimum_ipv4_frame_size = sizeof(EthernetFrameHeader) + sizeof(IPv4Packet);
  161. if (frame_size < minimum_ipv4_frame_size) {
  162. dbgln("handle_ipv4: Frame too small ({}, need {})", frame_size, minimum_ipv4_frame_size);
  163. return;
  164. }
  165. auto& packet = *static_cast<IPv4Packet const*>(eth.payload());
  166. if (packet.length() < sizeof(IPv4Packet)) {
  167. dbgln("handle_ipv4: IPv4 packet too short ({}, need {})", packet.length(), sizeof(IPv4Packet));
  168. return;
  169. }
  170. size_t actual_ipv4_packet_length = frame_size - sizeof(EthernetFrameHeader);
  171. if (packet.length() > actual_ipv4_packet_length) {
  172. dbgln("handle_ipv4: IPv4 packet claims to be longer than it is ({}, actually {})", packet.length(), actual_ipv4_packet_length);
  173. return;
  174. }
  175. dbgln_if(IPV4_DEBUG, "handle_ipv4: source={}, destination={}", packet.source(), packet.destination());
  176. NetworkingManagement::the().for_each([&](auto& adapter) {
  177. if (adapter.ipv4_address().is_zero() || !adapter.link_up())
  178. return;
  179. auto my_net = adapter.ipv4_address().to_u32() & adapter.ipv4_netmask().to_u32();
  180. auto their_net = packet.source().to_u32() & adapter.ipv4_netmask().to_u32();
  181. if (my_net == their_net)
  182. update_arp_table(packet.source(), eth.source(), UpdateTable::Set);
  183. });
  184. switch ((IPv4Protocol)packet.protocol()) {
  185. case IPv4Protocol::ICMP:
  186. return handle_icmp(eth, packet, packet_timestamp);
  187. case IPv4Protocol::UDP:
  188. return handle_udp(packet, packet_timestamp);
  189. case IPv4Protocol::TCP:
  190. return handle_tcp(packet, packet_timestamp);
  191. default:
  192. dbgln_if(IPV4_DEBUG, "handle_ipv4: Unhandled protocol {:#02x}", packet.protocol());
  193. break;
  194. }
  195. }
  196. void handle_icmp(EthernetFrameHeader const& eth, IPv4Packet const& ipv4_packet, UnixDateTime const& packet_timestamp)
  197. {
  198. auto& icmp_header = *static_cast<ICMPHeader const*>(ipv4_packet.payload());
  199. dbgln_if(ICMP_DEBUG, "handle_icmp: source={}, destination={}, type={:#02x}, code={:#02x}", ipv4_packet.source().to_string(), ipv4_packet.destination().to_string(), icmp_header.type(), icmp_header.code());
  200. {
  201. Vector<NonnullRefPtr<IPv4Socket>> icmp_sockets;
  202. IPv4Socket::all_sockets().with_exclusive([&](auto& sockets) {
  203. for (auto& socket : sockets) {
  204. if (socket.protocol() == (unsigned)IPv4Protocol::ICMP)
  205. icmp_sockets.append(socket);
  206. }
  207. });
  208. for (auto& socket : icmp_sockets)
  209. socket->did_receive(ipv4_packet.source(), 0, { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
  210. }
  211. auto adapter = NetworkingManagement::the().from_ipv4_address(ipv4_packet.destination());
  212. if (!adapter)
  213. return;
  214. if (icmp_header.type() == ICMPType::EchoRequest) {
  215. auto& request = reinterpret_cast<ICMPEchoPacket const&>(icmp_header);
  216. dbgln("handle_icmp: EchoRequest from {}: id={}, seq={}", ipv4_packet.source(), (u16)request.identifier, (u16)request.sequence_number);
  217. size_t icmp_packet_size = ipv4_packet.payload_size();
  218. if (icmp_packet_size < sizeof(ICMPEchoPacket)) {
  219. dbgln("handle_icmp: EchoRequest packet is too small, ignoring.");
  220. return;
  221. }
  222. auto ipv4_payload_offset = adapter->ipv4_payload_offset();
  223. auto packet = adapter->acquire_packet_buffer(ipv4_payload_offset + icmp_packet_size);
  224. if (!packet) {
  225. dbgln("Could not allocate packet buffer while sending ICMP packet");
  226. return;
  227. }
  228. adapter->fill_in_ipv4_header(*packet, adapter->ipv4_address(), eth.source(), ipv4_packet.source(), IPv4Protocol::ICMP, icmp_packet_size, 0, 64);
  229. memset(packet->buffer->data() + ipv4_payload_offset, 0, sizeof(ICMPEchoPacket));
  230. auto& response = *(ICMPEchoPacket*)(packet->buffer->data() + ipv4_payload_offset);
  231. response.header.set_type(ICMPType::EchoReply);
  232. response.header.set_code(0);
  233. response.identifier = request.identifier;
  234. response.sequence_number = request.sequence_number;
  235. if (size_t icmp_payload_size = icmp_packet_size - sizeof(ICMPEchoPacket))
  236. memcpy(response.payload(), request.payload(), icmp_payload_size);
  237. response.header.set_checksum(internet_checksum(&response, icmp_packet_size));
  238. // FIXME: What is the right TTL value here? Is 64 ok? Should we use the same TTL as the echo request?
  239. adapter->send_packet(packet->bytes());
  240. adapter->release_packet_buffer(*packet);
  241. }
  242. }
  243. void handle_udp(IPv4Packet const& ipv4_packet, UnixDateTime const& packet_timestamp)
  244. {
  245. if (ipv4_packet.payload_size() < sizeof(UDPPacket)) {
  246. dbgln("handle_udp: Packet too small ({}, need {})", ipv4_packet.payload_size(), sizeof(UDPPacket));
  247. return;
  248. }
  249. auto& udp_packet = *static_cast<UDPPacket const*>(ipv4_packet.payload());
  250. dbgln_if(UDP_DEBUG, "handle_udp: source={}:{}, destination={}:{}, length={}",
  251. ipv4_packet.source(), udp_packet.source_port(),
  252. ipv4_packet.destination(), udp_packet.destination_port(),
  253. udp_packet.length());
  254. auto socket = UDPSocket::from_port(udp_packet.destination_port());
  255. if (!socket) {
  256. dbgln_if(UDP_DEBUG, "handle_udp: No local UDP socket for {}:{}", ipv4_packet.destination(), udp_packet.destination_port());
  257. return;
  258. }
  259. VERIFY(socket->type() == SOCK_DGRAM);
  260. VERIFY(socket->local_port() == udp_packet.destination_port());
  261. auto& destination = ipv4_packet.destination();
  262. if (destination == IPv4Address(255, 255, 255, 255) || NetworkingManagement::the().from_ipv4_address(destination) || socket->multicast_memberships().contains_slow(destination))
  263. socket->did_receive(ipv4_packet.source(), udp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
  264. }
  265. void send_delayed_tcp_ack(TCPSocket& socket)
  266. {
  267. VERIFY(socket.mutex().is_locked());
  268. if (!socket.should_delay_next_ack()) {
  269. [[maybe_unused]] auto result = socket.send_ack();
  270. return;
  271. }
  272. delayed_ack_sockets->set(move(socket));
  273. }
  274. void flush_delayed_tcp_acks()
  275. {
  276. Vector<NonnullRefPtr<TCPSocket>, 32> remaining_sockets;
  277. for (auto& socket : *delayed_ack_sockets) {
  278. MutexLocker locker(socket->mutex());
  279. if (socket->should_delay_next_ack()) {
  280. MUST(remaining_sockets.try_append(*socket));
  281. continue;
  282. }
  283. [[maybe_unused]] auto result = socket->send_ack();
  284. }
  285. if (remaining_sockets.size() != delayed_ack_sockets->size()) {
  286. delayed_ack_sockets->clear();
  287. if (remaining_sockets.size() > 0)
  288. dbgln("flush_delayed_tcp_acks: {} sockets remaining", remaining_sockets.size());
  289. for (auto&& socket : remaining_sockets)
  290. delayed_ack_sockets->set(move(socket));
  291. }
  292. }
  293. void send_tcp_rst(IPv4Packet const& ipv4_packet, TCPPacket const& tcp_packet, RefPtr<NetworkAdapter> adapter)
  294. {
  295. auto routing_decision = route_to(ipv4_packet.source(), ipv4_packet.destination(), adapter);
  296. if (routing_decision.is_zero())
  297. return;
  298. auto ipv4_payload_offset = routing_decision.adapter->ipv4_payload_offset();
  299. size_t const options_size = 0;
  300. size_t const tcp_header_size = sizeof(TCPPacket) + options_size;
  301. size_t const buffer_size = ipv4_payload_offset + tcp_header_size;
  302. auto packet = routing_decision.adapter->acquire_packet_buffer(buffer_size);
  303. if (!packet)
  304. return;
  305. routing_decision.adapter->fill_in_ipv4_header(*packet, ipv4_packet.destination(),
  306. routing_decision.next_hop, ipv4_packet.source(), IPv4Protocol::TCP,
  307. buffer_size - ipv4_payload_offset, 0, 64);
  308. auto& rst_packet = *(TCPPacket*)(packet->buffer->data() + ipv4_payload_offset);
  309. rst_packet = {};
  310. rst_packet.set_source_port(tcp_packet.destination_port());
  311. rst_packet.set_destination_port(tcp_packet.source_port());
  312. rst_packet.set_window_size(0);
  313. rst_packet.set_sequence_number(0);
  314. rst_packet.set_ack_number(tcp_packet.sequence_number() + 1);
  315. rst_packet.set_data_offset(tcp_header_size / sizeof(u32));
  316. rst_packet.set_flags(TCPFlags::RST | TCPFlags::ACK);
  317. rst_packet.set_checksum(TCPSocket::compute_tcp_checksum(ipv4_packet.source(), ipv4_packet.destination(), rst_packet, 0));
  318. routing_decision.adapter->send_packet(packet->bytes());
  319. routing_decision.adapter->release_packet_buffer(*packet);
  320. }
  321. void handle_tcp(IPv4Packet const& ipv4_packet, UnixDateTime const& packet_timestamp)
  322. {
  323. if (ipv4_packet.payload_size() < sizeof(TCPPacket)) {
  324. dbgln("handle_tcp: IPv4 payload is too small to be a TCP packet ({}, need {})", ipv4_packet.payload_size(), sizeof(TCPPacket));
  325. return;
  326. }
  327. auto& tcp_packet = *static_cast<TCPPacket const*>(ipv4_packet.payload());
  328. size_t minimum_tcp_header_size = 5 * sizeof(u32);
  329. size_t maximum_tcp_header_size = 15 * sizeof(u32);
  330. if (tcp_packet.header_size() < minimum_tcp_header_size || tcp_packet.header_size() > maximum_tcp_header_size) {
  331. dbgln("handle_tcp: TCP packet header has invalid size {}", tcp_packet.header_size());
  332. }
  333. if (ipv4_packet.payload_size() < tcp_packet.header_size()) {
  334. dbgln("handle_tcp: IPv4 payload is smaller than TCP header claims ({}, supposedly {})", ipv4_packet.payload_size(), tcp_packet.header_size());
  335. return;
  336. }
  337. size_t payload_size = ipv4_packet.payload_size() - tcp_packet.header_size();
  338. dbgln_if(TCP_DEBUG, "handle_tcp: source={}:{}, destination={}:{}, seq_no={}, ack_no={}, flags={:#04x} ({}{}{}{}), window_size={}, payload_size={}",
  339. ipv4_packet.source().to_string(),
  340. tcp_packet.source_port(),
  341. ipv4_packet.destination().to_string(),
  342. tcp_packet.destination_port(),
  343. tcp_packet.sequence_number(),
  344. tcp_packet.ack_number(),
  345. tcp_packet.flags(),
  346. tcp_packet.has_syn() ? "SYN " : "",
  347. tcp_packet.has_ack() ? "ACK " : "",
  348. tcp_packet.has_fin() ? "FIN " : "",
  349. tcp_packet.has_rst() ? "RST " : "",
  350. tcp_packet.window_size(),
  351. payload_size);
  352. auto adapter = NetworkingManagement::the().from_ipv4_address(ipv4_packet.destination());
  353. if (!adapter) {
  354. dbgln("handle_tcp: this packet is not for me, it's for {}", ipv4_packet.destination());
  355. return;
  356. }
  357. IPv4SocketTuple tuple(ipv4_packet.destination(), tcp_packet.destination_port(), ipv4_packet.source(), tcp_packet.source_port());
  358. dbgln_if(TCP_DEBUG, "handle_tcp: looking for socket; tuple={}", tuple.to_string());
  359. auto socket = TCPSocket::from_tuple(tuple);
  360. if (!socket) {
  361. if (!tcp_packet.has_rst()) {
  362. dbgln("handle_tcp: No TCP socket for tuple {}. Sending RST.", tuple.to_string());
  363. send_tcp_rst(ipv4_packet, tcp_packet, adapter);
  364. }
  365. return;
  366. }
  367. MutexLocker locker(socket->mutex());
  368. VERIFY(socket->type() == SOCK_STREAM);
  369. VERIFY(socket->local_port() == tcp_packet.destination_port());
  370. dbgln_if(TCP_DEBUG, "handle_tcp: got socket {}; state={}", socket->tuple().to_string(), TCPSocket::to_string(socket->state()));
  371. socket->receive_tcp_packet(tcp_packet, ipv4_packet.payload_size());
  372. Optional<u8> send_window_scale;
  373. if (tcp_packet.has_syn()) {
  374. tcp_packet.for_each_option([&send_window_scale](auto const& option) {
  375. if (option.kind() != TCPOptionKind::WindowScale)
  376. return;
  377. if (option.length() != sizeof(TCPOptionWindowScale))
  378. return;
  379. auto scale = static_cast<TCPOptionWindowScale const&>(option).value();
  380. if (scale > 14)
  381. return; // Maximum allowed as per RFC7323
  382. send_window_scale = scale;
  383. });
  384. }
  385. switch (socket->state()) {
  386. case TCPSocket::State::Closed:
  387. dbgln("handle_tcp: unexpected flags in Closed state ({:x}) for socket with tuple {}", tcp_packet.flags(), tuple.to_string());
  388. if (tcp_packet.has_rst()) {
  389. return;
  390. }
  391. socket->set_sequence_number(tcp_packet.has_ack() ? tcp_packet.ack_number() : 0);
  392. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  393. (void)socket->send_tcp_packet(TCPFlags::RST | TCPFlags::ACK);
  394. return;
  395. case TCPSocket::State::TimeWait:
  396. dbgln("handle_tcp: unexpected flags in TimeWait state ({:x}) for socket with tuple {}", tcp_packet.flags(), tuple.to_string());
  397. (void)socket->send_tcp_packet(TCPFlags::RST);
  398. socket->set_state(TCPSocket::State::Closed);
  399. return;
  400. case TCPSocket::State::Listen:
  401. switch (tcp_packet.flags()) {
  402. case TCPFlags::SYN: {
  403. dbgln_if(TCP_DEBUG, "handle_tcp: incoming connection");
  404. auto& local_address = ipv4_packet.destination();
  405. auto& peer_address = ipv4_packet.source();
  406. auto client_or_error = socket->try_create_client(local_address, tcp_packet.destination_port(), peer_address, tcp_packet.source_port());
  407. if (client_or_error.is_error()) {
  408. dmesgln("handle_tcp: couldn't create client socket: {}", client_or_error.error());
  409. return;
  410. }
  411. auto client = client_or_error.release_value();
  412. MutexLocker locker(client->mutex());
  413. dbgln_if(TCP_DEBUG, "handle_tcp: created new client socket with tuple {}", client->tuple().to_string());
  414. client->set_sequence_number(1000);
  415. client->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  416. [[maybe_unused]] auto rc2 = client->send_tcp_packet(TCPFlags::SYN | TCPFlags::ACK);
  417. client->set_state(TCPSocket::State::SynReceived);
  418. if (send_window_scale.has_value())
  419. client->set_send_window_scale(*send_window_scale);
  420. return;
  421. }
  422. default:
  423. dbgln("handle_tcp: unexpected flags in Listen state ({:x})", tcp_packet.flags());
  424. // socket->send_tcp_packet(TCPFlags::RST);
  425. return;
  426. }
  427. case TCPSocket::State::SynSent:
  428. switch (tcp_packet.flags()) {
  429. case TCPFlags::SYN:
  430. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  431. (void)socket->send_tcp_packet(TCPFlags::SYN | TCPFlags::ACK);
  432. socket->set_state(TCPSocket::State::SynReceived);
  433. if (send_window_scale.has_value())
  434. socket->set_send_window_scale(*send_window_scale);
  435. return;
  436. case TCPFlags::ACK | TCPFlags::SYN:
  437. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  438. (void)socket->send_ack(true);
  439. socket->set_state(TCPSocket::State::Established);
  440. socket->set_setup_state(Socket::SetupState::Completed);
  441. socket->set_connected(true);
  442. if (send_window_scale.has_value())
  443. socket->set_send_window_scale(*send_window_scale);
  444. return;
  445. case TCPFlags::ACK | TCPFlags::FIN:
  446. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  447. send_delayed_tcp_ack(*socket);
  448. socket->set_state(TCPSocket::State::Closed);
  449. socket->set_error(TCPSocket::Error::FINDuringConnect);
  450. socket->set_setup_state(Socket::SetupState::Completed);
  451. return;
  452. case TCPFlags::ACK | TCPFlags::RST:
  453. socket->set_state(TCPSocket::State::Closed);
  454. socket->set_error(TCPSocket::Error::RSTDuringConnect);
  455. socket->set_setup_state(Socket::SetupState::Completed);
  456. return;
  457. default:
  458. dbgln("handle_tcp: unexpected flags in SynSent state ({:x})", tcp_packet.flags());
  459. (void)socket->send_tcp_packet(TCPFlags::RST);
  460. socket->set_state(TCPSocket::State::Closed);
  461. socket->set_error(TCPSocket::Error::UnexpectedFlagsDuringConnect);
  462. socket->set_setup_state(Socket::SetupState::Completed);
  463. return;
  464. }
  465. case TCPSocket::State::SynReceived:
  466. switch (tcp_packet.flags()) {
  467. case TCPFlags::ACK:
  468. socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
  469. switch (socket->direction()) {
  470. case TCPSocket::Direction::Incoming:
  471. if (!socket->has_originator()) {
  472. dbgln("handle_tcp: connection doesn't have an originating socket; maybe it went away?");
  473. (void)socket->send_tcp_packet(TCPFlags::RST);
  474. socket->set_state(TCPSocket::State::Closed);
  475. return;
  476. }
  477. socket->set_state(TCPSocket::State::Established);
  478. socket->set_setup_state(Socket::SetupState::Completed);
  479. socket->release_to_originator();
  480. return;
  481. case TCPSocket::Direction::Outgoing:
  482. socket->set_state(TCPSocket::State::Established);
  483. socket->set_setup_state(Socket::SetupState::Completed);
  484. socket->set_connected(true);
  485. return;
  486. default:
  487. dbgln("handle_tcp: got ACK in SynReceived state but direction is invalid ({})", TCPSocket::to_string(socket->direction()));
  488. (void)socket->send_tcp_packet(TCPFlags::RST);
  489. socket->set_state(TCPSocket::State::Closed);
  490. return;
  491. }
  492. VERIFY_NOT_REACHED();
  493. case TCPFlags::SYN:
  494. dbgln("handle_tcp: ignoring SYN for partially established connection");
  495. return;
  496. default:
  497. dbgln("handle_tcp: unexpected flags in SynReceived state ({:x})", tcp_packet.flags());
  498. (void)socket->send_tcp_packet(TCPFlags::RST);
  499. socket->set_state(TCPSocket::State::Closed);
  500. return;
  501. }
  502. case TCPSocket::State::CloseWait:
  503. switch (tcp_packet.flags()) {
  504. default:
  505. dbgln("handle_tcp: unexpected flags in CloseWait state ({:x})", tcp_packet.flags());
  506. (void)socket->send_tcp_packet(TCPFlags::RST);
  507. socket->set_state(TCPSocket::State::Closed);
  508. return;
  509. }
  510. case TCPSocket::State::LastAck:
  511. switch (tcp_packet.flags()) {
  512. case TCPFlags::ACK:
  513. socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
  514. socket->set_state(TCPSocket::State::Closed);
  515. return;
  516. default:
  517. dbgln("handle_tcp: unexpected flags in LastAck state ({:x})", tcp_packet.flags());
  518. (void)socket->send_tcp_packet(TCPFlags::RST);
  519. socket->set_state(TCPSocket::State::Closed);
  520. return;
  521. }
  522. case TCPSocket::State::FinWait1:
  523. switch (tcp_packet.flags()) {
  524. case TCPFlags::ACK:
  525. socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
  526. socket->set_state(TCPSocket::State::FinWait2);
  527. return;
  528. case TCPFlags::FIN:
  529. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  530. socket->set_state(TCPSocket::State::Closing);
  531. (void)socket->send_ack(true);
  532. return;
  533. case TCPFlags::FIN | TCPFlags::ACK:
  534. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  535. socket->set_state(TCPSocket::State::TimeWait);
  536. (void)socket->send_ack(true);
  537. return;
  538. default:
  539. dbgln("handle_tcp: unexpected flags in FinWait1 state ({:x})", tcp_packet.flags());
  540. (void)socket->send_tcp_packet(TCPFlags::RST);
  541. socket->set_state(TCPSocket::State::Closed);
  542. return;
  543. }
  544. case TCPSocket::State::FinWait2:
  545. switch (tcp_packet.flags()) {
  546. case TCPFlags::FIN | TCPFlags::ACK: // Fallthrough
  547. case TCPFlags::FIN:
  548. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  549. socket->set_state(TCPSocket::State::TimeWait);
  550. (void)socket->send_ack(true);
  551. return;
  552. case TCPFlags::ACK | TCPFlags::RST:
  553. // FIXME: Verify that this transition is legitimate.
  554. socket->set_state(TCPSocket::State::Closed);
  555. return;
  556. case TCPFlags::ACK:
  557. if (payload_size) {
  558. if (socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp)) {
  559. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  560. dbgln_if(TCP_DEBUG, "Got packet with ack_no={}, seq_no={}, payload_size={}, acking it with new ack_no={}, seq_no={}",
  561. tcp_packet.ack_number(), tcp_packet.sequence_number(), payload_size, socket->ack_number(), socket->sequence_number());
  562. send_delayed_tcp_ack(*socket);
  563. }
  564. }
  565. return;
  566. default:
  567. dbgln("handle_tcp: unexpected flags in FinWait2 state ({:x})", tcp_packet.flags());
  568. (void)socket->send_tcp_packet(TCPFlags::RST);
  569. socket->set_state(TCPSocket::State::Closed);
  570. return;
  571. }
  572. case TCPSocket::State::Closing:
  573. switch (tcp_packet.flags()) {
  574. case TCPFlags::ACK:
  575. socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
  576. socket->set_state(TCPSocket::State::TimeWait);
  577. return;
  578. default:
  579. dbgln("handle_tcp: unexpected flags in Closing state ({:x})", tcp_packet.flags());
  580. (void)socket->send_tcp_packet(TCPFlags::RST);
  581. socket->set_state(TCPSocket::State::Closed);
  582. return;
  583. }
  584. case TCPSocket::State::Established:
  585. if (tcp_packet.has_rst()) {
  586. socket->set_state(TCPSocket::State::Closed);
  587. return;
  588. }
  589. if (tcp_packet.sequence_number() != socket->ack_number()) {
  590. dbgln_if(TCP_DEBUG, "Discarding out of order packet: seq {} vs. ack {}", tcp_packet.sequence_number(), socket->ack_number());
  591. if (socket->duplicate_acks() < TCPSocket::maximum_duplicate_acks) {
  592. dbgln_if(TCP_DEBUG, "Sending ACK with same ack number to trigger fast retransmission");
  593. socket->set_duplicate_acks(socket->duplicate_acks() + 1);
  594. [[maybe_unused]] auto result = socket->send_ack(true);
  595. }
  596. return;
  597. }
  598. socket->set_duplicate_acks(0);
  599. if (tcp_packet.has_fin()) {
  600. if (payload_size != 0)
  601. socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp);
  602. socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1);
  603. send_delayed_tcp_ack(*socket);
  604. socket->set_state(TCPSocket::State::CloseWait);
  605. socket->set_connected(false);
  606. return;
  607. }
  608. if (payload_size) {
  609. if (socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp)) {
  610. socket->set_ack_number(tcp_packet.sequence_number() + payload_size);
  611. dbgln_if(TCP_DEBUG, "Got packet with ack_no={}, seq_no={}, payload_size={}, acking it with new ack_no={}, seq_no={}",
  612. tcp_packet.ack_number(), tcp_packet.sequence_number(), payload_size, socket->ack_number(), socket->sequence_number());
  613. send_delayed_tcp_ack(*socket);
  614. }
  615. }
  616. }
  617. }
  618. void retransmit_tcp_packets()
  619. {
  620. // We must keep the sockets alive until after we've unlocked the hash table
  621. // in case retransmit_packets() realizes that it wants to close the socket.
  622. Vector<NonnullRefPtr<TCPSocket>, 16> sockets;
  623. TCPSocket::sockets_for_retransmit().for_each_shared([&](auto const& socket) {
  624. // We ignore allocation failures above the first 16 guaranteed socket slots, as
  625. // we will just retransmit their packets the next time around
  626. (void)sockets.try_append(socket);
  627. });
  628. for (auto& socket : sockets) {
  629. MutexLocker socket_locker(socket->mutex());
  630. socket->retransmit_packets();
  631. }
  632. }
  633. }