SelectorEngine.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. /*
  2. * Copyright (c) 2018-2022, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <LibWeb/CSS/Parser/Parser.h>
  8. #include <LibWeb/CSS/SelectorEngine.h>
  9. #include <LibWeb/DOM/Document.h>
  10. #include <LibWeb/DOM/Element.h>
  11. #include <LibWeb/DOM/Text.h>
  12. #include <LibWeb/HTML/AttributeNames.h>
  13. #include <LibWeb/HTML/HTMLAnchorElement.h>
  14. #include <LibWeb/HTML/HTMLAreaElement.h>
  15. #include <LibWeb/HTML/HTMLButtonElement.h>
  16. #include <LibWeb/HTML/HTMLFieldSetElement.h>
  17. #include <LibWeb/HTML/HTMLHtmlElement.h>
  18. #include <LibWeb/HTML/HTMLInputElement.h>
  19. #include <LibWeb/HTML/HTMLOptGroupElement.h>
  20. #include <LibWeb/HTML/HTMLOptionElement.h>
  21. #include <LibWeb/HTML/HTMLSelectElement.h>
  22. #include <LibWeb/HTML/HTMLTextAreaElement.h>
  23. #include <LibWeb/Infra/Strings.h>
  24. namespace Web::SelectorEngine {
  25. // https://drafts.csswg.org/selectors-4/#the-lang-pseudo
  26. static inline bool matches_lang_pseudo_class(DOM::Element const& element, Vector<FlyString> const& languages)
  27. {
  28. FlyString element_language;
  29. for (auto const* e = &element; e; e = e->parent_element()) {
  30. auto lang = e->attribute(HTML::AttributeNames::lang);
  31. if (!lang.is_null()) {
  32. element_language = FlyString::from_deprecated_fly_string(lang).release_value_but_fixme_should_propagate_errors();
  33. break;
  34. }
  35. }
  36. if (element_language.is_empty())
  37. return false;
  38. // FIXME: This is ad-hoc. Implement a proper language range matching algorithm as recommended by BCP47.
  39. for (auto const& language : languages) {
  40. if (language.is_empty())
  41. return false;
  42. if (language == "*"sv)
  43. return true;
  44. if (!element_language.to_string().contains('-'))
  45. return Infra::is_ascii_case_insensitive_match(element_language, language);
  46. auto parts = element_language.to_string().split_limit('-', 2).release_value_but_fixme_should_propagate_errors();
  47. return Infra::is_ascii_case_insensitive_match(parts[0], language);
  48. }
  49. return false;
  50. }
  51. // https://html.spec.whatwg.org/multipage/semantics-other.html#selector-link
  52. static inline bool matches_link_pseudo_class(DOM::Element const& element)
  53. {
  54. // All a elements that have an href attribute, and all area elements that have an href attribute, must match one of :link and :visited.
  55. if (!is<HTML::HTMLAnchorElement>(element) && !is<HTML::HTMLAreaElement>(element))
  56. return false;
  57. return element.has_attribute(HTML::AttributeNames::href);
  58. }
  59. static inline bool matches_hover_pseudo_class(DOM::Element const& element)
  60. {
  61. auto* hovered_node = element.document().hovered_node();
  62. if (!hovered_node)
  63. return false;
  64. if (&element == hovered_node)
  65. return true;
  66. return element.is_ancestor_of(*hovered_node);
  67. }
  68. // https://html.spec.whatwg.org/multipage/semantics-other.html#selector-checked
  69. static inline bool matches_checked_pseudo_class(DOM::Element const& element)
  70. {
  71. // The :checked pseudo-class must match any element falling into one of the following categories:
  72. // - input elements whose type attribute is in the Checkbox state and whose checkedness state is true
  73. // - input elements whose type attribute is in the Radio Button state and whose checkedness state is true
  74. if (is<HTML::HTMLInputElement>(element)) {
  75. auto const& input_element = static_cast<HTML::HTMLInputElement const&>(element);
  76. switch (input_element.type_state()) {
  77. case HTML::HTMLInputElement::TypeAttributeState::Checkbox:
  78. case HTML::HTMLInputElement::TypeAttributeState::RadioButton:
  79. return static_cast<HTML::HTMLInputElement const&>(element).checked();
  80. default:
  81. return false;
  82. }
  83. }
  84. // FIXME: - option elements whose selectedness is true
  85. return false;
  86. }
  87. static inline bool matches_attribute(CSS::Selector::SimpleSelector::Attribute const& attribute, DOM::Element const& element)
  88. {
  89. if (attribute.match_type == CSS::Selector::SimpleSelector::Attribute::MatchType::HasAttribute) {
  90. // Early way out in case of an attribute existence selector.
  91. return element.has_attribute(attribute.name.to_string().to_deprecated_string());
  92. }
  93. auto const case_insensitive_match = (attribute.case_type == CSS::Selector::SimpleSelector::Attribute::CaseType::CaseInsensitiveMatch);
  94. auto const case_sensitivity = case_insensitive_match
  95. ? CaseSensitivity::CaseInsensitive
  96. : CaseSensitivity::CaseSensitive;
  97. switch (attribute.match_type) {
  98. case CSS::Selector::SimpleSelector::Attribute::MatchType::ExactValueMatch:
  99. return case_insensitive_match
  100. ? Infra::is_ascii_case_insensitive_match(element.attribute(attribute.name.to_string().to_deprecated_string()), attribute.value)
  101. : element.attribute(attribute.name.to_string().to_deprecated_string()) == attribute.value.to_deprecated_string();
  102. case CSS::Selector::SimpleSelector::Attribute::MatchType::ContainsWord: {
  103. if (attribute.value.is_empty()) {
  104. // This selector is always false is match value is empty.
  105. return false;
  106. }
  107. auto const view = element.attribute(attribute.name.to_string().to_deprecated_string()).split_view(' ');
  108. auto const size = view.size();
  109. for (size_t i = 0; i < size; ++i) {
  110. auto const value = view.at(i);
  111. if (case_insensitive_match
  112. ? Infra::is_ascii_case_insensitive_match(value, attribute.value)
  113. : value == attribute.value) {
  114. return true;
  115. }
  116. }
  117. return false;
  118. }
  119. case CSS::Selector::SimpleSelector::Attribute::MatchType::ContainsString:
  120. return !attribute.value.is_empty()
  121. && element.attribute(attribute.name.to_string().to_deprecated_string()).contains(attribute.value, case_sensitivity);
  122. case CSS::Selector::SimpleSelector::Attribute::MatchType::StartsWithSegment: {
  123. auto const element_attr_value = element.attribute(attribute.name.to_string().to_deprecated_string());
  124. if (element_attr_value.is_empty()) {
  125. // If the attribute value on element is empty, the selector is true
  126. // if the match value is also empty and false otherwise.
  127. return attribute.value.is_empty();
  128. }
  129. if (attribute.value.is_empty()) {
  130. return false;
  131. }
  132. auto segments = element_attr_value.split_view('-');
  133. return case_insensitive_match
  134. ? Infra::is_ascii_case_insensitive_match(segments.first(), attribute.value)
  135. : segments.first() == attribute.value;
  136. }
  137. case CSS::Selector::SimpleSelector::Attribute::MatchType::StartsWithString:
  138. return !attribute.value.is_empty()
  139. && element.attribute(attribute.name.to_string().to_deprecated_string()).starts_with(attribute.value, case_sensitivity);
  140. case CSS::Selector::SimpleSelector::Attribute::MatchType::EndsWithString:
  141. return !attribute.value.is_empty()
  142. && element.attribute(attribute.name.to_string().to_deprecated_string()).ends_with(attribute.value, case_sensitivity);
  143. default:
  144. break;
  145. }
  146. return false;
  147. }
  148. static inline DOM::Element const* previous_sibling_with_same_tag_name(DOM::Element const& element)
  149. {
  150. for (auto const* sibling = element.previous_element_sibling(); sibling; sibling = sibling->previous_element_sibling()) {
  151. if (sibling->tag_name() == element.tag_name())
  152. return sibling;
  153. }
  154. return nullptr;
  155. }
  156. static inline DOM::Element const* next_sibling_with_same_tag_name(DOM::Element const& element)
  157. {
  158. for (auto const* sibling = element.next_element_sibling(); sibling; sibling = sibling->next_element_sibling()) {
  159. if (sibling->tag_name() == element.tag_name())
  160. return sibling;
  161. }
  162. return nullptr;
  163. }
  164. static inline bool matches_pseudo_class(CSS::Selector::SimpleSelector::PseudoClass const& pseudo_class, DOM::Element const& element)
  165. {
  166. switch (pseudo_class.type) {
  167. case CSS::Selector::SimpleSelector::PseudoClass::Type::Link:
  168. return matches_link_pseudo_class(element);
  169. case CSS::Selector::SimpleSelector::PseudoClass::Type::Visited:
  170. // FIXME: Maybe match this selector sometimes?
  171. return false;
  172. case CSS::Selector::SimpleSelector::PseudoClass::Type::Active:
  173. return element.is_active();
  174. case CSS::Selector::SimpleSelector::PseudoClass::Type::Hover:
  175. return matches_hover_pseudo_class(element);
  176. case CSS::Selector::SimpleSelector::PseudoClass::Type::Focus:
  177. return element.is_focused();
  178. case CSS::Selector::SimpleSelector::PseudoClass::Type::FocusWithin: {
  179. auto* focused_element = element.document().focused_element();
  180. return focused_element && element.is_inclusive_ancestor_of(*focused_element);
  181. }
  182. case CSS::Selector::SimpleSelector::PseudoClass::Type::FirstChild:
  183. return !element.previous_element_sibling();
  184. case CSS::Selector::SimpleSelector::PseudoClass::Type::LastChild:
  185. return !element.next_element_sibling();
  186. case CSS::Selector::SimpleSelector::PseudoClass::Type::OnlyChild:
  187. return !(element.previous_element_sibling() || element.next_element_sibling());
  188. case CSS::Selector::SimpleSelector::PseudoClass::Type::Empty: {
  189. if (!element.has_children())
  190. return true;
  191. if (element.first_child_of_type<DOM::Element>())
  192. return false;
  193. // NOTE: CSS Selectors level 4 changed ":empty" to also match whitespace-only text nodes.
  194. // However, none of the major browser supports this yet, so let's just hang back until they do.
  195. bool has_nonempty_text_child = false;
  196. element.for_each_child_of_type<DOM::Text>([&](auto const& text_child) {
  197. if (!text_child.data().is_empty()) {
  198. has_nonempty_text_child = true;
  199. return IterationDecision::Break;
  200. }
  201. return IterationDecision::Continue;
  202. });
  203. return !has_nonempty_text_child;
  204. }
  205. case CSS::Selector::SimpleSelector::PseudoClass::Type::Root:
  206. return is<HTML::HTMLHtmlElement>(element);
  207. case CSS::Selector::SimpleSelector::PseudoClass::Type::FirstOfType:
  208. return !previous_sibling_with_same_tag_name(element);
  209. case CSS::Selector::SimpleSelector::PseudoClass::Type::LastOfType:
  210. return !next_sibling_with_same_tag_name(element);
  211. case CSS::Selector::SimpleSelector::PseudoClass::Type::OnlyOfType:
  212. return !previous_sibling_with_same_tag_name(element) && !next_sibling_with_same_tag_name(element);
  213. case CSS::Selector::SimpleSelector::PseudoClass::Type::Lang:
  214. return matches_lang_pseudo_class(element, pseudo_class.languages);
  215. case CSS::Selector::SimpleSelector::PseudoClass::Type::Disabled:
  216. // https://html.spec.whatwg.org/multipage/semantics-other.html#selector-disabled
  217. // The :disabled pseudo-class must match any element that is actually disabled.
  218. return element.is_actually_disabled();
  219. case CSS::Selector::SimpleSelector::PseudoClass::Type::Enabled:
  220. // https://html.spec.whatwg.org/multipage/semantics-other.html#selector-enabled
  221. // The :enabled pseudo-class must match any button, input, select, textarea, optgroup, option, fieldset element, or form-associated custom element that is not actually disabled.
  222. return (is<HTML::HTMLButtonElement>(element) || is<HTML::HTMLInputElement>(element) || is<HTML::HTMLSelectElement>(element) || is<HTML::HTMLTextAreaElement>(element) || is<HTML::HTMLOptGroupElement>(element) || is<HTML::HTMLOptionElement>(element) || is<HTML::HTMLFieldSetElement>(element))
  223. && !element.is_actually_disabled();
  224. case CSS::Selector::SimpleSelector::PseudoClass::Type::Checked:
  225. return matches_checked_pseudo_class(element);
  226. case CSS::Selector::SimpleSelector::PseudoClass::Type::Is:
  227. case CSS::Selector::SimpleSelector::PseudoClass::Type::Where:
  228. for (auto& selector : pseudo_class.argument_selector_list) {
  229. if (matches(selector, element))
  230. return true;
  231. }
  232. return false;
  233. case CSS::Selector::SimpleSelector::PseudoClass::Type::Not:
  234. for (auto& selector : pseudo_class.argument_selector_list) {
  235. if (matches(selector, element))
  236. return false;
  237. }
  238. return true;
  239. case CSS::Selector::SimpleSelector::PseudoClass::Type::NthChild:
  240. case CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastChild:
  241. case CSS::Selector::SimpleSelector::PseudoClass::Type::NthOfType:
  242. case CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastOfType:
  243. auto const step_size = pseudo_class.nth_child_pattern.step_size;
  244. auto const offset = pseudo_class.nth_child_pattern.offset;
  245. if (step_size == 0 && offset == 0)
  246. return false; // "If both a and b are equal to zero, the pseudo-class represents no element in the document tree."
  247. auto const* parent = element.parent_element();
  248. if (!parent)
  249. return false;
  250. auto matches_selector_list = [](CSS::SelectorList const& list, DOM::Element const& element) {
  251. if (list.is_empty())
  252. return true;
  253. for (auto const& child_selector : list) {
  254. if (matches(child_selector, element)) {
  255. return true;
  256. }
  257. }
  258. return false;
  259. };
  260. int index = 1;
  261. switch (pseudo_class.type) {
  262. case CSS::Selector::SimpleSelector::PseudoClass::Type::NthChild: {
  263. if (!matches_selector_list(pseudo_class.argument_selector_list, element))
  264. return false;
  265. for (auto* child = parent->first_child_of_type<DOM::Element>(); child && child != &element; child = child->next_element_sibling()) {
  266. if (matches_selector_list(pseudo_class.argument_selector_list, *child))
  267. ++index;
  268. }
  269. break;
  270. }
  271. case CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastChild: {
  272. if (!matches_selector_list(pseudo_class.argument_selector_list, element))
  273. return false;
  274. for (auto* child = parent->last_child_of_type<DOM::Element>(); child && child != &element; child = child->previous_element_sibling()) {
  275. if (matches_selector_list(pseudo_class.argument_selector_list, *child))
  276. ++index;
  277. }
  278. break;
  279. }
  280. case CSS::Selector::SimpleSelector::PseudoClass::Type::NthOfType: {
  281. for (auto* child = previous_sibling_with_same_tag_name(element); child; child = previous_sibling_with_same_tag_name(*child))
  282. ++index;
  283. break;
  284. }
  285. case CSS::Selector::SimpleSelector::PseudoClass::Type::NthLastOfType: {
  286. for (auto* child = next_sibling_with_same_tag_name(element); child; child = next_sibling_with_same_tag_name(*child))
  287. ++index;
  288. break;
  289. }
  290. default:
  291. VERIFY_NOT_REACHED();
  292. }
  293. // When "step_size == -1", selector represents first "offset" elements in document tree.
  294. if (step_size == -1)
  295. return !(offset <= 0 || index > offset);
  296. // When "step_size == 1", selector represents last "offset" elements in document tree.
  297. if (step_size == 1)
  298. return !(offset < 0 || index < offset);
  299. // When "step_size == 0", selector picks only the "offset" element.
  300. if (step_size == 0)
  301. return index == offset;
  302. // If both are negative, nothing can match.
  303. if (step_size < 0 && offset < 0)
  304. return false;
  305. // Like "a % b", but handles negative integers correctly.
  306. auto const canonical_modulo = [](int a, int b) -> int {
  307. int c = a % b;
  308. if ((c < 0 && b > 0) || (c > 0 && b < 0)) {
  309. c += b;
  310. }
  311. return c;
  312. };
  313. // When "step_size < 0", we start at "offset" and count backwards.
  314. if (step_size < 0)
  315. return index <= offset && canonical_modulo(index - offset, -step_size) == 0;
  316. // Otherwise, we start at "offset" and count forwards.
  317. return index >= offset && canonical_modulo(index - offset, step_size) == 0;
  318. }
  319. return false;
  320. }
  321. static inline bool matches(CSS::Selector::SimpleSelector const& component, DOM::Element const& element)
  322. {
  323. switch (component.type) {
  324. case CSS::Selector::SimpleSelector::Type::Universal:
  325. return true;
  326. case CSS::Selector::SimpleSelector::Type::Id:
  327. return component.name() == element.attribute(HTML::AttributeNames::id).view();
  328. case CSS::Selector::SimpleSelector::Type::Class:
  329. return element.has_class(component.name());
  330. case CSS::Selector::SimpleSelector::Type::TagName:
  331. // See https://html.spec.whatwg.org/multipage/semantics-other.html#case-sensitivity-of-selectors
  332. if (element.document().document_type() == DOM::Document::Type::HTML)
  333. return component.lowercase_name() == element.local_name().view();
  334. return Infra::is_ascii_case_insensitive_match(component.name(), element.local_name());
  335. case CSS::Selector::SimpleSelector::Type::Attribute:
  336. return matches_attribute(component.attribute(), element);
  337. case CSS::Selector::SimpleSelector::Type::PseudoClass:
  338. return matches_pseudo_class(component.pseudo_class(), element);
  339. case CSS::Selector::SimpleSelector::Type::PseudoElement:
  340. // Pseudo-element matching/not-matching is handled in the top level matches().
  341. return true;
  342. default:
  343. VERIFY_NOT_REACHED();
  344. }
  345. }
  346. static inline bool matches(CSS::Selector const& selector, int component_list_index, DOM::Element const& element)
  347. {
  348. auto& relative_selector = selector.compound_selectors()[component_list_index];
  349. for (auto& simple_selector : relative_selector.simple_selectors) {
  350. if (!matches(simple_selector, element))
  351. return false;
  352. }
  353. switch (relative_selector.combinator) {
  354. case CSS::Selector::Combinator::None:
  355. return true;
  356. case CSS::Selector::Combinator::Descendant:
  357. VERIFY(component_list_index != 0);
  358. for (auto* ancestor = element.parent(); ancestor; ancestor = ancestor->parent()) {
  359. if (!is<DOM::Element>(*ancestor))
  360. continue;
  361. if (matches(selector, component_list_index - 1, static_cast<DOM::Element const&>(*ancestor)))
  362. return true;
  363. }
  364. return false;
  365. case CSS::Selector::Combinator::ImmediateChild:
  366. VERIFY(component_list_index != 0);
  367. if (!element.parent() || !is<DOM::Element>(*element.parent()))
  368. return false;
  369. return matches(selector, component_list_index - 1, static_cast<DOM::Element const&>(*element.parent()));
  370. case CSS::Selector::Combinator::NextSibling:
  371. VERIFY(component_list_index != 0);
  372. if (auto* sibling = element.previous_element_sibling())
  373. return matches(selector, component_list_index - 1, *sibling);
  374. return false;
  375. case CSS::Selector::Combinator::SubsequentSibling:
  376. VERIFY(component_list_index != 0);
  377. for (auto* sibling = element.previous_element_sibling(); sibling; sibling = sibling->previous_element_sibling()) {
  378. if (matches(selector, component_list_index - 1, *sibling))
  379. return true;
  380. }
  381. return false;
  382. case CSS::Selector::Combinator::Column:
  383. TODO();
  384. }
  385. VERIFY_NOT_REACHED();
  386. }
  387. bool matches(CSS::Selector const& selector, DOM::Element const& element, Optional<CSS::Selector::PseudoElement> pseudo_element)
  388. {
  389. VERIFY(!selector.compound_selectors().is_empty());
  390. if (pseudo_element.has_value() && selector.pseudo_element() != pseudo_element)
  391. return false;
  392. if (!pseudo_element.has_value() && selector.pseudo_element().has_value())
  393. return false;
  394. return matches(selector, selector.compound_selectors().size() - 1, element);
  395. }
  396. }