Parser.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /*
  2. * Copyright (c) 2020-2021, the SerenityOS developers.
  3. * Copyright (c) 2021-2022, Sam Atkins <atkinssj@serenityos.org>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #pragma once
  8. #include <AK/Error.h>
  9. #include <AK/NonnullOwnPtrVector.h>
  10. #include <AK/NonnullRefPtrVector.h>
  11. #include <AK/RefPtr.h>
  12. #include <AK/Vector.h>
  13. #include <LibWeb/CSS/CSSStyleDeclaration.h>
  14. #include <LibWeb/CSS/FontFace.h>
  15. #include <LibWeb/CSS/GeneralEnclosed.h>
  16. #include <LibWeb/CSS/MediaQuery.h>
  17. #include <LibWeb/CSS/Parser/Block.h>
  18. #include <LibWeb/CSS/Parser/ComponentValue.h>
  19. #include <LibWeb/CSS/Parser/Declaration.h>
  20. #include <LibWeb/CSS/Parser/DeclarationOrAtRule.h>
  21. #include <LibWeb/CSS/Parser/Function.h>
  22. #include <LibWeb/CSS/Parser/Rule.h>
  23. #include <LibWeb/CSS/Parser/Tokenizer.h>
  24. #include <LibWeb/CSS/Ratio.h>
  25. #include <LibWeb/CSS/Selector.h>
  26. #include <LibWeb/CSS/StyleValue.h>
  27. #include <LibWeb/CSS/Supports.h>
  28. #include <LibWeb/CSS/UnicodeRange.h>
  29. #include <LibWeb/Forward.h>
  30. namespace Web::CSS::Parser {
  31. class ParsingContext {
  32. public:
  33. ParsingContext();
  34. explicit ParsingContext(HTML::Window&);
  35. explicit ParsingContext(DOM::Document const&);
  36. explicit ParsingContext(DOM::Document const&, AK::URL);
  37. explicit ParsingContext(DOM::ParentNode&);
  38. bool in_quirks_mode() const;
  39. DOM::Document const* document() const { return m_document; }
  40. AK::URL complete_url(String const&) const;
  41. PropertyID current_property_id() const { return m_current_property_id; }
  42. void set_current_property_id(PropertyID property_id) { m_current_property_id = property_id; }
  43. HTML::Window& window_object() const { return m_window_object; }
  44. private:
  45. HTML::Window& m_window_object;
  46. DOM::Document const* m_document { nullptr };
  47. PropertyID m_current_property_id { PropertyID::Invalid };
  48. AK::URL m_url;
  49. };
  50. template<typename T>
  51. class TokenStream {
  52. public:
  53. class StateTransaction {
  54. public:
  55. explicit StateTransaction(TokenStream<T>& token_stream)
  56. : m_token_stream(token_stream)
  57. , m_saved_iterator_offset(token_stream.m_iterator_offset)
  58. {
  59. }
  60. ~StateTransaction()
  61. {
  62. if (!m_commit)
  63. m_token_stream.m_iterator_offset = m_saved_iterator_offset;
  64. }
  65. StateTransaction create_child() { return StateTransaction(*this); }
  66. void commit()
  67. {
  68. m_commit = true;
  69. if (m_parent)
  70. m_parent->commit();
  71. }
  72. private:
  73. explicit StateTransaction(StateTransaction& parent)
  74. : m_parent(&parent)
  75. , m_token_stream(parent.m_token_stream)
  76. , m_saved_iterator_offset(parent.m_token_stream.m_iterator_offset)
  77. {
  78. }
  79. StateTransaction* m_parent { nullptr };
  80. TokenStream<T>& m_token_stream;
  81. int m_saved_iterator_offset { 0 };
  82. bool m_commit { false };
  83. };
  84. explicit TokenStream(Vector<T> const&);
  85. ~TokenStream() = default;
  86. TokenStream(TokenStream<T> const&) = delete;
  87. bool has_next_token();
  88. T const& next_token();
  89. T const& peek_token(int offset = 0);
  90. T const& current_token();
  91. void reconsume_current_input_token();
  92. StateTransaction begin_transaction() { return StateTransaction(*this); }
  93. void skip_whitespace();
  94. void dump_all_tokens();
  95. private:
  96. Vector<T> const& m_tokens;
  97. int m_iterator_offset { -1 };
  98. T make_eof();
  99. T m_eof;
  100. };
  101. class Parser {
  102. public:
  103. Parser(ParsingContext const&, StringView input, String const& encoding = "utf-8");
  104. ~Parser() = default;
  105. CSSStyleSheet* parse_as_css_stylesheet(Optional<AK::URL> location);
  106. ElementInlineCSSStyleDeclaration* parse_as_style_attribute(DOM::Element&);
  107. CSSRule* parse_as_css_rule();
  108. Optional<StyleProperty> parse_as_supports_condition();
  109. enum class SelectorParsingMode {
  110. Standard,
  111. // `<forgiving-selector-list>` and `<forgiving-relative-selector-list>`
  112. // are handled with this parameter, not as separate functions.
  113. // https://drafts.csswg.org/selectors/#forgiving-selector
  114. Forgiving
  115. };
  116. // Contrary to the name, these parse a comma-separated list of selectors, according to the spec.
  117. Optional<SelectorList> parse_as_selector(SelectorParsingMode = SelectorParsingMode::Standard);
  118. Optional<SelectorList> parse_as_relative_selector(SelectorParsingMode = SelectorParsingMode::Standard);
  119. NonnullRefPtrVector<MediaQuery> parse_as_media_query_list();
  120. RefPtr<MediaQuery> parse_as_media_query();
  121. RefPtr<Supports> parse_as_supports();
  122. RefPtr<StyleValue> parse_as_css_value(PropertyID);
  123. static RefPtr<StyleValue> parse_css_value(Badge<StyleComputer>, ParsingContext const&, PropertyID, Vector<ComponentValue> const&);
  124. private:
  125. enum class ParseError {
  126. IncludesIgnoredVendorPrefix,
  127. SyntaxError,
  128. };
  129. template<typename T>
  130. using ParseErrorOr = ErrorOr<T, ParseError>;
  131. // "Parse a stylesheet" is intended to be the normal parser entry point, for parsing stylesheets.
  132. struct ParsedStyleSheet {
  133. Optional<AK::URL> location;
  134. NonnullRefPtrVector<Rule> rules;
  135. };
  136. template<typename T>
  137. ParsedStyleSheet parse_a_stylesheet(TokenStream<T>&, Optional<AK::URL> location);
  138. // "Parse a list of rules" is intended for the content of at-rules such as @media. It differs from "Parse a stylesheet" in the handling of <CDO-token> and <CDC-token>.
  139. template<typename T>
  140. NonnullRefPtrVector<Rule> parse_a_list_of_rules(TokenStream<T>&);
  141. // "Parse a rule" is intended for use by the CSSStyleSheet#insertRule method, and similar functions which might exist, which parse text into a single rule.
  142. template<typename T>
  143. RefPtr<Rule> parse_a_rule(TokenStream<T>&);
  144. // "Parse a declaration" is used in @supports conditions. [CSS3-CONDITIONAL]
  145. template<typename T>
  146. Optional<Declaration> parse_a_declaration(TokenStream<T>&);
  147. template<typename T>
  148. Vector<DeclarationOrAtRule> parse_a_style_blocks_contents(TokenStream<T>&);
  149. // "Parse a list of declarations" is for the contents of a style attribute, which parses text into the contents of a single style rule.
  150. template<typename T>
  151. Vector<DeclarationOrAtRule> parse_a_list_of_declarations(TokenStream<T>&);
  152. // "Parse a component value" is for things that need to consume a single value, like the parsing rules for attr().
  153. template<typename T>
  154. Optional<ComponentValue> parse_a_component_value(TokenStream<T>&);
  155. // "Parse a list of component values" is for the contents of presentational attributes, which parse text into a single declaration’s value, or for parsing a stand-alone selector [SELECT] or list of Media Queries [MEDIAQ], as in Selectors API or the media HTML attribute.
  156. template<typename T>
  157. Vector<ComponentValue> parse_a_list_of_component_values(TokenStream<T>&);
  158. template<typename T>
  159. Vector<Vector<ComponentValue>> parse_a_comma_separated_list_of_component_values(TokenStream<T>&);
  160. enum class SelectorType {
  161. Standalone,
  162. Relative
  163. };
  164. template<typename T>
  165. ParseErrorOr<SelectorList> parse_a_selector_list(TokenStream<T>&, SelectorType, SelectorParsingMode = SelectorParsingMode::Standard);
  166. template<typename T>
  167. NonnullRefPtrVector<MediaQuery> parse_a_media_query_list(TokenStream<T>&);
  168. template<typename T>
  169. RefPtr<Supports> parse_a_supports(TokenStream<T>&);
  170. Optional<Selector::SimpleSelector::ANPlusBPattern> parse_a_n_plus_b_pattern(TokenStream<ComponentValue>&);
  171. enum class TopLevel {
  172. No,
  173. Yes
  174. };
  175. template<typename T>
  176. [[nodiscard]] NonnullRefPtrVector<Rule> consume_a_list_of_rules(TokenStream<T>&, TopLevel);
  177. template<typename T>
  178. [[nodiscard]] NonnullRefPtr<Rule> consume_an_at_rule(TokenStream<T>&);
  179. template<typename T>
  180. RefPtr<Rule> consume_a_qualified_rule(TokenStream<T>&);
  181. template<typename T>
  182. [[nodiscard]] Vector<DeclarationOrAtRule> consume_a_style_blocks_contents(TokenStream<T>&);
  183. template<typename T>
  184. [[nodiscard]] Vector<DeclarationOrAtRule> consume_a_list_of_declarations(TokenStream<T>&);
  185. template<typename T>
  186. Optional<Declaration> consume_a_declaration(TokenStream<T>&);
  187. template<typename T>
  188. [[nodiscard]] ComponentValue consume_a_component_value(TokenStream<T>&);
  189. template<typename T>
  190. NonnullRefPtr<Block> consume_a_simple_block(TokenStream<T>&);
  191. template<typename T>
  192. NonnullRefPtr<Function> consume_a_function(TokenStream<T>&);
  193. Optional<GeneralEnclosed> parse_general_enclosed(TokenStream<ComponentValue>&);
  194. CSSRule* parse_font_face_rule(TokenStream<ComponentValue>&);
  195. Vector<FontFace::Source> parse_font_face_src(TokenStream<ComponentValue>&);
  196. CSSRule* convert_to_rule(NonnullRefPtr<Rule>);
  197. PropertyOwningCSSStyleDeclaration* convert_to_style_declaration(Vector<DeclarationOrAtRule> declarations);
  198. Optional<StyleProperty> convert_to_style_property(Declaration const&);
  199. class Dimension {
  200. public:
  201. Dimension(Angle&& value)
  202. : m_value(move(value))
  203. {
  204. }
  205. Dimension(Frequency&& value)
  206. : m_value(move(value))
  207. {
  208. }
  209. Dimension(Length&& value)
  210. : m_value(move(value))
  211. {
  212. }
  213. Dimension(Percentage&& value)
  214. : m_value(move(value))
  215. {
  216. }
  217. Dimension(Resolution&& value)
  218. : m_value(move(value))
  219. {
  220. }
  221. Dimension(Time&& value)
  222. : m_value(move(value))
  223. {
  224. }
  225. bool is_angle() const;
  226. Angle angle() const;
  227. bool is_angle_percentage() const;
  228. AnglePercentage angle_percentage() const;
  229. bool is_frequency() const;
  230. Frequency frequency() const;
  231. bool is_frequency_percentage() const;
  232. FrequencyPercentage frequency_percentage() const;
  233. bool is_length() const;
  234. Length length() const;
  235. bool is_length_percentage() const;
  236. LengthPercentage length_percentage() const;
  237. bool is_percentage() const;
  238. Percentage percentage() const;
  239. bool is_resolution() const;
  240. Resolution resolution() const;
  241. bool is_time() const;
  242. Time time() const;
  243. bool is_time_percentage() const;
  244. TimePercentage time_percentage() const;
  245. private:
  246. Variant<Angle, Frequency, Length, Percentage, Resolution, Time> m_value;
  247. };
  248. Optional<Dimension> parse_dimension(ComponentValue const&);
  249. Optional<Color> parse_rgb_or_hsl_color(StringView function_name, Vector<ComponentValue> const&);
  250. Optional<Color> parse_color(ComponentValue const&);
  251. Optional<Length> parse_length(ComponentValue const&);
  252. Optional<Ratio> parse_ratio(TokenStream<ComponentValue>&);
  253. Optional<UnicodeRange> parse_unicode_range(TokenStream<ComponentValue>&);
  254. Optional<UnicodeRange> parse_unicode_range(StringView);
  255. enum class AllowedDataUrlType {
  256. None,
  257. Image,
  258. Font,
  259. };
  260. Optional<AK::URL> parse_url_function(ComponentValue const&, AllowedDataUrlType = AllowedDataUrlType::None);
  261. RefPtr<StyleValue> parse_linear_gradient_function(ComponentValue const&);
  262. ParseErrorOr<NonnullRefPtr<StyleValue>> parse_css_value(PropertyID, TokenStream<ComponentValue>&);
  263. RefPtr<StyleValue> parse_css_value(ComponentValue const&);
  264. RefPtr<StyleValue> parse_builtin_value(ComponentValue const&);
  265. RefPtr<StyleValue> parse_dynamic_value(ComponentValue const&);
  266. RefPtr<StyleValue> parse_calculated_value(Vector<ComponentValue> const&);
  267. RefPtr<StyleValue> parse_dimension_value(ComponentValue const&);
  268. RefPtr<StyleValue> parse_numeric_value(ComponentValue const&);
  269. RefPtr<StyleValue> parse_identifier_value(ComponentValue const&);
  270. RefPtr<StyleValue> parse_color_value(ComponentValue const&);
  271. RefPtr<StyleValue> parse_rect_value(ComponentValue const&);
  272. RefPtr<StyleValue> parse_string_value(ComponentValue const&);
  273. RefPtr<StyleValue> parse_image_value(ComponentValue const&);
  274. template<typename ParseFunction>
  275. RefPtr<StyleValue> parse_comma_separated_value_list(Vector<ComponentValue> const&, ParseFunction);
  276. RefPtr<StyleValue> parse_simple_comma_separated_value_list(Vector<ComponentValue> const&);
  277. RefPtr<StyleValue> parse_filter_value_list_value(Vector<ComponentValue> const&);
  278. RefPtr<StyleValue> parse_background_value(Vector<ComponentValue> const&);
  279. RefPtr<StyleValue> parse_single_background_position_value(TokenStream<ComponentValue>&);
  280. RefPtr<StyleValue> parse_single_background_repeat_value(TokenStream<ComponentValue>&);
  281. RefPtr<StyleValue> parse_single_background_size_value(TokenStream<ComponentValue>&);
  282. RefPtr<StyleValue> parse_border_value(Vector<ComponentValue> const&);
  283. RefPtr<StyleValue> parse_border_radius_value(Vector<ComponentValue> const&);
  284. RefPtr<StyleValue> parse_border_radius_shorthand_value(Vector<ComponentValue> const&);
  285. RefPtr<StyleValue> parse_content_value(Vector<ComponentValue> const&);
  286. RefPtr<StyleValue> parse_flex_value(Vector<ComponentValue> const&);
  287. RefPtr<StyleValue> parse_flex_flow_value(Vector<ComponentValue> const&);
  288. RefPtr<StyleValue> parse_font_value(Vector<ComponentValue> const&);
  289. RefPtr<StyleValue> parse_font_family_value(Vector<ComponentValue> const&, size_t start_index = 0);
  290. RefPtr<StyleValue> parse_list_style_value(Vector<ComponentValue> const&);
  291. RefPtr<StyleValue> parse_overflow_value(Vector<ComponentValue> const&);
  292. enum class AllowInsetKeyword {
  293. No,
  294. Yes,
  295. };
  296. RefPtr<StyleValue> parse_shadow_value(Vector<ComponentValue> const&, AllowInsetKeyword);
  297. RefPtr<StyleValue> parse_single_shadow_value(TokenStream<ComponentValue>&, AllowInsetKeyword);
  298. RefPtr<StyleValue> parse_text_decoration_value(Vector<ComponentValue> const&);
  299. RefPtr<StyleValue> parse_text_decoration_line_value(TokenStream<ComponentValue>&);
  300. RefPtr<StyleValue> parse_transform_value(Vector<ComponentValue> const&);
  301. RefPtr<StyleValue> parse_transform_origin_value(Vector<ComponentValue> const&);
  302. RefPtr<StyleValue> parse_grid_track_sizes(Vector<ComponentValue> const&);
  303. RefPtr<StyleValue> parse_grid_track_placement(Vector<ComponentValue> const&);
  304. RefPtr<StyleValue> parse_grid_track_placement_shorthand_value(Vector<ComponentValue> const&);
  305. // calc() parsing, according to https://www.w3.org/TR/css-values-3/#calc-syntax
  306. OwnPtr<CalculatedStyleValue::CalcSum> parse_calc_sum(TokenStream<ComponentValue>&);
  307. OwnPtr<CalculatedStyleValue::CalcProduct> parse_calc_product(TokenStream<ComponentValue>&);
  308. Optional<CalculatedStyleValue::CalcValue> parse_calc_value(TokenStream<ComponentValue>&);
  309. OwnPtr<CalculatedStyleValue::CalcNumberSum> parse_calc_number_sum(TokenStream<ComponentValue>&);
  310. OwnPtr<CalculatedStyleValue::CalcNumberProduct> parse_calc_number_product(TokenStream<ComponentValue>&);
  311. Optional<CalculatedStyleValue::CalcNumberValue> parse_calc_number_value(TokenStream<ComponentValue>&);
  312. OwnPtr<CalculatedStyleValue::CalcProductPartWithOperator> parse_calc_product_part_with_operator(TokenStream<ComponentValue>&);
  313. OwnPtr<CalculatedStyleValue::CalcSumPartWithOperator> parse_calc_sum_part_with_operator(TokenStream<ComponentValue>&);
  314. OwnPtr<CalculatedStyleValue::CalcNumberProductPartWithOperator> parse_calc_number_product_part_with_operator(TokenStream<ComponentValue>& tokens);
  315. OwnPtr<CalculatedStyleValue::CalcNumberSumPartWithOperator> parse_calc_number_sum_part_with_operator(TokenStream<ComponentValue>&);
  316. OwnPtr<CalculatedStyleValue::CalcSum> parse_calc_expression(Vector<ComponentValue> const&);
  317. ParseErrorOr<NonnullRefPtr<Selector>> parse_complex_selector(TokenStream<ComponentValue>&, SelectorType);
  318. ParseErrorOr<Optional<Selector::CompoundSelector>> parse_compound_selector(TokenStream<ComponentValue>&);
  319. Optional<Selector::Combinator> parse_selector_combinator(TokenStream<ComponentValue>&);
  320. ParseErrorOr<Selector::SimpleSelector> parse_attribute_simple_selector(ComponentValue const&);
  321. ParseErrorOr<Selector::SimpleSelector> parse_pseudo_simple_selector(TokenStream<ComponentValue>&);
  322. ParseErrorOr<Optional<Selector::SimpleSelector>> parse_simple_selector(TokenStream<ComponentValue>&);
  323. NonnullRefPtr<MediaQuery> parse_media_query(TokenStream<ComponentValue>&);
  324. OwnPtr<MediaCondition> parse_media_condition(TokenStream<ComponentValue>&, MediaCondition::AllowOr allow_or);
  325. Optional<MediaFeature> parse_media_feature(TokenStream<ComponentValue>&);
  326. Optional<MediaQuery::MediaType> parse_media_type(TokenStream<ComponentValue>&);
  327. OwnPtr<MediaCondition> parse_media_in_parens(TokenStream<ComponentValue>&);
  328. Optional<MediaFeatureValue> parse_media_feature_value(MediaFeatureID, TokenStream<ComponentValue>&);
  329. OwnPtr<Supports::Condition> parse_supports_condition(TokenStream<ComponentValue>&);
  330. Optional<Supports::InParens> parse_supports_in_parens(TokenStream<ComponentValue>&);
  331. Optional<Supports::Feature> parse_supports_feature(TokenStream<ComponentValue>&);
  332. static bool has_ignored_vendor_prefix(StringView);
  333. static bool is_builtin(StringView);
  334. struct PropertiesAndCustomProperties {
  335. Vector<StyleProperty> properties;
  336. HashMap<String, StyleProperty> custom_properties;
  337. };
  338. PropertiesAndCustomProperties extract_properties(Vector<DeclarationOrAtRule> const&);
  339. ParsingContext m_context;
  340. Tokenizer m_tokenizer;
  341. Vector<Token> m_tokens;
  342. TokenStream<Token> m_token_stream;
  343. };
  344. }
  345. namespace Web {
  346. CSS::CSSStyleSheet* parse_css_stylesheet(CSS::Parser::ParsingContext const&, StringView, Optional<AK::URL> location = {});
  347. CSS::ElementInlineCSSStyleDeclaration* parse_css_style_attribute(CSS::Parser::ParsingContext const&, StringView, DOM::Element&);
  348. RefPtr<CSS::StyleValue> parse_css_value(CSS::Parser::ParsingContext const&, StringView, CSS::PropertyID property_id = CSS::PropertyID::Invalid);
  349. Optional<CSS::SelectorList> parse_selector(CSS::Parser::ParsingContext const&, StringView);
  350. CSS::CSSRule* parse_css_rule(CSS::Parser::ParsingContext const&, StringView);
  351. RefPtr<CSS::MediaQuery> parse_media_query(CSS::Parser::ParsingContext const&, StringView);
  352. NonnullRefPtrVector<CSS::MediaQuery> parse_media_query_list(CSS::Parser::ParsingContext const&, StringView);
  353. RefPtr<CSS::Supports> parse_css_supports(CSS::Parser::ParsingContext const&, StringView);
  354. }