xml.cpp 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565
  1. /*
  2. * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/LexicalPath.h>
  7. #include <AK/Queue.h>
  8. #include <AK/URL.h>
  9. #include <AK/URLParser.h>
  10. #include <LibCore/ArgsParser.h>
  11. #include <LibCore/DeprecatedFile.h>
  12. #include <LibCore/File.h>
  13. #include <LibMain/Main.h>
  14. #include <LibXML/DOM/Document.h>
  15. #include <LibXML/DOM/Node.h>
  16. #include <LibXML/Parser/Parser.h>
  // Set by --color / -c: wrap dumped markup in ANSI color escape sequences.
  static bool g_color { false };
  // Set by --only-contents / -o: skip the XML declaration, processing instructions and doctype.
  static bool g_only_contents { false };
  // Syntactic categories of dumped XML; color() maps each one to a terminal style.
  enum class ColorRole {
      PITag,          // "<?" and "?>" delimiters
      PITarget,       // processing-instruction target (also used for external ID literals)
      PIData,         // processing-instruction data
      AttributeName,  // attribute names
      Eq,             // the "=" between an attribute name and its value (left unstyled)
      AttributeValue, // attribute values and other quoted literals
      Tag,            // element tags and declared names
      Text,           // character data (left unstyled)
      Comment,        // "<!-- ... -->"
      Reset,          // restore the terminal's default style
      Doctype,        // "<!DOCTYPE", "<!ELEMENT", "<!ATTLIST", "<!ENTITY" markup
      Keyword,        // DTD keywords such as EMPTY, CDATA, #REQUIRED, PUBLIC
  };
  33. static void color(ColorRole role)
  34. {
  35. if (!g_color)
  36. return;
  37. switch (role) {
  38. case ColorRole::PITag:
  39. case ColorRole::Doctype:
  40. out("\x1b[{};{}m", 1, "38;5;223");
  41. break;
  42. case ColorRole::PITarget:
  43. out("\x1b[{};{}m", 1, "38;5;23");
  44. break;
  45. case ColorRole::PIData:
  46. out("\x1b[{};{}m", 1, "38;5;43");
  47. break;
  48. case ColorRole::AttributeName:
  49. out("\x1b[38;5;27m");
  50. break;
  51. case ColorRole::Eq:
  52. break;
  53. case ColorRole::AttributeValue:
  54. out("\x1b[38;5;46m");
  55. break;
  56. case ColorRole::Tag:
  57. out("\x1b[{};{}m", 1, "38;5;220");
  58. break;
  59. case ColorRole::Text:
  60. break;
  61. case ColorRole::Comment:
  62. out("\x1b[{};{}m", 3, "38;5;250");
  63. break;
  64. case ColorRole::Reset:
  65. out("\x1b[0m");
  66. break;
  67. case ColorRole::Keyword:
  68. out("\x1b[38;5;40m");
  69. break;
  70. }
  71. }
  72. static void dump(XML::Node const& node)
  73. {
  74. node.content.visit(
  75. [](XML::Node::Text const& text) {
  76. out("{}", text.builder.string_view());
  77. },
  78. [](XML::Node::Comment const& comment) {
  79. color(ColorRole::Comment);
  80. out("<!--{}-->", comment.text);
  81. color(ColorRole::Reset);
  82. },
  83. [](XML::Node::Element const& element) {
  84. color(ColorRole::Tag);
  85. out("<{}", element.name);
  86. color(ColorRole::Reset);
  87. if (!element.attributes.is_empty()) {
  88. for (auto& attribute : element.attributes) {
  89. auto quote = attribute.value.contains('"') ? '\'' : '"';
  90. color(ColorRole::AttributeName);
  91. out(" {}", attribute.key);
  92. color(ColorRole::Eq);
  93. out("=");
  94. color(ColorRole::AttributeValue);
  95. out("{}{}{}", quote, attribute.value, quote);
  96. color(ColorRole::Reset);
  97. }
  98. }
  99. if (element.children.is_empty()) {
  100. color(ColorRole::Tag);
  101. out("/>");
  102. color(ColorRole::Reset);
  103. } else {
  104. color(ColorRole::Tag);
  105. out(">");
  106. color(ColorRole::Reset);
  107. for (auto& node : element.children)
  108. dump(*node);
  109. color(ColorRole::Tag);
  110. out("</{}>", element.name);
  111. color(ColorRole::Reset);
  112. }
  113. });
  114. }
  115. static void dump(XML::Document& document)
  116. {
  117. if (!g_only_contents) {
  118. {
  119. color(ColorRole::PITag);
  120. out("<?");
  121. color(ColorRole::Reset);
  122. color(ColorRole::PITarget);
  123. out("xml");
  124. color(ColorRole::Reset);
  125. color(ColorRole::PIData);
  126. out(" version='{}'", document.version() == XML::Version::Version10 ? "1.0" : "1.1");
  127. color(ColorRole::Reset);
  128. color(ColorRole::PITag);
  129. outln("?>");
  130. }
  131. for (auto& pi : document.processing_instructions()) {
  132. color(ColorRole::PITag);
  133. out("<?");
  134. color(ColorRole::Reset);
  135. color(ColorRole::PITarget);
  136. out("{}", pi.key);
  137. color(ColorRole::Reset);
  138. if (!pi.value.is_empty()) {
  139. color(ColorRole::PIData);
  140. out(" {}", pi.value);
  141. color(ColorRole::Reset);
  142. }
  143. color(ColorRole::PITag);
  144. outln("?>");
  145. }
  146. if (auto maybe_doctype = document.doctype(); maybe_doctype.has_value()) {
  147. auto& doctype = *maybe_doctype;
  148. color(ColorRole::Doctype);
  149. out("<!DOCTYPE ");
  150. color(ColorRole::Tag);
  151. out("{}", doctype.type);
  152. if (!doctype.markup_declarations.is_empty()) {
  153. color(ColorRole::Reset);
  154. out(" [\n");
  155. for (auto& entry : doctype.markup_declarations) {
  156. entry.visit(
  157. [&](XML::ElementDeclaration const& element) {
  158. color(ColorRole::Doctype);
  159. out(" <!ELEMENT ");
  160. color(ColorRole::Tag);
  161. out("{} ", element.type);
  162. element.content_spec.visit(
  163. [&](XML::ElementDeclaration::Empty const&) {
  164. color(ColorRole::Keyword);
  165. out("EMPTY");
  166. },
  167. [&](XML::ElementDeclaration::Any const&) {
  168. color(ColorRole::Keyword);
  169. out("ANY");
  170. },
  171. [&](XML::ElementDeclaration::Mixed const&) {
  172. },
  173. [&](XML::ElementDeclaration::Children const&) {
  174. });
  175. color(ColorRole::Doctype);
  176. outln(">");
  177. },
  178. [&](XML::AttributeListDeclaration const& list) {
  179. color(ColorRole::Doctype);
  180. out(" <!ATTLIST ");
  181. color(ColorRole::Tag);
  182. out("{}", list.type);
  183. for (auto& attribute : list.attributes) {
  184. color(ColorRole::AttributeName);
  185. out(" {} ", attribute.name);
  186. color(ColorRole::Keyword);
  187. attribute.type.visit(
  188. [](XML::AttributeListDeclaration::StringType) {
  189. out("CDATA");
  190. },
  191. [](XML::AttributeListDeclaration::TokenizedType type) {
  192. switch (type) {
  193. case XML::AttributeListDeclaration::TokenizedType::ID:
  194. out("ID");
  195. break;
  196. case XML::AttributeListDeclaration::TokenizedType::IDRef:
  197. out("IDREF");
  198. break;
  199. case XML::AttributeListDeclaration::TokenizedType::IDRefs:
  200. out("IDREFS");
  201. break;
  202. case XML::AttributeListDeclaration::TokenizedType::Entity:
  203. out("ENTITY");
  204. break;
  205. case XML::AttributeListDeclaration::TokenizedType::Entities:
  206. out("ENTITIES");
  207. break;
  208. case XML::AttributeListDeclaration::TokenizedType::NMToken:
  209. out("NMTOKEN");
  210. break;
  211. case XML::AttributeListDeclaration::TokenizedType::NMTokens:
  212. out("NMTOKENS");
  213. break;
  214. }
  215. },
  216. [](XML::AttributeListDeclaration::NotationType const& type) {
  217. out("NOTATION ");
  218. color(ColorRole::Reset);
  219. out("( ");
  220. bool first = true;
  221. for (auto& name : type.names) {
  222. color(ColorRole::Reset);
  223. if (first)
  224. first = false;
  225. else
  226. out(" | ");
  227. color(ColorRole::AttributeValue);
  228. out("{}", name);
  229. }
  230. color(ColorRole::Reset);
  231. out(" )");
  232. },
  233. [](XML::AttributeListDeclaration::Enumeration const& type) {
  234. color(ColorRole::Reset);
  235. out("( ");
  236. bool first = true;
  237. for (auto& name : type.tokens) {
  238. color(ColorRole::Reset);
  239. if (first)
  240. first = false;
  241. else
  242. out(" | ");
  243. color(ColorRole::AttributeValue);
  244. out("{}", name);
  245. }
  246. color(ColorRole::Reset);
  247. out(" )");
  248. });
  249. out(" ");
  250. attribute.default_.visit(
  251. [](XML::AttributeListDeclaration::Required) {
  252. color(ColorRole::Keyword);
  253. out("#REQUIRED");
  254. },
  255. [](XML::AttributeListDeclaration::Implied) {
  256. color(ColorRole::Keyword);
  257. out("#IMPLIED");
  258. },
  259. [](XML::AttributeListDeclaration::Fixed const& fixed) {
  260. color(ColorRole::Keyword);
  261. out("#FIXED ");
  262. color(ColorRole::AttributeValue);
  263. out("\"{}\"", fixed.value);
  264. },
  265. [](XML::AttributeListDeclaration::DefaultValue const& default_) {
  266. color(ColorRole::AttributeValue);
  267. out("\"{}\"", default_.value);
  268. });
  269. }
  270. color(ColorRole::Doctype);
  271. outln(">");
  272. },
  273. [&](XML::EntityDeclaration const& entity) {
  274. color(ColorRole::Doctype);
  275. out(" <!ENTITY ");
  276. entity.visit(
  277. [](XML::GEDeclaration const& declaration) {
  278. color(ColorRole::Tag);
  279. out("{} ", declaration.name);
  280. declaration.definition.visit(
  281. [](DeprecatedString const& value) {
  282. color(ColorRole::AttributeValue);
  283. out("\"{}\"", value);
  284. },
  285. [](XML::EntityDefinition const& definition) {
  286. if (definition.id.public_id.has_value()) {
  287. color(ColorRole::Keyword);
  288. out("PUBLIC ");
  289. color(ColorRole::PITarget);
  290. out("\"{}\" ", definition.id.public_id->public_literal);
  291. } else {
  292. color(ColorRole::Keyword);
  293. out("SYSTEM ");
  294. }
  295. color(ColorRole::PITarget);
  296. out("\"{}\" ", definition.id.system_id.system_literal);
  297. if (definition.notation.has_value()) {
  298. color(ColorRole::Keyword);
  299. out(" NDATA ");
  300. color(ColorRole::PITarget);
  301. out("{}", *definition.notation);
  302. }
  303. });
  304. color(ColorRole::Tag);
  305. outln(">");
  306. },
  307. [](XML::PEDeclaration const& declaration) {
  308. color(ColorRole::Tag);
  309. out("{} ", declaration.name);
  310. declaration.definition.visit(
  311. [](DeprecatedString const& value) {
  312. color(ColorRole::AttributeValue);
  313. out("\"{}\"", value);
  314. },
  315. [](XML::ExternalID const& id) {
  316. if (id.public_id.has_value()) {
  317. color(ColorRole::Keyword);
  318. out("PUBLIC ");
  319. color(ColorRole::PITarget);
  320. out("\"{}\" ", id.public_id->public_literal);
  321. } else {
  322. color(ColorRole::Keyword);
  323. out("SYSTEM ");
  324. }
  325. color(ColorRole::PITarget);
  326. out("\"{}\"", id.system_id.system_literal);
  327. });
  328. color(ColorRole::Tag);
  329. outln(">");
  330. });
  331. },
  332. [&](XML::NotationDeclaration const&) {
  333. });
  334. }
  335. color(ColorRole::Reset);
  336. out("]");
  337. }
  338. color(ColorRole::Doctype);
  339. outln(">");
  340. }
  341. }
  342. dump(document.root());
  343. }
  344. static DeprecatedString s_path;
  345. static auto parse(StringView contents)
  346. {
  347. return XML::Parser {
  348. contents,
  349. {
  350. .preserve_comments = true,
  351. .resolve_external_resource = [&](XML::SystemID const& system_id, Optional<XML::PublicID> const&) -> ErrorOr<DeprecatedString> {
  352. auto base = URL::create_with_file_scheme(s_path);
  353. auto url = URLParser::parse(system_id.system_literal, base);
  354. if (!url.is_valid())
  355. return Error::from_string_literal("Invalid URL");
  356. if (url.scheme() != "file")
  357. return Error::from_string_literal("NYI: Nonlocal entity");
  358. auto file = TRY(Core::File::open(url.path(), Core::File::OpenMode::Read));
  359. return DeprecatedString::copy(TRY(file->read_until_eof()));
  360. },
  361. },
  362. };
  363. }
  // Outcome recorded for each test executed by do_run_tests().
  enum class TestResult {
      Passed,       // parser behavior matched what the test expects
      Failed,       // parser behavior did not match what the test expects
      RunnerFailed, // the test could not be carried out (e.g. a file could not be read)
  };
  369. static HashMap<DeprecatedString, TestResult> s_test_results {};
  370. static void do_run_tests(XML::Document& document)
  371. {
  372. auto& root = document.root().content.get<XML::Node::Element>();
  373. VERIFY(root.name == "TESTSUITE");
  374. Queue<XML::Node*> suites;
  375. auto dump_cases = [&](auto& root) {
  376. for (auto& node : root.children) {
  377. auto element = node->content.template get_pointer<XML::Node::Element>();
  378. if (!element)
  379. continue;
  380. if (element->name != "TESTCASES" && element->name != "TEST")
  381. continue;
  382. suites.enqueue(node);
  383. }
  384. };
  385. dump_cases(root);
  386. auto base_path = LexicalPath::dirname(s_path);
  387. while (!suites.is_empty()) {
  388. auto& node = *suites.dequeue();
  389. auto& suite = node.content.get<XML::Node::Element>();
  390. if (suite.name == "TESTCASES") {
  391. dump_cases(suite);
  392. continue;
  393. }
  394. if (suite.name == "TEST") {
  395. Vector<StringView> bases;
  396. for (auto* parent = node.parent; parent; parent = parent->parent) {
  397. auto& attributes = parent->content.get<XML::Node::Element>().attributes;
  398. auto it = attributes.find("xml:base");
  399. if (it == attributes.end())
  400. continue;
  401. bases.append(it->value);
  402. }
  403. auto type = suite.attributes.find("TYPE")->value;
  404. StringBuilder path_builder;
  405. path_builder.append(base_path);
  406. path_builder.append('/');
  407. for (auto& entry : bases.in_reverse()) {
  408. path_builder.append(entry);
  409. path_builder.append('/');
  410. }
  411. auto test_base_path = path_builder.to_deprecated_string();
  412. path_builder.append(suite.attributes.find("URI")->value);
  413. auto url = URL::create_with_file_scheme(path_builder.string_view());
  414. if (!url.is_valid()) {
  415. warnln("Invalid URL {}", path_builder.string_view());
  416. s_test_results.set(path_builder.string_view(), TestResult::RunnerFailed);
  417. continue;
  418. }
  419. auto file_result = Core::File::open(url.path(), Core::File::OpenMode::Read);
  420. if (file_result.is_error()) {
  421. warnln("Read error for {}: {}", url.path(), file_result.error());
  422. s_test_results.set(url.path(), TestResult::RunnerFailed);
  423. continue;
  424. }
  425. warnln("Running test {}", url.path());
  426. auto contents = file_result.value()->read_until_eof();
  427. if (contents.is_error()) {
  428. warnln("Read error for {}: {}", url.path(), contents.error());
  429. s_test_results.set(url.path(), TestResult::RunnerFailed);
  430. continue;
  431. }
  432. auto parser = parse(contents.value());
  433. auto doc_or_error = parser.parse();
  434. if (doc_or_error.is_error()) {
  435. if (type == "invalid" || type == "error" || type == "not-wf")
  436. s_test_results.set(url.path(), TestResult::Passed);
  437. else
  438. s_test_results.set(url.path(), TestResult::Failed);
  439. continue;
  440. }
  441. auto out = suite.attributes.find("OUTPUT");
  442. if (out != suite.attributes.end()) {
  443. auto out_path = LexicalPath::join(test_base_path, out->value).string();
  444. auto file_result = Core::File::open(out_path, Core::File::OpenMode::Read);
  445. if (file_result.is_error()) {
  446. warnln("Read error for {}: {}", out_path, file_result.error());
  447. s_test_results.set(url.path(), TestResult::RunnerFailed);
  448. continue;
  449. }
  450. auto contents = file_result.value()->read_until_eof();
  451. if (contents.is_error()) {
  452. warnln("Read error for {}: {}", out_path, contents.error());
  453. s_test_results.set(url.path(), TestResult::RunnerFailed);
  454. continue;
  455. }
  456. auto parser = parse(contents.value());
  457. auto out_doc_or_error = parser.parse();
  458. if (out_doc_or_error.is_error()) {
  459. warnln("Parse error for {}: {}", out_path, out_doc_or_error.error());
  460. s_test_results.set(url.path(), TestResult::RunnerFailed);
  461. continue;
  462. }
  463. auto out_doc = out_doc_or_error.release_value();
  464. if (out_doc.root() != doc_or_error.value().root()) {
  465. s_test_results.set(url.path(), TestResult::Failed);
  466. continue;
  467. }
  468. }
  469. if (type == "invalid" || type == "error" || type == "not-wf")
  470. s_test_results.set(url.path(), TestResult::Failed);
  471. else
  472. s_test_results.set(url.path(), TestResult::Passed);
  473. }
  474. }
  475. }
  476. ErrorOr<int> serenity_main(Main::Arguments arguments)
  477. {
  478. StringView filename;
  479. bool run_tests { false };
  480. Core::ArgsParser parser;
  481. parser.set_general_help("Parse and dump XML files");
  482. parser.add_option(g_color, "Syntax highlight the output", "color", 'c');
  483. parser.add_option(g_only_contents, "Only display markup and text", "only-contents", 'o');
  484. parser.add_option(run_tests, "Run tests", "run-tests", 't');
  485. parser.add_positional_argument(filename, "File to read from", "file");
  486. parser.parse(arguments);
  487. s_path = Core::DeprecatedFile::real_path_for(filename);
  488. auto file = TRY(Core::File::open(s_path, Core::File::OpenMode::Read));
  489. auto contents = TRY(file->read_until_eof());
  490. auto xml_parser = parse(contents);
  491. auto result = xml_parser.parse();
  492. if (result.is_error()) {
  493. if (xml_parser.parse_error_causes().is_empty()) {
  494. warnln("{}", result.error());
  495. } else {
  496. warnln("{}; caused by:", result.error());
  497. for (auto const& cause : xml_parser.parse_error_causes())
  498. warnln(" {}", cause);
  499. }
  500. return 1;
  501. }
  502. auto doc = result.release_value();
  503. if (run_tests) {
  504. do_run_tests(doc);
  505. size_t passed = 0;
  506. size_t failed = 0;
  507. size_t runner_error = 0;
  508. size_t total = 0;
  509. for (auto& entry : s_test_results) {
  510. total++;
  511. switch (entry.value) {
  512. case TestResult::Passed:
  513. passed++;
  514. break;
  515. case TestResult::Failed:
  516. failed++;
  517. break;
  518. case TestResult::RunnerFailed:
  519. runner_error++;
  520. break;
  521. }
  522. }
  523. outln("{} passed, {} failed, {} runner failed of {} tests run.", passed, failed, runner_error, total);
  524. return 0;
  525. }
  526. dump(doc);
  527. if (!g_only_contents)
  528. outln();
  529. return 0;
  530. }