// xml.cpp
  /*
   * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
   *
   * SPDX-License-Identifier: BSD-2-Clause
   */
  6. #include <AK/LexicalPath.h>
  7. #include <AK/Queue.h>
  8. #include <AK/URL.h>
  9. #include <AK/URLParser.h>
  10. #include <LibCore/ArgsParser.h>
  11. #include <LibCore/File.h>
  12. #include <LibMain/Main.h>
  13. #include <LibXML/DOM/Document.h>
  14. #include <LibXML/DOM/Node.h>
  15. #include <LibXML/Parser/Parser.h>
  // Set by the -c/--color option; color() emits escape sequences only while this is true.
  static bool g_color { false };
  // Set by the -o/--only-contents option; dump(XML::Document&) then prints only the root node,
  // without the XML declaration, processing instructions or doctype.
  static bool g_only_contents { false };
  // Syntactic categories that color() maps to terminal colors.
  enum class ColorRole {
      PITag,          // "<?" and "?>" delimiters
      PITarget,       // processing instruction targets; also external-id literals in the doctype
      PIData,         // processing instruction data
      AttributeName,  // attribute names
      Eq,             // "=" between attribute name and value (no color of its own)
      AttributeValue, // quoted attribute and entity values
      Tag,            // element tag markup and declared names
      Text,           // character data (no color of its own)
      Comment,        // "<!-- ... -->"
      Reset,          // restores the terminal default
      Doctype,        // "<!DOCTYPE", "<!ELEMENT", "<!ATTLIST", "<!ENTITY" markup
      Keyword,        // DTD keywords such as EMPTY, CDATA, #REQUIRED, PUBLIC
  };
  32. static void color(ColorRole role)
  33. {
  34. if (!g_color)
  35. return;
  36. switch (role) {
  37. case ColorRole::PITag:
  38. case ColorRole::Doctype:
  39. out("\x1b[{};{}m", 1, "38;5;223");
  40. break;
  41. case ColorRole::PITarget:
  42. out("\x1b[{};{}m", 1, "38;5;23");
  43. break;
  44. case ColorRole::PIData:
  45. out("\x1b[{};{}m", 1, "38;5;43");
  46. break;
  47. case ColorRole::AttributeName:
  48. out("\x1b[38;5;27m");
  49. break;
  50. case ColorRole::Eq:
  51. break;
  52. case ColorRole::AttributeValue:
  53. out("\x1b[38;5;46m");
  54. break;
  55. case ColorRole::Tag:
  56. out("\x1b[{};{}m", 1, "38;5;220");
  57. break;
  58. case ColorRole::Text:
  59. break;
  60. case ColorRole::Comment:
  61. out("\x1b[{};{}m", 3, "38;5;250");
  62. break;
  63. case ColorRole::Reset:
  64. out("\x1b[0m");
  65. break;
  66. case ColorRole::Keyword:
  67. out("\x1b[38;5;40m");
  68. break;
  69. }
  70. }
  71. static void dump(XML::Node const& node)
  72. {
  73. node.content.visit(
  74. [](XML::Node::Text const& text) {
  75. out("{}", text.builder.string_view());
  76. },
  77. [](XML::Node::Comment const& comment) {
  78. color(ColorRole::Comment);
  79. out("<!--{}-->", comment.text);
  80. color(ColorRole::Reset);
  81. },
  82. [](XML::Node::Element const& element) {
  83. color(ColorRole::Tag);
  84. out("<{}", element.name);
  85. color(ColorRole::Reset);
  86. if (!element.attributes.is_empty()) {
  87. for (auto& attribute : element.attributes) {
  88. auto quote = attribute.value.contains('"') ? '\'' : '"';
  89. color(ColorRole::AttributeName);
  90. out(" {}", attribute.key);
  91. color(ColorRole::Eq);
  92. out("=");
  93. color(ColorRole::AttributeValue);
  94. out("{}{}{}", quote, attribute.value, quote);
  95. color(ColorRole::Reset);
  96. }
  97. }
  98. if (element.children.is_empty()) {
  99. color(ColorRole::Tag);
  100. out("/>");
  101. color(ColorRole::Reset);
  102. } else {
  103. color(ColorRole::Tag);
  104. out(">");
  105. color(ColorRole::Reset);
  106. for (auto& node : element.children)
  107. dump(node);
  108. color(ColorRole::Tag);
  109. out("</{}>", element.name);
  110. color(ColorRole::Reset);
  111. }
  112. });
  113. }
  114. static void dump(XML::Document& document)
  115. {
  116. if (!g_only_contents) {
  117. {
  118. color(ColorRole::PITag);
  119. out("<?");
  120. color(ColorRole::Reset);
  121. color(ColorRole::PITarget);
  122. out("xml");
  123. color(ColorRole::Reset);
  124. color(ColorRole::PIData);
  125. out(" version='{}'", document.version() == XML::Version::Version10 ? "1.0" : "1.1");
  126. color(ColorRole::Reset);
  127. color(ColorRole::PITag);
  128. outln("?>");
  129. }
  130. for (auto& pi : document.processing_instructions()) {
  131. color(ColorRole::PITag);
  132. out("<?");
  133. color(ColorRole::Reset);
  134. color(ColorRole::PITarget);
  135. out("{}", pi.key);
  136. color(ColorRole::Reset);
  137. if (!pi.value.is_empty()) {
  138. color(ColorRole::PIData);
  139. out(" {}", pi.value);
  140. color(ColorRole::Reset);
  141. }
  142. color(ColorRole::PITag);
  143. outln("?>");
  144. }
  145. if (auto maybe_doctype = document.doctype(); maybe_doctype.has_value()) {
  146. auto& doctype = *maybe_doctype;
  147. color(ColorRole::Doctype);
  148. out("<!DOCTYPE ");
  149. color(ColorRole::Tag);
  150. out("{}", doctype.type);
  151. if (!doctype.markup_declarations.is_empty()) {
  152. color(ColorRole::Reset);
  153. out(" [\n");
  154. for (auto& entry : doctype.markup_declarations) {
  155. entry.visit(
  156. [&](XML::ElementDeclaration const& element) {
  157. color(ColorRole::Doctype);
  158. out(" <!ELEMENT ");
  159. color(ColorRole::Tag);
  160. out("{} ", element.type);
  161. element.content_spec.visit(
  162. [&](XML::ElementDeclaration::Empty const&) {
  163. color(ColorRole::Keyword);
  164. out("EMPTY");
  165. },
  166. [&](XML::ElementDeclaration::Any const&) {
  167. color(ColorRole::Keyword);
  168. out("ANY");
  169. },
  170. [&](XML::ElementDeclaration::Mixed const&) {
  171. },
  172. [&](XML::ElementDeclaration::Children const&) {
  173. });
  174. color(ColorRole::Doctype);
  175. outln(">");
  176. },
  177. [&](XML::AttributeListDeclaration const& list) {
  178. color(ColorRole::Doctype);
  179. out(" <!ATTLIST ");
  180. color(ColorRole::Tag);
  181. out("{}", list.type);
  182. for (auto& attribute : list.attributes) {
  183. color(ColorRole::AttributeName);
  184. out(" {} ", attribute.name);
  185. color(ColorRole::Keyword);
  186. attribute.type.visit(
  187. [](XML::AttributeListDeclaration::StringType) {
  188. out("CDATA");
  189. },
  190. [](XML::AttributeListDeclaration::TokenizedType type) {
  191. switch (type) {
  192. case XML::AttributeListDeclaration::TokenizedType::ID:
  193. out("ID");
  194. break;
  195. case XML::AttributeListDeclaration::TokenizedType::IDRef:
  196. out("IDREF");
  197. break;
  198. case XML::AttributeListDeclaration::TokenizedType::IDRefs:
  199. out("IDREFS");
  200. break;
  201. case XML::AttributeListDeclaration::TokenizedType::Entity:
  202. out("ENTITY");
  203. break;
  204. case XML::AttributeListDeclaration::TokenizedType::Entities:
  205. out("ENTITIES");
  206. break;
  207. case XML::AttributeListDeclaration::TokenizedType::NMToken:
  208. out("NMTOKEN");
  209. break;
  210. case XML::AttributeListDeclaration::TokenizedType::NMTokens:
  211. out("NMTOKENS");
  212. break;
  213. }
  214. },
  215. [](XML::AttributeListDeclaration::NotationType const& type) {
  216. out("NOTATION ");
  217. color(ColorRole::Reset);
  218. out("( ");
  219. bool first = true;
  220. for (auto& name : type.names) {
  221. color(ColorRole::Reset);
  222. if (first)
  223. first = false;
  224. else
  225. out(" | ");
  226. color(ColorRole::AttributeValue);
  227. out("{}", name);
  228. }
  229. color(ColorRole::Reset);
  230. out(" )");
  231. },
  232. [](XML::AttributeListDeclaration::Enumeration const& type) {
  233. color(ColorRole::Reset);
  234. out("( ");
  235. bool first = true;
  236. for (auto& name : type.tokens) {
  237. color(ColorRole::Reset);
  238. if (first)
  239. first = false;
  240. else
  241. out(" | ");
  242. color(ColorRole::AttributeValue);
  243. out("{}", name);
  244. }
  245. color(ColorRole::Reset);
  246. out(" )");
  247. });
  248. out(" ");
  249. attribute.default_.visit(
  250. [](XML::AttributeListDeclaration::Required) {
  251. color(ColorRole::Keyword);
  252. out("#REQUIRED");
  253. },
  254. [](XML::AttributeListDeclaration::Implied) {
  255. color(ColorRole::Keyword);
  256. out("#IMPLIED");
  257. },
  258. [](XML::AttributeListDeclaration::Fixed const& fixed) {
  259. color(ColorRole::Keyword);
  260. out("#FIXED ");
  261. color(ColorRole::AttributeValue);
  262. out("\"{}\"", fixed.value);
  263. },
  264. [](XML::AttributeListDeclaration::DefaultValue const& default_) {
  265. color(ColorRole::AttributeValue);
  266. out("\"{}\"", default_.value);
  267. });
  268. }
  269. color(ColorRole::Doctype);
  270. outln(">");
  271. },
  272. [&](XML::EntityDeclaration const& entity) {
  273. color(ColorRole::Doctype);
  274. out(" <!ENTITY ");
  275. entity.visit(
  276. [](XML::GEDeclaration const& declaration) {
  277. color(ColorRole::Tag);
  278. out("{} ", declaration.name);
  279. declaration.definition.visit(
  280. [](String const& value) {
  281. color(ColorRole::AttributeValue);
  282. out("\"{}\"", value);
  283. },
  284. [](XML::EntityDefinition const& definition) {
  285. if (definition.id.public_id.has_value()) {
  286. color(ColorRole::Keyword);
  287. out("PUBLIC ");
  288. color(ColorRole::PITarget);
  289. out("\"{}\" ", definition.id.public_id->public_literal);
  290. } else {
  291. color(ColorRole::Keyword);
  292. out("SYSTEM ");
  293. }
  294. color(ColorRole::PITarget);
  295. out("\"{}\" ", definition.id.system_id.system_literal);
  296. if (definition.notation.has_value()) {
  297. color(ColorRole::Keyword);
  298. out(" NDATA ");
  299. color(ColorRole::PITarget);
  300. out("{}", *definition.notation);
  301. }
  302. });
  303. color(ColorRole::Tag);
  304. outln(">");
  305. },
  306. [](XML::PEDeclaration const& declaration) {
  307. color(ColorRole::Tag);
  308. out("{} ", declaration.name);
  309. declaration.definition.visit(
  310. [](String const& value) {
  311. color(ColorRole::AttributeValue);
  312. out("\"{}\"", value);
  313. },
  314. [](XML::ExternalID const& id) {
  315. if (id.public_id.has_value()) {
  316. color(ColorRole::Keyword);
  317. out("PUBLIC ");
  318. color(ColorRole::PITarget);
  319. out("\"{}\" ", id.public_id->public_literal);
  320. } else {
  321. color(ColorRole::Keyword);
  322. out("SYSTEM ");
  323. }
  324. color(ColorRole::PITarget);
  325. out("\"{}\"", id.system_id.system_literal);
  326. });
  327. color(ColorRole::Tag);
  328. outln(">");
  329. });
  330. },
  331. [&](XML::NotationDeclaration const&) {
  332. });
  333. }
  334. color(ColorRole::Reset);
  335. out("]");
  336. }
  337. color(ColorRole::Doctype);
  338. outln(">");
  339. }
  340. }
  341. dump(document.root());
  342. }
  343. static String s_path;
  344. static auto parse(StringView contents)
  345. {
  346. return XML::Parser {
  347. contents,
  348. {
  349. .preserve_comments = true,
  350. .resolve_external_resource = [&](XML::SystemID const& system_id, Optional<XML::PublicID> const&) -> ErrorOr<String> {
  351. auto base = URL::create_with_file_scheme(s_path);
  352. auto url = URLParser::parse(system_id.system_literal, &base);
  353. if (!url.is_valid())
  354. return Error::from_string_literal("Invalid URL");
  355. if (url.scheme() != "file")
  356. return Error::from_string_literal("NYI: Nonlocal entity");
  357. auto file = TRY(Core::File::open(url.path(), Core::OpenMode::ReadOnly));
  358. return String::copy(file->read_all());
  359. },
  360. },
  361. };
  362. }
  // Outcome recorded for each test executed by do_run_tests().
  enum class TestResult {
      Passed,       // the parser behaved as the test's TYPE demands
      Failed,       // the parser accepted or rejected the input contrary to its TYPE, or produced a different tree
      RunnerFailed, // the test could not be carried out (bad URL, unreadable or unparsable fixture)
  };
  368. static HashMap<String, TestResult> s_test_results {};
  369. static void do_run_tests(XML::Document& document)
  370. {
  371. auto& root = document.root().content.get<XML::Node::Element>();
  372. VERIFY(root.name == "TESTSUITE");
  373. Queue<XML::Node*> suites;
  374. auto dump_cases = [&](auto& root) {
  375. for (auto& node : root.children) {
  376. auto element = node.content.template get_pointer<XML::Node::Element>();
  377. if (!element)
  378. continue;
  379. if (element->name != "TESTCASES" && element->name != "TEST")
  380. continue;
  381. suites.enqueue(&node);
  382. }
  383. };
  384. dump_cases(root);
  385. auto base_path = LexicalPath::dirname(s_path);
  386. while (!suites.is_empty()) {
  387. auto& node = *suites.dequeue();
  388. auto& suite = node.content.get<XML::Node::Element>();
  389. if (suite.name == "TESTCASES") {
  390. dump_cases(suite);
  391. continue;
  392. }
  393. if (suite.name == "TEST") {
  394. Vector<StringView> bases;
  395. for (auto* parent = node.parent; parent; parent = parent->parent) {
  396. auto& attributes = parent->content.get<XML::Node::Element>().attributes;
  397. auto it = attributes.find("xml:base");
  398. if (it == attributes.end())
  399. continue;
  400. bases.append(it->value);
  401. }
  402. auto type = suite.attributes.find("TYPE")->value;
  403. StringBuilder path_builder;
  404. path_builder.append(base_path);
  405. path_builder.append('/');
  406. for (auto& entry : bases.in_reverse()) {
  407. path_builder.append(entry);
  408. path_builder.append('/');
  409. }
  410. auto test_base_path = path_builder.to_string();
  411. path_builder.append(suite.attributes.find("URI")->value);
  412. auto url = URL::create_with_file_scheme(path_builder.string_view());
  413. if (!url.is_valid()) {
  414. warnln("Invalid URL {}", path_builder.string_view());
  415. s_test_results.set(path_builder.string_view(), TestResult::RunnerFailed);
  416. continue;
  417. }
  418. auto file_result = Core::File::open(url.path(), Core::OpenMode::ReadOnly);
  419. if (file_result.is_error()) {
  420. warnln("Read error for {}: {}", url.path(), file_result.error());
  421. s_test_results.set(url.path(), TestResult::RunnerFailed);
  422. continue;
  423. }
  424. warnln("Running test {}", url.path());
  425. auto contents = file_result.value()->read_all();
  426. auto parser = parse(contents);
  427. auto doc_or_error = parser.parse();
  428. if (doc_or_error.is_error()) {
  429. if (type == "invalid" || type == "error" || type == "not-wf")
  430. s_test_results.set(url.path(), TestResult::Passed);
  431. else
  432. s_test_results.set(url.path(), TestResult::Failed);
  433. continue;
  434. }
  435. auto out = suite.attributes.find("OUTPUT");
  436. if (out != suite.attributes.end()) {
  437. auto out_path = LexicalPath::join(test_base_path, out->value).string();
  438. auto file_result = Core::File::open(out_path, Core::OpenMode::ReadOnly);
  439. if (file_result.is_error()) {
  440. warnln("Read error for {}: {}", out_path, file_result.error());
  441. s_test_results.set(url.path(), TestResult::RunnerFailed);
  442. continue;
  443. }
  444. auto contents = file_result.value()->read_all();
  445. auto parser = parse(contents);
  446. auto out_doc_or_error = parser.parse();
  447. if (out_doc_or_error.is_error()) {
  448. warnln("Parse error for {}: {}", out_path, out_doc_or_error.error());
  449. s_test_results.set(url.path(), TestResult::RunnerFailed);
  450. continue;
  451. }
  452. auto out_doc = out_doc_or_error.release_value();
  453. if (out_doc.root() != doc_or_error.value().root()) {
  454. s_test_results.set(url.path(), TestResult::Failed);
  455. continue;
  456. }
  457. }
  458. if (type == "invalid" || type == "error" || type == "not-wf")
  459. s_test_results.set(url.path(), TestResult::Failed);
  460. else
  461. s_test_results.set(url.path(), TestResult::Passed);
  462. }
  463. }
  464. }
  465. ErrorOr<int> serenity_main(Main::Arguments arguments)
  466. {
  467. StringView filename;
  468. bool run_tests { false };
  469. Core::ArgsParser parser;
  470. parser.set_general_help("Parse and dump XML files");
  471. parser.add_option(g_color, "Syntax highlight the output", "color", 'c');
  472. parser.add_option(g_only_contents, "Only display markup and text", "only-contents", 'o');
  473. parser.add_option(run_tests, "Run tests", "run-tests", 't');
  474. parser.add_positional_argument(filename, "File to read from", "file");
  475. parser.parse(arguments);
  476. s_path = Core::File::real_path_for(filename);
  477. auto file = TRY(Core::File::open(s_path, Core::OpenMode::ReadOnly));
  478. auto contents = file->read_all();
  479. auto xml_parser = parse(contents);
  480. auto result = xml_parser.parse();
  481. if (result.is_error()) {
  482. if (xml_parser.parse_error_causes().is_empty()) {
  483. warnln("{}", result.error());
  484. } else {
  485. warnln("{}; caused by:", result.error());
  486. for (auto const& cause : xml_parser.parse_error_causes())
  487. warnln(" {}", cause);
  488. }
  489. return 1;
  490. }
  491. auto doc = result.release_value();
  492. if (run_tests) {
  493. do_run_tests(doc);
  494. size_t passed = 0;
  495. size_t failed = 0;
  496. size_t runner_error = 0;
  497. size_t total = 0;
  498. for (auto& entry : s_test_results) {
  499. total++;
  500. switch (entry.value) {
  501. case TestResult::Passed:
  502. passed++;
  503. break;
  504. case TestResult::Failed:
  505. failed++;
  506. break;
  507. case TestResult::RunnerFailed:
  508. runner_error++;
  509. break;
  510. }
  511. }
  512. outln("{} passed, {} failed, {} runner failed of {} tests run.", passed, failed, runner_error, total);
  513. return 0;
  514. }
  515. dump(doc);
  516. if (!g_only_contents)
  517. outln();
  518. return 0;
  519. }