Explorar o código

LibPDF: Always treat `/Subtype /Image` as binary data when dumping

Sometimes, the "is mostly text" heuristic fails for images.

Before:

    Build/lagom/bin/pdf --render out.png ~/Downloads/0000/0000521.pdf \
        --page 10 --dump-contents 2>&1 | wc -l
       25709

After:

    Build/lagom/bin/pdf --render out.png ~/Downloads/0000/0000521.pdf \
        --page 10 --dump-contents 2>&1 | wc -l
       11376
Nico Weber hai 1 ano
pai
achega
92a628c07c
Modificouse 1 ficheiro con 4 adicións e 0 borrados
  1. 4 0
      Userland/Libraries/LibPDF/ObjectDerivatives.cpp

+ 4 - 0
Userland/Libraries/LibPDF/ObjectDerivatives.cpp

@@ -5,6 +5,7 @@
  */
 
 #include <AK/Hex.h>
+#include <LibPDF/CommonNames.h>
 #include <LibPDF/Document.h>
 #include <LibPDF/ObjectDerivatives.h>
 
@@ -136,6 +137,9 @@ ByteString StreamObject::to_byte_string(int indent) const
         percentage_ascii = ascii_count * 100 / bytes().size();
     bool is_mostly_text = percentage_ascii > 95;
 
+    if (dict()->contains(CommonNames::Subtype) && dict()->get_name(CommonNames::Subtype)->name() == "Image")
+        is_mostly_text = false;
+
     if (is_mostly_text) {
         for (size_t i = 0; i < bytes().size(); ++i) {
             auto c = bytes()[i];