
There were two problems: 1. parse_compressed_object_with_index() parses indirect objects without going through Parser::parse_indirect_value(), so push_reference() / pop_reference() weren't called. Manually call them, both for the indirect object containing the object stream and for the indirect object within the object stream. 2. The indirect object within the object stream got decrypted twice: Once when the object stream data itself got decrypted, and then incorrectly a second time when the object data within the stream was read. To fix, disable encryption while parsing object stream data (since it's already decrypted). The test is from http://opf-labs.org/format-corpus/pdfCabinetOfHorrors/ which according to readme.md at the same location is CC0.
89 lines
2.4 KiB
C++
89 lines
2.4 KiB
C++
/*
|
|
* Copyright (c) 2021-2022, Matthew Olsson <mattco@serenityos.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <AK/SourceLocation.h>
|
|
#include <AK/WeakPtr.h>
|
|
#include <LibPDF/Object.h>
|
|
#include <LibPDF/Operator.h>
|
|
#include <LibPDF/Reader.h>
|
|
#include <LibPDF/XRefTable.h>
|
|
|
|
namespace PDF {
|
|
|
|
template<typename T, typename... Args>
|
|
static NonnullRefPtr<T> make_object(Args... args)
|
|
requires(IsBaseOf<Object, T>)
|
|
{
|
|
return adopt_ref(*new T(forward<Args>(args)...));
|
|
}
|
|
|
|
class Document;
|
|
|
|
class Parser {
|
|
public:
|
|
static PDFErrorOr<Vector<Operator>> parse_operators(Document*, ReadonlyBytes);
|
|
|
|
Parser(ReadonlyBytes);
|
|
Parser(Document*, ReadonlyBytes);
|
|
|
|
void set_document(WeakPtr<Document> const&);
|
|
|
|
DeprecatedString parse_comment();
|
|
|
|
void move_by(size_t count) { m_reader.move_by(count); }
|
|
void move_to(size_t offset) { m_reader.move_to(offset); }
|
|
|
|
enum class CanBeIndirectValue {
|
|
No,
|
|
Yes
|
|
};
|
|
|
|
PDFErrorOr<Value> parse_value(CanBeIndirectValue = CanBeIndirectValue::Yes);
|
|
PDFErrorOr<Value> parse_possible_indirect_value_or_ref();
|
|
PDFErrorOr<NonnullRefPtr<IndirectValue>> parse_indirect_value(u32 index, u32 generation);
|
|
PDFErrorOr<NonnullRefPtr<IndirectValue>> parse_indirect_value();
|
|
PDFErrorOr<Value> parse_number();
|
|
PDFErrorOr<NonnullRefPtr<NameObject>> parse_name();
|
|
NonnullRefPtr<StringObject> parse_string();
|
|
DeprecatedString parse_literal_string();
|
|
DeprecatedString parse_hex_string();
|
|
PDFErrorOr<NonnullRefPtr<ArrayObject>> parse_array();
|
|
PDFErrorOr<NonnullRefPtr<DictObject>> parse_dict();
|
|
PDFErrorOr<NonnullRefPtr<StreamObject>> parse_stream(NonnullRefPtr<DictObject> dict);
|
|
PDFErrorOr<Vector<Operator>> parse_operators();
|
|
|
|
void set_filters_enabled(bool enabled)
|
|
{
|
|
m_enable_filters = enabled;
|
|
}
|
|
|
|
void set_encryption_enabled(bool enabled)
|
|
{
|
|
m_enable_encryption = enabled;
|
|
}
|
|
|
|
void push_reference(Reference const& ref) { m_current_reference_stack.append(ref); }
|
|
void pop_reference() { m_current_reference_stack.take_last(); }
|
|
|
|
protected:
|
|
Error error(
|
|
DeprecatedString const& message
|
|
#ifdef PDF_DEBUG
|
|
,
|
|
SourceLocation loc = SourceLocation::current()
|
|
#endif
|
|
) const;
|
|
|
|
Reader m_reader;
|
|
WeakPtr<Document> m_document;
|
|
Vector<Reference> m_current_reference_stack;
|
|
bool m_enable_encryption { true };
|
|
bool m_enable_filters { false };
|
|
};
|
|
|
|
};
|