LibPDF: Store indirect value refs in Value objects

IndirectValueRef is so simple that it can be stored directly in the
Value class instead of being heap allocated.

As the comment in Value says, however, in theory the maximum number of
bits needed to store a ref is 48 (16 for the generation index and 32(?)
for the object index), but 32 should be good enough for now. We can
increase the storage to u64 later if necessary.
This commit is contained in:
Matthew Olsson 2021-05-23 16:12:25 -07:00 committed by Ali Mohammad Pur
parent 534a2e95d2
commit be6e4b6f3c
Notes: sideshowbarker 2024-07-18 17:26:21 +09:00
7 changed files with 56 additions and 54 deletions

View file

@ -101,20 +101,18 @@ Page Document::get_page(u32 index)
Value Document::resolve(const Value& value) Value Document::resolve(const Value& value)
{ {
if (value.is_ref()) {
// FIXME: Surely indirect PDF objects can't contain another indirect PDF object,
// right? Unsure from the spec, but if they can, these return values would have
// to be wrapped with another resolve() call.
return get_or_load_value(value.as_ref_index());
}
if (!value.is_object()) if (!value.is_object())
return value; return value;
auto obj = value.as_object(); auto obj = value.as_object();
// FIXME: Surely indirect PDF objects can't contain another indirect PDF object,
// right? Unsure from the spec, but if they can, these return values would have
// to be wrapped with another resolve() call.
if (obj->is_indirect_value_ref()) {
auto object_index = static_cast<NonnullRefPtr<IndirectValueRef>>(obj)->index();
return get_or_load_value(object_index);
}
if (obj->is_indirect_value()) if (obj->is_indirect_value())
return static_cast<NonnullRefPtr<IndirectValue>>(obj)->value(); return static_cast<NonnullRefPtr<IndirectValue>>(obj)->value();
@ -137,13 +135,13 @@ void Document::add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> page_tr
// these pages to the overall page tree // these pages to the overall page tree
for (auto& value : *kids_array) { for (auto& value : *kids_array) {
auto reference = object_cast<IndirectValueRef>(value.as_object()); auto reference_index = value.as_ref_index();
auto byte_offset = m_xref_table.byte_offset_for_object(reference->index()); auto byte_offset = m_xref_table.byte_offset_for_object(reference_index);
auto maybe_page_tree_node = m_parser.conditionally_parse_page_tree_node_at_offset(byte_offset); auto maybe_page_tree_node = m_parser.conditionally_parse_page_tree_node_at_offset(byte_offset);
if (maybe_page_tree_node) { if (maybe_page_tree_node) {
add_page_tree_node_to_page_tree(maybe_page_tree_node.release_nonnull()); add_page_tree_node_to_page_tree(maybe_page_tree_node.release_nonnull());
} else { } else {
m_page_object_indices.append(reference->index()); m_page_object_indices.append(reference_index);
} }
} }
@ -151,10 +149,8 @@ void Document::add_page_tree_node_to_page_tree(NonnullRefPtr<DictObject> page_tr
} }
// We know all of the kids are leaf nodes // We know all of the kids are leaf nodes
for (auto& value : *kids_array) { for (auto& value : *kids_array)
auto reference = object_cast<IndirectValueRef>(value.as_object()); m_page_object_indices.append(value.as_ref_index());
m_page_object_indices.append(reference->index());
}
} }
} }

View file

@ -14,18 +14,14 @@ class Object;
// Note: This macro doesn't care about PlainTextStreamObject and EncodedStreamObject because // Note: This macro doesn't care about PlainTextStreamObject and EncodedStreamObject because
// we never need to work directly with either of them. // we never need to work directly with either of them.
#define ENUMERATE_DIRECT_OBJECT_TYPES(V) \ #define ENUMERATE_OBJECT_TYPES(V) \
V(StringObject, string) \ V(StringObject, string) \
V(NameObject, name) \ V(NameObject, name) \
V(ArrayObject, array) \ V(ArrayObject, array) \
V(DictObject, dict) \ V(DictObject, dict) \
V(StreamObject, stream) \ V(StreamObject, stream) \
V(IndirectValue, indirect_value) V(IndirectValue, indirect_value)
#define ENUMERATE_OBJECT_TYPES(V) \
ENUMERATE_DIRECT_OBJECT_TYPES(V) \
V(IndirectValueRef, indirect_value_ref)
#define FORWARD_DECL(class_name, _) class class_name; #define FORWARD_DECL(class_name, _) class class_name;
ENUMERATE_OBJECT_TYPES(FORWARD_DECL) ENUMERATE_OBJECT_TYPES(FORWARD_DECL)
#undef FORWARD_DECL #undef FORWARD_DECL

View file

@ -30,7 +30,7 @@ NonnullRefPtr<Object> DictObject::get_object(Document* document, const FlyString
{ \ { \
return document->resolve_to<class_name>(get(key).value()); \ return document->resolve_to<class_name>(get(key).value()); \
} }
ENUMERATE_DIRECT_OBJECT_TYPES(DEFINE_ACCESSORS) ENUMERATE_OBJECT_TYPES(DEFINE_ACCESSORS)
#undef DEFINE_INDEXER #undef DEFINE_INDEXER
static void append_indent(StringBuilder& builder, int indent) static void append_indent(StringBuilder& builder, int indent)
@ -132,9 +132,4 @@ String IndirectValue::to_string(int indent) const
return builder.to_string(); return builder.to_string();
} }
// Renders this reference as "<index> <generation index> R".
// The indent argument is unused.
String IndirectValueRef::to_string(int) const
{
    auto const object_index = index();
    auto const generation = generation_index();
    return String::formatted("{} {} R", object_index, generation);
}
} }

View file

@ -222,26 +222,6 @@ private:
Value m_value; Value m_value;
}; };
class IndirectValueRef final : public Object {
public:
IndirectValueRef(u32 index, u32 generation_index)
: m_index(index)
{
set_generation_index(generation_index);
}
~IndirectValueRef() override = default;
[[nodiscard]] ALWAYS_INLINE u32 index() const { return m_index; }
ALWAYS_INLINE bool is_indirect_value_ref() const override { return true; }
ALWAYS_INLINE const char* type_name() const override { return "indirect_object_ref"; }
String to_string(int indent) const override;
private:
u32 m_index;
};
template<IsObject To, IsObject From> template<IsObject To, IsObject From>
[[nodiscard]] ALWAYS_INLINE static NonnullRefPtr<To> object_cast(NonnullRefPtr<From> obj [[nodiscard]] ALWAYS_INLINE static NonnullRefPtr<To> object_cast(NonnullRefPtr<From> obj
#ifdef PDF_DEBUG #ifdef PDF_DEBUG

View file

@ -321,7 +321,7 @@ Value Parser::parse_possible_indirect_value_or_ref()
m_reader.discard(); m_reader.discard();
consume(); consume();
consume_whitespace(); consume_whitespace();
return make_object<IndirectValueRef>(first_number.as_int(), second_number.as_int()); return Value(first_number.as_int(), second_number.as_int());
} }
if (m_reader.matches("obj")) { if (m_reader.matches("obj")) {

View file

@ -30,6 +30,9 @@ Value& Value::operator=(const Value& other)
case Type::Float: case Type::Float:
m_as_float = other.m_as_float; m_as_float = other.m_as_float;
break; break;
case Type::Ref:
m_as_ref = other.m_as_ref;
break;
case Type::Object: case Type::Object:
m_as_object = other.m_as_object; m_as_object = other.m_as_object;
if (m_as_object) if (m_as_object)
@ -50,6 +53,8 @@ String Value::to_string(int indent) const
return String::number(as_int()); return String::number(as_int());
case Type::Float: case Type::Float:
return String::number(as_float()); return String::number(as_float());
case Type::Ref:
return String::formatted("{} {} R", as_ref_index(), as_ref_generation_index());
case Type::Object: case Type::Object:
return as_object()->to_string(indent); return as_object()->to_string(indent);
} }

View file

@ -14,6 +14,14 @@ class Object;
class Value { class Value {
public: public:
// We store refs as u32, with 18 bits for the index and 14 bits for the
// generation index. The generation index is stored in the higher bits.
// This may need to be rethought later, as the max generation index is
// 2^16 and the max for the object index is probably 2^32 (I don't know
// exactly)
//
// Each limit is the largest value that fits in its bit field. A wider limit
// would let a value spill into the bits that belong to the other field.
static constexpr auto max_ref_index = (1 << 18) - 1;            // 2 ^ 18 - 1
static constexpr auto max_ref_generation_index = (1 << 14) - 1; // 2 ^ 14 - 1
Value() Value()
: m_type(Type::Null) : m_type(Type::Null)
{ {
@ -37,6 +45,14 @@ public:
m_as_float = f; m_as_float = f;
} }
// Constructs a Value of Type::Ref, packing an indirect reference into m_as_ref.
//
// index:            object index, stored in the low 18 bits.
// generation_index: generation index, stored in the bits above the index.
//
// Both arguments are VERIFY'd against the class limits before packing.
Value(u32 index, u32 generation_index)
    : m_type(Type::Ref)
{
    VERIFY(index < max_ref_index);
    VERIFY(generation_index < max_ref_generation_index);
    // The shift must match the accessors: as_ref_index() masks the low 18 bits
    // and as_ref_generation_index() shifts right by 18. A smaller shift would
    // overlap the generation index with the top bits of the object index.
    m_as_ref = (generation_index << 18) | index;
}
template<IsObject T> template<IsObject T>
Value(NonnullRefPtr<T> obj) Value(NonnullRefPtr<T> obj)
: m_type(Type::Object) : m_type(Type::Object)
@ -59,7 +75,7 @@ public:
[[nodiscard]] ALWAYS_INLINE bool is_int() const { return m_type == Type::Int; } [[nodiscard]] ALWAYS_INLINE bool is_int() const { return m_type == Type::Int; }
[[nodiscard]] ALWAYS_INLINE bool is_float() const { return m_type == Type::Float; } [[nodiscard]] ALWAYS_INLINE bool is_float() const { return m_type == Type::Float; }
[[nodiscard]] ALWAYS_INLINE bool is_number() const { return is_int() || is_float(); } [[nodiscard]] ALWAYS_INLINE bool is_number() const { return is_int() || is_float(); }
// True when this Value's type tag is Type::Ref, i.e. it holds a packed indirect reference.
[[nodiscard]] ALWAYS_INLINE bool is_ref() const { return m_type == Type::Ref; }
[[nodiscard]] ALWAYS_INLINE bool is_object() const { return m_type == Type::Object; } [[nodiscard]] ALWAYS_INLINE bool is_object() const { return m_type == Type::Object; }
[[nodiscard]] ALWAYS_INLINE bool as_bool() const [[nodiscard]] ALWAYS_INLINE bool as_bool() const
@ -94,6 +110,18 @@ public:
return static_cast<float>(as_int()); return static_cast<float>(as_int());
} }
// Returns the object index of a ref Value: the low 18 bits of m_as_ref.
// VERIFYs that this Value is a ref.
[[nodiscard]] ALWAYS_INLINE u32 as_ref_index() const
{
    VERIFY(is_ref());
    constexpr u32 index_mask = (1u << 18) - 1;
    return m_as_ref & index_mask;
}
// Returns the generation index of a ref Value: the bits of m_as_ref above
// the low 18 bits. VERIFYs that this Value is a ref.
[[nodiscard]] ALWAYS_INLINE u32 as_ref_generation_index() const
{
    VERIFY(is_ref());
    constexpr u32 index_bit_count = 18;
    return m_as_ref >> index_bit_count;
}
[[nodiscard]] ALWAYS_INLINE NonnullRefPtr<Object> as_object() const { return *m_as_object; } [[nodiscard]] ALWAYS_INLINE NonnullRefPtr<Object> as_object() const { return *m_as_object; }
[[nodiscard]] ALWAYS_INLINE explicit operator bool() const { return !is_null(); } [[nodiscard]] ALWAYS_INLINE explicit operator bool() const { return !is_null(); }
@ -106,12 +134,14 @@ private:
Bool, Bool,
Int, Int,
Float, Float,
Ref,
Object, Object,
}; };
union { union {
bool m_as_bool; bool m_as_bool;
int m_as_int; int m_as_int;
u32 m_as_ref;
float m_as_float; float m_as_float;
Object* m_as_object; Object* m_as_object;
}; };