mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 15:40:19 +00:00
8c745ad0d9
This commit introduces the ability to parse the document catalog dict, as well as the page tree and individual pages. Pages obviously aren't fully parsed, as we won't care about most of the fields until we start actually rendering PDFs. One of the primary benefits of the PDF format is laziness: PDFs are not meant to be parsed all at once, and the same is true for pages. When a Document is constructed, it builds a map of page number to object index, but it does not fetch and parse any of the pages. A page is only parsed when a caller requests that particular page (and is cached from then on). Additionally, this commit adds an object_cast function which logs bad casts if DEBUG_PDF is set. Finally, utility functions were added to ArrayObject and DictObject to get all types of objects from the collections, to avoid having to cast manually.
156 lines
3.6 KiB
C++
156 lines
3.6 KiB
C++
/*
|
|
* Copyright (c) 2021, Matthew Olsson <mattco@serenityos.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <AK/Debug.h>
|
|
#include <AK/Function.h>
|
|
#include <AK/ScopeGuard.h>
|
|
|
|
namespace PDF {
|
|
|
|
class Reader {
|
|
public:
|
|
explicit Reader(const ReadonlyBytes& bytes)
|
|
: m_bytes(bytes)
|
|
{
|
|
}
|
|
|
|
ALWAYS_INLINE const ReadonlyBytes& bytes() const { return m_bytes; }
|
|
ALWAYS_INLINE size_t offset() const { return m_offset; }
|
|
|
|
bool done() const
|
|
{
|
|
if (m_forwards)
|
|
return offset() >= bytes().size();
|
|
return m_offset < 0;
|
|
}
|
|
|
|
size_t remaining() const
|
|
{
|
|
if (done())
|
|
return 0;
|
|
|
|
if (m_forwards)
|
|
return bytes().size() - offset() - 1;
|
|
return offset() + 1;
|
|
}
|
|
|
|
void move_by(size_t count)
|
|
{
|
|
if (m_forwards) {
|
|
m_offset += static_cast<ssize_t>(count);
|
|
} else {
|
|
m_offset -= static_cast<ssize_t>(count);
|
|
}
|
|
}
|
|
|
|
char read()
|
|
{
|
|
auto value = m_bytes.at(m_offset);
|
|
move_by(1);
|
|
return static_cast<char>(value);
|
|
}
|
|
|
|
char peek(size_t shift = 0) const
|
|
{
|
|
auto offset = m_offset + shift * (m_forwards ? 1 : -1);
|
|
return static_cast<char>(m_bytes.at(offset));
|
|
}
|
|
|
|
template<typename... T>
|
|
bool matches_any(T... elements) const
|
|
{
|
|
if (done())
|
|
return false;
|
|
auto ch = peek();
|
|
return ((ch == elements) || ...);
|
|
}
|
|
|
|
bool matches(char ch) const
|
|
{
|
|
return !done() && peek() == ch;
|
|
}
|
|
|
|
bool matches(const char* chars) const
|
|
{
|
|
String string(chars);
|
|
if (remaining() < string.length())
|
|
return false;
|
|
|
|
if (!m_forwards)
|
|
string = string.reverse();
|
|
|
|
for (size_t i = 0; i < string.length(); i++) {
|
|
if (peek(i) != string[i])
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
template<typename T = char>
|
|
void move_to(size_t offset)
|
|
{
|
|
VERIFY(offset < m_bytes.size());
|
|
m_offset = static_cast<ssize_t>(offset);
|
|
}
|
|
|
|
void move_until(char ch)
|
|
{
|
|
while (!done() && peek() != ch)
|
|
move_by(1);
|
|
}
|
|
|
|
void move_until(Function<bool(char)> predicate)
|
|
{
|
|
while (!done() && !predicate(peek()))
|
|
move_by(1);
|
|
}
|
|
|
|
ALWAYS_INLINE void move_while(Function<bool(char)> predicate)
|
|
{
|
|
move_until([&predicate](char t) { return !predicate(t); });
|
|
}
|
|
|
|
ALWAYS_INLINE void set_reading_forwards() { m_forwards = true; }
|
|
ALWAYS_INLINE void set_reading_backwards() { m_forwards = false; }
|
|
|
|
ALWAYS_INLINE void save() { m_saved_offsets.append(m_offset); }
|
|
ALWAYS_INLINE void load() { m_offset = m_saved_offsets.take_last(); }
|
|
ALWAYS_INLINE void discard() { m_saved_offsets.take_last(); }
|
|
|
|
#ifdef PDF_DEBUG
|
|
void dump_state() const
|
|
{
|
|
StringBuilder builder;
|
|
builder.append("Reader State Dump\n\n");
|
|
|
|
size_t from = max(0ul, offset() - 10);
|
|
size_t to = min(bytes().size() - 1, offset() + 10);
|
|
|
|
for (auto i = from; i <= to; i++) {
|
|
char value = static_cast<char>(bytes().at(i));
|
|
builder.appendff("{}: '{}' (value={:3d}) ", i, value, static_cast<u8>(value));
|
|
if (i == offset())
|
|
builder.appendff(" <<< current location, forwards={}", m_forwards);
|
|
builder.append('\n');
|
|
}
|
|
builder.append('\n');
|
|
|
|
auto str = builder.to_string();
|
|
dbgputstr(str.characters(), str.length());
|
|
}
|
|
#endif
|
|
|
|
private:
|
|
ReadonlyBytes m_bytes;
|
|
ssize_t m_offset { 0 };
|
|
Vector<ssize_t> m_saved_offsets;
|
|
bool m_forwards { true };
|
|
};
|
|
|
|
}
|