Spreadsheet: Make the XSV parser start with a preview parse

...instead of parsing the whole document up front, which is really
wasteful and super slow.
This commit is contained in:
Ali Mohammad Pur 2021-06-16 08:34:19 +04:30 committed by Ali Mohammad Pur
parent 88b168ff16
commit b11b3c2f1c
Notes: sideshowbarker 2024-07-18 12:09:50 +09:00
4 changed files with 36 additions and 7 deletions

View file

@ -147,7 +147,7 @@ auto CSVImportDialogPage::make_reader() -> Optional<Reader::XSV>
if (should_trim_trailing)
behaviours = behaviours | Reader::ParserBehaviour::TrimTrailingFieldSpaces;
return Reader::XSV(m_csv, traits, behaviours);
return Reader::XSV(m_csv, move(traits), behaviours);
};
void CSVImportDialogPage::update_preview()
@ -195,6 +195,7 @@ Result<NonnullRefPtrVector<Sheet>, String> ImportDialog::make_and_run_for(String
NonnullRefPtrVector<Sheet> sheets;
if (reader.has_value()) {
reader->parse();
if (reader.value().has_error())
return String::formatted("CSV Import failed: {}", reader.value().error_string());

View file

@ -18,6 +18,7 @@ TEST_CASE(should_parse_valid_data)
4, 5, 6
"""x", y"z, 9)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(!csv.has_error());
EXPECT_EQ(csv[0]["Foo"], "1");
@ -31,6 +32,7 @@ TEST_CASE(should_parse_valid_data)
4, "5 " , 6
"""x", y"z, 9 )~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces };
csv.parse();
EXPECT(!csv.has_error());
EXPECT_EQ(csv[0]["Foo"], "1");
@ -46,6 +48,7 @@ TEST_CASE(should_fail_nicely)
auto data = R"~~~(Foo, Bar, Baz
x, y)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(csv.has_error());
EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount);
}
@ -54,6 +57,7 @@ TEST_CASE(should_fail_nicely)
auto data = R"~~~(Foo, Bar, Baz
x, y, "z)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(csv.has_error());
EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure);
}
@ -66,6 +70,7 @@ TEST_CASE(should_iterate_rows)
4, 5, 6
"""x", y"z, 9)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
csv.parse();
EXPECT(!csv.has_error());
bool ran = false;
@ -82,6 +87,7 @@ BENCHMARK_CASE(fairly_big_data)
auto data = file_or_error.value()->read_all();
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders };
csv.parse();
EXPECT(!csv.has_error());
EXPECT_EQ(csv.size(), 100000u);

View file

@ -11,12 +11,12 @@ namespace Reader {
// Bitwise AND of two ParserBehaviour flag sets.
// Both operands go through to_underlying() instead of a cast to a fixed
// integer type, and the result is cast back to ParserBehaviour. There is a
// single return; a second, unreachable return statement has been dropped.
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right)
{
    return static_cast<ParserBehaviour>(to_underlying(left) & to_underlying(right));
}
// Bitwise OR of two ParserBehaviour flag sets.
// Both operands go through to_underlying() instead of a cast to a fixed
// integer type, and the result is cast back to ParserBehaviour. There is a
// single return; a second, unreachable return statement has been dropped.
ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right)
{
    return static_cast<ParserBehaviour>(to_underlying(left) | to_underlying(right));
}
void XSV::set_error(ReadError error)
@ -43,8 +43,22 @@ Vector<String> XSV::headers() const
return headers;
}
// Parses only the beginning of the source: after resetting all parser state,
// it reads the header row when ReadHeaders is enabled and then reads rows
// until an error is recorded, the lexer reaches the end of input, or the
// preview row limit has been collected.
void XSV::parse_preview()
{
    // Upper bound on the number of rows gathered for a preview.
    constexpr size_t max_preview_rows = 10;

    reset();

    bool const wants_headers = (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None;
    if (wants_headers)
        read_headers();

    while (!has_error() && !m_lexer.is_eof() && m_rows.size() < max_preview_rows)
        m_rows.append(read_row());
}
void XSV::parse()
{
reset();
if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None)
read_headers();

View file

@ -59,17 +59,18 @@ constexpr ParserBehaviour default_behaviours()
class XSV {
public:
// Builds a reader over `source` with the given traits and behaviours, then
// runs parse_preview() only. The constructor deliberately does not call
// parse(): a full parse here would process the whole document just to have
// parse_preview() reset and discard it. Callers that need every row call
// parse() after construction.
// `traits` is taken by value and stored in m_traits.
XSV(StringView source, ParserTraits traits, ParserBehaviour behaviours = default_behaviours())
    : m_source(source)
    , m_lexer(m_source)
    , m_traits(traits)
    , m_behaviours(behaviours)
{
    parse_preview();
}
// Virtual destructor with an empty body.
virtual ~XSV() { }
// Defined out of line; the visible start of its definition calls reset() and
// then reads headers when ReadHeaders is set.
void parse();
// True when the stored error code is anything other than ReadError::None.
bool has_error() const { return m_error != ReadError::None; }
// Returns the stored error code.
ReadError error() const { return m_error; }
String error_string() const
@ -180,8 +181,15 @@ private:
}
};
void set_error(ReadError error);
void parse();
void parse_preview();
void read_headers();
// Puts the parser back into a clean state so parsing can start over:
// the error code is cleared, collected header names and rows are dropped,
// and the lexer is replaced by a fresh one constructed over m_source.
void reset()
{
    m_error = ReadError::None;
    m_names.clear();
    m_rows.clear();
    m_lexer = GenericLexer { m_source };
}
Vector<Field> read_row(bool header_row = false);
Field read_one_field();
Field read_one_quoted_field();
@ -189,7 +197,7 @@ private:
StringView m_source;
GenericLexer m_lexer;
const ParserTraits& m_traits;
ParserTraits m_traits;
ParserBehaviour m_behaviours;
Vector<Field> m_names;
Vector<Vector<Field>> m_rows;