Add new translation library

(from cbeck88/spirit-po@627a5cb94e )
This commit is contained in:
Celtic Minstrel 2016-06-10 20:34:36 -04:00
parent 4bc0563e73
commit 6c32ccb3c3
10 changed files with 1591 additions and 0 deletions

12
src/spirit_po.hpp Normal file
View file

@ -0,0 +1,12 @@
// (C) Copyright 2015 - 2016 Christopher Beck
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef SPIRIT_PO_HPP_INCLUDED
#define SPIRIT_PO_HPP_INCLUDED
#include <spirit_po/version.hpp>
#include <spirit_po/catalog.hpp>
#endif // SPIRIT_PO_HPP_INCLUDED

420
src/spirit_po/catalog.hpp Normal file
View file

@ -0,0 +1,420 @@
// (C) Copyright 2015 - 2016 Christopher Beck
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef SPIRIT_PO_CATALOG_HPP_INCLUDED
#define SPIRIT_PO_CATALOG_HPP_INCLUDED
#ifndef BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_SPIRIT_USE_PHOENIX_V3
#endif
#include <spirit_po/catalog_metadata.hpp>
#include <spirit_po/default_plural_forms_compiler.hpp>
#include <spirit_po/exceptions.hpp>
#include <spirit_po/po_grammar.hpp>
#include <spirit_po/po_message.hpp>
#include <boost/spirit/include/qi.hpp>
#include <functional>
#include <istream>
#include <string>
#include <unordered_map>
#include <vector>
namespace spirit_po {
namespace spirit = boost::spirit;
namespace qi = spirit::qi;
// Shorthand for unsigned int; used for plural counts and plural-form indices.
typedef unsigned int uint;
// Callback that receives the text of warnings issued while messages are inserted.
// An empty (default-constructed) std::function disables warnings.
typedef std::function<void(const std::string &)> warning_channel_type;
// Default container for catalog entries: index string -> po_message.
typedef std::unordered_map<std::string, po_message> default_hashmap_type;
template <typename hashmap_type = default_hashmap_type, typename pf_compiler = default_plural_forms::compiler>
class catalog {
// Header metadata; filled in by metadata_.parse_header(...) in the constructor.
catalog_metadata metadata_;
// Result of compiling metadata_.plural_forms_function_string with pf_compiler.
typename pf_compiler::result_type pf_function_object_;
uint singular_index_; // cached result of pf_function_object(1)
#ifdef SPIRIT_PO_NOEXCEPT
boost::optional<std::string> error_message_;
// if loading failed, error_message_ contains an error
// (rather than throwing an exception)
#endif // SPIRIT_PO_NOEXCEPT
// Optional warning callback; tested for emptiness before every call.
warning_channel_type warning_channel_;
// Stored messages, keyed by form_index(msg).
hashmap_type hashmap_;
public:
// Separator placed between msgctxt and msgid when forming a hashmap key.
// ASCII 4 is the EOT character. MO files use it to separate the message
// context from the msgid; the same formatting is used here for consistency.
// c.f. https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
static constexpr char EOT = static_cast<char>(4);
// Build the hashmap key for a message that has a context:
// the context, then the EOT separator, then the msgid.
static std::string form_context_index(const std::string & msgctxt, const std::string & id) {
  std::string key = msgctxt + EOT;
  key += id;
  return key;
}
// Build the hashmap key for a message: context-qualified when the message
// carries a context, otherwise just the msgid.
static std::string form_index(const po_message & msg) {
  if (msg.context) {
    return form_context_index(*msg.context, msg.id);
  }
  return msg.id;
}
private:
/***
 * Helper for interacting with hashmap results.
 * get(msg) returns the *singular* string of a message. A message holding a
 * single string yields that string; otherwise the string at singular_index_
 * is returned. (The po header is never a plural message.)
 */
const std::string & get(const po_message & msg) const {
  const std::size_t which = (msg.strings().size() == 1) ? 0 : singular_index_;
  return msg.strings()[which];
}
/***
 * Return the translated string of `msg` appropriate for the count `plural`.
 *
 * The form index is singular_index_ when plural == 1 and otherwise the result
 * of the plural-forms function. If that index is not a valid position in the
 * message's strings (a plural-forms expression that does not match nplurals,
 * or a message that carries fewer strings than expected), form 0 is used
 * instead of reading out of bounds; GNU gettext applies the same fallback.
 * Messages stored in the catalog always hold at least one string, so index 0
 * is always valid here.
 */
const std::string & get(const po_message & msg, uint plural) const {
  uint idx = (plural == 1 ? singular_index_ : pf_function_object_(plural));
  if (idx >= msg.strings().size()) { idx = 0; }
  return msg.strings()[idx];
}
/***
 * Insert a message into the hashmap.
 *
 * The message is dropped when it has no translated strings, when its first
 * translated string is empty (i.e. it is untranslated), or when it has more
 * than one string but not exactly metadata_.num_plural_forms of them (a
 * warning is issued in that last case if a warning channel is set).
 *
 * If an entry with the same index already exists, a warning is issued (when a
 * warning channel is set) and the existing entry is overwritten.
 */
void insert_message(po_message && msg) {
  if (!msg.strings().size()) { return; }
  // don't allow messages with ZERO translations into the catalog, this will cause segfaults later.
  // should perhaps throw an exception here
  if (!msg.strings()[0].size()) { return; }
  // if the (first) translated string is "", it is untranslated and message does not enter catalog
  if (msg.strings().size() > 1 && msg.strings().size() != metadata_.num_plural_forms) {
    if (warning_channel_) {
      warning_channel_("Ignoring a message with an incorrect number of plural forms: plural = " + std::to_string(msg.strings().size()) + " msgid = '" + msg.id + "'");
    }
    return;
  }
  std::string index = form_index(msg);
  // adjust the id based on context if necessary

  // Look the key up first. Calling emplace with std::move(msg) may move from
  // msg even when the key already exists, which would leave nothing valid to
  // report in the warning or to store as the replacement.
  auto existing = hashmap_.find(index);
  if (existing == hashmap_.end()) {
    hashmap_.emplace(std::move(index), std::move(msg));
    return;
  }
  // Issue a warning rather than silently overwrite.
  if (warning_channel_) {
    std::string warning = "Overwriting a message: msgid = <<<" + msg.id + ">>>";
    if (msg.context) { warning += " msgctxt = <<<" + *msg.context + ">>>"; }
    warning_channel_(warning);
  }
  existing->second = std::move(msg);
}
public:
#ifdef SPIRIT_PO_NOEXCEPT
/***
* Error checking (this is done so we don't have to throw exceptions from the ctor).
* Converts to true when no error message has been recorded.
*/
explicit operator bool() const {
return !error_message_;
}
// Returns a copy of the recorded error message.
// Precondition: an error message is present (check with operator bool first).
std::string error() const {
return *error_message_; // UB if there is not an error message
}
#endif // SPIRIT_PO_NOEXCEPT
/***
* Ctors
*
* Builds a catalog by parsing po content from the range [it, end).
*
* it, end      - line-position-tracking iterators over the po text; `it` is
*                advanced as parsing proceeds.
* warn_channel - optional callback used by insert_message for warnings.
* compiler     - function object that compiles the Plural-Forms expression.
*
* Every failure is reported through SPIRIT_PO_CATALOG_FAIL, which is defined
* outside this section (NOTE(review): presumably it throws, or records
* error_message_ when SPIRIT_PO_NOEXCEPT is defined -- confirm in exceptions.hpp).
*/
template <typename Iterator>
catalog(spirit::line_pos_iterator<Iterator> & it, spirit::line_pos_iterator<Iterator> & end, warning_channel_type warn_channel = warning_channel_type(), pf_compiler compiler = pf_compiler())
: metadata_()
, pf_function_object_()
, warning_channel_(warn_channel)
, hashmap_()
{
typedef spirit::line_pos_iterator<Iterator> iterator_type;
po_grammar<iterator_type> grammar;
po_message msg;
// Line on which the current message started; stays 0 for the header entry.
std::size_t line_no = 0;
// Parse header first
{
// must be able to parse first message
qi::parse(it, end, grammar.skipped_block); // first parse any comments
if (!qi::parse(it, end, grammar, msg)) { // now parse the main grammar target
int err_line = it.position();
SPIRIT_PO_CATALOG_FAIL("Failed to parse po header, stopped at line " + std::to_string(err_line) + ": " + iterator_context(it, end));
}
// first message must have empty MSGID (po format says so)
if (msg.id.size()) {
SPIRIT_PO_CATALOG_FAIL("Failed to parse po header, first msgid must be empty string \"\", found: " + msg.id);
}
// Now parse the header string itself
if (msg.strings().size()) {
std::string maybe_error = metadata_.parse_header(msg.strings()[0]);
if (maybe_error.size()) {
SPIRIT_PO_CATALOG_FAIL("Failed to parse po header: " + maybe_error);
}
}
// num_plural_forms is still 0 if the header entry had no strings to parse.
if (!metadata_.num_plural_forms) {
SPIRIT_PO_CATALOG_FAIL("Invalid metadata in po header, found num_plurals = 0");
}
// Try to compile the plural forms function string
pf_function_object_ = compiler(metadata_.plural_forms_function_string);
if (!pf_function_object_) {
SPIRIT_PO_CATALOG_FAIL(("Failed to read plural forms function. "
"Input: '" + metadata_.plural_forms_function_string + "', "
"error message: " + pf_function_object_.error()));
}
// Cache the 'singular' form index since it is most common
singular_index_ = pf_function_object_(1);
if (singular_index_ >= metadata_.num_plural_forms) {
SPIRIT_PO_CATALOG_FAIL(("Invalid plural forms function. "
"On input n = 1, returned plural = " + std::to_string(singular_index_) + ", "
"while num_plurals = " + std::to_string(metadata_.num_plural_forms)));
}
msg.line_no = line_no;
insert_message(std::move(msg)); // for compatibility, need to insert the header message at msgid ""
}
// Now parse non-fuzzy messages
while (it != end) {
// this parse rule cannot fail, it can be a zero length match
qi::parse(it, end, grammar.ignored_comments);
bool fuzzy = false;
// this parse rule cannot fail, it can be a zero length match
qi::parse(it, end, grammar.message_preamble, fuzzy);
// check if we exhausted the file by comments
if (it != end) {
// msg was moved from by the previous insert_message call; start from a fresh message
msg = po_message{};
msg.strings().reserve(metadata_.num_plural_forms); // try to prevent frequent vector reallocations
line_no = it.position();
// actually parse a message
if (!qi::parse(it, end, grammar, msg)) {
int err_line = it.position();
SPIRIT_PO_CATALOG_FAIL(("Failed to parse po file, "
"started at " + std::to_string(line_no) + ": , stopped at " + std::to_string(err_line) + ":\n"
+ iterator_context(it, end)));
}
// cannot overwrite header
if (!msg.id.size()) {
int err_line = it.position();
SPIRIT_PO_CATALOG_FAIL(("Malformed po file: Cannot overwrite the header entry later in the po file."
"Started at " + std::to_string(line_no) + ": , stopped at " + std::to_string(err_line) + ":\n"
+ iterator_context(it, end)));
}
msg.line_no = line_no;
// only insert it if it wasn't marked fuzzy
if (!fuzzy) { insert_message(std::move(msg)); }
}
}
#ifdef SPIRIT_PO_DEBUG
// validate resulting hashmap
for (const auto & p : hashmap_) {
if (!p.second.strings().size()) { SPIRIT_PO_CATALOG_FAIL(("Internal catalog error: found a message id with no strings, msgid='" + p.first + "'")); }
if (p.second.strings().size() != 1 && p.second.strings().size() != metadata_.num_plural_forms) {
SPIRIT_PO_CATALOG_FAIL(("Internal catalog error: found a message id with wrong number of strings, msgid='" + p.first + "' num msgstr = " + std::to_string(p.second.strings().size()) + ", catalog num_plural_forms = " + std::to_string(metadata_.num_plural_forms) + "\nWhole message: " + debug_string(p.second) ));
}
}
#endif // SPIRIT_PO_DEBUG
}
// Wrap a plain iterator pair in spirit::line_pos_iterators and build a
// catalog from the wrapped range.
template <typename Iterator>
static catalog from_iterators(Iterator & b, Iterator & e, warning_channel_type w = warning_channel_type()) {
  typedef spirit::line_pos_iterator<Iterator> tracked_iterator;
  tracked_iterator first{b};
  tracked_iterator last{e};
  return catalog(first, last, w);
}
// Overload for iterators that already track line positions: they are passed
// straight to the constructor.
template <typename Iterator>
static catalog from_iterators(spirit::line_pos_iterator<Iterator> & b,
                              spirit::line_pos_iterator<Iterator> & e,
                              warning_channel_type w = warning_channel_type())
{
  return catalog(b, e, w);
}
// Build a catalog from any range accepted by boost::begin / boost::end,
// in a single expression.
template <typename Range>
static catalog from_range(const Range & range, warning_channel_type w = warning_channel_type()) {
  auto first = boost::begin(range);
  auto last = boost::end(range);
  return from_iterators(first, last, w);
}
// Build a catalog by reading po text from a stream.
// The skipws flag is cleared on the stream (and not restored) so that
// whitespace reaches the parser unchanged.
static catalog from_istream(std::istream & is, warning_channel_type w = warning_channel_type()) {
  // no white space skipping in the stream!
  std::noskipws(is);
  spirit::istream_iterator first(is);
  spirit::istream_iterator last;
  return from_iterators(first, last, w);
}
///////////////
// ACCESSORS //
///////////////
/***
* Lookup strings from the catalog
*
* When using string literals as the parameters, these versions are safe and
* are maximally efficient.
* (The returned pointer is either the input pointer, having static storage
* duration, or has lifetime as long as the catalog.)
*
* Chosen to behave in the same manner as corresponding gettext functions.
*/
// Look up msgid; returns the stored singular translation, or the msgid
// pointer itself when the catalog has no entry for it.
const char * gettext(const char * msgid) const {
  const auto found = hashmap_.find(msgid);
  if (found == hashmap_.end()) {
    return msgid;
  }
  return get(found->second).c_str();
}
// Plural lookup. When the catalog holds a plural entry for msgid, the form
// selected for `plural` is returned; otherwise msgid is returned for
// plural == 1 and msgid_plural for every other count.
const char * ngettext(const char * msgid, const char * msgid_plural, uint plural) const {
  const auto found = hashmap_.find(msgid);
  const bool usable = (found != hashmap_.end()) && found->second.is_plural();
  if (!usable) {
    return (plural == 1 ? msgid : msgid_plural);
  }
  return get(found->second, plural).c_str();
}
// Context-qualified lookup; returns the stored singular translation, or the
// msgid pointer itself when no entry exists for (context, msgid).
const char * pgettext(const char * context, const char * msgid) const {
  const auto found = hashmap_.find(form_context_index(context, msgid));
  if (found == hashmap_.end()) {
    return msgid;
  }
  return get(found->second).c_str();
}
// Context-qualified plural lookup. Falls back to msgid (plural == 1) or
// msgid_plural (any other count) when there is no plural entry for
// (context, msgid).
const char * npgettext(const char * context, const char * msgid, const char * msgid_plural, uint plural) const {
  const auto found = hashmap_.find(form_context_index(context, msgid));
  const bool usable = (found != hashmap_.end()) && found->second.is_plural();
  if (!usable) {
    return (plural == 1 ? msgid : msgid_plural);
  }
  return get(found->second, plural).c_str();
}
/***
* Lookup strings from catalog, return std::string.
*
* When, for whatever reason, it is more comfortable to use idiomatic C++.
*/
// std::string flavour of gettext: returns a copy of the stored singular
// translation, or a copy of msgid when no entry exists.
std::string gettext_str(const std::string & msgid) const {
  const auto found = hashmap_.find(msgid);
  if (found == hashmap_.end()) {
    return msgid;
  }
  return get(found->second);
}
// std::string flavour of ngettext: returns the form selected for `plural`
// when a plural entry exists, otherwise msgid (plural == 1) or msgid_plural.
std::string ngettext_str(const std::string & msgid, const std::string & msgid_plural, uint plural) const {
  const auto found = hashmap_.find(msgid);
  if (found != hashmap_.end() && found->second.is_plural()) {
    return get(found->second, plural);
  }
  if (plural == 1) {
    return msgid;
  }
  return msgid_plural;
}
// std::string flavour of pgettext: returns a copy of the stored singular
// translation for (context, msgid), or a copy of msgid when no entry exists.
std::string pgettext_str(const std::string & context, const std::string & msgid) const {
  const auto found = hashmap_.find(form_context_index(context, msgid));
  if (found == hashmap_.end()) {
    return msgid;
  }
  return get(found->second);
}
// std::string flavour of npgettext: returns the form selected for `plural`
// when a plural entry exists for (context, msgid), otherwise msgid
// (plural == 1) or msgid_plural.
std::string npgettext_str(const std::string & context, const std::string & msgid, const std::string & msgid_plural, uint plural) const {
  const auto found = hashmap_.find(form_context_index(context, msgid));
  if (found != hashmap_.end() && found->second.is_plural()) {
    return get(found->second, plural);
  }
  if (plural == 1) {
    return msgid;
  }
  return msgid_plural;
}
/***
* Get line numbers of messages
*/
// Line number recorded for the entry stored under msgid, or 0 when the
// catalog has no such entry.
std::size_t gettext_line_no(const std::string & msgid) const {
  const auto found = hashmap_.find(msgid);
  return (found == hashmap_.end()) ? 0 : found->second.line_no;
}
// Line number recorded for the entry stored under (context, msgid), or 0
// when the catalog has no such entry.
std::size_t pgettext_line_no(const std::string & context, const std::string & msgid) const {
  const auto found = hashmap_.find(form_context_index(context, msgid));
  return (found == hashmap_.end()) ? 0 : found->second.line_no;
}
/***
* Access metadata
*/
// Read-only access to the metadata parsed from the po header.
const catalog_metadata & get_metadata() const {
  return metadata_;
}
/***
* Catalog size
*/
// Number of stored messages, not counting the po header entry (the entry
// with the empty key); this is how msgfmt reports size also.
uint size() const {
  const auto header_entries = hashmap_.count("");
  return hashmap_.size() - header_entries;
}
/***
* Debugging output
*/
// Read-only access to the underlying message container (debugging aid).
const hashmap_type & get_hashmap() const {
  return hashmap_;
}
/***
* Set warning channel (for msgid overwrites)
*/
// Replace the callback that receives warnings from later insertions.
void set_warning_channel(const warning_channel_type & w) {
  warning_channel_ = w;
}
/***
* Merge a different catalog into this one
*/
// Move every message of `other` except its header entry into this catalog.
// The merge is refused (via SPIRIT_PO_CATALOG_FAIL) when the two catalogs'
// metadata are reported incompatible by check_compatibility. Our own header
// entry is kept.
template <typename H, typename P>
void merge(catalog<H, P> && other) {
  std::string problem = metadata_.check_compatibility(other.metadata_);
  if (problem.size()) {
    SPIRIT_PO_CATALOG_FAIL(("Cannot merge catalogs: " + problem));
  }
  for (auto & entry : other.hashmap_) {
    // the header entry has an empty key; do not copy it over
    if (!entry.first.size()) { continue; }
    insert_message(std::move(entry.second));
  }
}
};
} // end namespace spirit_po
#endif // SPIRIT_PO_CATALOG_HPP_INCLUDED

View file

@ -0,0 +1,154 @@
// (C) Copyright 2015 - 2016 Christopher Beck
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef SPIRIT_PO_CATALOG_METADATA_HPP_INCLUDED
#define SPIRIT_PO_CATALOG_METADATA_HPP_INCLUDED
#ifndef BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_SPIRIT_USE_PHOENIX_V3
#endif
#include <spirit_po/exceptions.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <string>
namespace spirit_po {
namespace qi = boost::spirit::qi;
// Shorthand for unsigned int.
typedef unsigned int uint;
// Parse result for a Plural-Forms header value:
// first = the nplurals number, second = the text following "plural=".
typedef std::pair<uint, std::string> num_plurals_info;
struct catalog_metadata {
// Value of the "Project-Id-Version:" header line.
std::string project_id;
// Value of the "Language:" header line.
std::string language;
// Value of the "Language-Team:" header line.
std::string language_team;
// Value of the "Last-Translator:" header line.
std::string last_translator;
// nplurals from the "Plural-Forms:" line; 0 until a header has been parsed.
uint num_plural_forms;
// Text following "plural=" in the "Plural-Forms:" line.
std::string plural_forms_function_string;
// Charset taken from the "Content-Type:" line.
std::string charset;
// Start with empty strings and zero plural forms; the real values are
// filled in by parse_header.
catalog_metadata()
  : project_id(),
    language(),
    language_team(),
    last_translator(),
    num_plural_forms(0),
    plural_forms_function_string(),
    charset()
{
}
private:
// Return the value of the header line that begins with `label`.
//
// header - the whole po header text, with lines separated by '\n'.
// label  - the field name including its trailing colon, e.g. "Language:".
//
// The label is only recognised at the very start of the header or directly
// after a newline, so that e.g. "Language:" is not found inside
// "X-Poedit-Language:". Spaces after the label are skipped and the value runs
// up to (not including) the next '\n' or the end of the header.
// Returns "" when no line begins with the label.
std::string find_header_line(const std::string & header, const std::string & label) {
  size_t idx = header.find(label);
  // Skip matches that sit in the middle of a line.
  while (idx != std::string::npos && idx != 0 && header[idx - 1] != '\n') {
    idx = header.find(label, idx + 1);
  }
  if (idx == std::string::npos) {
    return "";
  }
  auto it = header.begin() + idx + label.size();
  while (it != header.end() && *it == ' ') { ++it; }
  auto e = it;
  while (e != header.end() && *e != '\n') { ++e; }
  return std::string(it, e);
}
// Grammar for the value of the "Plural-Forms:" header line.
// Matches `nplurals=<unsigned> ; plural=` with spaces skipped between those
// tokens, then captures every remaining character as the function string.
// The attribute is a num_plurals_info (number, function string).
template <typename Iterator>
struct num_plurals_grammar : qi::grammar<Iterator, num_plurals_info()> {
qi::rule<Iterator, num_plurals_info()> main;
num_plurals_grammar() : num_plurals_grammar::base_type(main) {
using qi::lit;
main = qi::skip(' ') [ lit("nplurals=") >> qi::uint_ >> lit(';') >> lit("plural=") ] >> (*qi::char_);
}
};
// Charset assumed when the header does not name one.
// The macro is #undef'd again inside parse_header.
#define DEFAULT_CHARSET "UTF-8"
// Grammar for the value of the "Content-Type:" header line.
// Everything up to and including the first ';' is discarded; after that,
// either `charset=<rest of line>` yields the charset, or DEFAULT_CHARSET is
// produced. Spaces are skipped throughout. The parse fails if the value
// contains no ';'.
template <typename Iterator>
struct content_type_grammar : qi::grammar<Iterator, std::string()> {
qi::rule<Iterator, std::string()> main;
content_type_grammar() : content_type_grammar::base_type(main) {
using qi::lit;
using qi::omit;
using qi::skip;
main = skip(' ')[ omit[ *(qi::char_ - ';') >> lit(';') ] >> ((lit("charset=") >> *(qi::char_)) | qi::attr(DEFAULT_CHARSET)) ];
}
};
public:
// Fill this metadata object from the text of a po header.
//
// header - the translated string of the header entry, one field per line.
//
// Returns "" on success; a nonempty return value is an error message.
// Errors are reported for an unsupported charset, for a
// Content-Transfer-Encoding other than 8bit, and for a Plural-Forms line
// that cannot be parsed.
//
// Defaults: the charset falls back to UTF-8 when the Content-Type line is
// missing or cannot be parsed; a missing Plural-Forms line yields two plural
// forms with the function "n != 1".
std::string parse_header(const std::string & header) {
  constexpr const char * default_charset = DEFAULT_CHARSET;
#undef DEFAULT_CHARSET
  project_id = find_header_line(header, "Project-Id-Version:");
  language = find_header_line(header, "Language:");
  language_team = find_header_line(header, "Language-Team:");
  last_translator = find_header_line(header, "Last-Translator:");
  std::string content_type_line = find_header_line(header, "Content-Type:");
  if (content_type_line.size()) {
    auto it = content_type_line.begin();
    auto end = content_type_line.end();
    content_type_grammar<decltype(it)> gram;
    std::string ct;
    if (qi::parse(it, end, gram, ct)) {
      charset = ct;
      if (charset != "ASCII" && charset != "UTF-8") {
        return "PO file declared charset of '" + charset + "', but spirit_po only supports UTF-8 and ASCII for this.";
      }
    } else {
      // The line did not have the expected "<type>; ..." shape, so no charset
      // was declared; use the default rather than leaving charset unset.
      charset = default_charset;
    }
  } else {
    // Assume defaults for charset
    charset = default_charset;
  }
  std::string content_transfer_encoding = find_header_line(header, "Content-Transfer-Encoding:");
  if (content_transfer_encoding.size()) {
    auto it = content_transfer_encoding.begin();
    auto end = content_transfer_encoding.end();
    if (!qi::phrase_parse(it, end, qi::lit("8bit"), qi::ascii::space)) {
      return "PO header 'Content-Transfer-Encoding' must be '8bit' if specified, but PO file declared '" + content_transfer_encoding + "'";
    }
  }
  std::string num_plurals_line = find_header_line(header, "Plural-Forms:");
  if (num_plurals_line.size()) {
    auto it = num_plurals_line.begin();
    auto end = num_plurals_line.end();
    num_plurals_grammar<decltype(it)> gram;
    num_plurals_info info;
    if (qi::parse(it, end, gram, info)) {
      num_plural_forms = info.first;
      plural_forms_function_string = info.second;
    } else {
      num_plural_forms = 0;
      plural_forms_function_string = "";
      return "Failed to parse Plural-Forms entry -- stopped at:\n" + string_iterator_context(num_plurals_line, it);
    }
  } else {
    num_plural_forms = 2;
    plural_forms_function_string = "n != 1";
  }
  return "";
}
// Check whether this metadata is compatible with another metadata object.
// Currently the only criterion is that both declare the same number of
// plural forms. Returns "" when compatible, otherwise a nonempty error message.
std::string check_compatibility(const catalog_metadata & other) const {
  if (num_plural_forms == other.num_plural_forms) {
    return "";
  }
  std::string complaint{"Num plural forms mismatch. this = "};
  complaint += std::to_string(num_plural_forms);
  complaint += " other = ";
  complaint += std::to_string(other.num_plural_forms);
  return complaint;
}
};
} // end namespace spirit_po
#endif // SPIRIT_PO_CATALOG_METADATA_HPP_INCLUDED

View file

@ -0,0 +1,120 @@
// (C) Copyright 2015 - 2016 Christopher Beck
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef SPIRIT_PO_DEFAULT_PLURAL_FORMS_COMPILER_HPP_INCLUDED
#define SPIRIT_PO_DEFAULT_PLURAL_FORMS_COMPILER_HPP_INCLUDED
/***
* In GNU gettext, a language is permitted to define any number of 'plural forms'.
* For instance, in English and most Romance languages there are only two forms,
* singular and plural. However, in many other languages there may be only one
* form, or there may be several plural forms reserved for various numbers of items.
*
* In the header of a po file, as part of the metadata, translators are expected
* to specify exactly how many plural forms there are, (how many different
* variations of a pluralized string they will provide), and also a function that
* computes which form (the appropriate index) should be used when the number of
* items is a number "n".
*
* Traditionally, this function is specified as a single line of pseudo C code.
*
* Examples:
*
* Russian:
* Po header:
* num_plurals = 3
* plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;
* Messages:
*
*
* There are many many more examples shown here:
* http://localization-guide.readthedocs.org/en/latest/l10n/pluralforms.html
*
* The code in *this* file is concerned with converting these strings into
* function objects implementing a function uint -> uint.
*
* These function objects are then associated to each catalog and used when
* looking up plurals.
*
* In spirit-po, we provide support for the standard gettext pseudo-C language
* using the 'default_plural_forms_compiler', which compiles these run-time
* pseudo-C expressions into expression trees which can be evaluated.
*
* By using non-default template parameters and providing an appropriate
* function object, you can make spirit-po use your favorite programming
* language for these instead. (Or, your translators' favorite?)
*
* The 'plural_forms_compiler' concept must be a class/struct and provide:
* - The plural_forms_compiler must be default constructible.
* - It must have a typedef 'result_type' which is the type of the function
* object it produces.
* - An operator() overload which takes const std::string & and returns an
* instance of 'result_type'.
* - result_type must be default constructible and move constructible.
* - result_type must have an operator() overload which takes and yields
* unsigned int.
* - result_type must have an explicit operator bool() const overload which
* returns whether the function object is valid (compilation succeeded)
* - result_type must have a function `error()` which returns a std::string
* representing a compilation error message in the case of failure.
*/
#ifndef BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_SPIRIT_USE_PHOENIX_V3
#endif
#include <spirit_po/default_plural_forms_expressions.hpp>
#include <spirit_po/exceptions.hpp>
#include <boost/optional/optional.hpp>
#include <boost/spirit/include/qi.hpp>
#include <string>
namespace spirit_po {
namespace qi = boost::spirit::qi;
// Shorthand for unsigned int; argument and result type of plural-forms functions.
typedef unsigned int uint;
namespace default_plural_forms {
// Result type of the default plural-forms compiler.
// Holds either a stack machine built from a parsed expression, or an error
// message describing why no expression is available.
class function_object {
  mutable stack_machine machine_;
  boost::optional<std::string> parse_error_;

public:
  // Default state: not valid, with the error text "uninitialized".
  function_object() : function_object(std::string{"uninitialized"}) {}

  // Valid function object evaluating the expression _e.
  function_object(const expr & _e) : machine_(_e), parse_error_() {}

  // Invalid function object carrying the error message s; the machine is
  // built from a bare n_var so that it is still constructed.
  function_object(const std::string & s) : machine_(n_var()), parse_error_(s) {}

  // Evaluate the plural-forms function for the count n.
  uint operator()(uint n) const {
    return machine_.compute(n);
  }

  // True when no error message is stored.
  explicit operator bool() const {
    return !parse_error_;
  }

  // The stored error message; must only be called when one is stored.
  std::string error() const {
    return *parse_error_;
  }
};
// Default plural-forms compiler: parses a gettext pseudo-C expression with
// op_grammar and wraps the outcome in a function_object.
struct compiler {
  typedef function_object result_type;

  // Returns a valid function_object when the whole of `str` parses as an
  // expression; otherwise one carrying an error message that shows where
  // parsing stopped.
  result_type operator()(const std::string & str) const {
    typedef std::string::const_iterator str_it;
    expr parsed;
    str_it cursor = str.begin();
    str_it stop = str.end();
    op_grammar<str_it> grammar;
    const bool matched = qi::phrase_parse(cursor, stop, grammar, qi::space, parsed);
    if (!matched || cursor != stop) {
      return function_object("Plural-Forms expression reader: Could not parse expression, stopped parsing at:\n" + string_iterator_context(str, cursor));
    }
    return function_object(std::move(parsed));
  }
};
} // end namespace default_plura_forms
} // end namespace spirit_po
#endif // SPIRIT_PO_DEFAULT_PLURAL_FORMS_COMPILER_HPP_INCLUDED

View file

@ -0,0 +1,577 @@
// (C) Copyright 2015 - 2016 Christopher Beck
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef SPIRIT_PO_DEFAULT_PLURAL_FORMS_EXPRESSIONS_HPP_INCLUDED
#define SPIRIT_PO_DEFAULT_PLURAL_FORMS_EXPRESSIONS_HPP_INCLUDED
/***
* The namespace default_plural_forms contains all the details to implement
* the subset of the C grammar used by standard GNU gettext po headers.
*
* Boolean expressions return uint 0 or 1.
*
* The 'compiler' is a spirit grammar which parses a string into an expression
* object. The expressions are evaluated by a simple stack machine.
*/
#ifndef BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_SPIRIT_USE_PHOENIX_V3
#endif
#include <algorithm>
#include <vector>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/variant/variant.hpp>
#include <boost/variant/recursive_wrapper.hpp>
#ifdef SPIRIT_PO_DEBUG
#include <cassert>
#include <string>
#endif
namespace spirit_po {
namespace qi = boost::spirit::qi;
// Shorthand for unsigned int; the value type of every expression.
typedef unsigned int uint;
namespace default_plural_forms {
// X Macro for repetitive binary ops declarations.
// X_ is invoked once per operator as X_(struct_name, operator_token).
#define FOREACH_SPIRIT_PO_BINARY_OP(X_) \
X_(eq_op, ==) X_(neq_op, !=) X_(ge_op, >=) X_(le_op, <=) X_(gt_op, >) X_(lt_op, <) X_(mod_op, %)
// && and || are treated slightly differently from other binary ops,
// so they get their own list.
#define FOREACH_SPIRIT_PO_CONJUNCTION(X_) \
X_(and_op, &&) X_(or_op, ||)
/***
 * Declare / forward declare expr struct types
 *
 * constant and n_var are complete here; the operator structs contain expr
 * members and are therefore only forward declared until expr is defined.
 */
struct constant { uint value; };
struct n_var { n_var() = default; explicit n_var(char) {}}; // work around a quirk in spirit
struct not_op;
struct ternary_op;
// The macro body must end on its own line: a trailing line continuation
// would pull the following FOREACH_... invocation into the definition.
#define FWD_DECL_(name, op) struct name ;
FOREACH_SPIRIT_PO_BINARY_OP(FWD_DECL_)
FOREACH_SPIRIT_PO_CONJUNCTION(FWD_DECL_)
#undef FWD_DECL_
/***
* Define expr variant type
*/
#define WRAP_(name, op) boost::recursive_wrapper< name >, \
typedef boost::variant<constant, n_var, boost::recursive_wrapper<not_op>,
FOREACH_SPIRIT_PO_BINARY_OP(WRAP_)
FOREACH_SPIRIT_PO_CONJUNCTION(WRAP_)
boost::recursive_wrapper<ternary_op>> expr;
#undef WRAP_
/***
 * Define structs
 *
 * not_op has one operand, ternary_op has three, and every binary operator
 * and conjunction has two (e1 is the left operand, e2 the right).
 */
struct not_op { expr e1; };
struct ternary_op { expr e1, e2, e3; };
// The macro body must end on its own line: a trailing line continuation
// would pull the following FOREACH_... invocation into the definition.
#define DECL_(name, op) struct name { expr e1, e2; };
FOREACH_SPIRIT_PO_BINARY_OP(DECL_)
FOREACH_SPIRIT_PO_CONJUNCTION(DECL_)
#undef DECL_
/***
 * Visitor that naively evaluates expressions
 *
 * Constructed with the value of n; applying it to an expr walks the tree
 * recursively and yields the expression's value as uint. Comparison and
 * logical operators yield 0 or 1.
 *
 * NOTE(review): mod_op is evaluated with the built-in % operator, so an
 * expression whose right operand evaluates to 0 is undefined behaviour.
 */
struct evaluator : public boost::static_visitor<uint> {
  uint n_value_;
  explicit evaluator(uint n) : n_value_(n) {}
  uint operator()(const constant & c) const { return c.value; }
  uint operator()(n_var) const { return n_value_; }
  uint operator()(const not_op & op) const { return !boost::apply_visitor(*this, op.e1); }
// One overload per binary operator / conjunction. The macro body must end on
// its own line: a trailing line continuation would pull the following
// FOREACH_... invocation into the definition.
#define EVAL_OP_(name, OPERATOR) \
  uint operator()(const name & op) const { return (boost::apply_visitor(*this, op.e1)) OPERATOR (boost::apply_visitor(*this, op.e2)); }
  FOREACH_SPIRIT_PO_BINARY_OP(EVAL_OP_)
  FOREACH_SPIRIT_PO_CONJUNCTION(EVAL_OP_)
#undef EVAL_OP_
  uint operator()(const ternary_op & op) const { return boost::apply_visitor(*this, op.e1) ? boost::apply_visitor(*this, op.e2) : boost::apply_visitor(*this, op.e3); }
};
} // end namespace default_plural_forms
} // end namespace spirit_po
/***
 * Adapt structs for fusion / qi
 *
 * These invocations sit at global scope, outside namespace spirit_po, so all
 * types are written with full qualification. The member type of constant is
 * spelled `unsigned int` because the spirit_po::uint typedef is not visible
 * here.
 */
BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms::constant,
                          (unsigned int, value))
BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms::not_op,
                          (spirit_po::default_plural_forms::expr, e1))
BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms::ternary_op,
                          (spirit_po::default_plural_forms::expr, e1)
                          (spirit_po::default_plural_forms::expr, e2)
                          (spirit_po::default_plural_forms::expr, e3))
// The macro body must end on its own line: a trailing line continuation
// would pull the following FOREACH_... invocation into the definition.
#define ADAPT_STRUCT_(name, op) \
BOOST_FUSION_ADAPT_STRUCT(spirit_po::default_plural_forms:: name, \
                          (spirit_po::default_plural_forms::expr, e1) \
                          (spirit_po::default_plural_forms::expr, e2))
FOREACH_SPIRIT_PO_BINARY_OP(ADAPT_STRUCT_)
FOREACH_SPIRIT_PO_CONJUNCTION(ADAPT_STRUCT_)
#undef ADAPT_STRUCT_
namespace spirit_po {
namespace default_plural_forms {
/***
* Pseudo-C Grammar
*
* Note that the grammar has been somewhat optimized by using local variables
* and inherited attributes, in order to avoid exponential backtracking overhead.
* This makes it a little harder to read than if we got rid of all local variables,
* but then it is too slow to parse the expressions for certain languages.
*
* The main idea is that instead of parsing things like
*
* BINARY_OP = LOWER_PRECEDENCE >> BINARY_OP_LITERAL >> CURRENT_PRECEDENCE
* CURRENT_PRECEDENCE = BINARY_OP | OTHER_OP | YET_ANOTHER_OP | LOWER_PRECEDENCE
*
* (which is bad because if the binary op literal is not there then we have to
* backtrack through an entire subexpression)
*
* we make BINARY_OP take the subexpression as a parameter, and in each
* precedence level, we capture the subexpression first and store it in a local
* variable, so that it does not get reparsed when we backtrack.
*
* BINARY_OP = BINARY_OP_LITERAL >> qi::attr(parameter) >> CURRENT_PRECEDENCE
*
* CURRENT_PRECEDENCE = LOWER_PRECEDENCE[local_var = result] >>
* (BINARY_OP(local_var) | OTHER_OP(local_var) | YET_ANOTHER_OP(local_var) | qi::attr(local_var))
*
*/
template <typename Iterator>
struct op_grammar : qi::grammar<Iterator, expr(), qi::space_type> {
// Leaf and operator rules. Operator rules taking `expr` as an inherited
// attribute receive their already-parsed left operand through it.
qi::rule<Iterator, constant(), qi::space_type> constant_;
qi::rule<Iterator, n_var(), qi::space_type> n_;
qi::rule<Iterator, not_op(), qi::space_type> not_;
qi::rule<Iterator, and_op(expr), qi::space_type> and_;
qi::rule<Iterator, or_op(expr), qi::space_type> or_;
qi::rule<Iterator, eq_op(expr), qi::space_type> eq_;
qi::rule<Iterator, neq_op(expr), qi::space_type> neq_;
qi::rule<Iterator, ge_op(expr), qi::space_type> ge_;
qi::rule<Iterator, le_op(expr), qi::space_type> le_;
qi::rule<Iterator, gt_op(expr), qi::space_type> gt_;
qi::rule<Iterator, lt_op(expr), qi::space_type> lt_;
qi::rule<Iterator, mod_op(expr), qi::space_type> mod_;
qi::rule<Iterator, ternary_op(expr), qi::space_type> ternary_;
qi::rule<Iterator, expr(), qi::space_type> paren_expr_;
// expression precedence levels
// (each level keeps the operand parsed by the next-lower level in its local _a)
qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> ternary_level_;
qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> or_level_;
qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> and_level_;
qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> eq_level_;
qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> rel_level_;
qi::rule<Iterator, expr(), qi::space_type, qi::locals<expr>> mod_level_;
qi::rule<Iterator, expr(), qi::space_type> atom_level_;
qi::rule<Iterator, expr(), qi::space_type> expr_;
// handle optional ';' at end
qi::rule<Iterator, expr(), qi::space_type> main_;
op_grammar() : op_grammar::base_type(main_) {
using qi::attr;
using qi::lit;
// atoms: unsigned literal, the variable 'n', parenthesised expression, negation
constant_ = qi::uint_;
n_ = qi::char_('n');
paren_expr_ = lit('(') >> expr_ >> lit(')');
not_ = lit('!') >> atom_level_;
atom_level_ = paren_expr_ | not_ | n_ | constant_;
// '%' : right operand is a single atom
mod_ = lit('%') >> attr(qi::_r1) >> atom_level_;
mod_level_ = qi::omit[atom_level_[qi::_a = qi::_1]] >> (mod_(qi::_a) | attr(qi::_a));
// relational operators; the two-character forms are tried before '>' and '<'
ge_ = lit(">=") >> attr(qi::_r1) >> mod_level_;
le_ = lit("<=") >> attr(qi::_r1) >> mod_level_;
gt_ = lit('>') >> attr(qi::_r1) >> mod_level_;
lt_ = lit('<') >> attr(qi::_r1) >> mod_level_;
rel_level_ = qi::omit[mod_level_[qi::_a = qi::_1]] >> (ge_(qi::_a) | le_(qi::_a) | gt_(qi::_a) | lt_(qi::_a) | attr(qi::_a));
// equality operators
eq_ = lit("==") >> attr(qi::_r1) >> rel_level_;
neq_ = lit("!=") >> attr(qi::_r1) >> rel_level_;
eq_level_ = qi::omit[rel_level_[qi::_a = qi::_1]] >> (eq_(qi::_a) | neq_(qi::_a) | attr(qi::_a));
// '&&' and '||' recurse into their own level for the right operand
and_ = lit("&&") >> attr(qi::_r1) >> and_level_;
and_level_ = qi::omit[eq_level_[qi::_a = qi::_1]] >> (and_(qi::_a) | attr(qi::_a));
or_ = lit("||") >> attr(qi::_r1) >> or_level_;
or_level_ = qi::omit[and_level_[qi::_a = qi::_1]] >> (or_(qi::_a) | attr(qi::_a));
// 'cond ? a : b' : both branches are full ternary-level expressions
ternary_ = lit('?') >> attr(qi::_r1) >> ternary_level_ >> lit(':') >> ternary_level_;
ternary_level_ = qi::omit[or_level_[qi::_a = qi::_1]] >> (ternary_(qi::_a) | attr(qi::_a));
expr_ = ternary_level_;
main_ = expr_ >> -lit(';');
}
};
/***
 * Now define a simple stack machine to evaluate the expressions efficiently.
 *
 * First define op_codes: one for the variable n, one for each binary
 * operator in FOREACH_SPIRIT_PO_BINARY_OP, and one for logical not.
 */
#define SPIRIT_PO_OP_CODE_ENTRY_(NAME, SYMBOL) NAME,
enum class op_code {
  n_var,
  FOREACH_SPIRIT_PO_BINARY_OP(SPIRIT_PO_OP_CODE_ENTRY_)
  not_op
};
#undef SPIRIT_PO_OP_CODE_ENTRY_
/// Instruction that causes us to skip upcoming instructions;
/// `distance` is the operand carried by the instruction.
struct skip { uint distance; };
/// Instructions that conditionally cause us to skip upcoming instructions
/// (skip_if and its counterpart skip_if_not); `distance` is the operand.
struct skip_if { uint distance; };
/// Conditional skip instruction, counterpart of skip_if; `distance` is the operand.
struct skip_if_not { uint distance; };
/***
* Instruction is a variant type that represents either a push_constant, branch, jump, or arithmetic op.
* The alternatives are: constant (push), skip (jump), skip_if / skip_if_not (branches), op_code (operation).
*/
typedef boost::variant<constant, skip, skip_if, skip_if_not, op_code> instruction;
/***
* Debug strings for instruction set
*/
#ifdef SPIRIT_PO_DEBUG
/// Render an op_code as a bracketed debug string such as "[ n  : ]".
/// `n_var` and `not_op` are spelled out here; every binary operator prints the
/// operator text given as the second argument of its FOREACH_SPIRIT_PO_BINARY_OP
/// entry.
inline std::string op_code_string(op_code oc) {
  std::string result = "[ ";
  switch (oc) {
    case op_code::n_var: {
      result += "n ";
      break;
    }
    case op_code::not_op: {
      result += "! ";
      break;
    }
#define OP_CODE_STR_CASE_(X, Y) \
    case op_code::X: { \
      result += #Y; \
      break; \
    }
    FOREACH_SPIRIT_PO_BINARY_OP(OP_CODE_STR_CASE_)
#undef OP_CODE_STR_CASE_
  }
  // Pad short mnemonics by one space before the closing part.
  // (This line used to end in a stray line-continuation backslash that spliced
  // the following statement onto it; harmless, but a trap for later edits.)
  if (result.size() < 5) { result += ' '; }
  result += " : ]";
  return result;
}
/// Visitor that renders a single instruction as a bracketed debug string.
/// Value-carrying instructions print a mnemonic followed by their payload;
/// op_codes are delegated to op_code_string.
struct instruction_debug_string_maker : boost::static_visitor<std::string> {
  std::string operator()(const constant & c) const {
    return with_payload("[ push : ", c.value);
  }

  std::string operator()(const skip & s) const {
    return with_payload("[ skip : ", s.distance);
  }

  std::string operator()(const skip_if & s) const {
    return with_payload("[ sif : ", s.distance);
  }

  std::string operator()(const skip_if_not & s) const {
    return with_payload("[ sifn : ", s.distance);
  }

  std::string operator()(const op_code & oc) const {
    return op_code_string(oc);
  }

private:
  /// Build `<prefix><payload> ]`, formatting the payload with std::to_string.
  template <typename T>
  static std::string with_payload(const char * prefix, const T & payload) {
    std::string text(prefix);
    text += std::to_string(payload);
    text += " ]";
    return text;
  }
};
inline std::string debug_string(const instruction & i) {
return boost::apply_visitor(instruction_debug_string_maker{}, i);
}
#endif // SPIRIT_PO_DEBUG
/***
 * Helper: check whether an expression is obviously zero-one valued.
 *
 * The answer is conservative: `true` means the node can only produce 0 or 1,
 * `false` means that this visitor could not tell.
 */
struct is_boolean : public boost::static_visitor<bool> {
  // Leaves: the variable n can be anything, a literal is boolean only if it is 0 or 1.
  bool operator()(const n_var &) const { return false; }
  bool operator()(const constant & c) const {
    const bool is_zero = (c.value == 0);
    const bool is_one = (c.value == 1);
    return is_zero || is_one;
  }

  // Logical operators.
  bool operator()(const not_op &) const { return true; }
  bool operator()(const and_op &) const { return true; }
  bool operator()(const or_op &) const { return true; }

  // Comparisons.
  bool operator()(const eq_op &) const { return true; }
  bool operator()(const neq_op &) const { return true; }
  bool operator()(const ge_op &) const { return true; }
  bool operator()(const le_op &) const { return true; }
  bool operator()(const gt_op &) const { return true; }
  bool operator()(const lt_op &) const { return true; }

  // A modulus is judged by its left operand only.
  bool operator()(const mod_op & m) const {
    return boost::apply_visitor(*this, m.e1);
  }

  // A ternary is boolean when both of its result branches are.
  bool operator()(const ternary_op & t) const {
    if (!boost::apply_visitor(*this, t.e2)) { return false; }
    return boost::apply_visitor(*this, t.e3);
  }
};
/***
 * Visitor that maps expressions to instruction sequences.
 *
 * Operands are emitted before the operator that consumes them. The operators
 * `&&`, `||` and `?:` are compiled to skip instructions so that the operand or
 * branch that is not needed is never executed.
 */
struct emitter : public boost::static_visitor<std::vector<instruction>> {
  /// A literal becomes a single "push constant" instruction.
  std::vector<instruction> operator()(const constant & c) const {
    return std::vector<instruction>(1, instruction{c});
  }

  /// The variable n becomes a single n_var instruction.
  std::vector<instruction> operator()(const n_var &) const {
    return std::vector<instruction>(1, instruction{op_code::n_var});
  }

  /// Logical not: operand code followed by not_op.
  std::vector<instruction> operator()(const not_op & o) const {
    auto code = boost::apply_visitor(*this, o.e1);
    code.emplace_back(op_code::not_op);
    return code;
  }

  // Plain binary operators: left operand, right operand, then the op_code of
  // the same name as the expression node.
#define EMIT_OP_(name, op) \
  std::vector<instruction> operator()(const name & o) const { \
    auto code = boost::apply_visitor(*this, o.e1); \
    auto rhs = boost::apply_visitor(*this, o.e2); \
    append(code, rhs); \
    code.emplace_back(op_code::name); \
    return code; \
  }
  FOREACH_SPIRIT_PO_BINARY_OP(EMIT_OP_)
#undef EMIT_OP_

  /***
   * We make &&, ||, and ? shortcut
   */

  /// `a && b`: a false left operand yields 0 without running b; otherwise the
  /// result is b, forced to 0/1 with a double negation unless b is known boolean.
  std::vector<instruction> operator()(const and_op & o) const {
    auto code = boost::apply_visitor(*this, o.e1);
    auto rhs = boost::apply_visitor(*this, o.e2);
    const bool rhs_is_boolean = boost::apply_visitor(is_boolean{}, o.e2);

    // Number of instructions the short-circuit path has to jump over.
    uint rhs_len = static_cast<uint>(rhs.size());
    if (!rhs_is_boolean) { rhs_len += 2; }

    code.emplace_back(skip_if{2});     // left operand true: go evaluate the right operand
    code.emplace_back(constant{0});    // left operand false: the result is 0 ...
    code.emplace_back(skip{rhs_len});  // ... and the right operand is skipped
    append(code, rhs);
    if (!rhs_is_boolean) { append_double_negation(code); }
    return code;
  }

  /// `a || b`: a true left operand yields 1 without running b; otherwise the
  /// result is b, forced to 0/1 with a double negation unless b is known boolean.
  std::vector<instruction> operator()(const or_op & o) const {
    auto code = boost::apply_visitor(*this, o.e1);
    auto rhs = boost::apply_visitor(*this, o.e2);
    const bool rhs_is_boolean = boost::apply_visitor(is_boolean{}, o.e2);

    // Number of instructions the short-circuit path has to jump over.
    uint rhs_len = static_cast<uint>(rhs.size());
    if (!rhs_is_boolean) { rhs_len += 2; }

    code.emplace_back(skip_if_not{2}); // left operand false: go evaluate the right operand
    code.emplace_back(constant{1});    // left operand true: the result is 1 ...
    code.emplace_back(skip{rhs_len});  // ... and the right operand is skipped
    append(code, rhs);
    if (!rhs_is_boolean) { append_double_negation(code); }
    return code;
  }

  /// `c ? t : f`: only the selected branch is executed.
  std::vector<instruction> operator()(const ternary_op & o) const {
    auto code = boost::apply_visitor(*this, o.e1);
    auto when_true = boost::apply_visitor(*this, o.e2);
    auto when_false = boost::apply_visitor(*this, o.e3);
    const uint true_len = static_cast<uint>(when_true.size());
    const uint false_len = static_cast<uint>(when_false.size());

    // We use skip-if / skip-if-not in the way that lets us put the shorter branch first.
    const bool false_branch_first = when_true.size() > when_false.size();
    if (false_branch_first) {
      // + 1 because the first branch is followed by a skip over the second one
      code.emplace_back(skip_if{false_len + 1});
      append(code, when_false);
      code.emplace_back(skip{true_len});
      append(code, when_true);
    } else {
      // + 1 because the first branch is followed by a skip over the second one
      code.emplace_back(skip_if_not{true_len + 1});
      append(code, when_true);
      code.emplace_back(skip{false_len});
      append(code, when_false);
    }
    return code;
  }

private:
  /// Move every instruction of `tail` onto the end of `code`.
  static void append(std::vector<instruction> & code, std::vector<instruction> & tail) {
    std::move(tail.begin(), tail.end(), std::back_inserter(code));
  }

  /// Append `!!`, which maps any non-zero value on top of the stack to 1.
  static void append_double_negation(std::vector<instruction> & code) {
    code.emplace_back(op_code::not_op);
    code.emplace_back(op_code::not_op);
  }
};
/***
 * Actual stack machine
 *
 * The expression is compiled once, in the constructor, into a flat instruction
 * sequence; `compute` then runs that sequence for a given value of n. The
 * machine is itself the visitor applied to each instruction: every overload of
 * operator() performs one instruction and returns how far to advance the
 * program counter. `compute` overwrites the stored n value and the operand
 * stack on every call.
 */
class stack_machine : public boost::static_visitor<uint> {
  std::vector<instruction> instruction_seq_; // compiled program
  std::vector<uint> stack_;                  // operand stack, cleared by every compute()
  uint n_value_;                             // value pushed by op_code::n_var

#ifdef SPIRIT_PO_DEBUG
public:
  /// Print the compiled program to std::cerr, one instruction per line.
  void debug_print_instructions() const {
    std::cerr << "Instruction sequence:\n";
    for (const auto & i : instruction_seq_) {
      std::cerr << debug_string(i) << std::endl;
    }
  }

private:
// Debug builds: a failed check prints the program and then asserts.
#define MACHINE_ASSERT(X) \
  do { \
    if (!(X)) { \
      std::cerr << "Stack machine failure:\n"; \
      debug_print_instructions(); \
      assert(false && #X); \
    } \
  } while(0)

#else // SPIRIT_PO_DEBUG
// Non-debug builds: the checks expand to nothing and their argument is not evaluated.
#define MACHINE_ASSERT(...) do {} while(0)
#endif // SPIRIT_PO_DEBUG

  /// Remove and return the top of the operand stack.
  uint pop_one() {
    MACHINE_ASSERT(stack_.size());
    uint result = stack_.back();
    stack_.pop_back();
    return result;
  }

public:
  /// Compile `e` into an instruction sequence using `emitter`.
  explicit stack_machine(const expr & e)
    : instruction_seq_(boost::apply_visitor(emitter{}, e))
    , stack_()
    , n_value_()
  {}

  /***
   * operator() takes the instruction that we should execute
   * It should perform the operation adjusting the stack
   * It returns the amount by which we should increment the
   * program counter.
   */

  /// Push a literal.
  uint operator()(const constant & c) {
    stack_.emplace_back(c.value);
    return 1;
  }

  /// Unconditionally jump over `distance` instructions.
  uint operator()(const skip & s) {
    return 1 + s.distance;
  }

  /// Pop the condition; jump over `distance` instructions if it is non-zero.
  uint operator()(const skip_if & s) {
    return 1 + (pop_one() ? s.distance : 0);
  }

  /// Pop the condition; jump over `distance` instructions if it is zero.
  uint operator()(const skip_if_not & s) {
    return 1 + (pop_one() ? 0 : s.distance);
  }

  /// Execute an operator: push n, negate the top of the stack, or combine the
  /// two topmost values with a binary operator.
  uint operator()(op_code oc) {
    switch (oc) {
      case op_code::n_var: {
        stack_.emplace_back(n_value_);
        return 1;
      }
      case op_code::not_op: {
        MACHINE_ASSERT(stack_.size());
        stack_.back() = !stack_.back();
        return 1;
      }
// Binary operators pop the right operand and combine it, in place, with the
// left operand that remains on top of the stack.
// `x % 0` is undefined behaviour in C++, and in non-debug builds the assertion
// below compiles away, so a zero modulus is given the defined result 0 rather
// than being executed.
#define STACK_MACHINE_CASE_(name, op) \
      case op_code::name: { \
        MACHINE_ASSERT(stack_.size() >= 2); \
        uint parm2 = pop_one(); \
        \
        if (op_code::name == op_code::mod_op) { \
          MACHINE_ASSERT(parm2 && "Division by zero when evaluating gettext plural form expression"); \
          if (!parm2) { \
            stack_.back() = 0; \
            return 1; \
          } \
        } \
        \
        stack_.back() = (stack_.back() op parm2); \
        return 1; \
      }
      FOREACH_SPIRIT_PO_BINARY_OP(STACK_MACHINE_CASE_)
#undef STACK_MACHINE_CASE_
    }
    // Every enumerator returns from inside the switch.
    MACHINE_ASSERT(false);
    return 1;
  }

  /// Evaluate the compiled expression with n = `arg` and return the value left
  /// on the stack.
  uint compute(uint arg) {
    n_value_ = arg;
    stack_.clear();
    uint pc = 0;
    while (pc < instruction_seq_.size()) {
      pc += boost::apply_visitor(*this, instruction_seq_[pc]);
    }
    MACHINE_ASSERT(pc == instruction_seq_.size());
    MACHINE_ASSERT(stack_.size() == 1);
    return stack_[0];
  }
};
#undef MACHINE_ASSERT
// X macros not used anymore
#undef FOREACH_SPIRIT_PO_BINARY_OP
#undef FOREACH_SPIRIT_PO_CONJUNCTION
} // end namespace default_plural_forms
} // end namespace spirit_po
#endif // SPIRIT_PO_DEFAULT_PLURAL_FORMS_EXPRESSIONS_HPP_INCLUDED

View file

@ -0,0 +1,79 @@
// (C) Copyright 2015 - 2016 Christopher Beck
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef SPIRIT_PO_EXCEPTIONS_HPP_INCLUDED
#define SPIRIT_PO_EXCEPTIONS_HPP_INCLUDED
#include <boost/spirit/include/support_line_pos_iterator.hpp>
#include <string>
namespace spirit_po {
// Show up to the next 80 characters from some iterator position.
// Intended to be used for parser error messages.
//
// When boost::spirit::get_line reports a line number for `it` (anything other
// than size_t(-1)), the excerpt is prefixed with "Line N:\n".
// Note that `it` is taken by reference and is left pointing just past the
// characters that were copied into the result.
template <typename Iterator>
std::string iterator_context(Iterator & it, Iterator & end) {
  std::string result;
  std::size_t line_no = boost::spirit::get_line(it);
  if (line_no != static_cast<std::size_t>(-1)) {
    result = "Line " + std::to_string(line_no) + ":\n";
  }

  // Spelled `unsigned int`: this header neither declares nor includes a
  // declaration of the `uint` shorthand.
  unsigned int count = 80;
  while (it != end && count) {
    result += *it;
    ++it;
    --count;
  }
  return result;
}
// When the thing being parsed is a short string, we can give a better context
// report: the whole string on one line, and on the next line a caret placed
// under the character that `it` refers to. `it` must point into `str`.
inline std::string string_iterator_context(const std::string & str,
                                           std::string::const_iterator it) {
  std::string result = str + "\n";
  // One space for every character that precedes `it`.
  result.append(static_cast<std::string::size_type>(it - str.begin()), ' ');
  result.append("^\n");
  return result;
}
} // end namespace spirit_po
#ifdef SPIRIT_PO_NOEXCEPT
// No-exceptions flavour (SPIRIT_PO_NOEXCEPT is defined): store the message in
// whatever `error_message_` names at the point of use, then return from the
// current function. Because it expands to a bare `return ;`, it can only be
// used where returning without a value is allowed.
#define SPIRIT_PO_CATALOG_FAIL(Message) \
do { \
error_message_ = (Message); \
return ; \
} while(0)
#else // SPIRIT_PO_NOEXCEPT
#include <stdexcept>
namespace spirit_po {
/// Exception thrown by SPIRIT_PO_CATALOG_FAIL when exceptions are enabled.
/// It carries nothing beyond the message held by std::runtime_error.
struct catalog_exception : std::runtime_error {
  /// Build from a C string describing the failure.
  catalog_exception(const char * message)
    : std::runtime_error(message)
  {}

  /// Build from a std::string describing the failure.
  catalog_exception(const std::string & message)
    : std::runtime_error(message)
  {}
};
} // end namespace spirit_po
// Throwing flavour (SPIRIT_PO_NOEXCEPT is not defined): report the failure by
// throwing spirit_po::catalog_exception constructed from the message.
#define SPIRIT_PO_CATALOG_FAIL(Message) \
do { \
throw spirit_po::catalog_exception(( Message )); \
} while(0)
#endif // SPIRIT_PO_NOEXCEPT
#endif // SPIRIT_PO_EXCEPTIONS_HPP_INCLUDED

View file

@ -0,0 +1,128 @@
// (C) Copyright 2015 - 2016 Christopher Beck
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef SPIRIT_PO_PO_GRAMMAR_HPP_INCLUDED
#define SPIRIT_PO_PO_GRAMMAR_HPP_INCLUDED
#ifndef BOOST_SPIRIT_USE_PHOENIX_V3
#define BOOST_SPIRIT_USE_PHOENIX_V3
#endif
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/fusion/include/define_struct.hpp>
#include <spirit_po/po_message_adapted.hpp>
#include <boost/optional/optional.hpp>
#include <string>
#include <utility>
#include <vector>
namespace spirit_po {
typedef unsigned int uint;
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
/***
 * Spirit.Qi grammar for po files.
 *
 * The start rule is `message`, which parses one entry (optional msgctxt, msgid,
 * then either msgid_plural + msgstr[N]... or a single msgstr) into a po_message.
 * The remaining public rules (`ignored_comments`, `message_preamble`, ...) are
 * helpers for consuming comments and blank lines around entries and for
 * detecting the "fuzzy" flag.
 */
template <typename Iterator>
struct po_grammar : qi::grammar<Iterator, po_message()> {
  qi::rule<Iterator> white_line;
  qi::rule<Iterator> comment_line;
  qi::rule<Iterator> skipped_line;
  qi::rule<Iterator> skipped_block;

  qi::rule<Iterator, char()> escaped_character;
  qi::rule<Iterator, std::string()> single_line_string;
  qi::rule<Iterator, std::string()> multiline_string;

  qi::rule<Iterator, std::string()> message_id;
  qi::rule<Iterator, std::string()> message_id_plural;
  qi::rule<Iterator, std::string()> message_context;
  qi::rule<Iterator, std::string()> message_str;
  qi::rule<Iterator, std::string(uint)> message_str_plural;

  qi::rule<Iterator, std::vector<std::string>()> message_single_str;
  qi::rule<Iterator, std::vector<std::string>(uint)> message_strs;

  qi::rule<Iterator, plural_and_strings_type()> message_singular;
  qi::rule<Iterator, plural_and_strings_type()> message_plural;

  qi::rule<Iterator, po_message()> message;

  // Related to parsing "fuzzy" po comment
  qi::rule<Iterator, qi::locals<bool>> fuzzy;
  qi::rule<Iterator> preamble_comment_line;
  qi::rule<Iterator> preamble_comment_block;

  /// consume any number of blocks, consisting of any number of comments followed by a white line
  qi::rule<Iterator> ignored_comments;

  /// consume any number of non-white comment line (using #). bool result represents if we saw #, fuzzy comment
  qi::rule<Iterator, bool()> message_preamble;

  po_grammar() : po_grammar::base_type(message) {
    using qi::attr;
    using qi::char_;
    using qi::eoi;
    using qi::lit;
    using qi::omit;
    using qi::uint_;

    white_line = *char_(" \t\r"); // nullable
    comment_line = char_('#') >> *(char_ - '\n'); // not nullable
    skipped_line = (comment_line | white_line) >> lit('\n'); // not nullable
    skipped_block = *skipped_line; // nullable

    // Backslash escapes. Quotes and the backslash stand for themselves; the
    // single-letter C escapes n, t, r, a, b, f, v map to their control
    // characters. Octal (\NNN) and hexadecimal (\xHH) escapes are not handled.
    escaped_character = lit('\\') >> (char_("\'\"\\")
                                      | (lit('n') >> attr('\n'))
                                      | (lit('t') >> attr('\t'))
                                      | (lit('r') >> attr('\r'))
                                      | (lit('a') >> attr('\a'))
                                      | (lit('b') >> attr('\b'))
                                      | (lit('f') >> attr('\f'))
                                      | (lit('v') >> attr('\v')));

    // Excluding '\\' from the plain-character branch is important: if we don't
    // have this then \\ does not have to be escaped in a po string, and an
    // illegal escape code would just be taken as literal text.
    single_line_string = lit('"') >> *(escaped_character | (char_ - '\\' - '"')) >> lit('"');
    multiline_string = single_line_string % skipped_block;

    message_context = skipped_block >> lit("msgctxt ") >> multiline_string;
    message_id = skipped_block >> lit("msgid ") >> multiline_string;
    message_str = skipped_block >> lit("msgstr ") >> multiline_string;
    message_id_plural = skipped_block >> lit("msgid_plural ") >> multiline_string;

    // The index in the po file must match what we expect (the inherited attribute)
    message_str_plural = skipped_block >> lit("msgstr[") >> omit[ uint_(qi::_r1) ] >> lit("] ") >> multiline_string;

    // qi::repeat converts it from a std::string, to a singleton vector, as required
    message_single_str = qi::repeat(1)[message_str];

    // The recursion with index + 1 enforces that indices must count up
    message_strs = message_str_plural(qi::_r1) >> -message_strs(qi::_r1 + 1);

    // Detect whether we should read multiple messages or a single message by presence of `msgid_plural`
    message_plural = message_id_plural >> message_strs(0); // first line should be msgstr[0]
    message_singular = attr("") >> message_single_str;
    message = -message_context >> message_id >> (message_plural | message_singular);

    /***
     * The remaining rules are not contributing to message -- their job is to consume comments leading up to the message,
     * keep track of if we saw a fuzzy marker, and to consume the entire file if only whitespace lines remain, whether or
     * not it ends in new-line.
     *
     * First, parse "ignored_comments",
     * message_preamble is the main rule of this section
     */

    /// Fuzzy: Expect comment of the form #, with literal `, fuzzy` in the list somewhere.
    /// We use a qi local to keep track of if we saw it, this avoids excessive backtracking
    fuzzy = lit('#') >> (&lit(','))[qi::_a = false] >> *(lit(',') >> -(lit(" fuzzy")[qi::_a = true]) >> *(char_ - '\n' - ',')) >> lit('\n') >> qi::eps(qi::_a);

    preamble_comment_line = comment_line >> lit('\n');

    ignored_comments = *(*preamble_comment_line >> white_line >> lit('\n'));

    // If the po-file ends in a comment without eol we should still consume it.
    // preamble_comment_block is nullable.
    preamble_comment_block = *preamble_comment_line >> -comment_line;

    // Three alternatives, tried in order (message_preamble is nullable):
    //  1. we find fuzzy: short cut out of this test, consume the rest of the
    //     comment block and return true (no backtrack after fuzzy);
    //  2. consume one comment line and repeat (no backtrack after the comment line);
    //  3. didn't find fuzzy: consume a trailing comment, if any, and return false.
    message_preamble = (fuzzy >> preamble_comment_block >> attr(true)) | (preamble_comment_line >> message_preamble) | (-comment_line >> attr(false));
  }
};
} // end namespace spirit_po
#endif // SPIRIT_PO_PO_GRAMMAR_HPP_INCLUDED

View file

@ -0,0 +1,62 @@
// (C) Copyright 2015 - 2016 Christopher Beck
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef SPIRIT_PO_PO_MESSAGE_HPP_INCLUDED
#define SPIRIT_PO_PO_MESSAGE_HPP_INCLUDED
#include <boost/optional/optional.hpp>
#include <string>
#include <utility>
#include <vector>
namespace spirit_po {
typedef std::pair<std::string, std::vector<std::string>> plural_and_strings_type;
/// One entry of a po file: optional context, message id, and the pair
/// (plural id, translated strings), plus a line number.
struct po_message {
  boost::optional<std::string> context;
  std::string id;
  plural_and_strings_type plural_and_strings;
  std::size_t line_no;

  // Named access to the two halves of `plural_and_strings`.
  // They are stored as a pair to allow for simpler parsing with spirit attributes.
  std::string & id_plural() {
    return plural_and_strings.first;
  }
  const std::string & id_plural() const {
    return plural_and_strings.first;
  }

  std::vector<std::string> & strings() {
    return plural_and_strings.second;
  }
  const std::vector<std::string> & strings() const {
    return plural_and_strings.second;
  }

  // A message counts as plural exactly when its msgid_plural is non-empty.
  // Prefer this method over testing the field directly, in case the rule changes.
  bool is_plural() const {
    return !id_plural().empty();
  }
};
/***
* Debug printer
*/
#ifdef SPIRIT_PO_DEBUG
/// Render a po_message as a brace-delimited, multi-line string for debugging.
/// The context line is only present when the message has a context.
inline std::string debug_string(const po_message & msg) {
  std::string result = "{\n";
  if (msg.context) {
    result += " context: \"" + *msg.context + "\"\n";
  }
  result += " id: \"" + msg.id + "\"\n";
  result += " id_plural: \"" + msg.id_plural() + "\"\n";
  result += " strings: { ";
  // Range-for instead of an index typed `uint`: this header does not declare
  // that shorthand, and no index is needed beyond "is this the first element".
  bool first = true;
  for (const std::string & s : msg.strings()) {
    if (!first) { result += ", "; }
    first = false;
    result += '"' + s + '"';
  }
  result += " }\n";
  result += "}";
  return result;
}
#endif // SPIRIT_PO_DEBUG
} // end namespace spirit_po
#endif // SPIRIT_PO_PO_MESSAGE_HPP_INCLUDED

View file

@ -0,0 +1,26 @@
// (C) Copyright 2015 - 2016 Christopher Beck
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef SPIRIT_PO_PO_MESSAGE_ADAPTED_HPP_INCLUDED
#define SPIRIT_PO_PO_MESSAGE_ADAPTED_HPP_INCLUDED
#include <boost/fusion/adapted/struct/adapt_struct.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/optional/optional.hpp>
#include <string>
#include <utility>
#include <vector>
#include <spirit_po/po_message.hpp>
// Adapt po_message as a Boost.Fusion sequence of three members, in this order:
// context, id, plural_and_strings. The `line_no` member is not listed, so it is
// not part of the adapted sequence.
// NOTE(review): presumably this is what lets the po_grammar `message` rule
// (attribute po_message()) fill the struct member by member -- confirm.
BOOST_FUSION_ADAPT_STRUCT(
spirit_po::po_message,
(boost::optional<std::string>, context)
(std::string, id)
(spirit_po::plural_and_strings_type, plural_and_strings))
#endif // SPIRIT_PO_PO_MESSAGE_ADAPTED_HPP_INCLUDED

13
src/spirit_po/version.hpp Normal file
View file

@ -0,0 +1,13 @@
// (C) Copyright 2015 - 2016 Christopher Beck
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef SPIRIT_PO_VERSION_HPP_INCLUDED
#define SPIRIT_PO_VERSION_HPP_INCLUDED
// Library version as separate major / minor / patch components (currently 1.0.1).
#define SPIRIT_PO_VERSION_MAJOR 1
#define SPIRIT_PO_VERSION_MINOR 0
#define SPIRIT_PO_VERSION_PATCH 1
#endif // SPIRIT_PO_VERSION_HPP_INCLUDED