Merge pull request #305 from gfgtdf/istream_fix

We now use boost::iostreams instead of boost::filesystem::fstream because the latter doesn't work with MinGW.
we now use a custom codecvt instead of boost locale because we want to get better error messages for invalid utf8 strings using our _ERR streams
We now use Unicode program command-line arguments on Windows.
This commit is contained in:
gfgtdf 2014-10-23 23:59:42 +02:00
commit 8a81fb526d
28 changed files with 767 additions and 434 deletions

View file

@ -64,7 +64,7 @@ bad_commandline_tuple::bad_commandline_tuple(const std::string& str,
{
}
commandline_options::commandline_options ( int argc, char** argv ) :
commandline_options::commandline_options (const std::vector<std::string>& args) :
bpp(),
bunzip2(),
bzip2(),
@ -144,8 +144,8 @@ commandline_options::commandline_options ( int argc, char** argv ) :
version(false),
windowed(false),
with_replay(false),
argc_(argc),
argv_(argv),
args_(args.begin() + 1 , args.end()),
args0_(*args.begin()),
all_(),
visible_(),
hidden_()
@ -279,7 +279,7 @@ commandline_options::commandline_options ( int argc, char** argv ) :
po::variables_map vm;
const int parsing_style = po::command_line_style::default_style ^ po::command_line_style::allow_guessing;
po::store(po::command_line_parser(argc_,argv_).options(all_).positional(positional).style(parsing_style).run(),vm);
po::store(po::command_line_parser(args_).options(all_).positional(positional).style(parsing_style).run(),vm);
if (vm.count("ai-config"))
multiplayer_ai_config = parse_to_uint_string_tuples_(vm["ai-config"].as<std::vector<std::string> >());
@ -565,7 +565,7 @@ std::vector<boost::tuple<unsigned int,std::string,std::string> > commandline_opt
std::ostream& operator<<(std::ostream &os, const commandline_options& cmdline_opts)
{
os << "Usage: " << cmdline_opts.argv_[0] << " [<options>] [<data-directory>]\n";
os << "Usage: " << cmdline_opts.args0_ << " [<options>] [<data-directory>]\n";
os << cmdline_opts.visible_;
return os;
}

View file

@ -41,7 +41,7 @@ class commandline_options
friend std::ostream& operator<<(std::ostream &os, const commandline_options& cmdline_opts);
public:
commandline_options(int argc, char **argv);
commandline_options(const std::vector<std::string>& args);
/// BitsPerPixel specified by --bpp option.
boost::optional<int> bpp;
@ -216,8 +216,8 @@ private:
std::vector<boost::tuple<unsigned int,std::string> > parse_to_uint_string_tuples_(const std::vector<std::string> &strings, char separator = ':');
/// A helper function splitting vector of strings of format unsigned int:string:string to vector of tuples (unsigned int,string,string)
std::vector<boost::tuple<unsigned int,std::string,std::string> > parse_to_uint_string_string_tuples_(const std::vector<std::string> &strings, char separator = ':');
int argc_;
char **argv_;
std::vector<std::string> args_;
std::string args0_;
boost::program_options::options_description all_;
boost::program_options::options_description visible_;
boost::program_options::options_description hidden_;

View file

@ -21,11 +21,14 @@
#include "global.hpp"
#include "filesystem.hpp"
#include "serialization/unicode.hpp"
#include <boost/filesystem.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/foreach.hpp>
#include <boost/system/windows_error.hpp>
#include <boost/iostreams/device/file_descriptor.hpp>
#include <boost/iostreams/stream.hpp>
#include <set>
#ifdef _WIN32
@ -55,20 +58,132 @@ namespace {
const std::string finalcfg_filename = "_final.cfg";
const std::string initialcfg_filename = "_initial.cfg";
}
#ifdef _WIN32
namespace {
//Only used on Windows, but kept outside the #ifdef so that the CI build still compiles (and thereby checks) it.
/**
 * std::codecvt facet converting between narrow (char) and wide (wchar_t)
 * strings by way of the ucs4_convert_impl codecs.
 *
 * Only do_in() and do_out() perform real work. A failed conversion is
 * reported on ERR_FS and signalled to the caller as std::codecvt_base::error.
 * The remaining members are stubs because, according to the original author's
 * notes, boost filesystem does not call them.
 */
class customcodecvt : public std::codecvt<wchar_t /*intern*/, char /*extern*/, std::mbstate_t>
{
private:
	//private static helper things

	/**
	 * Writer adapter handed to the ucs4_convert_impl codecs. It stores
	 * converted units into the output range ending at to_end and advances the
	 * caller's cursor (to_next is a reference) as it goes.
	 */
	template<typename char_t_to>
	struct customcodecvt_do_conversion_writer
	{
		char_t_to*& to_next;
		char_t_to* to_end;

		/** Returns true if at least @a count more units fit into the output range. */
		bool can_push(size_t count)
		{
			// A buffer with exactly 'count' free slots is large enough, so the
			// comparison has to be inclusive (it used to be a strict '>').
			return static_cast<size_t>(to_end - to_next) >= count;
		}

		/** Stores one unit and advances the output cursor. */
		void push(char_t_to val)
		{
			assert(to_next != to_end);
			*to_next++ = val;
		}
	};

	/**
	 * Converts the whole input range [from, from_end) into the output range
	 * starting at @a to, one code point at a time.
	 *
	 * from_next and to_next are updated as the conversion progresses. Any
	 * exception thrown by the codecs propagates to the caller.
	 *
	 * NOTE(review): running out of output space is only guarded by asserts
	 * (here and in the codecs); there is no std::codecvt_base::partial result.
	 */
	template<typename char_t_from , typename char_t_to>
	static void customcodecvt_do_conversion( std::mbstate_t& /*state*/,
		const char_t_from* from,
		const char_t_from* from_end,
		const char_t_from*& from_next,
		char_t_to* to,
		char_t_to* to_end,
		char_t_to*& to_next )
	{
		typedef typename ucs4_convert_impl::convert_impl<char_t_from>::type impl_type_from;
		typedef typename ucs4_convert_impl::convert_impl<char_t_to>::type impl_type_to;

		from_next = from;
		to_next = to;
		// The writer holds a reference to to_next, so every push() is directly
		// visible to our caller.
		customcodecvt_do_conversion_writer<char_t_to> writer = { to_next, to_end };
		while(from_next != from_end)
		{
			impl_type_to::write(writer, impl_type_from::read(from_next, from_end));
		}
	}

public:

	//Not used by boost filesystem
	int do_encoding() const throw() { return 0; }

	//Not used by boost filesystem
	bool do_always_noconv() const throw() { return false; }

	int do_length( std::mbstate_t& /*state*/,
		const char* /*from*/,
		const char* /*from_end*/,
		std::size_t /*max*/ ) const
	{
		//Not used by boost filesystem
		throw "Not supported";
	}

	// NOTE(review): the virtual hook in std::codecvt is called do_unshift();
	// this function is named unshift() and therefore hides the base class
	// member instead of overriding it. Confirm whether that is intended.
	std::codecvt_base::result unshift( std::mbstate_t& /*state*/,
		char* /*to*/,
		char* /*to_end*/,
		char*& /*to_next*/) const
	{
		//Not used by boost filesystem
		throw "Not supported";
	}

	//there are still some methods which could be implemented but aren't because boost filesystem won't use them.

	/**
	 * Narrow -> wide conversion.
	 * @return std::codecvt_base::ok on success; std::codecvt_base::error
	 *         (after logging the offending input on ERR_FS) if the codecs threw.
	 */
	std::codecvt_base::result do_in( std::mbstate_t& state,
		const char* from,
		const char* from_end,
		const char*& from_next,
		wchar_t* to,
		wchar_t* to_end,
		wchar_t*& to_next ) const
	{
		try
		{
			customcodecvt_do_conversion<char, wchar_t>(state, from, from_end, from_next, to, to_end, to_next);
		}
		catch(...)
		{
			ERR_FS << "Invalid UTF-8 string'" << std::string(from, from_end) << "' " << std::endl;
			return std::codecvt_base::error;
		}
		return std::codecvt_base::ok;
	}

	/**
	 * Wide -> narrow conversion.
	 * @return std::codecvt_base::ok on success; std::codecvt_base::error
	 *         (after logging on ERR_FS) if the codecs threw.
	 */
	std::codecvt_base::result do_out( std::mbstate_t& state,
		const wchar_t* from,
		const wchar_t* from_end,
		const wchar_t*& from_next,
		char* to,
		char* to_end,
		char*& to_next ) const
	{
		try
		{
			customcodecvt_do_conversion<wchar_t, char>(state, from, from_end, from_next, to, to_end, to_next);
		}
		catch(...)
		{
			ERR_FS << "Invalid UTF-16 string" << std::endl;
			return std::codecvt_base::error;
		}
		return std::codecvt_base::ok;
	}
};
#ifdef _WIN32
/**
 * Helper whose constructor imbues boost::filesystem::path with a locale that
 * carries our customcodecvt facet.
 */
class static_runner {
public:
	static_runner()
	{
		// Boost uses the current locale to generate a UTF-8 one.
		const std::locale generated_loc = boost::locale::generator().generate("");
		// Replace its codecvt facet with our own, because we want invalid
		// strings to be reported through our log.hpp streams.
		const std::locale path_loc(generated_loc, new customcodecvt());
		boost::filesystem::path::imbue(path_loc);
	}
};
static static_runner static_bfs_path_imbuer;
}
#endif
}
namespace filesystem {
@ -602,6 +717,29 @@ std::string read_file(const std::string &fname)
ss << is->rdbuf();
return ss.str();
}
// BOOST_VERSION is encoded as major * 100000 + minor * 100 + patch, so
// Boost 1.48.0 is 104800. (The previous constant, 1048000, had one digit too
// many and made this condition true for every Boost 1.x release.)
#if BOOST_VERSION < 104800
//boost iostream < 1.48 expects boost filesystem v2 paths. This is an adapter
struct iostream_path
{
	/** Builds the wrapped bfs::path from anything bfs::path can be constructed from. */
	template<typename stringtype>
	iostream_path(const stringtype& s)
		: path_(s)
	{
	}

	// Member names that boost iostreams looks for on a filesystem v2 path.
	typedef bfs::path::string_type external_string_type;

	/** Returns the wrapped path in its native string representation. */
	external_string_type external_file_string() const
	{
		return path_.native();
	}

	bfs::path path_;
};
#else
// Newer boost iostreams accepts the filesystem path type directly.
typedef bfs::path iostream_path;
#endif
std::istream *istream_file(const std::string &fname, bool treat_failure_as_error)
{
LOG_FS << "Streaming " << fname << " for reading.\n";
@ -612,20 +750,38 @@ std::istream *istream_file(const std::string &fname, bool treat_failure_as_error
return s;
}
bfs::ifstream *s = new bfs::ifstream(path(fname),std::ios_base::binary);
if (s->is_open()) {
//mingw doesn't support std::basic_ifstream::basic_ifstream(const wchar_t* fname)
//that why boost::filesystem::fstream.hpp doesnt work with mingw.
try
{
boost::iostreams::file_descriptor_source fd(iostream_path(fname), std::ios_base::binary);
//TODO: has this still use ?
if (!fd.is_open() && treat_failure_as_error) {
ERR_FS << "Could not open '" << fname << "' for reading.\n";
}
return new boost::iostreams::stream<boost::iostreams::file_descriptor_source>(fd, 4096, 0);
}
catch(const std::exception ex)
{
if(treat_failure_as_error)
{
ERR_FS << "Could not open '" << fname << "' for reading.\n";
}
bfs::ifstream *s = new bfs::ifstream();
s->clear(std::ios_base::failbit);
return s;
}
if(treat_failure_as_error) {
ERR_FS << "Could not open '" << fname << "' for reading.\n";
}
return s;
}
std::ostream *ostream_file(std::string const &fname)
{
LOG_FS << "streaming " << fname << " for writing.\n";
#if 1
boost::iostreams::file_descriptor_sink fd(iostream_path(fname), std::ios_base::binary);
return new boost::iostreams::stream<boost::iostreams::file_descriptor_sink>(fd, 4096, 0);
#else
return new bfs::ofstream(path(fname), std::ios_base::binary);
#endif
}
// Throws io_exception if an error occurs
void write_file(const std::string& fname, const std::string& data)

View file

@ -24,6 +24,7 @@
#include "map.hpp"
#include "network.hpp" // ping_timeout
#include "serialization/string_utils.hpp"
#include "serialization/unicode_cast.hpp"
#include "settings.hpp"
#include "unit.hpp"
#include "unit_map.hpp"

View file

@ -18,7 +18,7 @@
#include "gui/auxiliary/event/handler.hpp"
#include "hotkey/hotkey_command.hpp"
#include "sdl/compat.hpp"
#include "serialization/unicode.hpp"
#include "serialization/unicode_types.hpp"
#include "SDL_events.h"

View file

@ -25,6 +25,7 @@
#include "hotkey/hotkey_item.hpp"
#include "utils/foreach.tpp"
#include "video.hpp"
#include "serialization/unicode_cast.hpp"
#include <cassert>
@ -729,7 +730,7 @@ void thandler::key_down(const SDL_KeyboardEvent& event)
#else
key_down(event.keysym.sym,
event.keysym.mod,
::implementation::ucs4char_to_string(event.keysym.unicode));
unicode_cast<std::string>(static_cast<ucs4::char_t>(event.keysym.unicode)));
#endif
}
}

View file

@ -24,6 +24,7 @@
#include "map_utils.hpp"
#include "util.hpp"
#include "serialization/string_utils.hpp"
#include "serialization/unicode.hpp"
namespace gui2
{

View file

@ -31,6 +31,7 @@
#include "utils/foreach.tpp"
#include "desktop/clipboard.hpp"
#include "serialization/unicode.hpp"
#include "../../game_preferences.hpp"
#include "../../log.hpp"
#include "../../resources.hpp"

View file

@ -24,6 +24,7 @@
#include "gui/widgets/detail/register.tpp"
#include "gui/widgets/settings.hpp"
#include "serialization/string_utils.hpp"
#include "serialization/unicode.hpp"
#include <boost/bind.hpp>

View file

@ -19,6 +19,7 @@
#include "desktop/clipboard.hpp"
#include "gui/auxiliary/log.hpp"
#include "serialization/string_utils.hpp"
#include "serialization/unicode.hpp"
#include <boost/bind.hpp>

View file

@ -25,7 +25,7 @@
#include "gui/widgets/window.hpp"
#include "game_preferences.hpp"
#include "utils/foreach.tpp"
#include "serialization/unicode.hpp"
#include <boost/bind.hpp>
#define LOG_SCOPE_HEADER get_control_type() + " [" + id() + "] " + __func__

View file

@ -462,7 +462,7 @@ void hotkey_item::save(config& item) const
if (get_hat() >= 0) item["hat"] = get_hat();
if (get_value() >= 0) item["value"] = get_value();
if (get_keycode() >= 0) item["key"] = SDL_GetKeyName(SDLKey(get_keycode()));
if (get_character() >= 0) item["key"] = unicode_cast<utf8::string, ucs4::char_t>(get_character()); // Second template argument because get_character returns a signed int
if (get_character() >= 0) item["key"] = unicode_cast<utf8::string>(static_cast<ucs4::char_t>(get_character()));
if (get_mouse() >= 0) item["mouse"] = get_mouse();
if (get_button() >= 0) item["button"] = get_button();

View file

@ -26,6 +26,7 @@
#include "gettext.hpp"
#include "marked-up_text.hpp"
#include "serialization/string_utils.hpp"
#include "serialization/unicode.hpp"
#include "video.hpp"
#include "wml_exception.hpp"

View file

@ -22,7 +22,7 @@ class CVideo;
struct surface;
#include <SDL_video.h>
#include <string>
#include "serialization/unicode.hpp"
#include "serialization/unicode_types.hpp"
#ifdef SDL_GPU
#include "sdl/image.hpp"

View file

@ -23,6 +23,7 @@
#include "log.hpp"
#include "serialization/string_utils.hpp"
#include "serialization/unicode_cast.hpp"
#include "random_new.hpp"
/// Dummy race used when a race is not yet known.

View file

@ -16,7 +16,7 @@
#define RACE_HPP_INCLUDED
#include "config.hpp"
#include "serialization/unicode.hpp"
#include "serialization/unicode_types.hpp"

View file

@ -0,0 +1,233 @@
/*
Copyright (C) 2003 - 2014 by David White <dave@whitevine.net>
Part of the Battle for Wesnoth Project http://www.wesnoth.org/
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY.
See the COPYING file for more details.
*/
#ifndef SERIALIZATION_UCS4_CONVERT_IMPL_HPP_INCLUDED
#define SERIALIZATION_UCS4_CONVERT_IMPL_HPP_INCLUDED
#include "unicode_types.hpp"
#include "util.hpp"
#include <cassert>
namespace ucs4_convert_impl
{
struct utf8_impl
{
static const char* get_name() { return "utf8"; };
static size_t byte_size_from_ucs4_codepoint(ucs4::char_t ch)
{
if(ch < (1u << 7))
return 1;
else if(ch < (1u << 11))
return 2;
else if(ch < (1u << 16))
return 3;
else if(ch < (1u << 21))
return 4;
else if(ch < (1u << 26))
return 5;
else if(ch < (1u << 31))
return 6;
else
throw utf8::invalid_utf8_exception(); // Invalid UCS-4
}
static int byte_size_from_utf8_first(utf8::char_t ch)
{
if (!(ch & 0x80)) {
return 1; // US-ASCII character, 1 byte
}
/* first bit set: character not in US-ASCII, multiple bytes
* number of set bits at the beginning = bytes per character
* e.g. 11110xxx indicates a 4-byte character */
int count = count_leading_ones(ch);
if (count == 1 || count > 6) { // count > 4 after RFC 3629
throw utf8::invalid_utf8_exception(); // Stop on invalid characters
}
return count;
};
/**
@param out an object to write utf8::char_t. required operations are:
1) push(utf8::char_t) to write a single character
2) can_push(size_t n) to check whether there is still enough space
for n characters.
@param ch the ucs4 chracter to write to the stream.
*/
template<typename writer>
static inline void write(writer out, ucs4::char_t ch)
{
size_t count = byte_size_from_ucs4_codepoint(ch);
assert(out.can_push(count));
if(count == 1) {
out.push(static_cast<utf8::char_t>(ch));
} else {
for(int j = static_cast<int>(count) - 1; j >= 0; --j) {
unsigned char c = (ch >> (6 * j)) & 0x3f;
c |= 0x80;
if(j == static_cast<int>(count) - 1) {
c |= 0xff << (8 - count);
}
out.push(c);
}
}
}
/**
reads an ucs4 character from an utf8 stream
@param input an iterator pointing to the first character of a utf8 sequence to read
@param end an iterator poinint to the end of teh utf8 sequence to read.
*/
template<typename iitor_t>
static inline ucs4::char_t read(iitor_t& input, const iitor_t& end)
{
assert(input != end);
size_t size = byte_size_from_utf8_first(*input);
uint32_t current_char = static_cast<unsigned char>(*input);
// Convert the first character
if(size != 1) {
current_char &= 0xFF >> (size + 1);
}
// Convert the continuation bytes
// i == number of '++input'
++input;
for(size_t i = 1; i < size; ++i, ++input) {
// If the string ends occurs within an UTF8-sequence, this is bad.
if (input == end)
throw utf8::invalid_utf8_exception();
if ((*input & 0xC0) != 0x80)
throw utf8::invalid_utf8_exception();
current_char = (current_char << 6) | (static_cast<unsigned char>(*input) & 0x3F);
}
//i == size => input was increased size times.
// Check for non-shortest-form encoding
// This has been forbidden in Unicode 3.1 for security reasons
if (size > byte_size_from_ucs4_codepoint(current_char))
throw utf8::invalid_utf8_exception();
return current_char;
}
};
struct utf16_impl
{
static const char* get_name() { return "utf16"; };
template<typename writer>
static inline void write(writer out, ucs4::char_t ch)
{
const uint32_t bit17 = 0x10000;
if(ch < bit17)
{
assert(out.can_push(1));
out.push(static_cast<utf16::char_t>(ch));
}
else
{
assert(out.can_push(2));
const uint32_t char20 = ch - bit17;
assert(char20 < (1 << 20));
const ucs4::char_t lead = 0xD800 + (char20 >> 10);
const ucs4::char_t trail = 0xDC00 + (char20 & 0x3FF);
assert(lead < bit17);
assert(trail < bit17);
out.push(static_cast<utf16::char_t>(lead));
out.push(static_cast<utf16::char_t>(trail));
}
}
template<typename iitor_t>
static inline ucs4::char_t read(iitor_t& input, const iitor_t& end)
{
const int32_t last10 = 0x3FF;
const int32_t type_filter = 0xFC00;
const int32_t type_lead = 0xD800;
const int32_t type_trail = 0xDC00;
assert(input != end);
uint32_t current_char = static_cast<utf16::char_t>(*input);
++input;
uint32_t type = current_char & type_filter;
if(type == type_trail)
{
//found trail without head
throw utf8::invalid_utf8_exception();
}
else if(type == type_lead)
{
if(input == end)
{
//If the string ends occurs within an UTF16-sequence, this is bad.
throw utf8::invalid_utf8_exception();
}
if((*input & type_filter) != type_trail)
{
throw utf8::invalid_utf8_exception();
}
current_char &= last10;
current_char <<= 10;
current_char += (*input & last10);
current_char += 0x10000;
++input;
}
return current_char;
}
};
/**
 * UCS-4 codec: every unit is one code point, so reading and writing
 * simply pass the value through.
 */
struct utf32_impl
{
	/** Name of this encoding, used in diagnostics. */
	static const char* get_name() { return "UCS4"; }

	/**
	 * Writes @a ch as a single unit.
	 * @param out writer object providing can_push(size_t) and push(unit).
	 */
	template<typename writer>
	static inline void write(writer out, ucs4::char_t ch)
	{
		assert(out.can_push(1));
		out.push(ch);
	}

	/**
	 * Returns the unit @a input points at and advances @a input by one.
	 * @param end end of the sequence; only used to assert it is not reached.
	 */
	template<typename iitor_t>
	static inline ucs4::char_t read(iitor_t& input, const iitor_t& end)
	{
		assert(input != end);
		const uint32_t code_point = *input;
		++input;
		return code_point;
	}
};
/**
 * Trait mapping a character type to the codec struct that reads and writes it.
 * The primary template is intentionally empty: using an unsupported character
 * type fails to compile because there is no nested `type`.
 */
template<typename T_CHAR>
struct convert_impl {};
/** utf8::char_t is handled by utf8_impl. */
template<>
struct convert_impl<utf8::char_t>
{
typedef utf8_impl type;
};
/** utf16::char_t is handled by utf16_impl. */
template<>
struct convert_impl<utf16::char_t>
{
typedef utf16_impl type;
};
/** ucs4::char_t is handled by utf32_impl. */
template<>
struct convert_impl<ucs4::char_t>
{
typedef utf32_impl type;
};
}
#endif

View file

@ -7,9 +7,10 @@
#include <cstddef> //ptrdiff_t
#include <cassert> //assert
#include "unicode_types.hpp"
namespace ucs4
{
typedef uint32_t char_t;
template<typename string_type, typename update_implementation>
class iterator_base
{
@ -85,7 +86,7 @@ namespace ucs4
assert(current_substr.first == current_substr.second);
if(current_substr.first == string_end)
return;
current_char = update_implementation::get_next_char(current_substr.second, string_end);
current_char = update_implementation::read(current_substr.second, string_end);
}
ucs4::char_t current_char;

View file

@ -20,7 +20,8 @@
*/
#include "global.hpp"
#include "ucs4_convert_impl.hpp"
#include "unicode_cast.hpp"
#include "serialization/unicode.hpp"
#include "log.hpp"
@ -33,198 +34,6 @@
static lg::log_domain log_engine("engine");
#define ERR_GENERAL LOG_STREAM(err, lg::general)
namespace {
size_t byte_size_from_ucs4_codepoint(ucs4::char_t ch)
{
if(ch < (1u << 7))
return 1;
else if(ch < (1u << 11))
return 2;
else if(ch < (1u << 16))
return 3;
else if(ch < (1u << 21))
return 4;
else if(ch < (1u << 26))
return 5;
else if(ch < (1u << 31))
return 6;
else
throw utf8::invalid_utf8_exception(); // Invalid UCS-4
}
} // anonymous namespace
namespace implementation {
inline void push_ucs4char_to_string(std::string& out, ucs4::char_t ch)
{
size_t count = byte_size_from_ucs4_codepoint(ch);
if(count == 1) {
out.push_back(static_cast<char>(ch));
} else {
for(int j = static_cast<int>(count) - 1; j >= 0; --j) {
unsigned char c = (ch >> (6 * j)) & 0x3f;
c |= 0x80;
if(j == static_cast<int>(count) - 1) {
c |= 0xff << (8 - count);
}
out.push_back(c);
}
}
}
std::string ucs4string_to_string(const ucs4::string &src)
{
std::string ret;
try {
for(ucs4::string::const_iterator i = src.begin(); i != src.end(); ++i) {
push_ucs4char_to_string(ret, *i);
}
return ret;
}
catch(utf8::invalid_utf8_exception&) {
ERR_GENERAL << "Invalid UCS-4 character string" << std::endl;
return ret;
}
}
std::string ucs4char_to_string(const ucs4::char_t c)
{
ucs4::string s;
s.push_back(c);
return ucs4string_to_string(s);
}
ucs4::string string_to_ucs4string(const std::string &src)
{
ucs4::string res;
try {
utf8::iterator i1(src);
const utf8::iterator i2(utf8::iterator::end(src));
// Equivalent to res.insert(res.end(),i1,i2) which doesn't work on VC++6.
while(i1 != i2) {
res.push_back(*i1);
++i1;
}
}
catch(utf8::invalid_utf8_exception&) {
ERR_GENERAL << "Invalid UTF-8 string: \"" << src << "\"" << std::endl;
return res;
}
return res;
}
ucs4::string utf16string_to_ucs4string(const utf16::string & src)
{
ucs4::string res;
try {
utf16::iterator i1(src);
const utf16::iterator i2(utf16::iterator::end(src));
// Equivalent to res.insert(res.end(),i1,i2) which doesn't work on VC++6.
while(i1 != i2) {
res.push_back(*i1);
++i1;
}
}
catch(utf8::invalid_utf8_exception&) {
ERR_GENERAL << "Invalid UTF-16 string" << std::endl;
return res;
}
return res;
}
std::string utf16string_to_string(const utf16::string & src)
{
std::string res;
try {
utf16::iterator i1(src);
const utf16::iterator i2(utf16::iterator::end(src));
while(i1 != i2) {
push_ucs4char_to_string(res, *i1);
++i1;
}
}
catch(utf8::invalid_utf8_exception&) {
ERR_GENERAL << "Invalid UTF-16 string" << std::endl;
return res;
}
return res;
}
utf16::string ucs4string_to_utf16string(const ucs4::string &src)
{
utf16::string res;
const Uint32 bit17 = 0x10000;
BOOST_FOREACH(const ucs4::char_t &u4, src) {
if(u4 < bit17)
res.push_back(static_cast<wchar_t>(u4));
else {
const Uint32 char20 = u4 - bit17;
assert(char20 < (1 << 20));
const ucs4::char_t lead = 0xD800 + (char20 >> 10);
const ucs4::char_t trail = 0xDC00 + (char20 & 0x3FF);
assert(lead < bit17);
assert(trail < bit17);
res.push_back(static_cast<wchar_t>(lead));
res.push_back(static_cast<wchar_t>(trail));
}
}
return res;
}
} // implementation namespace
namespace utf16 {
ucs4::char_t iterator_implementation::get_next_char(utf16::string::const_iterator& start, const utf16::string::const_iterator& end)
{
const ucs4::char_t last10 = 0x3FF;
const ucs4::char_t type_filter = 0xFC00;
const ucs4::char_t type_lead = 0xD800;
const ucs4::char_t type_trail = 0xDC00;
assert(start != end);
ucs4::char_t current_char = static_cast<uint16_t>(*start);
++start;
ucs4::char_t type = current_char & type_filter;
if(type == type_trail)
{
//found trail without head
throw utf8::invalid_utf8_exception();
}
else if(type == type_lead)
{
if(start == end)
{
//If the string ends occurs within an UTF16-sequence, this is bad.
throw utf8::invalid_utf8_exception();
}
if((*start & type_filter) != type_trail)
{
throw utf8::invalid_utf8_exception();
}
current_char &= last10;
current_char <<= 10;
current_char += (*start & last10);
current_char += 0x10000;
++start;
}
return current_char;
}
}
namespace utf8 {
static int byte_size_from_utf8_first(const unsigned char ch)
@ -242,40 +51,6 @@ static int byte_size_from_utf8_first(const unsigned char ch)
return count;
}
ucs4::char_t iterator_implementation::get_next_char(std::string::const_iterator& start, const std::string::const_iterator& string_end)
{
assert(start != string_end);
size_t size = byte_size_from_utf8_first(*start);
uint32_t current_char = static_cast<unsigned char>(*start);
// Convert the first character
if(size != 1) {
current_char &= 0xFF >> (size + 1);
}
// Convert the continuation bytes
// i == number of '++start'
++start;
for(size_t i = 1; i < size; ++i, ++start) {
// If the string ends occurs within an UTF8-sequence, this is bad.
if (start == string_end)
throw invalid_utf8_exception();
if ((*start & 0xC0) != 0x80)
throw invalid_utf8_exception();
current_char = (current_char << 6) | (static_cast<unsigned char>(*start) & 0x3F);
}
//i == size => start was increased size times.
// Check for non-shortest-form encoding
// This has been forbidden in Unicode 3.1 for security reasons
if (size > ::byte_size_from_ucs4_codepoint(current_char))
throw invalid_utf8_exception();
return current_char;
}
utf8::string lowercase(const utf8::string& s)
{
if(!s.empty()) {

View file

@ -17,32 +17,21 @@
#define SERIALIZATION_UNICODE_HPP_INCLUDED
#include "ucs4_iterator_base.hpp"
#include "unicode_types.hpp"
#include "ucs4_convert_impl.hpp"
#include "unicode_cast.hpp"
#include <boost/static_assert.hpp>
#include <string>
#include <vector>
#include <SDL_types.h>
namespace ucs4 {
typedef Uint32 char_t;
typedef std::vector<char_t> string;
}
namespace utf8 {
typedef std::string string;
}
/**
* For win32 API.
* On windows, wchar_t is defined as Uint16
* Wide strings are expected to be UTF-16
*/
namespace utf16 {
typedef std::vector<wchar_t> string;
struct iterator_implementation
{
static ucs4::char_t get_next_char(utf16::string::const_iterator& start, const utf16::string::const_iterator& end);
};
typedef ucs4::iterator_base<utf16::string, iterator_implementation> iterator;
typedef ucs4::iterator_base<utf16::string, ucs4_convert_impl::convert_impl<char_t>::type> iterator;
}
namespace utf8 {
@ -51,14 +40,7 @@ namespace utf8 {
* Functions for converting Unicode wide-char strings to UTF-8 encoded strings,
* back and forth.
*/
/** also used for invalid utf16 or ucs4 strings */
class invalid_utf8_exception : public std::exception {};
struct iterator_implementation
{
static ucs4::char_t get_next_char(std::string::const_iterator& start, const std::string::const_iterator& end);
};
typedef ucs4::iterator_base<std::string, iterator_implementation> iterator;
typedef ucs4::iterator_base<std::string, ucs4_convert_impl::convert_impl<char_t>::type> iterator;
/** Returns a lowercased version of the string. */
utf8::string lowercase(const utf8::string&);
@ -103,55 +85,4 @@ namespace utf8 {
void truncate_as_ucs4(utf8::string& str, const size_t size);
} // end namespace utf8
namespace implementation {
std::string ucs4string_to_string(const ucs4::string &);
ucs4::string string_to_ucs4string(const std::string &);
std::string ucs4char_to_string(const ucs4::char_t);
ucs4::string utf16string_to_ucs4string(const utf16::string &);
std::string utf16string_to_string(const utf16::string &);
utf16::string ucs4string_to_utf16string(const ucs4::string &);
} // end namespace implementation
template <typename To, typename From> inline
To unicode_cast(const From &) {
BOOST_STATIC_ASSERT(sizeof(To) == 0);
return To();
}
template <> inline
utf8::string unicode_cast<utf8::string, ucs4::string>(const ucs4::string &in) {
return implementation::ucs4string_to_string(in);
}
template <> inline
ucs4::string unicode_cast<ucs4::string, utf8::string>(const utf8::string &in) {
return implementation::string_to_ucs4string(in);
}
template <> inline
utf8::string unicode_cast<utf8::string, ucs4::char_t>(const ucs4::char_t &in) {
return implementation::ucs4char_to_string(in);
}
template <> inline
utf16::string unicode_cast<utf16::string, ucs4::string>(const ucs4::string &in) {
return implementation::ucs4string_to_utf16string(in);
}
template <> inline
utf16::string unicode_cast<utf16::string, utf8::string>(const utf8::string &in) {
const ucs4::string u4str = unicode_cast<ucs4::string>(in);
return unicode_cast<utf16::string>(u4str);
}
template <> inline
ucs4::string unicode_cast<ucs4::string, utf16::string>(const utf16::string &in) {
return implementation::utf16string_to_ucs4string(in);
}
template <> inline
std::string unicode_cast<std::string, utf16::string>(const utf16::string &in) {
return implementation::utf16string_to_string(in);
}
#endif

View file

@ -0,0 +1,118 @@
/*
Copyright (C) 2003 - 2014 by David White <dave@whitevine.net>
Part of the Battle for Wesnoth Project http://www.wesnoth.org/
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY.
See the COPYING file for more details.
*/
#ifndef SERIALIZATION_UNICODE_CAST_HPP_INCLUDED
#define SERIALIZATION_UNICODE_CAST_HPP_INCLUDED
#include "ucs4_convert_impl.hpp"
#include <iostream> //for std::cerr
#include <iterator>
#include <boost/utility/enable_if.hpp>
#include <boost/type_traits/is_arithmetic.hpp>
namespace ucs4_convert_impl
{
//transforms an outputiterator to a writer for ucs4_convert_impl functions.
/**
 * Adapts an output iterator to the writer interface expected by the
 * ucs4_convert_impl codecs (can_push() / push()).
 * The iterator is held by reference, so the caller's iterator is advanced.
 */
template<typename oitor_t>
struct iteratorwriter
{
	oitor_t& out_;

	iteratorwriter(oitor_t& out)
		: out_(out)
	{
	}

	/** Always reports that there is room; no capacity check is performed. */
	bool can_push(size_t /*count*/)
	{
		return true;
	}

	/** Writes @a val through the iterator and then advances the iterator. */
	template<typename element_type>
	void push(element_type val)
	{
		*out_ = val;
		++out_;
	}
};
/**
 * Minimal helper for constraining templates: `type` is always Tret, while
 * Tcheck only has to name a valid type. If Tcheck cannot be formed (for
 * example `typename TS::value_type` for a TS without a nested value_type),
 * instantiating enableif in a function signature fails substitution, so that
 * overload is dropped from consideration.
 */
template<typename Tret, typename Tcheck>
struct enableif
{
// Tcheck is only exposed so that it is named somewhere; it is otherwise unused.
typedef Tcheck ignore;
typedef Tret type;
};
}
/**
 * Converts a whole string from one unicode encoding to another.
 * The encodings are selected by the value_type of the two collection types
 * via ucs4_convert_impl::convert_impl.
 *
 * @tparam TD output, a collection type.
 * @tparam TS input, a collection type.
 * @param source the string to convert.
 * @return an instance of TD. If the input is invalid, an error is printed to
 *         std::cerr and whatever was converted up to that point is returned.
 */
template<typename TD , typename TS>
typename ucs4_convert_impl::enableif<TD, typename TS::value_type>::type unicode_cast(const TS& source)
{
	// The enableif return type (instead of a plain TD) only lets this overload
	// participate when TS has a nested value_type.
	using namespace ucs4_convert_impl;
	typedef typename convert_impl<typename TS::value_type>::type source_codec;
	typedef typename convert_impl<typename TD::value_type>::type target_codec;
	typedef std::back_insert_iterator<TD> appender_type;
	typedef typename TS::const_iterator source_iterator;

	TD converted;
	try
	{
		appender_type appender(converted);
		iteratorwriter<appender_type> sink(appender);
		source_iterator last = source.end();
		// read() advances 'pos' itself, hence no increment expression.
		for(source_iterator pos = source.begin(); pos != last; ) {
			target_codec::write(sink, source_codec::read(pos, last));
		}
	}
	catch(utf8::invalid_utf8_exception&)
	{
		///TODO: use an ERR_.. stream, but it is unclear whether that can easily be done in a header.
		std::cerr << "Failed to convert a string from " << source_codec::get_name() << " to " << target_codec::get_name() << "\n";
	}
	return converted;
}
/**
 * Converts a single ucs4 character to a string in the encoding selected by
 * TD's value_type.
 *
 * @tparam TD output, a collection type.
 * @param onechar input, a single character.
 * @return an instance of TD. If the character cannot be encoded, an error is
 *         printed to std::cerr and whatever was written so far is returned.
 */
template<typename TD>
TD unicode_cast(ucs4::char_t onechar)
{
	using namespace ucs4_convert_impl;
	typedef convert_impl<ucs4::char_t>::type source_codec;
	typedef typename convert_impl<typename TD::value_type>::type target_codec;
	typedef std::back_insert_iterator<TD> appender_type;

	TD converted;
	try
	{
		appender_type appender(converted);
		iteratorwriter<appender_type> sink(appender);
		target_codec::write(sink, onechar);
	}
	catch(utf8::invalid_utf8_exception&)
	{
		///TODO: use an ERR_.. stream, but it is unclear whether that can easily be done in a header.
		std::cerr << "Failed to convert a string from " << source_codec::get_name() << " to " << target_codec::get_name() << "\n";
	}
	return converted;
}
#endif

View file

@ -0,0 +1,46 @@
/*
Copyright (C) 2003 - 2014 by David White <dave@whitevine.net>
Part of the Battle for Wesnoth Project http://www.wesnoth.org/
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY.
See the COPYING file for more details.
*/
#ifndef SERIALIZATION_UNICODE_TYPES_HPP_INCLUDED
#define SERIALIZATION_UNICODE_TYPES_HPP_INCLUDED
#include <string>
#include <vector>
#include <exception>
#include <stdint.h>
/**
 * UCS-4: one 32-bit unit per character.
 */
namespace ucs4 {
typedef uint32_t char_t;
typedef std::vector<char_t> string;
}
/**
 * UTF-8: strings are plain std::string, units are char.
 */
namespace utf8 {
typedef char char_t;
typedef std::string string;
/**
 * Thrown when a conversion encounters invalid input.
 * Despite its name it is also used for invalid utf16 or ucs4 strings.
 */
class invalid_utf8_exception : public std::exception {};
}
/**
 * For win32 API.
 * On Windows, wchar_t is defined as Uint16.
 * Wide strings are expected to be UTF-16.
 */
namespace utf16 {
typedef wchar_t char_t;
typedef std::vector<char_t> string;
}
#endif

View file

@ -16,14 +16,15 @@
#include "commandline_options.hpp"
#include <boost/test/unit_test.hpp>
#include <boost/assign.hpp>
BOOST_AUTO_TEST_SUITE( cmdline_opts )
BOOST_AUTO_TEST_CASE (test_empty_options)
{
const char *argv[] = {"wesnoth"};
const int argc = sizeof(argv)/sizeof(const char *);
commandline_options co(argc,const_cast<char**>(argv));
std::vector<std::string> args = boost::assign::list_of("wesnoth");
commandline_options co(args);
BOOST_CHECK(!co.bpp);
BOOST_CHECK(!co.campaign);
@ -96,19 +97,16 @@ BOOST_AUTO_TEST_CASE (test_empty_options)
BOOST_AUTO_TEST_CASE (test_default_options)
{
const char *argv[] =
{
"wesnoth",
"--campaign",
"--editor",
"--logdomains",
"--preprocess-output-macros",
"--server",
"--test"
};
const int argc = sizeof(argv)/sizeof(const char *);
commandline_options co(argc,const_cast<char**>(argv));
std::vector<std::string> args = boost::assign::list_of
("wesnoth")
("--campaign")
("--editor")
("--logdomains")
("--preprocess-output-macros")
("--server")
("--test");
commandline_options co(args);
BOOST_CHECK(!co.bpp);
BOOST_CHECK(co.campaign && co.campaign->empty());
BOOST_CHECK(!co.campaign_difficulty);
@ -180,83 +178,81 @@ BOOST_AUTO_TEST_CASE (test_default_options)
BOOST_AUTO_TEST_CASE (test_full_options)
{
const char *argv[] =
{
"wesnoth",
"--ai-config=1:aifoo",
"--ai-config=2:aibar",
"--algorithm=3:algfoo",
"--algorithm=4:algbar",
"--bpp=32",
"--campaign=campfoo",
"--campaign-difficulty=16",
"--campaign-scenario=scenfoo",
"--clock",
"--controller=5:confoo",
"--controller=6:conbar",
"--data-dir=datadirfoo",
"--data-path",
"--debug",
std::vector<std::string> args = boost::assign::list_of
("wesnoth")
("--ai-config=1:aifoo")
("--ai-config=2:aibar")
("--algorithm=3:algfoo")
("--algorithm=4:algbar")
("--bpp=32")
("--campaign=campfoo")
("--campaign-difficulty=16")
("--campaign-scenario=scenfoo")
("--clock")
("--controller=5:confoo")
("--controller=6:conbar")
("--data-dir=datadirfoo")
("--data-path")
("--debug")
#ifdef DEBUG_WINDOW_LAYOUT_GRAPHS
"--debug-dot-domain=ddfoo",
"--debug-dot-level=dlfoo",
("--debug-dot-domain=ddfoo")
("--debug-dot-level=dlfoo")
#endif
"--editor=editfoo",
"--era=erafoo",
"--exit-at-end",
"--fps",
"--fullscreen",
"--gunzip=gunzipfoo.gz",
"--gzip=gzipfoo",
"--help",
"--ignore-map-settings",
"--label=labelfoo",
"--load=loadfoo",
"--log-error=errfoo,errbar/*",
"--log-warning=warnfoo,warnfoo/bar",
"--log-info=infofoo",
"--log-debug=dbgfoo,dbgbar,dbg/foo/bar/baz",
"--logdomains=filterfoo",
"--max-fps=100",
"--multiplayer",
"--new-widgets",
"--nocache",
"--nodelay",
"--nomusic",
"--nosound",
"--nogui",
"--parm=7:parmfoo:valfoo",
"--parm=8:parmbar:valbar",
"--path",
"--preprocess", "preppathfoo", "preptargfoo",
"--preprocess-defines=DEFFOO,DEFBAR",
"--preprocess-input-macros=inmfoo",
"--preprocess-output-macros=outmfoo",
"--proxy",
"--proxy-address=addressfoo",
"--proxy-password=passfoo",
"--proxy-port=portfoo",
"--proxy-user=userfoo",
"--resolution=800x600",
"--rng-seed=1234",
"--scenario=scenfoo",
"--screenshot", "mapfoo", "outssfoo",
"--side=9:sidefoo",
"--side=10:sidebar",
"--server=servfoo",
"--test=testfoo",
"--turns=42",
"--userconfig-dir=userconfigdirfoo",
"--userconfig-path",
"--userdata-dir=userdatadirfoo",
"--userdata-path",
"--validcache",
"--version",
"--windowed",
"--with-replay"
};
const int argc = sizeof(argv)/sizeof(const char *);
commandline_options co(argc,const_cast<char**>(argv));
("--editor=editfoo")
("--era=erafoo")
("--exit-at-end")
("--fps")
("--fullscreen")
("--gunzip=gunzipfoo.gz")
("--gzip=gzipfoo")
("--help")
("--ignore-map-settings")
("--label=labelfoo")
("--load=loadfoo")
("--log-error=errfoo,errbar/*")
("--log-warning=warnfoo,warnfoo/bar")
("--log-info=infofoo")
("--log-debug=dbgfoo,dbgbar,dbg/foo/bar/baz")
("--logdomains=filterfoo")
("--max-fps=100")
("--multiplayer")
("--new-widgets")
("--nocache")
("--nodelay")
("--nomusic")
("--nosound")
("--nogui")
("--parm=7:parmfoo:valfoo")
("--parm=8:parmbar:valbar")
("--path")
("--preprocess") ("preppathfoo") ("preptargfoo")
("--preprocess-defines=DEFFOO,DEFBAR")
("--preprocess-input-macros=inmfoo")
("--preprocess-output-macros=outmfoo")
("--proxy")
("--proxy-address=addressfoo")
("--proxy-password=passfoo")
("--proxy-port=portfoo")
("--proxy-user=userfoo")
("--resolution=800x600")
("--rng-seed=1234")
("--scenario=scenfoo")
("--screenshot") ("mapfoo") ("outssfoo")
("--side=9:sidefoo")
("--side=10:sidebar")
("--server=servfoo")
("--test=testfoo")
("--turns=42")
("--userconfig-dir=userconfigdirfoo")
("--userconfig-path")
("--userdata-dir=userdatadirfoo")
("--userdata-path")
("--validcache")
("--version")
("--windowed")
("--with-replay");
commandline_options co(args);
BOOST_CHECK(co.bpp && *co.bpp == 32);
BOOST_CHECK(co.campaign && *co.campaign == "campfoo");
@ -350,13 +346,11 @@ BOOST_AUTO_TEST_CASE (test_full_options)
BOOST_AUTO_TEST_CASE (test_positional_options)
{
const char *argv[] =
{
"wesnoth",
"datadirfoo"
};
const int argc = sizeof(argv)/sizeof(const char *);
commandline_options co(argc,const_cast<char**>(argv));
std::vector<std::string> args = boost::assign::list_of
("wesnoth")
("datadirfoo");
commandline_options co(args);
BOOST_CHECK(!co.bpp);
BOOST_CHECK(!co.campaign);

View file

@ -25,6 +25,7 @@
#include <boost/foreach.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/assign.hpp>
/* Definitions */
@ -61,8 +62,8 @@ boost::scoped_ptr<saved_game> state;
struct mp_connect_fixture {
mp_connect_fixture() :
video(),
dummy_argv(),
cmdline_opts(1, dummy_argv),
dummy_args(boost::assign::list_of("wesnoth").convert_to_container<std::vector<std::string> >()),
cmdline_opts(dummy_args),
hotkey_manager(),
config_manager()
{
@ -91,7 +92,7 @@ struct mp_connect_fixture {
{
}
CVideo video;
char** dummy_argv;
std::vector<std::string> dummy_args;
commandline_options cmdline_opts;
hotkey::manager hotkey_manager;
boost::scoped_ptr<game_config_manager> config_manager;

View file

@ -22,6 +22,7 @@
#include "gui/lib/types/point.hpp"
#include "font.hpp"
#include "serialization/string_utils.hpp"
#include "serialization/unicode.hpp"
#include "tstring.hpp"
#include <boost/foreach.hpp>

View file

@ -16,7 +16,7 @@
#define TEXT_HPP_INCLUDED
#include "sdl/utils.hpp"
#include "serialization/unicode.hpp"
#include "serialization/unicode_types.hpp"
#include <boost/noncopyable.hpp>

View file

@ -241,6 +241,8 @@ inline std::size_t bit_width() {
*/
template<typename T>
inline std::size_t bit_width(const T& x) {
	// Only the static type of the argument is needed; the cast to void keeps
	// msvc 2010 from reporting the parameter as unused.
	static_cast<void>(x);

	// Number of value bits in one byte (unsigned char has no padding bits).
	const std::size_t bits_per_byte = std::numeric_limits<unsigned char>::digits;
	return bits_per_byte * sizeof(T);
}

View file

@ -85,6 +85,10 @@
#include "SDL_stdinc.h" // for SDL_putenv, Uint32
#include "SDL_timer.h" // for SDL_GetTicks
#ifdef _WIN32
#include <windows.h>
#endif
#ifdef DEBUG_WINDOW_LAYOUT_GRAPHS
#include "gui/widgets/debug.hpp"
#endif
@ -507,19 +511,19 @@ static void warn_early_init_failure()
* Setups the game environment and enters
* the titlescreen or game loops.
*/
static int do_gameloop(int argc, char** argv)
static int do_gameloop(const std::vector<std::string>& args)
{
srand(time(NULL));
commandline_options cmdline_opts = commandline_options(argc,argv);
game_config::wesnoth_program_dir = filesystem::directory_name(argv[0]);
commandline_options cmdline_opts = commandline_options(args);
game_config::wesnoth_program_dir = filesystem::directory_name(args[0]);
int finished = process_command_args(cmdline_opts);
if(finished != -1) {
return finished;
}
boost::scoped_ptr<game_launcher> game(
new game_launcher(cmdline_opts,argv[0]));
new game_launcher(cmdline_opts,args[0].c_str()));
const int start_ticks = SDL_GetTicks();
init_locale();
@ -781,6 +785,57 @@ static int do_gameloop(int argc, char** argv)
}
}
}
#ifdef _WIN32
/**
 * Extracts the next argument from a raw Windows command line.
 *
 * Arguments are separated by unquoted spaces or tabs. A double quote opens or
 * closes a quoted section and is not copied to the result. Inside a quoted
 * section whitespace is kept, and two consecutive double quotes stand for one
 * literal double quote.
 *
 * @param next   Current read position. On return it points at the separator
 *               that ended the argument, or equals @a end.
 * @param end    One past the last character of the command line.
 * @param res    The characters of the argument are appended to this string;
 *               pass an empty string to receive just the argument.
 *
 * @returns      false if nothing but whitespace was left (@a res is not
 *               touched), true if an argument was read.
 */
static bool parse_commandline_argument(const char*& next, const char* end, std::string& res)
{
	// Strip leading whitespace. Tabs separate arguments just like spaces do.
	while(next != end && (*next == ' ' || *next == '\t')) {
		++next;
	}
	if(next == end) {
		return false;
	}

	bool in_quotes = false;
	for(; next != end; ++next) {
		const char c = *next;
		if(c == '"') {
			if(in_quotes && next + 1 != end && *(next + 1) == '"') {
				// A doubled quote inside a quoted section is a literal quote.
				res.push_back('"');
				++next;
			} else {
				// Opening or closing quote: switch mode, emit nothing.
				in_quotes = !in_quotes;
			}
		} else if((c == ' ' || c == '\t') && !in_quotes) {
			// Unquoted whitespace ends the argument; leave it for the next call.
			break;
		} else {
			res.push_back(c);
		}
	}
	return true;
}
/**
 * Splits a raw command line into its individual arguments.
 *
 * The splitting itself is done by repeatedly calling
 * parse_commandline_argument() until it reports that no argument is left.
 *
 * @param input  The complete command line as a single string.
 *
 * @returns      The arguments in the order they appear in @a input.
 */
static std::vector<std::string> parse_commandline_arguments(std::string input)
{
	// data() is well defined for an empty string as well, whereas taking the
	// address of input[0] on a non-const empty string is undefined before C++11.
	const char* start = input.data();
	const char* end = start + input.size();

	std::string buffer;
	std::vector<std::string> res;
	while(parse_commandline_argument(start, end, buffer)) {
		// Swap the argument into a fresh slot: this avoids a copy and leaves
		// buffer empty for the next iteration.
		res.push_back(std::string());
		res.back().swap(buffer);
	}
	return res;
}
#endif
#ifdef __native_client__
@ -812,7 +867,19 @@ int main(int argc, char** argv)
execv(argv[0], argv);
}
#endif
#ifdef _WIN32
(void)argc;
(void)argv;
//windows argv is ansi encoded by default
std::vector<std::string> args = parse_commandline_arguments(unicode_cast<std::string>(std::wstring(GetCommandLineW())));
#else
std::vector<std::string> args;
for(int i = 0; i < argc; ++i)
{
args.push_back(std::string(argv[i]));
}
#endif
assert(!args.empty());
if(SDL_Init(SDL_INIT_TIMER) < 0) {
fprintf(stderr, "Couldn't initialize SDL: %s\n", SDL_GetError());
return(1);
@ -847,7 +914,7 @@ int main(int argc, char** argv)
}
}
const int res = do_gameloop(argc,argv);
const int res = do_gameloop(args);
safe_exit(res);
} catch(boost::program_options::error& e) {
std::cerr << "Error in command line: " << e.what() << '\n';