Convert custom unicode type aliases to proper types (available as of C++11)

This changes:
utf8::char_t   ->  char
utf8::string   ->  std::string
utf16::char_t  ->  char16_t
utf16::string  ->  std::u16string
ucs4::char_t   ->  char32_t
ucs4::string   ->  std::u32string

In addition, utf16::string and ucs4::string are now proper strings (std::basic_string specializations) instead of vectors of characters.

In order to get this change to compile at all, I needed to add a ucs4_convert_impl::convert_impl
specialization for wchar_t alongside the new char16_t specialization; both point to the same
conversion implementation type.

This commit doesn't do any additional cleanup. I'm sure if we looked, we could get rid of a
lot of the custom conversion code and probably a bunch of stuff that might have had to do with
supporting utf16::string and ucs4::string being vectors instead of basic_string specializations.
Either way, I don't know the code (or encoding handling in general) well enough to make a call
as to what's needed or not. I'll let someone else do that.
This commit is contained in:
Charles Dang 2018-04-05 00:45:18 +11:00 committed by Jyrki Vesterinen
parent cc78bb3b1c
commit 1deacd89f6
30 changed files with 152 additions and 174 deletions

View file

@ -28,6 +28,7 @@
#include "random.hpp"
#include "serialization/parser.hpp"
#include "serialization/string_utils.hpp"
#include "serialization/utf8_exception.hpp"
#include <stdexcept>

View file

@ -60,7 +60,7 @@ namespace {
struct addon_filename_ucs4char_illegal
{
inline bool operator()(ucs4::char_t c) const
inline bool operator()(char32_t c) const
{
switch(c){
case ' ':
@ -118,8 +118,8 @@ bool addon_filename_legal(const std::string& name)
return false;
}
const ucs4::string name_ucs4 = unicode_cast<ucs4::string>(name);
const std::string name_utf8 = unicode_cast<utf8::string>(name_ucs4);
const std::u32string name_ucs4 = unicode_cast<std::u32string>(name);
const std::string name_utf8 = unicode_cast<std::string>(name_ucs4);
if(name != name_utf8){ // name is invalid UTF-8
return false;
}

View file

@ -198,7 +198,7 @@ void windows_tray_notification::switch_to_wesnoth_window()
std::wstring windows_tray_notification::string_to_wstring(const std::string& string, std::size_t maxlength)
{
utf16::string u16_string = unicode_cast<utf16::string>(string);
std::u16string u16_string = unicode_cast<std::u16string>(string);
if(u16_string.size() > maxlength) {
if((u16_string[maxlength-1] & 0xDC00) == 0xD800)
u16_string.resize(maxlength - 1);

View file

@ -22,7 +22,7 @@
namespace font
{
bool is_cjk_char(const ucs4::char_t ch)
bool is_cjk_char(const char32_t ch)
{
/**
* You can check these ranges at http://unicode.org/charts/
@ -96,7 +96,7 @@ namespace {
* CJK (CJK punctuations)
* http://www.unicode.org/charts/PDF/U3000.pdf
*/
inline bool no_break_after(const ucs4::char_t ch)
inline bool no_break_after(const char32_t ch)
{
return
/**
@ -117,7 +117,7 @@ inline bool no_break_after(const ucs4::char_t ch)
ch == 0x3016 || ch == 0x301a || ch == 0x301d;
}
inline bool no_break_before(const ucs4::char_t ch)
inline bool no_break_before(const char32_t ch)
{
return
/**
@ -154,7 +154,7 @@ inline bool no_break_before(const ucs4::char_t ch)
ch == 0x301b || ch == 0x301e;
}
inline bool break_before(const ucs4::char_t ch)
inline bool break_before(const char32_t ch)
{
if(no_break_before(ch))
return false;
@ -162,7 +162,7 @@ inline bool break_before(const ucs4::char_t ch)
return is_cjk_char(ch);
}
inline bool break_after(const ucs4::char_t ch)
inline bool break_after(const char32_t ch)
{
if(no_break_after(ch))
return false;

View file

@ -16,15 +16,13 @@
#pragma once
#include "serialization/unicode_types.hpp"
namespace font {
/**
* Determine if a ucs4::char_t is a CJK character
* Determine if a char32_t is a CJK character
*
* @retval true Input-char is a CJK char
* @retval false Input-char is not a CJK char.
*/
bool is_cjk_char(const ucs4::char_t ch);
bool is_cjk_char(const char32_t ch);
} // end namespace font

View file

@ -139,21 +139,21 @@ unsigned pango_text::insert_text(const unsigned offset, const std::string& text)
if (length_ + len > maximum_length_) {
len = maximum_length_ - length_;
}
const utf8::string insert = text.substr(0, utf8::index(text, len));
utf8::string tmp = text_;
const std::string insert = text.substr(0, utf8::index(text, len));
std::string tmp = text_;
this->set_text(utf8::insert(tmp, offset, insert), false);
// report back how many characters were actually inserted (e.g. to move the cursor selection)
return len;
}
bool pango_text::insert_unicode(const unsigned offset, ucs4::char_t unicode)
bool pango_text::insert_unicode(const unsigned offset, char32_t unicode)
{
return this->insert_unicode(offset, ucs4::string(1, unicode)) == 1;
return this->insert_unicode(offset, std::u32string(1, unicode)) == 1;
}
unsigned pango_text::insert_unicode(const unsigned offset, const ucs4::string& unicode)
unsigned pango_text::insert_unicode(const unsigned offset, const std::u32string& unicode)
{
const utf8::string insert = unicode_cast<utf8::string>(unicode);
const std::string insert = unicode_cast<std::string>(unicode);
return this->insert_text(offset, insert);
}
@ -296,8 +296,8 @@ bool pango_text::set_text(const std::string& text, const bool markedup)
layout_.reset(pango_layout_new(context_.get()));
}
const ucs4::string wide = unicode_cast<ucs4::string>(text);
const std::string narrow = unicode_cast<utf8::string>(wide);
const std::u32string wide = unicode_cast<std::u32string>(text);
const std::string narrow = unicode_cast<std::string>(wide);
if(text != narrow) {
ERR_GUI_L << "pango_text::" << __func__
<< " text '" << text
@ -446,7 +446,7 @@ pango_text& pango_text::set_maximum_length(const std::size_t maximum_length)
if(maximum_length != maximum_length_) {
maximum_length_ = maximum_length;
if(length_ > maximum_length_) {
utf8::string tmp = text_;
std::string tmp = text_;
this->set_text(utf8::truncate(tmp, maximum_length_), false);
}
}

View file

@ -19,7 +19,6 @@
#include "sdl/surface.hpp"
#include "sdl/texture.hpp"
#include "serialization/string_utils.hpp"
#include "serialization/unicode_types.hpp"
#include <pango/pango.h>
#include <pango/pangocairo.h>
@ -129,7 +128,7 @@ public:
*
* @returns True upon success, false otherwise.
*/
bool insert_unicode(const unsigned offset, ucs4::char_t unicode);
bool insert_unicode(const unsigned offset, char32_t unicode);
/**
* Inserts unicode text.
@ -140,7 +139,7 @@ public:
* @returns The number of characters inserted.
*/
unsigned insert_unicode(
const unsigned offset, const ucs4::string& unicode);
const unsigned offset, const std::u32string& unicode);
/***** ***** ***** ***** Font flags ***** ***** ***** *****/

View file

@ -124,7 +124,7 @@ bool dispatcher::fire(const ui_event event,
widget& target,
const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode)
const std::string& unicode)
{
assert(is_keyboard_event(event));
return fire_event<signal_keyboard_function>(event, this, &target, key, modifier, unicode);

View file

@ -16,7 +16,6 @@
#include "gui/core/event/handler.hpp"
#include "hotkey/hotkey_command.hpp"
#include "serialization/unicode_types.hpp"
#include "utils/functional.hpp"
#include <SDL_events.h>
@ -199,7 +198,7 @@ typedef std::function<void(widget& dispatcher,
bool& halt,
const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode)> signal_keyboard_function;
const std::string& unicode)> signal_keyboard_function;
/**
* Callback function signature.
@ -347,7 +346,7 @@ public:
widget& target,
const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode);
const std::string& unicode);
/**
* Fires an event which takes touch parameters.

View file

@ -708,17 +708,17 @@ void distributor::signal_handler_keyboard_internal(event::ui_event evt, P1&& p1,
}
}
void distributor::signal_handler_sdl_key_down(const SDL_Keycode key, const SDL_Keymod modifier, const utf8::string& unicode)
void distributor::signal_handler_sdl_key_down(const SDL_Keycode key, const SDL_Keymod modifier, const std::string& unicode)
{
signal_handler_keyboard_internal<signal_keyboard_function>(event::SDL_KEY_DOWN, key, modifier, unicode);
}
void distributor::signal_handler_sdl_text_input(const utf8::string& unicode, int32_t start, int32_t end)
void distributor::signal_handler_sdl_text_input(const std::string& unicode, int32_t start, int32_t end)
{
signal_handler_keyboard_internal<signal_text_input_function>(event::SDL_TEXT_INPUT, unicode, start, end);
}
void distributor::signal_handler_sdl_text_editing(const utf8::string& unicode, int32_t start, int32_t end)
void distributor::signal_handler_sdl_text_editing(const std::string& unicode, int32_t start, int32_t end)
{
signal_handler_keyboard_internal<signal_text_input_function>(event::SDL_TEXT_EDITING, unicode, start, end);
}

View file

@ -40,7 +40,6 @@
#include "gui/core/event/dispatcher.hpp"
#include "gui/core/event/handler.hpp"
#include "sdl/point.hpp"
#include "serialization/unicode_types.hpp"
#include "video.hpp"
#include <string>
@ -354,10 +353,10 @@ private:
void signal_handler_sdl_key_down(const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode);
const std::string& unicode);
void signal_handler_sdl_text_input(const utf8::string& unicode, int32_t start, int32_t len);
void signal_handler_sdl_text_editing(const utf8::string& unicode, int32_t start, int32_t len);
void signal_handler_sdl_text_input(const std::string& unicode, int32_t start, int32_t len);
void signal_handler_sdl_text_editing(const std::string& unicode, int32_t start, int32_t len);
template<typename Fcn, typename P1, typename P2, typename P3>
void signal_handler_keyboard_internal(event::ui_event evt, P1&& p1, P2&& p2, P3&& p3);

View file

@ -240,7 +240,7 @@ private:
*/
void key_down(const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode);
const std::string& unicode);
/**
* Fires a text input event.
@ -696,7 +696,7 @@ bool sdl_event_handler::hotkey_pressed(const hotkey::hotkey_ptr key)
void sdl_event_handler::key_down(const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode)
const std::string& unicode)
{
DBG_GUI_E << "Firing: " << SDL_KEY_DOWN << ".\n";

View file

@ -46,7 +46,7 @@ void password_box::set_value(const std::string& text)
{
real_value_ = text;
std::size_t sz = utf8::size(text);
utf8::string passwd;
std::string passwd;
for(std::size_t i = 0; i < sz; i++) {
passwd.append(font::unicode_bullet);
}
@ -70,7 +70,7 @@ void password_box::delete_selection()
set_cursor(start, false);
}
void password_box::insert_char(const utf8::string& unicode)
void password_box::insert_char(const std::string& unicode)
{
int len = get_selection_length();
unsigned sel = get_selection_start();
@ -83,7 +83,7 @@ void password_box::insert_char(const utf8::string& unicode)
if(sz == 1) {
text_box::insert_char(font::unicode_bullet);
} else {
utf8::string passwd;
std::string passwd;
for(std::size_t i = 0; i < sz; i++) {
passwd.append(font::unicode_bullet);
}

View file

@ -49,7 +49,7 @@ public:
protected:
void insert_char(const utf8::string& unicode) override;
void insert_char(const std::string& unicode) override;
void paste_selection(const bool mouse) override;
void delete_selection() override;

View file

@ -241,7 +241,7 @@ void text_box::delete_selection()
start -= len;
}
utf8::string tmp = get_value();
std::string tmp = get_value();
set_value(utf8::erase(tmp, start, len));
set_cursor(start, false);
}

View file

@ -146,7 +146,7 @@ void text_box_base::set_cursor(const std::size_t offset, const bool select)
}
}
void text_box_base::insert_char(const utf8::string& unicode)
void text_box_base::insert_char(const std::string& unicode)
{
delete_selection();
@ -188,7 +188,7 @@ void text_box_base::copy_selection(const bool mouse)
}
unsigned end, start = selection_start_;
const utf8::string txt = text_.text();
const std::string txt = text_.text();
if(selection_length_ > 0) {
end = utf8::index(txt, start + selection_length_);
@ -411,7 +411,7 @@ void text_box_base::handle_key_delete(SDL_Keymod /*modifier*/, bool& handled)
fire(event::NOTIFY_MODIFIED, *this, nullptr);
}
void text_box_base::handle_commit(bool& handled, const utf8::string& unicode)
void text_box_base::handle_commit(bool& handled, const std::string& unicode)
{
DBG_GUI_E << LOG_SCOPE_HEADER << '\n';
@ -431,7 +431,7 @@ void text_box_base::handle_commit(bool& handled, const utf8::string& unicode)
}
}
void text_box_base::handle_editing(bool& handled, const utf8::string& unicode, int32_t start)
void text_box_base::handle_editing(bool& handled, const std::string& unicode, int32_t start)
{
if(unicode.size() > 1 || unicode[0] != 0) {
handled = true;

View file

@ -185,7 +185,7 @@ protected:
*
* @param unicode The unicode value of the character to insert.
*/
virtual void insert_char(const utf8::string& unicode);
virtual void insert_char(const std::string& unicode);
/**
* Deletes the character.
@ -481,9 +481,9 @@ private:
protected:
virtual void handle_commit(bool& handled,
const utf8::string& unicode);
const std::string& unicode);
virtual void handle_editing(bool& handled,
const utf8::string& unicode,
const std::string& unicode,
int32_t start);
private:
@ -510,7 +510,7 @@ private:
void signal_handler_sdl_text_input(const event::ui_event event,
bool& handled,
const utf8::string& unicode,
const std::string& unicode,
int32_t start,
int32_t len);

View file

@ -305,7 +305,7 @@ bool hotkey_keyboard::matches_helper(const SDL_Event &event) const
if(text == ":" || text == "`") {
mods = mods & ~KMOD_SHIFT;
}
return text_ == text && utf8::size(utf8::string(event.text.text)) == 1 && mods == mod_;
return text_ == text && utf8::size(std::string(event.text.text)) == 1 && mods == mod_;
}
return false;

View file

@ -79,7 +79,7 @@ static std::string get_system_username()
if(GetUserNameW(buffer, &size)) {
//size includes a terminating null character.
assert(size > 0);
res = unicode_cast<utf8::string>(boost::iterator_range<wchar_t*>(buffer, buffer + size - 1));
res = unicode_cast<std::string>(boost::iterator_range<wchar_t*>(buffer, buffer + size - 1));
}
#else
if(char* const login = getenv("USER")) {

View file

@ -41,6 +41,7 @@
#include "saved_game.hpp"
#include "serialization/binary_or_text.hpp"
#include "serialization/parser.hpp"
#include "serialization/utf8_exception.hpp"
#include "statistics.hpp"
#include "version.hpp"
#include "video.hpp"

View file

@ -14,7 +14,7 @@
#pragma once
#include "unicode_types.hpp"
#include "utf8_exception.hpp"
#include "utils/math.hpp"
#include <cassert>
@ -23,7 +23,7 @@ namespace ucs4_convert_impl
struct utf8_impl
{
static const char* get_name() { return "utf8"; }
static std::size_t byte_size_from_ucs4_codepoint(ucs4::char_t ch)
static std::size_t byte_size_from_ucs4_codepoint(char32_t ch)
{
if(ch < (1u << 7))
return 1;
@ -41,7 +41,7 @@ namespace ucs4_convert_impl
throw utf8::invalid_utf8_exception(); // Invalid UCS-4
}
static int byte_size_from_utf8_first(utf8::char_t ch)
static int byte_size_from_utf8_first(char ch)
{
if (!(ch & 0x80)) {
return 1; // US-ASCII character, 1 byte
@ -59,19 +59,19 @@ namespace ucs4_convert_impl
/**
* Writes a UCS-4 character to a UTF-8 stream.
*
* @param out An object to write utf8::char_t. Required operations:
* 1) push(utf8::char_t) to write a single character
* @param out An object to write char. Required operations:
* 1) push(char) to write a single character
* 2) can_push(std::size_t n) to check whether there is still
* enough space for n characters.
* @param ch The UCS-4 character to write to the stream.
*/
template<typename writer>
static inline void write(writer out, ucs4::char_t ch)
static inline void write(writer out, char32_t ch)
{
std::size_t count = byte_size_from_ucs4_codepoint(ch);
assert(out.can_push(count));
if(count == 1) {
out.push(static_cast<utf8::char_t>(ch));
out.push(static_cast<char>(ch));
} else {
for(int j = static_cast<int>(count) - 1; j >= 0; --j) {
unsigned char c = (ch >> (6 * j)) & 0x3f;
@ -92,12 +92,12 @@ namespace ucs4_convert_impl
* to read.
*/
template<typename iitor_t>
static inline ucs4::char_t read(iitor_t& input, const iitor_t& end)
static inline char32_t read(iitor_t& input, const iitor_t& end)
{
assert(input != end);
std::size_t size = byte_size_from_utf8_first(*input);
ucs4::char_t current_char = static_cast<unsigned char>(*input);
char32_t current_char = static_cast<unsigned char>(*input);
// Convert the first character
if(size != 1) {
@ -131,41 +131,41 @@ namespace ucs4_convert_impl
{
static const char* get_name() { return "utf16"; }
template<typename writer>
static inline void write(writer out, ucs4::char_t ch)
static inline void write(writer out, char32_t ch)
{
const ucs4::char_t bit17 = 0x10000;
const char32_t bit17 = 0x10000;
if(ch < bit17)
{
assert(out.can_push(1));
out.push(static_cast<utf16::char_t>(ch));
out.push(static_cast<char16_t>(ch));
}
else
{
assert(out.can_push(2));
const ucs4::char_t char20 = ch - bit17;
const char32_t char20 = ch - bit17;
assert(char20 < (1 << 20));
const ucs4::char_t lead = 0xD800 + (char20 >> 10);
const ucs4::char_t trail = 0xDC00 + (char20 & 0x3FF);
const char32_t lead = 0xD800 + (char20 >> 10);
const char32_t trail = 0xDC00 + (char20 & 0x3FF);
assert(lead < bit17);
assert(trail < bit17);
out.push(static_cast<utf16::char_t>(lead));
out.push(static_cast<utf16::char_t>(trail));
out.push(static_cast<char16_t>(lead));
out.push(static_cast<char16_t>(trail));
}
}
template<typename iitor_t>
static inline ucs4::char_t read(iitor_t& input, const iitor_t& end)
static inline char32_t read(iitor_t& input, const iitor_t& end)
{
const ucs4::char_t last10 = 0x3FF;
const ucs4::char_t type_filter = 0xFC00;
const ucs4::char_t type_lead = 0xD800;
const ucs4::char_t type_trail = 0xDC00;
const char32_t last10 = 0x3FF;
const char32_t type_filter = 0xFC00;
const char32_t type_lead = 0xD800;
const char32_t type_trail = 0xDC00;
assert(input != end);
ucs4::char_t current_char = static_cast<utf16::char_t>(*input);
char32_t current_char = static_cast<char16_t>(*input);
++input;
ucs4::char_t type = current_char & type_filter;
char32_t type = current_char & type_filter;
if(type == type_trail)
{
//found trail without head
@ -196,17 +196,17 @@ namespace ucs4_convert_impl
{
static const char* get_name() { return "UCS4"; }
template<typename writer>
static inline void write(writer out, ucs4::char_t ch)
static inline void write(writer out, char32_t ch)
{
assert(out.can_push(1));
out.push(ch);
}
template<typename iitor_t>
static inline ucs4::char_t read(iitor_t& input, const iitor_t& end)
static inline char32_t read(iitor_t& input, const iitor_t& end)
{
assert(input != end);
ucs4::char_t current_char = *input;
char32_t current_char = *input;
++input;
return current_char;
}
@ -216,19 +216,25 @@ namespace ucs4_convert_impl
struct convert_impl {};
template<>
struct convert_impl<utf8::char_t>
struct convert_impl<char>
{
typedef utf8_impl type;
};
template<>
struct convert_impl<utf16::char_t>
struct convert_impl<char16_t>
{
typedef utf16_impl type;
};
template<>
struct convert_impl<ucs4::char_t>
struct convert_impl<wchar_t>
{
typedef utf16_impl type;
};
template<>
struct convert_impl<char32_t>
{
typedef utf32_impl type;
};

View file

@ -18,8 +18,6 @@
#include <cstddef> //ptrdiff_t
#include <cassert> //assert
#include "unicode_types.hpp"
namespace ucs4
{
template<typename string_type, typename update_implementation>
@ -27,10 +25,10 @@ namespace ucs4
{
public:
typedef std::input_iterator_tag iterator_category;
typedef ucs4::char_t value_type;
typedef char32_t value_type;
typedef ptrdiff_t difference_type;
typedef ucs4::char_t* pointer;
typedef ucs4::char_t& reference;
typedef char32_t* pointer;
typedef char32_t& reference;
iterator_base(const string_type& str)
: current_char(0)
@ -75,7 +73,7 @@ namespace ucs4
return *this;
}
ucs4::char_t operator*() const
char32_t operator*() const
{
return current_char;
}
@ -100,7 +98,7 @@ namespace ucs4
current_char = update_implementation::read(current_substr.second, string_end);
}
ucs4::char_t current_char;
char32_t current_char;
typename string_type::const_iterator string_end;
std::pair<typename string_type::const_iterator, typename string_type::const_iterator> current_substr;
};

View file

@ -48,18 +48,18 @@ static int byte_size_from_utf8_first(const unsigned char ch)
return count;
}
utf8::string lowercase(const utf8::string& s)
std::string lowercase(const std::string& s)
{
if(!s.empty()) {
utf8::iterator itor(s);
utf8::string res;
std::string res;
for(;itor != utf8::iterator::end(s); ++itor) {
ucs4::char_t uchar = *itor;
char32_t uchar = *itor;
// If wchar_t is less than 32 bits wide, we cannot apply towlower() to all codepoints
if(uchar <= static_cast<ucs4::char_t>(std::numeric_limits<wchar_t>::max()))
if(uchar <= static_cast<char32_t>(std::numeric_limits<wchar_t>::max()))
uchar = towlower(static_cast<wchar_t>(uchar));
res += unicode_cast<utf8::string>(uchar);
res += unicode_cast<std::string>(uchar);
}
res.append(itor.substr().second, s.end());
@ -68,7 +68,7 @@ utf8::string lowercase(const utf8::string& s)
return s;
}
std::size_t index(const utf8::string& str, const std::size_t index)
std::size_t index(const std::string& str, const std::size_t index)
{
// chr counts characters, i is the codepoint index
// remark: several functions rely on the fallback to str.length()
@ -83,7 +83,7 @@ std::size_t index(const utf8::string& str, const std::size_t index)
return i;
}
std::size_t size(const utf8::string& str)
std::size_t size(const std::string& str)
{
unsigned int chr, i = 0, len = str.size();
try {
@ -96,12 +96,12 @@ std::size_t size(const utf8::string& str)
return chr;
}
utf8::string& insert(utf8::string& str, const std::size_t pos, const utf8::string& insert)
std::string& insert(std::string& str, const std::size_t pos, const std::string& insert)
{
return str.insert(index(str, pos), insert);
}
utf8::string& erase(utf8::string& str, const std::size_t start, const std::size_t len)
std::string& erase(std::string& str, const std::size_t start, const std::size_t len)
{
if (start > size(str)) return str;
unsigned pos = index(str, start);
@ -114,17 +114,17 @@ utf8::string& erase(utf8::string& str, const std::size_t start, const std::size_
}
}
utf8::string& truncate(utf8::string& str, const std::size_t size)
std::string& truncate(std::string& str, const std::size_t size)
{
return erase(str, size);
}
void truncate_as_ucs4(utf8::string &str, const std::size_t size)
void truncate_as_ucs4(std::string &str, const std::size_t size)
{
ucs4::string u4_str = unicode_cast<ucs4::string>(str);
std::u32string u4_str = unicode_cast<std::u32string>(str);
if(u4_str.size() > size) {
u4_str.resize(size);
str = unicode_cast<utf8::string>(u4_str);
str = unicode_cast<std::string>(u4_str);
}
}

View file

@ -16,7 +16,6 @@
#pragma once
#include "ucs4_iterator_base.hpp"
#include "unicode_types.hpp"
#include "ucs4_convert_impl.hpp"
#include "unicode_cast.hpp"
@ -30,7 +29,7 @@
* Wide strings are expected to be UTF-16.
*/
namespace utf16 {
typedef ucs4::iterator_base<utf16::string, ucs4_convert_impl::convert_impl<char_t>::type> iterator;
typedef ucs4::iterator_base<std::u16string, ucs4_convert_impl::convert_impl<char16_t>::type> iterator;
}
/**
@ -38,23 +37,23 @@ namespace utf16 {
* back and forth.
*/
namespace utf8 {
typedef ucs4::iterator_base<std::string, ucs4_convert_impl::convert_impl<char_t>::type> iterator;
typedef ucs4::iterator_base<std::string, ucs4_convert_impl::convert_impl<char>::type> iterator;
/** Returns a lowercased version of the string. */
utf8::string lowercase(const utf8::string& s);
std::string lowercase(const std::string& s);
/**
* Codepoint index corresponding to the nth character in a UTF-8 string.
*
* @return str.length() if there are fewer than @p index characters.
*/
std::size_t index(const utf8::string& str, const std::size_t index);
std::size_t index(const std::string& str, const std::size_t index);
/** Length in characters of a UTF-8 string. */
std::size_t size(const utf8::string& str);
std::size_t size(const std::string& str);
/** Insert a UTF-8 string at the specified position. */
utf8::string& insert(utf8::string& str, const std::size_t pos, const utf8::string& insert);
std::string& insert(std::string& str, const std::size_t pos, const std::string& insert);
/**
* Erases a portion of a UTF-8 string.
@ -66,7 +65,7 @@ namespace utf8 {
* @note This implementation does not check for valid UTF-8. Don't use it
* for user input.
*/
utf8::string& erase(utf8::string& str, const std::size_t start, const std::size_t len = std::string::npos);
std::string& erase(std::string& str, const std::size_t start, const std::size_t len = std::string::npos);
/**
* Truncates a UTF-8 string to the specified number of characters.
@ -77,7 +76,7 @@ namespace utf8 {
* @note This implementation does not check for valid UTF-8. Don't use it
* for user input.
*/
utf8::string& truncate(utf8::string& str, const std::size_t size);
std::string& truncate(std::string& str, const std::size_t size);
/**
* Truncates a UTF-8 string to the specified number of characters.
@ -92,5 +91,5 @@ namespace utf8 {
* codepoints.
* @param size The size to truncate to.
*/
void truncate_as_ucs4(utf8::string& str, const std::size_t size);
void truncate_as_ucs4(std::string& str, const std::size_t size);
} // end namespace utf8

View file

@ -90,11 +90,11 @@ typename ucs4_convert_impl::enableif<TD, typename TS::value_type>::type unicode_
* @return An instance of TD.
*/
template<typename TD>
TD unicode_cast(ucs4::char_t onechar)
TD unicode_cast(char32_t onechar)
{
using namespace ucs4_convert_impl;
typedef typename convert_impl<typename TD::value_type>::type impl_writer;
typedef convert_impl<ucs4::char_t>::type impl_reader;
typedef convert_impl<char32_t>::type impl_reader;
typedef typename std::back_insert_iterator<TD> output_itor;
TD res;

View file

@ -14,20 +14,9 @@
#pragma once
#include <string>
#include <vector>
#include <exception>
#include <cstdint>
namespace ucs4 {
typedef uint32_t char_t;
typedef std::vector<char_t> string;
}
namespace utf8 {
typedef char char_t;
typedef std::string string;
/**
* Thrown by operations encountering invalid UTF-8 data.
*
@ -37,14 +26,3 @@ namespace utf8 {
*/
class invalid_utf8_exception : public std::exception {};
}
/**
* For Win32 API.
*
* On windows, wchar_t is defined as uint16_t.
* Wide strings are expected to be UTF-16.
*/
namespace utf16 {
typedef wchar_t char_t;
typedef std::vector<char_t> string;
}

View file

@ -29,10 +29,10 @@ const std::size_t max_message_length = 256;
void truncate_message(const simple_wml::string_span& str, simple_wml::node& message)
{
// testing for msg.size() is not sufficient but we're not getting false negatives
// and it's cheaper than always converting to ucs4::string.
// and it's cheaper than always converting to std::u32string.
if(str.size() > static_cast<int>(chat_message::max_message_length)) {
std::string tmp(str.begin(), str.end());
// The string can contain utf-8 characters so truncate as ucs4::string otherwise
// The string can contain utf-8 characters so truncate as std::u32string otherwise
// a corrupted utf-8 string can be returned.
utf8::truncate_as_ucs4(tmp, max_message_length);
message.set_attr_dup("message", tmp.c_str());

View file

@ -41,7 +41,7 @@ BOOST_AUTO_TEST_CASE( utils_join_test )
BOOST_AUTO_TEST_CASE( utils_unicode_test )
{
utf8::string unicode = "ünicod€ check";
std::string unicode = "ünicod€ check";
BOOST_CHECK( utf8::size(unicode) == 13 );
int euro = utf8::index(unicode,6);
@ -49,23 +49,23 @@ BOOST_AUTO_TEST_CASE( utils_unicode_test )
BOOST_CHECK( utf8::truncate(unicode,3) == "üni");
utf8::string apple_u8("apple");
ucs4::string apple_u4 = unicode_cast<ucs4::string>(apple_u8);
utf16::string apple_u16 = unicode_cast<utf16::string>(apple_u4);
std::string apple_u8("apple");
std::u32string apple_u4 = unicode_cast<std::u32string>(apple_u8);
std::u16string apple_u16 = unicode_cast<std::u16string>(apple_u4);
BOOST_CHECK( apple_u4.size() == 5 );
BOOST_CHECK_EQUAL( apple_u8, unicode_cast<utf8::string>(apple_u4) );
BOOST_CHECK_EQUAL( apple_u8, unicode_cast<utf8::string>(apple_u16) );
BOOST_CHECK( apple_u4 == unicode_cast<ucs4::string>(apple_u16) );
BOOST_CHECK( apple_u16 == unicode_cast<utf16::string>(apple_u4) );
BOOST_CHECK_EQUAL( apple_u8, unicode_cast<std::string>(apple_u4) );
BOOST_CHECK_EQUAL( apple_u8, unicode_cast<std::string>(apple_u16) );
BOOST_CHECK( apple_u4 == unicode_cast<std::u32string>(apple_u16) );
BOOST_CHECK( apple_u16 == unicode_cast<std::u16string>(apple_u4) );
BOOST_CHECK_EQUAL( apple_u8.size(), apple_u16.size() );
ucs4::string water_u4;
std::u32string water_u4;
water_u4.push_back(0x6C34);
utf8::string water_u8 = unicode_cast<utf8::string>(water_u4);
utf16::string water_u16 = unicode_cast<utf16::string>(water_u4);
std::string water_u8 = unicode_cast<std::string>(water_u4);
std::u16string water_u16 = unicode_cast<std::u16string>(water_u4);
BOOST_CHECK_EQUAL(water_u4[0], static_cast<ucs4::char_t>(water_u16[0]));
BOOST_CHECK_EQUAL(water_u4[0], static_cast<char32_t>(water_u16[0]));
#if defined(_WIN32) || defined(_WIN64)
// Windows complains it can't be represented in the current code page.
// So instead, check directly for its UTF-8 representation.
@ -76,21 +76,21 @@ BOOST_AUTO_TEST_CASE( utils_unicode_test )
#if defined(_WIN32) || defined(_WIN64)
// Same as above.
utf8::string nonbmp_u8("\xF0\x90\x80\x80");
std::string nonbmp_u8("\xF0\x90\x80\x80");
#else
utf8::string nonbmp_u8("\U00010000");
std::string nonbmp_u8("\U00010000");
#endif
ucs4::string nonbmp_u4 = unicode_cast<ucs4::string>(nonbmp_u8);
utf16::string nonbmp_u16 = unicode_cast<utf16::string>(nonbmp_u4);
std::u32string nonbmp_u4 = unicode_cast<std::u32string>(nonbmp_u8);
std::u16string nonbmp_u16 = unicode_cast<std::u16string>(nonbmp_u4);
BOOST_CHECK_EQUAL(nonbmp_u8.size(), 4u);
BOOST_CHECK_EQUAL(nonbmp_u4[0], 0x10000u);
BOOST_CHECK_EQUAL(nonbmp_u16[0], 0xD800);
BOOST_CHECK_EQUAL(nonbmp_u16[1], 0xDC00);
BOOST_CHECK_EQUAL(nonbmp_u8, unicode_cast<utf8::string>(nonbmp_u4));
BOOST_CHECK_EQUAL(nonbmp_u8, unicode_cast<utf8::string>(nonbmp_u16));
BOOST_CHECK(nonbmp_u16 == unicode_cast<utf16::string>(nonbmp_u4));
BOOST_CHECK(nonbmp_u4 == unicode_cast<ucs4::string>(nonbmp_u16));
BOOST_CHECK_EQUAL(nonbmp_u8, unicode_cast<std::string>(nonbmp_u4));
BOOST_CHECK_EQUAL(nonbmp_u8, unicode_cast<std::string>(nonbmp_u16));
BOOST_CHECK(nonbmp_u16 == unicode_cast<std::u16string>(nonbmp_u4));
BOOST_CHECK(nonbmp_u4 == unicode_cast<std::u32string>(nonbmp_u16));
}
BOOST_AUTO_TEST_CASE( test_lowercase )

View file

@ -22,12 +22,12 @@
#include "serialization/unicode_cast.hpp"
#include "random.hpp"
static void add_prefixes(const ucs4::string& str, std::size_t length, markov_prefix_map& res)
static void add_prefixes(const std::u32string& str, std::size_t length, markov_prefix_map& res)
{
for(std::size_t i = 0; i <= str.size(); ++i) {
const std::size_t start = i > length ? i - length : 0;
const ucs4::string key(str.begin() + start, str.begin() + i);
const ucs4::char_t c = i != str.size() ? str[i] : 0;
const std::u32string key(str.begin() + start, str.begin() + i);
const char32_t c = i != str.size() ? str[i] : 0;
res[key].push_back(c);
}
}
@ -37,20 +37,20 @@ static markov_prefix_map markov_prefixes(const std::vector<std::string>& items,
markov_prefix_map res;
for(std::vector<std::string>::const_iterator i = items.begin(); i != items.end(); ++i) {
add_prefixes(unicode_cast<ucs4::string>(*i),length,res);
add_prefixes(unicode_cast<std::u32string>(*i),length,res);
}
return res;
}
static ucs4::string markov_generate_name(const markov_prefix_map& prefixes,
static std::u32string markov_generate_name(const markov_prefix_map& prefixes,
std::size_t chain_size, std::size_t max_len)
{
if(prefixes.empty() || chain_size == 0) {
return ucs4::string();
return std::u32string();
}
ucs4::string prefix, res;
std::u32string prefix, res;
// Since this function is called in the name description in a MP game it
// uses the local locale. The locale between players can be different and
@ -75,7 +75,7 @@ static ucs4::string markov_generate_name(const markov_prefix_map& prefixes,
return res;
}
const ucs4::char_t c = i->second[random[j++]%i->second.size()];
const char32_t c = i->second[random[j++]%i->second.size()];
if(c == 0) {
return res;
}
@ -98,16 +98,16 @@ static ucs4::string markov_generate_name(const markov_prefix_map& prefixes,
// name has end-of-string as a possible next character in the
// markov prefix map. If no valid ending is found, use the
// originally generated name.
ucs4::string originalRes = res;
std::u32string originalRes = res;
while(!res.empty()) {
const int prefixLen = chain_size < res.size() ? chain_size : res.size();
prefix = ucs4::string(res.end() - prefixLen, res.end());
prefix = std::u32string(res.end() - prefixLen, res.end());
const markov_prefix_map::const_iterator i = prefixes.find(prefix);
if (i == prefixes.end() || i->second.empty()) {
return res;
}
if (std::find(i->second.begin(), i->second.end(), static_cast<ucs4::char_t>(0))
if (std::find(i->second.begin(), i->second.end(), static_cast<char32_t>(0))
!= i->second.end()) {
// This ending is valid.
return res;
@ -132,6 +132,6 @@ markov_generator::markov_generator(const std::vector<std::string>& items, std::s
std::string markov_generator::generate() const
{
ucs4::string name = markov_generate_name(prefixes_, chain_size_, max_len_);
return unicode_cast<utf8::string>(name);
std::u32string name = markov_generate_name(prefixes_, chain_size_, max_len_);
return unicode_cast<std::string>(name);
}

View file

@ -14,11 +14,11 @@
#pragma once
#include "serialization/unicode_types.hpp"
#include "utils/name_generator.hpp"
#include <map>
#include <vector>
typedef std::map<ucs4::string, ucs4::string> markov_prefix_map;
typedef std::map<std::u32string, std::u32string> markov_prefix_map;
class markov_generator : public name_generator {
markov_prefix_map prefixes_;