Convert custom unicode type aliases to proper types (available as of C++11)

This changes:
utf8::char_t   ->  char
utf8::string   ->  std::string
utf16::char_t  ->  char16_t
utf16::string  ->  std::u16string
ucs4::char_t   ->  char32_t
ucs4::string   ->  std::u32string

As a side effect, the UTF-16 and UCS-4 string types are now proper strings (std::u16string and std::u32string) rather than vectors of characters.

In order to get this change to compile at all, I needed to add a ucs4_convert_impl::convert_impl
specialization for wchar_t alongside the new char16_t specialization; both point to the same
conversion implementation type.

This commit doesn't do any additional cleanup. I'm sure if we looked, we could get rid of a
lot of the custom conversion code and probably a bunch of stuff that might have had to do with
supporting utf16::string and ucs4::string being vectors instead of basic_string specializations.
Either way, I don't know the code (or encoding handling in general) well enough to make a call as to
what's needed or not. I'll let someone else do that.

(cherry-picked from commit 1deacd89f6)
This commit is contained in:
Charles Dang 2018-04-05 00:45:18 +11:00 committed by Jyrki Vesterinen
parent 553cacd88e
commit dfce371287
34 changed files with 179 additions and 199 deletions

View file

@ -28,6 +28,7 @@
#include "random.hpp"
#include "serialization/parser.hpp"
#include "serialization/string_utils.hpp"
#include "serialization/utf8_exception.hpp"
#include <stdexcept>

View file

@ -60,7 +60,7 @@ namespace {
struct addon_filename_ucs4char_illegal
{
inline bool operator()(ucs4::char_t c) const
inline bool operator()(char32_t c) const
{
switch(c){
case ' ':
@ -118,8 +118,8 @@ bool addon_filename_legal(const std::string& name)
return false;
}
const ucs4::string name_ucs4 = unicode_cast<ucs4::string>(name);
const std::string name_utf8 = unicode_cast<utf8::string>(name_ucs4);
const std::u32string name_ucs4 = unicode_cast<std::u32string>(name);
const std::string name_utf8 = unicode_cast<std::string>(name_ucs4);
if(name != name_utf8){ // name is invalid UTF-8
return false;
}

View file

@ -198,7 +198,7 @@ void windows_tray_notification::switch_to_wesnoth_window()
std::wstring windows_tray_notification::string_to_wstring(const std::string& string, std::size_t maxlength)
{
utf16::string u16_string = unicode_cast<utf16::string>(string);
std::u16string u16_string = unicode_cast<std::u16string>(string);
if(u16_string.size() > maxlength) {
if((u16_string[maxlength-1] & 0xDC00) == 0xD800)
u16_string.resize(maxlength - 1);

View file

@ -25,6 +25,7 @@
#include "serialization/string_utils.hpp"
#include "color.hpp"
#include "preferences/credentials.hpp"
#include "serialization/utf8_exception.hpp"
#include <SDL_timer.h>

View file

@ -224,7 +224,7 @@ bool is_format_char(char c)
}
}
bool is_cjk_char(const ucs4::char_t ch)
bool is_cjk_char(const char32_t ch)
{
/**
* You can check these range at http://unicode.org/charts/
@ -320,7 +320,7 @@ namespace {
* CJK (CJK punctuations)
* http://www.unicode.org/charts/PDF/U3000.pdf
*/
inline bool no_break_after(const ucs4::char_t ch)
inline bool no_break_after(const char32_t ch)
{
return
/**
@ -341,7 +341,7 @@ inline bool no_break_after(const ucs4::char_t ch)
ch == 0x3016 || ch == 0x301a || ch == 0x301d;
}
inline bool no_break_before(const ucs4::char_t ch)
inline bool no_break_before(const char32_t ch)
{
return
/**
@ -378,7 +378,7 @@ inline bool no_break_before(const ucs4::char_t ch)
ch == 0x301b || ch == 0x301e;
}
inline bool break_before(const ucs4::char_t ch)
inline bool break_before(const char32_t ch)
{
if(no_break_before(ch))
return false;
@ -386,7 +386,7 @@ inline bool break_before(const ucs4::char_t ch)
return is_cjk_char(ch);
}
inline bool break_after(const ucs4::char_t ch)
inline bool break_after(const char32_t ch)
{
if(no_break_after(ch))
return false;
@ -420,7 +420,7 @@ std::string word_wrap_text(const std::string& unwrapped_text, int font_size,
if(start_of_line) {
line_width = 0;
format_string.clear();
while(ch != end && *ch < static_cast<ucs4::char_t>(0x100)
while(ch != end && *ch < static_cast<char32_t>(0x100)
&& is_format_char(*ch) && !ch.next_is_end()) {
format_string.append(ch.substr().first, ch.substr().second);
@ -443,7 +443,7 @@ std::string word_wrap_text(const std::string& unwrapped_text, int font_size,
current_word = *ch;
++ch;
} else {
ucs4::char_t previous = 0;
char32_t previous = 0;
for(;ch != utf8::iterator::end(unwrapped_text) &&
*ch != ' ' && *ch != '\n'; ++ch) {

View file

@ -22,7 +22,6 @@ class CVideo;
class surface;
#include <string>
#include "serialization/unicode_types.hpp"
#include <SDL_rect.h>
@ -84,12 +83,12 @@ std::string del_tags(const std::string& text);
bool is_format_char(char c);
/**
* Determine if a ucs4::char_t is a CJK character
* Determine if a char32_t is a CJK character
*
* @retval true Input-char is a CJK char
* @retval false Input-char is a not CJK char.
*/
bool is_cjk_char(const ucs4::char_t ch);
bool is_cjk_char(const char32_t ch);
/**
* Wrap text.

View file

@ -132,21 +132,21 @@ unsigned pango_text::insert_text(const unsigned offset, const std::string& text)
if (length_ + len > maximum_length_) {
len = maximum_length_ - length_;
}
const utf8::string insert = text.substr(0, utf8::index(text, len));
utf8::string tmp = text_;
const std::string insert = text.substr(0, utf8::index(text, len));
std::string tmp = text_;
this->set_text(utf8::insert(tmp, offset, insert), false);
// report back how many characters were actually inserted (e.g. to move the cursor selection)
return len;
}
bool pango_text::insert_unicode(const unsigned offset, ucs4::char_t unicode)
bool pango_text::insert_unicode(const unsigned offset, char32_t unicode)
{
return this->insert_unicode(offset, ucs4::string(1, unicode)) == 1;
return this->insert_unicode(offset, std::u32string(1, unicode)) == 1;
}
unsigned pango_text::insert_unicode(const unsigned offset, const ucs4::string& unicode)
unsigned pango_text::insert_unicode(const unsigned offset, const std::u32string& unicode)
{
const utf8::string insert = unicode_cast<utf8::string>(unicode);
const std::string insert = unicode_cast<std::string>(unicode);
return this->insert_text(offset, insert);
}
@ -289,8 +289,8 @@ bool pango_text::set_text(const std::string& text, const bool markedup)
layout_.reset(pango_layout_new(context_.get()));
}
const ucs4::string wide = unicode_cast<ucs4::string>(text);
const std::string narrow = unicode_cast<utf8::string>(wide);
const std::u32string wide = unicode_cast<std::u32string>(text);
const std::string narrow = unicode_cast<std::string>(wide);
if(text != narrow) {
ERR_GUI_L << "pango_text::" << __func__
<< " text '" << text
@ -439,7 +439,7 @@ pango_text& pango_text::set_maximum_length(const std::size_t maximum_length)
if(maximum_length != maximum_length_) {
maximum_length_ = maximum_length;
if(length_ > maximum_length_) {
utf8::string tmp = text_;
std::string tmp = text_;
this->set_text(utf8::truncate(tmp, maximum_length_), false);
}
}

View file

@ -18,7 +18,6 @@
#include "color.hpp"
#include "sdl/surface.hpp"
#include "serialization/string_utils.hpp"
#include "serialization/unicode_types.hpp"
#include <pango/pango.h>
#include <pango/pangocairo.h>
@ -119,7 +118,7 @@ public:
*
* @returns True upon success, false otherwise.
*/
bool insert_unicode(const unsigned offset, ucs4::char_t unicode);
bool insert_unicode(const unsigned offset, char32_t unicode);
/**
* Inserts unicode text.
@ -130,7 +129,7 @@ public:
* @returns The number of characters inserted.
*/
unsigned insert_unicode(
const unsigned offset, const ucs4::string& unicode);
const unsigned offset, const std::u32string& unicode);
/***** ***** ***** ***** Font flags ***** ***** ***** *****/

View file

@ -148,7 +148,7 @@ bool dispatcher::fire(const ui_event event,
widget& target,
const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode)
const std::string& unicode)
{
assert(find<set_event_keyboard>(event, event_in_set()));
return fire_event<signal_keyboard_function>(event, this, &target, key, modifier, unicode);

View file

@ -16,7 +16,6 @@
#include "gui/core/event/handler.hpp"
#include "hotkey/hotkey_command.hpp"
#include "serialization/unicode_types.hpp"
#include "utils/functional.hpp"
@ -76,7 +75,7 @@ typedef std::function<void(widget& dispatcher,
bool& halt,
const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode)> signal_keyboard_function;
const std::string& unicode)> signal_keyboard_function;
/**
* Callback function signature.
@ -224,7 +223,7 @@ public:
widget& target,
const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode);
const std::string& unicode);
/**
* Fires an event which takes touch parameters.

View file

@ -708,17 +708,17 @@ void distributor::signal_handler_keyboard_internal(event::ui_event evt, P1&& p1,
}
}
void distributor::signal_handler_sdl_key_down(const SDL_Keycode key, const SDL_Keymod modifier, const utf8::string& unicode)
void distributor::signal_handler_sdl_key_down(const SDL_Keycode key, const SDL_Keymod modifier, const std::string& unicode)
{
signal_handler_keyboard_internal<signal_keyboard_function>(event::SDL_KEY_DOWN, key, modifier, unicode);
}
void distributor::signal_handler_sdl_text_input(const utf8::string& unicode, int32_t start, int32_t end)
void distributor::signal_handler_sdl_text_input(const std::string& unicode, int32_t start, int32_t end)
{
signal_handler_keyboard_internal<signal_text_input_function>(event::SDL_TEXT_INPUT, unicode, start, end);
}
void distributor::signal_handler_sdl_text_editing(const utf8::string& unicode, int32_t start, int32_t end)
void distributor::signal_handler_sdl_text_editing(const std::string& unicode, int32_t start, int32_t end)
{
signal_handler_keyboard_internal<signal_text_input_function>(event::SDL_TEXT_EDITING, unicode, start, end);
}

View file

@ -40,7 +40,6 @@
#include "gui/core/event/dispatcher.hpp"
#include "gui/core/event/handler.hpp"
#include "sdl/point.hpp"
#include "serialization/unicode_types.hpp"
#include "video.hpp"
#include <string>
@ -354,10 +353,10 @@ private:
void signal_handler_sdl_key_down(const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode);
const std::string& unicode);
void signal_handler_sdl_text_input(const utf8::string& unicode, int32_t start, int32_t len);
void signal_handler_sdl_text_editing(const utf8::string& unicode, int32_t start, int32_t len);
void signal_handler_sdl_text_input(const std::string& unicode, int32_t start, int32_t len);
void signal_handler_sdl_text_editing(const std::string& unicode, int32_t start, int32_t len);
template<typename Fcn, typename P1, typename P2, typename P3>
void signal_handler_keyboard_internal(event::ui_event evt, P1&& p1, P2&& p2, P3&& p3);

View file

@ -272,7 +272,7 @@ private:
*/
void key_down(const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode);
const std::string& unicode);
/**
* Fires a text input event.
@ -750,7 +750,7 @@ bool sdl_event_handler::hotkey_pressed(const hotkey::hotkey_ptr key)
void sdl_event_handler::key_down(const SDL_Keycode key,
const SDL_Keymod modifier,
const utf8::string& unicode)
const std::string& unicode)
{
DBG_GUI_E << "Firing: " << SDL_KEY_DOWN << ".\n";

View file

@ -46,7 +46,7 @@ void password_box::set_value(const std::string& text)
{
real_value_ = text;
std::size_t sz = utf8::size(text);
utf8::string passwd;
std::string passwd;
for(std::size_t i = 0; i < sz; i++) {
passwd.append(font::unicode_bullet);
}
@ -70,7 +70,7 @@ void password_box::delete_selection()
set_cursor(start, false);
}
void password_box::insert_char(const utf8::string& unicode)
void password_box::insert_char(const std::string& unicode)
{
int len = get_selection_length();
unsigned sel = get_selection_start();
@ -83,7 +83,7 @@ void password_box::insert_char(const utf8::string& unicode)
if(sz == 1) {
text_box::insert_char(font::unicode_bullet);
} else {
utf8::string passwd;
std::string passwd;
for(std::size_t i = 0; i < sz; i++) {
passwd.append(font::unicode_bullet);
}

View file

@ -49,7 +49,7 @@ public:
protected:
void insert_char(const utf8::string& unicode) override;
void insert_char(const std::string& unicode) override;
void paste_selection(const bool mouse) override;
void delete_selection() override;

View file

@ -242,7 +242,7 @@ void text_box::delete_selection()
start -= len;
}
utf8::string tmp = get_value();
std::string tmp = get_value();
set_value(utf8::erase(tmp, start, len));
set_cursor(start, false);
}

View file

@ -150,7 +150,7 @@ void text_box_base::set_cursor(const std::size_t offset, const bool select)
}
}
void text_box_base::insert_char(const utf8::string& unicode)
void text_box_base::insert_char(const std::string& unicode)
{
delete_selection();
@ -193,7 +193,7 @@ void text_box_base::copy_selection(const bool mouse)
}
unsigned end, start = selection_start_;
const utf8::string txt = text_.text();
const std::string txt = text_.text();
if(selection_length_ > 0) {
end = utf8::index(txt, start + selection_length_);
@ -422,7 +422,7 @@ void text_box_base::handle_key_delete(SDL_Keymod /*modifier*/, bool& handled)
fire(event::NOTIFY_MODIFIED, *this, nullptr);
}
void text_box_base::handle_commit(bool& handled, const utf8::string& unicode)
void text_box_base::handle_commit(bool& handled, const std::string& unicode)
{
DBG_GUI_E << LOG_SCOPE_HEADER << '\n';
@ -442,7 +442,7 @@ void text_box_base::handle_commit(bool& handled, const utf8::string& unicode)
}
}
void text_box_base::handle_editing(bool& handled, const utf8::string& unicode, int32_t start)
void text_box_base::handle_editing(bool& handled, const std::string& unicode, int32_t start)
{
if(unicode.size() > 1 || unicode[0] != 0) {
handled = true;

View file

@ -185,7 +185,7 @@ protected:
*
* @param unicode The unicode value of the character to insert.
*/
virtual void insert_char(const utf8::string& unicode);
virtual void insert_char(const std::string& unicode);
/**
* Deletes the character.
@ -481,9 +481,9 @@ private:
protected:
virtual void handle_commit(bool& handled,
const utf8::string& unicode);
const std::string& unicode);
virtual void handle_editing(bool& handled,
const utf8::string& unicode,
const std::string& unicode,
int32_t start);
private:
@ -510,7 +510,7 @@ private:
void signal_handler_sdl_text_input(const event::ui_event event,
bool& handled,
const utf8::string& unicode,
const std::string& unicode,
int32_t start,
int32_t len);

View file

@ -34,7 +34,7 @@
#include "sdl/surface.hpp" // for surface
#include "serialization/string_utils.hpp" // for split, quoted_split, etc
#include "serialization/unicode_cast.hpp" // for unicode_cast
#include "serialization/unicode_types.hpp" // for char_t, etc
#include "serialization/utf8_exception.hpp" // for char_t, etc
#include "terrain/terrain.hpp" // for terrain_type
#include "terrain/translation.hpp" // for operator==, ter_list, etc
#include "terrain/type_data.hpp" // for terrain_type_data, etc
@ -1361,9 +1361,9 @@ std::string get_first_word(const std::string &s)
if (ch == utf8::iterator::end(re))
return re;
ucs4::char_t firstchar = *ch;
char32_t firstchar = *ch;
if (font::is_cjk_char(firstchar)) {
re = unicode_cast<utf8::string>(firstchar);
re = unicode_cast<std::string>(firstchar);
}
return re;
}

View file

@ -305,7 +305,7 @@ bool hotkey_keyboard::matches_helper(const SDL_Event &event) const
if(text == ":" || text == "`") {
mods = mods & ~KMOD_SHIFT;
}
return text_ == text && utf8::size(utf8::string(event.text.text)) == 1 && mods == mod_;
return text_ == text && utf8::size(std::string(event.text.text)) == 1 && mods == mod_;
}
return false;

View file

@ -79,7 +79,7 @@ static std::string get_system_username()
if(GetUserNameW(buffer, &size)) {
//size includes a terminating null character.
assert(size > 0);
res = unicode_cast<utf8::string>(boost::iterator_range<wchar_t*>(buffer, buffer + size - 1));
res = unicode_cast<std::string>(boost::iterator_range<wchar_t*>(buffer, buffer + size - 1));
}
#else
if(char* const login = getenv("USER")) {

View file

@ -41,6 +41,7 @@
#include "saved_game.hpp"
#include "serialization/binary_or_text.hpp"
#include "serialization/parser.hpp"
#include "serialization/utf8_exception.hpp"
#include "statistics.hpp"
#include "version.hpp"
#include "video.hpp"

View file

@ -14,7 +14,7 @@
#pragma once
#include "unicode_types.hpp"
#include "utf8_exception.hpp"
#include "utils/math.hpp"
#include <cassert>
@ -23,7 +23,7 @@ namespace ucs4_convert_impl
struct utf8_impl
{
static const char* get_name() { return "utf8"; }
static std::size_t byte_size_from_ucs4_codepoint(ucs4::char_t ch)
static std::size_t byte_size_from_ucs4_codepoint(char32_t ch)
{
if(ch < (1u << 7))
return 1;
@ -41,7 +41,7 @@ namespace ucs4_convert_impl
throw utf8::invalid_utf8_exception(); // Invalid UCS-4
}
static int byte_size_from_utf8_first(utf8::char_t ch)
static int byte_size_from_utf8_first(char ch)
{
if (!(ch & 0x80)) {
return 1; // US-ASCII character, 1 byte
@ -59,19 +59,19 @@ namespace ucs4_convert_impl
/**
* Writes a UCS-4 character to a UTF-8 stream.
*
* @param out An object to write utf8::char_t. Required operations:
* 1) push(utf8::char_t) to write a single character
* @param out An object to write char. Required operations:
* 1) push(char) to write a single character
* 2) can_push(std::size_t n) to check whether there is still
* enough space for n characters.
* @param ch The UCS-4 character to write to the stream.
*/
template<typename writer>
static inline void write(writer out, ucs4::char_t ch)
static inline void write(writer out, char32_t ch)
{
std::size_t count = byte_size_from_ucs4_codepoint(ch);
assert(out.can_push(count));
if(count == 1) {
out.push(static_cast<utf8::char_t>(ch));
out.push(static_cast<char>(ch));
} else {
for(int j = static_cast<int>(count) - 1; j >= 0; --j) {
unsigned char c = (ch >> (6 * j)) & 0x3f;
@ -92,12 +92,12 @@ namespace ucs4_convert_impl
* to read.
*/
template<typename iitor_t>
static inline ucs4::char_t read(iitor_t& input, const iitor_t& end)
static inline char32_t read(iitor_t& input, const iitor_t& end)
{
assert(input != end);
std::size_t size = byte_size_from_utf8_first(*input);
ucs4::char_t current_char = static_cast<unsigned char>(*input);
char32_t current_char = static_cast<unsigned char>(*input);
// Convert the first character
if(size != 1) {
@ -131,41 +131,41 @@ namespace ucs4_convert_impl
{
static const char* get_name() { return "utf16"; }
template<typename writer>
static inline void write(writer out, ucs4::char_t ch)
static inline void write(writer out, char32_t ch)
{
const ucs4::char_t bit17 = 0x10000;
const char32_t bit17 = 0x10000;
if(ch < bit17)
{
assert(out.can_push(1));
out.push(static_cast<utf16::char_t>(ch));
out.push(static_cast<char16_t>(ch));
}
else
{
assert(out.can_push(2));
const ucs4::char_t char20 = ch - bit17;
const char32_t char20 = ch - bit17;
assert(char20 < (1 << 20));
const ucs4::char_t lead = 0xD800 + (char20 >> 10);
const ucs4::char_t trail = 0xDC00 + (char20 & 0x3FF);
const char32_t lead = 0xD800 + (char20 >> 10);
const char32_t trail = 0xDC00 + (char20 & 0x3FF);
assert(lead < bit17);
assert(trail < bit17);
out.push(static_cast<utf16::char_t>(lead));
out.push(static_cast<utf16::char_t>(trail));
out.push(static_cast<char16_t>(lead));
out.push(static_cast<char16_t>(trail));
}
}
template<typename iitor_t>
static inline ucs4::char_t read(iitor_t& input, const iitor_t& end)
static inline char32_t read(iitor_t& input, const iitor_t& end)
{
const ucs4::char_t last10 = 0x3FF;
const ucs4::char_t type_filter = 0xFC00;
const ucs4::char_t type_lead = 0xD800;
const ucs4::char_t type_trail = 0xDC00;
const char32_t last10 = 0x3FF;
const char32_t type_filter = 0xFC00;
const char32_t type_lead = 0xD800;
const char32_t type_trail = 0xDC00;
assert(input != end);
ucs4::char_t current_char = static_cast<utf16::char_t>(*input);
char32_t current_char = static_cast<char16_t>(*input);
++input;
ucs4::char_t type = current_char & type_filter;
char32_t type = current_char & type_filter;
if(type == type_trail)
{
//found trail without head
@ -196,17 +196,17 @@ namespace ucs4_convert_impl
{
static const char* get_name() { return "UCS4"; }
template<typename writer>
static inline void write(writer out, ucs4::char_t ch)
static inline void write(writer out, char32_t ch)
{
assert(out.can_push(1));
out.push(ch);
}
template<typename iitor_t>
static inline ucs4::char_t read(iitor_t& input, const iitor_t& end)
static inline char32_t read(iitor_t& input, const iitor_t& end)
{
assert(input != end);
ucs4::char_t current_char = *input;
char32_t current_char = *input;
++input;
return current_char;
}
@ -216,19 +216,25 @@ namespace ucs4_convert_impl
struct convert_impl {};
template<>
struct convert_impl<utf8::char_t>
struct convert_impl<char>
{
typedef utf8_impl type;
};
template<>
struct convert_impl<utf16::char_t>
struct convert_impl<char16_t>
{
typedef utf16_impl type;
};
template<>
struct convert_impl<ucs4::char_t>
struct convert_impl<wchar_t>
{
typedef utf16_impl type;
};
template<>
struct convert_impl<char32_t>
{
typedef utf32_impl type;
};

View file

@ -18,8 +18,6 @@
#include <cstddef> //ptrdiff_t
#include <cassert> //assert
#include "unicode_types.hpp"
namespace ucs4
{
template<typename string_type, typename update_implementation>
@ -27,10 +25,10 @@ namespace ucs4
{
public:
typedef std::input_iterator_tag iterator_category;
typedef ucs4::char_t value_type;
typedef char32_t value_type;
typedef ptrdiff_t difference_type;
typedef ucs4::char_t* pointer;
typedef ucs4::char_t& reference;
typedef char32_t* pointer;
typedef char32_t& reference;
iterator_base(const string_type& str)
: current_char(0)
@ -75,7 +73,7 @@ namespace ucs4
return *this;
}
ucs4::char_t operator*() const
char32_t operator*() const
{
return current_char;
}
@ -100,7 +98,7 @@ namespace ucs4
current_char = update_implementation::read(current_substr.second, string_end);
}
ucs4::char_t current_char;
char32_t current_char;
typename string_type::const_iterator string_end;
std::pair<typename string_type::const_iterator, typename string_type::const_iterator> current_substr;
};

View file

@ -48,18 +48,18 @@ static int byte_size_from_utf8_first(const unsigned char ch)
return count;
}
utf8::string lowercase(const utf8::string& s)
std::string lowercase(const std::string& s)
{
if(!s.empty()) {
utf8::iterator itor(s);
utf8::string res;
std::string res;
for(;itor != utf8::iterator::end(s); ++itor) {
ucs4::char_t uchar = *itor;
char32_t uchar = *itor;
// If wchar_t is less than 32 bits wide, we cannot apply towlower() to all codepoints
if(uchar <= static_cast<ucs4::char_t>(std::numeric_limits<wchar_t>::max()))
if(uchar <= static_cast<char32_t>(std::numeric_limits<wchar_t>::max()))
uchar = towlower(static_cast<wchar_t>(uchar));
res += unicode_cast<utf8::string>(uchar);
res += unicode_cast<std::string>(uchar);
}
res.append(itor.substr().second, s.end());
@ -68,7 +68,7 @@ utf8::string lowercase(const utf8::string& s)
return s;
}
std::size_t index(const utf8::string& str, const std::size_t index)
std::size_t index(const std::string& str, const std::size_t index)
{
// chr counts characters, i is the codepoint index
// remark: several functions rely on the fallback to str.length()
@ -83,7 +83,7 @@ std::size_t index(const utf8::string& str, const std::size_t index)
return i;
}
std::size_t size(const utf8::string& str)
std::size_t size(const std::string& str)
{
unsigned int chr, i = 0, len = str.size();
try {
@ -96,12 +96,12 @@ std::size_t size(const utf8::string& str)
return chr;
}
utf8::string& insert(utf8::string& str, const std::size_t pos, const utf8::string& insert)
std::string& insert(std::string& str, const std::size_t pos, const std::string& insert)
{
return str.insert(index(str, pos), insert);
}
utf8::string& erase(utf8::string& str, const std::size_t start, const std::size_t len)
std::string& erase(std::string& str, const std::size_t start, const std::size_t len)
{
if (start > size(str)) return str;
unsigned pos = index(str, start);
@ -114,17 +114,17 @@ utf8::string& erase(utf8::string& str, const std::size_t start, const std::size_
}
}
utf8::string& truncate(utf8::string& str, const std::size_t size)
std::string& truncate(std::string& str, const std::size_t size)
{
return erase(str, size);
}
void truncate_as_ucs4(utf8::string &str, const std::size_t size)
void truncate_as_ucs4(std::string &str, const std::size_t size)
{
ucs4::string u4_str = unicode_cast<ucs4::string>(str);
std::u32string u4_str = unicode_cast<std::u32string>(str);
if(u4_str.size() > size) {
u4_str.resize(size);
str = unicode_cast<utf8::string>(u4_str);
str = unicode_cast<std::string>(u4_str);
}
}

View file

@ -16,7 +16,6 @@
#pragma once
#include "ucs4_iterator_base.hpp"
#include "unicode_types.hpp"
#include "ucs4_convert_impl.hpp"
#include "unicode_cast.hpp"
@ -30,7 +29,7 @@
* Wide strings are expected to be UTF-16.
*/
namespace utf16 {
typedef ucs4::iterator_base<utf16::string, ucs4_convert_impl::convert_impl<char_t>::type> iterator;
typedef ucs4::iterator_base<std::u16string, ucs4_convert_impl::convert_impl<char16_t>::type> iterator;
}
/**
@ -38,23 +37,23 @@ namespace utf16 {
* back and forth.
*/
namespace utf8 {
typedef ucs4::iterator_base<std::string, ucs4_convert_impl::convert_impl<char_t>::type> iterator;
typedef ucs4::iterator_base<std::string, ucs4_convert_impl::convert_impl<char>::type> iterator;
/** Returns a lowercased version of the string. */
utf8::string lowercase(const utf8::string& s);
std::string lowercase(const std::string& s);
/**
* Codepoint index corresponding to the nth character in a UTF-8 string.
*
* @return str.length() if there are less than @p index characters.
*/
std::size_t index(const utf8::string& str, const std::size_t index);
std::size_t index(const std::string& str, const std::size_t index);
/** Length in characters of a UTF-8 string. */
std::size_t size(const utf8::string& str);
std::size_t size(const std::string& str);
/** Insert a UTF-8 string at the specified position. */
utf8::string& insert(utf8::string& str, const std::size_t pos, const utf8::string& insert);
std::string& insert(std::string& str, const std::size_t pos, const std::string& insert);
/**
* Erases a portion of a UTF-8 string.
@ -66,7 +65,7 @@ namespace utf8 {
* @note This implementation does not check for valid UTF-8. Don't use it
* for user input.
*/
utf8::string& erase(utf8::string& str, const std::size_t start, const std::size_t len = std::string::npos);
std::string& erase(std::string& str, const std::size_t start, const std::size_t len = std::string::npos);
/**
* Truncates a UTF-8 string to the specified number of characters.
@ -77,7 +76,7 @@ namespace utf8 {
* @note This implementation does not check for valid UTF-8. Don't use it
* for user input.
*/
utf8::string& truncate(utf8::string& str, const std::size_t size);
std::string& truncate(std::string& str, const std::size_t size);
/**
* Truncates a UTF-8 string to the specified number of characters.
@ -92,5 +91,5 @@ namespace utf8 {
* codepoints.
* @param size The size to truncate to.
*/
void truncate_as_ucs4(utf8::string& str, const std::size_t size);
void truncate_as_ucs4(std::string& str, const std::size_t size);
} // end namespace utf8

View file

@ -90,11 +90,11 @@ typename ucs4_convert_impl::enableif<TD, typename TS::value_type>::type unicode_
* @return An instance of TD.
*/
template<typename TD>
TD unicode_cast(ucs4::char_t onechar)
TD unicode_cast(char32_t onechar)
{
using namespace ucs4_convert_impl;
typedef typename convert_impl<typename TD::value_type>::type impl_writer;
typedef convert_impl<ucs4::char_t>::type impl_reader;
typedef convert_impl<char32_t>::type impl_reader;
typedef typename std::back_insert_iterator<TD> output_itor;
TD res;

View file

@ -14,20 +14,9 @@
#pragma once
#include <string>
#include <vector>
#include <exception>
#include <cstdint>
namespace ucs4 {
typedef uint32_t char_t;
typedef std::vector<char_t> string;
}
namespace utf8 {
typedef char char_t;
typedef std::string string;
/**
* Thrown by operations encountering invalid UTF-8 data.
*
@ -37,14 +26,3 @@ namespace utf8 {
*/
class invalid_utf8_exception : public std::exception {};
}
/**
* For Win32 API.
*
* On windows, wchar_t is defined as uint16_t.
* Wide strings are expected to be UTF-16.
*/
namespace utf16 {
typedef wchar_t char_t;
typedef std::vector<char_t> string;
}

View file

@ -29,10 +29,10 @@ const std::size_t max_message_length = 256;
void truncate_message(const simple_wml::string_span& str, simple_wml::node& message)
{
// testing for msg.size() is not sufficient but we're not getting false negatives
// and it's cheaper than always converting to ucs4::string.
// and it's cheaper than always converting to std::u32string.
if(str.size() > static_cast<int>(chat_message::max_message_length)) {
std::string tmp(str.begin(), str.end());
// The string can contain utf-8 characters so truncate as ucs4::string otherwise
// The string can contain utf-8 characters so truncate as std::u32string otherwise
// a corrupted utf-8 string can be returned.
utf8::truncate_as_ucs4(tmp, max_message_length);
message.set_attr_dup("message", tmp.c_str());

View file

@ -41,7 +41,7 @@ BOOST_AUTO_TEST_CASE( utils_join_test )
BOOST_AUTO_TEST_CASE( utils_unicode_test )
{
utf8::string unicode = "ünicod€ check";
std::string unicode = "ünicod€ check";
BOOST_CHECK( utf8::size(unicode) == 13 );
int euro = utf8::index(unicode,6);
@ -49,23 +49,23 @@ BOOST_AUTO_TEST_CASE( utils_unicode_test )
BOOST_CHECK( utf8::truncate(unicode,3) == "üni");
utf8::string apple_u8("apple");
ucs4::string apple_u4 = unicode_cast<ucs4::string>(apple_u8);
utf16::string apple_u16 = unicode_cast<utf16::string>(apple_u4);
std::string apple_u8("apple");
std::u32string apple_u4 = unicode_cast<std::u32string>(apple_u8);
std::u16string apple_u16 = unicode_cast<std::u16string>(apple_u4);
BOOST_CHECK( apple_u4.size() == 5 );
BOOST_CHECK_EQUAL( apple_u8, unicode_cast<utf8::string>(apple_u4) );
BOOST_CHECK_EQUAL( apple_u8, unicode_cast<utf8::string>(apple_u16) );
BOOST_CHECK( apple_u4 == unicode_cast<ucs4::string>(apple_u16) );
BOOST_CHECK( apple_u16 == unicode_cast<utf16::string>(apple_u4) );
BOOST_CHECK_EQUAL( apple_u8, unicode_cast<std::string>(apple_u4) );
BOOST_CHECK_EQUAL( apple_u8, unicode_cast<std::string>(apple_u16) );
BOOST_CHECK( apple_u4 == unicode_cast<std::u32string>(apple_u16) );
BOOST_CHECK( apple_u16 == unicode_cast<std::u16string>(apple_u4) );
BOOST_CHECK_EQUAL( apple_u8.size(), apple_u16.size() );
ucs4::string water_u4;
std::u32string water_u4;
water_u4.push_back(0x6C34);
utf8::string water_u8 = unicode_cast<utf8::string>(water_u4);
utf16::string water_u16 = unicode_cast<utf16::string>(water_u4);
std::string water_u8 = unicode_cast<std::string>(water_u4);
std::u16string water_u16 = unicode_cast<std::u16string>(water_u4);
BOOST_CHECK_EQUAL(water_u4[0], static_cast<ucs4::char_t>(water_u16[0]));
BOOST_CHECK_EQUAL(water_u4[0], static_cast<char32_t>(water_u16[0]));
#if defined(_WIN32) || defined(_WIN64)
// Windows complains it can't be represented in the current code-page.
// So instead, check directly for its UTF-8 representation.
@ -76,21 +76,21 @@ BOOST_AUTO_TEST_CASE( utils_unicode_test )
#if defined(_WIN32) || defined(_WIN64)
// Same as above.
utf8::string nonbmp_u8("\xF0\x90\x80\x80");
std::string nonbmp_u8("\xF0\x90\x80\x80");
#else
utf8::string nonbmp_u8("\U00010000");
std::string nonbmp_u8("\U00010000");
#endif
ucs4::string nonbmp_u4 = unicode_cast<ucs4::string>(nonbmp_u8);
utf16::string nonbmp_u16 = unicode_cast<utf16::string>(nonbmp_u4);
std::u32string nonbmp_u4 = unicode_cast<std::u32string>(nonbmp_u8);
std::u16string nonbmp_u16 = unicode_cast<std::u16string>(nonbmp_u4);
BOOST_CHECK_EQUAL(nonbmp_u8.size(), 4u);
BOOST_CHECK_EQUAL(nonbmp_u4[0], 0x10000u);
BOOST_CHECK_EQUAL(nonbmp_u16[0], 0xD800);
BOOST_CHECK_EQUAL(nonbmp_u16[1], 0xDC00);
BOOST_CHECK_EQUAL(nonbmp_u8, unicode_cast<utf8::string>(nonbmp_u4));
BOOST_CHECK_EQUAL(nonbmp_u8, unicode_cast<utf8::string>(nonbmp_u16));
BOOST_CHECK(nonbmp_u16 == unicode_cast<utf16::string>(nonbmp_u4));
BOOST_CHECK(nonbmp_u4 == unicode_cast<ucs4::string>(nonbmp_u16));
BOOST_CHECK_EQUAL(nonbmp_u8, unicode_cast<std::string>(nonbmp_u4));
BOOST_CHECK_EQUAL(nonbmp_u8, unicode_cast<std::string>(nonbmp_u16));
BOOST_CHECK(nonbmp_u16 == unicode_cast<std::u16string>(nonbmp_u4));
BOOST_CHECK(nonbmp_u4 == unicode_cast<std::u32string>(nonbmp_u16));
}
BOOST_AUTO_TEST_CASE( test_lowercase )

View file

@ -22,12 +22,12 @@
#include "serialization/unicode_cast.hpp"
#include "random.hpp"
static void add_prefixes(const ucs4::string& str, std::size_t length, markov_prefix_map& res)
static void add_prefixes(const std::u32string& str, std::size_t length, markov_prefix_map& res)
{
for(std::size_t i = 0; i <= str.size(); ++i) {
const std::size_t start = i > length ? i - length : 0;
const ucs4::string key(str.begin() + start, str.begin() + i);
const ucs4::char_t c = i != str.size() ? str[i] : 0;
const std::u32string key(str.begin() + start, str.begin() + i);
const char32_t c = i != str.size() ? str[i] : 0;
res[key].push_back(c);
}
}
@ -37,20 +37,20 @@ static markov_prefix_map markov_prefixes(const std::vector<std::string>& items,
markov_prefix_map res;
for(std::vector<std::string>::const_iterator i = items.begin(); i != items.end(); ++i) {
add_prefixes(unicode_cast<ucs4::string>(*i),length,res);
add_prefixes(unicode_cast<std::u32string>(*i),length,res);
}
return res;
}
static ucs4::string markov_generate_name(const markov_prefix_map& prefixes,
static std::u32string markov_generate_name(const markov_prefix_map& prefixes,
std::size_t chain_size, std::size_t max_len)
{
if(prefixes.empty() || chain_size == 0) {
return ucs4::string();
return std::u32string();
}
ucs4::string prefix, res;
std::u32string prefix, res;
// Since this function is called in the name description in a MP game it
// uses the local locale. The locale between players can be different and
@ -75,7 +75,7 @@ static ucs4::string markov_generate_name(const markov_prefix_map& prefixes,
return res;
}
const ucs4::char_t c = i->second[random[j++]%i->second.size()];
const char32_t c = i->second[random[j++]%i->second.size()];
if(c == 0) {
return res;
}
@ -98,16 +98,16 @@ static ucs4::string markov_generate_name(const markov_prefix_map& prefixes,
// name has end-of-string as a possible next character in the
// markov prefix map. If no valid ending is found, use the
// originally generated name.
ucs4::string originalRes = res;
std::u32string originalRes = res;
while(!res.empty()) {
const int prefixLen = chain_size < res.size() ? chain_size : res.size();
prefix = ucs4::string(res.end() - prefixLen, res.end());
prefix = std::u32string(res.end() - prefixLen, res.end());
const markov_prefix_map::const_iterator i = prefixes.find(prefix);
if (i == prefixes.end() || i->second.empty()) {
return res;
}
if (std::find(i->second.begin(), i->second.end(), static_cast<ucs4::char_t>(0))
if (std::find(i->second.begin(), i->second.end(), static_cast<char32_t>(0))
!= i->second.end()) {
// This ending is valid.
return res;
@ -132,6 +132,6 @@ markov_generator::markov_generator(const std::vector<std::string>& items, std::s
std::string markov_generator::generate() const
{
ucs4::string name = markov_generate_name(prefixes_, chain_size_, max_len_);
return unicode_cast<utf8::string>(name);
std::u32string name = markov_generate_name(prefixes_, chain_size_, max_len_);
return unicode_cast<std::string>(name);
}

View file

@ -14,11 +14,11 @@
#pragma once
#include "serialization/unicode_types.hpp"
#include "utils/name_generator.hpp"
#include <map>
#include <vector>
typedef std::map<ucs4::string, ucs4::string> markov_prefix_map;
typedef std::map<std::u32string, std::u32string> markov_prefix_map;
class markov_generator : public name_generator {
markov_prefix_map prefixes_;

View file

@ -30,7 +30,7 @@ static lg::log_domain log_display("display");
namespace gui {
textbox::textbox(CVideo &video, int width, const std::string& text, bool editable, std::size_t max_size, int font_size, double alpha, double alpha_focus, const bool auto_join)
: scrollarea(video, auto_join), max_size_(max_size), font_size_(font_size), text_(unicode_cast<ucs4::string>(text)),
: scrollarea(video, auto_join), max_size_(max_size), font_size_(font_size), text_(unicode_cast<std::u32string>(text)),
cursor_(text_.size()), selstart_(-1), selend_(-1),
grabmouse_(false), text_pos_(0), editable_(editable),
show_cursor_(true), show_cursor_at_(0), text_image_(nullptr),
@ -67,14 +67,14 @@ void textbox::set_inner_location(const SDL_Rect& rect)
const std::string textbox::text() const
{
const std::string &ret = unicode_cast<utf8::string>(text_);
const std::string &ret = unicode_cast<std::string>(text_);
return ret;
}
// set_text does not respect max_size_
void textbox::set_text(const std::string& text, const color_t& color)
{
text_ = unicode_cast<ucs4::string>(text);
text_ = unicode_cast<std::u32string>(text);
cursor_ = text_.size();
text_pos_ = 0;
selstart_ = -1;
@ -96,7 +96,7 @@ void textbox::append_text(const std::string& text, bool auto_scroll, const color
return;
}
const bool is_at_bottom = get_position() == get_max_position();
const ucs4::string& wtext = unicode_cast<ucs4::string>(text);
const std::u32string& wtext = unicode_cast<std::u32string>(text);
surface new_text = add_text_line(wtext, color);
surface new_surface = create_compatible_surface(text_image_,std::max<std::size_t>(text_image_->w,new_text->w),text_image_->h+new_text->h);
@ -295,7 +295,7 @@ void textbox::scroll(unsigned int pos)
set_dirty(true);
}
surface textbox::add_text_line(const ucs4::string& text, const color_t& color)
surface textbox::add_text_line(const std::u32string& text, const color_t& color)
{
line_height_ = font::get_max_height(font_size_);
@ -312,17 +312,17 @@ surface textbox::add_text_line(const ucs4::string& text, const color_t& color)
// some more complex scripts (that is, RTL languages). This part of the work should
// actually be done by the font-rendering system.
std::string visible_string;
ucs4::string wrapped_text;
std::u32string wrapped_text;
ucs4::string::const_iterator backup_itor = text.end();
std::u32string::const_iterator backup_itor = text.end();
ucs4::string::const_iterator itor = text.begin();
std::u32string::const_iterator itor = text.begin();
while(itor != text.end()) {
//If this is a space, save copies of the current state so we can roll back
if(char(*itor) == ' ') {
backup_itor = itor;
}
visible_string.append(unicode_cast<utf8::string>(*itor));
visible_string.append(unicode_cast<std::string>(*itor));
if(char(*itor) == '\n') {
backup_itor = text.end();
@ -341,12 +341,12 @@ surface textbox::add_text_line(const ucs4::string& text, const color_t& color)
wrapped_text.erase(wrapped_text.end()-backup, wrapped_text.end());
}
} else {
if (visible_string == std::string("").append(unicode_cast<utf8::string>(*itor))) {
if (visible_string == std::string("").append(unicode_cast<std::string>(*itor))) {
break; //breaks infinite loop where when running with a fake display, we word wrap a single character infinitely.
}
}
backup_itor = text.end();
wrapped_text.push_back(ucs4::char_t('\n'));
wrapped_text.push_back(char32_t('\n'));
char_x_.push_back(0);
char_y_.push_back(char_y_.back() + line_height_);
visible_string = "";
@ -358,7 +358,7 @@ surface textbox::add_text_line(const ucs4::string& text, const color_t& color)
}
}
const std::string s = unicode_cast<utf8::string>(wrapped_text);
const std::string s = unicode_cast<std::string>(wrapped_text);
const surface res(font::get_rendered_text(s, font_size_, color));
return res;
@ -399,7 +399,7 @@ void textbox::erase_selection()
if(!is_selection())
return;
ucs4::string::iterator itor = text_.begin() + std::min(selstart_, selend_);
std::u32string::iterator itor = text_.begin() + std::min(selstart_, selend_);
text_.erase(itor, itor + std::abs(selend_ - selstart_));
cursor_ = std::min(selstart_, selend_);
selstart_ = selend_ = -1;
@ -452,8 +452,8 @@ void textbox::handle_event(const SDL_Event& event)
bool textbox::handle_text_input(const SDL_Event& event)
{
bool changed = false;
utf8::string str = event.text.text;
ucs4::string s = unicode_cast<ucs4::string>(str);
std::string str = event.text.text;
std::u32string s = unicode_cast<std::u32string>(str);
DBG_G << "Char: " << str << "\n";
@ -567,7 +567,7 @@ bool textbox::handle_key_down(const SDL_Event &event)
//cut off anything after the first newline
str.erase(std::find_if(str.begin(),str.end(),utils::isnewline),str.end());
ucs4::string s = unicode_cast<ucs4::string>(str);
std::u32string s = unicode_cast<std::u32string>(str);
if(text_.size() < max_size_) {
if(s.size() + text_.size() > max_size_) {
@ -588,8 +588,8 @@ bool textbox::handle_key_down(const SDL_Event &event)
const std::size_t beg = std::min<std::size_t>(std::size_t(selstart_),std::size_t(selend_));
const std::size_t end = std::max<std::size_t>(std::size_t(selstart_),std::size_t(selend_));
ucs4::string ws(text_.begin() + beg, text_.begin() + end);
std::string s = unicode_cast<utf8::string>(ws);
std::u32string ws(text_.begin() + beg, text_.begin() + end);
std::string s = unicode_cast<std::string>(ws);
desktop::clipboard::copy_to_clipboard(s, false);
}
}

View file

@ -55,13 +55,13 @@ protected:
virtual void scroll(unsigned int pos);
private:
virtual void handle_text_changed(const ucs4::string&) {}
virtual void handle_text_changed(const std::u32string&) {}
std::size_t max_size_;
int font_size_;
ucs4::string text_;
std::u32string text_;
// mutable unsigned int firstOnScreen_;
int cursor_;
@ -106,7 +106,7 @@ private:
void draw_cursor(int pos) const;
void update_text_cache(bool reset = false, const color_t& color =font::NORMAL_COLOR);
surface add_text_line(const ucs4::string& text, const color_t& color =font::NORMAL_COLOR);
surface add_text_line(const std::u32string& text, const color_t& color =font::NORMAL_COLOR);
bool is_selection();
void erase_selection();