Doxygen fixes
This commit is contained in:
parent
00b3c7b214
commit
424d1e45ff
4 changed files with 80 additions and 55 deletions
|
@ -48,8 +48,8 @@ namespace ucs4_convert_impl
|
|||
return 1; // US-ASCII character, 1 byte
|
||||
}
|
||||
/* first bit set: character not in US-ASCII, multiple bytes
|
||||
* number of set bits at the beginning = bytes per character
|
||||
* e.g. 11110xxx indicates a 4-byte character */
|
||||
* number of set bits at the beginning = bytes per character
|
||||
* e.g. 11110xxx indicates a 4-byte character */
|
||||
int count = count_leading_ones(ch);
|
||||
if (count == 1 || count > 6) { // count > 4 after RFC 3629
|
||||
throw utf8::invalid_utf8_exception(); // Stop on invalid characters
|
||||
|
@ -58,12 +58,14 @@ namespace ucs4_convert_impl
|
|||
}
|
||||
|
||||
/**
|
||||
@param out an object to write utf8::char_t. required operations are:
|
||||
1) push(utf8::char_t) to write a single character
|
||||
2) can_push(size_t n) to check whether there is still enough space
|
||||
for n characters.
|
||||
@param ch the ucs4 chracter to write to the stream.
|
||||
*/
|
||||
* Writes a UCS-4 character to a UTF-8 stream.
|
||||
*
|
||||
* @param out An object to write utf8::char_t. Required operations:
|
||||
* 1) push(utf8::char_t) to write a single character
|
||||
* 2) can_push(size_t n) to check whether there is still
|
||||
* enough space for n characters.
|
||||
* @param ch The UCS-4 character to write to the stream.
|
||||
*/
|
||||
template<typename writer>
|
||||
static inline void write(writer out, ucs4::char_t ch)
|
||||
{
|
||||
|
@ -83,10 +85,13 @@ namespace ucs4_convert_impl
|
|||
}
|
||||
}
|
||||
/**
|
||||
reads an ucs4 character from an utf8 stream
|
||||
@param input an iterator pointing to the first character of a utf8 sequence to read
|
||||
@param end an iterator poinint to the end of teh utf8 sequence to read.
|
||||
*/
|
||||
* Reads a UCS-4 character from a UTF-8 stream.
|
||||
*
|
||||
* @param input An iterator pointing to the first character of a UTF-8
|
||||
* sequence to read.
|
||||
* @param end An iterator pointing to the end of the UTF-8 sequence
|
||||
* to read.
|
||||
*/
|
||||
template<typename iitor_t>
|
||||
static inline ucs4::char_t read(iitor_t& input, const iitor_t& end)
|
||||
{
|
||||
|
|
|
@ -26,61 +26,73 @@
|
|||
#include <vector>
|
||||
|
||||
/**
|
||||
* For win32 API.
|
||||
* On windows, wchar_t is defined as Uint16
|
||||
* Wide strings are expected to be UTF-16
|
||||
* For Win32 API.
|
||||
*
|
||||
* On Windows, wchar_t is defined as Uint16.
|
||||
* Wide strings are expected to be UTF-16.
|
||||
*/
|
||||
namespace utf16 {
|
||||
typedef ucs4::iterator_base<utf16::string, ucs4_convert_impl::convert_impl<char_t>::type> iterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Functions for converting Unicode wide-char strings to UTF-8 encoded strings,
|
||||
* back and forth.
|
||||
*/
|
||||
namespace utf8 {
|
||||
|
||||
/**
|
||||
* Functions for converting Unicode wide-char strings to UTF-8 encoded strings,
|
||||
* back and forth.
|
||||
*/
|
||||
typedef ucs4::iterator_base<std::string, ucs4_convert_impl::convert_impl<char_t>::type> iterator;
|
||||
|
||||
/** Returns a lowercased version of the string. */
|
||||
utf8::string lowercase(const utf8::string&);
|
||||
utf8::string lowercase(const utf8::string& s);
|
||||
|
||||
/**
|
||||
* codepoint index corresponing to the ...th character in an UTF-8 encoded string
|
||||
* if there are less than index characters, return str.length()
|
||||
* Codepoint index corresponding to the nth character in a UTF-8 string.
|
||||
*
|
||||
* @return str.length() if there are fewer than @p index characters.
|
||||
*/
|
||||
size_t index(const utf8::string& str, const size_t index);
|
||||
|
||||
/** length in characters of an UTF-8 encoded string */
|
||||
/** Length in characters of a UTF-8 string. */
|
||||
size_t size(const utf8::string& str);
|
||||
|
||||
/** insert at position pos into an UTF-8 encoded string */
|
||||
/** Insert a UTF-8 string at the specified position. */
|
||||
utf8::string& insert(utf8::string& str, const size_t pos, const utf8::string& insert);
|
||||
|
||||
/**
|
||||
* erase len characters at position start from an UTF-8 encoded string
|
||||
* this implementation doesn't check for valid UTF-8, don't use for user input
|
||||
* Erases a portion of a UTF-8 string.
|
||||
*
|
||||
* @param str UTF-8 encoded string.
|
||||
* @param start Start position.
|
||||
* @param len Number of characters to erase.
|
||||
*
|
||||
* @note This implementation does not check for valid UTF-8. Don't use it
|
||||
* for user input.
|
||||
*/
|
||||
utf8::string& erase(utf8::string& str, const size_t start, const size_t len = std::string::npos);
|
||||
|
||||
/**
|
||||
* truncate an UTF-8 encoded string after size characters
|
||||
* this implementation doesn't check for valid UTF-8, don't use for user input
|
||||
*/
|
||||
* Truncates a UTF-8 string to the specified number of characters.
|
||||
*
|
||||
* @param str UTF-8 encoded string.
|
||||
* @param size Size to truncate to.
|
||||
*
|
||||
* @note This implementation does not check for valid UTF-8. Don't use it
|
||||
* for user input.
|
||||
*/
|
||||
utf8::string& truncate(utf8::string& str, const size_t size);
|
||||
|
||||
/**
|
||||
* Truncate a UTF-8 encoded string.
|
||||
* Truncates a UTF-8 string to the specified number of characters.
|
||||
*
|
||||
* If the string has more than @p size UTF-8 characters it will be
|
||||
* truncated to this size.
|
||||
*
|
||||
* If the string has more than @p size UTF-8 characters it will be truncated
|
||||
* to this size.
|
||||
* The output is guaranteed to be valid UTF-8.
|
||||
*
|
||||
* @param[in, out] str The parameter's usage is:
|
||||
* - Input: String encoded in UTF-8.
|
||||
* - Output: String encoded in UTF-8 that contains at most @p size
|
||||
* codepoints.
|
||||
* @param size The size to truncate at.
|
||||
* @param[in] str String encoded in UTF-8.
|
||||
* @param[out] str String encoded in UTF-8 that contains at most @p size
|
||||
* codepoints.
|
||||
* @param size The size to truncate to.
|
||||
*/
|
||||
void truncate_as_ucs4(utf8::string& str, const size_t size);
|
||||
} // end namespace utf8
|
||||
|
|
|
@ -22,7 +22,9 @@
|
|||
|
||||
namespace ucs4_convert_impl
|
||||
{
|
||||
//transforms an outputiterator to a writer for ucs4_convert_impl functions.
|
||||
/**
|
||||
* Transforms an output iterator to a writer for ucs4_convert_impl functions.
|
||||
*/
|
||||
template<typename oitor_t>
|
||||
struct iteratorwriter
|
||||
{
|
||||
|
@ -48,12 +50,11 @@ namespace ucs4_convert_impl
|
|||
}
|
||||
|
||||
/**
|
||||
@tparam TD
|
||||
output, a collection type.
|
||||
@tparam TS
|
||||
input, a collection type.
|
||||
@return an instance of TD
|
||||
*/
|
||||
* @tparam TD Output, a collection type.
|
||||
* @tparam TS Input, a collection type.
|
||||
*
|
||||
* @return An instance of TD.
|
||||
*/
|
||||
template<typename TD , typename TS>
|
||||
typename ucs4_convert_impl::enableif<TD, typename TS::value_type>::type unicode_cast(const TS& source)
|
||||
//TD unicode_cast(const TS& source)
|
||||
|
@ -78,7 +79,7 @@ typename ucs4_convert_impl::enableif<TD, typename TS::value_type>::type unicode_
|
|||
}
|
||||
catch(utf8::invalid_utf8_exception&)
|
||||
{
|
||||
///TODO: use a ERR_.. stream but i dont know whether i can so to in header easily.
|
||||
// TODO: use an ERR_.. stream, but I don't know whether I can do so in a header easily.
|
||||
std::cerr << "Failed to convert a string from " << t_impl_reader::get_name() << " to " << t_impl_writer::get_name() << "\n";
|
||||
return res;
|
||||
}
|
||||
|
@ -86,10 +87,10 @@ typename ucs4_convert_impl::enableif<TD, typename TS::value_type>::type unicode_
|
|||
}
|
||||
|
||||
/**
|
||||
@tparam TD
|
||||
output, a collection type.
|
||||
@return an instance of TD
|
||||
*/
|
||||
* @tparam TD Output, a collection type.
|
||||
*
|
||||
* @return An instance of TD.
|
||||
*/
|
||||
template<typename TD>
|
||||
TD unicode_cast(ucs4::char_t onechar)
|
||||
{
|
||||
|
@ -107,7 +108,7 @@ TD unicode_cast(ucs4::char_t onechar)
|
|||
}
|
||||
catch(utf8::invalid_utf8_exception&)
|
||||
{
|
||||
///TODO: use a ERR_.. stream but i dont know whether i can so to in header easily.
|
||||
// TODO: use an ERR_.. stream, but I don't know whether I can do so in a header easily.
|
||||
std::cerr << "Failed to convert a string from " << t_impl_reader::get_name() << " to " << t_impl_writer::get_name() << "\n";
|
||||
return res;
|
||||
}
|
||||
|
|
|
@ -29,14 +29,21 @@ namespace utf8 {
|
|||
typedef char char_t;
|
||||
typedef std::string string;
|
||||
|
||||
/** also used for invalid utf16 or ucs4 strings */
|
||||
/**
|
||||
* Thrown by operations encountering invalid UTF-8 data.
|
||||
*
|
||||
* Also used for invalid UTF-16 and UCS-4 data.
|
||||
*
|
||||
* @todo FIXME: This clearly needs a better name for that reason.
|
||||
*/
|
||||
class invalid_utf8_exception : public std::exception {};
|
||||
}
|
||||
|
||||
/**
|
||||
* For win32 API.
|
||||
* On windows, wchar_t is defined as Uint16
|
||||
* Wide strings are expected to be UTF-16
|
||||
* For Win32 API.
|
||||
*
|
||||
* On Windows, wchar_t is defined as Uint16.
|
||||
* Wide strings are expected to be UTF-16.
|
||||
*/
|
||||
namespace utf16 {
|
||||
typedef wchar_t char_t;
|
||||
|
|
Loading…
Add table
Reference in a new issue