24 TextEncoder::Encoding TextEncoder::_default_encoding = TextEncoder::E_utf8;
34 for (si = _wtext.begin(); si != _wtext.end(); ++si) {
37 _flags &= ~F_got_text;
49 for (si = _wtext.begin(); si != _wtext.end(); ++si) {
52 _flags &= ~F_got_text;
73 wstring::const_iterator si;
74 for (si = _wtext.begin(); si != _wtext.end(); ++si) {
75 wchar_t character = (*si);
79 if (map_entry !=
nullptr && map_entry->_ascii_equiv != 0) {
80 result += (wchar_t)map_entry->_ascii_equiv;
81 if (map_entry->_ascii_additional != 0) {
82 result += (wchar_t)map_entry->_ascii_additional;
101 wstring::const_iterator ti;
102 for (ti = _wtext.begin(); ti != _wtext.end(); ++ti) {
103 if (((*ti) & ~0x7f) != 0) {
119 if ((ch & ~0xff) == 0) {
120 return string(1, (
char)ch);
127 if (map_entry !=
nullptr && map_entry->_ascii_equiv != 0) {
129 if (map_entry->_ascii_additional != 0) {
132 string(1, map_entry->_ascii_equiv) +
133 string(1, map_entry->_ascii_additional);
135 return string(1, map_entry->_ascii_equiv);
142 if ((ch & ~0x7f) == 0) {
143 return string(1, (
char)ch);
144 }
else if ((ch & ~0x7ff) == 0) {
146 string(1, (
char)((ch >> 6) | 0xc0)) +
147 string(1, (
char)((ch & 0x3f) | 0x80));
148 }
else if ((ch & ~0xffff) == 0) {
150 string(1, (
char)((ch >> 12) | 0xe0)) +
151 string(1, (
char)(((ch >> 6) & 0x3f) | 0x80)) +
152 string(1, (
char)((ch & 0x3f) | 0x80));
155 string(1, (
char)((ch >> 18) | 0xf0)) +
156 string(1, (
char)(((ch >> 12) & 0x3f) | 0x80)) +
157 string(1, (
char)(((ch >> 6) & 0x3f) | 0x80)) +
158 string(1, (
char)((ch & 0x3f) | 0x80));
162 if ((ch & ~0xffff) == 0) {
165 string(1, (
char)(ch >> 8)) +
166 string(1, (
char)(ch & 0xff));
169 uint32_t v = (uint32_t)ch - 0x10000u;
170 uint16_t hi = (v >> 10u) | 0xd800u;
171 uint16_t lo = (v & 0x3ffu) | 0xdc00u;
178 return string(encoded, 4);
193 for (
size_t i = 0; i < wtext.size(); ++i) {
194 wchar_t ch = wtext[i];
197 #if WCHAR_MAX < 0x10FFFF 198 if (ch >= 0xd800 && ch < 0xdc00 && (i + 1) < wtext.size()) {
200 wchar_t ch2 = wtext[i + 1];
201 if (ch2 >= 0xdc00 && ch2 < 0xe000) {
203 char32_t code_point = 0x10000 + ((ch - 0xd800) << 10) + (ch2 - 0xdc00);
227 return decode_text_impl(decoder);
233 return decode_text_impl(decoder);
240 return decode_text_impl(decoder);
249 wstring TextEncoder::
255 while (!decoder.
is_eof()) {
262 if (character <= WCHAR_MAX) {
266 uint32_t v = (uint32_t)character - 0x10000u;
267 result += (wchar_t)((v >> 10u) | 0xd800u);
268 result += (wchar_t)((v & 0x3ffu) | 0xdc00u);
375 operator << (ostream &out, TextEncoder::Encoding encoding) {
377 case TextEncoder::E_iso8859:
378 return out <<
"iso8859";
380 case TextEncoder::E_utf8:
381 return out <<
"utf8";
383 case TextEncoder::E_utf16be:
384 return out <<
"utf16be";
387 return out <<
"**invalid TextEncoder::Encoding(" << (int)encoding <<
")**";
394 operator >> (istream &in, TextEncoder::Encoding &encoding) {
398 if (word ==
"iso8859") {
399 encoding = TextEncoder::E_iso8859;
400 }
else if (word ==
"utf8" || word ==
"utf-8") {
401 encoding = TextEncoder::E_utf8;
402 }
else if (word ==
"unicode" || word ==
"utf16be" || word ==
"utf-16be" ||
403 word ==
"utf16-be" || word ==
"utf-16-be") {
404 encoding = TextEncoder::E_utf16be;
407 if (notify_ptr !=
nullptr) {
409 <<
"Invalid TextEncoder::Encoding: " << word <<
"\n";
411 encoding = TextEncoder::E_iso8859;
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
static int unicode_toupper(char32_t character)
Returns the uppercase equivalent of the given Unicode character.
static std::string encode_wchar(char32_t ch, Encoding encoding)
Encodes a single Unicode character into a one-, two-, three-, or four-byte string,...
This decoder extracts characters two at a time to get a plain wide character sequence.
This decoder extracts utf-8 sequences.
void make_lower()
Adjusts the text stored within the encoder to all lowercase letters (preserving accent marks correctl...
virtual char32_t get_next_character()
Returns the next character in sequence.
bool is_wtext() const
Returns true if any of the characters in the string returned by get_wtext() are out of the range of a...
static const Entry * look_up(char32_t character)
Returns the Entry associated with the indicated character, if there is one.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
static int unicode_tolower(char32_t character)
Returns the lowercase equivalent of the given Unicode character.
std::wstring get_wtext_as_ascii() const
Returns the text associated with the node, converted as nearly as possible to a fully-ASCII represent...
std::string encode_wtext(const std::wstring &wtext) const
Encodes a wide-text string into a single-char string, according to the current encoding.
The base class of a family of classes that decode various kinds of encoded byte streams.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
bool is_eof()
Returns true if the decoder has returned the last character in the string, false if there are more to...
const std::wstring & get_wtext() const
Returns the text associated with the TextEncoder, as a wide-character string.
std::wstring decode_text(const std::string &text) const
Returns the given single-byte encoded string decoded to a wide-character string, via the current encoding system.
void make_upper()
Adjusts the text stored within the encoder to all uppercase letters (preserving accent marks correctl...
static std::ostream * get_notify_ptr()
Returns the ostream that is used to write error messages to.