19 _encoding = _default_encoding;
23 _flags = (F_got_text | F_got_wtext);
32 _encoding(copy._encoding),
70 _default_encoding = encoding;
77 INLINE TextEncoder::Encoding TextEncoder::
78 get_default_encoding() {
79 return _default_encoding;
90 if (!has_text() || _text != text) {
92 _flags = (_flags | F_got_text) & ~F_got_wtext;
104 set_text(
const std::string &text, TextEncoder::Encoding encoding) {
105 if (encoding == _encoding) {
117 _text = std::string();
118 _wtext = std::wstring();
119 _flags |= (F_got_text | F_got_wtext);
126 INLINE
bool TextEncoder::
128 if (_flags & F_got_wtext) {
129 return !_wtext.empty();
131 return !_text.empty();
138 INLINE std::string TextEncoder::
140 if ((_flags & F_got_text) == 0) {
150 INLINE std::string TextEncoder::
151 get_text(TextEncoder::Encoding encoding)
const {
162 _flags = (_flags | F_got_text) & ~F_got_wtext;
173 #if WCHAR_MAX >= 0x10FFFF 175 _wtext =
get_wtext() + std::wstring(1, (
wchar_t)character);
177 if ((character & ~0xffff) == 0) {
178 _wtext =
get_wtext() + std::wstring(1, (
wchar_t)character);
181 uint32_t v = (uint32_t)character - 0x10000u;
183 (wchar_t)((v >> 10u) | 0xd800u),
184 (
wchar_t)((v & 0x3ffu) | 0xdc00u),
186 _wtext =
get_wtext() + std::wstring(wstr, 2);
189 _flags = (_flags | F_got_wtext) & ~F_got_text;
211 if (index < _wtext.length()) {
212 return _wtext[index];
225 if (index < _wtext.length()) {
226 _wtext[index] = character;
227 _flags &= ~F_got_text;
277 TextEncoder::Encoding to) {
288 if (entry ==
nullptr) {
291 return entry->_char_type == UnicodeLatinMap::CT_upper ||
292 entry->_char_type == UnicodeLatinMap::CT_lower;
302 if (entry ==
nullptr) {
304 return (character >=
'0' && character <=
'9');
307 return (isdigit(entry->_ascii_equiv) != 0);
317 if (entry ==
nullptr) {
319 return (character < 128 && ispunct(character));
321 return entry->_char_type == UnicodeLatinMap::CT_punct;
331 if (entry ==
nullptr) {
334 return entry->_char_type == UnicodeLatinMap::CT_upper;
361 if (entry ==
nullptr) {
364 return entry->_char_type == UnicodeLatinMap::CT_lower;
374 if (entry ==
nullptr) {
377 return entry->_toupper_character;
387 if (entry ==
nullptr) {
390 return entry->_tolower_character;
407 upper(
const std::string &source, TextEncoder::Encoding encoding) {
429 lower(
const std::string &source, TextEncoder::Encoding encoding) {
444 if (!has_text() || _wtext != wtext) {
446 _flags = (_flags | F_got_wtext) & ~F_got_text;
457 if ((_flags & F_got_wtext) == 0) {
469 if (!wtext.empty()) {
471 _flags = (_flags | F_got_wtext) & ~F_got_text;
497 INLINE std::ostream &
void append_text(const std::string &text)
Appends the indicated string to the end of the stored text.
std::ostream & operator<<(std::ostream &out, const std::wstring &str)
Uses the current default encoding to output the wstring.
static std::string reencode_text(const std::string &text, Encoding from, Encoding to)
Given the indicated text string, which is assumed to be encoded via the encoding "from",...
static int unicode_toupper(char32_t character)
Returns the uppercase equivalent of the given Unicode character.
int get_unicode_char(size_t index) const
Returns the Unicode value of the nth character in the stored text.
This class can be used to convert text between multiple representations, e.g.
static bool unicode_ispunct(char32_t character)
Returns true if the indicated character is a punctuation mark, false otherwise.
static std::string upper(const std::string &source)
Converts the string to uppercase, assuming the string is encoded in the default encoding.
set_default_encoding
Specifies the default encoding to be used for all subsequently created TextEncoder objects.
void make_lower()
Adjusts the text stored within the encoder to all lowercase letters (preserving accent marks correctl...
void clear_text()
Removes the text from the TextEncoder.
void append_wtext(const std::wstring &text)
Appends the indicated string to the end of the stored wide-character text.
set_text
Changes the text that is stored in the encoder.
std::string get_text_as_ascii() const
Returns the text associated with the node, converted as nearly as possible to a fully-ASCII represent...
static const Entry * look_up(char32_t character)
Returns the Entry associated with the indicated character, if there is one.
void set_unicode_char(size_t index, char32_t character)
Sets the Unicode value of the nth character in the stored text.
get_default_encoding
Returns the default encoding to be used for all subsequently created TextEncoder objects.
static int unicode_tolower(char32_t character)
Returns the lowercase equivalent of the given Unicode character.
static bool unicode_isalpha(char32_t character)
Returns true if the indicated character is an alphabetic letter, false otherwise.
std::string get_encoded_char(size_t index) const
Returns the nth char of the stored text, as a one-, two-, or three-byte encoded string.
std::wstring get_wtext_as_ascii() const
Returns the text associated with the node, converted as nearly as possible to a fully-ASCII represent...
get_text
Returns the current text, as encoded via the current encoding system.
std::string encode_wtext(const std::wstring &wtext) const
Encodes a wide-text string into a single-char string, according to the current encoding.
static bool unicode_isdigit(char32_t character)
Returns true if the indicated character is a numeric digit, false otherwise.
void set_encoding(Encoding encoding)
Specifies how the string set via set_text() is to be interpreted.
static std::string lower(const std::string &source)
Converts the string to lowercase, assuming the string is encoded in the default encoding.
const std::wstring & get_wtext() const
Returns the text associated with the TextEncoder, as a wide-character string.
Encoding get_encoding() const
Returns the encoding by which the string set via set_text() is to be interpreted.
void append_unicode_char(char32_t character)
Appends a single character to the end of the stored text.
std::wstring decode_text(const std::string &text) const
Returns the given encoded string decoded to a wide-character string (wstring), via the current encoding system.
size_t get_num_chars() const
Returns the number of characters in the stored text.
void make_upper()
Adjusts the text stored within the encoder to all uppercase letters (preserving accent marks correctl...
static bool unicode_islower(char32_t character)
Returns true if the indicated character is a lowercase letter, false otherwise.
void set_wtext(const std::wstring &wtext)
Changes the text that is stored in the encoder.
static bool unicode_isupper(char32_t character)
Returns true if the indicated character is an uppercase letter, false otherwise.
static bool unicode_isspace(char32_t character)
Returns true if the indicated character is a whitespace character, false otherwise.