32 return mIter == mString->mData.begin();
37 return mIter == mString->mData.end();
42 return mIter - mString->mData.begin();
47 mIter = mString->mData.begin() + index;
53 return mString->getChar(current_index);
59 int change = mString->setChar(current_index, uc);
60 _jump_to(current_index);
75 lead_half = mIter[-1];
93 lead_half = mIter[-1];
194 return _getCharacter();
199 return _setCharacter(uc);
302 return _getCharacter();
507#if MYGUI_IS_NATIVE_WCHAR_T
542 assign(str.data(), str.size());
581 return mData.max_size();
591 mData.resize(num, val);
596 mData.swap(from.mData);
601 return mData.empty();
606 return mData.c_str();
616 return mData.capacity();
629 tmp.mData.swap(
data);
643#if MYGUI_IS_NATIVE_WCHAR_T
647 mData.push_back(
static_cast<code_point>(val));
653 mData.push_back(val);
658 mData.push_back(
static_cast<code_point>(val));
676 return *m_buffer.mStrBuffer;
682 return m_buffer.mStrBuffer->c_str();
687 _load_buffer_UTF32();
688 return *m_buffer.mUTF32StrBuffer;
693 _load_buffer_UTF32();
694 return m_buffer.mUTF32StrBuffer->c_str();
700 return *m_buffer.mWStrBuffer;
706 return m_buffer.mWStrBuffer->c_str();
711 return mData.at(loc);
716 return mData.at(loc);
729 if (l == 2 && (loc + 1) < mData.length())
731 cp[1] = ptr[loc + 1];
747 if (newSize > existingSize)
750 insert(loc + 1, 1, cp[1]);
753 if (newSize < existingSize)
770 i.
mIter = mData.begin();
786 i.
mIter = mData.end();
802 i.
mIter = mData.end();
818 i.
mIter = mData.begin();
839 mData.assign(str.mData);
851 mData.assign(str, num);
857 mData.assign(str.mData, index,
len);
863 mData.assign(num, ch);
870 mData.reserve(wstr.length());
873 std::wstring::const_iterator i, ie = wstr.end();
874 for (i = wstr.begin(); i != ie; i++)
877 mData.push_back(tmp);
882 std::wstring::const_iterator i;
883 std::wstring::const_iterator ie = wstr.end();
884 for (i = wstr.begin(); i != ie; i++)
889 mData.push_back(cp[0]);
891 mData.push_back(cp[1]);
897#if MYGUI_IS_NATIVE_WCHAR_T
915 for (
const auto& character : str)
932 unsigned char utf8buf[7];
943 for (
size_t j = 0; j < utf8len; j++)
945 utf8buf[j] = (
static_cast<unsigned char>(
950 utf8buf[utf8len] = 0;
955 append(utf16buff, utf16len);
962 mData.append(str.mData);
974 mData.append(str.mData, index,
len);
980 mData.append(str, num);
986 mData.append(num, ch);
996#if MYGUI_IS_NATIVE_WCHAR_T
999 std::wstring tmp(w_str, num);
1052 mData.insert(index, str.mData);
1058 mData.insert(index1, str.mData, index2, num);
1069 mData.insert(index, str, num);
1073#if MYGUI_IS_NATIVE_WCHAR_T
1091 mData.insert(index, num, ch);
1095#if MYGUI_IS_NATIVE_WCHAR_T
1115 return insert(index, num, cp[0]);
1128 mData.insert(i.
mIter, num, ch);
1130#if MYGUI_IS_NATIVE_WCHAR_T
1182 mData.erase(index, num);
1188 mData.replace(index1, num1, str.mData, 0,
npos);
1194 mData.replace(index1, num1, str.mData, 0, num2);
1200 mData.replace(index1, num1, str.mData, index2, num2);
1210 return replace(index1, num1, str, 0, num);
1215 mData.replace(index, num1, num2, ch);
1225 return replace(index1, num1, num, ch);
1230 return mData.compare(str.mData);
1235 return mData.compare(str);
1240 return mData.compare(index,
length, str.mData);
1246 return mData.compare(index,
length, str.mData, index2, length2);
1251 return mData.compare(index,
length, str, length2);
1254#if MYGUI_IS_NATIVE_WCHAR_T
1270 return mData.find(str.
c_str(), index);
1285#if MYGUI_IS_NATIVE_WCHAR_T
1289 return mData.find(tmp.c_str(), index,
length);
1300 return mData.find(ch, index);
1303#if MYGUI_IS_NATIVE_WCHAR_T
1319 return mData.rfind(str.
c_str(), index);
1325 return mData.rfind(tmp.
c_str(), index, num);
1331 return mData.rfind(tmp.
c_str(), index, num);
1334#if MYGUI_IS_NATIVE_WCHAR_T
1338 return mData.rfind(tmp.c_str(), index, num);
1349 return mData.rfind(ch, index);
1352#if MYGUI_IS_NATIVE_WCHAR_T
1371 while (i < num && (index + i) <
len)
1393#if MYGUI_IS_NATIVE_WCHAR_T
1414 while (i < num && (index + i) <
len)
1436#if MYGUI_IS_NATIVE_WCHAR_T
1458 while (i < num && (index - i) !=
npos)
1482#if MYGUI_IS_NATIVE_WCHAR_T
1506 while (i < num && (index - i) !=
npos)
1535#if MYGUI_IS_NATIVE_WCHAR_T
1576#if MYGUI_IS_NATIVE_WCHAR_T
1620 UString::operator std::string()
const
1626 UString::operator std::wstring()
const
1636 return 0xD800 > cp || cp > 0xDFFF;
1643 return 0xD800 <= cp && cp <= 0xDBFF;
1650 return 0xDC00 <= cp && cp <= 0xDFFF;
1655 if (0xD800 <= cp && cp <= 0xDBFF)
1671 bool wordPair =
false;
1674 if (0xD800 <= cp1 && cp1 <= 0xDBFF)
1677 if (0xDC00 <= cp2 && cp2 <= 0xDFFF)
1687 unsigned short cU = cp1;
1688 unsigned short cL = cp2;
1692 out_uc = (cU & 0x03FF) << 10;
1693 out_uc |= (cL & 0x03FF);
1701 if (in_uc <= 0xFFFF)
1711 tmp =
static_cast<unsigned short>((uc >> 10) & 0x03FF);
1716 tmp =
static_cast<unsigned short>(uc & 0x03FF);
1725 return (cp & ~_cont_mask) != _cont;
1732 if ((cp & ~_lead1_mask) == _lead1)
1734 if ((cp & ~_lead2_mask) == _lead2)
1736 if ((cp & ~_lead3_mask) == _lead3)
1738 if ((cp & ~_lead4_mask) == _lead4)
1740 if ((cp & ~_lead5_mask) == _lead5)
1757 if (!(uc & ~0x0000007F))
1759 if (!(uc & ~0x000007FF))
1761 if (!(uc & ~0x0000FFFF))
1763 if (!(uc & ~0x001FFFFF))
1765 if (!(uc & ~0x03FFFFFF))
1767 if (!(uc & ~0x7FFFFFFF))
1787 case 6: c = in_cp[i] & _lead5_mask;
break;
1788 case 5: c = in_cp[i] & _lead4_mask;
break;
1789 case 4: c = in_cp[i] & _lead3_mask;
break;
1790 case 3: c = in_cp[i] & _lead2_mask;
break;
1791 case 2: c = in_cp[i] & _lead1_mask;
break;
1796 for (++i; i <
len; i++)
1798 if ((in_cp[i] & ~_cont_mask) != _cont)
1805 c |= (in_cp[i] & _cont_mask);
1818 for (
size_t i =
len - 1; i > 0; i--)
1820 out_cp[i] =
static_cast<unsigned char>(((c)&_cont_mask) | _cont);
1827 case 6: out_cp[0] =
static_cast<unsigned char>(((c)&_lead5_mask) | _lead5);
break;
1828 case 5: out_cp[0] =
static_cast<unsigned char>(((c)&_lead4_mask) | _lead4);
break;
1829 case 4: out_cp[0] =
static_cast<unsigned char>(((c)&_lead3_mask) | _lead3);
break;
1830 case 3: out_cp[0] =
static_cast<unsigned char>(((c)&_lead2_mask) | _lead2);
break;
1831 case 2: out_cp[0] =
static_cast<unsigned char>(((c)&_lead1_mask) | _lead1);
break;
1833 default: out_cp[0] =
static_cast<unsigned char>((c)&0x7F);
break;
1842 std::string_view tmp(
reinterpret_cast<const char*
>(
c_str));
1853 if (
c_str[i] & 0x80)
1858 unsigned char c =
c_str[i];
1859 size_t contBytes = 0;
1862 if ((c & ~_lead1_mask) == _lead1)
1871 else if ((c & ~_lead2_mask) == _lead2)
1877 if ((c & _lead2) == _cont)
1884 else if ((c & ~_lead3_mask) == _lead3)
1890 if ((c & _lead3) == _cont)
1897 else if ((c & ~_lead4_mask) == _lead4)
1903 if ((c & _lead4) == _cont)
1910 else if ((c & ~_lead5_mask) == _lead5)
1916 if ((c & _lead5) == _cont)
1923 if (i + contBytes >= num)
1929 if ((c & ~_cont_mask) != _cont)
1941 void UString::_init()
1943 m_buffer.mVoidBuffer =
nullptr;
1944 m_bufferType = bt_none;
1948 void UString::_cleanBuffer()
const
1950 if (m_buffer.mVoidBuffer !=
nullptr)
1952 switch (m_bufferType)
1954 case bt_string:
delete m_buffer.mStrBuffer;
break;
1955 case bt_wstring:
delete m_buffer.mWStrBuffer;
break;
1956 case bt_utf32string:
delete m_buffer.mUTF32StrBuffer;
break;
1960 static_assert(
"This should never happen - mVoidBuffer should never contain something if we "
1961 "don't know the type");
1964 m_buffer.mVoidBuffer =
nullptr;
1966 m_bufferType = bt_none;
1970 void UString::_getBufferStr()
const
1972 if (m_bufferType != bt_string)
1975 m_buffer.mStrBuffer =
new std::string();
1976 m_bufferType = bt_string;
1978 m_buffer.mStrBuffer->clear();
1981 void UString::_getBufferWStr()
const
1983 if (m_bufferType != bt_wstring)
1986 m_buffer.mWStrBuffer =
new std::wstring();
1987 m_bufferType = bt_wstring;
1989 m_buffer.mWStrBuffer->clear();
1992 void UString::_getBufferUTF32Str()
const
1994 if (m_bufferType != bt_utf32string)
1998 m_bufferType = bt_utf32string;
2000 m_buffer.mUTF32StrBuffer->clear();
2003 void UString::_load_buffer_UTF8()
const
2006 std::string& buffer = (*m_buffer.mStrBuffer);
2007 buffer.reserve(
length());
2009 unsigned char utf8buf[6];
2010 char* charbuf = (
char*)utf8buf;
2018 c = i.getCharacter();
2022 buffer.push_back(charbuf[j++]);
2026 void UString::_load_buffer_WStr()
const
2029 std::wstring& buffer = (*m_buffer.mWStrBuffer);
2030 buffer.reserve(
length());
2033 for (i =
begin(); i != ie; ++i)
2035 buffer.push_back((
wchar_t)(*i));
2043 c = i.getCharacter();
2044 buffer.push_back((
wchar_t)c);
2049 void UString::_load_buffer_UTF32()
const
2051 _getBufferUTF32Str();
2052 utf32string& buffer = (*m_buffer.mUTF32StrBuffer);
2053 buffer.reserve(
length());
2061 c = i.getCharacter();
2062 buffer.push_back(c);
base iterator class for UString
ptrdiff_t difference_type
int _setCharacter(unicode_char uc)
size_type _get_index() const
void _become(const _base_iterator &i)
void _seekRev(size_type c)
void _jump_to(size_type index)
void _seekFwd(size_type c)
unicode_char _getCharacter() const
const forward iterator for UString
friend size_type operator-(const _const_fwd_iterator &left, const _const_fwd_iterator &right)
difference operator
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_const_fwd_iterator & operator++()
pre-increment
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_fwd_iterator & operator--()
pre-decrement
_const_fwd_iterator operator+(difference_type n)
addition operator
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
const value_type & operator*() const
dereference operator
const reverse iterator for UString
_const_rev_iterator operator+(difference_type n)
addition operator
_const_rev_iterator & operator++()
pre-increment
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_rev_iterator & operator--()
pre-decrement
friend size_type operator-(const _const_rev_iterator &left, const _const_rev_iterator &right)
difference operator
const value_type & operator*() const
dereference operator
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
forward iterator for UString
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
_fwd_iterator & operator++()
pre-increment
_fwd_iterator operator-(difference_type n)
subtraction operator
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_fwd_iterator operator+(difference_type n)
addition operator
value_type & operator*() const
dereference operator
_fwd_iterator & operator--()
pre-decrement
value_type & operator[](difference_type n) const
dereference at offset operator
reverse iterator for UString
_rev_iterator & operator+=(difference_type n)
addition assignment operator
_rev_iterator & operator--()
pre-decrement
value_type & operator*() const
dereference operator
_rev_iterator & operator++()
pre-increment
_rev_iterator operator-(difference_type n)
subtraction operator
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
value_type & operator[](difference_type n) const
dereference at offset operator
_rev_iterator operator+(difference_type n)
addition operator
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
size_type length() const
Returns the number of code points in the current string.
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array
bool operator>(const UString &right) const
greater than operator
size_type size() const
Returns the number of code points in the current string.
static size_t _utf32_to_utf8(const unicode_char &in_uc, unsigned char out_cp[6])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-8 encoding, returns the number ...
const code_point * data() const
returns a pointer to the first character in the current string
UString()
default constructor, creates an empty string
static size_t _utf8_to_utf32(const unsigned char in_cp[6], unicode_char &out_uc)
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of...
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
bool operator==(const UString &right) const
equality operator
bool operator!=(const UString &right) const
inequality operator
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
bool operator>=(const UString &right) const
greater than or equal operator
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
void reserve(size_type size)
sets the capacity of the string to at least size code points
static size_t _utf32_to_utf16(const unicode_char &in_uc, code_point out_cp[2])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-16 encoding,...
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
static size_t _utf16_to_utf32(const code_point in_cp[2], unicode_char &out_uc)
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc,...
void clear()
deletes all of the elements in the string
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
int compare(const UString &str) const
compare str to the current string
code_point value_type
value type typedef for use in iterators
bool operator<=(const UString &right) const
less than or equal operator
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
static bool _utf16_surrogate_follow(code_point cp)
returns true if cp matches the signature of a surrogate pair following character
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str,...
static size_t _utf16_char_length(code_point cp)
estimates the number of UTF-16 code points in the sequence starting with cp
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character
std::basic_string< code_point > dstring
bool operator<(const UString &right) const
less than operator
static bool _utf8_start_char(unsigned char cp)
returns true if cp is the beginning of a UTF-8 sequence
uint16 code_point
a single UTF-16 code point
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that matches any character in str,...
static bool _utf16_surrogate_lead(code_point cp)
returns true if cp matches the signature of a surrogate pair lead character
iterator end()
returns an iterator just past the end of the string
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
static bool _utf16_independent_char(code_point cp)
returns true if cp does not match the signature for the lead or follow code point of a surrogate pair...
static const size_type npos
the usual constant representing: not found, no limit, etc
uint32 unicode_char
a single 32-bit Unicode character
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
_fwd_iterator iterator
iterator
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
code_point & operator[](size_type index)
code point dereference operator
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
UString & append(const UString &str)
appends str on to the end of the current string
const code_point * c_str() const
returns a pointer to the first character in the current string
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
void resize(size_type num, const code_point &val=0)
changes the size of the string to num, filling in any new area with val
_const_fwd_iterator const_iterator
const iterator
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
size_t size_type
size type used to indicate string size and character positions within the string
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
static size_t _utf8_char_length(unsigned char cp)
estimates the number of UTF-8 code points in the sequence starting with cp
size_type length_Characters() const
Returns the number of Unicode characters in the string.
void push_back(unicode_char val)
appends val to the end of the string
iterator begin()
returns an iterator to the first element of the string
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
size_type max_size() const
returns the maximum number of UTF-16 code points that the string can hold
UString substr(size_type index, size_type num=npos) const
returns a substring of the current string, starting at index, and num characters long.
void swap(UString &from)
exchanges the elements of the current string with those of from
size_type capacity() const
returns the number of elements that the string can hold before it will need to allocate more space
bool empty() const
returns true if the string has no elements, false otherwise
float len(float x, float y)