MyGUI 3.4.3
MyGUI_UString.cpp
Go to the documentation of this file.
1/*
2 * This source file is part of MyGUI. For the latest info, see http://mygui.info/
3 * Distributed under the MIT License
4 * (See accompanying file COPYING.MIT or copy at http://opensource.org/licenses/MIT)
5 */
6
7#include "MyGUI_Precompiled.h"
8#include "MyGUI_UString.h"
9
10namespace MyGUI
11{
12
13 //--------------------------------------------------------------------------
18 //--------------------------------------------------------------------------
23 //--------------------------------------------------------------------------
29 //--------------------------------------------------------------------------
31 {
32 return mIter == mString->mData.begin();
33 }
34 //--------------------------------------------------------------------------
36 {
37 return mIter == mString->mData.end();
38 }
39 //--------------------------------------------------------------------------
41 {
42 return mIter - mString->mData.begin();
43 }
44 //--------------------------------------------------------------------------
46 {
47 mIter = mString->mData.begin() + index;
48 }
49 //--------------------------------------------------------------------------
51 {
52 size_type current_index = _get_index();
53 return mString->getChar(current_index);
54 }
55 //--------------------------------------------------------------------------
57 {
58 size_type current_index = _get_index();
59 int change = mString->setChar(current_index, uc);
60 _jump_to(current_index);
61 return change;
62 }
63 //--------------------------------------------------------------------------
65 {
66 _seekFwd(1); // move 1 code point forward
67 if (_test_end())
68 return; // exit if we hit the end
70 {
71 // landing on a follow code point means we might be part of a bigger character
72 // so we test for that
73 code_point lead_half = 0;
74 //NB: we can't possibly be at the beginning here, so no need to test
75 lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
76 if (_utf16_surrogate_lead(lead_half))
77 {
78 _seekFwd(1); // if so, then advance 1 more code point
79 }
80 }
81 }
82 //--------------------------------------------------------------------------
84 {
85 _seekRev(1); // move 1 code point backwards
86 if (_test_begin())
87 return; // exit if we hit the beginning
89 {
90 // landing on a follow code point means we might be part of a bigger character
91 // so we test for that
92 code_point lead_half = 0;
93 lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
94 if (_utf16_surrogate_lead(lead_half))
95 {
96 _seekRev(1); // if so, then rewind 1 more code point
97 }
98 }
99 }
100 //--------------------------------------------------------------------------
101 //--------------------------------------------------------------------------
102 //--------------------------------------------------------------------------
103 //--------------------------------------------------------------------------
109 //--------------------------------------------------------------------------
111 {
112 _fwd_iterator tmp(*this);
113 _seekFwd(1);
114 return tmp;
115 }
116 //--------------------------------------------------------------------------
122 //--------------------------------------------------------------------------
124 {
125 _fwd_iterator tmp(*this);
126 _seekRev(1);
127 return tmp;
128 }
129 //--------------------------------------------------------------------------
131 {
132 _fwd_iterator tmp(*this);
133 if (n < 0)
134 tmp._seekRev(-n);
135 else
136 tmp._seekFwd(n);
137 return tmp;
138 }
139 //--------------------------------------------------------------------------
141 {
142 _fwd_iterator tmp(*this);
143 if (n < 0)
144 tmp._seekFwd(-n);
145 else
146 tmp._seekRev(n);
147 return tmp;
148 }
149 //--------------------------------------------------------------------------
151 {
152 if (n < 0)
153 _seekRev(-n);
154 else
155 _seekFwd(n);
156 return *this;
157 }
158 //--------------------------------------------------------------------------
160 {
161 if (n < 0)
162 _seekFwd(-n);
163 else
164 _seekRev(n);
165 return *this;
166 }
167 //--------------------------------------------------------------------------
172 //--------------------------------------------------------------------------
174 {
175 _fwd_iterator tmp(*this);
176 tmp += n;
177 return *tmp;
178 }
179 //--------------------------------------------------------------------------
185 //--------------------------------------------------------------------------
191 //--------------------------------------------------------------------------
196 //--------------------------------------------------------------------------
201 //--------------------------------------------------------------------------
202 //--------------------------------------------------------------------------
203 //--------------------------------------------------------------------------
204 //--------------------------------------------------------------------------
206 //--------------------------------------------------------------------------
211 //--------------------------------------------------------------------------
217 //--------------------------------------------------------------------------
224 //--------------------------------------------------------------------------
230 //--------------------------------------------------------------------------
237 //--------------------------------------------------------------------------
239 {
240 _const_fwd_iterator tmp(*this);
241 if (n < 0)
242 tmp._seekRev(-n);
243 else
244 tmp._seekFwd(n);
245 return tmp;
246 }
247 //--------------------------------------------------------------------------
249 {
250 _const_fwd_iterator tmp(*this);
251 if (n < 0)
252 tmp._seekFwd(-n);
253 else
254 tmp._seekRev(n);
255 return tmp;
256 }
257 //--------------------------------------------------------------------------
259 {
260 if (n < 0)
261 _seekRev(-n);
262 else
263 _seekFwd(n);
264 return *this;
265 }
266 //--------------------------------------------------------------------------
268 {
269 if (n < 0)
270 _seekFwd(-n);
271 else
272 _seekRev(n);
273 return *this;
274 }
275 //--------------------------------------------------------------------------
280 //--------------------------------------------------------------------------
282 {
283 _const_fwd_iterator tmp(*this);
284 tmp += n;
285 return *tmp;
286 }
287 //--------------------------------------------------------------------------
293 //--------------------------------------------------------------------------
299 //--------------------------------------------------------------------------
304 //--------------------------------------------------------------------------
305 //--------------------------------------------------------------------------
306 //--------------------------------------------------------------------------
307 //--------------------------------------------------------------------------
313 //--------------------------------------------------------------------------
315 {
316 _rev_iterator tmp(*this);
317 _seekRev(1);
318 return tmp;
319 }
320 //--------------------------------------------------------------------------
326 //--------------------------------------------------------------------------
328 {
329 _rev_iterator tmp(*this);
330 _seekFwd(1);
331 return tmp;
332 }
333 //--------------------------------------------------------------------------
335 {
336 _rev_iterator tmp(*this);
337 if (n < 0)
338 tmp._seekFwd(-n);
339 else
340 tmp._seekRev(n);
341 return tmp;
342 }
343 //--------------------------------------------------------------------------
345 {
346 _rev_iterator tmp(*this);
347 if (n < 0)
348 tmp._seekRev(-n);
349 else
350 tmp._seekFwd(n);
351 return tmp;
352 }
353 //--------------------------------------------------------------------------
355 {
356 if (n < 0)
357 _seekFwd(-n);
358 else
359 _seekRev(n);
360 return *this;
361 }
362 //--------------------------------------------------------------------------
364 {
365 if (n < 0)
366 _seekRev(-n);
367 else
368 _seekFwd(n);
369 return *this;
370 }
371 //--------------------------------------------------------------------------
373 {
374 return mIter[-1];
375 }
376 //--------------------------------------------------------------------------
378 {
379 _rev_iterator tmp(*this);
380 tmp -= n;
381 return *tmp;
382 }
383 //--------------------------------------------------------------------------
384 //--------------------------------------------------------------------------
385 //--------------------------------------------------------------------------
386 //--------------------------------------------------------------------------
392 //--------------------------------------------------------------------------
398 //--------------------------------------------------------------------------
405 //--------------------------------------------------------------------------
411 //--------------------------------------------------------------------------
418 //--------------------------------------------------------------------------
420 {
421 _const_rev_iterator tmp(*this);
422 if (n < 0)
423 tmp._seekFwd(-n);
424 else
425 tmp._seekRev(n);
426 return tmp;
427 }
428 //--------------------------------------------------------------------------
430 {
431 _const_rev_iterator tmp(*this);
432 if (n < 0)
433 tmp._seekRev(-n);
434 else
435 tmp._seekFwd(n);
436 return tmp;
437 }
438 //--------------------------------------------------------------------------
440 {
441 if (n < 0)
442 _seekFwd(-n);
443 else
444 _seekRev(n);
445 return *this;
446 }
447 //--------------------------------------------------------------------------
449 {
450 if (n < 0)
451 _seekRev(-n);
452 else
453 _seekFwd(n);
454 return *this;
455 }
456 //--------------------------------------------------------------------------
458 {
459 return mIter[-1];
460 }
461 //--------------------------------------------------------------------------
463 {
464 _const_rev_iterator tmp(*this);
465 tmp -= n;
466 return *tmp;
467 }
468 //--------------------------------------------------------------------------
469 //--------------------------------------------------------------------------
470 //--------------------------------------------------------------------------
471 //--------------------------------------------------------------------------
473 {
474 _init();
475 }
476 //--------------------------------------------------------------------------
478 {
479 _init();
480 mData = copy.mData;
481 }
482 //--------------------------------------------------------------------------
484 {
485 _init();
486 assign(length, ch);
487 }
488 //--------------------------------------------------------------------------
490 {
491 _init();
492 assign(str);
493 }
494 //--------------------------------------------------------------------------
496 {
497 _init();
498 assign(str, length);
499 }
500 //--------------------------------------------------------------------------
502 {
503 _init();
504 assign(str, index, length);
505 }
506 //--------------------------------------------------------------------------
507#if MYGUI_IS_NATIVE_WCHAR_T
508 UString::UString(const wchar_t* w_str)
509 {
510 _init();
511 assign(w_str);
512 }
513 //--------------------------------------------------------------------------
514 UString::UString(const wchar_t* w_str, size_type length)
515 {
516 _init();
517 assign(w_str, length);
518 }
519#endif
520 //--------------------------------------------------------------------------
521 UString::UString(const std::wstring& wstr)
522 {
523 _init();
524 assign(wstr);
525 }
526 //--------------------------------------------------------------------------
528 {
529 _init();
530 assign(c_str, std::strlen(c_str));
531 }
532 //--------------------------------------------------------------------------
534 {
535 _init();
537 }
538 //--------------------------------------------------------------------------
539 UString::UString(const std::string& str)
540 {
541 _init();
542 assign(str.data(), str.size());
543 }
544 //--------------------------------------------------------------------------
546 {
547 _init();
548 assign(str);
549 }
550 //--------------------------------------------------------------------------
552 {
553 _cleanBuffer();
554 }
555 //--------------------------------------------------------------------------
557 {
558 return mData.size();
559 }
560 //--------------------------------------------------------------------------
562 {
563 return size();
564 }
565 //--------------------------------------------------------------------------
567 {
568 const_iterator i = begin();
569 const_iterator ie = end();
570 size_type c = 0;
571 while (i != ie)
572 {
573 i.moveNext();
574 ++c;
575 }
576 return c;
577 }
578 //--------------------------------------------------------------------------
580 {
581 return mData.max_size();
582 }
583 //--------------------------------------------------------------------------
585 {
586 mData.reserve(size);
587 }
588 //--------------------------------------------------------------------------
589 void UString::resize(size_type num, const code_point& val /*= 0 */)
590 {
591 mData.resize(num, val);
592 }
593 //--------------------------------------------------------------------------
595 {
596 mData.swap(from.mData);
597 }
598 //--------------------------------------------------------------------------
599 bool UString::empty() const
600 {
601 return mData.empty();
602 }
603 //--------------------------------------------------------------------------
605 {
606 return mData.c_str();
607 }
608 //--------------------------------------------------------------------------
610 {
611 return c_str();
612 }
613 //--------------------------------------------------------------------------
615 {
616 return mData.capacity();
617 }
618 //--------------------------------------------------------------------------
620 {
621 mData.clear();
622 }
623 //--------------------------------------------------------------------------
624 UString UString::substr(size_type index, size_type num /*= npos */) const
625 {
626 // this could avoid the extra copy if we used a private specialty constructor
627 dstring data = mData.substr(index, num);
628 UString tmp;
629 tmp.mData.swap(data);
630 return tmp;
631 }
632 //--------------------------------------------------------------------------
634 {
635 code_point cp[2];
636 size_t c = _utf32_to_utf16(val, cp);
637 if (c > 0)
638 push_back(cp[0]);
639 if (c > 1)
640 push_back(cp[1]);
641 }
642 //--------------------------------------------------------------------------
643#if MYGUI_IS_NATIVE_WCHAR_T
644 void UString::push_back(wchar_t val)
645 {
646 // we do this because the Unicode method still preserves UTF-16 code points
647 mData.push_back(static_cast<code_point>(val));
648 }
649#endif
650 //--------------------------------------------------------------------------
652 {
653 mData.push_back(val);
654 }
655
656 void UString::push_back(char val)
657 {
658 mData.push_back(static_cast<code_point>(val));
659 }
660
662 {
664 const_iterator ie = end();
665 for (i = begin(); i != ie; i.moveNext())
666 {
667 if (i.getCharacter() == ch)
668 return true;
669 }
670 return false;
671 }
672
673 const std::string& UString::asUTF8() const
674 {
675 _load_buffer_UTF8();
676 return *m_buffer.mStrBuffer;
677 }
678
679 const char* UString::asUTF8_c_str() const
680 {
681 _load_buffer_UTF8();
682 return m_buffer.mStrBuffer->c_str();
683 }
684
686 {
687 _load_buffer_UTF32();
688 return *m_buffer.mUTF32StrBuffer;
689 }
690
692 {
693 _load_buffer_UTF32();
694 return m_buffer.mUTF32StrBuffer->c_str();
695 }
696
697 const std::wstring& UString::asWStr() const
698 {
699 _load_buffer_WStr();
700 return *m_buffer.mWStrBuffer;
701 }
702
703 const wchar_t* UString::asWStr_c_str() const
704 {
705 _load_buffer_WStr();
706 return m_buffer.mWStrBuffer->c_str();
707 }
708
710 {
711 return mData.at(loc);
712 }
713
715 {
716 return mData.at(loc);
717 }
718
720 {
721 const code_point* ptr = c_str();
722 unicode_char uc;
723 size_t l = _utf16_char_length(ptr[loc]);
724 code_point cp[2] = {/* blame the code beautifier */
725 0,
726 0};
727 cp[0] = ptr[loc];
728
729 if (l == 2 && (loc + 1) < mData.length())
730 {
731 cp[1] = ptr[loc + 1];
732 }
733 _utf16_to_utf32(cp, uc);
734 return uc;
735 }
736
738 {
739 code_point cp[2] = {/* blame the code beautifier */
740 0,
741 0};
742 size_t l = _utf32_to_utf16(ch, cp);
743 unicode_char existingChar = getChar(loc);
744 size_t existingSize = _utf16_char_length(existingChar);
745 size_t newSize = _utf16_char_length(ch);
746
747 if (newSize > existingSize)
748 {
749 at(loc) = cp[0];
750 insert(loc + 1, 1, cp[1]);
751 return 1;
752 }
753 if (newSize < existingSize)
754 {
755 erase(loc, 1);
756 at(loc) = cp[0];
757 return -1;
758 }
759
760 // newSize == existingSize
761 at(loc) = cp[0];
762 if (l == 2)
763 at(loc + 1) = cp[1];
764 return 0;
765 }
766
768 {
769 iterator i;
770 i.mIter = mData.begin();
771 i.mString = this;
772 return i;
773 }
774
776 {
778 i.mIter = const_cast<UString*>(this)->mData.begin();
779 i.mString = const_cast<UString*>(this);
780 return i;
781 }
782
784 {
785 iterator i;
786 i.mIter = mData.end();
787 i.mString = this;
788 return i;
789 }
790
792 {
794 i.mIter = const_cast<UString*>(this)->mData.end();
795 i.mString = const_cast<UString*>(this);
796 return i;
797 }
798
800 {
802 i.mIter = mData.end();
803 i.mString = this;
804 return i;
805 }
806
808 {
810 i.mIter = const_cast<UString*>(this)->mData.end();
811 i.mString = const_cast<UString*>(this);
812 return i;
813 }
814
816 {
818 i.mIter = mData.begin();
819 i.mString = this;
820 return i;
821 }
822
824 {
826 i.mIter = const_cast<UString*>(this)->mData.begin();
827 i.mString = const_cast<UString*>(this);
828 return i;
829 }
830
832 {
833 mData.assign(start.mIter, end.mIter);
834 return *this;
835 }
836
838 {
839 mData.assign(str.mData);
840 return *this;
841 }
842
844 {
845 mData.assign(str);
846 return *this;
847 }
848
850 {
851 mData.assign(str, num);
852 return *this;
853 }
854
856 {
857 mData.assign(str.mData, index, len);
858 return *this;
859 }
860
862 {
863 mData.assign(num, ch);
864 return *this;
865 }
866
867 UString& UString::assign(const std::wstring& wstr)
868 {
869 mData.clear();
870 mData.reserve(wstr.length()); // best guess bulk allocate
871#ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
872 code_point tmp;
873 std::wstring::const_iterator i, ie = wstr.end();
874 for (i = wstr.begin(); i != ie; i++)
875 {
876 tmp = static_cast<code_point>(*i);
877 mData.push_back(tmp);
878 }
879#else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
880 code_point cp[3] = {0, 0, 0};
881 unicode_char tmp;
882 std::wstring::const_iterator i;
883 std::wstring::const_iterator ie = wstr.end();
884 for (i = wstr.begin(); i != ie; i++)
885 {
886 tmp = static_cast<unicode_char>(*i);
887 size_t l = _utf32_to_utf16(tmp, cp);
888 if (l > 0)
889 mData.push_back(cp[0]);
890 if (l > 1)
891 mData.push_back(cp[1]);
892 }
893#endif
894 return *this;
895 }
896
897#if MYGUI_IS_NATIVE_WCHAR_T
898 UString& UString::assign(const wchar_t* w_str)
899 {
900 std::wstring tmp;
901 tmp.assign(w_str);
902 return assign(tmp);
903 }
904
905 UString& UString::assign(const wchar_t* w_str, size_type num)
906 {
907 std::wstring tmp;
908 tmp.assign(w_str, num);
909 return assign(tmp);
910 }
911#endif
912
914 {
915 for (const auto& character : str)
916 {
917 push_back(character);
918 }
919 return *this;
920 }
921
923 {
925 clear(); // empty our contents, if there are any
926 reserve(len); // best guess bulk capacity growth
927
928 // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
929 // then converting it to UTF-16, then finally appending the data buffer
930
931 unicode_char uc; // temporary Unicode character buffer
932 unsigned char utf8buf[7]; // temporary UTF-8 buffer
933 utf8buf[6] = 0;
934 size_t utf8len; // UTF-8 length
935 code_point utf16buff[3]; // temporary UTF-16 buffer
936 utf16buff[2] = 0;
937 size_t utf16len; // UTF-16 length
938
939 for (size_type i = 0; i < num; ++i)
940 {
941 utf8len =
942 std::min(_utf8_char_length(static_cast<unsigned char>(c_str[i])), num - i); // estimate bytes to load
943 for (size_t j = 0; j < utf8len; j++)
944 { // load the needed UTF-8 bytes
945 utf8buf[j] = (static_cast<unsigned char>(
946 c_str
947 [i +
948 j])); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
949 }
950 utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
951 utf8len = _utf8_to_utf32(utf8buf, uc); // do the UTF-8 -> UTF-32 conversion
952 i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
953
954 utf16len = _utf32_to_utf16(uc, utf16buff); // UTF-32 -> UTF-16 conversion
955 append(utf16buff, utf16len); // append the characters to the string
956 }
957 return *this;
958 }
959
961 {
962 mData.append(str.mData);
963 return *this;
964 }
965
967 {
968 mData.append(str);
969 return *this;
970 }
971
973 {
974 mData.append(str.mData, index, len);
975 return *this;
976 }
977
979 {
980 mData.append(str, num);
981 return *this;
982 }
983
985 {
986 mData.append(num, ch);
987 return *this;
988 }
989
991 {
992 mData.append(start.mIter, end.mIter);
993 return *this;
994 }
995
996#if MYGUI_IS_NATIVE_WCHAR_T
997 UString& UString::append(const wchar_t* w_str, size_type num)
998 {
999 std::wstring tmp(w_str, num);
1000 return append(tmp);
1001 }
1002
1003 UString& UString::append(size_type num, wchar_t ch)
1004 {
1005 return append(num, static_cast<unicode_char>(ch));
1006 }
1007#endif
1009 {
1010 UString tmp(c_str, num);
1011 append(tmp);
1012 return *this;
1013 }
1014
1016 {
1017 append(num, static_cast<code_point>(ch));
1018 return *this;
1019 }
1020
1022 {
1023 code_point cp[2] = {0, 0};
1024 if (_utf32_to_utf16(ch, cp) == 2)
1025 {
1026 for (size_type i = 0; i < num; i++)
1027 {
1028 append(1, cp[0]);
1029 append(1, cp[1]);
1030 }
1031 }
1032 else
1033 {
1034 for (size_type i = 0; i < num; i++)
1035 {
1036 append(1, cp[0]);
1037 }
1038 }
1039 return *this;
1040 }
1041
1043 {
1044 iterator ret;
1045 ret.mIter = mData.insert(i.mIter, ch);
1046 ret.mString = this;
1047 return ret;
1048 }
1049
1051 {
1052 mData.insert(index, str.mData);
1053 return *this;
1054 }
1055
1057 {
1058 mData.insert(index1, str.mData, index2, num);
1059 return *this;
1060 }
1061
1063 {
1064 mData.insert(i.mIter, start.mIter, end.mIter);
1065 }
1066
1068 {
1069 mData.insert(index, str, num);
1070 return *this;
1071 }
1072
1073#if MYGUI_IS_NATIVE_WCHAR_T
1074 UString& UString::insert(size_type index, const wchar_t* w_str, size_type num)
1075 {
1076 UString tmp(w_str, num);
1077 insert(index, tmp);
1078 return *this;
1079 }
1080#endif
1081
1083 {
1084 UString tmp(c_str, num);
1085 insert(index, tmp);
1086 return *this;
1087 }
1088
1090 {
1091 mData.insert(index, num, ch);
1092 return *this;
1093 }
1094
1095#if MYGUI_IS_NATIVE_WCHAR_T
1096 UString& UString::insert(size_type index, size_type num, wchar_t ch)
1097 {
1098 insert(index, num, static_cast<unicode_char>(ch));
1099 return *this;
1100 }
1101#endif
1102
1104 {
1105 insert(index, num, static_cast<code_point>(ch));
1106 return *this;
1107 }
1108
1110 {
1111 code_point cp[3] = {0, 0, 0};
1112 size_t l = _utf32_to_utf16(ch, cp);
1113 if (l == 1)
1114 {
1115 return insert(index, num, cp[0]);
1116 }
1117 for (size_type c = 0; c < num; c++)
1118 {
1119 // insert in reverse order to preserve ordering after insert
1120 insert(index, 1, cp[1]);
1121 insert(index, 1, cp[0]);
1122 }
1123 return *this;
1124 }
1125
1127 {
1128 mData.insert(i.mIter, num, ch);
1129 }
1130#if MYGUI_IS_NATIVE_WCHAR_T
1131 void UString::insert(iterator i, size_type num, const wchar_t& ch)
1132 {
1133 insert(i, num, static_cast<unicode_char>(ch));
1134 }
1135#endif
1136
1137 void UString::insert(iterator i, size_type num, const char& ch)
1138 {
1139 insert(i, num, static_cast<code_point>(ch));
1140 }
1141
1143 {
1144 code_point cp[3] = {0, 0, 0};
1145 size_t l = _utf32_to_utf16(ch, cp);
1146 if (l == 1)
1147 {
1148 insert(i, num, cp[0]);
1149 }
1150 else
1151 {
1152 for (size_type c = 0; c < num; c++)
1153 {
1154 // insert in reverse order to preserve ordering after insert
1155 insert(i, 1, cp[1]);
1156 insert(i, 1, cp[0]);
1157 }
1158 }
1159 }
1160
1162 {
1163 iterator ret;
1164 ret.mIter = mData.erase(loc.mIter);
1165 ret.mString = this;
1166 return ret;
1167 }
1168
1170 {
1171 iterator ret;
1172 ret.mIter = mData.erase(start.mIter, end.mIter);
1173 ret.mString = this;
1174 return ret;
1175 }
1176
1177 UString& UString::erase(size_type index /*= 0*/, size_type num /*= npos */)
1178 {
1179 if (num == npos)
1180 mData.erase(index);
1181 else
1182 mData.erase(index, num);
1183 return *this;
1184 }
1185
1187 {
1188 mData.replace(index1, num1, str.mData, 0, npos);
1189 return *this;
1190 }
1191
1193 {
1194 mData.replace(index1, num1, str.mData, 0, num2);
1195 return *this;
1196 }
1197
1198 UString& UString::replace(size_type index1, size_type num1, const UString& str, size_type index2, size_type num2)
1199 {
1200 mData.replace(index1, num1, str.mData, index2, num2);
1201 return *this;
1202 }
1203
1204 UString& UString::replace(iterator start, iterator end, const UString& str, size_type num /*= npos */)
1205 {
1206 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1207
1208 size_type index1 = begin() - st;
1209 size_type num1 = end - st;
1210 return replace(index1, num1, str, 0, num);
1211 }
1212
1214 {
1215 mData.replace(index, num1, num2, ch);
1216 return *this;
1217 }
1218
1220 {
1221 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1222
1223 size_type index1 = begin() - st;
1224 size_type num1 = end - st;
1225 return replace(index1, num1, num, ch);
1226 }
1227
1228 int UString::compare(const UString& str) const
1229 {
1230 return mData.compare(str.mData);
1231 }
1232
1233 int UString::compare(const code_point* str) const
1234 {
1235 return mData.compare(str);
1236 }
1237
1238 int UString::compare(size_type index, size_type length, const UString& str) const
1239 {
1240 return mData.compare(index, length, str.mData);
1241 }
1242
1243 int UString::compare(size_type index, size_type length, const UString& str, size_type index2, size_type length2)
1244 const
1245 {
1246 return mData.compare(index, length, str.mData, index2, length2);
1247 }
1248
1249 int UString::compare(size_type index, size_type length, const code_point* str, size_type length2) const
1250 {
1251 return mData.compare(index, length, str, length2);
1252 }
1253
1254#if MYGUI_IS_NATIVE_WCHAR_T
1255 int UString::compare(size_type index, size_type length, const wchar_t* w_str, size_type length2) const
1256 {
1257 UString tmp(w_str, length2);
1258 return compare(index, length, tmp);
1259 }
1260#endif
1261
1262 int UString::compare(size_type index, size_type length, const char* c_str, size_type length2) const
1263 {
1264 UString tmp(c_str, length2);
1265 return compare(index, length, tmp);
1266 }
1267
1268 UString::size_type UString::find(const UString& str, size_type index /*= 0 */) const
1269 {
1270 return mData.find(str.c_str(), index);
1271 }
1272
1274 {
1275 UString tmp(cp_str);
1276 return mData.find(tmp.c_str(), index, length);
1277 }
1278
1280 {
1281 UString tmp(c_str);
1282 return mData.find(tmp.c_str(), index, length);
1283 }
1284
1285#if MYGUI_IS_NATIVE_WCHAR_T
1286 UString::size_type UString::find(const wchar_t* w_str, size_type index, size_type length) const
1287 {
1288 UString tmp(w_str);
1289 return mData.find(tmp.c_str(), index, length);
1290 }
1291#endif
1292
1293 UString::size_type UString::find(char ch, size_type index /*= 0 */) const
1294 {
1295 return find(static_cast<code_point>(ch), index);
1296 }
1297
1299 {
1300 return mData.find(ch, index);
1301 }
1302
1303#if MYGUI_IS_NATIVE_WCHAR_T
1304 UString::size_type UString::find(wchar_t ch, size_type index /*= 0 */) const
1305 {
1306 return find(static_cast<unicode_char>(ch), index);
1307 }
1308#endif
1309
1311 {
1312 code_point cp[3] = {0, 0, 0};
1313 size_t l = _utf32_to_utf16(ch, cp);
1314 return find(UString(cp, l), index);
1315 }
1316
1317 UString::size_type UString::rfind(const UString& str, size_type index /*= 0 */) const
1318 {
1319 return mData.rfind(str.c_str(), index);
1320 }
1321
1323 {
1324 UString tmp(cp_str);
1325 return mData.rfind(tmp.c_str(), index, num);
1326 }
1327
1329 {
1330 UString tmp(c_str);
1331 return mData.rfind(tmp.c_str(), index, num);
1332 }
1333
1334#if MYGUI_IS_NATIVE_WCHAR_T
1335 UString::size_type UString::rfind(const wchar_t* w_str, size_type index, size_type num) const
1336 {
1337 UString tmp(w_str);
1338 return mData.rfind(tmp.c_str(), index, num);
1339 }
1340#endif
1341
1342 UString::size_type UString::rfind(char ch, size_type index /*= 0 */) const
1343 {
1344 return rfind(static_cast<code_point>(ch), index);
1345 }
1346
1348 {
1349 return mData.rfind(ch, index);
1350 }
1351
1352#if MYGUI_IS_NATIVE_WCHAR_T
1353 UString::size_type UString::rfind(wchar_t ch, size_type index /*= 0 */) const
1354 {
1355 return rfind(static_cast<unicode_char>(ch), index);
1356 }
1357#endif
1358
1360 {
1361 code_point cp[3] = {0, 0, 0};
1362 size_t l = _utf32_to_utf16(ch, cp);
1363 return rfind(UString(cp, l), index);
1364 }
1365
1366 UString::size_type UString::find_first_of(const UString& str, size_type index /*= 0*/, size_type num /*= npos */)
1367 const
1368 {
1369 size_type i = 0;
1370 const size_type len = length();
1371 while (i < num && (index + i) < len)
1372 {
1373 unicode_char ch = getChar(index + i);
1374 if (str.inString(ch))
1375 return index + i;
1376 i += _utf16_char_length(ch); // increment by the Unicode character length
1377 }
1378 return npos;
1379 }
1380
1382 {
1383 UString tmp;
1384 tmp.assign(1, ch);
1385 return find_first_of(tmp, index);
1386 }
1387
1389 {
1390 return find_first_of(static_cast<code_point>(ch), index);
1391 }
1392
1393#if MYGUI_IS_NATIVE_WCHAR_T
1394 UString::size_type UString::find_first_of(wchar_t ch, size_type index /*= 0 */) const
1395 {
1396 return find_first_of(static_cast<unicode_char>(ch), index);
1397 }
1398#endif
1399
1401 {
1402 code_point cp[3] = {0, 0, 0};
1403 size_t l = _utf32_to_utf16(ch, cp);
1404 return find_first_of(UString(cp, l), index);
1405 }
1406
1408 const UString& str,
1409 size_type index /*= 0*/,
1410 size_type num /*= npos */) const
1411 {
1412 size_type i = 0;
1413 const size_type len = length();
1414 while (i < num && (index + i) < len)
1415 {
1416 unicode_char ch = getChar(index + i);
1417 if (!str.inString(ch))
1418 return index + i;
1419 i += _utf16_char_length(ch); // increment by the Unicode character length
1420 }
1421 return npos;
1422 }
1423
1425 {
1426 UString tmp;
1427 tmp.assign(1, ch);
1428 return find_first_not_of(tmp, index);
1429 }
1430
1432 {
1433 return find_first_not_of(static_cast<code_point>(ch), index);
1434 }
1435
1436#if MYGUI_IS_NATIVE_WCHAR_T
1437 UString::size_type UString::find_first_not_of(wchar_t ch, size_type index /*= 0 */) const
1438 {
1439 return find_first_not_of(static_cast<unicode_char>(ch), index);
1440 }
1441#endif
1442
1444 {
1445 code_point cp[3] = {0, 0, 0};
1446 size_t l = _utf32_to_utf16(ch, cp);
1447 return find_first_not_of(UString(cp, l), index);
1448 }
1449
1450 UString::size_type UString::find_last_of(const UString& str, size_type index /*= npos*/, size_type num /*= npos */)
1451 const
1452 {
1453 size_type i = 0;
1454 const size_type len = length();
1455 if (index > len)
1456 index = len - 1;
1457
1458 while (i < num && (index - i) != npos)
1459 {
1460 size_type j = index - i;
1461 // careful to step full Unicode characters
1462 if (j != 0 && _utf16_surrogate_follow(at(j)) && _utf16_surrogate_lead(at(j - 1)))
1463 {
1464 j = index - ++i;
1465 }
1466 // and back to the usual dull test
1467 unicode_char ch = getChar(j);
1468 if (str.inString(ch))
1469 return j;
1470 i++;
1471 }
1472 return npos;
1473 }
1474
1476 {
1477 UString tmp;
1478 tmp.assign(1, ch);
1479 return find_last_of(tmp, index);
1480 }
1481
1482#if MYGUI_IS_NATIVE_WCHAR_T
1483 UString::size_type UString::find_last_of(wchar_t ch, size_type index /*= npos */) const
1484 {
1485 return find_last_of(static_cast<unicode_char>(ch), index);
1486 }
1487#endif
1488
1490 {
1491 code_point cp[3] = {0, 0, 0};
1492 size_t l = _utf32_to_utf16(ch, cp);
1493 return find_last_of(UString(cp, l), index);
1494 }
1495
1497 const UString& str,
1498 size_type index /*= npos*/,
1499 size_type num /*= npos */) const
1500 {
1501 size_type i = 0;
1502 const size_type len = length();
1503 if (index > len)
1504 index = len - 1;
1505
1506 while (i < num && (index - i) != npos)
1507 {
1508 size_type j = index - i;
1509 // careful to step full Unicode characters
1510 if (j != 0 && _utf16_surrogate_follow(at(j)) && _utf16_surrogate_lead(at(j - 1)))
1511 {
1512 j = index - ++i;
1513 }
1514 // and back to the usual dull test
1515 unicode_char ch = getChar(j);
1516 if (!str.inString(ch))
1517 return j;
1518 i++;
1519 }
1520 return npos;
1521 }
1522
1524 {
1525 UString tmp;
1526 tmp.assign(1, ch);
1527 return find_last_not_of(tmp, index);
1528 }
1529
1531 {
1532 return find_last_not_of(static_cast<code_point>(ch), index);
1533 }
1534
1535#if MYGUI_IS_NATIVE_WCHAR_T
1536 UString::size_type UString::find_last_not_of(wchar_t ch, size_type index /*= npos */) const
1537 {
1538 return find_last_not_of(static_cast<unicode_char>(ch), index);
1539 }
1540#endif
1541
1543 {
1544 code_point cp[3] = {0, 0, 0};
1545 size_t l = _utf32_to_utf16(ch, cp);
1546 return find_last_not_of(UString(cp, l), index);
1547 }
1548
1549 bool UString::operator<(const UString& right) const
1550 {
1551 return compare(right) < 0;
1552 }
1553
1554 bool UString::operator<=(const UString& right) const
1555 {
1556 return compare(right) <= 0;
1557 }
1558
1560 {
1561 return assign(s);
1562 }
1563
1565 {
1566 clear();
1567 return append(1, ch);
1568 }
1569
1571 {
1572 clear();
1573 return append(1, ch);
1574 }
1575
1576#if MYGUI_IS_NATIVE_WCHAR_T
1577 UString& UString::operator=(wchar_t ch)
1578 {
1579 clear();
1580 return append(1, ch);
1581 }
1582#endif
1583
1585 {
1586 clear();
1587 return append(1, ch);
1588 }
1589
1590 bool UString::operator>(const UString& right) const
1591 {
1592 return compare(right) > 0;
1593 }
1594
1595 bool UString::operator>=(const UString& right) const
1596 {
1597 return compare(right) >= 0;
1598 }
1599
1600 bool UString::operator==(const UString& right) const
1601 {
1602 return compare(right) == 0;
1603 }
1604
1605 bool UString::operator!=(const UString& right) const
1606 {
1607 return !operator==(right);
1608 }
1609
1611 {
1612 return at(index);
1613 }
1614
1616 {
1617 return at(index);
1618 }
1619
1620 UString::operator std::string() const
1621 {
1622 return asUTF8();
1623 }
1624
1626 UString::operator std::wstring() const
1627 {
1628 return asWStr();
1629 }
1630
1631
1633 {
1634 // tests if the cp is within the surrogate pair range
1635 // everything else is a standalone code point, or it matches a surrogate pair signature
1636 return 0xD800 > cp || cp > 0xDFFF;
1637 }
1638
1640 {
1641 // tests if the cp is within the 1st word of a surrogate pair
1642 // it is a 1st word, or it isn't
1643 return 0xD800 <= cp && cp <= 0xDBFF;
1644 }
1645
1647 {
1648 // tests if the cp is within the 2nd word of a surrogate pair
1649 // it is a 2nd word, everything else isn't
1650 return 0xDC00 <= cp && cp <= 0xDFFF;
1651 }
1652
1654 {
1655 if (0xD800 <= cp && cp <= 0xDBFF) // test if cp is the beginning of a surrogate pair
1656 return 2; // if it is, then we are 2 words long
1657 return 1; // otherwise we are only 1 word long
1658 }
1659
1661 {
1662 if (uc > 0xFFFF) // test if uc is greater than the single word maximum
1663 return 2; // if so, we need a surrogate pair
1664 return 1; // otherwise we can stuff it into a single word
1665 }
1666
1667 size_t UString::_utf16_to_utf32(const code_point in_cp[2], unicode_char& out_uc)
1668 {
1669 const code_point& cp1 = in_cp[0];
1670 const code_point& cp2 = in_cp[1];
1671 bool wordPair = false;
1672
1673 // does it look like a surrogate pair?
1674 if (0xD800 <= cp1 && cp1 <= 0xDBFF)
1675 {
1676 // looks like one, but does the other half match the algorithm as well?
1677 if (0xDC00 <= cp2 && cp2 <= 0xDFFF)
1678 wordPair = true; // yep!
1679 }
1680
1681 if (!wordPair)
1682 { // if we aren't a 100% authentic surrogate pair, then just copy the value
1683 out_uc = cp1;
1684 return 1;
1685 }
1686
1687 unsigned short cU = cp1;
1688 unsigned short cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
1689 cU -= 0xD800; // remove the encoding markers
1690 cL -= 0xDC00;
1691
1692 out_uc = (cU & 0x03FF) << 10; // grab the 10 upper bits and set them in their proper location
1693 out_uc |= (cL & 0x03FF); // combine in the lower 10 bits
1694 out_uc += 0x10000; // add back in the value offset
1695
1696 return 2; // this whole operation takes to words, so that's what we'll return
1697 }
1698
1699 size_t UString::_utf32_to_utf16(const unicode_char& in_uc, code_point out_cp[2])
1700 {
1701 if (in_uc <= 0xFFFF)
1702 { // we blindly preserve sentinel values because our decoder understands them
1703 out_cp[0] = static_cast<code_point>(in_uc);
1704 return 1;
1705 }
1706 unicode_char uc = in_uc; // copy to writable buffer
1707 unsigned short tmp; // single code point buffer
1708 uc -= 0x10000; // subtract value offset
1709
1710 //process upper word
1711 tmp = static_cast<unsigned short>((uc >> 10) & 0x03FF); // grab the upper 10 bits
1712 tmp += 0xD800; // add encoding offset
1713 out_cp[0] = tmp; // write
1714
1715 // process lower word
1716 tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits
1717 tmp += 0xDC00; // add encoding offset
1718 out_cp[1] = tmp; // write
1719
1720 return 2; // return used word count (2 for surrogate pairs)
1721 }
1722
1723 bool UString::_utf8_start_char(unsigned char cp)
1724 {
1725 return (cp & ~_cont_mask) != _cont;
1726 }
1727
1728 size_t UString::_utf8_char_length(unsigned char cp)
1729 {
1730 if (!(cp & 0x80))
1731 return 1;
1732 if ((cp & ~_lead1_mask) == _lead1)
1733 return 2;
1734 if ((cp & ~_lead2_mask) == _lead2)
1735 return 3;
1736 if ((cp & ~_lead3_mask) == _lead3)
1737 return 4;
1738 if ((cp & ~_lead4_mask) == _lead4)
1739 return 5;
1740 if ((cp & ~_lead5_mask) == _lead5)
1741 return 6;
1742
1743 return 1;
1744 //throw invalid_data( "invalid UTF-8 sequence header value" );
1745 }
1746
1748 {
1749 /*
1750 7 bit: U-00000000 - U-0000007F: 0xxxxxxx
1751 11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
1752 16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
1753 21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1754 26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1755 31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1756 */
1757 if (!(uc & ~0x0000007F))
1758 return 1;
1759 if (!(uc & ~0x000007FF))
1760 return 2;
1761 if (!(uc & ~0x0000FFFF))
1762 return 3;
1763 if (!(uc & ~0x001FFFFF))
1764 return 4;
1765 if (!(uc & ~0x03FFFFFF))
1766 return 5;
1767 if (!(uc & ~0x7FFFFFFF))
1768 return 6;
1769
1770 return 1;
1771 //throw invalid_data( "invalid UTF-32 value" );
1772 }
1773
1774 size_t UString::_utf8_to_utf32(const unsigned char in_cp[6], unicode_char& out_uc)
1775 {
1776 size_t len = _utf8_char_length(in_cp[0]);
1777 if (len == 1)
1778 { // if we are only 1 byte long, then just grab it and exit
1779 out_uc = in_cp[0];
1780 return 1;
1781 }
1782
1783 unicode_char c = 0; // temporary buffer
1784 size_t i = 0;
1785 switch (len)
1786 { // load header byte
1787 case 6: c = in_cp[i] & _lead5_mask; break;
1788 case 5: c = in_cp[i] & _lead4_mask; break;
1789 case 4: c = in_cp[i] & _lead3_mask; break;
1790 case 3: c = in_cp[i] & _lead2_mask; break;
1791 case 2: c = in_cp[i] & _lead1_mask; break;
1792 default: break;
1793 }
1794
1795 // load each continuation byte
1796 for (++i; i < len; i++)
1797 {
1798 if ((in_cp[i] & ~_cont_mask) != _cont)
1799 {
1800 //throw invalid_data( "bad UTF-8 continuation byte" );
1801 out_uc = in_cp[0];
1802 return 1;
1803 }
1804 c <<= 6;
1805 c |= (in_cp[i] & _cont_mask);
1806 }
1807
1808 out_uc = c; // write the final value and return the used byte length
1809 return len;
1810 }
1811
1812 size_t UString::_utf32_to_utf8(const unicode_char& in_uc, unsigned char out_cp[6])
1813 {
1814 size_t len = _utf8_char_length(in_uc); // predict byte length of sequence
1815 unicode_char c = in_uc; // copy to temp buffer
1816
1817 //stuff all of the lower bits
1818 for (size_t i = len - 1; i > 0; i--)
1819 {
1820 out_cp[i] = static_cast<unsigned char>(((c)&_cont_mask) | _cont);
1821 c >>= 6;
1822 }
1823
1824 //now write the header byte
1825 switch (len)
1826 {
1827 case 6: out_cp[0] = static_cast<unsigned char>(((c)&_lead5_mask) | _lead5); break;
1828 case 5: out_cp[0] = static_cast<unsigned char>(((c)&_lead4_mask) | _lead4); break;
1829 case 4: out_cp[0] = static_cast<unsigned char>(((c)&_lead3_mask) | _lead3); break;
1830 case 3: out_cp[0] = static_cast<unsigned char>(((c)&_lead2_mask) | _lead2); break;
1831 case 2: out_cp[0] = static_cast<unsigned char>(((c)&_lead1_mask) | _lead1); break;
1832 case 1:
1833 default: out_cp[0] = static_cast<unsigned char>((c)&0x7F); break;
1834 }
1835
1836 // return the byte length of the sequence
1837 return len;
1838 }
1839
1841 {
1842 std::string_view tmp(reinterpret_cast<const char*>(c_str));
1843 return _verifyUTF8(tmp);
1844 }
1845
1847 {
1848 size_type length = 0;
1849
1850 for (size_type i = 0; i < num; ++i)
1851 {
1852 // characters pass until we find an extended sequence
1853 if (c_str[i] & 0x80)
1854 {
1855 if (i + 1 >= num) // invalid extended sequence
1856 return num;
1857
1858 unsigned char c = c_str[i];
1859 size_t contBytes = 0;
1860
1861 // get continuation byte count and test for overlong sequences
1862 if ((c & ~_lead1_mask) == _lead1)
1863 { // 1 additional byte
1864 if (c == _lead1)
1865 {
1866 //throw invalid_data( "overlong UTF-8 sequence" );
1867 return num;
1868 }
1869 contBytes = 1;
1870 }
1871 else if ((c & ~_lead2_mask) == _lead2)
1872 { // 2 additional bytes
1873 contBytes = 2;
1874 if (c == _lead2)
1875 { // possible overlong UTF-8 sequence
1876 c = c_str[i + 1]; // look ahead to next byte in sequence
1877 if ((c & _lead2) == _cont)
1878 {
1879 //throw invalid_data( "overlong UTF-8 sequence" );
1880 return num;
1881 }
1882 }
1883 }
1884 else if ((c & ~_lead3_mask) == _lead3)
1885 { // 3 additional bytes
1886 contBytes = 3;
1887 if (c == _lead3)
1888 { // possible overlong UTF-8 sequence
1889 c = c_str[i + 1]; // look ahead to next byte in sequence
1890 if ((c & _lead3) == _cont)
1891 {
1892 //throw invalid_data( "overlong UTF-8 sequence" );
1893 return num;
1894 }
1895 }
1896 }
1897 else if ((c & ~_lead4_mask) == _lead4)
1898 { // 4 additional bytes
1899 contBytes = 4;
1900 if (c == _lead4)
1901 { // possible overlong UTF-8 sequence
1902 c = c_str[i + 1]; // look ahead to next byte in sequence
1903 if ((c & _lead4) == _cont)
1904 {
1905 //throw invalid_data( "overlong UTF-8 sequence" );
1906 return num;
1907 }
1908 }
1909 }
1910 else if ((c & ~_lead5_mask) == _lead5)
1911 { // 5 additional bytes
1912 contBytes = 5;
1913 if (c == _lead5)
1914 { // possible overlong UTF-8 sequence
1915 c = c_str[i + 1]; // look ahead to next byte in sequence
1916 if ((c & _lead5) == _cont)
1917 {
1918 //throw invalid_data( "overlong UTF-8 sequence" );
1919 return num;
1920 }
1921 }
1922 }
1923 if (i + contBytes >= num) // invalid extended sequence
1924 return num;
1925 // check remaining continuation bytes for
1926 while (contBytes--)
1927 {
1928 c = c_str[++i]; // get next byte in sequence
1929 if ((c & ~_cont_mask) != _cont)
1930 {
1931 //throw invalid_data( "bad UTF-8 continuation byte" );
1932 return num;
1933 }
1934 }
1935 }
1936 length++;
1937 }
1938 return length;
1939 }
1940
1941 void UString::_init()
1942 {
1943 m_buffer.mVoidBuffer = nullptr;
1944 m_bufferType = bt_none;
1945 m_bufferSize = 0;
1946 }
1947
1948 void UString::_cleanBuffer() const
1949 {
1950 if (m_buffer.mVoidBuffer != nullptr)
1951 {
1952 switch (m_bufferType)
1953 {
1954 case bt_string: delete m_buffer.mStrBuffer; break;
1955 case bt_wstring: delete m_buffer.mWStrBuffer; break;
1956 case bt_utf32string: delete m_buffer.mUTF32StrBuffer; break;
1957 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
1958 //delete m_buffer.mVoidBuffer;
1959 // delete void* is undefined, don't do that
1960 static_assert("This should never happen - mVoidBuffer should never contain something if we "
1961 "don't know the type");
1962 break;
1963 }
1964 m_buffer.mVoidBuffer = nullptr;
1965 m_bufferSize = 0;
1966 m_bufferType = bt_none;
1967 }
1968 }
1969
1970 void UString::_getBufferStr() const
1971 {
1972 if (m_bufferType != bt_string)
1973 {
1974 _cleanBuffer();
1975 m_buffer.mStrBuffer = new std::string();
1976 m_bufferType = bt_string;
1977 }
1978 m_buffer.mStrBuffer->clear();
1979 }
1980
1981 void UString::_getBufferWStr() const
1982 {
1983 if (m_bufferType != bt_wstring)
1984 {
1985 _cleanBuffer();
1986 m_buffer.mWStrBuffer = new std::wstring();
1987 m_bufferType = bt_wstring;
1988 }
1989 m_buffer.mWStrBuffer->clear();
1990 }
1991
1992 void UString::_getBufferUTF32Str() const
1993 {
1994 if (m_bufferType != bt_utf32string)
1995 {
1996 _cleanBuffer();
1997 m_buffer.mUTF32StrBuffer = new utf32string();
1998 m_bufferType = bt_utf32string;
1999 }
2000 m_buffer.mUTF32StrBuffer->clear();
2001 }
2002
// Fills the std::string conversion buffer with the UTF-8 encoding of this string.
// The buffer is prepared (emptied or created) by _getBufferStr(); the string is then
// walked one Unicode character at a time and each character is encoded with
// _utf32_to_utf8() and appended byte by byte.
2003 void UString::_load_buffer_UTF8() const
2004 {
2005 _getBufferStr();
2006 std::string& buffer = (*m_buffer.mStrBuffer);
// reserve one byte per UTF-16 code point up front; the buffer still grows on
// demand when characters need more bytes than that
2007 buffer.reserve(length());
2008
// scratch space for one encoded character (_utf32_to_utf8 writes at most 6 bytes)
2009 unsigned char utf8buf[6];
// char view of the same scratch bytes, for push_back into the std::string
2010 char* charbuf = (char*)utf8buf;
2011 unicode_char c;
2012 size_t len;
2013
2015 const_iterator ie = end();
// moveNext() advances by whole Unicode characters, so getCharacter() yields one
// complete UTF-32 value per iteration
2016 for (i = begin(); i != ie; i.moveNext())
2017 {
2018 c = i.getCharacter();
2019 len = _utf32_to_utf8(c, utf8buf);
2020 size_t j = 0;
// append exactly the len bytes the encoder produced
2021 while (j < len)
2022 buffer.push_back(charbuf[j++]);
2023 }
2024 }
2025
// Fills the std::wstring conversion buffer with this string's contents.
// The buffer is prepared (emptied or created) by _getBufferWStr(). Which loop is
// compiled depends on WCHAR_UTF16: with it, code points are copied one-to-one;
// without it, each Unicode character is decoded and stored as a single wchar_t.
2026 void UString::_load_buffer_WStr() const
2027 {
2028 _getBufferWStr();
2029 std::wstring& buffer = (*m_buffer.mWStrBuffer);
2030 buffer.reserve(length()); // may over reserve, but should be close enough
2031#ifdef WCHAR_UTF16 // wchar_t matches UTF-16
2032 const_iterator i, ie = end();
// ++i steps a single code point, so surrogate pairs are copied as two wchar_t values
2033 for (i = begin(); i != ie; ++i)
2034 {
2035 buffer.push_back((wchar_t)(*i));
2036 }
2037#else // wchar_t fits UTF-32
2038 unicode_char c;
2040 const_iterator ie = end();
// moveNext() steps a whole Unicode character; getCharacter() returns its UTF-32 value
2041 for (i = begin(); i != ie; i.moveNext())
2042 {
2043 c = i.getCharacter();
2044 buffer.push_back((wchar_t)c);
2045 }
2046#endif
2047 }
2048
// Fills the utf32string conversion buffer with the UTF-32 form of this string.
// The buffer is prepared (emptied or created) by _getBufferUTF32Str(); the string is
// then walked one Unicode character at a time and each decoded value is appended.
2049 void UString::_load_buffer_UTF32() const
2050 {
2051 _getBufferUTF32Str();
2052 utf32string& buffer = (*m_buffer.mUTF32StrBuffer);
2053 buffer.reserve(length()); // may over reserve, but should be close enough
2054
2055 unicode_char c;
2056
2058 const_iterator ie = end();
// moveNext() steps a whole Unicode character; getCharacter() returns its UTF-32 value
2059 for (i = begin(); i != ie; i.moveNext())
2060 {
2061 c = i.getCharacter();
2062 buffer.push_back(c);
2063 }
2064 }
2065
2066} // namespace MyGUI
base iterator class for UString
int _setCharacter(unicode_char uc)
void _become(const _base_iterator &i)
void _jump_to(size_type index)
unicode_char _getCharacter() const
const forward iterator for UString
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_const_fwd_iterator & operator++()
pre-increment
_const_fwd_iterator operator-(difference_type n)
subtraction operator
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_fwd_iterator & operator--()
pre-decrement
_const_fwd_iterator operator+(difference_type n)
addition operator
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
const value_type & operator*() const
dereference operator
const reverse iterator for UString
_const_rev_iterator operator-(difference_type n)
subtraction operator
_const_rev_iterator operator+(difference_type n)
addition operator
_const_rev_iterator & operator++()
pre-increment
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_rev_iterator & operator--()
pre-decrement
const value_type & operator*() const
dereference operator
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
forward iterator for UString
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
_fwd_iterator & operator++()
pre-increment
_fwd_iterator operator-(difference_type n)
subtraction operator
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_fwd_iterator operator+(difference_type n)
addition operator
value_type & operator*() const
dereference operator
_fwd_iterator & operator--()
pre-decrement
value_type & operator[](difference_type n) const
dereference at offset operator
reverse iterator for UString
_rev_iterator & operator+=(difference_type n)
addition assignment operator
_rev_iterator & operator--()
pre-decrement
value_type & operator*() const
dereference operator
_rev_iterator & operator++()
pre-increment
_rev_iterator operator-(difference_type n)
subtraction operator
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
value_type & operator[](difference_type n) const
dereference at offset operator
_rev_iterator operator+(difference_type n)
addition operator
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
size_type length() const
Returns the number of code points in the current string.
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array
bool operator>(const UString &right) const
greater than operator
size_type size() const
Returns the number of code points in the current string.
static size_t _utf32_to_utf8(const unicode_char &in_uc, unsigned char out_cp[6])
writes the given UTF-32 uc_in to the buffer location out_cp using UTF-8 encoding, returns the number ...
const code_point * data() const
returns a pointer to the first character in the current string
UString()
default constructor, creates an empty string
static size_t _utf8_to_utf32(const unsigned char in_cp[6], unicode_char &out_uc)
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of...
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
bool operator==(const UString &right) const
equality operator
bool operator!=(const UString &right) const
inequality operator
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
bool operator>=(const UString &right) const
greater than or equal operator
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
void reserve(size_type size)
sets the capacity of the string to at least size code points
_const_rev_iterator const_reverse_iterator
const reverse iterator
static size_t _utf32_to_utf16(const unicode_char &in_uc, code_point out_cp[2])
writes the given UTF-32 uc_in to the buffer location out_cp using UTF-16 encoding,...
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
static size_t _utf16_to_utf32(const code_point in_cp[2], unicode_char &out_uc)
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc,...
void clear()
deletes all of the elements in the string
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
~UString()
destructor
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
int compare(const UString &str) const
compare str to the current string
code_point value_type
value type typedef for use in iterators
bool operator<=(const UString &right) const
less than or equal operator
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
static bool _utf16_surrogate_follow(code_point cp)
returns true if cp matches the signature of a surrogate pair following character
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str,...
static size_t _utf16_char_length(code_point cp)
estimates the number of UTF-16 code points in the sequence starting with cp
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character
std::basic_string< code_point > dstring
bool operator<(const UString &right) const
less than operator
static bool _utf8_start_char(unsigned char cp)
returns true if cp is the beginning of a UTF-8 sequence
uint16 code_point
a single UTF-16 code point
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the first character within the current string that matches any character in str,...
static bool _utf16_surrogate_lead(code_point cp)
returns true if cp matches the signature of a surrogate pair lead character
iterator end()
returns an iterator just past the end of the string
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
static bool _utf16_independent_char(code_point cp)
returns true if cp does not match the signature for the lead or follow code point of a surrogate pair...
static const size_type npos
the usual constant representing: not found, no limit, etc
uint32 unicode_char
a single 32-bit Unicode character
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
_fwd_iterator iterator
iterator
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
code_point & operator[](size_type index)
code point dereference operator
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
UString & append(const UString &str)
appends str on to the end of the current string
const code_point * c_str() const
returns a pointer to the first character in the current string
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
void resize(size_type num, const code_point &val=0)
changes the size of the string to size, filling in any new area with val
_const_fwd_iterator const_iterator
const iterator
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
size_t size_type
size type used to indicate string size and character positions within the string
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
static size_t _utf8_char_length(unsigned char cp)
estimates the number of UTF-8 code points in the sequence starting with cp
size_type length_Characters() const
Returns the number of Unicode characters in the string.
void push_back(unicode_char val)
appends val to the end of the string
iterator begin()
returns an iterator to the first element of the string
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
_rev_iterator reverse_iterator
reverse iterator
size_type max_size() const
returns the maximum number of UTF-16 code points that the string can hold
UString substr(size_type index, size_type num=npos) const
returns a substring of the current string, starting at index, and num characters long.
void swap(UString &from)
exchanges the elements of the current string with those of from
size_type capacity() const
returns the number of elements that the string can hold before it will need to allocate more space
bool empty() const
returns true if the string has no elements, false otherwise
float len(float x, float y)