xoreos  0.0.5
ustring.cpp
Go to the documentation of this file.
1 /* xoreos - A reimplementation of BioWare's Aurora engine
2  *
3  * xoreos is the legal property of its developers, whose names
4  * can be found in the AUTHORS file distributed with this source
5  * distribution.
6  *
7  * xoreos is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 3
10  * of the License, or (at your option) any later version.
11  *
12  * xoreos is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with xoreos. If not, see <http://www.gnu.org/licenses/>.
19  */
20 
25 #include <cstdarg>
26 #include <cstdio>
27 #include <cctype>
28 
29 #include <boost/algorithm/string/replace.hpp>
30 
31 #include "src/common/ustring.h"
32 #include "src/common/error.h"
33 #include "src/common/util.h"
34 
35 namespace Common {
36 
37 UString::UString() : _size(0) {
38 }
39 
41  *this = str;
42 }
43 
44 UString::UString(const std::string &str) {
45  *this = str;
46 }
47 
48 UString::UString(const char *str) {
49  *this = str;
50 }
51 
52 UString::UString(const char *str, size_t n) {
53  *this = std::string(str, n);
54 }
55 
56 UString::UString(uint32 c, size_t n) : _size(0) {
57  while (n-- > 0)
58  *this += c;
59 }
60 
61 UString::UString(iterator sBegin, iterator sEnd) : _size(0) {
62  for (; (sBegin != sEnd) && *sBegin; ++sBegin)
63  *this += *sBegin;
64 }
65 
67 }
68 
70  _string = str._string;
71  _size = str._size;
72 
73  return *this;
74 }
75 
76 UString &UString::operator=(const std::string &str) {
77  _string = str;
78 
80 
81  return *this;
82 }
83 
84 UString &UString::operator=(const char *str) {
85  *this = std::string(str);
86 
87  return *this;
88 }
89 
90 bool UString::operator==(const UString &str) const {
91  return strcmp(str) == 0;
92 }
93 
94 bool UString::operator!=(const UString &str) const {
95  return strcmp(str) != 0;
96 }
97 
98 bool UString::operator<(const UString &str) const {
99  return strcmp(str) < 0;
100 }
101 
102 bool UString::operator>(const UString &str) const {
103  return strcmp(str) > 0;
104 }
105 
106 UString UString::operator+(const UString &str) const {
107  UString tmp(*this);
108 
109  tmp += str;
110 
111  return tmp;
112 }
113 
114 UString UString::operator+(const std::string &str) const {
115  UString tmp(*this);
116 
117  tmp += str;
118 
119  return tmp;
120 }
121 
122 UString UString::operator+(const char *str) const {
123  UString tmp(*this);
124 
125  tmp += str;
126 
127  return tmp;
128 }
129 
131  UString tmp(*this);
132 
133  tmp += c;
134 
135  return tmp;
136 }
137 
139  _string += str._string;
140  _size += str._size;
141 
142  return *this;
143 }
144 
145 UString &UString::operator+=(const std::string &str) {
146  UString ustr(str);
147 
148  return *this += ustr;
149 }
150 
151 UString &UString::operator+=(const char *str) {
152  UString ustr(str);
153 
154  return *this += ustr;
155 }
156 
158  try {
159  utf8::append(c, std::back_inserter(_string));
160  } catch (const std::exception &se) {
161  Exception e(se);
162  throw e;
163  }
164 
165  _size++;
166 
167  return *this;
168 }
169 
170 int UString::strcmp(const UString &str) const {
171  UString::iterator it1 = begin();
172  UString::iterator it2 = str.begin();
173  for (; (it1 != end()) && (it2 != str.end()); ++it1, ++it2) {
174  uint32 c1 = *it1;
175  uint32 c2 = *it2;
176 
177  if (c1 < c2)
178  return -1;
179  if (c1 > c2)
180  return 1;
181  }
182 
183  if ((it1 == end()) && (it2 == str.end()))
184  return 0;
185 
186  if (it1 == end())
187  return -1;
188 
189  return 1;
190 }
191 
192 int UString::stricmp(const UString &str) const {
193  UString::iterator it1 = begin();
194  UString::iterator it2 = str.begin();
195  for (; (it1 != end()) && (it2 != str.end()); ++it1, ++it2) {
196  uint32 c1 = toLower(*it1);
197  uint32 c2 = toLower(*it2);
198 
199  if (c1 < c2)
200  return -1;
201  if (c1 > c2)
202  return 1;
203  }
204 
205  if ((it1 == end()) && (it2 == str.end()))
206  return 0;
207 
208  if (it1 == end())
209  return -1;
210 
211  return 1;
212 }
213 
214 bool UString::equals(const UString &str) const {
215  return strcmp(str) == 0;
216 }
217 
218 bool UString::equalsIgnoreCase(const UString &str) const {
219  return stricmp(str) == 0;
220 }
221 
222 bool UString::less(const UString &str) const {
223  return strcmp(str) < 0;
224 }
225 
226 bool UString::lessIgnoreCase(const UString &str) const {
227  return stricmp(str) < 0;
228 }
229 
230 void UString::swap(UString &str) {
231  _string.swap(str._string);
232 
233  SWAP(_size, str._size);
234 }
235 
237  _string.clear();
238  _size = 0;
239 }
240 
241 size_t UString::size() const {
242  return _size;
243 }
244 
245 bool UString::empty() const {
246  return _string.empty() || (_string[0] == '\0');
247 }
248 
249 const char *UString::c_str() const {
250  return _string.c_str();
251 }
252 
254  return iterator(_string.begin(), _string.begin(), _string.end());
255 }
256 
258  return iterator(_string.end(), _string.begin(), _string.end());
259 }
260 
262  for (iterator it = begin(); it != end(); ++it)
263  if (*it == c)
264  return it;
265 
266  return end();
267 }
268 
270  size_t index = _string.find(what._string);
271  if (index != std::string::npos) {
272  std::string::const_iterator it = _string.begin();
273  std::advance(it, index);
274  return iterator(it, _string.begin(), _string.end());
275  }
276  return end();
277 }
278 
280  if (empty())
281  return end();
282 
283  iterator it = end();
284  do {
285  --it;
286 
287  if (*it == c)
288  return it;
289 
290  } while (it != begin());
291 
292  return end();
293 }
294 
295 bool UString::beginsWith(const UString &with) const {
296  if (with.empty())
297  return true;
298 
299  if (empty())
300  return false;
301 
302  UString::iterator myIt = begin();
303  UString::iterator withIt = with.begin();
304 
305  while ((myIt != end()) && (withIt != with.end()))
306  if (*myIt++ != *withIt++)
307  return false;
308 
309  if ((myIt == end()) && (withIt != with.end()))
310  return false;
311 
312  return true;
313 }
314 
315 bool UString::endsWith(const UString &with) const {
316  if (with.empty())
317  return true;
318 
319  if (empty())
320  return false;
321 
322  UString::iterator myIt = --end();
323  UString::iterator withIt = --with.end();
324 
325  while ((myIt != begin()) && (withIt != with.begin()))
326  if (*myIt-- != *withIt--)
327  return false;
328 
329  if (withIt == with.begin())
330  return (*myIt == *withIt);
331 
332  return false;
333 }
334 
335 bool UString::contains(const UString &what) const {
336  return _string.find(what._string) != std::string::npos;
337 }
338 
339 bool UString::contains(uint32 c) const {
340  return findFirst(c) != end();
341 }
342 
343 void UString::truncate(const iterator &it) {
344  UString temp;
345 
346  for (iterator i = begin(); i != it; ++i)
347  temp += *i;
348 
349  swap(temp);
350 }
351 
352 void UString::truncate(size_t n) {
353  if (n >= _size)
354  return;
355 
356  UString temp;
357 
358  for (iterator it = begin(); n > 0; ++it, n--)
359  temp += *it;
360 
361  swap(temp);
362 }
363 
365  if (_string.empty())
366  // Nothing to do
367  return;
368 
369  // Find the last space, from the end
370  iterator itEnd = --end();
371  for (; itEnd != begin(); --itEnd) {
372  uint32 c = *itEnd;
373  if ((c != '\0') && (c != ' ')) {
374  ++itEnd;
375  break;
376  }
377  }
378 
379  if (itEnd == begin()) {
380  uint32 c = *itEnd;
381  if ((c != '\0') && (c != ' '))
382  ++itEnd;
383  }
384 
385  // Find the first non-space
386  iterator itStart = begin();
387  for (; itStart != itEnd; ++itStart)
388  if (*itStart != ' ')
389  break;
390 
391  _string = std::string(itStart.base(), itEnd.base());
392  recalculateSize();
393 }
394 
396  if (_string.empty())
397  // Nothing to do
398  return;
399 
400  // Find the first non-space
401  iterator itStart = begin();
402  for (; itStart != end(); ++itStart)
403  if (*itStart != ' ')
404  break;
405 
406  _string = std::string(itStart.base(), end().base());
407  recalculateSize();
408 }
409 
411  if (_string.empty())
412  // Nothing to do
413  return;
414 
415  // Find the last space, from the end
416  iterator itEnd = --end();
417  for (; itEnd != begin(); --itEnd) {
418  uint32 c = *itEnd;
419  if ((c != '\0') && (c != ' ')) {
420  ++itEnd;
421  break;
422  }
423  }
424 
425  if (itEnd == begin()) {
426  uint32 c = *itEnd;
427  if ((c != '\0') && (c != ' '))
428  ++itEnd;
429  }
430 
431  _string = std::string(begin().base(), itEnd.base());
432  recalculateSize();
433 }
434 
436  try {
437 
438  // The new string with characters replaced
439  std::string newString;
440  newString.reserve(_string.size());
441 
442  // Run through the whole string
443  std::string::iterator it = _string.begin();
444  while (it != _string.end()) {
445  std::string::iterator prev = it;
446 
447  // Get the codepoint
448  uint32 c = utf8::next(it, _string.end());
449 
450  if (c != what) {
451  // It's not what we're looking for, copy it
452  for (; prev != it; ++prev)
453  newString.push_back(*prev);
454  } else
455  // It's what we're looking for, insert the replacement instead
456  utf8::append(with, std::back_inserter(newString));
457 
458  }
459 
460  // And set the new string's contents
461  _string.swap(newString);
462 
463  } catch (const std::exception &se) {
464  Exception e(se);
465  throw e;
466  }
467 }
468 
469 void UString::replaceAll(const UString &what, const UString &with) {
470  boost::replace_all(_string, what._string, with._string);
471 }
472 
474  *this = toLower();
475 }
476 
478  *this = toUpper();
479 }
480 
482  UString str;
483 
484  str._string.reserve(_string.size());
485  for (iterator it = begin(); it != end(); ++it)
486  str += toLower(*it);
487 
488  return str;
489 }
490 
492  UString str;
493 
494  str._string.reserve(_string.size());
495  for (iterator it = begin(); it != end(); ++it)
496  str += toUpper(*it);
497 
498  return str;
499 }
500 
502  iterator it = begin();
503  for (size_t i = 0; (i < n) && (it != end()); i++, ++it);
504  return it;
505 }
506 
507 size_t UString::getPosition(iterator it) const {
508  size_t n = 0;
509  for (iterator i = begin(); i != it; ++i, n++);
510  return n;
511 }
512 
514  if (pos == end()) {
515  *this += c;
516  return;
517  }
518 
519  UString temp;
520 
521  iterator it;
522  for (it = begin(); it != pos; ++it)
523  temp += *it;
524 
525  temp += c;
526 
527  for ( ; it != end(); ++it)
528  temp += *it;
529 
530  swap(temp);
531 }
532 
534  if (pos == end()) {
535  *this += str;
536  return;
537  }
538 
539  UString temp;
540 
541  iterator it;
542  for (it = begin(); it != pos; ++it)
543  temp += *it;
544 
545  temp += str;
546 
547  for ( ; it != end(); ++it)
548  temp += *it;
549 
550  swap(temp);
551 }
552 
554  if (pos == end()) {
555  *this += c;
556  return;
557  }
558 
559  UString temp;
560 
561  iterator it;
562  for (it = begin(); it != pos; ++it)
563  temp += *it;
564 
565  temp += c;
566 
567  for (++it; it != end(); ++it)
568  temp += *it;
569 
570  swap(temp);
571 }
572 
574  if (pos == end()) {
575  *this += str;
576  return;
577  }
578 
579  UString temp;
580 
581  iterator it;
582  for (it = begin(); it != pos; ++it)
583  temp += *it;
584 
585  for (iterator it2 = str.begin(); it2 != str.end(); ++it2) {
586  temp += *it2;
587 
588  if (it != end())
589  ++it;
590  }
591 
592  for ( ; it != end(); ++it)
593  temp += *it;
594 
595  swap(temp);
596 }
597 
599  if (from == end())
600  return;
601 
602  UString temp;
603 
604  iterator it = begin();
605  for ( ; it != from; ++it)
606  temp += *it;
607 
608  for ( ; it != to; ++it);
609 
610  for ( ; it != end(); ++it)
611  temp += *it;
612 
613  swap(temp);
614 }
615 
617  iterator to = pos;
618  erase(pos, ++to);
619 }
620 
621 void UString::split(iterator splitPoint, UString &left, UString &right, bool remove) const {
622  left.clear();
623  right.clear();
624 
625  if (splitPoint == begin()) {
626  right = *this;
627  return;
628  }
629  if (splitPoint == end()) {
630  left = *this;
631  return;
632  }
633 
634  iterator it = begin();
635  for ( ; it != splitPoint; ++it)
636  left += *it;
637 
638  if (remove)
639  ++it;
640 
641  for ( ; it != end(); ++it)
642  right += *it;
643 }
644 
645 void UString::splitTextTokens(const UString &text, std::vector<UString> &tokens) {
646  UString collect;
647 
648  int state = 0;
649  for (iterator it = text.begin(); it != text.end(); ++it) {
650  uint32 c = *it;
651 
652  if (state == 0) {
653  // Collecting non-tokens
654 
655  if (c == '<') {
656  tokens.push_back(collect);
657 
658  collect.clear();
659  collect += c;
660 
661  state = 1;
662  } else
663  collect += c;
664 
665  } else if (state == 1) {
666  // Collecting tokens
667 
668  if (c == '<') {
669  // Start of a token within a token
670  // Add what we've collected to the last non-token
671 
672  tokens.back() += collect;
673 
674  collect.clear();
675  collect += c;
676 
677  } else if (c == '>') {
678  // End of the token
679 
680  collect += c;
681  tokens.push_back(collect);
682 
683  collect.clear();
684  state = 0;
685 
686  } else {
687  // Still within a token
688 
689  collect += c;
690  }
691 
692  }
693 
694  }
695 
696  if (collect.empty())
697  return;
698 
699  // What's now collected is no full token
700  if (state == 0)
701  tokens.push_back(collect);
702  else if (state == 1)
703  tokens.back() += collect;
704 }
705 
707  UString sub;
708 
709  iterator it = begin();
710  for ( ; it != from; ++it);
711 
712  for ( ; it != to; ++it)
713  sub += *it;
714 
715  return sub;
716 }
717 
718 UString UString::format(const char *s, ...) {
719  char buf[STRINGBUFLEN];
720  va_list va;
721 
722  va_start(va, s);
723  vsnprintf(buf, STRINGBUFLEN, s, va);
724  va_end(va);
725 
726  return UString(buf);
727 }
728 
729 size_t UString::split(const UString &text, uint32 delim, std::vector<UString> &texts) {
730  size_t length = 0;
731 
732  UString t = text;
733 
734  iterator point;
735  while ((point = t.findFirst(delim)) != t.end()) {
736  UString left, right;
737 
738  t.split(point, left, right, true);
739 
740  if (!left.empty()) {
741  length = MAX(length, left.size());
742  texts.push_back(left);
743  }
744 
745  t = right;
746  }
747 
748  if (!t.empty()) {
749  length = MAX(length, t.size());
750  texts.push_back(t);
751  }
752 
753  return length;
754 }
755 
757  try {
758  // Calculate the "distance" in characters from the beginning and end
759  _size = utf8::distance(_string.begin(), _string.end());
760  } catch (const std::exception &se) {
761  Exception e(se);
762  throw e;
763  }
764 }
765 
766 // NOTE: If we ever need uppercase<->lowercase mappings for non-ASCII
767 // characters: http://www.unicode.org/reports/tr21/tr21-5.html
768 
770  if (!isASCII(c))
771  // We don't know how to lowercase that
772  return c;
773 
774  return std::tolower(c);
775 }
776 
778  if (!isASCII(c))
779  // We don't know how to uppercase that
780  return c;
781 
782  return std::toupper(c);
783 }
784 
786  return (c & 0xFFFFFF80) == 0;
787 }
788 
790  return isASCII(c) && std::isspace(c);
791 }
792 
794  return isASCII(c) && std::isdigit(c);
795 }
796 
798  return isASCII(c) && std::isalpha(c);
799 }
800 
802  return isASCII(c) && std::isalnum(c);
803 }
804 
806  return isASCII(c) && std::iscntrl(c);
807 }
808 
810  std::string utf8result;
811 
812  try {
813  utf8::utf16to8(&c, &c + 1, std::back_inserter(utf8result));
814  } catch (const std::exception &se) {
815  Exception e(se);
816  throw e;
817  }
818 
819  return *iterator(utf8result.begin(), utf8result.begin(), utf8result.end());
820 }
821 
822 } // End of namespace Common
void insert(iterator pos, uint32 c)
Insert character c in front of this position.
Definition: ustring.cpp:513
UString operator+(const UString &str) const
Definition: ustring.cpp:106
Definition: 2dafile.h:39
A class holding an UTF-8 string.
Definition: ustring.h:48
bool beginsWith(const UString &with) const
Definition: ustring.cpp:295
UString & operator=(const UString &str)
Definition: ustring.cpp:69
bool equalsIgnoreCase(const UString &str) const
Definition: ustring.cpp:218
bool endsWith(const UString &with) const
Definition: ustring.cpp:315
void truncate(const iterator &it)
Definition: ustring.cpp:343
iterator getPosition(size_t n) const
Convert a numerical position into an iterator.
Definition: ustring.cpp:501
iterator begin() const
Definition: ustring.cpp:253
UString & operator+=(const UString &str)
Definition: ustring.cpp:138
iterator findFirst(uint32 c) const
Definition: ustring.cpp:261
void swap(UString &str)
Swap the contents of the string with this string's.
Definition: ustring.cpp:230
void makeUpper()
Convert the string to uppercase.
Definition: ustring.cpp:477
Exception that provides a stack of explanations.
Definition: error.h:36
bool lessIgnoreCase(const UString &str) const
Definition: ustring.cpp:226
bool less(const UString &str) const
Definition: ustring.cpp:222
Basic exceptions to throw.
bool contains(const UString &what) const
Definition: ustring.cpp:335
UString substr(iterator from, iterator to) const
Definition: ustring.cpp:706
utf8::iterator< std::string::const_iterator > iterator
Definition: ustring.h:50
const char * c_str() const
Return the (utf8 encoded) string data.
Definition: ustring.cpp:249
void replaceAll(uint32 what, uint32 with)
Replace all occurrences of a character with another character.
Definition: ustring.cpp:435
static UString format(const char *s,...) GCC_PRINTF(1
Print formatted data into an UString object, similar to sprintf().
Definition: ustring.cpp:718
uint16_t uint16
Definition: types.h:202
Utility templates and functions.
void trimRight()
Definition: ustring.cpp:410
static void splitTextTokens(const UString &text, std::vector< UString > &tokens)
Definition: ustring.cpp:645
bool operator<(const UString &str) const
Definition: ustring.cpp:98
bool equals(const UString &str) const
Definition: ustring.cpp:214
bool empty() const
Is the string empty?
Definition: ustring.cpp:245
iterator findLast(uint32 c) const
Definition: ustring.cpp:279
static bool isAlpha(uint32 c)
Is the character an ASCII alphabetic character?
Definition: ustring.cpp:797
void replace(iterator pos, uint32 c)
Replace the character at this position with c.
Definition: ustring.cpp:553
bool operator==(const UString &str) const
Definition: ustring.cpp:90
int strcmp(const UString &str) const
Definition: ustring.cpp:170
Unicode string handling.
void recalculateSize()
Definition: ustring.cpp:756
UString()
Construct an empty string.
Definition: ustring.cpp:37
UString toLower() const
Return a lowercased copy of the string.
Definition: ustring.cpp:481
void trimLeft()
Definition: ustring.cpp:395
bool operator!=(const UString &str) const
Definition: ustring.cpp:94
static bool isASCII(uint32 c)
Is the character an ASCII character?
Definition: ustring.cpp:785
#define STRINGBUFLEN
Definition: system.h:415
size_t size() const
Return the size of the string, in characters.
Definition: ustring.cpp:241
uint32_t uint32
Definition: types.h:204
bool operator>(const UString &str) const
Definition: ustring.cpp:102
void erase(iterator from, iterator to)
Erase the character within this range.
Definition: ustring.cpp:598
static bool isDigit(uint32 c)
Is the character an ASCII digit character?
Definition: ustring.cpp:793
void split(iterator splitPoint, UString &left, UString &right, bool remove=false) const
Definition: ustring.cpp:621
size_t _size
Definition: ustring.h:205
iterator end() const
Definition: ustring.cpp:257
static bool isCntrl(uint32 c)
Is the character an ASCII control character?
Definition: ustring.cpp:805
T MAX(T a, T b)
Definition: util.h:71
UString toUpper() const
Return an uppercased copy of the string.
Definition: ustring.cpp:491
void clear()
Clear the string's contents.
Definition: ustring.cpp:236
std::string _string
Internal string holding the actual data.
Definition: ustring.h:203
void makeLower()
Convert the string to lowercase.
Definition: ustring.cpp:473
static bool isAlNum(uint32 c)
Is the character an ASCII alphanumeric character?
Definition: ustring.cpp:801
int stricmp(const UString &str) const
Definition: ustring.cpp:192
void SWAP(T &a, T &b)
Template method which swaps the values of its two parameters.
Definition: util.h:78
static bool isSpace(uint32 c)
Is the character an ASCII space character?
Definition: ustring.cpp:789
static uint32 fromUTF16(uint16 c)
Definition: ustring.cpp:809