|
| ustring (void) noexcept |
| Default ctr for ustring – make an empty string. More...
|
|
| ustring (const char *str) |
| Construct a ustring from a null-terminated C string (char *). More...
|
|
| ustring (string_view str) |
|
| ustring (const char *str, size_type pos, size_type n) |
|
| ustring (const char *str, size_type n) |
| Construct a ustring from the first n characters of str. More...
|
|
| ustring (size_type n, char c) |
| Construct a ustring from n copies of character c. More...
|
|
| ustring (const std::string &str, size_type pos, size_type n=npos) |
| Construct a ustring from an indexed substring of a std::string. More...
|
|
| ustring (const ustring &str) noexcept |
| Copy construct a ustring from another ustring. More...
|
|
| ustring (const ustring &str, size_type pos, size_type n=npos) |
| Construct a ustring from an indexed substring of a ustring. More...
|
|
| ~ustring () noexcept |
| ustring destructor. More...
|
|
| operator string_view () const noexcept |
| Conversion to string_view. More...
|
|
| operator std::string () const noexcept |
| Conversion to std::string (explicit only!). More...
|
|
const ustring & | assign (const ustring &str) |
| Assign a ustring to *this. More...
|
|
const ustring & | assign (const ustring &str, size_type pos, size_type n=npos) |
| Assign a substring of a ustring to *this. More...
|
|
const ustring & | assign (const std::string &str) |
| Assign a std::string to *this. More...
|
|
const ustring & | assign (const std::string &str, size_type pos, size_type n=npos) |
| Assign a substring of a std::string to *this. More...
|
|
const ustring & | assign (const char *str) |
| Assign a null-terminated C string (char*) to *this. More...
|
|
const ustring & | assign (const char *str, size_type n) |
| Assign the first n characters of str to *this. More...
|
|
const ustring & | assign (size_type n, char c) |
| Assign n copies of c to *this. More...
|
|
const ustring & | assign (string_view str) |
| Assign a string_view to *this. More...
|
|
const ustring & | operator= (const ustring &str) |
| Assign a ustring to another ustring. More...
|
|
const ustring & | operator= (const char *str) |
| Assign a null-terminated C string (char *) to a ustring. More...
|
|
const ustring & | operator= (const std::string &str) |
| Assign a C++ std::string to a ustring. More...
|
|
const ustring & | operator= (string_view str) |
| Assign a string_view to a ustring. More...
|
|
const ustring & | operator= (char c) |
| Assign a single char to a ustring. More...
|
|
const char * | c_str () const noexcept |
| Return a C string representation of a ustring. More...
|
|
const char * | data () const noexcept |
| Return a C string representation of a ustring. More...
|
|
const std::string & | string () const noexcept |
| Return a C++ std::string representation of a ustring. More...
|
|
void | clear (void) noexcept |
| Reset to an empty string. More...
|
|
size_t | length (void) const noexcept |
| Return the number of characters in the string. More...
|
|
size_t | hash (void) const noexcept |
| Return a hashed version of the string. More...
|
|
size_t | size (void) const noexcept |
| Return the number of characters in the string. More...
|
|
bool | empty (void) const noexcept |
|
const_iterator | begin () const noexcept |
|
const_iterator | end () const noexcept |
|
const_reverse_iterator | rbegin () const noexcept |
|
const_reverse_iterator | rend () const noexcept |
|
const_reference | operator[] (size_type pos) const noexcept |
|
size_type | copy (char *s, size_type n, size_type pos=0) const |
|
ustring | substr (size_type pos=0, size_type n=npos) const |
|
size_type | find (const ustring &str, size_type pos=0) const noexcept |
|
size_type | find (const std::string &str, size_type pos=0) const noexcept |
|
size_type | find (const char *s, size_type pos, size_type n) const |
|
size_type | find (const char *s, size_type pos=0) const |
|
size_type | find (char c, size_type pos=0) const noexcept |
|
size_type | rfind (const ustring &str, size_type pos=npos) const noexcept |
|
size_type | rfind (const std::string &str, size_type pos=npos) const noexcept |
|
size_type | rfind (const char *s, size_type pos, size_type n) const |
|
size_type | rfind (const char *s, size_type pos=npos) const |
|
size_type | rfind (char c, size_type pos=npos) const noexcept |
|
size_type | find_first_of (const ustring &str, size_type pos=0) const noexcept |
|
size_type | find_first_of (const std::string &str, size_type pos=0) const noexcept |
|
size_type | find_first_of (const char *s, size_type pos, size_type n) const |
|
size_type | find_first_of (const char *s, size_type pos=0) const |
|
size_type | find_first_of (char c, size_type pos=0) const noexcept |
|
size_type | find_last_of (const ustring &str, size_type pos=npos) const noexcept |
|
size_type | find_last_of (const std::string &str, size_type pos=npos) const noexcept |
|
size_type | find_last_of (const char *s, size_type pos, size_type n) const |
|
size_type | find_last_of (const char *s, size_type pos=npos) const |
|
size_type | find_last_of (char c, size_type pos=npos) const noexcept |
|
size_type | find_first_not_of (const ustring &str, size_type pos=0) const noexcept |
|
size_type | find_first_not_of (const std::string &str, size_type pos=0) const noexcept |
|
size_type | find_first_not_of (const char *s, size_type pos, size_type n) const |
|
size_type | find_first_not_of (const char *s, size_type pos=0) const |
|
size_type | find_first_not_of (char c, size_type pos=0) const noexcept |
|
size_type | find_last_not_of (const ustring &str, size_type pos=npos) const noexcept |
|
size_type | find_last_not_of (const std::string &str, size_type pos=npos) const noexcept |
|
size_type | find_last_not_of (const char *s, size_type pos, size_type n) const |
|
size_type | find_last_not_of (const char *s, size_type pos=npos) const |
|
size_type | find_last_not_of (char c, size_type pos=npos) const noexcept |
|
int | compare (string_view str) const noexcept |
|
int | compare (const char *str) const noexcept |
|
bool | operator== (const ustring &str) const noexcept |
|
bool | operator!= (const ustring &str) const noexcept |
|
bool | operator== (const std::string &x) const noexcept |
|
bool | operator== (string_view x) const noexcept |
|
bool | operator== (const char *x) const noexcept |
| Test a ustring (*this) for lexicographic equality with the C string (char *) x. More...
|
|
bool | operator!= (const std::string &x) const noexcept |
|
bool | operator!= (string_view x) const noexcept |
|
bool | operator!= (const char *x) const noexcept |
|
bool | operator< (const ustring &x) const noexcept |
|
A ustring is an alternative to char* or std::string for storing strings, in which the character sequence is unique (allowing many speed advantages for assignment, equality testing, and inequality testing).
The implementation is that behind the scenes there is a hash set of allocated strings, so the characters of each string are unique. A ustring itself is a pointer to the characters of one of these canonical strings. Therefore, assignment and equality testing are each just a single 32- or 64-bit int operation, the only mutex is when a ustring is created from raw characters, and the only malloc is the first time each canonical ustring is created.
The internal table also contains a std::string version and the length of the string, so converting a ustring to a std::string (via ustring::string()) or querying the number of characters (via ustring::size() or ustring::length()) is extremely inexpensive, and does not involve creation/allocation of a new std::string or a call to strlen.
We try very hard to completely mimic the API of std::string, including all the constructors, comparisons, iterations, etc. Of course, the characters of a ustring are non-modifiable, so we do not replicate any of the non-const methods of std::string. But in most other ways it looks and acts like a std::string and so most templated algorithms that would work on a "const std::string &" will also work on a ustring.
Usage guidelines:
Compared to standard strings, ustrings have several advantages:
- Each individual ustring is very small – in fact, we guarantee that a ustring is the same size and memory layout as an ordinary char*.
- Storage is frugal, since there is only one allocated copy of each unique character sequence, throughout the lifetime of the program.
- Assignment from one ustring to another is just a copy of the pointer; no allocation, no character copying, no reference counting.
- Equality testing (do the strings contain the same characters) is a single operation, the comparison of the pointer.
- Memory allocation only occurs when a new ustring is constructed from raw characters the FIRST time – subsequent constructions of the same string just find it in the canonical string set, and don't need to allocate new storage. Destruction of a ustring is trivial, there is no de-allocation because the canonical version stays in the set. Also, therefore, no user code mistake can lead to memory leaks.
But there are some problems, too. Canonical strings are never freed from the table. So in some sense all the strings "leak", but they only leak one copy for each unique string that the program ever comes across. Also, creation of unique strings from raw characters is more expensive than for standard strings, due to hashing, table queries, and other overhead.
On the whole, ustrings are a really great string representation
- if you tend to have (relatively) few unique strings, but many copies of those strings;
- if the creation of strings from raw characters is relatively rare compared to copying or comparing to existing strings;
- if you tend to make the same strings over and over again, and if it's relatively rare that a single unique character sequence is used only once in the entire lifetime of the program;
- if your most common string operations are assignment and equality testing and you want them to be as fast as possible;
- if you are doing relatively little character-by-character assembly of strings, string concatenation, or other "string manipulation" (other than equality testing).
ustrings are not so hot
- if your program tends to have very few copies of each character sequence over the entire lifetime of the program;
- if your program tends to generate a huge variety of unique strings over its lifetime, each of which is used only a short time and then discarded, never to be needed again;
- if you don't need to do a lot of string assignment or equality testing, but lots of more complex string manipulation.
Definition at line 108 of file ustring.h.