10 #ifndef __UT_Unicode__
11 #define __UT_Unicode__
19 #define UT_UTF8_MAX_ENCODING_LEN 4
22 static inline const utf8 *UTF8(
const uchar *lit)
24 return reinterpret_cast<const utf8 *
>(lit);
97 static inline bool fixpos(
const utf8 *start,
const utf8 *&current);
131 bool big_endian =
false);
153 return cp >= 0xD800 && cp < 0xE000;
158 return cp >= 0x10000 && cp <= 0x10FFFF;
173 return cp <= 0x1F || cp == 0x7F;
188 static inline bool isSpace(
utf32 cp,
bool break_only =
true);
248 const utf8 *
at()
const {
return myCurrent; }
261 bool atEnd()
const {
return myCP == 0; }
267 inline void init(
const utf8 *start,
const utf8 *
end,
272 const utf8 *myStart, *myEnd;
273 const utf8 *myCurrent, *myNext, *myNext2;
274 utf32 myCP, myNextCP;
281 #endif // __UT_Unicode__
static bool isUpper(utf32 cp)
static bool isValidCodePoint(utf32 cp)
static bool isSpace(utf32 cp, bool break_only=true)
Helper functions for Unicode and the UTF-8 variable length encoding.
static bool isLower(utf32 cp)
static bool isAlpha(utf32 cp)
static bool isControlChar(utf32 cp)
static bool isFromSupplementaryPlane(utf32 cp)
static bool isPunct(utf32 cp)
void reset(const utf8 *to=0)
static bool isLatin1(utf32 cp)
static const utf8 * prev(const utf8 *start, const utf8 *current)
static bool isUTF8(utf8 octet)
A utility class to do read-only operations on a subset of an existing string.
static const utf8 * convert(const utf8 *str, utf32 &cp)
static bool isDigit(utf32 cp)
static bool isWordDelimiter(utf32 cp)
static bool isSurrogatePair(utf32 cp)
static const utf8 * nextWord(const utf8 *start, const utf8 *current)
static exint count(const utf8 *start, const utf8 *end=0)
Returns the number of code points this variable encoding represents.
static const utf8 * next(const utf8 *current)
static bool isAlnum(utf32 cp)
static bool fixpos(const utf8 *start, const utf8 *&current)
static exint length(const utf8 *start, const utf8 *end=0)
const utf8 * next() const
static const utf8 * prevWord(const utf8 *start, const utf8 *current)
static bool isASCII(utf32 cp)
static bool isCJK(utf32 cp)
static const utf8 * find(utf32 cp, const utf8 *start, const utf8 *end=0)
static utf32 replacementCodePoint()
static utf32 toLower(utf32 cp)
static utf8 * duplicate(const utf8 *start, const utf8 *end=0)
static utf32 toUpper(utf32 cp)