UTF-8 text utilities.
Functions | |
unsigned | CodePointLengthByFirstByte (unsigned char c) noexcept |
Returns the length in bytes of a UTF-8 code point, determined from its first byte. | |
bool | IsWellFormedCodePoint (const unsigned char *bytes, std::size_t length) noexcept |
bytes must not be a nullptr, length must not be 0. | |
bool | IsValid (const unsigned char *bytes, std::size_t length) noexcept |
bytes must not be a nullptr, length must not be 0. | |
std::size_t | GetCodePointsCount (std::string_view text) |
void | TrimTruncatedEnding (std::string &str) |
void | TrimViewTruncatedEnding (std::string_view &view) |
std::size_t | GetTextPosByCodePointPos (std::string_view text, std::size_t pos) noexcept |
void | RemovePrefix (std::string &text, std::size_t count) noexcept |
void | RemoveViewPrefix (std::string_view &text, std::size_t count) noexcept |
void | TakePrefix (std::string &text, std::size_t count) noexcept |
void | TakeViewPrefix (std::string_view &text, std::size_t count) noexcept |
std::size_t utils::text::utf8::GetCodePointsCount | ( | std::string_view | text | ) |
Returns the number of UTF-8 code points in text. text must be in UTF-8 encoding.
std::runtime_error | if text is not valid UTF-8 text |
|
noexcept |
Returns the position in text where the UTF-8 code point with position pos starts, or text.length() if text contains pos or fewer code points.
Precondition: text is valid UTF-8 text.
|
noexcept |
Removes the first count UTF-8 code points from text.
Precondition: text is valid UTF-8 text.
|
noexcept |
|
noexcept |
Takes the first count UTF-8 code points from text.
Precondition: text is valid UTF-8 text.
|
noexcept |
void utils::text::utf8::TrimTruncatedEnding | ( | std::string & | str | ) |
Removes the longest (possibly empty) suffix of str which is a proper prefix of some UTF-8 multibyte character. If str is not in UTF-8, it may remove some suffix of length up to 3.
void utils::text::utf8::TrimViewTruncatedEnding | ( | std::string_view & | view | ) |