userver: userver/utils/text.hpp Source File
⚠️ This is the documentation for an old userver version. Click here to switch to the latest version.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages Concepts
text.hpp
Go to the documentation of this file.
1#pragma once
2
3/// @file userver/utils/text.hpp
4/// @brief Text utilities
5
6#include <locale>
7#include <string>
8#include <string_view>
9#include <vector>
10
11#include <boost/multiprecision/cpp_dec_float.hpp>
12
13USERVER_NAMESPACE_BEGIN
14
15/// @brief Text utilities
16namespace utils::text {
17
/// Name of the en_US UTF-8 locale; the default `locale` of ToLower().
18inline const std::string kEnLocale{"en_US.UTF-8"};
19
20/// Return trimmed copy of string.
21std::string Trim(const std::string& str);
22
23/// Trim an rvalue string in-place and return the trimmed string.
24std::string Trim(std::string&& str);
25
26/// Split string by separators
27///
28/// @snippet utils/text_test.cpp SplitMultiple
31
32/// Split string by separators and return a non-owning container of chunks.
33///
34/// @warning Initial `str` should outlive the result of the function
35///
36/// @snippet utils/text_test.cpp SplitStringViewMultiple
39
40/// Join the strings of `strs` into a single string using separator `sep`
41std::string Join(const std::vector<std::string>& strs, std::string_view sep);
42
43/// Return number formatted with specified locale
44std::string Format(double value, const std::string& locale, int ndigits = 0,
45 bool is_fixed = true);
46
47/// Return number formatted
48std::string Format(double value, int ndigits);
49
50/// Return cpp_dec_float_50 formatted
51std::string Format(boost::multiprecision::cpp_dec_float_50 value, int ndigits);
52
53/// Return true if `hay` starts with `needle`, false otherwise.
54bool StartsWith(std::string_view hay, std::string_view needle) noexcept;
55
56/// Return true if `hay` ends with `needle`, false otherwise.
57bool EndsWith(std::string_view hay, std::string_view needle) noexcept;
58
59/// Transform letters to lower case
60std::string ToLower(std::string_view str,
61 const std::string& locale = kEnLocale);
62
63/// Capitalizes the first letter of `str`
64std::string Capitalize(std::string_view str, const std::string& locale);
65
66/// Removes double quotes from front and back of string.
67///
68/// Examples:
69/// @code
70/// RemoveQuotes("\"test\"") // returns "test"
71/// RemoveQuotes("\"test") // returns "\"test"
72/// RemoveQuotes("'test'") // returns "'test'"
73/// RemoveQuotes("\"\"test\"\"") // returns "\"test\""
74/// @endcode
75std::string RemoveQuotes(std::string_view str);
76
77/// Checks whether the character is an ASCII character
78bool IsAscii(char ch) noexcept;
79
80/// Checks whether the character is a whitespace character in C locale
81bool IsAsciiSpace(char ch) noexcept;
82
83/// Checks if text contains only ASCII characters
84bool IsAscii(std::string_view text) noexcept;
85
86/// Returns a locale with the specified name
87const std::locale& GetLocale(const std::string& name);
88
89/// @brief UTF8 text utilities
90namespace utf8 {
91
92/// Returns the length in bytes of the UTF-8 code point by the first byte.
93unsigned CodePointLengthByFirstByte(unsigned char c) noexcept;
94
95/// `bytes` must not be a nullptr, `length` must not be 0.
96bool IsWellFormedCodePoint(const unsigned char* bytes,
97 std::size_t length) noexcept;
98
99/// `bytes` must not be a nullptr, `length` must not be 0.
100bool IsValid(const unsigned char* bytes, std::size_t length) noexcept;
101
102/// Returns the number of UTF-8 code points; text must be in UTF-8 encoding
103/// @throws std::runtime_error if the text is not valid UTF-8
105
106/// Removes the longest (possibly empty) suffix of `str` which is a proper
107/// prefix of some utf-8 multibyte character. If `str` is not in utf-8 it may
108/// remove some suffix of length up to 3.
109void TrimTruncatedEnding(std::string& str);
110
111/// @see void TrimTruncatedEnding(std::string& str)
112/// @warning this **does not** change the original string
113void TrimViewTruncatedEnding(std::string_view& view);
114
115/// Returns position in `text` where utf-8 code point with position `pos` starts
116/// OR `text.length()` if `text` contains `pos` or fewer code points
117/// @warning this **does not** check if `text` is valid utf-8 text
119 std::size_t pos) noexcept;
120
121/// Removes the first `count` utf-8 code points from `text`
122/// @warning this **does not** check if `text` is valid utf-8 text
123void RemovePrefix(std::string& text, std::size_t count) noexcept;
124
125/// @see void RemovePrefix(std::string& text, std::size_t count)
126/// @warning this **does not** change the original string
127void RemoveViewPrefix(std::string_view& text, std::size_t count) noexcept;
128
129/// Takes the first `count` utf-8 code points from `text`
130/// @warning this **does not** check if `text` is valid utf-8 text
131void TakePrefix(std::string& text, std::size_t count) noexcept;
132
133/// @see void TakePrefix(std::string& text, std::size_t count)
134/// @warning this **does not** change the original string
135void TakeViewPrefix(std::string_view& text, std::size_t count) noexcept;
136
137} // namespace utf8
138
139/// Checks if text is in utf-8 encoding
140bool IsUtf8(std::string_view text) noexcept;
141
142/// Checks that text matches all of the following conditions:
143/// 1. text is in utf-8 encoding
144/// 2. text does not contain any of control ascii characters
145/// 3. if `ascii_only` is true then text contains only ascii characters
146bool IsPrintable(std::string_view text, bool ascii_only = true) noexcept;
147
148/// Checks if there are no embedded null ('\0') characters in text
150
151/// Converts CamelCase to snake_case (underscore-separated)
152std::string CamelCaseToSnake(std::string_view camel);
153
154} // namespace utils::text
155
156USERVER_NAMESPACE_END