userver: userver/utils/regex.hpp Source File
Loading...
Searching...
No Matches
regex.hpp
Go to the documentation of this file.
1#pragma once
2
3/// @file userver/utils/regex.hpp
4/// @brief @copybrief utils::regex
5
6#include <cstddef>
7#include <exception>
8#include <string_view>
9
10#include <userver/utils/fast_pimpl.hpp>
11
12USERVER_NAMESPACE_BEGIN
13
14namespace utils {
15
16// NOLINTBEGIN(readability-identifier-naming)
17
18class match_results;
19struct Re2Replacement;
20
21/// @brief Exception thrown by constructors of @ref utils::regex when given an invalid pattern.
22class RegexError : public std::exception {};
23
24/// @ingroup userver_universal userver_containers
25///
26/// @brief A drop-in replacement for `std::regex` without huge includes
27/// and with better performance characteristics.
28///
29/// utils::regex is currently implemented using re2.
30///
31/// @see @ref utils::regex_match
32/// @see @ref utils::regex_search
33/// @see @ref utils::regex_replace
34///
35/// Read [re2 documentation](https://github.com/google/re2/wiki/syntax) on the limitations of re2 engine.
36/// Notably, it does not support:
37///
38/// 1. lookahead and lookbehind;
39/// 2. quantifiers over 1000, regexes with large repetition counts consume more memory;
40/// 3. spaces in quantifiers like `\w{1, 5}`;
41/// 4. possessive quantifiers.
42///
43/// ## An example of complex string parsing using `utils::regex`
44///
45/// @snippet utils/regex_test.cpp split text
46class regex final {
47public:
48 /// Constructs a null regex, any usage except for copy/move is UB.
49 regex();
50
51 /// @brief Compiles regex from pattern, always valid on construction.
52 /// @throws utils::RegexError if @a pattern is invalid
53 explicit regex(std::string_view pattern);
54
55 regex(const regex&);
56 regex(regex&&) noexcept;
57 regex& operator=(const regex&);
58 regex& operator=(regex&&) noexcept;
59 ~regex();
60
61 /// @returns `true` if the patterns are equal.
62 /// @note May also return `true` if the patterns are not equal, but are equivalent.
63 bool operator==(const regex&) const;
64
65 /// @returns a view to the original pattern stored inside.
66 std::string_view GetPatternView() const;
67
68 /// @returns the original pattern.
69 std::string str() const;
70
71private:
72 class Impl;
73 utils::FastPimpl<Impl, 16, 8> impl_;
74
75 friend class match_results;
76 friend bool regex_match(std::string_view str, const regex& pattern);
77 friend bool regex_match(std::string_view str, match_results& m, const regex& pattern);
78 friend bool regex_search(std::string_view str, const regex& pattern);
79 friend bool regex_search(std::string_view str, match_results& m, const regex& pattern);
80 friend std::string regex_replace(std::string_view str, const regex& pattern, std::string_view repl);
81 friend std::string regex_replace(std::string_view str, const regex& pattern, Re2Replacement repl);
82};
83
84/// @ingroup userver_universal userver_containers
85///
86/// @brief A drop-in replacement for `std::match_results` without huge includes
87/// and with better performance characteristics. Represents capturing groups of a single match result.
88///
89/// The group 0 always matches the whole pattern. User groups start with index 1.
90///
91/// Non-empty groups always point within the source string, so the position of a group within the source string
92/// can be obtained by subtracting `.data()` pointers or `.begin()` iterators.
93///
94/// @warning The implementation can return empty groups as `std::string_view`s with `data() == nullptr` or some invalid
95/// pointer with `size() == 0`. Check for emptiness before performing pointer arithmetic if a group can be empty
96/// according to the regex!
97///
98/// @see utils::regex
99class match_results final {
100public:
101 /// Constructs a null `match_results`, any usage except for copy/move is UB.
102 /// Filled upon successful @ref regex_match or @ref regex_search.
103 match_results();
104
105 match_results(const match_results&);
106 match_results& operator=(const match_results&);
107 ~match_results();
108
109 /// @returns the number of capturing groups, including the group 0.
110 std::size_t size() const;
111
112 /// @returns the capturing group at @a sub.
113 /// @note Group 0 always matches the whole pattern. User groups start with index 1.
114 std::string_view operator[](std::size_t sub) const;
115
116 /// @returns the position of the first character of the capturing group @a sub within the target (haystack) string.
117 /// @note Group 0 always matches the whole pattern. User groups start with index 1.
118 /// @warning For empty groups, calling this method is UB. Group 0 is always valid.
119 std::size_t position(std::size_t sub) const;
120
121 /// @returns the length of the capturing group at @a sub.
122 /// @note Group 0 always matches the whole pattern. User groups start with index 1.
123 std::size_t length(std::size_t sub) const;
124
125 /// @returns the substring from the beginning of the target (haystack) string to the beginning of the full match.
126 std::string_view prefix() const;
127
128 /// @returns the substring from the end of the full match to the end of the target (haystack) string.
129 std::string_view suffix() const;
130
131private:
132 struct Impl;
133 utils::FastPimpl<Impl, 120, 8> impl_;
134
135 friend bool regex_match(std::string_view str, const regex& pattern);
136 friend bool regex_match(std::string_view str, match_results& m, const regex& pattern);
137 friend bool regex_search(std::string_view str, const regex& pattern);
138 friend bool regex_search(std::string_view str, match_results& m, const regex& pattern);
139 friend std::string regex_replace(std::string_view str, const regex& pattern, std::string_view repl);
140 friend std::string regex_replace(std::string_view str, const regex& pattern, Re2Replacement repl);
141};
142
143/// @brief Checks whether @a pattern matches the entire target character sequence @a str.
144/// @returns `true` if the whole of @a str is matched, `false` otherwise.
145bool regex_match(std::string_view str, const regex& pattern);
146
147/// @brief Returns `true` if @a pattern matches the whole of the input @a str.
148/// On success, fills in what matched in @a m.
149/// @note @a m may be clobbered on failure.
150bool regex_match(std::string_view str, match_results& m, const regex& pattern);
151
152/// @brief Checks whether @a pattern matches anywhere in the target character sequence @a str.
153/// @returns `true` if a match is found, `false` otherwise.
154bool regex_search(std::string_view str, const regex& pattern);
155
156/// @brief Checks whether @a pattern matches anywhere in the target character sequence @a str.
157/// On success, fills in what matched in @a m.
158/// @note @a m may be clobbered on failure.
159bool regex_search(std::string_view str, match_results& m, const regex& pattern);
160
161/// @brief Creates a new string in which all matches of @a pattern in @a str
162/// are replaced with @a repl.
163///
164/// Interprets @a repl as a literal, does not support substitutions.
165///
166/// @see utils::Re2Replacement
167std::string regex_replace(std::string_view str, const regex& pattern, std::string_view repl);
168
169/// @brief Replacement string with substitution support
170///
171/// @warning Avoid if at all possible, prefer using vanilla
172/// @ref utils::regex_replace, as it is more portable
173///
174/// @warning Allowing user-provided strings in @a replacement leads
175/// to injection vulnerabilities!
176///
177/// May contain the following special syntax:
178///
179/// * `\N` (spelled as `\\N` in C++ string literals), where 0 <= N <= 9,
180/// can be used to insert capture groups;
181/// * In particular, `\0` refers to the contents of the whole match;
182/// * Literal `\` should be escaped as `\\`
183/// (spelled as `\\\\` in C++ string literals)
184///
185/// @see utils::regex_replace
186struct Re2Replacement final {
187 std::string_view replacement;  ///< Replacement text in the syntax listed above; a non-owning view.
188};
189
190/// @overload
191/// @see utils::Re2Replacement for the substitution syntax accepted in @a repl.
192std::string regex_replace(std::string_view str, const regex& pattern, Re2Replacement repl);
193
194// NOLINTEND(readability-identifier-naming)
195
196} // namespace utils
197
198USERVER_NAMESPACE_END