userver: userver/utils/regex.hpp Source File
Loading...
Searching...
No Matches
regex.hpp
Go to the documentation of this file.
1#pragma once
2
3/// @file userver/utils/regex.hpp
4/// @brief @copybrief utils::regex
5
6#include <cstddef>
7#include <exception>
8#include <string_view>
9
10#include <userver/utils/fast_pimpl.hpp>
11
12USERVER_NAMESPACE_BEGIN
13
14namespace utils {
15
16class match_results;  // forward declaration: named by the friend declarations inside utils::regex before its definition
17struct Re2Replacement;  // forward declaration: used as a parameter type of regex_replace before its definition
18
19/// @brief Exception thrown from constructors of @ref utils::regex when the pattern is invalid.
20class RegexError : public std::exception {};
21
22/// @ingroup userver_universal userver_containers
23///
24/// @brief A drop-in replacement for `std::regex` without huge includes
25/// and with better performance characteristics.
26///
27/// utils::regex is currently implemented using re2.
28///
29/// @see @ref utils::regex_match
30/// @see @ref utils::regex_search
31/// @see @ref utils::regex_replace
32///
33/// Read [re2 documentation](https://github.com/google/re2/wiki/syntax) on the limitations of the re2 engine.
34/// Notably, it does not support:
35///
36/// 1. lookahead and lookbehind;
37/// 2. quantifiers over 1000 (regexes with large repetition counts consume more memory);
38/// 3. spaces in quantifiers like `\w{1, 5}`;
39/// 4. possessive quantifiers.
40///
41/// ## An example of complex string parsing using `utils::regex`
42///
43/// @snippet utils/regex_test.cpp split text
44class regex final {
45public:
46 /// Constructs a null regex, any usage except for copy/move is UB.
48
49 /// @brief Compiles a regex from @a pattern; the resulting object is always valid on construction.
50 /// @throws utils::RegexError if @a pattern is invalid
51 explicit regex(std::string_view pattern);
52
53 regex(const regex&);
54 regex(regex&&) noexcept;
55 regex& operator=(const regex&);
56 regex& operator=(regex&&) noexcept;
57 ~regex();
58
59 /// @returns `true` if the patterns are equal.
60 /// @note May also return `true` if the patterns are not equal, but are equivalent.
61 bool operator==(const regex&) const;
62
63 /// @returns a view to the original pattern stored inside.
65
66 /// @returns the original pattern.
67 std::string str() const;
68
69private:
70 struct Impl;
71 utils::FastPimpl<Impl, 16, 8> impl_;  // NOTE(review): 16 and 8 are presumably the storage size and alignment reserved for Impl — confirm against utils::FastPimpl
72
73 friend class match_results;
74 friend bool regex_match(std::string_view str, const regex& pattern);
75 friend bool regex_match(std::string_view str, match_results& m, const regex& pattern);
76 friend bool regex_search(std::string_view str, const regex& pattern);
77 friend bool regex_search(std::string_view str, match_results& m, const regex& pattern);
78 friend std::string regex_replace(std::string_view str, const regex& pattern, std::string_view repl);
79 friend std::string regex_replace(std::string_view str, const regex& pattern, Re2Replacement repl);
80};
81
82/// @ingroup userver_universal userver_containers
83///
84/// @brief A drop-in replacement for `std::match_results` without huge includes
85/// and with better performance characteristics. Represents capturing groups of a single match result.
86///
87/// The group 0 always matches the whole pattern. User groups start with index 1.
88///
89/// Non-empty groups always point within the source string, so the position of a group within the source string
90/// can be obtained by subtracting `.data()` pointers or `.begin()` iterators.
91///
92/// @warning The implementation can return empty groups as `std::string_view`s with `data() == nullptr` or some invalid
93/// pointer with `size() == 0`. Check for emptiness before performing pointer arithmetic if a group can be empty
94/// according to the regex!
95///
96/// @see utils::regex
97class match_results final {
98public:
99 /// Constructs a null `match_results`, any usage except for copy/move is UB.
100 /// Filled upon successful @ref regex_match or @ref regex_search.
102
103 match_results(const match_results&);  // NOTE(review): no move operations are declared in this view, so rvalues would be copied — confirm this is intended
104 match_results& operator=(const match_results&);
105 ~match_results();
106
107 /// @returns the number of capturing groups, including the group 0.
108 std::size_t size() const;
109
110 /// @returns the capturing group at @a sub.
111 /// @note Group 0 always matches the whole pattern. User groups start with index 1.
113
114 /// @returns the position of the first character of the capturing group @a sub within the target (haystack) string.
115 /// @note Group 0 always matches the whole pattern. User groups start with index 1.
116 /// @warning For empty groups, calling this method is UB. Group 0 is always valid.
117 std::size_t position(std::size_t sub) const;
118
119 /// @returns the length of the capturing group at @a sub.
120 /// @note Group 0 always matches the whole pattern. User groups start with index 1.
121 std::size_t length(std::size_t sub) const;
122
123 /// @returns the substring from the beginning of the target (haystack) string to the beginning of the full match.
125
126 /// @returns the substring from the end of the full match to the end of the target (haystack) string.
128
129private:
130 struct Impl;
131 utils::FastPimpl<Impl, 120, 8> impl_;  // NOTE(review): 120 and 8 are presumably the storage size and alignment reserved for Impl — confirm against utils::FastPimpl
132
133 friend bool regex_match(std::string_view str, const regex& pattern);
134 friend bool regex_match(std::string_view str, match_results& m, const regex& pattern);
135 friend bool regex_search(std::string_view str, const regex& pattern);
136 friend bool regex_search(std::string_view str, match_results& m, const regex& pattern);
137 friend std::string regex_replace(std::string_view str, const regex& pattern, std::string_view repl);
138 friend std::string regex_replace(std::string_view str, const regex& pattern, Re2Replacement repl);
139};
140
141/// @brief Determines whether the regular expression @a pattern matches the entire target
142/// character sequence @a str.
143bool regex_match(std::string_view str, const regex& pattern);
144
145/// @brief Returns `true` if the regular expression @a pattern matches
146/// the whole of the input @a str. Fills in what matched in @a m.
147/// @note @a m may be clobbered on failure.
148bool regex_match(std::string_view str, match_results& m, const regex& pattern);
149
150/// @brief Determines whether the regular expression @a pattern matches anywhere in the
151/// target character sequence @a str.
152bool regex_search(std::string_view str, const regex& pattern);
153
154/// @brief Determines whether the regular expression @a pattern matches anywhere in the
155/// target character sequence @a str. Fills in what matched in @a m.
156/// @note @a m may be clobbered on failure.
157bool regex_search(std::string_view str, match_results& m, const regex& pattern);
158
159/// @brief Creates a new string where all matches of the regular expression @a pattern in @a str are replaced
160/// with @a repl.
161///
162/// Interprets @a repl as a literal, does not support substitutions.
163///
164/// @see utils::Re2Replacement
165std::string regex_replace(std::string_view str, const regex& pattern, std::string_view repl);
166
167/// @brief Replacement string with substitution support.
168///
169/// @warning Avoid if at all possible, prefer using vanilla
170/// @ref utils::regex_replace, as it is more portable.
171///
172/// @warning Allowing user-provided strings in @a replacement leads
173/// to injection vulnerabilities!
174///
175/// May contain the following special syntax:
176///
177/// * `\N` (spelled as `\\N` in C++ string literals), where 0 <= N <= 9,
178/// can be used to insert capture groups;
179/// * In particular, `\0` refers to the contents of the whole match;
180/// * Literal `\` should be escaped as `\\`
181/// (spelled as `\\\\` in C++ string literals).
182///
183/// @see utils::regex_replace
184struct Re2Replacement final {
185 std::string_view replacement;  // non-owning view: the referenced characters must outlive any use of this object
186};
187
188/// @overload
189/// @see utils::Re2Replacement for the substitution syntax accepted in @a repl
190std::string regex_replace(std::string_view str, const regex& pattern, Re2Replacement repl);
191
192/// @cond
193bool IsImplicitBoostRegexFallbackAllowed() noexcept;  // excluded from generated docs by the enclosing cond/endcond pair; NOTE(review): presumably reads a flag controlling fallback to Boost.Regex — confirm against the implementation
194void SetImplicitBoostRegexFallbackAllowed(bool) noexcept;  // NOTE(review): presumably sets the flag read by IsImplicitBoostRegexFallbackAllowed — confirm against the implementation
195/// @endcond
196
197} // namespace utils
198
199USERVER_NAMESPACE_END