userver: userver/utils/regex.hpp Source File
Loading...
Searching...
No Matches
regex.hpp
Go to the documentation of this file.
1#pragma once
2
3/// @file userver/utils/regex.hpp
4/// @brief @copybrief utils::regex
5
6#include <cstddef>
7#include <exception>
8#include <string_view>
9
10#include <userver/compiler/impl/lifetime.hpp>
11#include <userver/utils/fast_pimpl.hpp>
12
13USERVER_NAMESPACE_BEGIN
14
15namespace utils {
16
17// NOLINTBEGIN(readability-identifier-naming)
18
19class match_results;  // defined below; forward-declared for the friend declarations inside regex
20struct Re2Replacement;  // defined below; forward-declared for the regex_replace friend overload
21
22/// @brief Exception thrown from constructors of @ref utils::regex when given an invalid pattern.
23class RegexError : public std::exception {};
24
25/// @ingroup userver_universal userver_containers
26///
27/// @brief A drop-in replacement for `std::regex` without huge includes
28/// and with better performance characteristics.
29///
30/// utils::regex is currently implemented using re2.
31///
32/// @see @ref utils::regex_match
33/// @see @ref utils::regex_search
34/// @see @ref utils::regex_replace
35///
36/// Read [re2 documentation](https://github.com/google/re2/wiki/syntax) on the limitations of re2 engine.
37/// Notably, it does not support:
38///
39/// 1. lookahead and lookbehind;
40/// 2. quantifiers over 1000, regexes with large repetition counts consume more memory;
41/// 3. spaces in quantifiers like `\w{1, 5}`;
42/// 4. possessive quantifiers.
43///
44/// ## An example of complex string parsing using `utils::regex`
45///
46/// @snippet utils/regex_test.cpp split text
47class regex final {
48public:
49 /// Constructs a null regex, any usage except for copy/move is UB.
50 regex();
51
52 /// @brief Compiles regex from pattern, always valid on construction.
53 /// @throws utils::RegexError if @a pattern is invalid
54 explicit regex(std::string_view pattern);
55
56 regex(const regex&);
57 regex(regex&&) noexcept;
58 regex& operator=(const regex&);
59 regex& operator=(regex&&) noexcept;
60 ~regex();
61
62 /// @returns `true` if the patterns are equal.
63 /// @note May also return `true` if the patterns are not equal, but are equivalent.
64 bool operator==(const regex&) const;
65
66 /// @returns a view to the original pattern stored inside.
67 std::string_view GetPatternView() const USERVER_IMPL_LIFETIME_BOUND;
68
69 /// @returns the original pattern.
70 std::string str() const;
71
72private:
73 class Impl;
74 utils::FastPimpl<Impl, 16, 8> impl_;  // NOTE(review): 16 and 8 are presumably the size and alignment reserved for Impl - confirm against FastPimpl
75
76 friend class match_results;
77 friend bool regex_match(std::string_view str, const regex& pattern);
78 friend bool regex_match(std::string_view str, match_results& m, const regex& pattern);
79 friend bool regex_search(std::string_view str, const regex& pattern);
80 friend bool regex_search(std::string_view str, match_results& m, const regex& pattern);
81 friend std::string regex_replace(std::string_view str, const regex& pattern, std::string_view repl);
82 friend std::string regex_replace(std::string_view str, const regex& pattern, Re2Replacement repl);
83};
84
85/// @ingroup userver_universal userver_containers
86///
87/// @brief A drop-in replacement for `std::match_results` without huge includes
88/// and with better performance characteristics. Represents capturing groups of a single match result.
89///
90/// The group 0 always matches the whole pattern. User groups start with index 1.
91///
92/// Non-empty groups always point within the source string, so the position of a group within the source string
93/// can be obtained by subtracting `.data()` pointers or `.begin()` iterators.
94///
95/// @warning The implementation can return empty groups as `std::string_view`s with `data() == nullptr` or some invalid
96/// pointer with `size() == 0`. Check for emptiness before performing pointer arithmetic if a group can be empty
97/// according to the regex!
98///
99/// @see utils::regex
100class match_results final {
101public:
102 /// Constructs a null `match_results`, any usage except for copy/move is UB.
103 /// Filled upon successful @ref regex_match or @ref regex_search.
104 match_results();
105
106 match_results(const match_results&);
107 match_results& operator=(const match_results&);
108 ~match_results();
109
110 /// @returns the number of capturing groups, including the group 0.
111 std::size_t size() const;
112
113 /// @returns the capturing group at @a sub.
114 /// @note Group 0 always matches the whole pattern. User groups start with index 1.
115 std::string_view operator[](std::size_t sub) const;
116
117 /// @returns the position of the first character of the capturing group @a sub within the target (haystack) string.
118 /// @note Group 0 always matches the whole pattern. User groups start with index 1.
119 /// @warning For empty groups, calling this method is UB. Group 0 is always valid.
120 std::size_t position(std::size_t sub) const;
121
122 /// @returns the length of the capturing group at @a sub.
123 /// @note Group 0 always matches the whole pattern. User groups start with index 1.
124 std::size_t length(std::size_t sub) const;
125
126 /// @returns the substring from the beginning of the target (haystack) string to the beginning of the full match.
127 std::string_view prefix() const;
128
129 /// @returns the substring from the end of the full match to the end of the target (haystack) string.
130 std::string_view suffix() const;
131
132private:
133 struct Impl;
134 utils::FastPimpl<Impl, 120, 8> impl_;  // NOTE(review): 120 and 8 are presumably the size and alignment reserved for Impl - confirm against FastPimpl
135
136 friend bool regex_match(std::string_view str, const regex& pattern);
137 friend bool regex_match(std::string_view str, match_results& m, const regex& pattern);
138 friend bool regex_search(std::string_view str, const regex& pattern);
139 friend bool regex_search(std::string_view str, match_results& m, const regex& pattern);
140 friend std::string regex_replace(std::string_view str, const regex& pattern, std::string_view repl);
141 friend std::string regex_replace(std::string_view str, const regex& pattern, Re2Replacement repl);
142};
143
144/// @brief Determines whether the regular expression @a pattern matches the entire target
145/// character sequence @a str.
146bool regex_match(std::string_view str, const regex& pattern);
147
148/// @brief Returns true if the specified regular expression @a pattern matches
149/// the whole of the input @a str. Fills in what matched in @a m.
150/// @note @a m may be clobbered on failure.
151bool regex_match(std::string_view str, match_results& m, const regex& pattern);
152
153/// @brief Determines whether the regular expression @a pattern matches anywhere in the
154/// target character sequence @a str.
155bool regex_search(std::string_view str, const regex& pattern);
156
157/// @brief Determines whether the regular expression @a pattern matches anywhere in the
158/// target character sequence @a str. Fills in what matched in @a m.
159/// @note @a m may be clobbered on failure.
160bool regex_search(std::string_view str, match_results& m, const regex& pattern);
161
162/// @brief Creates a new string where all matches of the regular expression @a pattern
163/// in @a str are replaced with @a repl.
164///
165/// Interprets @a repl as a literal, does not support substitutions.
166///
167/// @see utils::Re2Replacement
168std::string regex_replace(std::string_view str, const regex& pattern, std::string_view repl);
169
170/// @brief Replacement string with substitution support.
171///
172/// @warning Avoid if at all possible, prefer using vanilla
173/// @ref utils::regex_replace, as it is more portable.
174///
175/// @warning Allowing user-provided strings in @a replacement leads
176/// to injection vulnerabilities!
177///
178/// May contain the following special syntax:
179///
180/// * `\N` (spelled as `\\N` in C++ string literals), where 0 <= N <= 9,
181/// can be used to insert capture groups;
182/// * In particular, `\0` refers to the contents of the whole match;
183/// * Literal `\` should be escaped as `\\`
184/// (spelled as `\\\\` in C++ string literals).
185///
186/// @see utils::regex_replace
187struct Re2Replacement final {
188 std::string_view replacement;  ///< Replacement text; may contain the special syntax listed in the struct description.
189};
190
191/// @overload
192/// Unlike the `std::string_view` overload, @a repl supports substitutions, see @ref utils::Re2Replacement.
193std::string regex_replace(std::string_view str, const regex& pattern, Re2Replacement repl);
194
195// NOLINTEND(readability-identifier-naming)
196
197} // namespace utils
198
199USERVER_NAMESPACE_END