A drop-in replacement for std::regex
without huge includes and with better performance characteristics.
utils::regex is currently implemented using re2.
See also:
- utils::regex_match
- utils::regex_search
- utils::regex_replace
Read the re2 documentation for the limitations of the re2 engine. Notably, it does not support:
- lookahead and lookbehind;
- quantifiers over 1000 (in addition, regexes with large repetition counts consume more memory);
- spaces in quantifiers, like `\w{1, 5}`;
- possessive quantifiers.
An example of complex string parsing using utils::regex:
std::vector<std::string_view> SplitTextIntoWords(const std::string_view text) {
static const utils::regex capitalized_word_start_regex(
"^[A-Z]");
std::vector<std::string_view> words;
auto remaining = text;
const auto punctuation = word_match.
prefix();
throw std::invalid_argument(fmt::format("Invalid characters '{}'", punctuation));
}
const auto word = word_match[0];
const bool should_be_capitalized = words.empty() || punctuation.find('.') != std::string_view::npos;
throw std::invalid_argument(fmt::format("Word '{}' should be capitalized", word));
}
words.push_back(word);
remaining = word_match.
suffix();
}
throw std::invalid_argument(fmt::format("Invalid characters '{}'", remaining));
}
return words;
}
// Exercises SplitTextIntoWords on one well-formed text and on three malformed
// ones, checking the exact exception message for each rejection.
TEST(Regex, SplitTextIntoWords) {
    // Well-formed text: only the words are returned, in order.
    EXPECT_THAT(
        SplitTextIntoWords("Foo bar. Baz, qux quux."),
        testing::ElementsAre("Foo", "bar", "Baz", "qux", "quux")
    );

    // Unexpected characters between two words.
    UEXPECT_THROW_MSG(
        SplitTextIntoWords("Foo + bar"),
        std::invalid_argument,
        "Invalid characters ' + '"
    );

    // A word following a period is not capitalized.
    UEXPECT_THROW_MSG(
        SplitTextIntoWords("Foo bar. baz."),
        std::invalid_argument,
        "Word 'baz' should be capitalized"
    );

    // Unexpected characters after the last word.
    UEXPECT_THROW_MSG(
        SplitTextIntoWords("Foo, bar% "),
        std::invalid_argument,
        "Invalid characters '% '"
    );
}
Definition at line 44 of file regex.hpp.