std::vector<std::string_view> SplitTextIntoWords(const std::string_view text) {
static const utils::regex capitalized_word_start_regex(
"^[A-Z]");
std::vector<std::string_view> words;
auto remaining = text;
const auto punctuation = word_match.
prefix();
throw std::invalid_argument(fmt::format("Invalid characters '{}'", punctuation));
}
const auto word = word_match[0];
const bool should_be_capitalized = words.empty() || punctuation.find('.') != std::string_view::npos;
throw std::invalid_argument(fmt::format("Word '{}' should be capitalized", word));
}
words.push_back(word);
remaining = word_match.
suffix();
}
throw std::invalid_argument(fmt::format("Invalid characters '{}'", remaining));
}
return words;
}
// Checks SplitTextIntoWords on one well-formed text and on three malformed ones.
TEST(Regex, SplitTextIntoWords) {
    // Well-formed text: only the words come back, in their original order.
    const auto words = SplitTextIntoWords("Foo bar. Baz, qux quux.");
    EXPECT_THAT(words, testing::ElementsAre("Foo", "bar", "Baz", "qux", "quux"));

    // A disallowed character between two words is reported with the whole separator.
    UEXPECT_THROW_MSG(SplitTextIntoWords("Foo + bar"), std::invalid_argument, "Invalid characters ' + '");

    // A word that follows a period must start with an uppercase letter.
    UEXPECT_THROW_MSG(SplitTextIntoWords("Foo bar. baz."), std::invalid_argument, "Word 'baz' should be capitalized");

    // A disallowed character after the last word is reported with the remaining tail.
    UEXPECT_THROW_MSG(SplitTextIntoWords("Foo, bar% "), std::invalid_argument, "Invalid characters '% '");
}