17#elif defined(__SSSE3__)
19#elif defined(__SSE2__
)
23#include <userver/utils/assert.hpp>
26#define USERVER_IMPL_FORCE_INLINE __attribute__((always_inline)) inline
30#define USERVER_IMPL_DISABLE_ASAN __attribute__((no_sanitize_address, no_sanitize_memory, no_sanitize_thread))
33#define USERVER_IMPL_DISABLE_ASAN __attribute__((no_sanitize_address))
36USERVER_NAMESPACE_BEGIN
38namespace utils::encoding {
/// Character placed between a key and its value in a TSKV pair.
inline constexpr char kTskvKeyValueSeparator = '=';

/// Character placed between consecutive key-value pairs of a TSKV record.
inline constexpr char kTskvPairsSeparator = '\t';
/// Selects which escaping rules EncodeTskv applies.
///
/// `kKey` and `kKeyReplacePeriod` are both treated as key encodings by the
/// per-character encoder; `kValue` is the only mode that takes the block-wise
/// fast path in the buffered encoder.
/// NOTE(review): `kKeyReplacePeriod` is handled by a dedicated branch of the
/// per-character encoder; presumably it substitutes '.' in keys - confirm.
enum class EncodeTskvMode { kKey, kValue, kKeyReplacePeriod };
51template <
typename OutIter>
52OutIter
EncodeTskv(OutIter destination,
char ch, EncodeTskvMode mode);
60template <
typename Container>
61void EncodeTskv(Container& container, std::string_view str, EncodeTskvMode mode);
66template <
typename OutIter>
68 const bool is_key_encoding = (mode == EncodeTskvMode::kKey || mode == EncodeTskvMode::kKeyReplacePeriod);
69 const auto append = [&destination](
char ch) { *(destination++) = ch; };
93 if (mode == EncodeTskvMode::kKeyReplacePeriod) {
124 if (is_key_encoding) {
130 if (is_key_encoding) {
143namespace impl::tskv {
145template <std::size_t Alignment,
typename T>
147 static_assert(Alignment %
sizeof(T) == 0);
148 return reinterpret_cast<T*>(
reinterpret_cast<std::uintptr_t>(ptr) / Alignment * Alignment);
151template <std::size_t Alignment>
153 UASSERT(
reinterpret_cast<std::uintptr_t>(block) % Alignment == 0);
154 return static_cast<
const char*>(__builtin_assume_aligned(block, Alignment));
157constexpr auto MakeShuffleIndicesForRightShift()
noexcept {
158 constexpr std::size_t kShuffleWidth = 16;
159 std::array<std::uint8_t, kShuffleWidth * 2> result{};
160 for (
auto& item : result) {
163 for (std::size_t i = 0; i < kShuffleWidth; ++i) {
169struct EncoderStd
final {
170 using Block = std::uint64_t;
171 static constexpr std::size_t kBlockSize =
sizeof(Block);
177 block = AssumeAligned<kBlockSize>(block);
178 return *
reinterpret_cast<
const Block*>(block);
182 const auto cut_block = block >> (offset * 8);
183 std::memcpy(destination, &cut_block,
sizeof(cut_block));
188 char buffer[kBlockSize]{};
189 std::memcpy(&buffer, &block,
sizeof(block));
190 for (
const char c : std::string_view(buffer + offset, count)) {
191 if (c <=
'\r' || c ==
'\\') {
201 using Block = __m128i;
202 static constexpr std::size_t kBlockSize =
sizeof(Block);
208 block = AssumeAligned<kBlockSize>(block);
209 return _mm_load_si128(
reinterpret_cast<
const Block*>(block));
214 alignas(kBlockSize * 2)
char storage[kBlockSize * 2]{};
215 _mm_store_si128(
reinterpret_cast<Block*>(&storage), block_contents);
216 const auto cut_block = _mm_loadu_si128(
reinterpret_cast<__m128i_u*>(&storage[offset]));
217 _mm_storeu_si128(
reinterpret_cast<__m128i_u*>(destination), cut_block);
224 const auto may_need_escaping_mask = _mm_movemask_epi8(
225 _mm_or_si128(_mm_cmpgt_epi8(_mm_set1_epi8(
'\r' + 1), block), _mm_cmpeq_epi8(block, _mm_set1_epi8(
'\\')))
228 std::uint32_t>(
static_cast<std::uint32_t>(may_need_escaping_mask) >> offset << (32 - count)) != 0;
234struct EncoderSsse3 final :
public EncoderSse2 {
235 USERVER_IMPL_FORCE_INLINE
static void CopyBlock(Block block, std::size_t offset,
char* destination)
noexcept {
236 static constexpr auto kShuffleIdx = MakeShuffleIndicesForRightShift();
237 const auto pos = _mm_loadu_si128(
reinterpret_cast<
const __m128i_u*>(&kShuffleIdx[offset]));
238 const auto cut_block = _mm_shuffle_epi8(block, pos);
239 _mm_storeu_si128(
reinterpret_cast<__m128i_u*>(destination), cut_block);
245struct EncoderAvx2 final {
246 using Block = __m256i;
247 static constexpr std::size_t kBlockSize =
sizeof(Block);
252 USERVER_IMPL_DISABLE_ASAN
inline static Block LoadBlock(
const char* block)
noexcept {
253 block = AssumeAligned<kBlockSize>(block);
254 return _mm256_load_si256(
reinterpret_cast<
const Block*>(block));
257 USERVER_IMPL_FORCE_INLINE
static void CopyBlock(Block block, std::size_t offset,
char* destination)
noexcept {
258 alignas(kBlockSize * 2)
char storage[kBlockSize * 2]{};
259 _mm256_store_si256(
reinterpret_cast<Block*>(&storage), block);
260 const auto cut_block = _mm256_loadu_si256(
reinterpret_cast<__m256i_u*>(&storage[offset]));
261 _mm256_storeu_si256(
reinterpret_cast<__m256i_u*>(destination), cut_block);
264 USERVER_IMPL_FORCE_INLINE
static bool MayNeedValueEscaping(Block block, std::size_t offset, std::size_t count)
268 const auto may_need_escaping_mask = _mm256_movemask_epi8(_mm256_or_si256(
269 _mm256_cmpgt_epi8(_mm256_set1_epi8(
'\r' + 1), block),
270 _mm256_cmpeq_epi8(block, _mm256_set1_epi8(
'\\'))
273 std::uint32_t>(
static_cast<std::uint32_t>(may_need_escaping_mask) >> offset << (32 - count)) != 0;
279using SystemEncoder = EncoderAvx2;
280#elif defined(__SSSE3__)
281using SystemEncoder = EncoderSsse3;
282#elif defined(__SSE2__
)
283using SystemEncoder = EncoderSse2;
285using SystemEncoder = EncoderStd;
/// @returns the number of spare bytes that must follow the encoded output:
/// one full block of `Encoder`, because block copies always store a whole
/// block even when fewer bytes are logically appended.
template <typename Encoder>
constexpr std::size_t PaddingSize() {
    return Encoder::kBlockSize;
}
/// Write cursor into the output buffer, tagged with the encoder type that
/// produces the data. `current` points at the next byte to be written.
template <typename Encoder>
struct BufferPtr final {
    char* current{nullptr};
};
301template <
typename Encoder>
303 BufferPtr<Encoder> destination,
304 typename Encoder::Block block,
308 char*
const old_current = destination.current;
309 destination.current += count;
310 Encoder::CopyBlock(block, offset, old_current);
315template <
typename Encoder>
316[[nodiscard]]
__attribute__((noinline)) BufferPtr<Encoder> EncodeValueEach(
317 BufferPtr<Encoder> destination,
320 for (
const char c : str) {
321 destination.current = encoding::EncodeTskv(destination.current, c, EncodeTskvMode::kValue);
326template <
typename Encoder>
328 BufferPtr<Encoder> destination,
333 UASSERT(offset < Encoder::kBlockSize);
334 UASSERT(offset + count <= Encoder::kBlockSize);
335 block = AssumeAligned<Encoder::kBlockSize>(block);
336 const auto block_contents = Encoder::LoadBlock(block);
338 if (__builtin_expect(Encoder::MayNeedValueEscaping(block_contents, offset, count),
false)) {
339 destination = tskv::EncodeValueEach(destination, std::string_view(block + offset, count));
342 destination = tskv::AppendBlock(destination, block_contents, offset, count);
349template <
typename Encoder>
350[[nodiscard]]
__attribute__((noinline)) BufferPtr<Encoder> EncodeValue(
351 BufferPtr<Encoder> destination,
358 const char*
const first_block = AlignDown<Encoder::kBlockSize>(str.data());
359 const auto first_block_offset =
static_cast<std::size_t>(str.data() - first_block);
360 const auto first_block_count = std::min(Encoder::kBlockSize - first_block_offset, str.size());
362 destination = tskv::EncodeValueBlock(destination, first_block, first_block_offset, first_block_count);
364 const char*
const last_block = AlignDown<Encoder::kBlockSize>(str.data() + str.size());
366 if (last_block != first_block) {
367 for (
const char* current_block = first_block + Encoder::kBlockSize; current_block < last_block;
368 current_block += Encoder::kBlockSize)
370 destination = tskv::EncodeValueBlock(destination, current_block, 0, Encoder::kBlockSize);
373 const auto last_block_count =
static_cast<std::size_t>(str.data() + str.size() - last_block);
374 if (last_block_count != 0) {
375 destination = tskv::EncodeValueBlock(destination, last_block, 0, last_block_count);
382template <
typename Encoder>
383[[nodiscard]] BufferPtr<Encoder> DoEncode(BufferPtr<Encoder> destination, std::string_view str, EncodeTskvMode mode) {
384 if (mode == EncodeTskvMode::kValue) {
385 return tskv::EncodeValue(destination, str);
387 for (
const char c : str) {
388 destination.current = encoding::EncodeTskv(destination.current, c, mode);
/// @returns an upper bound on the encoded size of a string of `source_size`
/// characters: twice the source size.
inline constexpr std::size_t MaxEncodedSize(std::size_t source_size) noexcept { return source_size * 2; }
396template <
typename Encoder,
typename Container>
397void EncodeFullyBuffered(Container& container, std::string_view str, EncodeTskvMode mode) {
398 const auto old_size = container.size();
399 container.resize(old_size + MaxEncodedSize(str.size()) + PaddingSize<Encoder>());
400 BufferPtr<Encoder> buffer_ptr{container.data() + old_size};
402 buffer_ptr = tskv::DoEncode(buffer_ptr, str, mode);
404 container.resize(buffer_ptr.current - container.data());
409template <
typename Container>
410void EncodeTskv(Container& container, std::string_view str, EncodeTskvMode mode) {
411 impl::tskv::EncodeFullyBuffered<impl::tskv::SystemEncoder>(container, str, mode);
415inline bool ShouldKeyBeEscaped(std::string_view key)
noexcept {
416 for (
const char ch : key) {
427 if (
'A' <= ch && ch <=
'Z') {
/// @brief Conservative check of whether a TSKV value may need escaping.
///
/// Uses the same per-character predicate as the block encoders: a character
/// that compares `<= '\r'` or equals a backslash may need escaping.
///
/// The whole string is scanned character by character. Loading a single
/// encoder block straight from `key.data()` is not valid here: the data is
/// not guaranteed to be block-aligned, a block read can run past the end of a
/// short string, and strings longer than one block would only be partially
/// inspected.
/// @param key the value to inspect (may be empty, of any length or alignment)
/// @returns true if at least one character may need escaping
inline bool ShouldValueBeEscaped(std::string_view key) noexcept {
    for (const char ch : key) {
        if (ch <= '\r' || ch == '\\') {
            return true;
        }
    }
    return false;
}
// Keep the helper macros private to this header: undefine exactly the names
// that were defined at the top of the file.
#undef USERVER_IMPL_FORCE_INLINE
#undef USERVER_IMPL_DISABLE_ASAN