17#elif defined(__SSSE3__)
19#elif defined(__SSE2__
)
23#include <userver/utils/assert.hpp>
26#define USERVER_IMPL_FORCE_INLINE __attribute__((always_inline)) inline
30#define USERVER_IMPL_DISABLE_ASAN
31 __attribute__((no_sanitize_address, no_sanitize_memory, no_sanitize_thread))
34#define USERVER_IMPL_DISABLE_ASAN __attribute__((no_sanitize_address))
37USERVER_NAMESPACE_BEGIN
39namespace utils::encoding {
/// Character placed between a key and its value in a TSKV pair.
inline constexpr char kTskvKeyValueSeparator = '=';
/// Character placed between consecutive TSKV key-value pairs.
inline constexpr char kTskvPairsSeparator = '\t';
/// Selects how EncodeTskv treats the text it is given.
enum class EncodeTskvMode {
  /// Key encoding.
  kKey,
  /// Value encoding; DoEncode routes this mode to the block-wise
  /// EncodeValue path.
  kValue,
  /// Key encoding with an additional branch in EncodeTskv that is taken
  /// only for this mode (presumably period replacement - verify against
  /// the EncodeTskv body).
  kKeyReplacePeriod,
};
52template <
typename OutIter>
53OutIter
EncodeTskv(OutIter destination,
char ch, EncodeTskvMode mode);
// Declaration of the EncodeTskv overload that takes the output container by
// reference and the source text as a std::string_view.
// NOTE(review): the remainder of the parameter list is not visible here.
61template <
typename Container>
62void EncodeTskv(Container& container, std::string_view str,
// Body fragment of an OutIter-based encoder - presumably the definition of
// the single-character EncodeTskv overload declared earlier (TODO confirm;
// the signature's first line and most of the body are not visible here).
68template <
typename OutIter>
70 EncodeTskvMode mode) {
// True for both key modes: kKey and kKeyReplacePeriod.
71 const bool is_key_encoding = (mode == EncodeTskvMode::kKey ||
72 mode == EncodeTskvMode::kKeyReplacePeriod);
// Writes one character through the output iterator and advances it.
73 const auto append = [&destination](
char ch) { *(destination++) = ch; };
// Handling that applies only to kKeyReplacePeriod.
97 if (mode == EncodeTskvMode::kKeyReplacePeriod) {
// Two separate places where keys are treated differently from values.
128 if (is_key_encoding) {
134 if (is_key_encoding) {
147namespace impl::tskv {
// Rounds the address of `ptr` down to a multiple of Alignment
// (address / Alignment * Alignment) and returns it as T*.
// NOTE(review): the signature line is not visible here; the name AlignDown is
// inferred from the AlignDown<...>() calls in EncodeValue.
149template <std::size_t Alignment,
typename T>
// Alignment has to be a whole multiple of sizeof(T).
151 static_assert(Alignment %
sizeof(T) == 0);
152 return reinterpret_cast<T*>(
reinterpret_cast<std::uintptr_t>(ptr) /
153 Alignment * Alignment);
// Returns `block` annotated for the compiler as Alignment-aligned via
// __builtin_assume_aligned; the alignment is checked with UASSERT first.
// NOTE(review): the line carrying the function name is not visible here;
// AssumeAligned is inferred from the calls in the encoders.
156template <std::size_t Alignment>
158 const char* block)
noexcept {
159 UASSERT(
reinterpret_cast<std::uintptr_t>(block) % Alignment == 0);
160 return static_cast<
const char*>(__builtin_assume_aligned(block, Alignment));
/// Builds the 32-entry index table used by EncoderSsse3::CopyBlock.
///
/// Entries [0, 16) hold the identity indices 0..15; entries [16, 32) hold
/// 0xf0. Loading 16 consecutive entries starting at position `offset` and
/// feeding them to _mm_shuffle_epi8 therefore moves source bytes
/// offset..15 to the front. The 0xf0 entries have the top bit set, which the
/// shuffle instruction turns into zero bytes.
constexpr auto MakeShuffleIndicesForRightShift() noexcept {
  constexpr std::size_t kShuffleWidth = 16;
  std::array<std::uint8_t, kShuffleWidth * 2> result{};
  // Start with every entry marked as "produce a zero byte"...
  for (auto& item : result) item = 0xf0;
  // ...then overwrite the first half with the identity permutation.
  for (std::size_t i = 0; i < kShuffleWidth; ++i) {
    result[i] = static_cast<std::uint8_t>(i);
  }
  return result;
}
// Encoder that processes 8-byte blocks held in a std::uint64_t. It is the
// last alternative in the SystemEncoder selection chain.
// NOTE(review): the method signature lines are only partly visible here.
171struct EncoderStd
final {
172 using Block = std::uint64_t;
173 static constexpr std::size_t kBlockSize =
sizeof(Block);
// Loads one block by dereferencing the (asserted-aligned) pointer as Block.
179 const char* block)
noexcept {
180 block = AssumeAligned<kBlockSize>(block);
181 return *
reinterpret_cast<
const Block*>(block);
// Shifts the block right by offset * 8 bits and memcpy's all sizeof(Block)
// bytes of the shifted value to destination.
// NOTE(review): dropping the first `offset` bytes this way assumes a
// little-endian byte order - confirm.
186 char* destination)
noexcept {
187 const auto cut_block = block >> (offset * 8);
188 std::memcpy(destination, &cut_block,
sizeof(cut_block));
// Returns true if any of the `count` bytes starting at `offset` is <= '\r'
// or equals a backslash.
192 Block block, std::size_t offset, std::size_t count)
noexcept {
193 char buffer[kBlockSize]{};
194 std::memcpy(&buffer, &block,
sizeof(block));
195 for (
const char c : std::string_view(buffer + offset, count)) {
196 if (c <=
'\r' || c ==
'\\')
return true;
// Interior of an encoder that processes 16-byte __m128i blocks.
// NOTE(review): the struct header is not visible here; presumably this is
// EncoderSse2, the base class of EncoderSsse3 - confirm.
204 using Block = __m128i;
205 static constexpr std::size_t kBlockSize =
sizeof(Block);
// Aligned 16-byte load of one block.
211 const char* block)
noexcept {
212 block = AssumeAligned<kBlockSize>(block);
213 return _mm_load_si128(
reinterpret_cast<
const Block*>(block));
// Stores the block at the start of a zero-initialized buffer of twice the
// block size, reloads 16 bytes starting at `offset` (so the tail comes from
// the zeroed half), and writes all 16 bytes unaligned to destination.
218 char* destination)
noexcept {
219 alignas(kBlockSize * 2)
char storage[kBlockSize * 2]{};
220 _mm_store_si128(
reinterpret_cast<Block*>(&storage), block_contents);
221 const auto cut_block =
222 _mm_loadu_si128(
reinterpret_cast<__m128i_u*>(&storage[offset]));
223 _mm_storeu_si128(
reinterpret_cast<__m128i_u*>(destination), cut_block);
// Builds a per-byte bit mask: a bit is set where the byte compares (signed)
// below '\r' + 1 or equals a backslash. Shifting right by `offset` and then
// left by (32 - count) in 32-bit arithmetic keeps only the bits for bytes
// [offset, offset + count); the result is true if any of them is set.
227 Block block, std::size_t offset, std::size_t count)
noexcept {
230 const auto may_need_escaping_mask = _mm_movemask_epi8(
231 _mm_or_si128(_mm_cmpgt_epi8(_mm_set1_epi8(
'\r' + 1), block),
232 _mm_cmpeq_epi8(block, _mm_set1_epi8(
'\\'))));
233 return static_cast<std::uint32_t>(
234 static_cast<std::uint32_t>(may_need_escaping_mask) >>
235 offset << (32 - count)) != 0;
// Encoder derived from EncoderSse2 that supplies its own CopyBlock based on
// a byte shuffle.
// NOTE(review): one parameter line of CopyBlock is not visible here.
241struct EncoderSsse3 final :
public EncoderSse2 {
// Loads 16 shuffle indices starting at kShuffleIdx[offset], permutes the
// block with _mm_shuffle_epi8, and stores all 16 result bytes unaligned to
// destination.
242 USERVER_IMPL_FORCE_INLINE
static void CopyBlock(Block block,
244 char* destination)
noexcept {
245 static constexpr auto kShuffleIdx = MakeShuffleIndicesForRightShift();
246 const auto pos = _mm_loadu_si128(
247 reinterpret_cast<
const __m128i_u*>(&kShuffleIdx[offset]));
248 const auto cut_block = _mm_shuffle_epi8(block, pos);
249 _mm_storeu_si128(
reinterpret_cast<__m128i_u*>(destination), cut_block);
// Encoder that processes 32-byte __m256i blocks.
// NOTE(review): some lines of this struct are not visible here.
255struct EncoderAvx2 final {
256 using Block = __m256i;
257 static constexpr std::size_t kBlockSize =
sizeof(Block);
// Aligned 32-byte load of one block.
262 USERVER_IMPL_DISABLE_ASAN
inline static Block LoadBlock(
263 const char* block)
noexcept {
264 block = AssumeAligned<kBlockSize>(block);
265 return _mm256_load_si256(
reinterpret_cast<
const Block*>(block));
// Stores the block at the start of a zero-initialized buffer of twice the
// block size, reloads 32 bytes starting at `offset` (so the tail comes from
// the zeroed half), and writes all 32 bytes unaligned to destination.
268 USERVER_IMPL_FORCE_INLINE
static void CopyBlock(Block block,
270 char* destination)
noexcept {
271 alignas(kBlockSize * 2)
char storage[kBlockSize * 2]{};
272 _mm256_store_si256(
reinterpret_cast<Block*>(&storage), block);
273 const auto cut_block =
274 _mm256_loadu_si256(
reinterpret_cast<__m256i_u*>(&storage[offset]));
275 _mm256_storeu_si256(
reinterpret_cast<__m256i_u*>(destination), cut_block);
// Builds a per-byte bit mask: a bit is set where the byte compares (signed)
// below '\r' + 1 or equals a backslash. Shifting right by `offset` and then
// left by (32 - count) in 32-bit arithmetic keeps only the bits for bytes
// [offset, offset + count); the result is true if any of them is set.
278 USERVER_IMPL_FORCE_INLINE
static bool MayNeedValueEscaping(
279 Block block, std::size_t offset, std::size_t count)
noexcept {
282 const auto may_need_escaping_mask = _mm256_movemask_epi8(
283 _mm256_or_si256(_mm256_cmpgt_epi8(_mm256_set1_epi8(
'\r' + 1), block),
284 _mm256_cmpeq_epi8(block, _mm256_set1_epi8(
'\\'))));
285 return static_cast<std::uint32_t>(
286 static_cast<std::uint32_t>(may_need_escaping_mask) >>
287 offset << (32 - count)) != 0;
// SystemEncoder is the encoder type chosen at compile time from the
// instruction-set macros; EncoderStd is the last alternative.
// NOTE(review): the opening #if, the #else and the #endif of this chain are
// not visible here.
293using SystemEncoder = EncoderAvx2;
294#elif defined(__SSSE3__)
295using SystemEncoder = EncoderSsse3;
296#elif defined(__SSE2__
)
297using SystemEncoder = EncoderSse2;
299using SystemEncoder = EncoderStd;
305template <
typename Encoder>
306constexpr std::size_t PaddingSize() {
307 return Encoder::kBlockSize;
310template <
typename Encoder>
311struct BufferPtr
final {
312 char* current{
nullptr};
// Copies a block to the current write position through Encoder::CopyBlock
// and advances the cursor by `count`. CopyBlock receives only the block,
// the offset and the old position - not `count`.
// NOTE(review): the line with the function name and the return statement
// are not visible here; AppendBlock is inferred from the tskv::AppendBlock
// call in the block encoder.
315template <
typename Encoder>
317 BufferPtr<Encoder> destination,
typename Encoder::Block block,
318 std::size_t offset, std::size_t count)
noexcept {
// Remember where to write before moving the cursor forward.
319 char*
const old_current = destination.current;
320 destination.current += count;
321 Encoder::CopyBlock(block, offset, old_current);
// Encodes `str` one character at a time in kValue mode, storing the pointer
// returned by encoding::EncodeTskv back into the cursor after each
// character. Declared noinline.
// NOTE(review): the end of the function is not visible here.
326template <
typename Encoder>
327[[nodiscard]]
__attribute__((noinline)) BufferPtr<Encoder> EncodeValueEach(
328 BufferPtr<Encoder> destination, std::string_view str) {
329 for (
const char c : str) {
330 destination.current =
331 encoding::EncodeTskv(destination.current, c, EncodeTskvMode::kValue);
// Encodes the `count` bytes that start at `offset` inside one aligned block.
// The whole block is loaded; when Encoder::MayNeedValueEscaping reports a
// possible escape, those bytes go through EncodeValueEach. The AppendBlock
// call is presumably the other branch (the `else` line is not visible here -
// confirm).
// NOTE(review): the line with the function name is not visible here;
// EncodeValueBlock is inferred from the calls in EncodeValue.
336template <
typename Encoder>
338 BufferPtr<Encoder> destination,
const char* block, std::size_t offset,
// The requested byte range must lie inside a single block.
340 UASSERT(offset < Encoder::kBlockSize);
341 UASSERT(offset + count <= Encoder::kBlockSize);
342 block = AssumeAligned<Encoder::kBlockSize>(block);
343 const auto block_contents = Encoder::LoadBlock(block);
345 if (__builtin_expect(
346 Encoder::MayNeedValueEscaping(block_contents, offset, count),
348 destination = tskv::EncodeValueEach(
349 destination, std::string_view(block + offset, count));
352 destination = tskv::AppendBlock(destination, block_contents, offset, count);
// Encodes `str` block by block: a (possibly partial) first block, then whole
// blocks, then a (possibly partial) last block. Each piece is handed to
// tskv::EncodeValueBlock. Returns `destination` unchanged for an empty
// string. Declared noinline.
// NOTE(review): some lines, including the end of the function, are not
// visible here.
359template <
typename Encoder>
360[[nodiscard]]
__attribute__((noinline)) BufferPtr<Encoder> EncodeValue(
361 BufferPtr<Encoder> destination, std::string_view str) {
362 if (str.empty())
return destination;
// The first block starts at str.data() rounded down to the block size, so
// it can begin before the string's first byte.
364 const char*
const first_block = AlignDown<Encoder::kBlockSize>(str.data());
365 const auto first_block_offset =
366 static_cast<std::size_t>(str.data() - first_block);
// Bytes of `str` that fall inside the first block.
367 const auto first_block_count =
368 std::min(Encoder::kBlockSize - first_block_offset, str.size());
370 destination = tskv::EncodeValueBlock(destination, first_block,
371 first_block_offset, first_block_count);
// The last block is the aligned block containing the end of the string.
373 const char*
const last_block =
374 AlignDown<Encoder::kBlockSize>(str.data() + str.size());
376 if (last_block != first_block) {
// Whole blocks strictly between the first and the last one.
377 for (
const char* current_block = first_block + Encoder::kBlockSize;
378 current_block < last_block; current_block += Encoder::kBlockSize) {
379 destination = tskv::EncodeValueBlock(destination, current_block, 0,
380 Encoder::kBlockSize);
// Bytes of `str` in the last block; zero when the string ends exactly on a
// block boundary.
383 const auto last_block_count =
384 static_cast<std::size_t>(str.data() + str.size() - last_block);
385 if (last_block_count != 0) {
387 tskv::EncodeValueBlock(destination, last_block, 0, last_block_count);
// Dispatches on `mode`: kValue goes to the block-wise tskv::EncodeValue; any
// other mode is encoded one character at a time with encoding::EncodeTskv,
// storing the returned pointer back into the cursor.
// NOTE(review): the end of the function is not visible here.
394template <
typename Encoder>
395[[nodiscard]] BufferPtr<Encoder> DoEncode(BufferPtr<Encoder> destination,
396 std::string_view str,
397 EncodeTskvMode mode) {
398 if (mode == EncodeTskvMode::kValue) {
399 return tskv::EncodeValue(destination, str);
401 for (
const char c : str) {
402 destination.current = encoding::EncodeTskv(destination.current, c, mode);
/// Upper bound on the encoded size of a source of `source_size` bytes.
///
/// Returns twice the source size (presumably because an escaped character
/// occupies at most two output bytes - verify against EncodeTskv).
/// EncodeFullyBuffered uses it to size the output container up front.
inline std::size_t MaxEncodedSize(std::size_t source_size) noexcept {
  return source_size * 2;
}
412template <
typename Encoder,
typename Container>
413void EncodeFullyBuffered(Container& container, std::string_view str,
414 EncodeTskvMode mode) {
415 const auto old_size = container.size();
416 container.resize(old_size + MaxEncodedSize(str.size()) +
417 PaddingSize<Encoder>());
418 BufferPtr<Encoder> buffer_ptr{container.data() + old_size};
420 buffer_ptr = tskv::DoEncode(buffer_ptr, str, mode);
422 container.resize(buffer_ptr.current - container.data());
// Forwards to impl::tskv::EncodeFullyBuffered, instantiated with
// impl::tskv::SystemEncoder - presumably the definition of the
// container-taking EncodeTskv overload (TODO confirm).
// NOTE(review): the signature's first line and the end of the call are not
// visible here.
427template <
typename Container>
429 EncodeTskvMode mode) {
430 impl::tskv::EncodeFullyBuffered<impl::tskv::SystemEncoder>(container, str,
// Scans `key` character by character. The visible check returns true as
// soon as an uppercase ASCII letter ('A'..'Z') is found.
// NOTE(review): the lines between the loop header and this check, and the
// end of the function, are not visible here - other conditions may exist.
435inline bool ShouldKeyBeEscaped(std::string_view key)
noexcept {
436 for (
const char ch : key) {
447 if (
'A' <= ch && ch <=
'Z')
return true;
// Remove the helper macros defined near the top of this file so they do not
// leak into files that include it. The sanitizer macro is defined as
// USERVER_IMPL_DISABLE_ASAN, so that is the name that has to be undefined.
#undef USERVER_IMPL_FORCE_INLINE
#undef USERVER_IMPL_DISABLE_ASAN