17#elif defined(__SSSE3__)
19#elif defined(__SSE2__
)
23#include <userver/utils/assert.hpp>
26#define USERVER_IMPL_FORCE_INLINE __attribute__((always_inline)) inline
30#define USERVER_IMPL_DISABLE_ASAN __attribute__((no_sanitize_address, no_sanitize_memory, no_sanitize_thread))
33#define USERVER_IMPL_DISABLE_ASAN __attribute__((no_sanitize_address))
36USERVER_NAMESPACE_BEGIN
38namespace utils::encoding {
/// Character placed between a key and its value inside one TSKV pair.
inline constexpr char kTskvKeyValueSeparator = '=';

/// Character placed between consecutive key-value pairs of a TSKV record.
inline constexpr char kTskvPairsSeparator = '\t';
/// Selects which escaping rules EncodeTskv applies to its input.
enum class EncodeTskvMode {
    /// Key escaping rules.
    kKey,
    /// Value escaping rules; this is the mode routed to the block-wise encoder.
    kValue,
    /// Key escaping rules with an additional branch for this mode.
    /// NOTE(review): presumably '.' is substituted in keys — confirm against
    /// the per-character EncodeTskv definition.
    kKeyReplacePeriod,
};
/// Encodes the single character `ch` according to `mode`, writing the result
/// through the output iterator `destination`.
///
/// @param destination output iterator that receives the encoded character(s)
/// @param ch          the character to encode
/// @param mode        escaping rules to apply
/// @returns an `OutIter`; callers in this file assign it back to their write
///          cursor, so it is presumably positioned just past the written output.
51template <
typename OutIter>
52OutIter
EncodeTskv(OutIter destination,
char ch, EncodeTskvMode mode);
/// Encodes `str` according to `mode` and appends the result to `container`.
///
/// The definition later in this file requires `Container` to provide
/// `size()`, `resize()` and `data()`.
///
/// @param container destination that grows by the encoded output
/// @param str       text to encode
/// @param mode      escaping rules to apply
60template <
typename Container>
61void EncodeTskv(Container& container, std::string_view str, EncodeTskvMode mode);
66template <
typename OutIter>
68 const bool is_key_encoding = (mode == EncodeTskvMode::kKey || mode == EncodeTskvMode::kKeyReplacePeriod);
69 const auto append = [&destination](
char ch) { *(destination++) = ch; };
93 if (mode == EncodeTskvMode::kKeyReplacePeriod) {
124 if (is_key_encoding) {
130 if (is_key_encoding) {
143namespace impl::tskv {
145template <std::size_t Alignment,
typename T>
147 static_assert(Alignment %
sizeof(T) == 0);
148 return reinterpret_cast<T*>(
reinterpret_cast<std::uintptr_t>(ptr) / Alignment * Alignment);
151template <std::size_t Alignment>
153 UASSERT(
reinterpret_cast<std::uintptr_t>(block) % Alignment == 0);
154 return static_cast<
const char*>(__builtin_assume_aligned(block, Alignment));
/// Builds a 32-byte index table at compile time: entries [0, 16) hold their own
/// index (0..15) and entries [16, 32) hold 0xf0.
///
/// EncoderSsse3::CopyBlock loads 16 consecutive bytes of this table starting at
/// `offset` and uses them as the control mask of `_mm_shuffle_epi8`, so the
/// window selects source bytes `offset`, `offset + 1`, ... followed by 0xf0
/// entries. NOTE(review): 0xf0 has its top bit set, which per the Intel
/// intrinsic documentation makes the shuffle emit a zero byte — confirm.
157constexpr auto MakeShuffleIndicesForRightShift()
noexcept {
158 constexpr std::size_t kShuffleWidth = 16;
159 std::array<std::uint8_t, kShuffleWidth * 2> result{};
// Start with every slot set to the 0xf0 filler...
160 for (
auto& item : result) item = 0xf0;
// ...then overwrite the first half with the identity permutation.
161 for (std::size_t i = 0; i < kShuffleWidth; ++i) result[i] = i;
/// Encoder that uses no SIMD intrinsics: it processes the input in blocks of
/// one `std::uint64_t` (8 bytes).
165struct EncoderStd
final {
166 using Block = std::uint64_t;
167 static constexpr std::size_t kBlockSize =
sizeof(Block);
// Block load: the pointer is asserted/assumed to be kBlockSize-aligned and is
// then read as a whole Block.
173 block = AssumeAligned<kBlockSize>(block);
174 return *
reinterpret_cast<
const Block*>(block);
// Block copy: drops the first `offset` bytes by shifting right by offset * 8
// bits, then always stores a full sizeof(Block) bytes to `destination`.
// NOTE(review): the shift direction assumes a little-endian byte order — confirm.
178 const auto cut_block = block >> (offset * 8);
179 std::memcpy(destination, &cut_block,
sizeof(cut_block));
// Returns true if any of the `count` bytes starting at `offset` inside `block`
// compares <= '\r' or equals '\\'. The comparison is done on plain `char`, so
// where `char` is signed, bytes >= 0x80 also report true (conservative).
183 MayNeedValueEscaping(Block block, std::size_t offset, std::size_t count)
noexcept {
184 char buffer[kBlockSize]{};
185 std::memcpy(&buffer, &block,
sizeof(block));
186 for (
const char c : std::string_view(buffer + offset, count)) {
187 if (c <=
'\r' || c ==
'\\')
return true;
195 using Block = __m128i;
196 static constexpr std::size_t kBlockSize =
sizeof(Block);
202 block = AssumeAligned<kBlockSize>(block);
203 return _mm_load_si128(
reinterpret_cast<
const Block*>(block));
207 CopyBlock(Block block_contents, std::size_t offset,
char* destination)
noexcept {
208 alignas(kBlockSize * 2)
char storage[kBlockSize * 2]{};
209 _mm_store_si128(
reinterpret_cast<Block*>(&storage), block_contents);
210 const auto cut_block = _mm_loadu_si128(
reinterpret_cast<__m128i_u*>(&storage[offset]));
211 _mm_storeu_si128(
reinterpret_cast<__m128i_u*>(destination), cut_block);
215 MayNeedValueEscaping(Block block, std::size_t offset, std::size_t count)
noexcept {
218 const auto may_need_escaping_mask = _mm_movemask_epi8(
219 _mm_or_si128(_mm_cmpgt_epi8(_mm_set1_epi8(
'\r' + 1), block), _mm_cmpeq_epi8(block, _mm_set1_epi8(
'\\')))
221 return static_cast<std::uint32_t>(
222 static_cast<std::uint32_t>(may_need_escaping_mask) >> offset << (32 - count)
/// Encoder that inherits everything from EncoderSse2 and replaces CopyBlock
/// with a byte-shuffle based implementation.
229struct EncoderSsse3 final :
public EncoderSse2 {
/// Stores the bytes of `block` starting at byte `offset` to `destination`.
/// A full 16-byte unaligned store is always performed, so `destination` must
/// have at least 16 writable bytes regardless of how many are meaningful.
230 USERVER_IMPL_FORCE_INLINE
static void CopyBlock(Block block, std::size_t offset,
char* destination)
noexcept {
231 static constexpr auto kShuffleIdx = MakeShuffleIndicesForRightShift();
// Take a 16-byte window of the index table beginning at `offset`; it is used
// as the shuffle control mask.
232 const auto pos = _mm_loadu_si128(
reinterpret_cast<
const __m128i_u*>(&kShuffleIdx[offset]));
233 const auto cut_block = _mm_shuffle_epi8(block, pos);
234 _mm_storeu_si128(
reinterpret_cast<__m128i_u*>(destination), cut_block);
/// Encoder that processes the input in 32-byte `__m256i` blocks using AVX2
/// intrinsics.
240struct EncoderAvx2 final {
241 using Block = __m256i;
242 static constexpr std::size_t kBlockSize =
sizeof(Block);
/// Loads one aligned 32-byte block. Sanitizer instrumentation is disabled for
/// this function; EncodeValue passes blocks rounded down/up to the alignment,
/// so the load may cover bytes outside the caller's string.
247 USERVER_IMPL_DISABLE_ASAN
inline static Block LoadBlock(
const char* block)
noexcept {
248 block = AssumeAligned<kBlockSize>(block);
249 return _mm256_load_si256(
reinterpret_cast<
const Block*>(block));
/// Stores the bytes of `block` starting at byte `offset` to `destination`.
/// A full 32-byte unaligned store is always performed.
252 USERVER_IMPL_FORCE_INLINE
static void CopyBlock(Block block, std::size_t offset,
char* destination)
noexcept {
// Spill the block into the lower half of a zero-initialized, double-width
// scratch buffer, then reload 32 bytes starting at `offset`; the tail of the
// reloaded value comes from the zeroed upper half.
253 alignas(kBlockSize * 2)
char storage[kBlockSize * 2]{};
254 _mm256_store_si256(
reinterpret_cast<Block*>(&storage), block);
255 const auto cut_block = _mm256_loadu_si256(
reinterpret_cast<__m256i_u*>(&storage[offset]));
256 _mm256_storeu_si256(
reinterpret_cast<__m256i_u*>(destination), cut_block);
/// Reports whether any of the `count` bytes starting at `offset` may need
/// escaping: a byte is flagged when it is less than '\r' + 1 under a signed
/// 8-bit comparison, or equal to '\\'.
259 USERVER_IMPL_FORCE_INLINE
static bool
260 MayNeedValueEscaping(Block block, std::size_t offset, std::size_t count)
noexcept {
// One mask bit per byte of the block.
263 const auto may_need_escaping_mask = _mm256_movemask_epi8(_mm256_or_si256(
264 _mm256_cmpgt_epi8(_mm256_set1_epi8(
'\r' + 1), block), _mm256_cmpeq_epi8(block, _mm256_set1_epi8(
'\\'))
// Shift out the bits below `offset`, then shift left so only `count` bits
// remain in the 32-bit value. `count` must be non-zero, otherwise the left
// shift would be by 32 bits.
266 return static_cast<std::uint32_t>(
267 static_cast<std::uint32_t>(may_need_escaping_mask) >> offset << (32 - count)
274using SystemEncoder = EncoderAvx2;
275#elif defined(__SSSE3__)
276using SystemEncoder = EncoderSsse3;
277#elif defined(__SSE2__
)
278using SystemEncoder = EncoderSse2;
280using SystemEncoder = EncoderStd;
/// Number of extra bytes that must be available past the worst-case encoded
/// output: one full block of the given Encoder. The CopyBlock implementations
/// in this file always store a whole block, even when fewer bytes are kept.
286template <
typename Encoder>
287constexpr std::size_t PaddingSize() {
288 return Encoder::kBlockSize;
/// Write cursor into an output buffer, tagged with the Encoder type it is used
/// with. `current` points at the next byte to be written and starts as null.
291template <
typename Encoder>
292struct BufferPtr
final {
293 char* current{
nullptr};
296template <
typename Encoder>
298 BufferPtr<Encoder> destination,
299 typename Encoder::Block block,
303 char*
const old_current = destination.current;
304 destination.current += count;
305 Encoder::CopyBlock(block, offset, old_current);
/// Per-character path: encodes every character of `str` in value mode through
/// the per-character EncodeTskv overload, advancing `destination.current`
/// after each one. Marked noinline, and called from EncodeValueBlock's
/// branch that is annotated as unlikely.
310template <
typename Encoder>
311[[nodiscard]]
__attribute__((noinline)) BufferPtr<Encoder>
312EncodeValueEach(BufferPtr<Encoder> destination, std::string_view str) {
313 for (
const char c : str) {
314 destination.current = encoding::EncodeTskv(destination.current, c, EncodeTskvMode::kValue);
/// Encodes `count` bytes starting at byte `offset` of one aligned input block.
///
/// @param destination write cursor for the output
/// @param block       pointer to an Encoder::kBlockSize-aligned block
/// @param offset      index of the first byte of interest; must be < kBlockSize
/// @param count       number of bytes of interest; offset + count <= kBlockSize
319template <
typename Encoder>
321EncodeValueBlock(BufferPtr<Encoder> destination,
const char* block, std::size_t offset, std::size_t count) {
322 UASSERT(offset < Encoder::kBlockSize);
323 UASSERT(offset + count <= Encoder::kBlockSize);
324 block = AssumeAligned<Encoder::kBlockSize>(block);
325 const auto block_contents = Encoder::LoadBlock(block);
// Expected-rare case: some byte in range may need escaping, so fall back to
// encoding this range one character at a time.
327 if (__builtin_expect(Encoder::MayNeedValueEscaping(block_contents, offset, count),
false)) {
328 destination = tskv::EncodeValueEach(destination, std::string_view(block + offset, count));
// Otherwise the bytes are copied through unchanged as a block.
331 destination = tskv::AppendBlock(destination, block_contents, offset, count);
/// Encodes `str` in value mode by walking it in Encoder::kBlockSize-aligned
/// blocks: a leading partial block, zero or more full blocks, and a trailing
/// partial block. Returns `destination` unchanged for an empty string.
///
/// Block addresses are obtained by rounding `str.data()` and the end pointer
/// down to the block alignment, so the blocks handed to EncodeValueBlock can
/// start before `str.data()` and extend past the end of `str`; only the
/// `offset`/`count` range passed along lies inside the string.
338template <
typename Encoder>
339[[nodiscard]]
__attribute__((noinline)) BufferPtr<Encoder>
340EncodeValue(BufferPtr<Encoder> destination, std::string_view str) {
341 if (str.empty())
return destination;
// Leading block: begins at the aligned address at or below str.data().
343 const char*
const first_block = AlignDown<Encoder::kBlockSize>(str.data());
344 const auto first_block_offset =
static_cast<std::size_t>(str.data() - first_block);
// The string may end inside the first block, hence the min with str.size().
345 const auto first_block_count = std::min(Encoder::kBlockSize - first_block_offset, str.size());
347 destination = tskv::EncodeValueBlock(destination, first_block, first_block_offset, first_block_count);
// Aligned block that contains the one-past-the-end position of the string.
349 const char*
const last_block = AlignDown<Encoder::kBlockSize>(str.data() + str.size());
351 if (last_block != first_block) {
// Full blocks strictly between the first and the last block.
352 for (
const char* current_block = first_block + Encoder::kBlockSize; current_block < last_block;
353 current_block += Encoder::kBlockSize) {
354 destination = tskv::EncodeValueBlock(destination, current_block, 0, Encoder::kBlockSize);
// Trailing bytes; zero when the string ends exactly on a block boundary.
357 const auto last_block_count =
static_cast<std::size_t>(str.data() + str.size() - last_block);
358 if (last_block_count != 0) {
359 destination = tskv::EncodeValueBlock(destination, last_block, 0, last_block_count);
/// Dispatches on `mode`: value mode goes through the block-wise EncodeValue,
/// every other mode encodes `str` one character at a time with the
/// per-character EncodeTskv overload.
366template <
typename Encoder>
367[[nodiscard]] BufferPtr<Encoder> DoEncode(BufferPtr<Encoder> destination, std::string_view str, EncodeTskvMode mode) {
368 if (mode == EncodeTskvMode::kValue) {
369 return tskv::EncodeValue(destination, str);
371 for (
const char c : str) {
372 destination.current = encoding::EncodeTskv(destination.current, c, mode);
/// Upper bound on the encoded size of `source_size` input bytes.
///
/// Two output bytes are budgeted per input byte.
/// NOTE(review): presumably every character escapes to at most two characters —
/// confirm against the per-character EncodeTskv.
///
/// @param source_size number of input bytes
/// @returns `source_size * 2`
constexpr inline std::size_t MaxEncodedSize(std::size_t source_size) noexcept {
    return source_size * 2;
}
/// Appends the encoded form of `str` to `container` by over-allocating first
/// and trimming afterwards.
///
/// `Container` must provide `size()`, `resize()` and `data()`.
380template <
typename Encoder,
typename Container>
381void EncodeFullyBuffered(Container& container, std::string_view str, EncodeTskvMode mode) {
382 const auto old_size = container.size();
// Grow to the worst-case encoded size plus one block of padding, so the
// encoder's whole-block stores stay inside the buffer.
383 container.resize(old_size + MaxEncodedSize(str.size()) + PaddingSize<Encoder>());
// Start writing right after the pre-existing contents.
384 BufferPtr<Encoder> buffer_ptr{container.data() + old_size};
386 buffer_ptr = tskv::DoEncode(buffer_ptr, str, mode);
// Trim the container to end exactly at the final write cursor.
388 container.resize(buffer_ptr.current - container.data());
/// Container overload of EncodeTskv: forwards to the fully-buffered
/// implementation, using the encoder selected at compile time as
/// `impl::tskv::SystemEncoder`.
393template <
typename Container>
394void EncodeTskv(Container& container, std::string_view str, EncodeTskvMode mode) {
395 impl::tskv::EncodeFullyBuffered<impl::tskv::SystemEncoder>(container, str, mode);
399inline bool ShouldKeyBeEscaped(std::string_view key)
noexcept {
400 for (
const char ch : key) {
411 if (
'A' <= ch && ch <=
'Z')
return true;
// Undefine the implementation-only helper macros defined at the top of this
// header so they do not leak into files that include it. The sanitizer macro is
// defined under the name USERVER_IMPL_DISABLE_ASAN, so that is the name that
// has to be undefined here.
#undef USERVER_IMPL_FORCE_INLINE
#undef USERVER_IMPL_DISABLE_ASAN