userver: userver/http/predefined_header.hpp Source File
⚠️ This is the documentation for an old userver version. Click here to switch to the latest version.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages Concepts
predefined_header.hpp
1#pragma once
2
3#include <string>
4#include <string_view>
5
6#include <fmt/core.h>
7
8#include <userver/utils/small_string_fwd.hpp>
9#include <userver/utils/trivial_map.hpp>
10
11USERVER_NAMESPACE_BEGIN
12
13namespace http::headers {
14
// Inline capacity (in bytes) used for HeadersString.
//
// According to https://www.chromium.org/spdy/spdy-whitepaper/
// "typical header sizes of 700-800 bytes is common"
// 1024 is above that quoted range (presumably rounded up to a power of two).
inline constexpr std::size_t kTypicalHeadersSize = 1024;
// Small-string type parameterized with kTypicalHeadersSize.
using HeadersString = utils::SmallString<kTypicalHeadersSize>;
19
20namespace impl {
21
// This is a constexpr implementation of case-insensitive MurMur hash for 64-bit
// size_t and Little Endianness.
// https://github.com/gcc-mirror/gcc/blob/65369ab62cee68eb7f6ef65e3d12d1969a9e20ee/libstdc%2B%2B-v3/libsupc%2B%2B/hash_bytes.cc#L138
//
// Case-insensitivity is achieved by OR-ing 0x20 into every input byte before
// mixing (see LoadN), so "Host" and "host" produce the same hash.
//
// P.S. The hasher is "unsafe" in hash-flood sense.
struct UnsafeConstexprHasher final {
  // Hashes `str`, treating ASCII letters case-insensitively.
  // Usable in constant expressions.
  constexpr std::size_t operator()(std::string_view str) const noexcept {
    // The constant is assembled from std::uint64_t operands rather than `UL`
    // literals: `unsigned long` is only 32 bits wide on LLP64 targets
    // (e.g. 64-bit Windows), where `0xc6a4a793UL << 32` would shift by the
    // full width of the type.
    constexpr std::uint64_t mul =
        (std::uint64_t{0xc6a4a793} << 32) + std::uint64_t{0x5bd1e995};

    std::uint64_t hash = seed_ ^ (str.size() * mul);

    // Consume the input in full 8-byte blocks.
    while (str.size() >= 8) {
      const std::uint64_t data = ShiftMix(Load8(str.data()) * mul) * mul;
      hash ^= data;
      hash *= mul;

      str = str.substr(8);
    }

    // Mix in the remaining 1..7 bytes, if any.
    if (!str.empty()) {
      const std::uint64_t data = LoadN(str.data(), str.size());
      hash ^= data;
      hash *= mul;
    }

    // Final avalanche.
    hash = ShiftMix(hash) * mul;
    hash = ShiftMix(hash);
    return hash;
  }

 private:
  static constexpr std::uint64_t ShiftMix(std::uint64_t v) noexcept {
    return v ^ (v >> 47);
  }

  // Loads a full 8-byte block.
  static constexpr std::uint64_t Load8(const char* data) noexcept {
    return LoadN(data, 8);
  }

  // Packs the first `n` bytes of `data` into an integer, byte `i` occupying
  // bits [8*i, 8*i + 8), with 0x20 OR-ed into each of them.
  // Precondition: 1 <= n <= 8 (n == 0 would shift by 64 bits below);
  // operator() only calls this for non-empty input.
  static constexpr std::uint64_t LoadN(const char* data,
                                       std::size_t n) noexcept {
    // Although lowercase and uppercase ASCII are indeed 32 (0x20) apart,
    // this approach makes for instance '[' and '{' equivalent too,
    // which is obviously broken and is easily exploitable.
    // However, for expected input (lower/upper-case ASCII letters + dashes)
    // this just works, and against malicious
    // input we defend by falling back to case-insensitive SipHash.
    constexpr std::uint64_t kDeliberatelyBrokenLowercaseMask =
        0x2020202020202020ULL;

    // Keep only the `n` low bytes of the mask so that padding bytes stay zero.
    std::uint64_t result = kDeliberatelyBrokenLowercaseMask >> (8 * (8 - n));
    for (std::size_t i = 0; i < n; ++i) {
      const std::uint8_t c = data[i];
      result |= static_cast<std::uint64_t>(c) << (8 * i);
    }
    return result;
  }

  // Seed is chosen in such a way that 16 (presumably) most common userver
  // headers don't collide within default size of HeaderMap (32),
  // and that all headers used in userver itself don't collide within minimal
  // size needed to store them all (36 headers, minimal size = 64).
  // Note that since HeaderMap takes hashes modulo power of 2 it's guaranteed
  // that if two headers don't collide within size S, they don't collide for
  // bigger sizes.
  std::uint64_t seed_{54999};
};
87
// Compile-time table assigning a distinct small index (1..39) to every header
// name known to this file. It is queried case-insensitively by
// GetHeaderIndexForLookup and GetHeaderIndexForInsertion below.
//
// Ugly, but TrivialBiMap requires keys to be in lower case.
inline constexpr utils::TrivialBiMap kKnownHeadersLowercaseMap =
    [](auto selector) {
      return selector()
          // NOTE(review): the explicit std::int8_t on the first entry
          // presumably fixes the mapped type for the whole table - confirm
          // against TrivialBiMap.
          .Case("content-type", std::int8_t{1})
          .Case("content-encoding", 2)
          .Case("content-length", 3)
          .Case("transfer-encoding", 4)
          .Case("host", 5)
          .Case("accept", 6)
          .Case("accept-encoding", 7)
          .Case("accept-language", 8)
          .Case("x-yataxi-api-key", 9)
          .Case("user-agent", 10)
          .Case("x-request-application", 11)
          .Case("date", 12)
          .Case("warning", 13)
          .Case("access-control-allow-headers", 14)
          .Case("allow", 15)
          .Case("server", 16)
          .Case("set-cookie", 17)
          .Case("connection", 18)
          .Case("cookie", 19)
          .Case("x-yarequestid", 20)
          .Case("x-yatraceid", 21)
          .Case("x-yaspanid", 22)
          .Case("x-requestid", 23)
          .Case("x-backend-server", 24)
          .Case("x-taxi-envoyproxy-dstvhost", 25)
          .Case("baggage", 26)
          .Case("x-yataxi-allow-auth-request", 27)
          .Case("x-yataxi-allow-auth-response", 28)
          .Case("x-yataxi-server-hostname", 29)
          .Case("x-yataxi-client-timeoutms", 30)
          .Case("x-yataxi-deadline-expired", 31)
          .Case("x-yataxi-ratelimited-by", 32)
          .Case("x-yataxi-ratelimit-reason", 33)
          .Case("x-b3-traceid", 34)
          .Case("x-b3-spanid", 35)
          .Case("x-b3-sampled", 36)
          .Case("x-b3-parentspanid", 37)
          .Case("traceparent", 38)
          .Case("tracestate", 39);
    };
132
// Sentinel indexes meaning "this is not a known header".
// The lookup (compile time) and insertion (run time) paths deliberately use
// different sentinels to simplify comparison: since the values differ,
// two header indexes can be compared with a plain ==, and two unknown headers
// never match by index alone.
inline constexpr std::int8_t kNoHeaderIndexLookup{-1};
inline constexpr std::int8_t kNoHeaderIndexInsertion{-2};
static_assert(kNoHeaderIndexInsertion != kNoHeaderIndexLookup);
static_assert(kNoHeaderIndexLookup != 0);
static_assert(kNoHeaderIndexInsertion != 0);
139
140// We use this function when constructing a PredefinedHeader ...
141constexpr std::int8_t GetHeaderIndexForLookup(std::string_view key) {
142 const auto opt = kKnownHeadersLowercaseMap.TryFindICaseByFirst(key);
143 return opt.value_or(kNoHeaderIndexLookup);
144}
145
146// And this one when inserting an entry into the HeaderMap.
147// The purpose of having 2 different functions is to be able to
148// == header indexes even if none is present (both headers are unknown).
149inline std::int8_t GetHeaderIndexForInsertion(std::string_view key) {
150 const auto opt = kKnownHeadersLowercaseMap.TryFindICaseByFirst(key);
151 return opt.value_or(kNoHeaderIndexInsertion);
152}
153
154} // namespace impl
155
// Forward declarations only: these classes are named as friends of
// PredefinedHeader below and are defined elsewhere.
namespace header_map {
class Danger;
class Map;
}  // namespace header_map
160
161/// @ingroup userver_universal userver_containers
162///
163/// @brief A struct to represent compile-time known header name.
164///
165/// Calculates the hash value at compile time with the same hasher
166/// HeaderMap uses, which allows to speed things up greatly.
167///
168/// Although it's possible to construct PredefinedHeader at runtime
169/// it makes little sense and is error-prone, since it
170/// doesn't own its data, so don't do that until really needed.
171class PredefinedHeader final {
172 public:
173 explicit constexpr PredefinedHeader(std::string_view name)
174 : name{name},
175 hash{impl::UnsafeConstexprHasher{}(name)},
176 header_index{impl::GetHeaderIndexForLookup(name)} {}
177
178 constexpr operator std::string_view() const { return name; }
179
180 explicit operator std::string() const { return std::string{name}; }
181
182 private:
183 friend class header_map::Danger;
184 friend class header_map::Map;
185
186 // Header name.
187 const std::string_view name;
188
189 // Unsafe constexpr hash (unsafe in a hash-flood sense).
190 const std::size_t hash;
191
192 // We assign a different 'index' value to every known header,
193 // which allows us to do not perform case-insensitive names compare if indexes
194 // match.
195 // With this trick a successful lookup for PredefinedHeader in HeaderMap
196 // is basically "access an array by index and compare both hash and index".
197 // You can think of this field as an enum discriminant.
198 const std::int8_t header_index;
199};
200
201} // namespace http::headers
202
203USERVER_NAMESPACE_END
204
205template <>
206struct fmt::formatter<USERVER_NAMESPACE::http::headers::PredefinedHeader>
207 : fmt::formatter<std::string_view> {
208 template <typename FormatContext>
209 auto format(const USERVER_NAMESPACE::http::headers::PredefinedHeader& value,
210 FormatContext& ctx) const -> decltype(ctx.out()) {
211 return formatter<std::string_view>::format(std::string_view{value}, ctx);
212 }
213};