userver: userver/http/predefined_header.hpp Source File
Loading...
Searching...
No Matches
predefined_header.hpp
1#pragma once
2
3#include <string>
4#include <string_view>
5
6#include <fmt/core.h>
7
8#include <userver/utils/small_string_fwd.hpp>
9#include <userver/utils/trivial_map.hpp>
10
11USERVER_NAMESPACE_BEGIN
12
13namespace http::headers {
14
15// According to https://www.chromium.org/spdy/spdy-whitepaper/
16// "typical header sizes of 700-800 bytes is common"
17inline constexpr std::size_t kTypicalHeadersSize = 1024;
18using HeadersString = utils::SmallString<kTypicalHeadersSize>;
19
20namespace impl {
21
// A constexpr, case-insensitive implementation of the MurMur hash for 64-bit
// size_t and little-endian byte order, modelled after
// https://github.com/gcc-mirror/gcc/blob/65369ab62cee68eb7f6ef65e3d12d1969a9e20ee/libstdc%2B%2B-v3/libsupc%2B%2B/hash_bytes.cc#L138
//
// Case-insensitivity is achieved by OR-ing 0x20 into every input byte, see
// LoadN() for the caveats of that trick.
//
// P.S. The hasher is "unsafe" in hash-flood sense.
struct UnsafeConstexprHasher final {
  // Hashes `str`, treating bytes that differ only in bit 0x20 as equal.
  constexpr std::size_t operator()(std::string_view str) const noexcept {
    // Both halves are spelled as std::uint64_t on purpose: with `UL` literals
    // the shift by 32 is undefined (and not a constant expression) on
    // platforms where unsigned long is only 32 bits wide.
    constexpr std::uint64_t mul =
        (std::uint64_t{0xc6a4a793} << 32) + std::uint64_t{0x5bd1e995};

    std::uint64_t hash = seed_ ^ (str.size() * mul);

    // Consume the input in full 8-byte words.
    for (; str.size() >= 8; str.remove_prefix(8)) {
      const std::uint64_t word = ShiftMix(Load8(str.data()) * mul) * mul;
      hash ^= word;
      hash *= mul;
    }

    // Mix in the remaining 1..7 bytes, if any.
    if (!str.empty()) {
      const std::uint64_t tail = LoadN(str.data(), str.size());
      hash ^= tail;
      hash *= mul;
    }

    // Final avalanche.
    hash = ShiftMix(hash) * mul;
    hash = ShiftMix(hash);
    return hash;
  }

 private:
  static constexpr std::uint64_t ShiftMix(std::uint64_t v) noexcept {
    return v ^ (v >> 47);
  }

  // Loads exactly 8 bytes starting at `data`.
  static constexpr std::uint64_t Load8(const char* data) noexcept {
    return LoadN(data, 8);
  }

  // Packs the first `n` bytes of `data` into a little-endian 64-bit word with
  // bit 0x20 forced on in each of those bytes.
  // Precondition: 1 <= n <= 8 (n == 0 would shift the mask by 64 bits, which
  // is undefined); operator() only ever calls it within that range.
  static constexpr std::uint64_t LoadN(const char* data,
                                       std::size_t n) noexcept {
    // Although lowercase and uppercase ASCII are indeed 32 (0x20) apart,
    // this approach makes for instance '[' and '{' equivalent too,
    // which is obviously broken and is easily exploitable.
    // However, for expected input (lower/upper-case ASCII letters + dashes)
    // this just works, and against malicious
    // input we defend by falling back to case-insensitive SipHash.
    constexpr std::uint64_t kDeliberatelyBrokenLowercaseMask =
        0x2020202020202020ULL;

    // Keep only the mask bytes that correspond to the `n` loaded bytes.
    std::uint64_t packed = kDeliberatelyBrokenLowercaseMask >> (8 * (8 - n));
    for (std::size_t idx = 0; idx != n; ++idx) {
      // Go through std::uint8_t first so that a negative char is not
      // sign-extended into the upper bytes.
      const std::uint8_t byte = data[idx];
      packed |= static_cast<std::uint64_t>(byte) << (8 * idx);
    }
    return packed;
  }

  // Seed is chosen in such a way that 16 (presumably) most common userver
  // headers don't collide within default size of HeaderMap (32),
  // and that all headers used in userver itself don't collide within minimal
  // size needed to store them all (36 headers, minimal size = 64).
  // Note that since HeaderMap takes hashes modulo power of 2 it's guaranteed
  // that if two headers don't collide within size S, they don't collide for
  // bigger sizes.
  std::uint64_t seed_{54999};
};
87
88// Ugly, but TrivialBiMap requires keys to be in lower case.
89inline constexpr utils::TrivialBiMap kKnownHeadersLowercaseMap =
90 [](auto selector) {
91 return selector()
92 .Case("content-type", std::int8_t{1})
93 .Case("content-encoding", 2)
94 .Case("content-length", 3)
95 .Case("transfer-encoding", 4)
96 .Case("host", 5)
97 .Case("accept", 6)
98 .Case("accept-encoding", 7)
99 .Case("accept-language", 8)
100 .Case("x-yataxi-api-key", 9)
101 .Case("user-agent", 10)
102 .Case("x-request-application", 11)
103 .Case("date", 12)
104 .Case("warning", 13)
105 .Case("access-control-allow-headers", 14)
106 .Case("allow", 15)
107 .Case("server", 16)
108 .Case("set-cookie", 17)
109 .Case("connection", 18)
110 .Case("cookie", 19)
111 .Case("x-yarequestid", 20)
112 .Case("x-yatraceid", 21)
113 .Case("x-yaspanid", 22)
114 .Case("x-requestid", 23)
115 .Case("x-backend-server", 24)
116 .Case("x-taxi-envoyproxy-dstvhost", 25)
117 .Case("baggage", 26)
118 .Case("x-yataxi-allow-auth-request", 27)
119 .Case("x-yataxi-allow-auth-response", 28)
120 .Case("x-yataxi-server-hostname", 29)
121 .Case("x-yataxi-client-timeoutms", 30)
122 .Case("x-yataxi-deadline-expired", 31)
123 .Case("x-yataxi-ratelimited-by", 32)
124 .Case("x-yataxi-ratelimit-reason", 33)
125 .Case("x-b3-traceid", 34)
126 .Case("x-b3-spanid", 35)
127 .Case("x-b3-sampled", 36)
128 .Case("x-b3-parentspanid", 37)
129 .Case("traceparent", 38)
130 .Case("tracestate", 39);
131 };
132
// Distinct "no index" sentinels for the lookup side (compile time) and the
// insertion side (run time). Because the two values differ, header indexes
// can be compared with a plain == even when neither header is known:
// an "unknown" lookup index never equals an "unknown" insertion index.
inline constexpr std::int8_t kNoHeaderIndexLookup{-1};
inline constexpr std::int8_t kNoHeaderIndexInsertion{-2};

// The sentinels must differ from each other...
static_assert(kNoHeaderIndexLookup != kNoHeaderIndexInsertion);
// ...and neither of them may be zero.
static_assert(kNoHeaderIndexLookup != 0 && kNoHeaderIndexInsertion != 0);
139
140// We use this function when constructing a PredefinedHeader ...
141constexpr std::int8_t GetHeaderIndexForLookup(std::string_view key) {
142 const auto opt = kKnownHeadersLowercaseMap.TryFindICaseByFirst(key);
143 return opt.value_or(kNoHeaderIndexLookup);
144}
145
146// And this one when inserting an entry into the HeaderMap.
147// The purpose of having 2 different functions is to be able to
148// == header indexes even if none is present (both headers are unknown).
149inline std::int8_t GetHeaderIndexForInsertion(std::string_view key) {
150 const auto opt = kKnownHeadersLowercaseMap.TryFindICaseByFirst(key);
151 return opt.value_or(kNoHeaderIndexInsertion);
152}
153
154} // namespace impl
155
// Forward declarations of the classes that PredefinedHeader befriends.
namespace header_map {

class Map;
class Danger;

}  // namespace header_map
160
161/// @ingroup userver_universal userver_containers
162///
163/// @brief A struct to represent compile-time known header name.
164///
165/// Calculates the hash value at compile time with the same hasher
166/// HeaderMap uses, which allows to speed things up greatly.
167///
168/// Although it's possible to construct PredefinedHeader at runtime
169/// it makes little sense and is error-prone, since it
170/// doesn't own its data, so don't do that until really needed.
171class PredefinedHeader final {
172 public:
173 explicit constexpr PredefinedHeader(std::string_view name)
174 : name{name},
175 hash{impl::UnsafeConstexprHasher{}(name)},
176 header_index{impl::GetHeaderIndexForLookup(name)} {}
177
178 constexpr operator std::string_view() const { return name; }
179
180 explicit operator std::string() const { return std::string{name}; }
181
182 private:
183 friend class header_map::Danger;
184 friend class header_map::Map;
185
186 // Header name.
187 const std::string_view name;
188
189 // Unsafe constexpr hash (unsafe in a hash-flood sense).
190 const std::size_t hash;
191
192 // We assign a different 'index' value to every known header,
193 // which allows us to do not perform case-insensitive names compare if indexes
194 // match.
195 // With this trick a successful lookup for PredefinedHeader in HeaderMap
196 // is basically "access an array by index and compare both hash and index".
197 // You can think of this field as an enum discriminant.
198 const std::int8_t header_index;
199};
200
201} // namespace http::headers
202
203USERVER_NAMESPACE_END
204
205template <>
206struct fmt::formatter<USERVER_NAMESPACE::http::headers::PredefinedHeader>
207 : fmt::formatter<std::string_view> {
208 template <typename FormatContext>
209 auto format(const USERVER_NAMESPACE::http::headers::PredefinedHeader& value,
210 FormatContext& ctx) const -> decltype(ctx.out()) {
211 return formatter<std::string_view>::format(std::string_view{value}, ctx);
212 }
213};