userver: userver/http/predefined_header.hpp Source File
Loading...
Searching...
No Matches
predefined_header.hpp
1#pragma once
2
3#include <string>
4#include <string_view>
5
6#include <fmt/core.h>
7
8#include <userver/utils/small_string_fwd.hpp>
9#include <userver/utils/trivial_map.hpp>
10
11USERVER_NAMESPACE_BEGIN
12
13namespace http::headers {
14
15// According to https://www.chromium.org/spdy/spdy-whitepaper/
16// "typical header sizes of 700-800 bytes is common"
17inline constexpr std::size_t kTypicalHeadersSize = 1024;
18using HeadersString = utils::SmallString<kTypicalHeadersSize>;
19
20namespace impl {
21
// This is a constexpr implementation of case-insensitive MurMur hash for 64-bit
// size_t and Little Endianness.
// https://github.com/gcc-mirror/gcc/blob/65369ab62cee68eb7f6ef65e3d12d1969a9e20ee/libstdc%2B%2B-v3/libsupc%2B%2B/hash_bytes.cc#L138
//
// P.S. The hasher is "unsafe" in hash-flood sense.
struct UnsafeConstexprHasher final {
    // Hashes `str` so that ASCII letters differing only in case produce the
    // same value. Usable in constant expressions.
    constexpr std::size_t operator()(std::string_view str) const noexcept {
        // Written as one 64-bit literal on purpose: composing it as
        // `0xc6a4a793UL << 32UL` shifts by the full width of `unsigned long`
        // wherever that type is 32 bits wide, which is undefined behaviour
        // (and ill-formed inside a constant expression).
        constexpr std::uint64_t mul = 0xc6a4a7935bd1e995ULL;

        std::uint64_t hash = seed_ ^ (str.size() * mul);

        // Consume the input in full 8-byte blocks.
        while (str.size() >= 8) {
            const std::uint64_t data = ShiftMix(Load8(str.data()) * mul) * mul;
            hash ^= data;
            hash *= mul;

            str.remove_prefix(8);
        }

        // Mix in the remaining 1..7 bytes, if any.
        if (!str.empty()) {
            const std::uint64_t data = LoadN(str.data(), str.size());
            hash ^= data;
            hash *= mul;
        }

        // Final avalanche.
        hash = ShiftMix(hash) * mul;
        hash = ShiftMix(hash);
        return hash;
    }

private:
    static constexpr inline std::uint64_t ShiftMix(std::uint64_t v) noexcept { return v ^ (v >> 47); }

    // Loads exactly 8 bytes starting at `data`.
    static constexpr inline std::uint64_t Load8(const char* data) noexcept { return LoadN(data, 8); }

    // Packs the first `n` bytes of `data` into an integer, byte `i` landing in
    // bits [8*i, 8*i + 8), with bit 0x20 forced on in every loaded byte.
    //
    // Precondition: 1 <= n <= 8. For n == 0 the mask shift below would be a
    // shift by 64 bits, which is undefined; operator() never calls it that way.
    static constexpr inline std::uint64_t LoadN(const char* data, std::size_t n) noexcept {
        // Although lowercase and uppercase ASCII are indeed 32 (0x20) apart,
        // this approach makes for instance '[' and '{' equivalent too,
        // which is obviously broken and is easily exploitable.
        // However, for expected input (lower/upper-case ASCII letters + dashes)
        // this just works, and against malicious
        // input we defend by falling back to case-insensitive SipHash.
        constexpr std::uint64_t kDeliberatelyBrokenLowercaseMask = 0x2020202020202020ULL;

        // Keep only the mask bytes that correspond to the `n` loaded bytes.
        std::uint64_t result = kDeliberatelyBrokenLowercaseMask >> (8 * (8 - n));
        for (std::size_t i = 0; i < n; ++i) {
            const auto byte = static_cast<std::uint8_t>(data[i]);
            result |= static_cast<std::uint64_t>(byte) << (8 * i);
        }
        return result;
    }

    // Seed is chosen in such a way that 16 (presumably) most common userver
    // headers don't collide within default size of HeaderMap (32),
    // and that all headers used in userver itself don't collide within minimal
    // size needed to store them all (36 headers, minimal size = 64).
    // Note that since HeaderMap takes hashes modulo power of 2 it's guaranteed
    // that if two headers don't collide within size S, they don't collide for
    // bigger sizes.
    std::uint64_t seed_{54999};
};
81
82// Ugly, but TrivialBiMap requires keys to be in lower case.
83inline constexpr utils::TrivialBiMap kKnownHeadersLowercaseMap = [](auto selector) {
84 return selector()
85 .Case("content-type", std::int8_t{1})
86 .Case("content-encoding", 2)
87 .Case("content-length", 3)
88 .Case("transfer-encoding", 4)
89 .Case("host", 5)
90 .Case("accept", 6)
91 .Case("accept-encoding", 7)
92 .Case("accept-language", 8)
93 .Case("x-yataxi-api-key", 9)
94 .Case("user-agent", 10)
95 .Case("x-request-application", 11)
96 .Case("date", 12)
97 .Case("warning", 13)
98 .Case("access-control-allow-headers", 14)
99 .Case("allow", 15)
100 .Case("server", 16)
101 .Case("set-cookie", 17)
102 .Case("connection", 18)
103 .Case("cookie", 19)
104 .Case("x-yarequestid", 20)
105 .Case("x-yatraceid", 21)
106 .Case("x-yaspanid", 22)
107 .Case("x-requestid", 23)
108 .Case("x-backend-server", 24)
109 .Case("x-taxi-envoyproxy-dstvhost", 25)
110 .Case("baggage", 26)
111 .Case("x-yataxi-allow-auth-request", 27)
112 .Case("x-yataxi-allow-auth-response", 28)
113 .Case("x-yataxi-server-hostname", 29)
114 .Case("x-yataxi-client-timeoutms", 30)
115 .Case("x-yataxi-deadline-expired", 31)
116 .Case("x-yataxi-ratelimited-by", 32)
117 .Case("x-yataxi-ratelimit-reason", 33)
118 .Case("x-b3-traceid", 34)
119 .Case("x-b3-spanid", 35)
120 .Case("x-b3-sampled", 36)
121 .Case("x-b3-parentspanid", 37)
122 .Case("traceparent", 38)
123 .Case("tracestate", 39)
124 .Case("http2-settings", 40)
125 .Case(":method", 41)
126 .Case(":path", 42);
127};
128
// We use different values for "no index" at compile and run time to simplify
// comparison - with these values being different we can just == them:
// two "unknown" indexes coming from the two different sources never match.
inline constexpr std::int8_t kNoHeaderIndexLookup = -1;
inline constexpr std::int8_t kNoHeaderIndexInsertion = -2;
static_assert(kNoHeaderIndexLookup != kNoHeaderIndexInsertion);
static_assert(kNoHeaderIndexLookup != 0);
static_assert(kNoHeaderIndexInsertion != 0);
135
136// We use this function when constructing a PredefinedHeader ...
137constexpr std::int8_t GetHeaderIndexForLookup(std::string_view key) {
138 const auto opt = kKnownHeadersLowercaseMap.TryFindICaseByFirst(key);
139 return opt.value_or(kNoHeaderIndexLookup);
140}
141
142// And this one when inserting an entry into the HeaderMap.
143// The purpose of having 2 different functions is to be able to
144// == header indexes even if none is present (both headers are unknown).
145inline std::int8_t GetHeaderIndexForInsertion(std::string_view key) {
146 const auto opt = kKnownHeadersLowercaseMap.TryFindICaseByFirst(key);
147 return opt.value_or(kNoHeaderIndexInsertion);
148}
149
150} // namespace impl
151
// Forward declarations of the classes befriended by PredefinedHeader.
namespace header_map {
class Map;
class Danger;
}  // namespace header_map
156
157/// @ingroup userver_universal userver_containers
158///
159/// @brief A struct to represent compile-time known header name.
160///
161/// Calculates the hash value at compile time with the same hasher
162/// HeaderMap uses, which allows to speed things up greatly.
163///
164/// Although it's possible to construct PredefinedHeader at runtime
165/// it makes little sense and is error-prone, since it
166/// doesn't own its data, so don't do that until really needed.
167class PredefinedHeader final {
168public:
169 explicit constexpr PredefinedHeader(std::string_view name)
170 : name{name}, hash{impl::UnsafeConstexprHasher{}(name)}, header_index{impl::GetHeaderIndexForLookup(name)} {}
171
172 constexpr operator std::string_view() const { return name; }
173
174 explicit operator std::string() const { return std::string{name}; }
175
176private:
177 friend class header_map::Danger;
178 friend class header_map::Map;
179
180 // Header name.
181 const std::string_view name;
182
183 // Unsafe constexpr hash (unsafe in a hash-flood sense).
184 const std::size_t hash;
185
186 // We assign a different 'index' value to every known header,
187 // which allows us to do not perform case-insensitive names compare if indexes
188 // match.
189 // With this trick a successful lookup for PredefinedHeader in HeaderMap
190 // is basically "access an array by index and compare both hash and index".
191 // You can think of this field as an enum discriminant.
192 const std::int8_t header_index;
193};
194
195} // namespace http::headers
196
197USERVER_NAMESPACE_END
198
199template <>
200struct fmt::formatter<USERVER_NAMESPACE::http::headers::PredefinedHeader> : fmt::formatter<std::string_view> {
201 template <typename FormatContext>
202 auto format(const USERVER_NAMESPACE::http::headers::PredefinedHeader& value, FormatContext& ctx) const
203 -> decltype(ctx.out()) {
204 return formatter<std::string_view>::format(std::string_view{value}, ctx);
205 }
206};