userver: userver/http/predefined_header.hpp Source File
Loading...
Searching...
No Matches
predefined_header.hpp
1#pragma once
2
3#include <string>
4#include <string_view>
5
6#include <fmt/core.h>
7
8#include <userver/utils/small_string_fwd.hpp>
9#include <userver/utils/string_literal.hpp>
10#include <userver/utils/trivial_map.hpp>
11
12USERVER_NAMESPACE_BEGIN
13
14namespace http::headers {
15
16// According to https://www.chromium.org/spdy/spdy-whitepaper/
17// "typical header sizes of 700-800 bytes is common"
18inline constexpr std::size_t kTypicalHeadersSize = 1024;
19using HeadersString = utils::SmallString<kTypicalHeadersSize>;
20
21namespace impl {
22
// Constexpr implementation of a case-insensitive MurMur hash for 64-bit
// size_t and little-endian byte order, modelled after
// https://github.com/gcc-mirror/gcc/blob/65369ab62cee68eb7f6ef65e3d12d1969a9e20ee/libstdc%2B%2B-v3/libsupc%2B%2B/hash_bytes.cc#L138
//
// P.S. The hasher is "unsafe" in hash-flood sense.
struct UnsafeConstexprHasher final {
    /// @brief Hashes `str`; bytes that differ only in bit 0x20 (such as
    /// upper- and lower-case ASCII letters) hash identically.
    /// @param str the string to hash; may be empty.
    /// @return the 64-bit hash value converted to std::size_t.
    constexpr std::size_t operator()(std::string_view str) const noexcept {
        // The multiplier is assembled from explicitly 64-bit operands:
        // `unsigned long` is only 32 bits wide on LLP64 targets, where
        // `0xc6a4a793UL << 32` would shift by the full width of the type.
        constexpr std::uint64_t mul = (std::uint64_t{0xc6a4a793} << 32) + std::uint64_t{0x5bd1e995};

        std::uint64_t hash = seed_ ^ (str.size() * mul);

        // Consume the input in full 8-byte chunks.
        while (str.size() >= 8) {
            const std::uint64_t data = ShiftMix(Load8(str.data()) * mul) * mul;
            hash ^= data;
            hash *= mul;

            str.remove_prefix(8);
        }

        // Mix in the remaining 1..7 bytes, if any.
        if (!str.empty()) {
            const std::uint64_t data = LoadN(str.data(), str.size());
            hash ^= data;
            hash *= mul;
        }

        hash = ShiftMix(hash) * mul;
        hash = ShiftMix(hash);
        return hash;
    }

private:
    // Folds the upper bits of `v` into its lower bits.
    static constexpr std::uint64_t ShiftMix(std::uint64_t v) noexcept { return v ^ (v >> 47); }

    // Loads exactly 8 "lowercased" bytes starting at `data`.
    static constexpr std::uint64_t Load8(const char* data) noexcept { return LoadN(data, 8); }

    // Packs the first `n` bytes of `data` into an integer, byte `i` occupying
    // bits [8*i, 8*i + 8), with bit 0x20 forced on in each of them.
    // Callers in this struct only pass 1 <= n <= 8; n == 0 would make the
    // shift below equal to the width of the type.
    static constexpr std::uint64_t LoadN(const char* data, std::size_t n) noexcept {
        // Although lowercase and uppercase ASCII are indeed 32 (0x20) apart,
        // this approach makes for instance '[' and '{' equivalent too,
        // which is obviously broken and is easily exploitable.
        // However, for expected input (lower/upper-case ASCII letters + dashes)
        // this just works, and against malicious
        // input we defend by falling back to case-insensitive SipHash.
        constexpr std::uint64_t kDeliberatelyBrokenLowercaseMask = 0x2020202020202020ULL;

        // Keep only the mask bytes that correspond to the `n` loaded bytes.
        std::uint64_t result = kDeliberatelyBrokenLowercaseMask >> (8 * (8 - n));
        for (std::size_t i = 0; i < n; ++i) {
            const auto byte = static_cast<std::uint8_t>(data[i]);
            result |= static_cast<std::uint64_t>(byte) << (8 * i);
        }
        return result;
    }

    // Seed is chosen in such a way that 16 (presumably) most common userver
    // headers don't collide within default size of HeaderMap (32),
    // and that all headers used in userver itself don't collide within minimal
    // size needed to store them all (36 headers, minimal size = 64).
    // Note that since HeaderMap takes hashes modulo power of 2 it's guaranteed
    // that if two headers don't collide within size S, they don't collide for
    // bigger sizes.
    std::uint64_t seed_{54999};
};
82
83// Ugly, but TrivialBiMap requires keys to be in lower case.
84inline constexpr utils::TrivialBiMap kKnownHeadersLowercaseMap = [](auto selector) {
85 return selector()
86 .Case("content-type", std::int8_t{1})
87 .Case("content-encoding", 2)
88 .Case("content-length", 3)
89 .Case("transfer-encoding", 4)
90 .Case("host", 5)
91 .Case("accept", 6)
92 .Case("accept-encoding", 7)
93 .Case("accept-language", 8)
94 .Case("x-yataxi-api-key", 9)
95 .Case("user-agent", 10)
96 .Case("x-request-application", 11)
97 .Case("date", 12)
98 .Case("warning", 13)
99 .Case("access-control-allow-headers", 14)
100 .Case("allow", 15)
101 .Case("server", 16)
102 .Case("set-cookie", 17)
103 .Case("connection", 18)
104 .Case("cookie", 19)
105 .Case("x-yarequestid", 20)
106 .Case("x-yatraceid", 21)
107 .Case("x-yaspanid", 22)
108 .Case("x-requestid", 23)
109 .Case("x-backend-server", 24)
110 .Case("x-taxi-envoyproxy-dstvhost", 25)
111 .Case("baggage", 26)
112 .Case("x-yataxi-allow-auth-request", 27)
113 .Case("x-yataxi-allow-auth-response", 28)
114 .Case("x-yataxi-server-hostname", 29)
115 .Case("x-yataxi-client-timeoutms", 30)
116 .Case("x-yataxi-deadline-expired", 31)
117 .Case("x-yataxi-ratelimited-by", 32)
118 .Case("x-yataxi-ratelimit-reason", 33)
119 .Case("x-b3-traceid", 34)
120 .Case("x-b3-spanid", 35)
121 .Case("x-b3-sampled", 36)
122 .Case("x-b3-parentspanid", 37)
123 .Case("traceparent", 38)
124 .Case("tracestate", 39)
125 .Case("http2-settings", 40)
126 .Case(":method", 41)
127 .Case(":path", 42);
128};
129
// "No index" is deliberately encoded with different values for lookup
// (compile time) and for insertion (run time). That simplifies comparison:
// since the two values never match, header indexes can just be compared with
// ==, and two unknown headers never compare equal by index.
// Neither value is 0, and both are negative, unlike the indexes of the
// known headers.
inline constexpr std::int8_t kNoHeaderIndexLookup{-1};
inline constexpr std::int8_t kNoHeaderIndexInsertion{-2};
static_assert(kNoHeaderIndexLookup != kNoHeaderIndexInsertion);
static_assert(kNoHeaderIndexLookup != 0 && kNoHeaderIndexInsertion != 0);
136
137// We use this function when constructing a PredefinedHeader ...
138constexpr std::int8_t GetHeaderIndexForLookup(std::string_view key) {
139 const auto opt = kKnownHeadersLowercaseMap.TryFindICaseByFirst(key);
140 return opt.value_or(kNoHeaderIndexLookup);
141}
142
143// And this one when inserting an entry into the HeaderMap.
144// The purpose of having 2 different functions is to be able to
145// == header indexes even if none is present (both headers are unknown).
146inline std::int8_t GetHeaderIndexForInsertion(std::string_view key) {
147 const auto opt = kKnownHeadersLowercaseMap.TryFindICaseByFirst(key);
148 return opt.value_or(kNoHeaderIndexInsertion);
149}
150
151} // namespace impl
152
// Forward declarations of the classes that PredefinedHeader befriends.
namespace header_map {
class Map;
class Danger;
}  // namespace header_map
157
158/// @ingroup userver_universal userver_containers
159///
160/// @brief A struct to represent compile-time known header name.
161///
162/// Calculates the hash value at compile time with the same hasher
163/// HeaderMap uses, which allows to speed things up greatly.
164///
165/// Although it's possible to construct PredefinedHeader at runtime
166/// it makes little sense and is error-prone, since it
167/// doesn't own its data, so don't do that until really needed.
168class PredefinedHeader final {
169public:
170 explicit constexpr PredefinedHeader(utils::StringLiteral name)
171 : name{name}, hash{impl::UnsafeConstexprHasher{}(name)}, header_index{impl::GetHeaderIndexForLookup(name)} {}
172
173 constexpr operator utils::StringLiteral() const { return name; }
174
175 constexpr operator utils::zstring_view() const { return name; }
176
177 constexpr operator std::string_view() const { return name; }
178
179 explicit operator std::string() const { return std::string{name}; }
180
181private:
182 friend class header_map::Danger;
183 friend class header_map::Map;
184
185 // Header name.
186 const utils::StringLiteral name;
187
188 // Unsafe constexpr hash (unsafe in a hash-flood sense).
189 const std::size_t hash;
190
191 // We assign a different 'index' value to every known header,
192 // which allows us to do not perform case-insensitive names compare if indexes
193 // match.
194 // With this trick a successful lookup for PredefinedHeader in HeaderMap
195 // is basically "access an array by index and compare both hash and index".
196 // You can think of this field as an enum discriminant.
197 const std::int8_t header_index;
198};
199
200} // namespace http::headers
201
202USERVER_NAMESPACE_END
203
204template <>
205struct fmt::formatter<USERVER_NAMESPACE::http::headers::PredefinedHeader> : fmt::formatter<std::string_view> {
206 template <typename FormatContext>
207 auto format(const USERVER_NAMESPACE::http::headers::PredefinedHeader& value, FormatContext& ctx) const
208 -> decltype(ctx.out()) {
209 return fmt::format_to(ctx.out(), "{}", std::string_view{value});
210 }
211};