12#include <boost/container_hash/hash.hpp>
14#include <userver/utils/assert.hpp>
15#include <userver/utils/fixed_array.hpp>
17USERVER_NAMESPACE_BEGIN
36template <
typename T,
typename Counter =
unsigned,
37 typename Hash1 = boost::hash<T>,
typename Hash2 = std::hash<T>>
38class FilterBloom final {
43 explicit FilterBloom(std::size_t counters_num = 256, Hash1 hash_1 = Hash1{},
44 Hash2 hash_2 = Hash2{})
45 : counters_(counters_num, 0),
46 hasher_1(std::move(hash_1)),
47 hasher_2(std::move(hash_2)) {
48 UASSERT((!std::is_same_v<Hash1, Hash2>));
49 UASSERT((std::is_same_v<std::invoke_result_t<Hash1,
const T&>,
50 std::invoke_result_t<Hash2,
const T&>>));
57 Counter
Estimate(
const T& item)
const;
60 bool Has(
const T& item)
const;
66 using HashedType = std::invoke_result_t<Hash1,
const T&>;
68 inline uint64_t Coefficient(std::size_t step)
const;
69 uint64_t GetHash(
const HashedType& hashed_value_1,
70 const HashedType& hashed_value_2,
71 uint64_t coefficient)
const;
72 Counter MinFrequency(
const HashedType& hashed_value_1,
73 const HashedType& hashed_value_2)
const;
75 utils::FixedArray<Counter> counters_;
79 static constexpr std::size_t kHashFunctionsCount = 4;
82template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
83inline uint64_t FilterBloom<T, Counter, Hash1, Hash2>::Coefficient(
84 std::size_t step)
const {
88template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
89uint64_t FilterBloom<T, Counter, Hash1, Hash2>::GetHash(
90 const HashedType& hashed_value_1,
const HashedType& hashed_value_2,
91 uint64_t coefficient)
const {
94 return hashed_value_1 + hashed_value_2 * coefficient;
97template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
98Counter FilterBloom<T, Counter, Hash1, Hash2>::MinFrequency(
99 const HashedType& hashed_value_1,
const HashedType& hashed_value_2)
const {
100 std::optional<Counter> min_count;
101 for (std::size_t step = 0; step < kHashFunctionsCount; ++step) {
103 counters_[GetHash(hashed_value_1, hashed_value_2, Coefficient(step)) %
105 if (!min_count.has_value() || min_count.value() > current_count) {
106 min_count = current_count;
109 return min_count.value();
112template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
113void FilterBloom<T, Counter, Hash1, Hash2>::
Increment(
const T& item) {
114 auto hash_value_1 = hasher_1(item);
115 auto hash_value_2 = hasher_2(item);
116 Counter min_frequency = MinFrequency(hash_value_1, hash_value_2);
118 for (std::size_t step = 0; step < kHashFunctionsCount; ++step) {
119 auto& current_count =
120 counters_[GetHash(hash_value_1, hash_value_2, Coefficient(step)) %
122 if (current_count == min_frequency) {
128template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
129Counter FilterBloom<T, Counter, Hash1, Hash2>::
Estimate(
const T& item)
const {
130 auto hash_value_1 = hasher_1(item);
131 auto hash_value_2 = hasher_2(item);
132 return MinFrequency(hash_value_1, hash_value_2);
135template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
136bool FilterBloom<T, Counter, Hash1, Hash2>::
Has(
const T& item)
const {
140template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
141void FilterBloom<T, Counter, Hash1, Hash2>::
Clear() {
142 for (
auto& counter : counters_) {