12#include <boost/container_hash/hash.hpp>
14#include <userver/utils/assert.hpp>
15#include <userver/utils/fixed_array.hpp>
17USERVER_NAMESPACE_BEGIN
36template <
typename T,
typename Counter =
unsigned,
typename Hash1 = boost::hash<T>,
typename Hash2 = std::hash<T>>
37class FilterBloom final {
42 explicit FilterBloom(std::size_t counters_num = 256, Hash1 hash_1 = Hash1{}, Hash2 hash_2 = Hash2{})
43 : counters_(counters_num, 0), hasher_1(std::move(hash_1)), hasher_2(std::move(hash_2)) {
44 UASSERT((!std::is_same_v<Hash1, Hash2>));
45 UASSERT((std::is_same_v<std::invoke_result_t<Hash1,
const T&>, std::invoke_result_t<Hash2,
const T&>>));
52 Counter
Estimate(
const T& item)
const;
55 bool Has(
const T& item)
const;
61 using HashedType = std::invoke_result_t<Hash1,
const T&>;
63 inline uint64_t Coefficient(std::size_t step)
const;
64 uint64_t GetHash(
const HashedType& hashed_value_1,
const HashedType& hashed_value_2, uint64_t coefficient)
const;
65 Counter MinFrequency(
const HashedType& hashed_value_1,
const HashedType& hashed_value_2)
const;
67 utils::FixedArray<Counter> counters_;
71 static constexpr std::size_t kHashFunctionsCount = 4;
74template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
75inline uint64_t FilterBloom<T, Counter, Hash1, Hash2>::Coefficient(std::size_t step)
const {
79template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
80uint64_t FilterBloom<T, Counter, Hash1, Hash2>::GetHash(
81 const HashedType& hashed_value_1,
82 const HashedType& hashed_value_2,
87 return hashed_value_1 + hashed_value_2 * coefficient;
90template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
91Counter FilterBloom<T, Counter, Hash1, Hash2>::MinFrequency(
92 const HashedType& hashed_value_1,
93 const HashedType& hashed_value_2
95 std::optional<Counter> min_count;
96 for (std::size_t step = 0; step < kHashFunctionsCount; ++step) {
97 auto current_count = counters_[GetHash(hashed_value_1, hashed_value_2, Coefficient(step)) % counters_.size()];
98 if (!min_count.has_value() || min_count.value() > current_count) {
99 min_count = current_count;
102 return min_count.value();
105template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
106void FilterBloom<T, Counter, Hash1, Hash2>::
Increment(
const T& item) {
107 auto hash_value_1 = hasher_1(item);
108 auto hash_value_2 = hasher_2(item);
109 Counter min_frequency = MinFrequency(hash_value_1, hash_value_2);
111 for (std::size_t step = 0; step < kHashFunctionsCount; ++step) {
112 auto& current_count = counters_[GetHash(hash_value_1, hash_value_2, Coefficient(step)) % counters_.size()];
113 if (current_count == min_frequency) {
119template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
120Counter FilterBloom<T, Counter, Hash1, Hash2>::
Estimate(
const T& item)
const {
121 auto hash_value_1 = hasher_1(item);
122 auto hash_value_2 = hasher_2(item);
123 return MinFrequency(hash_value_1, hash_value_2);
126template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
127bool FilterBloom<T, Counter, Hash1, Hash2>::
Has(
const T& item)
const {
131template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
132void FilterBloom<T, Counter, Hash1, Hash2>::
Clear() {
133 for (
auto& counter : counters_) {