12#include <boost/container_hash/hash.hpp>
14#include <userver/utils/assert.hpp>
15#include <userver/utils/fixed_array.hpp>
17USERVER_NAMESPACE_BEGIN
36template <
typename T,
typename Counter =
unsigned,
typename Hash1 = boost::hash<T>,
typename Hash2 = std::hash<T>>
37class FilterBloom final {
42 explicit FilterBloom(std::size_t counters_num = 256, Hash1 hash_1 = Hash1{}, Hash2 hash_2 = Hash2{})
43 : counters_(counters_num, 0),
44 hasher_1_(std::move(hash_1)),
45 hasher_2_(std::move(hash_2))
47 UASSERT((!std::is_same_v<Hash1, Hash2>));
48 UASSERT((std::is_same_v<std::invoke_result_t<Hash1,
const T&>, std::invoke_result_t<Hash2,
const T&>>));
55 Counter
Estimate(
const T& item)
const;
58 bool Has(
const T& item)
const;
64 using HashedType = std::invoke_result_t<Hash1,
const T&>;
66 inline uint64_t Coefficient(std::size_t step)
const;
67 uint64_t GetHash(
const HashedType& hashed_value_1,
const HashedType& hashed_value_2, uint64_t coefficient)
const;
68 Counter MinFrequency(
const HashedType& hashed_value_1,
const HashedType& hashed_value_2)
const;
70 utils::FixedArray<Counter> counters_;
71 const Hash1 hasher_1_;
72 const Hash2 hasher_2_;
74 static constexpr std::size_t kHashFunctionsCount = 4;
77template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
78inline uint64_t FilterBloom<T, Counter, Hash1, Hash2>::Coefficient(std::size_t step)
const {
82template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
87 Hash2>::GetHash(
const HashedType& hashed_value_1,
const HashedType& hashed_value_2, uint64_t coefficient)
const {
90 return hashed_value_1 + hashed_value_2 * coefficient;
93template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
98 Hash2>::MinFrequency(
const HashedType& hashed_value_1,
const HashedType& hashed_value_2)
const {
99 std::optional<Counter> min_count;
100 for (std::size_t step = 0; step < kHashFunctionsCount; ++step) {
101 auto current_count = counters_[GetHash(hashed_value_1, hashed_value_2, Coefficient(step)) % counters_.size()];
102 if (!min_count.has_value() || min_count.value() > current_count) {
103 min_count = current_count;
106 return min_count.value();
109template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
110void FilterBloom<T, Counter, Hash1, Hash2>::
Increment(
const T& item) {
111 auto hash_value_1 = hasher_1_(item);
112 auto hash_value_2 = hasher_2_(item);
113 Counter min_frequency = MinFrequency(hash_value_1, hash_value_2);
115 for (std::size_t step = 0; step < kHashFunctionsCount; ++step) {
116 auto& current_count = counters_[GetHash(hash_value_1, hash_value_2, Coefficient(step)) % counters_.size()];
117 if (current_count == min_frequency) {
123template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
124Counter FilterBloom<T, Counter, Hash1, Hash2>::
Estimate(
const T& item)
const {
125 auto hash_value_1 = hasher_1_(item);
126 auto hash_value_2 = hasher_2_(item);
127 return MinFrequency(hash_value_1, hash_value_2);
130template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
131bool FilterBloom<T, Counter, Hash1, Hash2>::
Has(
const T& item)
const {
135template <
typename T,
typename Counter,
typename Hash1,
typename Hash2>
136void FilterBloom<T, Counter, Hash1, Hash2>::
Clear() {
137 for (
auto& counter : counters_) {