#include <benchmark/benchmark.h>
#include <atomic>
#include <cstddef>
#include <mutex>
#include <thread>
#include <vector>
#include <userver/concurrent/impl/interference_shield.hpp>
#include <utils/impl/parallelize_benchmark.hpp>
USERVER_NAMESPACE_BEGIN
namespace {
template <typename Mutex>
void GenericLock(benchmark::State& state) {
constexpr std::size_t kMutexCount = 256;
std::size_t i = 0;
Mutex mutexes[kMutexCount];
for ([[maybe_unused]] auto _ : state) {
mutexes[i].lock();
if (++i == kMutexCount) {
state.PauseTiming();
i = 0;
for (auto& m : mutexes) {
m.unlock();
}
state.ResumeTiming();
}
}
for (std::size_t j = 0; j < i; ++j) {
mutexes[j].unlock();
}
}
template <typename Mutex>
void GenericUnlock(benchmark::State& state) {
constexpr std::size_t kMutexCount = 256;
std::size_t i = 0;
Mutex mutexes[kMutexCount];
for (auto& m : mutexes) {
m.lock();
}
for ([[maybe_unused]] auto _ : state) {
mutexes[i].unlock();
if (++i == kMutexCount) {
state.PauseTiming();
i = 0;
for (auto& m : mutexes) {
m.lock();
}
state.ResumeTiming();
}
}
for (; i < kMutexCount; ++i) {
mutexes[i].unlock();
}
}
template <typename Mutex>
void GenericContention(benchmark::State& state) {
std::atomic<std::size_t> lock_unlock_count{0};
concurrent::impl::InterferenceShield<Mutex> m;
RunParallelBenchmark(state, [&](auto& range) {
std::uint64_t local_lock_unlock_count = 0;
for ([[maybe_unused]] auto _ : range) {
m->lock();
m->unlock();
++local_lock_unlock_count;
}
lock_unlock_count += local_lock_unlock_count;
});
const auto total_lock_unlock_count = static_cast<double>(lock_unlock_count.load());
state.counters["locks"] = benchmark::Counter(total_lock_unlock_count, benchmark::Counter::kIsRate);
state.counters["locks-per-thread"] =
benchmark::Counter(total_lock_unlock_count / state.range(0), benchmark::Counter::kIsRate);
}
template <typename Mutex>
void GenericContentionWithPayload(benchmark::State& state) {
std::atomic<std::uint64_t> lock_unlock_count{0};
concurrent::impl::InterferenceShield<Mutex> m;
RunParallelBenchmark(state, [&](auto& range) {
std::uint64_t local_lock_unlock_count = 0;
for ([[maybe_unused]] auto _ : range) {
m->lock();
for (int i = 0; i < 10; ++i) {
}
m->unlock();
++local_lock_unlock_count;
}
lock_unlock_count += local_lock_unlock_count;
});
const auto total_lock_unlock_count = static_cast<double>(lock_unlock_count.load());
state.counters["locks"] = benchmark::Counter(total_lock_unlock_count, benchmark::Counter::kIsRate);
state.counters["locks-per-thread"] =
benchmark::Counter(total_lock_unlock_count / state.range(0), benchmark::Counter::kIsRate);
}
void MutexCoroLock(benchmark::State& state) {
}
void MutexStdLock(benchmark::State& state) { GenericLock<std::mutex>(state); }
void SingleWaitingTaskMutexLock(benchmark::State& state) {
}
void MutexCoroUnlock(benchmark::State& state) {
}
void MutexStdUnlock(benchmark::State& state) { GenericLock<std::mutex>(state); }
void SingleWaitingTaskMutexUnlock(benchmark::State& state) {
}
void MutexCoroContention(benchmark::State& state) {
}
void MutexStdContention(benchmark::State& state) { GenericContention<std::mutex>(state); }
void SingleWaitingTaskMutexContention(benchmark::State& state) {
engine::RunStandalone(state.range(0), [&] { GenericContention<engine::SingleWaitingTaskMutex>(state); });
}
void MutexCoroContentionWithPayload(benchmark::State& state) {
}
void MutexStdContentionWithPayload(benchmark::State& state) { GenericContentionWithPayload<std::mutex>(state); }
void SingleWaitingTaskMutexContentionWithPayload(benchmark::State& state) {
engine::RunStandalone(state.range(0), [&] { GenericContentionWithPayload<engine::SingleWaitingTaskMutex>(state); });
}
}
BENCHMARK(MutexCoroLock);
BENCHMARK(MutexStdLock);
BENCHMARK(SingleWaitingTaskMutexLock);
BENCHMARK(MutexCoroUnlock);
BENCHMARK(MutexStdUnlock);
BENCHMARK(SingleWaitingTaskMutexUnlock);
BENCHMARK(MutexCoroContention)->RangeMultiplier(2)->Range(1, 32);
BENCHMARK(MutexStdContention)->RangeMultiplier(2)->Range(1, 32);
BENCHMARK(SingleWaitingTaskMutexContention)->Range(1, 2);
BENCHMARK(MutexCoroContentionWithPayload)->RangeMultiplier(2)->Range(1, 32);
BENCHMARK(MutexStdContentionWithPayload)->RangeMultiplier(2)->Range(1, 32);
BENCHMARK(SingleWaitingTaskMutexContentionWithPayload)->Range(1, 2);
USERVER_NAMESPACE_END