25#ifndef JOIN_CORE_STATISTICS_HPP
26#define JOIN_CORE_STATISTICS_HPP
48 template <
class ClockPolicy>
52 using Duration =
typename ClockPolicy::Duration;
// Constructor initialisers and body (the signature and braces are not part of this excerpt).
// Size the histogram's backing memory object to _countsSize bytes.
60 : _countsMem (_countsSize)
// View the start of that memory as an array of atomic 64-bit counters.
61 , _counts (static_cast<
std::atomic<uint64_t>*> (_countsMem.get ()))
// Construct each of the _countsLen counters in place (placement new), starting at 0.
64 for (
int i = 0; i < _countsLen; ++i)
66 new (&_counts[i]) std::atomic<uint64_t> (0);
// Destructor body: explicitly invoke the destructor of every histogram counter,
// mirroring the placement-new construction performed by the constructor.
101 for (
int i = 0; i < _countsLen; ++i)
103 _counts[i].~atomic<uint64_t> ();
111 const std::string&
name () const noexcept
// start(): the beginning of a measured interval is simply the policy clock's current time.
122 return ClockPolicy::now ();
// Body of stop(): fold one completed interval into every aggregate.
// Elapsed time since startTime, converted to Duration and taken as a raw tick count.
// The statements below use this value under the name `ns`; its declaration is not
// part of this excerpt.
// NOTE(review): the name suggests Duration is nanoseconds - confirm against ClockPolicy.
132 static_cast<uint64_t
> (std::chrono::duration_cast<Duration> (ClockPolicy::now () - startTime).count ());
// Running total of all intervals, and the most recent interval.
134 _sum.fetch_add (ns, std::memory_order_relaxed);
135 _last.store (ns, std::memory_order_relaxed);
// Minimum: when compare_exchange_weak fails it reloads `prev` with the current value.
// NOTE(review): the condition that drives the retry sits on listing line 138, which
// is not shown here.
137 auto prev = _min.load (std::memory_order_relaxed);
139 !_min.compare_exchange_weak (prev, ns, std::memory_order_relaxed, std::memory_order_relaxed))
// Maximum: same pattern against _max (listing line 143 is likewise not shown).
142 prev = _max.load (std::memory_order_relaxed);
144 !_max.compare_exchange_weak (prev, ns, std::memory_order_relaxed, std::memory_order_relaxed))
// Count the sample in the histogram slot selected by countsIndex().
147 _counts[countsIndex (ns)].fetch_add (1, std::memory_order_relaxed);
// _count is incremented last and with release ordering; the accessors load it with acquire.
148 _count.fetch_add (1, std::memory_order_release);
// Body of reset(): put every accumulator back to the value it is initialised with.
// Each store is atomic on its own (relaxed ordering); the sequence as a whole is
// a series of independent stores, not a single atomic step.
156 _count.store (0, std::memory_order_relaxed);
157 _last.store (0, std::memory_order_relaxed);
// The minimum restarts from the largest representable value so that any sample is smaller.
158 _min.store (std::numeric_limits<uint64_t>::max (), std::memory_order_relaxed);
159 _max.store (0, std::memory_order_relaxed);
160 _sum.store (0, std::memory_order_relaxed);
// Clear every histogram slot.
161 for (
int i = 0; i < _countsLen; ++i)
163 _counts[i].store (0, std::memory_order_relaxed);
// count(): number of completed intervals; acquire load matching the release increment in stop().
173 return _count.load (std::memory_order_acquire);
// last(): duration of the most recently completed interval.
// When no interval has completed yet the branch under this test is taken; its body
// (listing lines 183-185) is not part of this excerpt.
182 if (_count.load (std::memory_order_acquire) == 0)
// Otherwise wrap the stored tick count in a Duration.
186 return Duration (_last.load (std::memory_order_relaxed));
// min(): smallest interval observed so far.
// The zero-sample branch under this test (listing lines 196-198) is not part of this
// excerpt. Without such a guard the stored value would still be the initial
// numeric_limits<uint64_t>::max() placeholder.
195 if (_count.load (std::memory_order_acquire) == 0)
199 return Duration (_min.load (std::memory_order_relaxed));
// max(): largest interval observed so far.
// The zero-sample branch under this test (listing lines 209-211) is not part of this excerpt.
208 if (_count.load (std::memory_order_acquire) == 0)
212 return Duration (_max.load (std::memory_order_relaxed));
// mean(): arithmetic mean of all completed intervals, as a floating-point
// nanosecond duration.
// NOTE(review): the raw _sum ticks are placed directly into a std::nano duration,
// which is only correct if Duration itself counts nanoseconds - confirm.
219 std::chrono::duration<double, std::nano>
mean () const noexcept
// Snapshot the sample count first (acquire).
221 const auto count = _count.load (std::memory_order_acquire);
// Zero result; the test guarding it (listing line 223) is not shown - presumably count == 0.
224 return std::chrono::duration<double, std::nano> (0.0);
// Otherwise: total / count, both converted to double before dividing.
226 return std::chrono::duration<double, std::nano> (
227 static_cast<double> (_sum.load (std::memory_order_relaxed)) /
static_cast<double> (
count));
// Body of throughput(): operations per second derived from the sample count and
// the accumulated interval time.
236 const auto count = _count.load (std::memory_order_acquire);
237 const auto sum = _sum.load (std::memory_order_acquire);
// Guard against an empty collector and against dividing by a zero total; the value
// returned in that case (listing lines 240-243) is not part of this excerpt.
239 if (
count == 0 || sum == 0)
// count / (sum / 1e9).
// NOTE(review): the 1e9 factor assumes `sum` is in nanoseconds - confirm.
244 return (
static_cast<double> (
count) * 1e9) /
static_cast<double> (sum);
// Body of percentile(p): walk the histogram until the requested rank is passed.
// Listing lines 255-259 (between the load and the rank computation) are not shown.
254 const uint64_t total = _count.load (std::memory_order_acquire);
// Rank to exceed: p percent of the sample count, truncated toward zero.
260 const uint64_t target =
static_cast<uint64_t
> (p / 100.0 *
static_cast<double> (total));
261 uint64_t cumulative = 0;
// Accumulate slot counts in index order, overflow slot included.
263 for (
int i = 0; i < _countsLen; ++i)
265 cumulative += _counts[i].load (std::memory_order_relaxed);
// The first slot whose running total passes the rank supplies the answer: its upper bound.
// NOTE(review): with p == 100, target equals total, so this test can only succeed if
// the slots sum to more than `total`; otherwise the fall-through below is returned.
// Check whether the lines not shown above handle that case.
267 if (cumulative > target)
269 return Duration (
static_cast<typename Duration::rep
> (bucketUpperBound (i)));
// No slot passed the rank: report the largest value the histogram can track.
273 return Duration (
static_cast<typename Duration::rep
> (_maxTrackableValue));
// mbind(numa): forwards `numa` to the mbind() of the histogram's backing memory
// object and returns that call's result unchanged.
// NOTE(review): presumably `numa` is a NUMA node id - confirm against the memory class.
282 int mbind (
int numa)
const noexcept
284 return _countsMem.mbind (numa);
// mlock(): delegates to the mlock() of the histogram's backing memory object
// and returns that call's result unchanged.
294 return _countsMem.
mlock ();
// hdrBucketIndex(v): power-of-two bucket that `v` falls into.
// Result is max(0, bit-width(v | (_subBucketCount - 1)) - (_subBucketHalfCountMagnitude + 1)).
// With the constants in this file (256 and 7), values 0..255 give 0, 256..511 give 1,
// 512..1023 give 2, and so on.
303 static int hdrBucketIndex (uint64_t v)
noexcept
// OR-ing in (_subBucketCount - 1) keeps the argument of __builtin_clzll non-zero and
// makes every value below _subBucketCount report the same bit width.
// 64 - clz is the number of significant bits.
305 const int pow2ceiling = 64 - __builtin_clzll (v |
static_cast<uint64_t
> (_subBucketCount - 1));
// Rebase so that the first _subBucketCount values map to bucket 0.
306 return std::max (0, pow2ceiling - (_subBucketHalfCountMagnitude + 1));
// countsIndex(ns): position of a sample in the flat counter array.
// Listing lines 315-320 and everything after line 323 (including the return) are not
// part of this excerpt, so only the index arithmetic is documented here.
314 static int countsIndex (uint64_t ns)
noexcept
// Power-of-two bucket of the sample.
321 const int bi = hdrBucketIndex (ns);
// Sub-bucket: the sample scaled down by the bucket's shift.
322 const int si =
static_cast<int> (ns >> bi);
// Flatten (bucket, sub-bucket); algebraically this is bi * _subBucketHalfCount + si.
323 const int idx = (bi + 1) * _subBucketHalfCount + si - _subBucketHalfCount;
// bucketUpperBound(idx): value reported for histogram slot `idx`.
// Reverses the flat index arithmetic used by countsIndex().
338 static uint64_t bucketUpperBound (
int idx)
noexcept
// The overflow slot (and anything past it) reports the tracking limit.
340 if (idx >= _overflowIdx)
342 return _maxTrackableValue;
// Recover the bucket; indices below 2 * _subBucketHalfCount all belong to bucket 0.
345 const int bi = std::max (0, idx / _subBucketHalfCount - 1);
// Recover the sub-bucket within that bucket.
346 const int si = idx - bi * _subBucketHalfCount;
// Start of the next sub-bucket at this bucket's scale.
348 return static_cast<uint64_t
> (si + 1) << bi;
// Histogram layout constants. Numeric values in the comments follow from the
// expressions below.
// log2 of half the sub-bucket count.
352 static constexpr int _subBucketHalfCountMagnitude = 7;
// Sub-buckets per bucket: 1 << 8 = 256.
355 static constexpr int _subBucketCount = 1 << (_subBucketHalfCountMagnitude + 1);
// Half of that: 128.
358 static constexpr int _subBucketHalfCount = _subBucketCount >> 1;
// Number of power-of-two buckets.
361 static constexpr int _bucketCount = 30;
// Regular slots in the flat array: (30 + 1) * 128 = 3968.
364 static constexpr int _buckets = (_bucketCount + 1) * _subBucketHalfCount;
// Total slots: the regular ones plus one more, 3969.
367 static constexpr int _countsLen = _buckets + 1;
// Index of that extra, last slot (3968); bucketUpperBound() treats it as overflow.
370 static constexpr int _overflowIdx = _buckets;
// Tracking limit: 256 << 29 = 2^37 = 137438953472 ticks.
373 static constexpr uint64_t _maxTrackableValue =
static_cast<uint64_t
> (_subBucketCount) << (_bucketCount - 1);
// Bytes needed for the whole counter array.
376 static constexpr uint64_t _countsSize =
static_cast<uint64_t
> (_countsLen) *
sizeof (std::atomic<uint64_t>);
// Pointer to the histogram counters; the pointer itself never changes after construction.
382 std::atomic<uint64_t>*
const _counts;
// Scalar aggregates. Each is aligned to 64 bytes.
// NOTE(review): presumably one cache line each, to avoid false sharing between
// writers - confirm for the target platform.
// Number of completed intervals.
385 alignas (64) std::atomic_uint64_t _count{0};
// Most recent interval.
388 alignas (64) std::atomic_uint64_t _last{0};
// Smallest interval; starts at the largest representable value.
391 alignas (64) std::atomic_uint64_t _min{std::numeric_limits<uint64_t>::max ()};
// Largest interval.
394 alignas (64) std::atomic_uint64_t _max{0};
// Sum of all intervals.
397 alignas (64) std::atomic_uint64_t _sum{0};
// Metric name, fixed at construction.
400 const std::string _name;
// latencyScaleIndex(): function-local static, so std::ios_base::xalloc() runs only on
// the first call and the same slot index is kept for the latency scale afterwards.
429 static int idx = std::ios_base::xalloc ();
// throughputScaleIndex(): function-local static, so std::ios_base::xalloc() runs only
// on the first call and the same slot index is kept for the throughput scale afterwards.
439 static int idx = std::ios_base::xalloc ();
466 inline std::ostream&
nsec (std::ostream& out)
477 inline std::ostream&
usec (std::ostream& out)
488 inline std::ostream&
msec (std::ostream& out)
499 inline std::ostream&
sec (std::ostream& out)
510 inline std::ostream&
ops (std::ostream& out)
521 inline std::ostream&
kops (std::ostream& out)
532 inline std::ostream&
mops (std::ostream& out)
543 inline std::ostream&
gops (std::ostream& out)
// Stream output of a statistics object (`statistics`) to `out`.
// Only part of the function is present in this excerpt: the signature, the unit
// assignments and the statements that emit the columns are not shown.
555 template <
class ClockPolicy>
// Latency divisor. `s` is obtained on a line that is not shown (presumably the value
// stored on the stream under latencyScaleIndex()); 0 is treated as "unset" and becomes 1.
559 const long lscale = [&] {
561 return s == 0 ? 1L : s;
564 const double dlscale =
static_cast<double> (lscale);
// Unit label for latency columns; defaults to "ns" and is changed by the branches
// below (their bodies are not shown).
565 const char* lunit =
"ns";
566 if (lscale == 1'000'000'000)
570 else if (lscale == 1'000'000)
574 else if (lscale == 1'000)
// Throughput divisor, same scheme as the latency divisor above.
580 const long tscale = [&] {
582 return s == 0 ? 1L : s;
585 const double dtscale =
static_cast<double> (tscale);
// Unit label for the throughput column; defaults to "ops/s".
586 const char* tunit =
"ops/s";
587 if (tscale == 1'000'000'000)
591 else if (tscale == 1'000'000)
595 else if (tscale == 1'000)
// Snapshot every figure once before formatting.
601 const auto count = statistics.
count ();
602 const auto min = statistics.
min ();
603 const auto mean = statistics.
mean ();
604 const auto max = statistics.
max ();
// 50th, 90th and 99th percentiles.
606 const auto p50 = statistics.
percentile (50.0);
607 const auto p90 = statistics.
percentile (90.0);
608 const auto p99 = statistics.
percentile (99.0);
// Helper for one latency column: the value in fixed notation at the stream's current
// precision, followed by " (<unit>)". The text is built in a temporary string stream.
610 auto printLatCol = [&] (
double v) {
611 std::ostringstream ss;
612 ss << std::fixed << std::setprecision (out.precision ()) << v <<
" (" << lunit <<
")";
// Throughput column, formatted the same way after dividing by the throughput divisor.
// `thr` is defined on a line that is not shown.
615 std::ostringstream oss;
616 oss << std::fixed << std::setprecision (out.precision ()) << thr / dtscale <<
" (" << tunit <<
")";
// Latency columns in order: min, mean, max, p50, p90, p99, each divided by the latency divisor.
619 printLatCol (
static_cast<double> (min.count ()) / dlscale);
620 printLatCol (mean.count () / dlscale);
621 printLatCol (
static_cast<double> (max.count ()) / dlscale);
622 printLatCol (
static_cast<double> (p50.count ()) / dlscale);
623 printLatCol (
static_cast<double> (p90.count ()) / dlscale);
624 printLatCol (
static_cast<double> (p99.count ()) / dlscale);
// ScopedStats<Statistics>: guard that measures the lifetime of a scope.
// Only the lines below are present in this excerpt; the class head, the other members
// and the braces are not shown.
632 template <
typename Statistics>
// Constructor initialisers: keep a reference to the collector and take the start time
// immediately.
643 : _statistics (stats)
644 , _start (_statistics.start ())
// Destructor body: report the interval that began at construction.
679 _statistics.stop (_start);
// Held by reference, so the statistics object has to outlive the guard.
684 Statistics& _statistics;
lock-free, multi-producer-safe performance statistics collector.
Definition statistics.hpp:50
~BasicStats() noexcept
destroy instance.
Definition statistics.hpp:99
BasicStats(BasicStats &&other)=delete
move constructor.
std::chrono::duration< double, std::nano > mean() const noexcept
arithmetic mean of all completed intervals.
Definition statistics.hpp:219
Duration last() const noexcept
duration of the most recently completed interval.
Definition statistics.hpp:180
int mlock() const noexcept
lock histogram memory in RAM.
Definition statistics.hpp:292
typename ClockPolicy::TimePoint TimePoint
Definition statistics.hpp:53
TimePoint start() const noexcept
mark the beginning of a measured interval.
Definition statistics.hpp:120
void stop(TimePoint startTime) noexcept
mark the end of a measured interval and update all aggregates.
Definition statistics.hpp:129
BasicStats(const std::string &name={})
create instance.
Definition statistics.hpp:59
typename ClockPolicy::Duration Duration
Definition statistics.hpp:52
const std::string & name() const noexcept
get metric name.
Definition statistics.hpp:111
BasicStats(const BasicStats &other)=delete
copy constructor.
Duration min() const noexcept
minimum duration observed across all completed intervals.
Definition statistics.hpp:193
double throughput() const noexcept
operations per second.
Definition statistics.hpp:234
void reset() noexcept
reset all accumulators to their initial state.
Definition statistics.hpp:154
BasicStats & operator=(const BasicStats &other)=delete
copy assignment.
Duration max() const noexcept
maximum duration observed across all completed intervals.
Definition statistics.hpp:206
Duration percentile(double p) const noexcept
compute the requested percentile from the HDR histogram.
Definition statistics.hpp:252
uint64_t count() const noexcept
total number of completed intervals.
Definition statistics.hpp:171
int mlock() const noexcept
lock memory in RAM.
Definition memory.hpp:256
RAII guard that automatically calls start() on construction and stop() on destruction.
Definition statistics.hpp:634
ScopedStats(ScopedStats &&other)=delete
move constructor.
~ScopedStats() noexcept
destructor.
Definition statistics.hpp:677
ScopedStats(const ScopedStats &other)=delete
copy constructor.
ScopedStats & operator=(const ScopedStats &other)=delete
copy assignment.
ScopedStats(Statistics &stats) noexcept
construct the guard and immediately call start() on stats.
Definition statistics.hpp:642
typename Statistics::TimePoint TimePoint
Definition statistics.hpp:636
constexpr int colCount
width of the sample count column.
Definition statistics.hpp:412
int throughputScaleIndex() noexcept
returns the xalloc index used to store the throughput scale on a stream.
Definition statistics.hpp:437
constexpr int colLatency
width of the latency columns.
Definition statistics.hpp:418
constexpr int colTotal
total table width.
Definition statistics.hpp:421
int latencyScaleIndex() noexcept
returns the xalloc index used to store the latency scale on a stream.
Definition statistics.hpp:427
constexpr int colThroughput
width of the throughput column.
Definition statistics.hpp:415
constexpr int colMetric
width of the metric name column.
Definition statistics.hpp:409
Definition acceptor.hpp:32
std::ostream & kops(std::ostream &out)
set throughput display unit to Kops/sec.
Definition statistics.hpp:521
std::ostream & msec(std::ostream &out)
set latency display unit to milliseconds.
Definition statistics.hpp:488
std::ostream & sec(std::ostream &out)
set latency display unit to seconds.
Definition statistics.hpp:499
std::ostream & operator<<(std::ostream &os, const BasicUnixEndpoint< Protocol > &endpoint)
push endpoint representation into a stream.
Definition endpoint.hpp:255
std::ostream & ops(std::ostream &out)
set throughput display unit to ops/sec.
Definition statistics.hpp:510
std::ostream & gops(std::ostream &out)
set throughput display unit to Gops/sec.
Definition statistics.hpp:543
std::ostream & nsec(std::ostream &out)
set latency display unit to nanoseconds.
Definition statistics.hpp:466
std::ostream & usec(std::ostream &out)
set latency display unit to microseconds.
Definition statistics.hpp:477
std::ostream & mops(std::ostream &out)
set throughput display unit to Mops/sec.
Definition statistics.hpp:532
std::ostream & statsHeader(std::ostream &out)
print the statistics table header to a stream.
Definition statistics.hpp:449
#define JOIN_UNLIKELY(x)
Definition utils.hpp:47