stats-dist.c revision bcb4e51a409d94ae670de96afb8483a4f7855294
bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen#include "lib.h"
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen#include "stats-dist.h"
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen#include "sort.h"
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen/* In order to have a vaguely accurate 95th percentile, you need way
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen more than 20 in your subsample. */
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen#define TIMING_DEFAULT_SUBSAMPLING_BUFFER (20*24) /* 20*24 fits in a page */
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenstruct stats_dist {
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen unsigned int sample_count;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen unsigned int count;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen bool sorted;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen uint64_t min;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen uint64_t max;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen uint64_t sum;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen uint64_t samples[];
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen};
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenstruct stats_dist *stats_dist_init(void)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return stats_dist_init_with_size(TIMING_DEFAULT_SUBSAMPLING_BUFFER);
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenstruct stats_dist *stats_dist_init_with_size(unsigned int sample_count)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen i_assert(sample_count > 0);
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen struct stats_dist *stats =
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen i_malloc(sizeof(struct stats_dist) +
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen sizeof(uint64_t) * sample_count);
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->sample_count = sample_count;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return stats;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenvoid stats_dist_deinit(struct stats_dist **_stats)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen i_free_and_null(*_stats);
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenvoid stats_dist_reset(struct stats_dist *stats)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen unsigned int sample_count = stats->sample_count;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen i_zero(stats);
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->sample_count = sample_count;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenvoid stats_dist_add(struct stats_dist *stats, uint64_t value)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (stats->count < stats->sample_count) {
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->samples[stats->count] = value;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (stats->count == 0)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->min = stats->max = value;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen } else {
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen unsigned int idx = i_rand_limit(stats->count);
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (idx < stats->sample_count)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->samples[idx] = value;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen }
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->count++;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->sum += value;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (stats->max < value)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->max = value;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (stats->min > value)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->min = value;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->sorted = FALSE;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenunsigned int stats_dist_get_count(const struct stats_dist *stats)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return stats->count;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenuint64_t stats_dist_get_sum(const struct stats_dist *stats)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return stats->sum;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenuint64_t stats_dist_get_min(const struct stats_dist *stats)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return stats->min;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenuint64_t stats_dist_get_max(const struct stats_dist *stats)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return stats->max;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenuint64_t stats_dist_get_avg(const struct stats_dist *stats)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (stats->count == 0)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return 0;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return (stats->sum + stats->count/2) / stats->count;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenstatic void stats_dist_ensure_sorted(struct stats_dist *stats)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (stats->sorted)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen unsigned int count = (stats->count < stats->sample_count)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen ? stats->count
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen : stats->sample_count;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen i_qsort(stats->samples, count, sizeof(*stats->samples),
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen uint64_cmp);
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats->sorted = TRUE;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenuint64_t stats_dist_get_median(const struct stats_dist *stats)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (stats->count == 0)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return 0;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen /* cast-away const - reading requires sorting */
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats_dist_ensure_sorted((struct stats_dist *)stats);
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen unsigned int count = (stats->count < stats->sample_count)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen ? stats->count
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen : stats->sample_count;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen unsigned int idx1 = (count-1)/2, idx2 = count/2;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return (stats->samples[idx1] + stats->samples[idx2]) / 2;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen/* This is independent of the stats framework, useful for any selection task */
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenstatic unsigned int stats_dist_get_index(unsigned int range, double fraction)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen /* With out of range fractions, we can give the caller what
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen they probably want rather than just crashing. */
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (fraction >= 1.)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return range - 1;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (fraction <= 0.)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return 0;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen double idx_float = range * fraction;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen unsigned int idx = idx_float; /* C defaults to rounding down */
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen idx_float -= idx;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen /* Exact boundaries belong to the open range below them.
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen As FP isn't exact, and ratios may be specified inexactly,
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen include a small amount of fuzz around the exact boundary. */
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (idx_float < 1e-8*range)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen idx--;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return idx;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainenuint64_t stats_dist_get_percentile(const struct stats_dist *stats, double fraction)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen{
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen if (stats->count == 0)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return 0;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen /* cast-away const - reading requires sorting */
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen stats_dist_ensure_sorted((struct stats_dist *)stats);
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen unsigned int count = (stats->count < stats->sample_count)
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen ? stats->count
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen : stats->sample_count;
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen unsigned int idx = stats_dist_get_index(count, fraction);
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen return stats->samples[idx];
5f08b0309190ec818d46bfe0e497468b30714a93Timo Sirainen}