44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi#ifndef BLOOMFILTER_H
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi#define BLOOMFILTER_H
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi#include "buffer.h"
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi/* Short explanation of bloom filter:
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki TuomiBloom filter is a space-efficient probabilistic filter. The idea is
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomithat each element that gets added, is hashed thru one or more hashing
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomifunctions and the resulting hash modulo table size bit is set.
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki TuomiWhen seeing if there is an element set, it will check that each
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomihashing function result modulo table size bit is set. If any of them
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomiis not set, the element is missing. If all of them are set, the
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomielement is probably present.
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki TuomiA bloom filter will never report a false negative, but it might
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomireport a false positive value.
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki TuomiElements cannot be removed from this bloom filter.
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi*/
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistruct bloomfilter;
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomitypedef size_t bloomfilter_hash_func_t(const void *data, size_t len, uint32_t seed);
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi/* create bloomfilter of size with hash functions */
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistruct bloomfilter *
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_create(pool_t pool, size_t size,
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi bloomfilter_hash_func_t *const *hash_functions) ATTR_RETURNS_NONNULL;
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi/* Some helpers */
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi#define p_bloomfilter_create(pool, size) \
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi bloomfilter_create(pool, size, bloomfilter_default_functions)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi#define i_bloomfilter_create(size) p_bloomfilter_create(default_pool, size)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi#define t_bloomfilter_create(size) \
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi p_bloomfilter_create(pool_datastack_create(), size)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi/* Reference counting */
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomivoid bloomfilter_ref(struct bloomfilter *bf);
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomivoid bloomfilter_unref(struct bloomfilter **_bf);
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi/* Returns estimated number of items in this filter */
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomisize_t bloomfilter_estimated_item_count(struct bloomfilter *bf);
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi/* Returns TRUE if the element is probably in the filter */
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibool bloomfilter_has_data(struct bloomfilter *bf, const void *data, size_t len) ATTR_NULL(2);
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi/* Inserts element into filter */
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomivoid bloomfilter_set_data(struct bloomfilter *bf, const void *data, size_t len) ATTR_NULL(2);
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistatic inline bool
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_has_string(struct bloomfilter *bf, const char *data)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi{
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi return bloomfilter_has_data(bf, data, strlen(data));
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi}
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistatic inline void
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_set_string(struct bloomfilter *bf, const char *data)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi{
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi bloomfilter_set_data(bf, data, strlen(data));
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi}
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistatic inline void
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_set_strings(struct bloomfilter *bf, const char *const *datum)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi{
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi while(*datum != NULL) {
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi bloomfilter_set_data(bf, *datum, strlen(*datum));
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi datum++;
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi }
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi}
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistatic inline bool
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_has_buffer(struct bloomfilter *bf, const buffer_t *data)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi{
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi return bloomfilter_has_data(bf, data->data, data->used);
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi}
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistatic inline void
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_set_buffer(struct bloomfilter *bf, const buffer_t *data)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi{
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi bloomfilter_set_data(bf, data->data, data->used);
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi}
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistatic inline bool
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_has_int(struct bloomfilter *bf, intmax_t value)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi{
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi return bloomfilter_has_data(bf, &value, sizeof(value));
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi}
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistatic inline void
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_set_int(struct bloomfilter *bf, intmax_t value)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi{
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi bloomfilter_set_data(bf, &value, sizeof(value));
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi}
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistatic inline bool
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_has_uint(struct bloomfilter *bf, uintmax_t value)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi{
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi return bloomfilter_has_data(bf, &value, sizeof(value));
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi}
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomistatic inline void
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_set_uint(struct bloomfilter *bf, uintmax_t value)
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi{
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi bloomfilter_set_data(bf, &value, sizeof(value));
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi}
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomisize_t
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_murmur3_hash(const void *data, size_t len, uint32_t seed) ATTR_PURE;
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomisize_t
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomibloomfilter_md5_hash(const void *data, size_t len, uint32_t seed) ATTR_PURE;
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi/* By default, only murmur3 is used. */
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomiextern bloomfilter_hash_func_t *const bloomfilter_default_functions[];
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi
44d29bbfa14662dc1b2873f1a570a0767f827cd1Aki Tuomi#endif