str-find.c revision f609bd50846f898583ef481d8aac927b10aa8395
bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2007-2008 Dovecot authors, see the included COPYING file */
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch/* @UNSAFE: whole file */
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch#include "lib.h"
bdd36cfdba3ff66d25570a9ff568d69e1eb543cfTimo Sirainen#include "str-find.h"
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Boschstruct str_find_context {
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch pool_t pool;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch unsigned char *key;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch unsigned int key_len;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch unsigned int *matches;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch unsigned int match_count;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch int badtab[UCHAR_MAX+1];
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch int goodtab[];
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch};
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Boschstatic void init_badtab(struct str_find_context *ctx)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch{
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch unsigned int i, len_1 = ctx->key_len - 1;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch for (i = 0; i <= UCHAR_MAX; i++)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch ctx->badtab[i] = ctx->key_len;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch for (i = 0; i < len_1; i++)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch ctx->badtab[ctx->key[i]] = len_1 - i;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch}
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Boschstatic void init_suffixes(struct str_find_context *ctx, unsigned int *suffixes)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch{
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch unsigned int len_1 = ctx->key_len - 1;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch int f = 0, g, i;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch suffixes[len_1] = ctx->key_len;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch g = len_1;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch for (i = ctx->key_len - 2; i >= 0; i--) {
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch if (i > g && (int)suffixes[i + len_1 - f] < i - g)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch suffixes[i] = suffixes[i + len_1 - f];
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch else {
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch if (i < g)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch g = i;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch f = i;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch while (g >= 0 && ctx->key[g] == ctx->key[g + len_1 - f])
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch g--;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch suffixes[i] = f - g;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch }
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch }
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch}
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Boschstatic void init_goodtab(struct str_find_context *ctx)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch{
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch unsigned int len_1 = ctx->key_len - 1;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch unsigned int j, *suffixes;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch int i;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch suffixes = t_buffer_get(sizeof(*suffixes) * ctx->key_len);
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch init_suffixes(ctx, suffixes);
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch for (i = 0; i < (int)ctx->key_len; i++)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch ctx->goodtab[i] = ctx->key_len;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch j = 0;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch for (i = len_1; i >= -1; i--) {
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch if (i < 0 || suffixes[i] == (unsigned int)i + 1) {
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch for (; j < len_1 - i; j++) {
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch if (ctx->goodtab[j] == (int)ctx->key_len)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch ctx->goodtab[j] = len_1 - i;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch }
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch }
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch }
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch for (i = 0; i <= (int)ctx->key_len - 2; i++)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch ctx->goodtab[len_1 - suffixes[i]] = len_1 - i;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch}
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Boschstruct str_find_context *str_find_init(pool_t pool, const char *key)
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch{
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch struct str_find_context *ctx;
62ed3307fb8a4a038a32a5181c503f1b645bf946Stephan Bosch unsigned int key_len = strlen(key);
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch ctx = p_malloc(pool, sizeof(struct str_find_context) +
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch sizeof(ctx->goodtab[0]) * key_len);
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch ctx->pool = pool;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch ctx->matches = p_new(pool, unsigned int, key_len);
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch ctx->key_len = key_len;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch ctx->key = p_malloc(pool, key_len);
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch memcpy(ctx->key, key, key_len);
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch init_goodtab(ctx);
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch init_badtab(ctx);
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Bosch return ctx;
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Bosch}
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Boschvoid str_find_deinit(struct str_find_context **_ctx)
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch{
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch struct str_find_context *ctx = *_ctx;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Bosch *_ctx = NULL;
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Bosch p_free(ctx->pool, ctx->matches);
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Bosch p_free(ctx->pool, ctx->key);
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Bosch p_free(ctx->pool, ctx);
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Bosch}
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Bosch
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Boschbool str_find_more(struct str_find_context *ctx,
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch const unsigned char *data, size_t size)
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch{
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch unsigned int key_len = ctx->key_len;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch unsigned int i, j, a, b;
4d955db590c3d76a631dfc5d37bcdf578a43e55aStephan Bosch int bad_value;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch for (i = j = 0; i < ctx->match_count; i++) {
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch a = ctx->matches[i];
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch if (ctx->matches[i] + size >= key_len) {
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch /* we can fully determine this match now */
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch for (; a < key_len; a++) {
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch if (ctx->key[a] != data[a - ctx->matches[i]])
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch break;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch }
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch if (a == key_len)
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch return TRUE;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch } else {
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch for (b = 0; b < size; b++) {
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch if (ctx->key[a+b] != data[b])
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch break;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch }
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch if (b == size)
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch ctx->matches[j++] = a + size;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch }
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch }
32f28ff765ef6983af0df78ebc5289b478abf3feStephan Bosch if (j > 0) {
32f28ff765ef6983af0df78ebc5289b478abf3feStephan Bosch i_assert(j + size < key_len);
32f28ff765ef6983af0df78ebc5289b478abf3feStephan Bosch ctx->match_count = j;
32f28ff765ef6983af0df78ebc5289b478abf3feStephan Bosch j = 0;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch } else {
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch /* Boyer-Moore searching */
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch j = 0;
32f28ff765ef6983af0df78ebc5289b478abf3feStephan Bosch while (j + key_len <= size) {
32f28ff765ef6983af0df78ebc5289b478abf3feStephan Bosch i = key_len - 1;
32f28ff765ef6983af0df78ebc5289b478abf3feStephan Bosch while (ctx->key[i] == data[i + j]) {
32f28ff765ef6983af0df78ebc5289b478abf3feStephan Bosch if (i == 0)
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch return TRUE;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch i--;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch }
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch bad_value = ctx->badtab[data[i + j]] + i + 1 - key_len;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch j += I_MAX(ctx->goodtab[i], bad_value);
72fc989c43a0dc94ec2f114b5e221beeab45519bTimo Sirainen }
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch i_assert(j <= size);
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch ctx->match_count = 0;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch }
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch for (; j < size; j++) {
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch for (i = j; i < size; i++) {
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch if (ctx->key[i-j] != data[i])
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch break;
72fc989c43a0dc94ec2f114b5e221beeab45519bTimo Sirainen }
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch if (i == size)
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch ctx->matches[ctx->match_count++] = size - j;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch }
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch return FALSE;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch}
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Boschvoid str_find_reset(struct str_find_context *ctx)
89e040049336e69c43fec09dcbdfd0f2ae5efd51Martti Rannanjärvi{
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch ctx->match_count = 0;
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch}
8fe8f97e688779add9cd042a9db4ddb7b117cce2Stephan Bosch