bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2007-2018 Dovecot authors, see the included COPYING file */
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
f67e08c0dbd9c0441e818d07a0b40b897f907495Timo Sirainen/* @UNSAFE: whole file */
f67e08c0dbd9c0441e818d07a0b40b897f907495Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen#include "lib.h"
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen#include "str-find.h"
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainenstruct str_find_context {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen pool_t pool;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen unsigned char *key;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen unsigned int key_len;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen unsigned int *matches;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen unsigned int match_count;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen size_t match_end_pos;
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen int badtab[UCHAR_MAX+1];
b5b0658d7d895d31cf3d96962c63d4b398f2b82eTimo Sirainen int goodtab[FLEXIBLE_ARRAY_MEMBER];
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen};
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainenstatic void init_badtab(struct str_find_context *ctx)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen{
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen unsigned int i, len_1 = ctx->key_len - 1;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (i = 0; i <= UCHAR_MAX; i++)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->badtab[i] = ctx->key_len;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (i = 0; i < len_1; i++)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->badtab[ctx->key[i]] = len_1 - i;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen}
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainenstatic void init_suffixes(struct str_find_context *ctx, unsigned int *suffixes)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen{
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen unsigned int len_1 = ctx->key_len - 1;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen int f = 0, g, i;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen suffixes[len_1] = ctx->key_len;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen g = len_1;
1384fac439fea3026b16a9d8d24954200e413bccTimo Sirainen for (i = (int)ctx->key_len - 2; i >= 0; i--) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (i > g && (int)suffixes[i + len_1 - f] < i - g)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen suffixes[i] = suffixes[i + len_1 - f];
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen else {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (i < g)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen g = i;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen f = i;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen while (g >= 0 && ctx->key[g] == ctx->key[g + len_1 - f])
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen g--;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen suffixes[i] = f - g;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen}
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainenstatic void init_goodtab(struct str_find_context *ctx)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen{
1384fac439fea3026b16a9d8d24954200e413bccTimo Sirainen unsigned int *suffixes;
1384fac439fea3026b16a9d8d24954200e413bccTimo Sirainen int j, i, len_1 = ctx->key_len - 1;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
f67e08c0dbd9c0441e818d07a0b40b897f907495Timo Sirainen suffixes = t_buffer_get(sizeof(*suffixes) * ctx->key_len);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen init_suffixes(ctx, suffixes);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (i = 0; i < (int)ctx->key_len; i++)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->goodtab[i] = ctx->key_len;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen j = 0;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (i = len_1; i >= -1; i--) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (i < 0 || suffixes[i] == (unsigned int)i + 1) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (; j < len_1 - i; j++) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (ctx->goodtab[j] == (int)ctx->key_len)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->goodtab[j] = len_1 - i;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (i = 0; i <= (int)ctx->key_len - 2; i++)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->goodtab[len_1 - suffixes[i]] = len_1 - i;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen}
f67e08c0dbd9c0441e818d07a0b40b897f907495Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainenstruct str_find_context *str_find_init(pool_t pool, const char *key)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen{
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen struct str_find_context *ctx;
2ac5f36aa7c2e7a07ba8815d43a6d7483f62e74cTimo Sirainen size_t key_len = strlen(key);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen i_assert(key_len > 0);
2ac5f36aa7c2e7a07ba8815d43a6d7483f62e74cTimo Sirainen i_assert(key_len < INT_MAX);
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen
e7d0bea63a08b08c47c4b5c187d2cb7127859657Timo Sirainen ctx = p_malloc(pool, MALLOC_ADD(sizeof(struct str_find_context),
e7d0bea63a08b08c47c4b5c187d2cb7127859657Timo Sirainen MALLOC_MULTIPLY(sizeof(ctx->goodtab[0]), key_len)));
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->pool = pool;
f609bd50846f898583ef481d8aac927b10aa8395Timo Sirainen ctx->matches = p_new(pool, unsigned int, key_len);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->key_len = key_len;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->key = p_malloc(pool, key_len);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen memcpy(ctx->key, key, key_len);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen init_goodtab(ctx);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen init_badtab(ctx);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen return ctx;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen}
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainenvoid str_find_deinit(struct str_find_context **_ctx)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen{
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen struct str_find_context *ctx = *_ctx;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen *_ctx = NULL;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen p_free(ctx->pool, ctx->matches);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen p_free(ctx->pool, ctx->key);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen p_free(ctx->pool, ctx);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen}
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainenbool str_find_more(struct str_find_context *ctx,
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen const unsigned char *data, size_t size)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen{
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen unsigned int key_len = ctx->key_len;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen unsigned int i, j, a, b;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen int bad_value;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (i = j = 0; i < ctx->match_count; i++) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen a = ctx->matches[i];
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (ctx->matches[i] + size >= key_len) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen /* we can fully determine this match now */
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (; a < key_len; a++) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (ctx->key[a] != data[a - ctx->matches[i]])
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen break;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen if (a == key_len) {
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen ctx->match_end_pos = key_len - ctx->matches[i];
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen return TRUE;
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen } else {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (b = 0; b < size; b++) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (ctx->key[a+b] != data[b])
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen break;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (b == size)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->matches[j++] = a + size;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (j > 0) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen i_assert(j + size < key_len);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->match_count = j;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen j = 0;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen } else {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen /* Boyer-Moore searching */
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen j = 0;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen while (j + key_len <= size) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen i = key_len - 1;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen while (ctx->key[i] == data[i + j]) {
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen if (i == 0) {
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen ctx->match_end_pos = j + key_len;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen return TRUE;
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen i--;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1384fac439fea3026b16a9d8d24954200e413bccTimo Sirainen bad_value = (int)(ctx->badtab[data[i + j]] + i + 1) -
1384fac439fea3026b16a9d8d24954200e413bccTimo Sirainen (int)key_len;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen j += I_MAX(ctx->goodtab[i], bad_value);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen i_assert(j <= size);
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->match_count = 0;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (; j < size; j++) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen for (i = j; i < size; i++) {
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (ctx->key[i-j] != data[i])
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen break;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen if (i == size)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->matches[ctx->match_count++] = size - j;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen }
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen return FALSE;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen}
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainensize_t str_find_get_match_end_pos(struct str_find_context *ctx)
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen{
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen return ctx->match_end_pos;
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen}
49a7e4dba84bbf35d82669d1ae79ad43949eed19Timo Sirainen
1d5e52e398381873081455ad99edec945038eec0Timo Sirainenvoid str_find_reset(struct str_find_context *ctx)
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen{
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen ctx->match_count = 0;
1d5e52e398381873081455ad99edec945038eec0Timo Sirainen}