/* test-fts-tokenizer.c revision 19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11dd */
/* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen#include "lib.h"
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen#include "unichar.h"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen#include "test-common.h"
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen#include "fts-tokenizer.h"
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen#include "fts-tokenizer-private.h"
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen#include "fts-tokenizer-generic-private.h"
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen#define TEST_INPUT_ADDRESS \
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "@invalid invalid@ Abc Dfg <abc.dfg@example.com>, " \
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "Bar Baz <bar@example.org>" \
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "Foo Bar (comment)foo.bar@host.example.org " \
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "foo, foo@domain"
39afc7584d935b2dc7332c21966a7b20da03f1ecTimo Sirainen
/* Inputs fed to the generic tokenizer tests. Each array element is one
   independent input; the expected_output tables of the tests hold one
   NULL-terminated token group per element, in the same order.

   Adjacent string literals are concatenated by the compiler, so only the
   commas *outside* the quotes separate elements. */
static const char *const test_inputs[] = {
	/* generic things and word truncation: */
	"hello world\r\n\nAnd there\twas: text galor\xC3\xA9\xE2\x80\xA7 "
	"abc@example.com, "
	"Bar Baz <bar@example.org>, "
	"foo@domain "
	"1234567890123456789012345678\xC3\xA4,"
	"12345678901234567890123456789\xC3\xA4,"
	"123456789012345678901234567890\xC3\xA4,"
	"and longlonglongabcdefghijklmnopqrstuvwxyz more.\n\n "
	"(\"Hello world\")3.14 3,14 last",

	"1.",

	"' ' '' ''' 'quoted text' 'word' 'hlo words' you're bad'''word '''pre post'''",

	"'1234567890123456789012345678\xC3\xA4,"
	"123456789012345678901234567x'\xC3\xA4,"
	"1234567890123456789012345678x're,"
	"1234567890123456789012345678x',"
	"1234567890123456789012345678x'',"
	"12345678901234567890123456789x',"
	"12345678901234567890123456789x'',"
	"123456789012345678901234567890x',"
	"123456789012345678901234567890x'',"
	/* NOTE: there is no element separator here, so the literal above and
	   the smart-quote literal below form ONE input. The expected_output
	   tables rely on this: they have no NULL between these two groups.
	   Adding a comma here requires adding that NULL to every table. */

	/* \xe28099 = U+2019 is a smart quote, sometimes used as an apostrophe */
	"\xE2\x80\x99 \xE2\x80\x99 \xE2\x80\x99\xE2\x80\x99 \xE2\x80\x99\xE2\x80\x99\xE2\x80\x99 \xE2\x80\x99quoted text\xE2\x80\x99\xE2\x80\x99word\xE2\x80\x99 \xE2\x80\x99hlo words\xE2\x80\x99 you\xE2\x80\x99re78901234567890123456789012 bad\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99word\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99pre post\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99",

	"you\xE2\x80\x99re\xE2\x80\x99xyz",

	/* whitespace: with Unicode(utf8) U+FF01(ef bc 81), U+2000(e2 80 80),
	   U+205A(e2 81 9a) and U+205F(e2 81 9f). The literal is split after
	   \x9F because the following 'a' would otherwise be parsed as part
	   of the hex escape. */
	"hello\xEF\xBC\x81world\r\nAnd\xE2\x80\x80there\twas: text "
	"galore\xE2\x81\x9F""and\xE2\x81\x9Amore.\n\n",

	/* TR29 MinNumLet U+FF0E at end: u+FF0E is EF BC 8E */
	"hello world\xEF\xBC\x8E",

	/* TR29 WB5a */
	"l\xE2\x80\x99homme l\xE2\x80\x99humanit\xC3\xA9 d\xE2\x80\x99immixtions qu\xE2\x80\x99il aujourd'hui que'euq"
};
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen
39afc7584d935b2dc7332c21966a7b20da03f1ecTimo Sirainenstatic void test_fts_tokenizer_find(void)
39afc7584d935b2dc7332c21966a7b20da03f1ecTimo Sirainen{
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_begin("fts tokenizer find");
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert(fts_tokenizer_find("email-address") == fts_tokenizer_email_address);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert(fts_tokenizer_find("generic") == fts_tokenizer_generic);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_end();
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen}
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
/* Compares one token returned by the tokenizer against
   expected_output[*outi] and advances *outi.

   If the expected entry is NULL the tokenizer has produced more tokens than
   the current group contains. That is recorded as a test failure, but NULL
   is never handed to strcmp() and *outi is not moved past the group's
   terminator, so later checks keep reading inside the table. */
static void
test_tokenizer_inputoutput_check(const char *token,
				 const char *const *expected_output,
				 unsigned int *outi)
{
	const char *expected = expected_output[*outi];

	test_assert_idx(expected != NULL && strcmp(token, expected) == 0, *outi);
	if (expected != NULL)
		(*outi)++;
}

/* Calls fts_tokenizer_next() with the given data until it stops returning
   a positive value, checking every returned token. */
static void
test_tokenizer_inputoutput_feed(struct fts_tokenizer *tok,
				const unsigned char *data, size_t size,
				const char *const *expected_output,
				unsigned int *outi)
{
	const char *token, *error;

	while (fts_tokenizer_next(tok, data, size, &token, &error) > 0)
		test_tokenizer_inputoutput_check(token, expected_output, outi);
}

/* Calls fts_tokenizer_final() until it stops returning a positive value,
   checking every returned token, then verifies that the whole expected
   group was consumed. */
static void
test_tokenizer_inputoutput_finish(struct fts_tokenizer *tok,
				  const char *const *expected_output,
				  unsigned int *outi)
{
	const char *token, *error;

	while (fts_tokenizer_final(tok, &token, &error) > 0)
		test_tokenizer_inputoutput_check(token, expected_output, outi);
	test_assert_idx(expected_output[*outi] == NULL, *outi);
}

/* Tokenizes _input three times - all at once, one UTF-8 character per call,
   and in randomly sized chunks - and checks that every pass yields exactly
   the NULL-terminated token group starting at expected_output[first_outi].

   Returns the index just past that group's NULL terminator, i.e. the index
   where the next input's group begins.

   The chunking code advances by uni_utf8_char_bytes() of the first byte of
   each character; this presumably relies on _input being valid UTF-8 so
   that a chunk never ends inside a character or past the end of the
   input - TODO confirm. */
static unsigned int
test_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input,
			   const char *const *expected_output,
			   unsigned int first_outi)
{
	const unsigned char *input = (const unsigned char *)_input;
	unsigned int i, outi, max, char_len, input_len = strlen(_input);

	/* test all input at once; the second call with no data makes the
	   tokenizer hand out whatever it still buffers */
	outi = first_outi;
	test_tokenizer_inputoutput_feed(tok, input, input_len,
					expected_output, &outi);
	test_tokenizer_inputoutput_feed(tok, NULL, 0, expected_output, &outi);
	test_assert_idx(expected_output[outi] == NULL, outi);

	/* test input one UTF-8 character at a time */
	outi = first_outi;
	for (i = 0; i < input_len; i += char_len) {
		char_len = uni_utf8_char_bytes(input[i]);
		test_tokenizer_inputoutput_feed(tok, input+i, char_len,
						expected_output, &outi);
	}
	test_tokenizer_inputoutput_finish(tok, expected_output, &outi);

	/* test input in random chunks */
	outi = first_outi;
	for (i = 0; i < input_len; i += char_len) {
		/* pick 1..remaining bytes, then round the chunk up to a
		   whole number of characters */
		max = rand() % (input_len - i) + 1;
		for (char_len = 0; char_len < max; )
			char_len += uni_utf8_char_bytes(input[i+char_len]);
		test_tokenizer_inputoutput_feed(tok, input+i, char_len,
						expected_output, &outi);
	}
	test_tokenizer_inputoutput_finish(tok, expected_output, &outi);

	/* skip the group's NULL terminator */
	return outi+1;
}
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainentest_tokenizer_inputs(struct fts_tokenizer *tok,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen const char *const *expected_output)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen{
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen unsigned int i, outi = 0;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen for (i = 0; i < N_ELEMENTS(test_inputs); i++) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen outi = test_tokenizer_inputoutput(tok, test_inputs[i],
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen expected_output, outi);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen }
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert_idx(expected_output[outi] == NULL, outi);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen}
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void test_fts_tokenizer_generic_only(void)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen{
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "hello", "world", "And",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "there", "was", "text", "galor\xC3\xA9",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "abc", "example", "com", "Bar", "Baz",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "bar", "example", "org", "foo", "domain",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678\xC3\xA4",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "12345678901234567890123456789",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "and", "longlonglongabcdefghijklmnopqr",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "more", "Hello", "world", "3", "14", "3", "14", "last", NULL,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're", "bad",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "word", "pre", "post", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678\xC3\xA4",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567x'",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "1234567890123456789012345678x'",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "1234567890123456789012345678x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678x",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "12345678901234567890123456789x",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "12345678901234567890123456789x",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're789012345678901234567890", "bad",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "word", "pre", "post", NULL,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "you're'xyz", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "hello", "world", "And",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "there", "was", "text", "galore",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "and", "more", NULL,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "hello", "world", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "l'homme", "l'humanit\xC3\xA9", "d'immixtions", "qu'il", "aujourd'hui", "que'euq", NULL,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen NULL
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen };
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen struct fts_tokenizer *tok;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen const char *error;
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_begin("fts tokenizer generic simple");
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(((struct generic_fts_tokenizer *) tok)->algorithm == BOUNDARY_ALGORITHM_SIMPLE);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_tokenizer_inputs(tok, expected_output);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen fts_tokenizer_unref(&tok);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_end();
eddd9bf1a1369aea4a2715f6be1137da6d17d293Timo Sirainen}
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
/* NULL-terminated settings list passed to fts_tokenizer_create() to select
   the "tr29" algorithm (apparently alternating keys and values). */
const char *const tr29_settings[] = {
	"algorithm", "tr29",
	NULL
};
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen/* TODO: U+206F is in "Format" and therefore currently not word break.
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen This definitely needs to be remapped. */
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void test_fts_tokenizer_generic_tr29_only(void)
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen{
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "hello", "world", "And",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "there", "was", "text", "galor\xC3\xA9",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "abc", "example", "com", "Bar", "Baz",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "bar", "example", "org", "foo", "domain",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678\xC3\xA4",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "12345678901234567890123456789",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "and", "longlonglongabcdefghijklmnopqr",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "more", "Hello", "world", "3", "14", "3,14", "last", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1", NULL,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're", "bad",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "word", "pre", "post", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678\xC3\xA4",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567x'",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678x'",
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen "1234567890123456789012345678x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "12345678901234567890123456789x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "12345678901234567890123456789x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
83bb013a99f0936995f9c7a1077822662d8fefdbTimo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're789012345678901234567890", "bad",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "word", "pre", "post", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "you're'xyz", NULL,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "hello", "world", "And",
83bb013a99f0936995f9c7a1077822662d8fefdbTimo Sirainen "there", "was", "text", "galore",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "and", "more", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "hello", "world", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "l'homme", "l'humanit\xC3\xA9", "d'immixtions", "qu'il", "aujourd'hui", "que'euq", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen NULL
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen };
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen struct fts_tokenizer *tok;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen const char *error;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_begin("fts tokenizer generic TR29");
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_tokenizer_inputs(tok, expected_output);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen fts_tokenizer_unref(&tok);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_end();
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen}
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
/* NULL-terminated settings list passed to fts_tokenizer_create(): the
   "tr29" algorithm with "wb5a" set to "yes" (apparently alternating keys
   and values). */
const char *const tr29_settings_wb5a[] = {
	"algorithm", "tr29",
	"wb5a", "yes",
	NULL
};
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen/* TODO: U+206F is in "Format" and therefore currently not word break.
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen This definitely needs to be remapped. */
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void test_fts_tokenizer_generic_tr29_wb5a(void)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen{
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "hello", "world", "And",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "there", "was", "text", "galor\xC3\xA9",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "abc", "example", "com", "Bar", "Baz",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "bar", "example", "org", "foo", "domain",
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "1234567890123456789012345678\xC3\xA4",
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "12345678901234567890123456789",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "and", "longlonglongabcdefghijklmnopqr",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "more", "Hello", "world", "3", "14", "3,14", "last", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "1", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're", "bad",
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "word", "pre", "post", NULL,
19e8adccba16ff419f5675b1575358c2956dce83Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678\xC3\xA4",
19e8adccba16ff419f5675b1575358c2956dce83Timo Sirainen "123456789012345678901234567x'",
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "1234567890123456789012345678x'",
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "1234567890123456789012345678x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "12345678901234567890123456789x",
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "12345678901234567890123456789x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're789012345678901234567890", "bad",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "word", "pre", "post", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "you're'xyz", NULL,
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen "hello", "world", "And",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "there", "was", "text", "galore",
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen "and", "more", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "hello", "world", NULL,
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "l", "homme", "l", "humanit\xC3\xA9", "d", "immixtions", "qu", "il", "aujourd'hui", "que'euq", NULL,
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen NULL
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen };
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen struct fts_tokenizer *tok;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen const char *error;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_begin("fts tokenizer generic TR29 with WB5a");
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings_wb5a, &tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_tokenizer_inputs(tok, expected_output);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen fts_tokenizer_unref(&tok);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_end();
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen}
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainenstatic void test_fts_tokenizer_address_only(void)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen{
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen static const char input[] = TEST_INPUT_ADDRESS;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "abc.dfg@example.com", "bar@example.org",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "foo.bar@host.example.org", "foo@domain", NULL
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen };
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen struct fts_tokenizer *tok;
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen const char *error;
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_begin("fts tokenizer email address only");
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, NULL, &tok, &error) == 0);
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen test_tokenizer_inputoutput(tok, input, expected_output, 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen fts_tokenizer_unref(&tok);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_end();
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen}
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void test_fts_tokenizer_address_parent(const char *name, const char * const *settings)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen{
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char input[] = TEST_INPUT_ADDRESS;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "invalid", "invalid", "Abc", "Dfg", "abc", "dfg", "example", "com", "abc.dfg@example.com",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "Bar", "Baz", "bar", "example", "org", "bar@example.org",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "Foo", "Bar", "comment", "foo", "bar", "host", "example", "org", "foo.bar@host.example.org",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "foo", "foo", "domain", "foo@domain", NULL
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen };
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen struct fts_tokenizer *tok, *gen_tok;
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen const char *error;
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen test_begin(t_strdup_printf("fts tokenizer email address + parent %s", name));
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, settings, &gen_tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_tokenizer_inputoutput(tok, input, expected_output, 0);
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen fts_tokenizer_unref(&tok);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen fts_tokenizer_unref(&gen_tok);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_end();
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen}
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
/* NULL-terminated settings list passed to fts_tokenizer_create() to select
   the "simple" algorithm (apparently alternating keys and values). */
const char *const simple_settings[] = {
	"algorithm", "simple",
	NULL
};
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainenstatic void test_fts_tokenizer_address_parent_simple(void)
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen{
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_fts_tokenizer_address_parent("simple", simple_settings);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen}
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainenstatic void test_fts_tokenizer_address_parent_tr29(void)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen{
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_fts_tokenizer_address_parent("tr29", tr29_settings);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen}
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void test_fts_tokenizer_address_search(void)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen{
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char input[] = TEST_INPUT_ADDRESS;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "invalid", "invalid", "Abc", "Dfg", "abc.dfg@example.com",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "Bar", "Baz", "bar@example.org",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "Foo", "Bar", "comment", "foo.bar@host.example.org",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "foo", "foo@domain", NULL
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen };
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const settings[] = { "search", "", NULL };
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen struct fts_tokenizer *tok, *gen_tok;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen const char *token, *error;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_begin("fts tokenizer search email address + parent");
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, settings, &tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_tokenizer_inputoutput(tok, input, expected_output, 0);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* make sure state is forgotten at EOF */
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_next(tok, (const void *)"foo", 3, &token, &error) == 0);
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen strcmp(token, "foo") == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_next(tok, (const void *)"bar@baz", 7, &token, &error) == 0);
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen strcmp(token, "bar@baz") == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_next(tok, (const void *)"foo@", 4, &token, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen strcmp(token, "foo") == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* test reset explicitly */
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen test_assert(fts_tokenizer_next(tok, (const void *)"a", 1, &token, &error) == 0);
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen fts_tokenizer_reset(tok);
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen test_assert(fts_tokenizer_next(tok, (const void *)"b@c", 3, &token, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen strcmp(token, "b@c") == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen fts_tokenizer_unref(&tok);
82b990b0bb2a1dad5c2634a508a5ad87715db402Timo Sirainen fts_tokenizer_unref(&gen_tok);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_end();
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen}
82b990b0bb2a1dad5c2634a508a5ad87715db402Timo Sirainen
82b990b0bb2a1dad5c2634a508a5ad87715db402Timo Sirainenint main(void)
82b990b0bb2a1dad5c2634a508a5ad87715db402Timo Sirainen{
82b990b0bb2a1dad5c2634a508a5ad87715db402Timo Sirainen static void (*test_functions[])(void) = {
82b990b0bb2a1dad5c2634a508a5ad87715db402Timo Sirainen test_fts_tokenizer_find,
82b990b0bb2a1dad5c2634a508a5ad87715db402Timo Sirainen test_fts_tokenizer_generic_only,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_fts_tokenizer_generic_tr29_only,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_fts_tokenizer_generic_tr29_wb5a,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_fts_tokenizer_address_only,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_fts_tokenizer_address_parent_simple,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_fts_tokenizer_address_parent_tr29,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_fts_tokenizer_address_search,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen NULL
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen };
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen int ret;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen fts_tokenizers_init();
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen ret = test_run(test_functions);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen fts_tokenizers_deinit();
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen return ret;
0df9428baed48afaff90b4d4f03792d2fd756a43Timo Sirainen}
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen