test-fts-tokenizer.c revision 19ed8f08b23d6ed204e6b27e5d1c0c6fe6bb11dd
45312f52ff3a3d4c137447be4c7556500c2f8bf2Timo Sirainen/* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "@invalid invalid@ Abc Dfg <abc.dfg@example.com>, " \
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "Bar Baz <bar@example.org>" \
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "Foo Bar (comment)foo.bar@host.example.org " \
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "foo, foo@domain"
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainenstatic const char *test_inputs[] = {
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen /* generic things and word truncation: */
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen "hello world\r\n\nAnd there\twas: text galor\xC3\xA9\xE2\x80\xA7 "
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen "abc@example.com, "
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen "Bar Baz <bar@example.org>, "
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen "foo@domain "
8e371a3ce32bd64288786855b8ce0cb63f19f7d1Timo Sirainen "1234567890123456789012345678\xC3\xA4,"
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "12345678901234567890123456789\xC3\xA4,"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "123456789012345678901234567890\xC3\xA4,"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "and longlonglongabcdefghijklmnopqrstuvwxyz more.\n\n "
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "(\"Hello world\")3.14 3,14 last",
39afc7584d935b2dc7332c21966a7b20da03f1ecTimo Sirainen "' ' '' ''' 'quoted text' 'word' 'hlo words' you're bad'''word '''pre post'''",
39afc7584d935b2dc7332c21966a7b20da03f1ecTimo Sirainen "'1234567890123456789012345678\xC3\xA4,"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "123456789012345678901234567x'\xC3\xA4,"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "1234567890123456789012345678x're,"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "1234567890123456789012345678x',"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "1234567890123456789012345678x'',"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "12345678901234567890123456789x',"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "12345678901234567890123456789x'',"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "123456789012345678901234567890x',"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "123456789012345678901234567890x'',"
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen /* \xE2\x80\x99 = U+2019 is a smart quote, sometimes used as an apostrophe */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "\xE2\x80\x99 \xE2\x80\x99 \xE2\x80\x99\xE2\x80\x99 \xE2\x80\x99\xE2\x80\x99\xE2\x80\x99 \xE2\x80\x99quoted text\xE2\x80\x99\xE2\x80\x99word\xE2\x80\x99 \xE2\x80\x99hlo words\xE2\x80\x99 you\xE2\x80\x99re78901234567890123456789012 bad\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99word\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99pre post\xE2\x80\x99\xE2\x80\x99\xE2\x80\x99",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "you\xE2\x80\x99re\xE2\x80\x99xyz",
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen /* whitespace: with Unicode (UTF-8) U+FF01 (ef bc 81), U+2000 (e2 80 80),
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen U+205A (e2 81 9a) and U+205F (e2 81 9f) */
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "hello\xEF\xBC\x81world\r\nAnd\xE2\x80\x80there\twas: text "
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen "galore\xE2\x81\x9F""and\xE2\x81\x9Amore.\n\n",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* TR29 MidNumLet U+FF0E at end: U+FF0E is EF BC 8E */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "hello world\xEF\xBC\x8E",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* TR29 WB5a */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "l\xE2\x80\x99homme l\xE2\x80\x99humanit\xC3\xA9 d\xE2\x80\x99immixtions qu\xE2\x80\x99il aujourd'hui que'euq"
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert(fts_tokenizer_find("email-address") == fts_tokenizer_email_address);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert(fts_tokenizer_find("generic") == fts_tokenizer_generic);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainenstatic unsigned int
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainentest_tokenizer_inputoutput(struct fts_tokenizer *tok, const char *_input,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen const char *const *expected_output,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen const unsigned char *input = (const unsigned char *)_input;
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen unsigned int i, outi, max, char_len, input_len = strlen(_input);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* test all input at once */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen while (fts_tokenizer_next(tok, input, input_len, &token, &error) > 0) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen while (fts_tokenizer_next(tok, NULL, 0, &token, &error) > 0) {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert_idx(expected_output[outi] == NULL, outi);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen /* test input one byte at a time */
b039dabf4c53f72454e795930e7643b6e0e625f9Timo Sirainen while (fts_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen while (fts_tokenizer_final(tok, &token, &error) > 0) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert_idx(expected_output[outi] == NULL, outi);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* test input in random chunks */
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen char_len += uni_utf8_char_bytes(input[i+char_len]);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen while (fts_tokenizer_next(tok, input+i, char_len, &token, &error) > 0) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen while (fts_tokenizer_final(tok, &token, &error) > 0) {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert_idx(strcmp(token, expected_output[outi]) == 0, outi);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert_idx(expected_output[outi] == NULL, outi);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainentest_tokenizer_inputs(struct fts_tokenizer *tok,
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen const char *const *expected_output)
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen unsigned int i, outi = 0;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen for (i = 0; i < N_ELEMENTS(test_inputs); i++) {
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen outi = test_tokenizer_inputoutput(tok, test_inputs[i],
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert_idx(expected_output[outi] == NULL, outi);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void test_fts_tokenizer_generic_only(void)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678\xC3\xA4",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "12345678901234567890123456789",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "more", "Hello", "world", "3", "14", "3", "14", "last", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're", "bad",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678\xC3\xA4",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567x'",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "1234567890123456789012345678x'",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "1234567890123456789012345678x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678x",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "12345678901234567890123456789x",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "12345678901234567890123456789x",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're789012345678901234567890", "bad",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "l'homme", "l'humanit\xC3\xA9", "d'immixtions", "qu'il", "aujourd'hui", "que'euq", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(((struct generic_fts_tokenizer *) tok)->algorithm == BOUNDARY_ALGORITHM_SIMPLE);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainenconst char *const tr29_settings[] = {"algorithm", "tr29", NULL};
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen/* TODO: U+206F is in "Format" and is therefore currently not a word break.
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen This definitely needs to be remapped. */
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void test_fts_tokenizer_generic_tr29_only(void)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678\xC3\xA4",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "12345678901234567890123456789",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "more", "Hello", "world", "3", "14", "3,14", "last", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're", "bad",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678\xC3\xA4",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567x'",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678x'",
3320f4770d1f6c2cdd10f3c4ca5a324beb335339Timo Sirainen "1234567890123456789012345678x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "12345678901234567890123456789x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "12345678901234567890123456789x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're789012345678901234567890", "bad",
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen "l'homme", "l'humanit\xC3\xA9", "d'immixtions", "qu'il", "aujourd'hui", "que'euq", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings, &tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenconst char *const tr29_settings_wb5a[] = {"algorithm", "tr29", "wb5a", "yes", NULL};
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen/* TODO: U+206F is in "Format" and is therefore currently not a word break.
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen This definitely needs to be remapped. */
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void test_fts_tokenizer_generic_tr29_wb5a(void)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "1234567890123456789012345678\xC3\xA4",
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "12345678901234567890123456789",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "more", "Hello", "world", "3", "14", "3,14", "last", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're", "bad",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678\xC3\xA4",
19e8adccba16ff419f5675b1575358c2956dce83Timo Sirainen "123456789012345678901234567x'",
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "1234567890123456789012345678x'",
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "1234567890123456789012345678x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "1234567890123456789012345678x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "12345678901234567890123456789x",
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen "12345678901234567890123456789x",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "123456789012345678901234567890",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "quoted", "text", "word", "hlo", "words", "you're789012345678901234567890", "bad",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "l", "homme", "l", "humanit\xC3\xA9", "d", "immixtions", "qu", "il", "aujourd'hui", "que'euq", NULL,
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_begin("fts tokenizer generic TR29 with WB5a");
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, tr29_settings_wb5a, &tok, &error) == 0);
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainenstatic void test_fts_tokenizer_address_only(void)
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen static const char input[] = TEST_INPUT_ADDRESS;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "foo.bar@host.example.org", "foo@domain", NULL
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_begin("fts tokenizer email address only");
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_email_address, NULL, NULL, &tok, &error) == 0);
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen test_tokenizer_inputoutput(tok, input, expected_output, 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void test_fts_tokenizer_address_parent(const char *name, const char * const *settings)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char input[] = TEST_INPUT_ADDRESS;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "invalid", "invalid", "Abc", "Dfg", "abc", "dfg", "example", "com", "abc.dfg@example.com",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "Bar", "Baz", "bar", "example", "org", "bar@example.org",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "Foo", "Bar", "comment", "foo", "bar", "host", "example", "org", "foo.bar@host.example.org",
1f6c210c30992e95b806d2f517e2b3625ed941c5Timo Sirainen test_begin(t_strdup_printf("fts tokenizer email address + parent %s", name));
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, settings, &gen_tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, NULL, &tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_tokenizer_inputoutput(tok, input, expected_output, 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenconst char *const simple_settings[] = {"algorithm", "simple", NULL};
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainenstatic void test_fts_tokenizer_address_parent_simple(void)
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_fts_tokenizer_address_parent("simple", simple_settings);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainenstatic void test_fts_tokenizer_address_parent_tr29(void)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_fts_tokenizer_address_parent("tr29", tr29_settings);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainenstatic void test_fts_tokenizer_address_search(void)
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char input[] = TEST_INPUT_ADDRESS;
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const expected_output[] = {
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "invalid", "invalid", "Abc", "Dfg", "abc.dfg@example.com",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen "Foo", "Bar", "comment", "foo.bar@host.example.org",
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen static const char *const settings[] = { "search", "", NULL };
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_begin("fts tokenizer search email address + parent");
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_generic, NULL, NULL, &gen_tok, &error) == 0);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen test_assert(fts_tokenizer_create(fts_tokenizer_email_address, gen_tok, settings, &tok, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_tokenizer_inputoutput(tok, input, expected_output, 0);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* make sure state is forgotten at EOF */
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_next(tok, (const void *)"foo", 3, &token, &error) == 0);
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_next(tok, (const void *)"bar@baz", 7, &token, &error) == 0);
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_next(tok, (const void *)"foo@", 4, &token, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
cb1fd563e6000153d1be76fd8722a096bd144b77Timo Sirainen /* test reset explicitly */
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen test_assert(fts_tokenizer_next(tok, (const void *)"a", 1, &token, &error) == 0);
df02611c44e9432e7961223bf9bfa3fb233b1789Timo Sirainen test_assert(fts_tokenizer_next(tok, (const void *)"b@c", 3, &token, &error) == 0);
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) > 0 &&
b58fbcc79c40f867eccae98548fcd25a16823433Timo Sirainen test_assert(fts_tokenizer_final(tok, &token, &error) == 0);
82b990b0bb2a1dad5c2634a508a5ad87715db402Timo Sirainen static void (*test_functions[])(void) = {