bcb4e51a409d94ae670de96afb8483a4f7855294Stephan Bosch/* Copyright (c) 2014-2018 Dovecot authors, see the included COPYING file */
a46abae27d4b00f77e5932ed8f7595c054fe0280Timo Sirainenstatic ARRAY(const struct fts_tokenizer *) fts_tokenizer_classes;
4ef1f9f3293965734e6e3c38c191ceb2246a721fTeemu Huovila if (!array_is_created(&fts_tokenizer_classes)) {
4ef1f9f3293965734e6e3c38c191ceb2246a721fTeemu Huovila fts_tokenizer_register(fts_tokenizer_generic);
4ef1f9f3293965734e6e3c38c191ceb2246a721fTeemu Huovila fts_tokenizer_register(fts_tokenizer_email_address);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_tokenizer_register(const struct fts_tokenizer *tok_class)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (!array_is_created(&fts_tokenizer_classes))
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_array_init(&fts_tokenizer_classes, FTS_TOKENIZER_CLASSES_NR);
a46abae27d4b00f77e5932ed8f7595c054fe0280Timo Sirainen array_append(&fts_tokenizer_classes, &tok_class, 1);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_tokenizer_unregister(const struct fts_tokenizer *tok_class)
a46abae27d4b00f77e5932ed8f7595c054fe0280Timo Sirainen if (strcmp((*tp)->name, tok_class->name) == 0) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen idx = array_foreach_idx(&fts_tokenizer_classes, tp);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenconst struct fts_tokenizer *fts_tokenizer_find(const char *name)
4ef1f9f3293965734e6e3c38c191ceb2246a721fTeemu Huovilaconst char *fts_tokenizer_name(const struct fts_tokenizer *tok)
12952c18d10fa83be65059471139c2fdc8a00c3dTimo Sirainenstatic void fts_tokenizer_self_reset(struct fts_tokenizer *tok)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenint fts_tokenizer_create(const struct fts_tokenizer *tok_class,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *const *settings,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char **error_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_assert(settings == NULL || str_array_length(settings) % 2 == 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (tok_class->v->create(settings, &tok, error_r) < 0) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen tok->parent_input = buffer_create_dynamic(default_pool, 128);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_tokenizer_ref(struct fts_tokenizer *tok)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_tokenizer_unref(struct fts_tokenizer **_tok)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenfts_tokenizer_next_self(struct fts_tokenizer *tok,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen (data == tok->prev_data && size == tok->prev_size));
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen /* whole new data */
8b1a9a4d63b0abccdf7cb1acb8359d5396dd657bTimo Sirainen ret = tok->v->next(tok, data, size, &skip, token_r, error_r);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen /* continuing previous data */
3dc5a231160859c9627157dc53a94d5e4494fe9fTeemu Huovila ret = tok->v->next(tok, data + tok->prev_skip,
3dc5a231160859c9627157dc53a94d5e4494fe9fTeemu Huovila } else if (ret == 0) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen /* we need a new data block */
2730605833442b5ddcb261f90b8375fc98201e35Timo Sirainenvoid fts_tokenizer_reset(struct fts_tokenizer *tok)
2bb1ef0b669901fb91ff961e7fb074439ef769abTimo Sirainenint fts_tokenizer_next(struct fts_tokenizer *tok,
8b1a9a4d63b0abccdf7cb1acb8359d5396dd657bTimo Sirainen ret = fts_tokenizer_next_self(tok, data, size, token_r, error_r);
3dc5a231160859c9627157dc53a94d5e4494fe9fTeemu Huovila if (ret <= 0 || tok->parent == NULL || tok->skip_parents)
3dc5a231160859c9627157dc53a94d5e4494fe9fTeemu Huovila buffer_append(tok->parent_input, *token_r, strlen(*token_r));
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen /* fall through */
3dc5a231160859c9627157dc53a94d5e4494fe9fTeemu Huovila ret = fts_tokenizer_next(tok->parent, tok->parent_input->data,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen /* fall through */
8b1a9a4d63b0abccdf7cb1acb8359d5396dd657bTimo Sirainen ret = fts_tokenizer_next(tok->parent, NULL, 0, token_r, error_r);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen /* we're finished sending this token to parent tokenizer.
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen see if our own tokenizer has more tokens available */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen tok->parent_state = FTS_TOKENIZER_PARENT_STATE_ADD_DATA;
8b1a9a4d63b0abccdf7cb1acb8359d5396dd657bTimo Sirainen return fts_tokenizer_next(tok, data, size, token_r, error_r);
9bbc62421e4000b04563eb5f93272fc51b893fb2Timo Sirainen /* we must not be returning empty tokens */
8b1a9a4d63b0abccdf7cb1acb8359d5396dd657bTimo Sirainenint fts_tokenizer_final(struct fts_tokenizer *tok, const char **token_r,
8b1a9a4d63b0abccdf7cb1acb8359d5396dd657bTimo Sirainen const char **error_r)