fts-language.c revision 82eadbc4311faf7719d5db33fddaa06cb3a7010b
/* Copyright (c) 2014-2015 Dovecot authors, see the included COPYING file */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "lib.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "array.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "fts-language.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "strfuncs.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#include "llist.h"
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#ifdef HAVE_LIBEXTTEXTCAT_TEXTCAT_H
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen# include <libexttextcat/textcat.h>
82eadbc4311faf7719d5db33fddaa06cb3a7010bTimo Sirainen#elif defined (HAVE_FTS_EXTTEXTCAT)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen# include <textcat.h>
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#ifndef TEXTCAT_RESULT_UNKNOWN /* old textcat.h has typos */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen# ifdef TEXTCAT_RESULT_UNKOWN
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen# define TEXTCAT_RESULT_UNKNOWN TEXTCAT_RESULT_UNKOWN
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen# endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#define DETECT_STR_MAX_LEN 200
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstruct fts_language_list {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen pool_t pool;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen ARRAY_TYPE(fts_language) languages;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *textcat_config;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *textcat_datadir;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen void *textcat_handle;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen bool textcat_failed;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
/* Languages that fts_language_find() is able to return. */
const struct fts_language fts_languages[] = {
	{ "en" }, { "fi" }, { "fr" }, { "de" }
};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
/* Separate "data" language entry. It isn't a member of fts_languages[],
   so fts_language_find() never returns it. */
const struct fts_language fts_language_data = { "data" };
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenconst struct fts_language *fts_language_find(const char *name)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen unsigned int i;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen for (i = 0; i < N_ELEMENTS(fts_languages); i++) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (strcmp(fts_languages[i].name, name) == 0)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return &fts_languages[i];
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstruct fts_language_list *
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenfts_language_list_init(const char *const *settings)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_language_list *lp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen pool_t pool;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen unsigned int i;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *conf = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *data = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen for (i = 0; settings[i] != NULL; i += 2) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *key = settings[i], *value = settings[i+1];
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (strcmp(key, "fts_language_config") == 0) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen conf = value;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen else if (strcmp(key, "fts_language_data") == 0) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen data = value;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen } else {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_debug("Unknown setting: %s", key);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen pool = pool_alloconly_create("fts_language_list", 128);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen lp = p_new(pool, struct fts_language_list, 1);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen lp->pool = pool;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (conf != NULL)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen lp->textcat_config = p_strdup(pool, conf);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen else
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen lp->textcat_config = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (data != NULL)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen lp->textcat_datadir = p_strdup(pool, data);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen else
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen lp->textcat_datadir = NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen p_array_init(&lp->languages, pool, 32);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return lp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_language_list_deinit(struct fts_language_list **list)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen struct fts_language_list *lp = *list;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen *list = NULL;
82eadbc4311faf7719d5db33fddaa06cb3a7010bTimo Sirainen#ifdef HAVE_FTS_EXTTEXTCAT
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (lp->textcat_handle != NULL)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen textcat_Done(lp->textcat_handle);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen pool_unref(&lp->pool);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic const struct fts_language *
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenfts_language_list_find(struct fts_language_list *list, const char *name)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const struct fts_language *const *langp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen array_foreach(&list->languages, langp) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (strcmp((*langp)->name, name) == 0)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return *langp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return NULL;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenvoid fts_language_list_add(struct fts_language_list *list,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const struct fts_language *lang)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_assert(fts_language_list_find(list, lang->name) == NULL);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen array_append(&list->languages, &lang, 1);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenbool fts_language_list_add_names(struct fts_language_list *list,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *names,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char **unknown_name_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const char *const *langs;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const struct fts_language *lang;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen for (langs = t_strsplit_spaces(names, ", "); *langs != NULL; langs++) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen lang = fts_language_find(*langs);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (lang == NULL) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen /* unknown language */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen *unknown_name_r = *langs;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return FALSE;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (fts_language_list_find(list, lang->name) == NULL)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen fts_language_list_add(list, lang);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return TRUE;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenconst ARRAY_TYPE(fts_language) *
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenfts_language_list_get_all(struct fts_language_list *list)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return &list->languages;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenconst struct fts_language *
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenfts_language_list_get_first(struct fts_language_list *list)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const struct fts_language *const *langp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen langp = array_idx(&list->languages, 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return *langp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
#ifdef HAVE_FTS_EXTTEXTCAT
/* Walk the textcat candidates in order and pick the first one whose
   language is in the list. Returns TRUE and sets *lang_r on a match.
   Returns FALSE otherwise; *lang_r is then NULL if at least one candidate
   was examined. */
static bool fts_language_match_lists(struct fts_language_list *list,
				     candidate_t *candp, int candp_len,
				     const struct fts_language **lang_r)
{
	int i = 0;

	while (i < candp_len) {
		/* name is <lang>-<optional country or characterset>-<encoding>
		   eg, fi--utf8 or pt-PT-utf8. Only <lang> is compared. */
		const char *lang_name = t_strcut(candp[i].name, '-');

		*lang_r = fts_language_list_find(list, lang_name);
		if (*lang_r != NULL)
			return TRUE;
		i++;
	}
	return FALSE;
}
#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
#ifdef HAVE_FTS_EXTTEXTCAT
/* Create the list's textcat handle on first use.

   Returns 0 if a handle is available and -1 if it couldn't be created.
   A failure is logged once and remembered in textcat_failed, so later
   calls return -1 without retrying. */
static int fts_language_textcat_init(struct fts_language_list *list)
{
	const char *config_path = list->textcat_config;
	const char *data_dir = list->textcat_datadir;

	if (list->textcat_handle != NULL)
		return 0;
	if (list->textcat_failed)
		return -1;

	/* fall back to the compiled-in locations for anything not configured */
	if (config_path == NULL)
		config_path = TEXTCAT_DATADIR"/fpdb.conf";
	if (data_dir == NULL)
		data_dir = TEXTCAT_DATADIR"/";

	list->textcat_handle = special_textcat_Init(config_path, data_dir);
	if (list->textcat_handle != NULL) {
		/* The textcat minimum document size could be set here. It
		   currently defaults to 3. UTF8 is enabled by default. */
		return 0;
	}

	i_error("special_textcat_Init(%s, %s) failed",
		config_path, data_dir);
	list->textcat_failed = TRUE;
	return -1;
}
#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenstatic enum fts_language_result
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenfts_language_detect_textcat(struct fts_language_list *list ATTR_UNUSED,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const unsigned char *text ATTR_UNUSED,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen size_t size ATTR_UNUSED,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const struct fts_language **lang_r ATTR_UNUSED)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
82eadbc4311faf7719d5db33fddaa06cb3a7010bTimo Sirainen#ifdef HAVE_FTS_EXTTEXTCAT
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen candidate_t *candp; /* textcat candidate result array pointer */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen int cnt;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen bool match = FALSE;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (fts_language_textcat_init(list) < 0)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return FTS_LANGUAGE_RESULT_ERROR;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen candp = textcat_GetClassifyFullOutput(list->textcat_handle);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (candp == NULL)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_fatal_status(FATAL_OUTOFMEM, "textcat_GetCLassifyFullOutput failed: malloc() returned NULL");
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen cnt = textcat_ClassifyFull(list->textcat_handle, (const void *)text,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen I_MIN(size, DETECT_STR_MAX_LEN), candp);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (cnt > 0) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen T_BEGIN {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen match = fts_language_match_lists(list, candp, cnt, lang_r);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen } T_END;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen textcat_ReleaseClassifyFullOutput(list->textcat_handle, candp);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (match)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return FTS_LANGUAGE_RESULT_OK;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen else
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return FTS_LANGUAGE_RESULT_UNKNOWN;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen } else {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen textcat_ReleaseClassifyFullOutput(list->textcat_handle, candp);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen switch (cnt) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen case TEXTCAT_RESULT_SHORT:
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_assert(size < DETECT_STR_MAX_LEN);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return FTS_LANGUAGE_RESULT_SHORT;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen case TEXTCAT_RESULT_UNKNOWN:
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return FTS_LANGUAGE_RESULT_UNKNOWN;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen default:
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_unreached();
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#else
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return FTS_LANGUAGE_RESULT_UNKNOWN;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#endif
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenenum fts_language_result
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenfts_language_detect(struct fts_language_list *list,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const unsigned char *text ATTR_UNUSED,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen size_t size ATTR_UNUSED,
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const struct fts_language **lang_r)
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen{
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen i_assert(array_count(&list->languages) > 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen /* if there's only a single wanted language, return it always. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen if (array_count(&list->languages) == 1) {
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen const struct fts_language *const *langp =
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen array_idx(&list->languages, 0);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen *lang_r = *langp;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return FTS_LANGUAGE_RESULT_OK;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen }
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen return fts_language_detect_textcat(list, text, size, lang_r);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen}