c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#ifndef FTS_LANGUAGE_H
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#define FTS_LANGUAGE_H
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
/* List of wanted languages. Only forward-declared here: this header handles
   it exclusively through pointers (see the fts_language_list_*() functions). */
struct fts_language_list;
a9b3887f4d9ed75a76fed964c1930432bf84f4f5Timo Sirainen
/* Result codes returned by fts_language_detect(). */
enum fts_language_result {
	/* Provided sample is too short. */
	FTS_LANGUAGE_RESULT_SHORT,
	/* Language is unknown or not in the provided list. */
	FTS_LANGUAGE_RESULT_UNKNOWN,
	/* Text was detected to be one of the languages in the list. */
	FTS_LANGUAGE_RESULT_OK,
	/* textcat library initialization failed. */
	FTS_LANGUAGE_RESULT_ERROR
};
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
/* Describes a single language handled by the language module. */
struct fts_language {
	/* Two-letter language name lowercased, e.g. "en" */
	const char *name;
};
/* NOTE(review): ARRAY_DEFINE_TYPE is defined outside this header; presumably
   it declares the array type that is referred to below as
   ARRAY_TYPE(fts_language), with const struct fts_language pointers as
   elements - confirm against the array API. */
ARRAY_DEFINE_TYPE(fts_language, const struct fts_language *);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
/* Used for raw data that is indexed. This data shouldn't go through any
   language-specific filters. */
extern const struct fts_language fts_language_data;
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
/*
   Language module API.
*/
/* Initialize the language module.
   NOTE(review): presumably this has to be called before any language is
   registered or looked up - confirm against the implementation. */
void fts_languages_init(void);
/* Deinitialize the language module.
   NOTE(review): presumably the counterpart of fts_languages_init() that
   releases the internal list of supported languages - confirm against the
   implementation. */
void fts_languages_deinit(void);
/* Add a language to the list of supported languages. The language is
   identified by its name. */
void fts_language_register(const char *name);
/* Find a specified language by name. This finds from the internal list of
   supported languages.
   NOTE(review): the return value for a name that isn't in the list is not
   visible from this header (presumably NULL) - confirm. */
const struct fts_language *fts_language_find(const char *name);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
/*
   Language list API
*/
/* Create a new language list using the given settings and return it in
   list_r.
   NOTE(review): the format of settings and the exact return values are not
   visible from this header; presumably 0 is returned on success and a
   negative value with error_r set on failure - confirm against the
   implementation. */
int fts_language_list_init(const char *const *settings,
			   struct fts_language_list **list_r,
			   const char **error_r);
/* Free a list created with fts_language_list_init().
   NOTE(review): the list is passed by address, so presumably *list is reset
   to NULL afterwards - confirm against the implementation. */
void fts_language_list_deinit(struct fts_language_list **list);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
/* Add a language to the list of wanted languages. */
void fts_language_list_add(struct fts_language_list *list,
			   const struct fts_language *lang);
/* Add wanted languages from a space-separated list of language names.
   Duplicates are ignored. Returns TRUE if ok, FALSE and unknown_name_r if an
   unknown language was found from the list. */
bool fts_language_list_add_names(struct fts_language_list *list,
				 const char *names,
				 const char **unknown_name_r);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen/* Return an array of all wanted languages. */
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenconst ARRAY_TYPE(fts_language) *
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainenfts_language_list_get_all(struct fts_language_list *list);
/* Returns the first wanted language (default language). */
const struct fts_language *
fts_language_list_get_first(struct fts_language_list *list);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
/* Try to detect the language of the given text (size bytes).

   If text was detected to be one of the languages in the list,
   returns FTS_LANGUAGE_RESULT_OK and (a pointer to) the language (in
   the list) via lang_r. Otherwise returns one of:
    - FTS_LANGUAGE_RESULT_SHORT: the provided sample is too short
    - FTS_LANGUAGE_RESULT_UNKNOWN: the language is unknown or not in the list
    - FTS_LANGUAGE_RESULT_ERROR: textcat library initialization failed
   lang_r is only documented to be set for FTS_LANGUAGE_RESULT_OK. */
enum fts_language_result
fts_language_detect(struct fts_language_list *list,
		    const unsigned char *text, size_t size,
		    const struct fts_language **lang_r);
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen
c865b0e9c65fd77f7b2ab6f8616d3def5501ecb3Timo Sirainen#endif