#ifndef FTS_FILTER_H
#define FTS_FILTER_H

/* Forward declarations; this header only uses pointers to these types. */
struct fts_language;
struct fts_filter;

/*
 Settings are given in the form of a const char *const *settings =
 {"key", "value", "key2", "value2", NULL} array of string pairs.
 The array has to be NULL terminated.
*/

/*
 Settings: "stopwords_dir", path to the directory containing stopword files.
 Stopword files are looked up in "<path>"/stopwords_<lang>.txt
 */
extern const struct fts_filter *fts_filter_stopwords;

/*
 Settings: "lang", the language to be stemmed.
 */
extern const struct fts_filter *fts_filter_stemmer_snowball;

/*
 Settings: "id", description of the normalizing/transliterating rules
 to use. See
 http://userguide.icu-project.org/transforms/general#TOC-Transliterator-Identifiers
 for syntax. Defaults to "Any-Lower; NFKD; [: Nonspacing Mark :] Remove; NFC"

 "maxlen", maximum length of tokens that ICU normalizer will output.
 Defaults to 250.
 */
extern const struct fts_filter *fts_filter_normalizer_icu;

/* Lowercases the input. Supports UTF8, if libicu is available. */
extern const struct fts_filter *fts_filter_lowercase;

/* Removes <'s> suffix from words. */
extern const struct fts_filter *fts_filter_english_possessive;

/* Removes prefixing contractions from words. */
extern const struct fts_filter *fts_filter_contractions;

/* Register all built-in filters. */
void fts_filters_init(void);
/* NOTE(review): presumably the counterpart that undoes fts_filters_init();
   confirm against the implementation. */
void fts_filters_deinit(void);

/* Register a new class explicitly. Built-in classes are automatically
   registered. */
void fts_filter_register(const struct fts_filter *filter_class);

/*
 Filtering workflow, find --> create --> filter --> destroy.
 */

/* Look up a filter class by name.
   NOTE(review): presumably returns NULL when no class with that name has
   been registered; confirm against the implementation. */
const struct fts_filter *fts_filter_find(const char *name);

/* Create a filter instance of the given class.

   filter_class - class to instantiate, e.g. one of the extern classes
                  declared above or a result of fts_filter_find().
   parent       - another filter instance. NOTE(review): presumably the
                  filter this one is chained after, may be NULL; confirm.
   lang         - language the filter is created for.
   settings     - NULL-terminated array of key/value string pairs, in the
                  format described at the top of this header.
   filter_r     - receives the created filter.
   error_r      - receives an error string.

   NOTE(review): the return convention is not stated in this header;
   presumably 0 on success and -1 on failure with *error_r set. Confirm
   against the implementation. */
int fts_filter_create(const struct fts_filter *filter_class,
		      struct fts_filter *parent,
		      const struct fts_language *lang,
		      const char *const *settings,
		      struct fts_filter **filter_r,
		      const char **error_r);

/* NOTE(review): names suggest reference counting, with unref being the
   "destroy" step of the workflow above and taking a pointer-to-pointer so
   the caller's pointer can be cleared; confirm against the implementation. */
void fts_filter_ref(struct fts_filter *filter);
void fts_filter_unref(struct fts_filter **filter);

/* Returns 1 if token is returned in *token, 0 if token was filtered
   out (*token is also set to NULL) and -1 on error.
   Input is also given via *token.
   NOTE(review): error_r presumably receives the error string when -1 is
   returned; confirm against the implementation. */
int fts_filter_filter(struct fts_filter *filter, const char **token,
		      const char **error_r);

#endif