SnowballAnalyzer.h revision 88b9f9eb91da632d3e941fe4276f8ace03205b25
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi/*------------------------------------------------------------------------------
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi*
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi* Distributable under the terms of either the Apache License (Version 2.0) or
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi* the GNU Lesser General Public License, as specified in the COPYING file.
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi------------------------------------------------------------------------------*/
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi#ifndef _lucene_analysis_snowball_analyser_
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi#define _lucene_analysis_snowball_analyser_
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomiextern "C" {
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi#include "lib.h"
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi#include "unichar.h"
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi};
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi#include "CLucene/analysis/AnalysisHeader.h"
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki TuomiCL_CLASS_DEF(util,BufferedReader)
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki TuomiCL_NS_DEF2(analysis,snowball)
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi *
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi * {@link EnglishStemmer} is named "English".
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi */
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomiclass CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer {
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi char* language;
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi normalizer_func_t *normalizer;
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi CLTCSetList* stopSet;
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi TokenStream *prevstream;
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomipublic:
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi /** Builds the named analyzer with no stop words. */
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi SnowballAnalyzer(normalizer_func_t *normalizer, const char* language="english");
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi /** Builds the named analyzer with the given stop words.
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi */
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi SnowballAnalyzer(const char* language, const TCHAR** stopWords);
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi ~SnowballAnalyzer();
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi /** Constructs a {@link StandardTokenizer} filtered by a {@link
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader);
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi};
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki TuomiCL_NS_END2
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi#endif
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi
df74b8f6ff432d9a26727669dfed9a15d77efb55Aki Tuomi