c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen/*------------------------------------------------------------------------------
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen*
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen* Distributable under the terms of either the Apache License (Version 2.0) or
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen* the GNU Lesser General Public License, as specified in the COPYING file.
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen------------------------------------------------------------------------------*/
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen#ifndef _lucene_analysis_snowball_analyser_
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen#define _lucene_analysis_snowball_analyser_
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainenextern "C" {
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainen#include "lib.h"
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainen#include "unichar.h"
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainen};
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen#include "CLucene/analysis/AnalysisHeader.h"
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo SirainenCL_CLASS_DEF(util,BufferedReader)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo SirainenCL_NS_DEF2(analysis,snowball)
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen *
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen * {@link EnglishStemmer} is named "English".
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen */
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenclass CLUCENE_CONTRIBS_EXPORT SnowballAnalyzer: public Analyzer {
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen char* language;
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainen normalizer_func_t *normalizer;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen CLTCSetList* stopSet;
920f6ee17b0c4aaa98e0043a575655eb84fc01a4Timo Sirainen TokenStream *prevstream;
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainenpublic:
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen /** Builds the named analyzer with no stop words. */
88b9f9eb91da632d3e941fe4276f8ace03205b25Timo Sirainen SnowballAnalyzer(normalizer_func_t *normalizer, const char* language="english");
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen /** Builds the named analyzer with the given stop words.
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen */
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen SnowballAnalyzer(const char* language, const TCHAR** stopWords);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen ~SnowballAnalyzer();
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen /** Constructs a {@link StandardTokenizer} filtered by a {@link
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader, bool deleteReader);
920f6ee17b0c4aaa98e0043a575655eb84fc01a4Timo Sirainen TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen};
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo SirainenCL_NS_END2
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen#endif
c8296ac1ed68ed5c5168de545b76f9b27fc76d35Timo Sirainen