From c116120eef8912a5cc12f97edc80ae6947ab68ce Mon Sep 17 00:00:00 2001 From: Matt Magoffin Date: Wed, 22 Apr 2015 14:03:17 +1200 Subject: [PATCH] Add stemmingDisabled property to CLuceneSearchService, which toggles new stemmingDisabled property in BRSnowballAnalyzer to enable toggling support for stemming on/off. --- BRFullTextSearch/BRSnowballAnalyzer.cpp | 47 +++++++++++++++--------- BRFullTextSearch/BRSnowballAnalyzer.h | 6 ++- BRFullTextSearch/CLuceneSearchService.h | 7 ++++ BRFullTextSearch/CLuceneSearchService.mm | 17 +++++++++ 4 files changed, 58 insertions(+), 19 deletions(-) diff --git a/BRFullTextSearch/BRSnowballAnalyzer.cpp b/BRFullTextSearch/BRSnowballAnalyzer.cpp index 9e8531b..d10f70a 100644 --- a/BRFullTextSearch/BRSnowballAnalyzer.cpp +++ b/BRFullTextSearch/BRSnowballAnalyzer.cpp @@ -47,6 +47,15 @@ BRSnowballAnalyzer::BRSnowballAnalyzer(const TCHAR* language) { stopSet = NULL; } +/** Builds the named analyzer with the given stop words. */ +BRSnowballAnalyzer::BRSnowballAnalyzer(const TCHAR* language, const TCHAR** stopWords, bool prefixModeEnabled) { + this->language = STRDUP_TtoT(language); + + stopSet = _CLNEW CLTCSetList(true); + StopFilter::fillStopTable(stopSet,stopWords); + prefixMode = prefixModeEnabled; +} + BRSnowballAnalyzer::~BRSnowballAnalyzer(){ SavedStreams* t = reinterpret_cast<SavedStreams*>(this->getPreviousTokenStream()); if (t) _CLDELETE(t->filteredTokenStream); @@ -55,16 +64,6 @@ BRSnowballAnalyzer::~BRSnowballAnalyzer(){ _CLDELETE(stopSet); } -/** Builds the named analyzer with the given stop words. 
- */ -BRSnowballAnalyzer::BRSnowballAnalyzer(const TCHAR* language, const TCHAR** stopWords, bool prefixModeEnabled) { - this->language = STRDUP_TtoT(language); - - stopSet = _CLNEW CLTCSetList(true); - StopFilter::fillStopTable(stopSet,stopWords); - prefixMode = prefixModeEnabled; -} - TokenStream* BRSnowballAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) { return this->tokenStream(fieldName,reader,false); } @@ -85,10 +84,12 @@ TokenStream* BRSnowballAnalyzer::tokenStream(const TCHAR* fieldName, CL_NS(util) if (stopSet != NULL) { result = _CLNEW CL_NS(analysis)::StopFilter(result, true, stopSet); } - if ( prefixMode ) { - result = _CLNEW bluerocket::lucene::analysis::SnowballPrefixFilter(result, true, language); - } else { - result = _CLNEW SnowballFilter(result, language, true); + if ( !stemmingDisabled ) { + if ( prefixMode ) { + result = _CLNEW bluerocket::lucene::analysis::SnowballPrefixFilter(result, true, language); + } else { + result = _CLNEW SnowballFilter(result, language, true); + } } return result; } @@ -108,10 +109,12 @@ TokenStream* BRSnowballAnalyzer::reusableTokenStream(const TCHAR* fieldName, Rea streams->filteredTokenStream = _CLNEW StandardFilter(streams->tokenStream, true); streams->filteredTokenStream = _CLNEW LowerCaseFilter(streams->filteredTokenStream, true); streams->filteredTokenStream = _CLNEW StopFilter(streams->filteredTokenStream, true, stopSet); - if ( prefixMode ) { - streams->filteredTokenStream = _CLNEW bluerocket::lucene::analysis::SnowballPrefixFilter(streams->filteredTokenStream, true, language); - } else { - streams->filteredTokenStream = _CLNEW SnowballFilter(streams->filteredTokenStream, language, true); + if ( !stemmingDisabled ) { + if ( prefixMode ) { + streams->filteredTokenStream = _CLNEW bluerocket::lucene::analysis::SnowballPrefixFilter(streams->filteredTokenStream, true, language); + } else { + streams->filteredTokenStream = _CLNEW SnowballFilter(streams->filteredTokenStream, language, 
true); + } } } else { streams->tokenStream->reset(reader); @@ -128,4 +131,12 @@ void BRSnowballAnalyzer::setPrefixMode(bool mode) { prefixMode = mode; } +bool BRSnowballAnalyzer::getStemmingDisabled() { + return stemmingDisabled; +} + +void BRSnowballAnalyzer::setStemmingDisabled(bool disabled) { + stemmingDisabled = disabled; +} + CL_NS_END2 diff --git a/BRFullTextSearch/BRSnowballAnalyzer.h b/BRFullTextSearch/BRSnowballAnalyzer.h index d6cec0b..3938085 100644 --- a/BRFullTextSearch/BRSnowballAnalyzer.h +++ b/BRFullTextSearch/BRSnowballAnalyzer.h @@ -14,7 +14,8 @@ CL_CLASS_DEF(util,BufferedReader) CL_NS_DEF2(analysis,snowball) -/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link +/** + * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link * LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}. * * Available stemmers are listed in {@link net.sf.snowball.ext}. The name of a @@ -25,6 +26,7 @@ class CLUCENE_CONTRIBS_EXPORT BRSnowballAnalyzer : public Analyzer { TCHAR* language; CLTCSetList* stopSet; bool prefixMode; + bool stemmingDisabled = false; class SavedStreams; @@ -41,6 +43,8 @@ class CLUCENE_CONTRIBS_EXPORT BRSnowballAnalyzer : public Analyzer { bool getPrefixMode(); void setPrefixMode(bool mode); + bool getStemmingDisabled(); + void setStemmingDisabled(bool disabled); /** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */ diff --git a/BRFullTextSearch/CLuceneSearchService.h b/BRFullTextSearch/CLuceneSearchService.h index e107ecb..272ab77 100644 --- a/BRFullTextSearch/CLuceneSearchService.h +++ b/BRFullTextSearch/CLuceneSearchService.h @@ -60,6 +60,13 @@ */ @property (nonatomic, strong) NSString *defaultAnalyzerLanguage; +/** + * Turn stemming for tokenized fields on/off. Defaults to @c NO. 
+ * + * @since 1.0.6 + */ +@property (nonatomic, getter=isStemmingDisabled) BOOL stemmingDisabled; + /** * Turn support for prefix-based searches on tokenized and stemmed fields. Defaults to @c NO. * diff --git a/BRFullTextSearch/CLuceneSearchService.mm b/BRFullTextSearch/CLuceneSearchService.mm index e01bdb7..203aa8b 100644 --- a/BRFullTextSearch/CLuceneSearchService.mm +++ b/BRFullTextSearch/CLuceneSearchService.mm @@ -213,6 +213,23 @@ - (void)setSupportStemmedPrefixSearches:(BOOL)supportStemmedPrefixSearches { } } +- (BOOL)isStemmingDisabled { + Analyzer *analyzer = [self defaultAnalyzer]; + lucene::analysis::snowball::BRSnowballAnalyzer *snowball = dynamic_cast<lucene::analysis::snowball::BRSnowballAnalyzer *>(analyzer); + if ( snowball != NULL ) { + return (snowball->getStemmingDisabled() ? YES : NO); + } + return NO; +} + +- (void)setStemmingDisabled:(BOOL)stemmingDisabled { + Analyzer *analyzer = [self defaultAnalyzer]; + lucene::analysis::snowball::BRSnowballAnalyzer *snowball = dynamic_cast<lucene::analysis::snowball::BRSnowballAnalyzer *>(analyzer); + if ( snowball != NULL ) { + snowball->setStemmingDisabled(stemmingDisabled ? true : false); + } +} + - (void)resetSearcher { if ( searcher.get() != NULL ) { searcher->close();