Skip to content

Commit

Permalink
[Fix](analyzer) add ownership flag to Field's TokenStream value and Analyzer's Reader
Browse files
Browse the repository at this point in the history
  • Loading branch information
airborne12 committed Jun 11, 2024
1 parent 1936207 commit 5422ac1
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 9 deletions.
16 changes: 13 additions & 3 deletions src/core/CLucene/analysis/AnalysisHeader.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "CLucene/index/Payload.h"
#include "CLucene/util/VoidList.h"
#include "CLucene/LuceneThreads.h"
#include "CLucene/util/CLStreams.h"

#include <unordered_set>

Expand Down Expand Up @@ -304,6 +305,10 @@ class CLUCENE_EXPORT Analyzer{
_stopwords = stopwords;
}

virtual void set_ownReader(bool ownReader) {
_ownReader = ownReader;
}

private:

DEFINE_MUTEX(THIS_LOCK)
Expand All @@ -322,6 +327,7 @@ class CLUCENE_EXPORT Analyzer{
virtual void setPreviousTokenStream(TokenStream* obj);

bool _lowercase = false;
bool _ownReader = false;
std::unordered_set<std::string_view>* _stopwords = nullptr;

public:
Expand Down Expand Up @@ -359,19 +365,23 @@ class CLUCENE_EXPORT Tokenizer:public TokenStream {
/** The text source for this Tokenizer. */
CL_NS(util)::Reader* input;
bool lowercase = false;
bool ownReader = false;
std::unordered_set<std::string_view>* stopwords = nullptr;

public:
/** Construct a tokenizer with null input. */
Tokenizer():input(nullptr){}
/**
 * Construct a token stream processing the given input.
 * @param _input     reader supplying the text to tokenize.
 * @param _ownReader when true, the tokenizer is flagged as owning _input;
 *                   defaults to false so existing one-argument callers keep
 *                   their current behaviour.
 *
 * Only this overload is kept: a separate one-argument constructor next to a
 * defaulted second parameter would make Tokenizer(reader) ambiguous.
 */
explicit Tokenizer(CL_NS(util)::Reader* _input, bool _ownReader = false):input(_input), ownReader(_ownReader){}

/**
 * Detaches this tokenizer from its input Reader.
 * When ownReader is set the reader is deleted; otherwise the pointer is
 * simply dropped and the reader is left for its owner to release.
 */
virtual void close() {
    if (input == NULL) {
        return;
    }
    if (ownReader) {
        // The ownership check must see the live pointer: clearing `input`
        // first would turn the delete into a no-op and leak the reader.
        _CLDELETE(input);
    }
    input = NULL;
}

Expand Down
4 changes: 3 additions & 1 deletion src/core/CLucene/analysis/Analyzers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ template class LowerCaseTokenizer<TCHAR>;
/**
 * Builds a SimpleTokenizer over `in` with the lowercase flag set and the
 * reader-ownership flag cleared, by delegating to the flag-taking constructor.
 */
template<typename T>
SimpleTokenizer<T>::SimpleTokenizer(CL_NS(util)::Reader *in)
    : SimpleTokenizer<T>(in, /*lowercase=*/true, /*ownReader=*/false) {
}

/**
 * Builds a SimpleTokenizer over `in` with explicit flags.
 * @param in        reader supplying the text to tokenize.
 * @param lowercase value stored in Tokenizer::lowercase.
 * @param ownReader value stored in Tokenizer::ownReader.
 */
template<typename T>
SimpleTokenizer<T>::SimpleTokenizer(CL_NS(util)::Reader *in, bool lowercase, bool ownReader) : LowerCaseTokenizer<T>(in) {
    // The base constructor only receives the reader, so both flags are
    // written to the Tokenizer members here.
    Tokenizer::lowercase = lowercase;
    Tokenizer::ownReader = ownReader;
}

template<typename T>
Expand Down
5 changes: 3 additions & 2 deletions src/core/CLucene/analysis/Analyzers.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ class CLUCENE_EXPORT SimpleTokenizer:public LowerCaseTokenizer<T> {
public:
/** Construct a new SimpleTokenizer with the lowercase flag set and no ownership of the reader. */
explicit SimpleTokenizer(CL_NS(util)::Reader* in);
/**
 * Construct a new SimpleTokenizer with explicit flags.
 * @param in        reader supplying the text to tokenize.
 * @param lowercase value for the tokenizer's lowercase flag.
 * @param ownReader value for the tokenizer's reader-ownership flag; defaults
 *                  to false so two-argument callers are unaffected.
 *
 * A separate (Reader*, bool) overload is intentionally not declared: next to
 * the defaulted third parameter it would make two-argument calls ambiguous.
 */
SimpleTokenizer(CL_NS(util)::Reader* in, bool lowercase, bool ownReader = false);
virtual ~SimpleTokenizer();

Token* next(Token* token) override {
Expand Down Expand Up @@ -182,12 +182,13 @@ class CLUCENE_EXPORT SimpleAnalyzer: public Analyzer {
public:
/** Builds an analyzer with the lowercase flag on and the reader-ownership flag off. */
SimpleAnalyzer() {
    // Ownership stays off unless set_ownReader() is called afterwards.
    _ownReader = false;
    _lowercase = true;
}

/** Always reports true for this analyzer. NOTE(review): the meaning of "SDocOpt" is not visible here — confirm against the base class. */
bool isSDocOpt() override {
    return true;
}

/**
 * Creates a new SimpleTokenizer over `reader`.
 * @param fieldName not used by this implementation.
 * @param reader    text source handed to the tokenizer.
 * @return a tokenizer allocated with _CLNEW, configured with this analyzer's
 *         lowercase and reader-ownership flags.
 */
TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) override{
    // Single return that forwards _ownReader; an earlier two-argument return
    // ahead of it would leave the ownership flag unused.
    return _CLNEW SimpleTokenizer<T>(reader, _lowercase, _ownReader);
}
TokenStream* reusableTokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) override{
if (tokenizer_ == nullptr) {
Expand Down
8 changes: 6 additions & 2 deletions src/core/CLucene/document/Field.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,8 @@ void Field::setValue(ValueArray<uint8_t>* value) {
}

/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
void Field::setValue(CL_NS(analysis)::TokenStream* value) {
void Field::setValue(CL_NS(analysis)::TokenStream* value, bool own_stream) {
ownStream = own_stream;
_resetValue();
fieldsData = value;
valueType = VALUE_TOKENSTREAM;
Expand Down Expand Up @@ -340,7 +341,10 @@ void Field::_resetValue() {
} else if (valueType & VALUE_BINARY) {
ValueArray<uint8_t>* v = static_cast<ValueArray<uint8_t>*>(fieldsData);
_CLDELETE(v);
}
} else if (valueType & VALUE_TOKENSTREAM && ownStream) {
auto* v = static_cast<CL_NS(analysis)::TokenStream*>(fieldsData);
_CLDELETE(v);
}
valueType=VALUE_NONE;
}
const char* Field::getObjectName() const{
Expand Down
3 changes: 2 additions & 1 deletion src/core/CLucene/document/Field.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ class CLUCENE_EXPORT Field : public CL_NS(util)::NamedObject{
void setValue(CL_NS(util)::ValueArray<uint8_t>* value) ;

/**
 * Expert: change the value of this field to a token stream.
 * @param value      token stream stored as the field's data.
 * @param own_stream when true, the field deletes the stream when its value is
 *                   reset; defaults to false so one-argument callers are
 *                   unaffected.
 *
 * Declared once only: a separate one-argument overload next to the defaulted
 * parameter would make setValue(stream) ambiguous.
 */
void setValue(CL_NS(analysis)::TokenStream* value, bool own_stream = false);

//void setValue(CL_NS(analysis)::STokenStream* value);

Expand Down Expand Up @@ -334,6 +334,7 @@ class CLUCENE_EXPORT Field : public CL_NS(util)::NamedObject{
float_t boost;

IndexVersion indexVersion_ = IndexVersion::kV1;
bool ownStream = false;
};
CL_NS_END
#endif

0 comments on commit 5422ac1

Please sign in to comment.