/*
* Copyright 2004-2009 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.compass.core.lucene.engine.all;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Payload;
import org.compass.core.Property;
import org.compass.core.engine.SearchEngineException;
import org.compass.core.lucene.engine.LuceneSearchEngineFactory;
import org.compass.core.mapping.AllMapping;
import org.compass.core.mapping.ExcludeFromAll;
import org.compass.core.mapping.ResourceMapping;
import org.compass.core.mapping.ResourcePropertyMapping;
import org.compass.core.spi.InternalProperty;
import org.compass.core.spi.InternalResource;
/**
* The All Analyzer is a specific analyzer that is used to wrap the analyzer passed when adding
* a document. It will gather all the tokens that the actual analyzer generates for fields that
* are included in All and allow to get them using {@link #createAllTokenStream()} (which will
* be used to create the all field with).
*
* <p>Un tokenized fields (which will not go through the analysis process) are identied when this
* analyzed is constructed and are added to the all field if they are supposed to be included.
* There are two options with the untokenized fields, either add them as is (un tokenized), or
* analyze them just for the all properties.
*
* @author kimchy
*/
public class AllAnalyzer extends Analyzer {
private final Analyzer analyzer;
private final InternalResource resource;
private final ResourceMapping resourceMapping;
private final AllMapping allMapping;
private final LuceneSearchEngineFactory searchEngineFactory;
private final ArrayList<Token> tokens = new ArrayList<Token>();
private final AllTokenStreamCollector allTokenStreamCollector = new AllTokenStreamCollector();
private final boolean boostSupport;
public AllAnalyzer(Analyzer analyzer, InternalResource resource, LuceneSearchEngineFactory searchEngineFactory) {
this.analyzer = analyzer;
this.resource = resource;
this.resourceMapping = resource.getResourceMapping();
this.searchEngineFactory = searchEngineFactory;
this.allMapping = resourceMapping.getAllMapping();
this.boostSupport = searchEngineFactory.getLuceneSettings().isAllPropertyBoostSupport();
if (!allMapping.isSupported()) {
return;
}
// Note, we can cache the tokens and reuse it (even though we set the positiions on it) since
// the positions (which are the only thing we set that is not thread safe) will always be the same
for (Token aliasToken : searchEngineFactory.getAllTermsCache().getAliasTerms(resource.getAlias())) {
tokens.add(aliasToken);
}
// go over all the un tokenized properties and add them as tokens (if required)
// they are added since they will never get analyzed thus tokenStream will never
// be called on them
for (Property property : resource.getProperties()) {
ResourcePropertyMapping resourcePropertyMapping = ((InternalProperty) property).getPropertyMapping();
// if not found within the property, try and get it based on the name from the resource mapping
if (resourcePropertyMapping == null) {
resourcePropertyMapping = resourceMapping.getResourcePropertyMapping(property.getName());
}
if (resourcePropertyMapping == null) {
if (allMapping.isIncludePropertiesWithNoMappings()) {
if (property.isIndexed() && !property.isTokenized()) {
if (searchEngineFactory.getPropertyNamingStrategy().isInternal(property.getName())) {
continue;
}
if (property.getName().equals(searchEngineFactory.getAliasProperty())) {
continue;
}
if (property.getName().equals(searchEngineFactory.getExtendedAliasProperty())) {
continue;
}
// no mapping, need to add un_tokenized ones
Payload payload = null;
if (boostSupport) {
if (property.getBoost() != 1.0f) {
payload = AllBoostUtils.writeFloat(property.getBoost());
} else if (resource.getBoost() != 1.0f) {
// we get the boost from the resource thus taking into account any resource property mapping
// and/or resource mapping boost level
payload = AllBoostUtils.writeFloat(resource.getBoost());
}
}
String value = property.getStringValue();
if (value != null) {
Token t = new Token(value, 0, value.length());
t.setPayload(payload);
tokens.add(t);
}
}
}
continue;
}
if (resourcePropertyMapping.isInternal()) {
continue;
}
if (resourcePropertyMapping.getExcludeFromAll() == ExcludeFromAll.YES) {
continue;
}
if (resourcePropertyMapping.getIndex() == Property.Index.UN_TOKENIZED || resourcePropertyMapping.getIndex() == Property.Index.NOT_ANALYZED) {
Payload payload = null;
if (boostSupport) {
if (resourcePropertyMapping.getBoost() != 1.0f) {
payload = AllBoostUtils.writeFloat(resourcePropertyMapping.getBoost());
} else if (resource.getBoost() != 1.0f) {
// we get the boost from the resource thus taking into account any resource property mapping
// and/or resource mapping boost level
payload = AllBoostUtils.writeFloat(resource.getBoost());
}
}
String value = property.getStringValue();
if (value != null) {
// if NO exclude from all, just add it
// if NO_ANALYZED, will analyze it as well
if (resourcePropertyMapping.getExcludeFromAll() == ExcludeFromAll.NO) {
Token t = new Token(value, 0, value.length());
t.setPayload(payload);
tokens.add(t);
} else if (resourcePropertyMapping.getExcludeFromAll() == ExcludeFromAll.NO_ANALYZED) {
Analyzer propAnalyzer;
if (resourcePropertyMapping.getAnalyzer() != null) {
propAnalyzer = searchEngineFactory
.getAnalyzerManager().getAnalyzerMustExist(resourcePropertyMapping.getAnalyzer());
} else {
propAnalyzer = searchEngineFactory.getAnalyzerManager().getAnalyzerByResource(resource);
}
TokenStream ts = propAnalyzer.tokenStream(property.getName(), new StringReader(value));
try {
Token token = ts.next();
while (token != null) {
token.setPayload(payload);
tokens.add(token);
token = ts.next();
}
} catch (IOException e) {
throw new SearchEngineException("Failed to analyzer " + property, e);
}
}
}
}
}
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream retVal = analyzer.tokenStream(fieldName, reader);
return wrapTokenStreamIfNeeded(fieldName, retVal);
}
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
TokenStream retVal = analyzer.reusableTokenStream(fieldName, reader);
return wrapTokenStreamIfNeeded(fieldName, retVal);
}
public int getPositionIncrementGap(String fieldName) {
return analyzer.getPositionIncrementGap(fieldName);
}
public TokenStream createAllTokenStream() {
return new AllTokenStream();
}
private TokenStream wrapTokenStreamIfNeeded(String fieldName, TokenStream retVal) {
if (!allMapping.isSupported()) {
return retVal;
}
ResourcePropertyMapping resourcePropertyMapping = resourceMapping.getResourcePropertyMapping(fieldName);
if (resourcePropertyMapping == null) {
if (!searchEngineFactory.getPropertyNamingStrategy().isInternal(fieldName)) {
if (allMapping.isIncludePropertiesWithNoMappings()) {
allTokenStreamCollector.setTokenStream(retVal);
allTokenStreamCollector.updateMapping(resource, resourcePropertyMapping);
retVal = allTokenStreamCollector;
}
}
} else if (!(resourcePropertyMapping.getExcludeFromAll() == ExcludeFromAll.YES)
&& !resourcePropertyMapping.isInternal()) {
allTokenStreamCollector.setTokenStream(retVal);
allTokenStreamCollector.updateMapping(resource, resourcePropertyMapping);
retVal = allTokenStreamCollector;
}
return retVal;
}
/**
* The all token stream. To be used with the all property as its token stream. This stream will
* return all the tokens created and collected by this analyzer.
*/
private class AllTokenStream extends TokenStream {
private Iterator<Token> tokenIt;
private int offset = 0;
private AllTokenStream() {
}
/**
* Override the next with token so no unneeded token will be created. Also,
* no need to use the result, just return the token we saved where we just
* change offests.
*/
public Token next(Token result) throws IOException {
if (tokenIt == null) {
tokenIt = tokens.iterator();
}
if (tokenIt.hasNext()) {
Token token = tokenIt.next();
int delta = token.endOffset() - token.startOffset();
token.setStartOffset(offset);
offset += delta;
token.setEndOffset(offset);
return token;
}
tokens.clear();
return null;
}
public String toString() {
return "all-stream";
}
}
/**
* A token stream that wraps the actual token stream and collects all the
* tokens it produces.
*/
private class AllTokenStreamCollector extends TokenStream {
private TokenStream tokenStream;
private Payload payload;
private Token lastToken;
public AllTokenStreamCollector() {
}
public void updateMapping(InternalResource resource, ResourcePropertyMapping resourcePropertyMapping) {
if (lastToken != null && payload != null) {
lastToken.setPayload(payload);
lastToken = null;
}
if (boostSupport) {
if (resourcePropertyMapping != null && resourcePropertyMapping.getBoost() != 1.0f) {
payload = AllBoostUtils.writeFloat(resourcePropertyMapping.getBoost());
} else if (resource.getBoost() != 1.0f) {
// we get the boost from the resource thus taking into account any resource property mapping
// and/or resource mapping boost level
payload = AllBoostUtils.writeFloat(resource.getBoost());
} else {
payload = null;
}
}
}
public void setTokenStream(TokenStream tokenStream) {
this.tokenStream = tokenStream;
}
public Token next() throws IOException {
// we put the payload on the last token. It has already been indexed
// and it will be used on the all property later on
if (lastToken != null && payload != null) {
lastToken.setPayload(payload);
}
lastToken = tokenStream.next();
if (lastToken != null) {
tokens.add(lastToken);
}
return lastToken;
}
public void reset() throws IOException {
tokenStream.reset();
}
public void close() throws IOException {
if (lastToken != null && payload != null) {
lastToken.setPayload(payload);
}
tokenStream.close();
}
}
}