/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.jcr.index.lucene;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.modeshape.common.annotation.Immutable;
import org.modeshape.common.annotation.ThreadSafe;
import org.modeshape.common.util.CheckArg;
import org.modeshape.jcr.ExecutionContext;
import org.modeshape.jcr.index.lucene.query.LuceneQueryFactory;
import org.modeshape.jcr.value.PropertyType;
/**
 * Lucene index which supports multiple heterogeneous columns for any given document. This is more complicated and performs
 * worse in some cases than {@link SingleColumnIndex} because Lucene doesn't support in-place updates, so every change to a
 * single column requires rebuilding the whole document (existing fields are cloned and merged with the new ones).
 * <p>
 * Whenever possible, prefer the {@link SingleColumnIndex} implementation to this one.
 * </p>
 * <p>
 * To avoid searching the index each time a column is added or removed, the keys of the documents which were added/removed
 * since the last commit are tracked in a {@link DocumentIdCache} and flushed into the writer's commit data in
 * {@link #preCommit(Map)}.
 * </p>
 *
 * @author Horia Chiorean (hchiorea@redhat.com)
 * @since 4.5
 */
@ThreadSafe
@Immutable
@SuppressWarnings("deprecation")
class MultiColumnIndex extends LuceneIndex {

    /** Tracks the node keys added/removed since the last commit. */
    private final DocumentIdCache cache;

    /**
     * Creates a new multi-column index; all arguments are passed through unchanged to the {@link LuceneIndex} constructor.
     *
     * @param name the name of the index
     * @param workspaceName the name of the workspace
     * @param config the Lucene configuration
     * @param propertyTypesByName the types of the indexed properties, keyed by property name
     * @param context the execution context
     */
    protected MultiColumnIndex( String name,
                                String workspaceName,
                                LuceneConfig config,
                                Map<String, PropertyType> propertyTypesByName,
                                ExecutionContext context ) {
        super(name, workspaceName, config, propertyTypesByName, context);
        // we keep track of the node keys which are added/removed in the commit data
        // this is an optimization to avoid searching for a document each time an update or partial remove is performed
        this.cache = new DocumentIdCache();
    }

    /**
     * Adds (or replaces) the values of a property for the document with the given key.
     * <p>
     * If a document is already known for the key, it is loaded, cloned without the fields named {@code propertyName},
     * the new fields are appended and the document is replaced via {@code updateDocument}. Otherwise a brand new document
     * is written and the key is recorded in the cache.
     * </p>
     *
     * @param nodeKey the key of the node (document id); may not be null
     * @param propertyName the name of the property (column); may not be null
     * @param values the property values; may not be null
     * @throws LuceneIndexException if the underlying writer/searcher fails with an {@link IOException}
     */
    @Override
    public void add( final String nodeKey, final String propertyName, final Object[] values ) {
        CheckArg.isNotNull(nodeKey, "nodeKey");
        CheckArg.isNotNull(propertyName, "propertyName");
        CheckArg.isNotNull(values, "values");
        try {
            // first look at the cache and commit data to check if a document exists or not with this key. If a document
            // does not exist, this operation will be a lot faster. Otherwise the document needs to be loaded, which is costly
            if (documentExists(nodeKey)) {
                // we're updating an existing document
                logger.debug("Updating the property '{0}' of document '{1}' in the Lucene index '{2}' with the values '{3}'",
                             propertyName, nodeKey, name, values);
                Document oldDocument = searcher.loadDocumentById(nodeKey);
                Document newDocument = clone(oldDocument, propertyName);
                // add the fields for the new property
                List<Field> fields = valuesToFields(propertyName, values);
                fields.forEach(newDocument::add);
                writer.updateDocument(FieldUtil.idTerm(nodeKey), newDocument);
            } else {
                // we're creating the document for the first time...
                logger.debug("Adding the document '{0}' in the Lucene Index '{1}' with the property '{2}' and values '{3}'",
                             nodeKey, name, propertyName, values);
                Document document = new Document();
                addProperty(nodeKey, document, propertyName, values);
                writer.addDocument(document);
                // mark the node key as added
                cache.add(nodeKey);
            }
        } catch (IOException e) {
            throw new LuceneIndexException(e);
        }
    }

    /**
     * Creates a copy of a document, leaving out every field whose name is one of the excluded names.
     *
     * @param oldDocument the document to copy
     * @param excludeProps the names of the fields which should not be copied
     * @return a new document holding clones of the remaining fields
     */
    private Document clone( Document oldDocument, String... excludeProps ) {
        List<String> excluded = Arrays.asList(excludeProps);
        Document newDocument = new Document();
        oldDocument.getFields()
                   .stream()
                   .filter(field -> !excluded.contains(field.name()))
                   .map(field -> clone((Field)field))
                   .forEach(newDocument::add);
        return newDocument;
    }

    /**
     * Re-creates a field from an existing one so that it can be added to a new document. The type of the new field is
     * chosen from the value exposed by the existing field: length fields become longs, numeric values keep their numeric
     * type (int, long or double) and string/binary values become {@link StringField}s. All fields are created as stored.
     *
     * @param existing the field to copy
     * @return the new field
     * @throws LuceneIndexException if the existing field exposes neither a supported numeric, a string nor a binary value
     */
    private Field clone( Field existing ) {
        String name = existing.name();
        if (name.startsWith(FieldUtil.LENGTH_PREFIX)) {
            // these are always stored as longs
            return new LegacyLongField(name, Long.parseLong(existing.stringValue()), Field.Store.YES);
        }
        Number numberValue = existing.numericValue();
        if (numberValue instanceof Integer) {
            return new LegacyIntField(name, numberValue.intValue(), Field.Store.YES);
        } else if (numberValue instanceof Long) {
            return new LegacyLongField(name, numberValue.longValue(), Field.Store.YES);
        } else if (numberValue instanceof Double) {
            return new LegacyDoubleField(name, numberValue.doubleValue(), Field.Store.YES);
        }
        String stringValue = existing.stringValue();
        if (stringValue != null) {
            return new StringField(name, stringValue, Field.Store.YES);
        }
        BytesRef bytesRef = existing.binaryValue();
        if (bytesRef != null) {
            // we don't really store any binary fields
            return new StringField(name, bytesRef, Field.Store.YES);
        }
        throw new LuceneIndexException("Cannot clone existing field: " + existing);
    }

    /**
     * Flushes the tracked added/removed node keys into the commit data after the parent class has had its turn.
     *
     * @param commitData the commit data which will be written together with the commit
     */
    @Override
    protected void preCommit( Map<String, String> commitData ) {
        super.preCommit(commitData);
        cache.updateCommitData(commitData);
    }

    /**
     * Removes a single property from the document with the given key. Nothing happens if no document is known for the key
     * or if the document has no field with that name. If the loaded document holds exactly one field the entire document
     * is deleted; otherwise it is replaced by a clone which lacks the property.
     *
     * @param nodeKey the key of the node (document id); may not be null
     * @param propertyName the name of the property to remove; may not be null
     * @throws LuceneIndexException if the underlying writer/searcher fails with an {@link IOException}
     */
    @Override
    protected void remove( final String nodeKey, final String propertyName ) {
        CheckArg.isNotNull(nodeKey, "nodeKey");
        CheckArg.isNotNull(propertyName, "propertyName");
        if (!documentExists(nodeKey)) {
            // no document found so nothing to do
            return;
        }
        try {
            Document document = searcher.loadDocumentById(nodeKey);
            boolean hasProperty = document.getField(propertyName) != null;
            if (!hasProperty) {
                return;
            }
            Term idTerm = FieldUtil.idTerm(nodeKey);
            if (document.getFields().size() == 1) {
                // there are no more fields, so remove the entire document....
                writer.deleteDocuments(idTerm);
                // mark the node key as removed
                cache.remove(nodeKey);
            } else {
                // create a clone without the property
                Document newDocument = clone(document, propertyName);
                writer.updateDocument(idTerm, newDocument);
            }
        } catch (IOException e) {
            throw new LuceneIndexException(e);
        }
    }

    /**
     * Creates the query factory used for this index.
     *
     * @param variables the query variables
     * @return the factory obtained from {@link LuceneQueryFactory#forMultiColumnIndex}
     */
    @Override
    protected LuceneQueryFactory queryFactory( Map<String, Object> variables ) {
        return LuceneQueryFactory.forMultiColumnIndex(context.getValueFactories(), variables, propertyTypesByName);
    }

    /**
     * Removes the entire document with the given key and, if the document was known to exist, records the key as removed.
     *
     * @param nodeKey the key of the node (document id)
     */
    @Override
    public void remove( String nodeKey ) {
        super.remove(nodeKey);
        if (documentExists(nodeKey)) {
            cache.remove(nodeKey);
        }
    }

    /**
     * Checks, without searching the index, whether a document exists for the given key.
     * <p>
     * Changes made since the last commit take precedence: a key added in this session exists and a key removed in this
     * session does not, regardless of what the commit data says (the commit data is only brought up to date in
     * {@link #preCommit(Map)}). Only keys untouched in this session are looked up in the writer's commit data.
     * </p>
     *
     * @param nodeKey the key of the node (document id)
     * @return {@code true} if a document is known to exist for the key
     */
    private boolean documentExists( String nodeKey ) {
        if (cache.hasNode(nodeKey)) {
            return true;
        }
        if (cache.isRemoved(nodeKey)) {
            // deleted in this session; the commit data may still list the key until the next commit
            return false;
        }
        return writer.getCommitData().containsKey(nodeKey);
    }

    /**
     * A simple holder which tracks for each index writer session the document keys which were added to or removed from
     * the index and then writes this information in the commit data. This avoids the document searching required when
     * updating the column of an existing document.
     * <p>
     * All methods are synchronized on the cache instance. A key is never in both sets at once: {@link #add(String)} and
     * {@link #remove(String)} each take the key out of the opposite set.
     * </p>
     */
    private static class DocumentIdCache {
        private final Set<String> removed;
        private final Set<String> added;

        private DocumentIdCache() {
            this.removed = new HashSet<>();
            this.added = new HashSet<>();
        }

        /**
         * @param nodeKey the key to check
         * @return {@code true} only if the key was added (and not subsequently removed) since the last commit
         */
        protected synchronized boolean hasNode( String nodeKey ) {
            // a removed key must NOT count as present: remove() already took it out of 'added'
            return added.contains(nodeKey);
        }

        /**
         * @param nodeKey the key to check
         * @return {@code true} only if the key was removed (and not subsequently re-added) since the last commit
         */
        protected synchronized boolean isRemoved( String nodeKey ) {
            return removed.contains(nodeKey);
        }

        /** Records the key as added, discarding any earlier removal of the same key. */
        protected synchronized void add( String nodeKey ) {
            this.removed.remove(nodeKey);
            this.added.add(nodeKey);
        }

        /** Records the key as removed, discarding any earlier addition of the same key. */
        protected synchronized void remove( String nodeKey ) {
            this.added.remove(nodeKey);
            this.removed.add(nodeKey);
        }

        /** Forgets all tracked keys. */
        protected synchronized void clear() {
            this.added.clear();
            this.removed.clear();
        }

        /**
         * Applies the tracked changes to the commit data (removed keys are deleted, added keys are stored with an empty
         * value) and then resets the cache.
         *
         * @param commitData the commit data to update
         */
        protected synchronized void updateCommitData( Map<String, String> commitData ) {
            removed.forEach(commitData::remove);
            added.forEach(key -> commitData.put(key, ""));
            clear();
        }
    }
}