/*
* Constellation - An open source and standard compliant SDI
* http://www.constellation-sdi.org
*
* Copyright 2014 Geomatys.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.constellation.metadata.index;
// Apache Lucene, Apache SIS, Geotoolkit and J2SE dependencies
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.sis.util.NullArgumentException;
import org.geotoolkit.lucene.IndexingException;
import org.geotoolkit.lucene.index.AbstractIndexer;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringTokenizer;
import java.util.logging.Level;
import static org.constellation.metadata.CSWQueryable.DUBLIN_CORE_QUERYABLE;
import static org.constellation.metadata.CSWQueryable.ISO_FC_QUERYABLE;
import static org.constellation.metadata.CSWQueryable.ISO_QUERYABLE;
// Apache Lucene dependencies
// geotoolkit dependencies
/**
 * Base class of the Lucene indexers used for CSW metadata records.
 * <p>
 * For each metadata object a Lucene {@link Document} is built from the ISO 19139,
 * feature catalogue and Dublin Core queryable sets, completed by the additional
 * queryable set supplied at construction time. Sub-classes provide the way to
 * recognise the metadata type and to extract the values from the metadata object.
 *
 * @author Guilhem Legal (Geomatys)
 *
 * @param <A> the type of indexed Object
 */
public abstract class AbstractCSWIndexer<A> extends AbstractIndexer<A> {

    /** Prefix of the log messages emitted when a record cannot be spatially indexed. */
    protected static final String NOT_SPATIALLY_INDEXABLE = "unable to spatially index metadata: ";

    /** Textual marker compared against extracted values to detect a missing value. */
    protected static final String NULL_VALUE = "null";

    /** Additional queryable elements (never {@code null}); its keys override the standard queryable sets. */
    private final Map<String, List<String>> additionalQueryable;

    /** Field type: indexed, stored, not tokenized. */
    protected static final FieldType ID_TYPE = newFieldType(false, true);

    /** Field type: indexed, not stored, not tokenized. */
    protected static final FieldType SORT_TYPE = newFieldType(false, false);

    /** Field type: indexed, stored, tokenized. */
    protected static final FieldType TEXT_TYPE = newFieldType(true, true);

    /** Field type: indexed, not stored, tokenized. */
    protected static final FieldType SEARCH_TYPE = newFieldType(true, false);

    /**
     * Build an indexed {@link FieldType} with the specified tokenized / stored flags.
     *
     * @param tokenized value given to {@link FieldType#setTokenized(boolean)}.
     * @param stored value given to {@link FieldType#setStored(boolean)}.
     * @return A new field type, with the indexed flag set to {@code true}.
     */
    private static FieldType newFieldType(final boolean tokenized, final boolean stored) {
        final FieldType type = new FieldType();
        type.setTokenized(tokenized);
        type.setStored(stored);
        type.setIndexed(true);
        return type;
    }

    /**
     * Return the specified map, or a new empty map if it is {@code null}.
     * The supplied map is kept by reference (no copy is made).
     *
     * @param queryable A map of additional queryable elements, may be {@code null}.
     * @return A non-null map.
     */
    private static Map<String, List<String>> orEmpty(final Map<String, List<String>> queryable) {
        if (queryable != null) {
            return queryable;
        }
        return new HashMap<>();
    }

    /**
     * Build a new CSW metadata indexer.
     *
     * @param indexID The identifier, if there is one, of the index.
     * @param configDirectory The directory where the files of the index will be stored.
     * @param additionalQueryable A map of additional queryable elements, an empty map is used if {@code null}.
     */
    public AbstractCSWIndexer(final String indexID, final File configDirectory, final Map<String, List<String>> additionalQueryable) {
        super(indexID, configDirectory);
        this.additionalQueryable = orEmpty(additionalQueryable);
    }

    /**
     * Build a new CSW metadata indexer, with the specified lucene analyzer.
     *
     * @param indexID The identifier, if there is one, of the index.
     * @param configDirectory The directory where the files of the index will be stored.
     * @param analyzer A lucene analyzer used in text values indexation (default is ClassicAnalyzer).
     * @param additionalQueryable A map of additional queryable elements, an empty map is used if {@code null}.
     */
    public AbstractCSWIndexer(String indexID, File configDirectory, Analyzer analyzer, Map<String, List<String>> additionalQueryable) {
        super(indexID, configDirectory, analyzer);
        this.additionalQueryable = orEmpty(additionalQueryable);
    }

    /**
     * Makes a document for a A Metadata Object.
     * <p>
     * The document receives, in this order: the "docid" field, the implementation specific fields,
     * the fields of the queryable set matching the metadata type together with an "objectType" field,
     * the Dublin Core fields, the bounding box (only once), the additional queryable fields,
     * a constant "metafile" field and the "AnyText" field.
     *
     * @param metadata The metadata to index.
     * @param docId the document identifier.
     * @return A Lucene document.
     * @throws IndexingException if one of the indexing steps fails.
     */
    @Override
    protected Document createDocument(final A metadata, final int docId) throws IndexingException {
        // make a new, empty document
        final Document doc = new Document();
        doc.add(new Field("docid", Integer.toString(docId), ID_TYPE));

        indexSpecialField(metadata, doc);

        final StringBuilder anyText = new StringBuilder();
        boolean alreadySpatiallyIndexed = false;

        if (isISO19139(metadata)) {
            // For an ISO 19139 object
            final Map<String, List<String>> isoQueryable = removeOverridenField(ISO_QUERYABLE);
            indexQueryableSet(doc, metadata, isoQueryable, anyText);

            // we add the geometry parts
            // NOTE(review): 268435540 differs from the 4326 used for the Dublin Core fallback below;
            // presumably an encoded SRID understood by addBoundingBox - confirm.
            alreadySpatiallyIndexed = indexSpatialPart(doc, metadata, isoQueryable, 268435540);

            doc.add(new Field("objectType", "MD_Metadata", SEARCH_TYPE));

        } else if (isEbrim30(metadata) || isEbrim25(metadata)) {
            // TODO: no Ebrim specific queryable is indexed, for version 3.0 as for version 2.5
            doc.add(new Field("objectType", "Ebrim", SEARCH_TYPE));

        } else if (isFeatureCatalogue(metadata)) {
            final Map<String, List<String>> fcQueryable = removeOverridenField(ISO_FC_QUERYABLE);
            indexQueryableSet(doc, metadata, fcQueryable, anyText);

            doc.add(new Field("objectType", "FC_FeatureCatalogue", SEARCH_TYPE));

        } else if (isDublinCore(metadata)) {
            doc.add(new Field("objectType", "Record", SEARCH_TYPE));

        } else {
            LOGGER.log(Level.WARNING, "unknow Object classe unable to index: {0}", getType(metadata));
        }

        // All metadata types must be compatible with dublinCore.
        final Map<String, List<String>> dcQueryable = removeOverridenField(DUBLIN_CORE_QUERYABLE);
        indexQueryableSet(doc, metadata, dcQueryable, anyText);

        // we add the geometry parts if they are not already indexed
        if (!alreadySpatiallyIndexed) {
            //TODO find the real srid instead of static 4326
            indexSpatialPart(doc, metadata, dcQueryable, 4326);
        }

        // we add to the index the special queryable elements
        indexQueryableSet(doc, metadata, additionalQueryable, anyText);

        // add a default meta field to make searching all documents easy
        doc.add(new Field("metafile", "doc", SEARCH_TYPE));

        // we add the anyText values
        doc.add(new Field("AnyText", anyText.toString(), SEARCH_TYPE));

        return doc;
    }

    /**
     * Return a copy of the specified Queryable set without the entries overridden
     * by one in the additional Queryable set. The specified set is not modified.
     *
     * @param queryableSet The queryable set to filter.
     * @return A new map containing the entries whose key is not present in the additional queryable set.
     */
    private Map<String, List<String>> removeOverridenField(Map<String, List<String>> queryableSet) {
        final Map<String, List<String>> result = new HashMap<>();
        for (Entry<String, List<String>> entry : queryableSet.entrySet()) {
            final String term = entry.getKey();
            if (!additionalQueryable.containsKey(term)) {
                result.put(term, entry.getValue());
            }
        }
        return result;
    }

    /**
     * Index the values for the specified Field.
     * String values are indexed with {@link #indexField}, Number values with {@link #indexNumericField}.
     * {@code null} values are skipped, any other type is reported in the log and skipped.
     *
     * @param values The values to index.
     * @param fieldName The name of the field receiving the values.
     * @param anyText A {@link StringBuilder} in which are concatenated the text values.
     * @param doc The lucene document currently building.
     */
    protected void indexFields(final List<Object> values, final String fieldName, final StringBuilder anyText, final Document doc) {
        for (Object value : values) {
            if (value instanceof String) {
                indexField(fieldName, (String) value, anyText, doc);
            } else if (value instanceof Number) {
                indexNumericField(fieldName, (Number) value, doc);
            } else if (value != null) {
                LOGGER.log(Level.WARNING, "unexpected type for field:{0}", value.getClass());
            }
        }
    }

    /**
     * Index a single String field.
     * Two fields are added to the document: a searchable one named {@code fieldName}
     * and a non tokenized one named {@code fieldName + "_sort"}.
     * The value is also added to the anyText builder if it is not equal to "null"
     * and not already contained in the builder.
     *
     * @param fieldName The name of the field.
     * @param stringValue The value to index.
     * @param anyText A {@link StringBuilder} in which are concatenated the text values.
     * @param doc The lucene document currently building.
     */
    protected void indexField(final String fieldName, final String stringValue, final StringBuilder anyText, final Document doc) {
        final Field field     = new Field(fieldName, stringValue, SEARCH_TYPE);
        final Field fieldSort = new Field(fieldName + "_sort", stringValue, SORT_TYPE);

        // NOTE(review): the duplicate check is a plain substring search, so a value which is
        // a fragment of a previously appended one is not appended - confirm this is intended.
        final boolean isNullMarker = stringValue.equals(NULL_VALUE);
        if (!isNullMarker && anyText.indexOf(stringValue) == -1) {
            anyText.append(stringValue).append(" ");
        }

        doc.add(field);
        doc.add(fieldSort);
    }

    /**
     * Index a numeric field.
     * Two fields are added to the document, named {@code fieldName} and {@code fieldName + "_sort"},
     * and both names are registered through {@code addNumericField} with a character identifying
     * the numeric type ('i', 'd', 'f', 'l', or 'u' for an unexpected Number type, which is
     * indexed by its String representation).
     *
     * @param fieldName The name of the field.
     * @param numValue The numeric value to index.
     * @param doc The lucene document currently building.
     */
    protected void indexNumericField(final String fieldName, final Number numValue, final Document doc) {
        final String sortName = fieldName + "_sort";
        final Field numField;
        final Field numSortField;
        final Character fieldType;

        if (numValue instanceof Integer) {
            numField     = new IntField(fieldName, (Integer) numValue, Field.Store.NO);
            numSortField = new IntField(sortName,  (Integer) numValue, Field.Store.NO);
            fieldType    = 'i';
        } else if (numValue instanceof Double) {
            numField     = new DoubleField(fieldName, (Double) numValue, Field.Store.NO);
            numSortField = new DoubleField(sortName,  (Double) numValue, Field.Store.NO);
            fieldType    = 'd';
        } else if (numValue instanceof Float) {
            numField     = new FloatField(fieldName, (Float) numValue, Field.Store.NO);
            numSortField = new FloatField(sortName,  (Float) numValue, Field.Store.NO);
            fieldType    = 'f';
        } else if (numValue instanceof Long) {
            numField     = new LongField(fieldName, (Long) numValue, Field.Store.NO);
            numSortField = new LongField(sortName,  (Long) numValue, Field.Store.NO);
            fieldType    = 'l';
        } else {
            // unexpected Number implementation: fall back on its String representation
            final String asText = String.valueOf(numValue);
            numField     = new StringField(fieldName, asText, Field.Store.NO);
            numSortField = new StringField(sortName,  asText, Field.Store.NO);
            fieldType    = 'u';
            LOGGER.log(Level.WARNING, "Unexpected Number type:{0}", numValue.getClass().getName());
        }

        addNumericField(fieldName, fieldType);
        addNumericField(sortName, fieldType);
        doc.add(numField);
        doc.add(numSortField);
    }

    /**
     * Add the specifics implementation field to the document.
     *
     * @param metadata The metadata to index.
     * @param doc The lucene document currently building.
     * @throws IndexingException if the implementation is unable to index the special fields.
     */
    protected abstract void indexSpecialField(final A metadata, final Document doc) throws IndexingException;

    /**
     * Return a String description of the type of the metadata.
     *
     * @param metadata The metadata currently indexed
     * @return A string description (name of the class, name of the top value type, ...)
     */
    protected abstract String getType(final A metadata);

    /**
     * Index a set of properties contained in the queryableSet.
     *
     * @param doc The lucene document currently building.
     * @param metadata The metadata to index.
     * @param queryableSet A set of queryable properties and their relative path in the metadata.
     * @param anyText A {@link StringBuilder} in which are concatenated all the text values.
     * @throws IndexingException if the implementation is unable to index the queryable set.
     */
    protected abstract void indexQueryableSet(final Document doc, final A metadata, Map<String, List<String>> queryableSet, final StringBuilder anyText) throws IndexingException;

    /**
     * Spatially index the form extracting the BBOX values with the specified queryable set.
     * The coordinates are read from the paths registered under the "WestBoundLongitude",
     * "EastBoundLongitude", "NorthBoundLatitude" and "SouthBoundLatitude" queryables.
     * The bounding boxes are added only if the four coordinate lists have the same size;
     * otherwise a warning is logged.
     *
     * @param doc The current Lucene document.
     * @param form The metadata records to spatially index.
     * @param queryableSet A set of queryable Term.
     * @param srid the coordinate reference system SRID
     *
     * @return true if the bounding boxes have been handed to {@code addBoundingBox}, false otherwise.
     * @throws IndexingException if the coordinates extraction fails or if a
     *         {@link NullArgumentException} is raised while adding the bounding boxes.
     */
    private boolean indexSpatialPart(Document doc, A form, Map<String, List<String>> queryableSet, int srid) throws IndexingException {
        final List<Double> minxs = extractPositions(form, queryableSet.get("WestBoundLongitude"));
        final List<Double> maxxs = extractPositions(form, queryableSet.get("EastBoundLongitude"));
        final List<Double> maxys = extractPositions(form, queryableSet.get("NorthBoundLatitude"));
        final List<Double> minys = extractPositions(form, queryableSet.get("SouthBoundLatitude"));
        try {
            final boolean sameSize = minxs.size() == minys.size()
                                  && minys.size() == maxxs.size()
                                  && maxxs.size() == maxys.size();
            if (sameSize) {
                addBoundingBox(doc, minxs, maxxs, minys, maxys, srid);
                return true;
            }
            LOGGER.log(Level.WARNING, NOT_SPATIALLY_INDEXABLE + "{0}\n cause: missing coordinates.", getIdentifier(form));
        } catch (NullArgumentException ex) {
            throw new IndexingException("error while spatially indexing:" + doc.get("id"), ex);
        }
        return false;
    }

    /**
     * Extract the double coordinate from a metadata object using a list of paths to find the data.
     * The extracted String is split on ',' and ';' and each token is parsed as a double.
     * A missing value ({@code null} or the "null" marker) gives an empty list.
     * If a token cannot be parsed, a warning is logged and the coordinates parsed
     * before the faulty token are returned.
     *
     * @param metadata The metadata to spatially index.
     * @param paths A list of paths where to find the information within the metadata.
     * @return A list of Double coordinates, never {@code null}.
     *
     * @throws IndexingException if the values cannot be extracted from the metadata.
     */
    private List<Double> extractPositions(A metadata, List<String> paths) throws IndexingException {
        final String coord = getValues(metadata, paths);

        // No value at all: answer directly instead of relying on a parsing failure
        // (and avoid a NullPointerException in the tokenizer for a null String).
        if (coord == null || NULL_VALUE.equals(coord)) {
            return new ArrayList<>();
        }

        final StringTokenizer tokens = new StringTokenizer(coord, ",;");
        final List<Double> coordinate = new ArrayList<>(tokens.countTokens());
        try {
            while (tokens.hasMoreTokens()) {
                coordinate.add(Double.parseDouble(tokens.nextToken()));
            }
        } catch (NumberFormatException e) {
            // best effort: the coordinates parsed so far are kept
            LOGGER.warning(NOT_SPATIALLY_INDEXABLE + getIdentifier(metadata) +
                           "\ncause: unable to parse double: " + coord);
        }
        return coordinate;
    }

    /**
     * Not supported by this implementation.
     *
     * @return never returns normally.
     * @throws UnsupportedOperationException always.
     */
    @Override
    protected Iterator<A> getEntryIterator() throws IndexingException {
        throw new UnsupportedOperationException("Not supported by this implementation");
    }

    /**
     * @return always {@code false}.
     */
    @Override
    protected boolean useEntryIterator() {
        return false;
    }

    /**
     * Extract some values from a metadata object using the list of paths.
     *
     * @param meta The object to index.
     * @param paths A list of paths where to find the information within the metadata.
     *
     * @return A String containing one or more informations (comma separated) find in the metadata.
     * @throws IndexingException if the values cannot be extracted from the metadata.
     *
     * @deprecated This method is flagged as deprecated; no replacement is documented in this class.
     */
    @Deprecated
    protected abstract String getValues(final A meta, final List<String> paths) throws IndexingException;

    /**
     * Return true if the metadata object is a ISO19139 object.
     *
     * @param meta The object to index
     * @return true if the metadata object is a ISO19139 object.
     */
    protected abstract boolean isISO19139(A meta);

    /**
     * Return true if the metadata object is a DublinCore object.
     *
     * @param meta The object to index
     * @return true if the metadata object is a DublinCore object.
     */
    protected abstract boolean isDublinCore(A meta);

    /**
     * Return true if the metadata object is a Ebrim version 2.5 object.
     *
     * @param meta The object to index
     * @return true if the metadata object is a Ebrim version 2.5 object.
     */
    protected abstract boolean isEbrim25(A meta);

    /**
     * Return true if the metadata object is a Ebrim version 3.0 object.
     *
     * @param meta The object to index
     * @return true if the metadata object is a Ebrim version 3.0 object.
     */
    protected abstract boolean isEbrim30(A meta);

    /**
     * Return true if the metadata object is a FeatureCatalogue object.
     *
     * @param meta The object to index
     * @return true if the metadata object is a FeatureCatalogue object.
     */
    protected abstract boolean isFeatureCatalogue(A meta);
}