/**
* Copyright (c) Codice Foundation
* <p/>
* This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser
* General Public License as published by the Free Software Foundation, either version 3 of the
* License, or any later version.
* <p/>
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details. A copy of the GNU Lesser General Public License
* is distributed along with this program and can be found at
* <http://www.gnu.org/licenses/lgpl.html>.
*/
package ddf.catalog.source.solr;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.codehaus.stax2.XMLInputFactory2;
import org.codice.solr.factory.ConfigurationStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ddf.catalog.data.AttributeDescriptor;
import ddf.catalog.data.AttributeType;
import ddf.catalog.data.AttributeType.AttributeFormat;
import ddf.catalog.data.Metacard;
import ddf.catalog.data.MetacardCreationException;
import ddf.catalog.data.MetacardType;
import ddf.catalog.data.impl.AttributeDescriptorImpl;
import ddf.catalog.data.impl.MetacardTypeImpl;
/**
* This class tries to resolve all user given field names to their corresponding dynamic Solr index
* field name. This class takes most of its logic directly from the configured Solr schema.xml. For
* instance, the suffixes enumerated in this class are directly copied from the schema.xml.
*
* @since 0.2.0
*/
public class DynamicSchemaResolver {
public static final String LUX_XML_FIELD_NAME = "lux_xml";
protected static final char FIRST_CHAR_OF_SUFFIX = '_';
protected static final String COULD_NOT_READ_METACARD_TYPE_MESSAGE = "Could not read MetacardType.";
protected static final String FIELDS_KEY = "fields";
protected static final String COULD_NOT_SERIALIZE_OBJECT_MESSAGE = "Could not serialize object";
protected static final XMLInputFactory XML_INPUT_FACTORY;
private static final String SOLR_CLOUD_VERSION_FIELD = "_version_";
private static final List<String> PRIVATE_SOLR_FIELDS = Arrays
.asList(SOLR_CLOUD_VERSION_FIELD, SchemaFields.METACARD_TYPE_FIELD_NAME,
SchemaFields.METACARD_TYPE_OBJECT_FIELD_NAME);
private static final Logger LOGGER = LoggerFactory.getLogger(DynamicSchemaResolver.class);
static {
ClassLoader tccl = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread()
.setContextClassLoader(DynamicSchemaResolver.class.getClassLoader());
XML_INPUT_FACTORY = XMLInputFactory2.newInstance();
XML_INPUT_FACTORY
.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.FALSE);
XML_INPUT_FACTORY
.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
XML_INPUT_FACTORY.setProperty(XMLInputFactory.IS_COALESCING, Boolean.FALSE);
XML_INPUT_FACTORY.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.FALSE);
} finally {
Thread.currentThread().setContextClassLoader(tccl);
}
}
protected Set<String> fieldsCache = new HashSet<>();
protected SchemaFields schemaFields;
protected Map<String, MetacardType> metacardTypesCache = new HashMap<>();
protected Map<String, byte[]> metacardTypeNameToSerialCache = new HashMap<>();
public DynamicSchemaResolver() {
this.schemaFields = new SchemaFields();
fieldsCache.add(Metacard.ID + SchemaFields.TEXT_SUFFIX);
fieldsCache.add(Metacard.ID + SchemaFields.TEXT_SUFFIX + SchemaFields.TOKENIZED);
fieldsCache.add(Metacard.ID + SchemaFields.TEXT_SUFFIX + SchemaFields.TOKENIZED
+ SchemaFields.HAS_CASE);
}
/**
* Adds the fields that are already in the server to the cache. This method should be called
* once the SolrServer is up to ensure the cache is synchronized with the server.
*
* @param server
* the SolrServer we are working with
*/
public void addFieldsFromServer(SolrServer server) {
if (server == null) {
LOGGER.warn("Server is null, could not add fields to cache.");
return;
}
SolrQuery query = new SolrQuery();
// numterms=0 means retrieve everything (regular or dynamic fields)
query.add("numterms", "0");
/*
* Adding this request handler allows us to query the schema dynamically. The name of the
* request handler is provided by the schema.xml. If the name is changed in the schema.xml,
* then this value must be changed as well.
*/
query.setRequestHandler("/admin/luke");
try {
QueryResponse response = server.query(query);
for (Entry<String, ?> e : ((SimpleOrderedMap<?>) (response.getResponse()
.get(FIELDS_KEY)))) {
fieldsCache.add(e.getKey());
}
} catch (SolrServerException | SolrException e) {
LOGGER.warn("Could not update cache for field names.", e);
}
}
/**
* Adds the fields of the Metacard into the {@link SolrInputDocument}
*/
public void addFields(Metacard metacard, SolrInputDocument solrInputDocument)
throws MetacardCreationException {
MetacardType schema = metacard.getMetacardType();
// TODO: register these metacard types when a new one is seen
for (AttributeDescriptor ad : schema.getAttributeDescriptors()) {
if (metacard.getAttribute(ad.getName()) != null) {
Serializable attributeValue = metacard.getAttribute(ad.getName()).getValue();
if (attributeValue != null) {
AttributeFormat format = ad.getType().getAttributeFormat();
String formatIndexName = ad.getName() + getFieldSuffix(format);
if (AttributeFormat.XML.equals(format)) {
// raw
solrInputDocument.addField(formatIndexName, attributeValue);
String parsedText = parseTextFrom(attributeValue.toString());
// text => metadata_txt_ws
String whitespaceTokenizedIndexName =
ad.getName() + getFieldSuffix(AttributeFormat.STRING) + SchemaFields.WHITESPACE_TEXT_SUFFIX;
solrInputDocument.addField(whitespaceTokenizedIndexName, parsedText);
// text => metadata_txt_ws_has_case
String whiteSpaceTokenizedHasCaseIndexName =
ad.getName() + getFieldSuffix(AttributeFormat.STRING) + SchemaFields.WHITESPACE_TEXT_SUFFIX
+ SchemaFields.HAS_CASE;
solrInputDocument.addField(whiteSpaceTokenizedHasCaseIndexName, parsedText);
// text => metadata_txt_tokenized
String specialStringIndexName =
ad.getName() + getFieldSuffix(AttributeFormat.STRING)
+ getSpecialIndexSuffix(AttributeFormat.STRING);
solrInputDocument.addField(specialStringIndexName, parsedText);
// text case sensitive
solrInputDocument.addField(getCaseSensitiveField(specialStringIndexName),
parsedText);
} else if (AttributeFormat.GEOMETRY.equals(format)) {
solrInputDocument.addField(formatIndexName, attributeValue);
} else if (AttributeFormat.OBJECT.equals(format)) {
ByteArrayOutputStream byteArrayOS = new ByteArrayOutputStream();
try {
ObjectOutputStream out = new ObjectOutputStream(byteArrayOS);
out.writeObject(attributeValue);
out.close();
} catch (IOException e) {
LOGGER.warn(COULD_NOT_SERIALIZE_OBJECT_MESSAGE, e);
throw new MetacardCreationException(COULD_NOT_SERIALIZE_OBJECT_MESSAGE);
}
solrInputDocument.addField(formatIndexName, byteArrayOS.toByteArray());
} else {
solrInputDocument.addField(formatIndexName, attributeValue);
}
}
}
}
if (!ConfigurationStore.getInstance().isDisableTextPath()) {
if (StringUtils.isNotBlank(metacard.getMetadata())) {
solrInputDocument.addField(LUX_XML_FIELD_NAME, metacard.getMetadata());
}
}
/*
* Lastly the metacardType must be added to the solr document. These are internal fields
*/
solrInputDocument.addField(SchemaFields.METACARD_TYPE_FIELD_NAME, schema.getName());
byte[] metacardTypeBytes = metacardTypeNameToSerialCache.get(schema.getName());
if (metacardTypeBytes == null) {
MetacardType coreMetacardType = new MetacardTypeImpl(schema.getName(),
convertAttributeDescriptors(schema.getAttributeDescriptors()));
metacardTypesCache.put(schema.getName(), coreMetacardType);
metacardTypeBytes = serialize(coreMetacardType);
metacardTypeNameToSerialCache.put(schema.getName(), metacardTypeBytes);
addToFieldsCache(coreMetacardType.getAttributeDescriptors());
}
solrInputDocument.addField(SchemaFields.METACARD_TYPE_OBJECT_FIELD_NAME, metacardTypeBytes);
}
/**
* Returns the best approximation as to what {@link AttributeFormat} this Solr Field is.
*
* @param solrFieldName
* name of the Solr field
* @return the {@link AttributeFormat} associated with the Solr field
*/
public AttributeFormat getType(String solrFieldName) {
String suffix = "";
int lastIndexOfUndercore = solrFieldName.lastIndexOf(FIRST_CHAR_OF_SUFFIX);
if (lastIndexOfUndercore != -1) {
suffix = solrFieldName.substring(lastIndexOfUndercore, solrFieldName.length());
}
return schemaFields.getFormat(suffix);
}
public Serializable getDocValue(String solrFieldName, Object docValue) {
AttributeFormat format = getType(solrFieldName);
if (AttributeFormat.SHORT.equals(format)) {
/*
* We have inside knowledge that user-given short objects are stored as Integers in
* Solr. You cannot cast an int to a short, so the workaround is to parse it. This
* should not lead to any loss of information because the value was originally a short.
*/
return Short.parseShort(docValue.toString());
} else if (AttributeFormat.OBJECT.equals(format)) {
ByteArrayInputStream bais = null;
ObjectInputStream in = null;
try {
bais = new ByteArrayInputStream((byte[]) docValue);
in = new ObjectInputStream(bais);
return (Serializable) in.readObject();
} catch (IOException e) {
LOGGER.warn("IO exception loading input document", e);
} catch (ClassNotFoundException e) {
LOGGER.warn("Could not create object to return.", e);
// TODO which exception to throw?
} finally {
IOUtils.closeQuietly(bais);
IOUtils.closeQuietly(in);
}
return null;
} else {
return ((Serializable) docValue);
}
}
/**
* PRE-CONDITION is that fieldname cannot be null.
*
* The convention is that we add a suffix starting with an underscore, so if we find the last
* underscore, then we can return the original field name.
*
* @param solrFieldName Solr index field name
* @return the original field name
*/
public String resolveFieldName(String solrFieldName) {
int lastIndexOfUndercore = solrFieldName.lastIndexOf(FIRST_CHAR_OF_SUFFIX);
if (lastIndexOfUndercore != -1) {
return solrFieldName.substring(0, lastIndexOfUndercore);
}
return solrFieldName;
}
public boolean isPrivateField(String solrFieldName) {
return PRIVATE_SOLR_FIELDS.contains(solrFieldName);
}
/**
* Attempts to resolve the name of a field without being given an {@link AttributeFormat}
*
* @param field
* user given field name
* @return a list of possible Solr field names that match the given field. If none are found,
* then an empty list is returned
*/
public List<String> getAnonymousField(String field) {
ArrayList<String> list = new ArrayList<>();
for (AttributeFormat format : AttributeFormat.values()) {
String fullFieldName = field + schemaFields.getFieldSuffix(format);
if (fieldsCache.contains(fullFieldName)) {
list.add(fullFieldName);
}
}
return list;
}
/**
* Attempts to find the fieldName for the given propertyName value.
*
* @param propertyName
* property name provided by user
* @param format
* {@link AttributeFormat} that describes the type of {@link ddf.catalog.data.Attribute} the field is
* @param isSearchedAsExactValue
* specifies if any special index suffixes need to be added to the field
* @return the proper schema field name. If a schema name cannot be found in cache, returns a
* schema field name that matches the dynamic field type formatting.
*/
public String getField(String propertyName, AttributeFormat format,
boolean isSearchedAsExactValue) {
String fieldName = propertyName + schemaFields.getFieldSuffix(format) + (
isSearchedAsExactValue ?
"" :
getSpecialIndexSuffix(format));
if (fieldsCache.contains(fieldName)) {
return fieldName;
}
switch (format) {
case DOUBLE:
case LONG:
case INTEGER:
case SHORT:
case FLOAT:
return findAnyMatchingNumericalField(propertyName);
default:
break;
}
LOGGER.info(
"Could not find exact schema field name for [{}], attempting to search with [{}]",
propertyName, fieldName);
return fieldName;
}
public String getFieldSuffix(AttributeFormat format) {
return schemaFields.getFieldSuffix(format);
}
public MetacardType getMetacardType(SolrDocument doc) throws MetacardCreationException {
String mTypeFieldName = doc.getFieldValue(SchemaFields.METACARD_TYPE_FIELD_NAME).toString();
MetacardType cachedMetacardType = metacardTypesCache.get(mTypeFieldName);
if (cachedMetacardType != null) {
return cachedMetacardType;
}
byte[] bytes = (byte[]) doc.getFieldValue(SchemaFields.METACARD_TYPE_OBJECT_FIELD_NAME);
ByteArrayInputStream bais = null;
ObjectInputStream in = null;
try {
bais = new ByteArrayInputStream(bytes);
in = new ObjectInputStream(bais);
cachedMetacardType = (MetacardType) in.readObject();
} catch (IOException e) {
LOGGER.warn("IO exception loading cached metacard type", e);
throw new MetacardCreationException(COULD_NOT_READ_METACARD_TYPE_MESSAGE);
} catch (ClassNotFoundException e) {
LOGGER.warn("Class exception loading cached metacard type", e);
throw new MetacardCreationException(COULD_NOT_READ_METACARD_TYPE_MESSAGE);
} finally {
IOUtils.closeQuietly(bais);
IOUtils.closeQuietly(in);
}
metacardTypeNameToSerialCache.put(mTypeFieldName, bytes);
metacardTypesCache.put(mTypeFieldName, cachedMetacardType);
addToFieldsCache(cachedMetacardType.getAttributeDescriptors());
return cachedMetacardType;
}
public String getCaseSensitiveField(String mappedPropertyName) {
// TODO We can check if this field really does exist
return mappedPropertyName + SchemaFields.HAS_CASE;
}
protected String getSpecialIndexSuffix(AttributeFormat format) {
switch (format) {
case STRING:
return SchemaFields.TOKENIZED;
case GEOMETRY:
return SchemaFields.INDEXED;
case XML:
return SchemaFields.TEXT_PATH;
default:
break;
}
return "";
}
private void addToFieldsCache(Set<AttributeDescriptor> descriptors) {
for (AttributeDescriptor ad : descriptors) {
AttributeFormat format = ad.getType().getAttributeFormat();
fieldsCache.add(ad.getName() + schemaFields.getFieldSuffix(format));
if (!getSpecialIndexSuffix(format).equals("")) {
fieldsCache.add(ad.getName() + schemaFields.getFieldSuffix(format)
+ getSpecialIndexSuffix(format));
}
if (format.equals(AttributeFormat.STRING)) {
fieldsCache.add(ad.getName() + schemaFields.getFieldSuffix(format)
+ getSpecialIndexSuffix(format) + SchemaFields.HAS_CASE);
}
if (format.equals(AttributeFormat.XML)) {
fieldsCache.add(ad.getName() + SchemaFields.TEXT_SUFFIX + SchemaFields.TOKENIZED);
fieldsCache.add(ad.getName() + SchemaFields.TEXT_SUFFIX + SchemaFields.TOKENIZED
+ SchemaFields.HAS_CASE);
fieldsCache.add(ad.getName() + schemaFields.getFieldSuffix(format)
+ getSpecialIndexSuffix(format));
}
}
}
private byte[] serialize(MetacardType anywhereMType) throws MetacardCreationException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
ObjectOutputStream out = new ObjectOutputStream(baos);
out.writeObject(anywhereMType);
byte[] bytes = baos.toByteArray();
baos.close();
out.close();
return bytes;
} catch (IOException e) {
LOGGER.warn("IO exception reading metacard type message", e);
throw new MetacardCreationException(COULD_NOT_READ_METACARD_TYPE_MESSAGE);
}
}
private String findAnyMatchingNumericalField(String propertyName) {
if (fieldsCache.contains(propertyName + SchemaFields.DOUBLE_SUFFIX)) {
return propertyName + SchemaFields.DOUBLE_SUFFIX;
}
if (fieldsCache.contains(propertyName + SchemaFields.FLOAT_SUFFIX)) {
return propertyName + SchemaFields.FLOAT_SUFFIX;
}
if (fieldsCache.contains(propertyName + SchemaFields.INTEGER_SUFFIX)) {
return propertyName + SchemaFields.INTEGER_SUFFIX;
}
if (fieldsCache.contains(propertyName + SchemaFields.LONG_SUFFIX)) {
return propertyName + SchemaFields.LONG_SUFFIX;
}
if (fieldsCache.contains(propertyName + SchemaFields.SHORT_SUFFIX)) {
return propertyName + SchemaFields.SHORT_SUFFIX;
}
LOGGER.info(
"Did not find any numerical schema fields for property [{}]. Replacing with property [{}]",
propertyName, propertyName + SchemaFields.INTEGER_SUFFIX);
return propertyName + SchemaFields.INTEGER_SUFFIX;
}
/**
* Given xml as a string, this method will parse out element text and CDATA text. It separates
* each by one space character.
*
* @param xmlData
* XML as a {@code String}
* @return parsed CDATA and element text
*/
protected String parseTextFrom(String xmlData) {
StringBuilder builder = new StringBuilder();
XMLStreamReader xmlStreamReader = null;
StringReader sr = null;
long starttime = System.currentTimeMillis();
try {
// xml parser does not handle leading whitespace
sr = new StringReader(xmlData);
xmlStreamReader = XML_INPUT_FACTORY.createXMLStreamReader(sr);
while (xmlStreamReader.hasNext()) {
int event = xmlStreamReader.next();
if (event == XMLStreamConstants.CHARACTERS || event == XMLStreamConstants.CDATA) {
String text = xmlStreamReader.getText();
if (StringUtils.isNotBlank(text)) {
builder.append(" ").append(text.trim());
}
}
if (event == XMLStreamConstants.START_ELEMENT) {
for (int i = 0; i < xmlStreamReader.getAttributeCount(); i++) {
String text = xmlStreamReader.getAttributeValue(i);
if (StringUtils.isNotBlank(text)) {
builder.append(" ").append(text.trim());
}
}
}
}
} catch (XMLStreamException e1) {
LOGGER.warn(
"Failure occurred in parsing the xml data. No data has been stored or indexed.",
e1);
} finally {
IOUtils.closeQuietly(sr);
if (xmlStreamReader != null) {
try {
xmlStreamReader.close();
} catch (XMLStreamException e) {
LOGGER.debug("Exception closing XMLStreamReader", e);
}
}
}
long endTime = System.currentTimeMillis();
LOGGER.debug("Parsing took {} ms", endTime - starttime);
return builder.toString();
}
private Set<AttributeDescriptor> convertAttributeDescriptors(
Set<AttributeDescriptor> attributeDescriptors) {
Set<AttributeDescriptor> newAttributeDescriptors = new HashSet<>(
attributeDescriptors.size());
for (AttributeDescriptor attributeDescriptor : attributeDescriptors) {
String name = attributeDescriptor.getName();
boolean isIndexed = attributeDescriptor.isIndexed();
boolean isStored = attributeDescriptor.isStored();
boolean isTokenized = attributeDescriptor.isTokenized();
boolean isMultiValued = attributeDescriptor.isMultiValued();
AttributeType<?> attributeType = attributeDescriptor.getType();
newAttributeDescriptors
.add(new AttributeDescriptorImpl(name, isIndexed, isStored, isTokenized,
isMultiValued, attributeType));
}
return newAttributeDescriptors;
}
}