package lux.solr;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.Charset;
import javax.xml.stream.XMLStreamException;
import lux.index.FieldRole;
import lux.index.IndexConfiguration;
import lux.index.XmlIndexer;
import lux.index.field.FieldDefinition;
import lux.xml.tinybin.TinyBinary;
import net.sf.saxon.Configuration;
import net.sf.saxon.om.NodeInfo;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.index.IndexableField;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Handles documents written to Solr via its HTTP APIs.
 *
 * <p>When an incoming document supplies both a URI value and an XML field, the XML is
 * passed to an {@link XmlIndexer}, the fields defined by the index configuration are
 * appended to the Lucene document, and an {@link UpdateDocCommand} is handed to the next
 * processor in place of the original command. Otherwise the original command is forwarded.</p>
 */
public class LuxUpdateProcessor extends UpdateRequestProcessor {

    /** Charset passed to {@link TinyBinary} when the XML value arrives as a byte array. */
    private static final Charset UTF8 = Charset.forName("utf-8");

    private final SolrIndexConfig solrIndexConfig;
    private final IndexConfiguration indexConfig;
    private final Configuration saxonConfig;
    private final SolrQueryRequest req;
    private final Logger logger;

    /**
     * @param config source of the index configuration, the Saxon configuration, the schema
     *        and the pooled {@link XmlIndexer}s used by this processor
     * @param req the request being processed; passed on to the generated update command
     * @param next the next processor in the chain (checked for null before use)
     */
    public LuxUpdateProcessor (SolrIndexConfig config, SolrQueryRequest req, UpdateRequestProcessor next) {
        super(next);
        solrIndexConfig = config;
        indexConfig = solrIndexConfig.getIndexConfig();
        saxonConfig = solrIndexConfig.getCompiler().getProcessor().getUnderlyingConfiguration();
        this.req = req;
        logger = LoggerFactory.getLogger(getClass());
    }

    /**
     * Indexes the XML carried by the document being added (if any) and forwards the command.
     *
     * @param cmd the add command whose input document is inspected
     * @throws IOException if indexing the XML or building the document fields fails; any
     *         non-IO failure is wrapped so that the original exception is the cause
     */
    @Override
    public void processAdd (final AddUpdateCommand cmd) throws IOException {
        SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
        String xmlFieldName = indexConfig.getFieldName(FieldRole.XML_STORE);
        String idFieldName = indexConfig.getFieldName(FieldRole.ID);

        // remove and stash the xml and id field values before asking for the Lucene document
        SolrInputField xmlField = solrInputDocument.removeField(xmlFieldName);
        SolrInputField luxIdField = solrInputDocument.removeField(idFieldName);
        String uri = (String) solrInputDocument.getFieldValue(indexConfig.getFieldName(FieldRole.URI));
        Document luceneDocument = cmd.getLuceneDocument();
        UpdateDocCommand luxCommand = null;

        // NOTE(review): when the uri or the xml is missing, the stashed xml and id fields are
        // not put back before cmd is forwarded -- confirm this is intended.
        if (uri != null && xmlField != null) {
            // restore the xml field value
            solrInputDocument.put (xmlFieldName, xmlField);
            XmlIndexer xmlIndexer = solrIndexConfig.checkoutXmlIndexer();
            try {
                Object xml = xmlField.getFirstValue();
                try {
                    if (xml instanceof String) {
                        xmlIndexer.index (new StringReader((String) xml), uri);
                    } else if (xml instanceof byte[]) {
                        TinyBinary xmlbin = new TinyBinary ((byte[]) xml, UTF8);
                        xmlIndexer.index(xmlbin.getTinyDocument(saxonConfig), uri);
                    } else if (xml instanceof NodeInfo) {
                        xmlIndexer.index((NodeInfo) xml, uri);
                    }
                } catch (XMLStreamException e) {
                    // NOTE(review): a parse failure is only logged; field generation below
                    // still runs against the indexer -- confirm this best-effort is intended.
                    logger.error ("Failed to parse " + FieldRole.XML_STORE, e);
                }
                addDocumentFields (xmlIndexer, solrIndexConfig.getSchema(), luceneDocument);
                if (luxIdField != null) {
                    luceneDocument.add (new LongField(idFieldName, toLongId(luxIdField.getValue()), Store.YES));
                }
                luxCommand = new UpdateDocCommand(req, solrInputDocument, luceneDocument, uri);
            } catch (Exception e) {
                logger.error("An error occurred while indexing " + uri, e);
                if (e instanceof IOException) {
                    // already the declared type: rethrow as-is rather than double-wrapping
                    throw (IOException) e;
                }
                throw new IOException(e);
            } finally {
                // the indexer is checked out from solrIndexConfig, so always hand it back
                solrIndexConfig.returnXmlIndexer(xmlIndexer);
            }
        }
        if (next != null) {
            next.processAdd(luxCommand == null ? cmd : luxCommand);
        }
    }

    /**
     * Converts an id field value to a long.
     *
     * @param id the raw field value
     * @return the value itself when it is a {@link Long}, otherwise its string form parsed as a long
     * @throws NumberFormatException if the string form is not a valid long (this includes a null value)
     */
    private static long toLongId (Object id) {
        if (id instanceof Long) {
            return (Long) id;
        }
        // solr cloud distributes these as Strings
        return Long.parseLong(String.valueOf(id));
    }

    /**
     * Appends the values of every configured field to the given Lucene document.
     *
     * @param indexer the indexer from which the field values are read
     * @param indexSchema the schema used to look up the {@link SchemaField} for each field name
     * @param doc the Lucene document that receives the fields
     */
    private void addDocumentFields (XmlIndexer indexer, IndexSchema indexSchema, Document doc) {
        if (indexConfig.isOption(IndexConfiguration.STORE_TINY_BINARY)) {
            // remove the serialized xml field value -- we will store a TinyBinary instead
            doc.removeField(indexConfig.getFieldName(FieldRole.XML_STORE));
        }
        FieldDefinition uriField = indexConfig.getField(FieldRole.URI);
        FieldDefinition xmlStoreField = indexConfig.getField(FieldRole.XML_STORE);
        for (FieldDefinition field : indexConfig.getFields()) {
            String fieldName = field.getName();
            boolean suppliedExternally = (field == uriField || field == xmlStoreField);
            if (suppliedExternally && doc.getField(fieldName) != null) {
                // uri and xml are provided externally in LuxUpdateProcessor
                continue;
            }
            Iterable<?> values = field.getValues(indexer);
            SchemaField schemaField = indexSchema.getField(fieldName);
            if (values != null) {
                for (Object value : values) {
                    addField(doc, schemaField, value, 1.0f);
                }
            } else {
                // no plain values: fall back to the ready-made field values
                for (IndexableField value : field.getFieldValues(indexer)) {
                    addField(doc, schemaField, value, 1.0f);
                }
            }
        }
    }

    /**
     * Adds one value to the document; adapted from solr..DocumentBuilder.
     *
     * @param doc the document to add to
     * @param field the schema field whose type creates the Lucene fields for a plain value
     * @param val either a ready-made {@link IndexableField}, which is added directly, or a
     *        plain value that is converted by the schema field's type
     * @param boost the boost passed to the field type when creating fields
     */
    private static void addField(Document doc, SchemaField field, Object val, float boost) {
        if (val instanceof IndexableField) {
            doc.add((IndexableField) val);
            return;
        }
        for (IndexableField f : field.getType().createFields(field, val, boost)) {
            if (f != null) {
                // null fields are not added; no downcast to Field, Document.add accepts any IndexableField
                doc.add(f);
            }
        }
    }
}
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */