package lux.solr;
import static lux.index.IndexConfiguration.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ArrayBlockingQueue;
import lux.Compiler;
import lux.exception.LuxException;
import lux.index.FieldRole;
import lux.index.IndexConfiguration;
import lux.index.XmlIndexer;
import lux.index.analysis.DefaultAnalyzer;
import lux.index.analysis.WhitespaceGapAnalyzer;
import lux.index.field.FieldDefinition;
import lux.index.field.FieldDefinition.Type;
import lux.index.field.XPathField;
import lux.index.field.XmlTextField;
import net.sf.saxon.s9api.Serializer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.schema.BinaryField;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;
import org.apache.solr.schema.TextField;
import org.apache.solr.schema.TrieIntField;
import org.apache.solr.schema.TrieLongField;
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Wraps an {@link IndexConfiguration}, adding field definitions from information in Solr's configuration files:
* solrconfig.xml and schema.xml.
*/
public class SolrIndexConfig implements SolrInfoMBean {
// URL returned by getSource() and used to build the array returned by getDocs()
private static final String SOURCE_URL = "https://github.com/msokolov/lux";
// the wrapped Lux index configuration; set once in the constructor
private final IndexConfiguration indexConfig;
// copy of the "fields" list from the Solr configuration (field name -> value);
// set by applyFieldConfiguration() and read by addXPathFields(); null until configured
private NamedList<String> xpathFieldConfig;
// compiler built from indexConfig at the end of the constructor
private Compiler compiler;
// bounded pool of reusable indexers; see checkoutXmlIndexer() / returnXmlIndexer()
private ArrayBlockingQueue<XmlIndexer> indexerPool;
// bounded pool of reusable serializers; see checkoutSerializer() / returnSerializer()
private ArrayBlockingQueue<Serializer> serializerPool;
// the core's schema; assigned in inform(), null before that
private IndexSchema schema;
private final Logger logger;
/**
 * Creates a configuration wrapper around the given index configuration.
 *
 * @param indexConfig the Lux index configuration to wrap
 * @param args Solr init args to apply (field aliases, fields, namespaces); may be null,
 * in which case no Solr-specific settings are applied
 */
public SolrIndexConfig (final IndexConfiguration indexConfig, NamedList<?> args) {
    this.logger = LoggerFactory.getLogger(getClass());
    this.indexConfig = indexConfig;
    this.serializerPool = new ArrayBlockingQueue<Serializer>(8);
    this.indexerPool = new ArrayBlockingQueue<XmlIndexer>(8);
    // the Solr settings are applied to indexConfig before the compiler is built from it
    if (args != null) {
        applySolrConfig(args);
    }
    this.compiler = new Compiler (indexConfig);
}
/**
 * @return the compiler created from the wrapped index configuration
 */
public Compiler getCompiler () {
    return this.compiler;
}
/**
 * Takes an indexer from the pool, creating a new one when the pool is empty.
 * Callers hand the indexer back with {@link #returnXmlIndexer(XmlIndexer)}.
 *
 * @return a pooled or newly created indexer
 */
public XmlIndexer checkoutXmlIndexer () {
    // In tests the pool made no appreciable difference, but it is kept anyway.
    final XmlIndexer pooled = indexerPool.poll();
    if (pooled != null) {
        return pooled;
    }
    final XmlIndexer created = new XmlIndexer (indexConfig, compiler);
    logger.debug("created new XmlIndexer");
    return created;
}
/**
 * Offers an indexer back to the pool.
 *
 * @param doneWithIt the indexer the caller has finished with
 */
public void returnXmlIndexer (XmlIndexer doneWithIt) {
    // offer() does not block: when the pool is full the indexer is simply dropped
    this.indexerPool.offer(doneWithIt);
}
/**
 * Takes a serializer from the pool, creating and configuring a new one when
 * the pool is empty. New serializers are set to utf-8 encoding, no byte order
 * mark and no XML declaration.
 *
 * @return a pooled or newly created serializer
 */
public Serializer checkoutSerializer() {
    final Serializer pooled = serializerPool.poll();
    if (pooled != null) {
        return pooled;
    }
    final Serializer created = new Serializer();
    created.setOutputProperty(Serializer.Property.ENCODING, "utf-8");
    created.setOutputProperty(Serializer.Property.BYTE_ORDER_MARK, "no");
    created.setOutputProperty(Serializer.Property.OMIT_XML_DECLARATION, "yes");
    return created;
}
/**
 * Offers a serializer back to the pool.
 *
 * @param doneWithIt the serializer the caller has finished with
 */
public void returnSerializer (Serializer doneWithIt) {
    // offer() does not block: when the pool is full the serializer is simply dropped
    this.serializerPool.offer(doneWithIt);
}
/**
 * Returns the configuration registered in the core's info registry under this
 * class's name, creating, informing and registering one if none is present.
 *
 * @param core the Solr core whose configuration is wanted
 * @return the existing or newly registered configuration
 */
public static SolrIndexConfig registerIndexConfiguration (SolrCore core) {
    // Read the init args from the update processor chain configuration.
    // FIXME: with multiple chains, the one marked default="true" wins; otherwise the
    // last one is used. This provides back-compat, but an ambiguous setup should at
    // least raise a warning.
    NamedList<?> processorArgs = null;
    for (PluginInfo chain : core.getSolrConfig().getPluginInfos(UpdateRequestProcessorChain.class.getName())) {
        processorArgs = chain.initArgs;
        final boolean isDefault = "true".equals(chain.attributes.get("default"));
        if (isDefault) {
            break;
        }
    }
    final String beanName = SolrIndexConfig.class.getName();
    final SolrInfoMBean registered = core.getInfoRegistry().get(beanName);
    if (registered != null) {
        return (SolrIndexConfig) registered;
    }
    final int options = (INDEX_PATHS | INDEX_FULLTEXT | STORE_DOCUMENT | SOLR);
    final SolrIndexConfig created = SolrIndexConfig.makeIndexConfiguration(options, processorArgs, beanName);
    created.inform(core);
    core.getInfoRegistry().put(beanName, created);
    return created;
}
/**
 * Builds a configuration from base indexing options plus optional Solr init args.
 * The args "strip-namespaces" and "namespace-aware" each add an option when set
 * to "yes"; "xml-format" accepts "xml" or "tiny".
 *
 * @param options base indexing option flags
 * @param args init args to read options from and pass on to the new instance; may be null
 * @param configName not used by this method
 * @return a new configuration wrapping an IndexConfiguration built from the combined options
 * @throws LuxException if xml-format is present and is neither "xml" nor "tiny"
 */
public static SolrIndexConfig makeIndexConfiguration (int options, final NamedList<?> args, String configName) {
    int flags = options;
    if (args != null) {
        if ("yes".equals(args.get("strip-namespaces"))) {
            flags |= STRIP_NAMESPACES;
        }
        if ("yes".equals(args.get("namespace-aware"))) {
            flags |= NAMESPACE_AWARE;
        }
        final Object format = args.get("xml-format");
        if ("tiny".equals(format)) {
            flags |= STORE_TINY_BINARY;
        } else if (format != null && ! "xml".equals(format)) {
            throw new LuxException("invalid xml-format: " + format + ", must be one of: (xml,tiny)");
        }
    }
    return new SolrIndexConfig(new IndexConfiguration(flags), args);
}
/**
 * Records a copy of the given field list for later use when XPath fields are
 * added to the index configuration. A null argument leaves any previously
 * recorded list untouched.
 *
 * @param fields name/value pairs describing the configured fields; may be null
 */
public void applyFieldConfiguration (NamedList<String> fields) {
    if (fields == null) {
        return;
    }
    xpathFieldConfig = new NamedList<String>();
    for (int i = 0; i < fields.size(); i++) {
        xpathFieldConfig.add(fields.getName(i), fields.getVal(i));
    }
}
/**
 * Applies the settings found in the Solr init args to the wrapped index
 * configuration. Three optional lists are read:
 * "fieldAliases" (xmlFieldName, uriFieldName, textFieldName) renames the
 * corresponding fields, "fields" is passed to
 * {@link #applyFieldConfiguration(NamedList)}, and "namespaces" defines
 * namespace mappings. Each list is processed independently of the others.
 *
 * @param args the init args; must not be null
 */
private void applySolrConfig (@SuppressWarnings("rawtypes") final NamedList args) {
    // A missing fieldAliases list must not stop the fields and namespaces lists
    // from being applied, so it is skipped instead of returning early.
    NamedList<?> aliases = (NamedList<?>) args.get ("fieldAliases");
    if (aliases != null) {
        for (int i = 0; i < aliases.size(); i++) {
            String name = aliases.getName(i);
            Object value = aliases.getVal(i);
            if (value == null) {
                // nothing to rename the field to
                logger.warn("ignoring field alias with no value: {}", name);
                continue;
            }
            String alias = value.toString();
            if ("xmlFieldName".equals(name)) {
                indexConfig.renameField(indexConfig.getField(FieldRole.XML_STORE), alias);
                logger.info("XML storage field name: {}", alias);
            }
            else if ("uriFieldName".equals(name)) {
                logger.info("URI field name: {}", alias);
                indexConfig.renameField(indexConfig.getField(FieldRole.URI), alias);
            }
            else if ("textFieldName".equals(name)) {
                logger.info("XML text field name: {}", alias);
                indexConfig.renameField(indexConfig.getField(FieldRole.XML_TEXT), alias);
            }
        }
    }
    @SuppressWarnings("unchecked")
    NamedList<String> fields = (NamedList<String>) args.get("fields");
    if (fields != null) {
        applyFieldConfiguration(fields);
    }
    @SuppressWarnings("unchecked")
    NamedList<String> namespaces = (NamedList<String>) args.get("namespaces");
    if (namespaces != null) {
        for (Entry<String,String> ns : namespaces) {
            indexConfig.defineNamespaceMapping(ns.getKey(), ns.getValue());
        }
    }
}
/**
 * Synchronizes the core's latest schema with the Lux index configuration:
 * registers the Lux fields with the schema, picks up the schema's analyzers
 * for the XML text fields, adds any configured XPath fields, and checks the
 * schema's unique key against the Lux URI field.
 *
 * @param core the Solr core whose schema is to be updated
 */
public void inform(SolrCore core) {
    this.schema = core.getLatestSchema();
    // XML_STORE is not listed explicitly by the indexer, so register it separately
    final FieldDefinition storeField = indexConfig.getField(FieldRole.XML_STORE);
    informField (storeField, core);
    // This must run before informField() registers default analyzers with the schema
    registerXmlTextFields();
    for (FieldDefinition definition : indexConfig.getFields()) {
        informField (definition, core);
    }
    if (xpathFieldConfig != null) {
        addXPathFields();
    }
    final SchemaField keyField = schema.getUniqueKeyField();
    if (keyField == null) {
        logger.error("{} schema does not define any unique field", core.getName());
    } else {
        final String uriFieldName = indexConfig.getFieldName(FieldRole.URI);
        if (! keyField.getName().equals(uriFieldName)) {
            logger.error("{} schema defines a different unique field than the uri field declared in lux configuration", core.getName());
        }
    }
    // must be called after making changes to the field map
    schema.refreshAnalyzers();
}
/**
 * Registers one Lux field with the schema, adding its field type as well when
 * the schema has no type of that name. A field the schema already defines is
 * left alone unless it is a TOKENS field.
 *
 * @param xmlField the Lux field definition to register
 * @param core the core being configured; used here for its name in log messages
 */
private void informField (FieldDefinition xmlField, SolrCore core) {
    final Map<String,SchemaField> schemaFields = schema.getFields();
    final Map<String,FieldType> fieldTypes = schema.getFieldTypes();
    final String fieldName = xmlField.getName();
    final boolean isTokens = xmlField.getType() == Type.TOKENS;
    if (! isTokens && schemaFields.containsKey(fieldName)) {
        // Already defined by the schema. Only TOKENS fields are replaced: they install
        // their own field type, which wraps the analyzer defined by the schema.
        return;
    }
    // infer the field type using the mapping in this class
    final FieldType inferred = getFieldType(xmlField);
    final String typeName = inferred.getTypeName();
    final FieldType fieldType;
    if (fieldTypes.containsKey(typeName)) {
        // the schema's own definition of the type takes precedence
        fieldType = fieldTypes.get(typeName);
    } else {
        logger.info("{} defining fieldType: {}", core.getName(), typeName);
        fieldTypes.put(typeName, inferred);
        fieldType = inferred;
    }
    // add the field to the schema
    logger.info(core.getName() + " defining field: {} of type {}", fieldName, fieldType.getTypeName());
    final SchemaField schemaField = new SchemaField (fieldName, fieldType, xmlField.getSolrFieldProperties(), "");
    schemaFields.put(fieldName, schemaField);
}
/**
 * Connects the Lux text fields to the analyzers the schema declares for the
 * XML text field, and registers every copy-field destination of that field
 * with the index configuration as an additional XML text field.
 */
private void registerXmlTextFields() {
    final String textFieldName = indexConfig.getFieldName(FieldRole.XML_TEXT);
    final SchemaField textField = schema.getFieldOrNull(textFieldName);
    Analyzer indexAnalyzer = null;
    if (textField != null) {
        final FieldType textType = textField.getType();
        indexAnalyzer = textType.getAnalyzer();
        final Analyzer queryAnalyzer = textType.getQueryAnalyzer();
        if (indexAnalyzer != null) {
            final FieldRole[] textRoles = { FieldRole.XML_TEXT, FieldRole.ELEMENT_TEXT, FieldRole.ATTRIBUTE_TEXT };
            for (FieldRole role : textRoles) {
                final FieldDefinition definition = indexConfig.getField(role);
                // analyzer used when indexing
                definition.setAnalyzer(indexAnalyzer);
                // analyzer used when parsing queries
                definition.setQueryAnalyzer(queryAnalyzer);
                indexConfig.getFieldAnalyzers().put(definition.getName(), queryAnalyzer);
            }
        }
    }
    for (CopyField copy : schema.getCopyFieldsList(textFieldName)) {
        // register fields copied from the XML text field with the indexer so that
        // they are fed an XdmNode
        final SchemaField destination = copy.getDestination();
        Analyzer analyzer = destination.getType().getAnalyzer();
        if (analyzer == null) {
            // can this happen? and what about the query analyzer - can that be null?
            // fall back to the source field's analyzer, or to a default one
            analyzer = (indexAnalyzer != null) ? indexAnalyzer : new DefaultAnalyzer();
        }
        // TODO: should there be additional element and attribute text fields as well?
        final XmlTextField copyTarget = new XmlTextField (destination.getName(), analyzer);
        copyTarget.setQueryAnalyzer(analyzer);
        indexConfig.addField(copyTarget);
    }
}
/**
 * Adds the configured XPath fields to the index configuration, using the
 * analyzer and stored setting that the schema declares for each field.
 *
 * @throws SolrException (SERVER_ERROR) if a configured field has no definition in the schema
 */
private void addXPathFields() {
    for (Entry<String,String> f : xpathFieldConfig) {
        String fieldName = f.getKey();
        // getFieldOrNull() lets the missing-field case be reported with the message below;
        // getField() would throw its own exception before the check could run
        SchemaField field = schema.getFieldOrNull(fieldName);
        FieldType fieldType = (field == null) ? null : field.getType();
        if (fieldType == null) {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Field " + fieldName + " declared in lux config, but not defined in schema");
        }
        XPathField xpathField = new XPathField(fieldName, f.getValue(), fieldType.getAnalyzer(), field.stored() ? Store.YES : Store.NO, field);
        indexConfig.addField(xpathField);
    }
}
/**
 * Chooses the Solr field type for a Lux field, inferring it from the field's
 * analyzer and declared type.
 *
 * @param xmlField the Lux field definition
 * @return a new field type instance matching the field
 * @throws SolrException (BAD_REQUEST) if the field has no analyzer and is not stored,
 * is a stored field of an unsupported type, or has an analyzer this class does not recognize
 */
private FieldType getFieldType(FieldDefinition xmlField) {
    // TODO - we should store a field type name in XmlField and just look that up instead
    // of trying to infer from the analyzer
    final Analyzer analyzer = xmlField.getAnalyzer();
    final String fieldName = xmlField.getName();
    if (analyzer == null) {
        // without an analyzer, only stored fields are acceptable
        if (xmlField.isStored() != Store.YES) {
            throw new SolrException(ErrorCode.BAD_REQUEST, "invalid xml field: " + fieldName + "; no analyzer and not stored");
        }
        switch (xmlField.getType()) {
            case STRING:
                return new StoredStringField ();
            case INT:
                return new NamedIntField();
            case LONG:
                return new NamedLongField();
            case BYTES:
                return new NamedBinaryField();
            default:
                throw new SolrException (ErrorCode.BAD_REQUEST, "invalid stored field: " + fieldName + " with type: " + xmlField.getType());
        }
    }
    // from here on the analyzer is known to be non-null
    if (xmlField.getType() == Type.TOKENS) {
        return new FieldableField(xmlField);
    }
    if (analyzer instanceof KeywordAnalyzer) {
        return new StringField();
    }
    if (analyzer instanceof WhitespaceGapAnalyzer) {
        return new PathField ();
    }
    throw new SolrException(ErrorCode.BAD_REQUEST, "invalid xml field: " + fieldName + "; unknown analyzer type: " + analyzer);
}
/**
 * @return the schema captured by {@link #inform(SolrCore)}, or null if inform has not been called
 */
public IndexSchema getSchema () {
    return this.schema;
}
// subclasses of built-in Solr field types exist purely so we can name them.
// Is that actually necessary?
/** A StrField whose type name is "lux_stored_string". */
class StoredStringField extends StrField {
    StoredStringField () {
        this.typeName = "lux_stored_string";
    }
}
/** A TrieIntField whose type name is "int". */
class NamedIntField extends TrieIntField {
    public NamedIntField() {
        this.typeName = "int";
    }
}
/** A TrieLongField whose type name is "long". */
class NamedLongField extends TrieLongField {
    public NamedLongField() {
        this.typeName = "long";
    }
}
/** A BinaryField whose type name is "binary". */
class NamedBinaryField extends BinaryField {
    public NamedBinaryField() {
        this.typeName = "binary";
    }
}
/** A StrField whose type name is "string". */
class StringField extends StrField {
    StringField () {
        this.typeName = "string";
    }
}
/**
 * A TextField named "lux_text_ws" that uses a WhitespaceGapAnalyzer for both
 * indexing and querying.
 */
class PathField extends TextField {
    PathField () {
        this.typeName = "lux_text_ws";
        setAnalyzer(new WhitespaceGapAnalyzer());
        setQueryAnalyzer(new WhitespaceGapAnalyzer());
    }
    /*
     * REVIEW: do we need this?
    @Override
    protected Field.Index getFieldIndex(SchemaField field, String internalVal) {
        return Field.Index.ANALYZED;
    }
    */
}
/**
 * Passes an already-built Lucene Field straight through to Solr, which allows
 * analysis to be performed outside of Solr. The type takes its analyzers from
 * the Lux field definition and is marked indexed and tokenized, but not stored.
 */
class FieldableField extends TextField {
    FieldableField (FieldDefinition xmlField) {
        this.typeName = xmlField.getName() + "-fieldable-type";
        this.analyzer = xmlField.getAnalyzer();
        this.queryAnalyzer = xmlField.getQueryAnalyzer();
        // clear STORED, then set INDEXED and TOKENIZED
        this.properties = (this.properties & ~STORED) | INDEXED | TOKENIZED;
    }
    /**
     * Returns the supplied value unchanged; it is cast to Field, so callers
     * must pass a Field instance.
     */
    @Override
    public Field createField(SchemaField field, Object val, float boost) {
        final Field passedThrough = (Field) val;
        return passedThrough;
    }
}
/**
 * @return the wrapped Lux index configuration
 */
public IndexConfiguration getIndexConfig() {
    return this.indexConfig;
}
/**
 * @return the fully qualified name of SolrIndexConfig
 */
@Override
public String getName() {
    final String beanName = SolrIndexConfig.class.getName();
    return beanName;
}
/**
 * @return the fixed version string "1.0"
 */
@Override
public String getVersion() {
    final String version = "1.0";
    return version;
}
/**
 * @return a short, fixed description of this bean
 */
@Override
public String getDescription() {
    final String description = "Lux index configuration";
    return description;
}
/**
 * @return {@code Category.OTHER}
 */
@Override
public Category getCategory() {
    final Category category = Category.OTHER;
    return category;
}
/**
 * @return the project source URL
 */
@Override
public String getSource() {
    final String source = SOURCE_URL;
    return source;
}
// documentation URLs, built on the first call to getDocs() and then reused
private static URL[] docs;
/**
 * Returns the documentation URLs for this bean: a single entry built from the
 * project source URL. The array is created on first use and cached in a static
 * field. If the URL cannot be parsed, the failure is logged and an empty array
 * is cached and returned instead.
 *
 * @return the cached array of documentation URLs; never null
 */
@Override
public URL[] getDocs() {
    if (docs == null) {
        try {
            docs = new URL [] { new URL(SOURCE_URL) };
        } catch (MalformedURLException e) {
            // Report the problem rather than swallowing it, and cache an empty result
            // so the failing construction is not retried on every call.
            logger.warn("invalid documentation URL: " + SOURCE_URL, e);
            docs = new URL [0];
        }
    }
    return docs;
}
/**
 * No statistics are reported by this bean.
 *
 * @return always null
 */
@Override
public NamedList<?> getStatistics() {
    final NamedList<?> none = null;
    return none;
}
}
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */