/** * Copyright (c) 2015 Lemur Consulting Ltd. * <p/> * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * <p/> * http://www.apache.org/licenses/LICENSE-2.0 * <p/> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package uk.co.flax.biosolr.solr.update.processor; import org.apache.commons.lang.StringUtils; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.CloseHook; import org.apache.solr.core.SolrCore; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.SchemaField; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.processor.UpdateRequestProcessor; import org.apache.solr.update.processor.UpdateRequestProcessorFactory; import org.apache.solr.util.DefaultSolrThreadFactory; import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import uk.co.flax.biosolr.ontology.core.*; import uk.co.flax.biosolr.solr.ontology.*; import java.io.IOException; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; /** * This is an update processor for adding ontology data to documents annotated * with ontology references. It expects the location of the ontology and the * field containing the ontology reference to be passed in as configuration * parameters, as well as a number of optional parameters. * * <p> * The full set of configuration options are: * </p> * <ul> * <li> * <b>enabled</b> - boolean value to enable/disable the plugin. Default: * <code>true</code>.</li> * <li> * <b>annotationField</b> [REQUIRED] - the field in the input document that * contains the annotation URI. This is used as the reference when looking up * details in the ontology.</li> * <li> * <b>ontologyURI</b> [REQUIRED] - the location of the ontology being * referenced. Eg. <code>http://www.ebi.ac.uk/efo/efo.owl</code> or * <code>file:///home/mlp/Downloads/efo.owl</code>.</li> * <li> * <b>labelField</b> - the field in your schema that should be used for the * annotation's label(s). Default: <code>label_t</code>.</li> * <li> * <b>uriFieldSuffix</b> - the suffix to use for referenced URI fields, such as * parent or child URI references. Default: <code>_uri_s</code>.</li> * <li> * <b>labelFieldSuffix</b> - the suffix to use for referenced label fields, such * as the labels for parent or child references. Default: <code>_labels_t</code> * .</li> * <li> * <b>childField</b> - the field to use for child document references. These are * direct (ie. single-step) relationships *down* the hierarchy. This will be * combined with the URI and label field suffixes, so the field names will be * `child_uri_s` and `child_labels_t` (for example). Default: <code>child</code> * .</li> * <li> * <b>parentField</b> - the field to use for parent document references. These * are direct relationships *up* the hierarchy. Field name follows the same * conventions as `childField`, above. Default: <code>parent</code>.</li> * <li> * <b>includeIndirect</b> - (boolean) should indirect parent/child relationships * also be indexed? If this is set to `true`, *all* ancestor and descendant * relationships will also be stored in the index. Default: <code>true</code>.</li> * <li> * <b>descendantsField</b> - the field to use for the full set of descendant * references. These are indirect relationships *down* the hierarchy. Field name * follows the same conventions as `childField`, above. Default: * <code>descendants</code>.</li> * <li> * <b>ancestorssField</b> - the field to use for the full set of ancestor * references. These are indirect relationships *up* the hierarchy. Field name * follows the same conventions as `childField`, above. Default: * <code>ancestors</code>.</li> * <li> * <b>includeRelations</b> (boolean) - should other relationships between nodes * (eg. "has disease location", "is part of") be indexed. The fields will be * named using the short form of the field name, followed by "_rel", * plus the URI and label field * suffixes - for example, <code>has_disease_location_rel_uris_s</code>, * <code>has_disease_location_rel_labels_t</code>. Default: <code>true</code>.</li> * <li> * <b>synonymsField</b> - the field which should be used to store synonyms. If * left empty, synonyms will not be indexed. Default: <code>synonyms_t</code>.</li> * <li> * <b>definitionField</b> - the field to use to store definitions. If left * empty, definitions will not be indexed. Default: <code>definition_t</code>.</li> * <li> * <b>configurationFile</b> - the path to a properties-style file containing * additional, ontology-specific configuration, such as the property annotation * to use for synonyms, definitions, etc. See below for the format of this file, * and the default values used when not defined. There is no default value for * this configuration option.</li> * </ul> * * <p> * The plugin attempts to use sensible defaults for the property annotations for * labels, synonyms and definitions. However, if the ontology being referenced * uses different annotations for these properties, you will need to specify * them in an external properties file, referenced by the * <code>configurationFile</code> config option described above. This follows * the standard Java properties file format, with the following options * (including default values): * </p> * * <pre> * label_properties = http://www.w3.org/2000/01/rdf-schema#label * definition_properties = http://purl.obolibrary.org/obo/IAO_0000115 * synonym_properties = http://www.geneontology.org/formats/oboInOwl#hasExactSynonym * ignore_properties = * </pre> * * <p> * All of the properties above can be used to specify multiple values. These should * be comma-separated - eg.: * </p> * <pre> * definition_properties = http://www.ebi.ac.uk/efo/definition,http://purl.obolibrary.org/obo/IAO_0000115 * </pre> * * <p>The <code>ignore_properties</code> property can be used to specify parts * of the hierarchy that should be ignored - references that are now obsolete, * for example.</p> * @author mlp */ public class OntologyUpdateProcessorFactory extends UpdateRequestProcessorFactory implements SolrCoreAware { private static final Logger LOGGER = LoggerFactory.getLogger(OntologyUpdateProcessorFactory.class); public static final long DELETE_CHECK_DELAY_MS = 2 * 60 * 1000; // 2 minutes private static final String ENABLED_PARAM = "enabled"; /* * Field configuration parameters */ private static final String ANNOTATION_FIELD_PARAM = "annotationField"; private static final String LABEL_FIELD_PARAM = "labelField"; private static final String URI_FIELD_SUFFIX_PARAM = "uriFieldSuffix"; private static final String LABEL_FIELD_SUFFIX_PARAM = "labelFieldSuffix"; private static final String CHILD_FIELD_PARAM = "childField"; private static final String PARENT_FIELD_PARAM = "parentField"; private static final String INCLUDE_INDIRECT_PARAM = "includeIndirect"; private static final String DESCENDANT_FIELD_PARAM = "descendantsField"; private static final String ANCESTOR_FIELD_PARAM = "ancestorsField"; private static final String INCLUDE_RELATIONS_PARAM = "includeRelations"; private static final String SYNONYMS_FIELD_PARAM = "synonymsField"; private static final String DEFINITION_FIELD_PARAM = "definitionField"; private static final String FIELDNAME_PREFIX_PARAM = "fieldPrefix"; private static final String PARENT_PATHS_PARAM = "includeParentPaths"; private static final String PARENT_PATHS_LABEL_PARAM = "includeParentPathLabels"; private static final String PARENT_PATHS_FIELD_PARAM = "parentPathsField"; /* * Default field values */ private static final String LABEL_FIELD_DEFAULT = "label_t"; private static final String URI_FIELD_SUFFIX = "_uris_s"; private static final String LABEL_FIELD_SUFFIX = "_labels_t"; private static final String CHILD_FIELD_DEFAULT = "child"; private static final String PARENT_FIELD_DEFAULT = "parent"; private static final String DESCENDANT_FIELD_DEFAULT = "descendants"; private static final String ANCESTOR_FIELD_DEFAULT = "ancestors"; private static final String SYNONYMS_FIELD_DEFAULT = "synonyms_t"; private static final String DEFINITION_FIELD_DEFAULT = "definition_t"; private static final String RELATION_FIELD_INDICATOR = "_rel"; private static final String PARENT_PATHS_FIELD_DEFAULT = "parent_paths_t"; private boolean enabled; private String annotationField; private String fieldPrefix; private String labelField; private String uriFieldSuffix; private String labelFieldSuffix; private String childUriField; private String childLabelField; private String parentUriField; private String parentLabelField; private boolean includeIndirect; private String descendantUriField; private String descendantLabelField; private String ancestorUriField; private String ancestorLabelField; private boolean includeRelations; private String synonymsField; private String definitionField; private boolean includeParentPaths; private boolean includeParentPathLabels; private String parentPathsField; private SolrOntologyHelperFactory helperFactory; private OntologyHelper helper; private ScheduledThreadPoolExecutor executor; @Override public void init(@SuppressWarnings("rawtypes") final NamedList args) { if (args != null) { SolrParams params = SolrParams.toSolrParams(args); this.enabled = params.getBool(ENABLED_PARAM, true); if (enabled) { // Helper factory validates ontology parameters this.helperFactory = new SolrOntologyHelperFactory(params); } this.annotationField = params.get(ANNOTATION_FIELD_PARAM); this.fieldPrefix = params.get(FIELDNAME_PREFIX_PARAM, annotationField + "_"); this.labelField = params.get(LABEL_FIELD_PARAM, fieldPrefix + LABEL_FIELD_DEFAULT); this.uriFieldSuffix = params.get(URI_FIELD_SUFFIX_PARAM, URI_FIELD_SUFFIX); this.labelFieldSuffix = params.get(LABEL_FIELD_SUFFIX_PARAM, LABEL_FIELD_SUFFIX); String childField = params.get(CHILD_FIELD_PARAM, fieldPrefix + CHILD_FIELD_DEFAULT); this.childUriField = childField + uriFieldSuffix; this.childLabelField = childField + labelFieldSuffix; String parentField = params.get(PARENT_FIELD_PARAM, fieldPrefix + PARENT_FIELD_DEFAULT); this.parentUriField = parentField + uriFieldSuffix; this.parentLabelField = parentField + labelFieldSuffix; this.includeIndirect = params.getBool(INCLUDE_INDIRECT_PARAM, true); String descendentField = params.get(DESCENDANT_FIELD_PARAM, fieldPrefix + DESCENDANT_FIELD_DEFAULT); this.descendantUriField = descendentField + uriFieldSuffix; this.descendantLabelField = descendentField + labelFieldSuffix; String ancestorField = params.get(ANCESTOR_FIELD_PARAM, fieldPrefix + ANCESTOR_FIELD_DEFAULT); this.ancestorUriField = ancestorField + uriFieldSuffix; this.ancestorLabelField = ancestorField + labelFieldSuffix; this.includeRelations = params.getBool(INCLUDE_RELATIONS_PARAM, true); this.synonymsField = params.get(SYNONYMS_FIELD_PARAM, fieldPrefix + SYNONYMS_FIELD_DEFAULT); this.definitionField = params.get(DEFINITION_FIELD_PARAM, fieldPrefix + DEFINITION_FIELD_DEFAULT); this.includeParentPaths = params.getBool(PARENT_PATHS_PARAM, false); this.includeParentPathLabels = params.getBool(PARENT_PATHS_LABEL_PARAM, false); this.parentPathsField = params.get(PARENT_PATHS_FIELD_PARAM, PARENT_PATHS_FIELD_DEFAULT); } } @Override public void inform(SolrCore core) { final SchemaField annoField = core.getLatestSchema().getFieldOrNull(getAnnotationField()); if (annoField == null) { throw new SolrException(ErrorCode.SERVER_ERROR, "Cannot use annotation field which does not exist in schema: " + getAnnotationField()); } initialiseOntologyCheckScheduler(core); } private void initialiseOntologyCheckScheduler(SolrCore core) { executor = new ScheduledThreadPoolExecutor(1, new DefaultSolrThreadFactory("ontologyUpdate"), (Runnable r, ThreadPoolExecutor e) -> LOGGER.warn("Skipping execution of '{}' using '{}'", r, e) ); executor.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); executor.setContinueExistingPeriodicTasksAfterShutdownPolicy(false); // Add CloseHook to tidy up if core closes core.addCloseHook(new CloseHook() { @Override public void preClose(SolrCore core) { LOGGER.info("Triggering graceful shutdown of OntologyUpdate executor"); if (getHelper() != null) { disposeHelper(); } executor.shutdown(); } @Override public void postClose(SolrCore core) { if (executor.isTerminating()) { LOGGER.info("Forcing shutdown of OntologyUpdate executor"); executor.shutdownNow(); } } }); executor.scheduleAtFixedRate(new OntologyCheckRunnable(this), DELETE_CHECK_DELAY_MS, DELETE_CHECK_DELAY_MS, TimeUnit.MILLISECONDS); } public boolean isEnabled() { return enabled; } public String getAnnotationField() { return annotationField; } public String getLabelField() { return labelField; } public String getChildUriField() { return childUriField; } public String getChildLabelField() { return childLabelField; } public String getParentUriField() { return parentUriField; } public String getParentLabelField() { return parentLabelField; } public boolean isIncludeIndirect() { return includeIndirect; } public String getDescendantUriField() { return descendantUriField; } public String getDescendantLabelField() { return descendantLabelField; } public String getAncestorUriField() { return ancestorUriField; } public String getAncestorLabelField() { return ancestorLabelField; } public boolean isIncludeRelations() { return includeRelations; } public String getUriFieldSuffix() { return uriFieldSuffix; } public String getLabelFieldSuffix() { return labelFieldSuffix; } public String getSynonymsField() { return synonymsField; } public String getDefinitionField() { return definitionField; } public String getFieldPrefix() { return fieldPrefix; } public boolean includeSynonyms() { return StringUtils.isNotBlank(synonymsField); } public boolean includeDefinitions() { return StringUtils.isNotBlank(definitionField); } public boolean isIncludeParentPaths() { return includeParentPaths; } public boolean isIncludeParentPathLabels() { return includeParentPathLabels; } public String getParentPathsField() { return parentPathsField; } public synchronized OntologyHelper initialiseHelper() throws OntologyHelperException { if (helper == null) { helper = helperFactory.buildOntologyHelper(); } return helper; } public synchronized OntologyHelper getHelper() { return helper; } public synchronized void disposeHelper() { helper.dispose(); helper = null; } @Override public UpdateRequestProcessor getInstance(SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) { return new OntologyUpdateProcessor(next); } class OntologyUpdateProcessor extends UpdateRequestProcessor { public OntologyUpdateProcessor(UpdateRequestProcessor next) { super(next); } @Override public void processAdd(AddUpdateCommand cmd) throws IOException { if (isEnabled()) { try { // Look up ontology data for document OntologyHelper helper = initialiseHelper(); String iri = (String)cmd.getSolrInputDocument().getFieldValue(getAnnotationField()); if (StringUtils.isNotBlank(iri)) { OntologyData data = findOntologyData(helper, iri); if (data == null) { LOGGER.debug("Cannot find OWL class for IRI {}", iri); } else { addDataToSolrDoc(cmd.getSolrInputDocument(), data); } } } catch (OntologyHelperException e) { throw new SolrException(ErrorCode.SERVER_ERROR, "Cannot load ontology: " + e.getMessage()); } } // Run the next processor in the chain if (next != null) { next.processAdd(cmd); } } private OntologyData findOntologyData(OntologyHelper helper, String iri) { OntologyData data = null; try { data = new OntologyDataBuilder(helper, iri) .includeSynonyms(includeSynonyms()) .includeDefinitions(includeDefinitions()) .includeIndirect(isIncludeIndirect()) .includeRelations(isIncludeRelations()) .includeParentPaths(isIncludeParentPaths()) .includeParentPathLabels(isIncludeParentPathLabels()) .build(); } catch (OntologyHelperException e) { LOGGER.error("Problem building ontology data for {}: {}", iri, e.getMessage()); } return data; } private void addDataToSolrDoc(SolrInputDocument doc, OntologyData data) { doc.addField(getLabelField(), data.getLabels()); if (includeSynonyms() && data.hasSynonyms()) { doc.addField(getSynonymsField(), data.getSynonyms()); } if (includeDefinitions() && data.hasDefinitions()) { doc.addField(getDefinitionField(), data.getDefinitions()); } // Add child and parent URIs and labels doc.addField(getChildUriField(), data.getChildIris()); doc.addField(getChildLabelField(), data.getChildLabels()); doc.addField(getParentUriField(), data.getParentIris()); doc.addField(getParentLabelField(), data.getParentLabels()); if (isIncludeIndirect()) { // Add descendant and ancestor URIs and labels doc.addField(getDescendantUriField(), data.getDescendantIris()); doc.addField(getDescendantLabelField(), data.getDescendantLabels()); doc.addField(getAncestorUriField(), data.getAncestorIris()); doc.addField(getAncestorLabelField(), data.getAncestorLabels()); } if (isIncludeRelations()) { for (String relation : data.getRelationIris().keySet()) { doc.addField(buildRelationUriFieldName(relation), data.getRelationIris().get(relation)); doc.addField(buildRelationLabelFieldName(relation), data.getRelationLabels().get(relation)); } } if (isIncludeParentPaths()) { doc.addField(getParentPathsField(), data.getParentPaths()); } } private String buildRelationUriFieldName(String relation) { return normalizeFieldName(getFieldPrefix() + relation + RELATION_FIELD_INDICATOR + getUriFieldSuffix()); } private String buildRelationLabelFieldName(String relation) { return normalizeFieldName(getFieldPrefix() + relation + RELATION_FIELD_INDICATOR + getLabelFieldSuffix()); } private String normalizeFieldName(String fieldName) { return fieldName.replaceAll("[^A-Za-z0-9]+", "_"); } } private final class OntologyCheckRunnable implements Runnable { final OntologyUpdateProcessorFactory updateProcessor; public OntologyCheckRunnable(OntologyUpdateProcessorFactory processor) { this.updateProcessor = processor; } @Override public void run() { OntologyHelper helper = updateProcessor.getHelper(); if (helper != null) { // Check if the last call time was longer ago than the maximum if (System.currentTimeMillis() - DELETE_CHECK_DELAY_MS > helper.getLastCallTime()) { // Assume helper is out of use - dispose of it to allow memory to be freed updateProcessor.disposeHelper(); } } } } }