package org.apache.solr.uima.processor;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.lucene.analysis.uima.ae.AEProvider;
import org.apache.lucene.analysis.uima.ae.AEProviderFactory;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Map;
/**
* Update document(s) to be indexed with UIMA extracted information
*
*/
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
private final Logger log = LoggerFactory.getLogger(UIMAUpdateRequestProcessor.class);
SolrUIMAConfiguration solrUIMAConfiguration;
private AEProvider aeProvider;
private SolrCore solrCore;
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next, SolrCore solrCore,
SolrUIMAConfiguration config) {
super(next);
initialize(solrCore, config);
}
private void initialize(SolrCore solrCore, SolrUIMAConfiguration config) {
this.solrCore = solrCore;
solrUIMAConfiguration = config;
aeProvider = AEProviderFactory.getInstance().getAEProvider(solrCore.getName(),
solrUIMAConfiguration.getAePath(), solrUIMAConfiguration.getRuntimeParameters());
}
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
String text = null;
try {
/* get Solr document */
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
/* get the fields to analyze */
String[] texts = getTextsToAnalyze(solrInputDocument);
for (int i = 0; i < texts.length; i++) {
text = texts[i];
if (text != null && text.length()>0) {
/* process the text value */
JCas jcas = processText(text);
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
/* get field mapping from config */
Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
.getTypesFeaturesFieldsMapping();
/* map type features on fields */
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) {
uimaToSolrMapper.map(typeFQN, typesAndFeaturesFieldsMap.get(typeFQN));
}
}
}
} catch (Exception e) {
String logField = solrUIMAConfiguration.getLogField();
if(logField == null){
SchemaField uniqueKeyField = solrCore.getSchema().getUniqueKeyField();
if(uniqueKeyField != null){
logField = uniqueKeyField.getName();
}
}
String optionalFieldInfo = logField == null ? "." :
new StringBuilder(". ").append(logField).append("=")
.append((String)cmd.getSolrInputDocument().getField(logField).getValue())
.append(", ").toString();
int len;
String debugString;
if (text != null && text.length() > 0) {
len = Math.min(text.length(), 100);
debugString = new StringBuilder(" text=\"").append(text.substring(0, len)).append("...\"").toString();
}
else {
debugString = " null text";
}
if (solrUIMAConfiguration.isIgnoreErrors()) {
log.warn(new StringBuilder("skip the text processing due to ")
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
.append(debugString).toString());
} else {
throw new SolrException(ErrorCode.SERVER_ERROR,
new StringBuilder("processing error ")
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
.append(debugString).toString(), e);
}
}
super.processAdd(cmd);
}
/*
* get the texts to analyze from the corresponding fields
*/
private String[] getTextsToAnalyze(SolrInputDocument solrInputDocument) {
String[] fieldsToAnalyze = solrUIMAConfiguration.getFieldsToAnalyze();
boolean merge = solrUIMAConfiguration.isFieldsMerging();
String[] textVals;
if (merge) {
StringBuilder unifiedText = new StringBuilder("");
for (int i = 0; i < fieldsToAnalyze.length; i++) {
unifiedText.append(String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i])));
}
textVals = new String[1];
textVals[0] = unifiedText.toString();
} else {
textVals = new String[fieldsToAnalyze.length];
for (int i = 0; i < fieldsToAnalyze.length; i++) {
textVals[i] = String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i]));
}
}
return textVals;
}
/* process a field value executing UIMA the CAS containing it as document text */
private JCas processText(String textFieldValue) throws ResourceInitializationException,
AnalysisEngineProcessException {
log.info(new StringBuffer("Analyzing text").toString());
/* get the UIMA analysis engine */
AnalysisEngine ae = aeProvider.getAE();
/* create a JCas which contain the text to analyze */
JCas jcas = ae.newJCas();
jcas.setDocumentText(textFieldValue);
/* perform analysis on text field */
ae.process(jcas);
log.info(new StringBuilder("Text processing completed").toString());
return jcas;
}
}