/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uima.processor;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.JCasPool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Update document(s) to be indexed with UIMA extracted information
*
*/
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private SolrUIMAConfiguration solrUIMAConfiguration;
private AnalysisEngine ae;
private JCasPool pool;
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next,
String coreName, SolrUIMAConfiguration config, AnalysisEngine ae,
JCasPool pool) {
super(next);
this.ae = ae;
this.pool = pool;
solrUIMAConfiguration = config;
}
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
String text = null;
try {
/* get Solr document */
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
/* get the fields to analyze */
String[] texts = getTextsToAnalyze(solrInputDocument);
for (String currentText : texts) {
text = currentText;
if (text != null && text.length() > 0) {
/* create a JCas which contain the text to analyze */
JCas jcas = pool.getJCas(0);
try {
/* process the text value */
processText(text, jcas);
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(
solrInputDocument, jcas);
/* get field mapping from config */
Map<String,Map<String,MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
.getTypesFeaturesFieldsMapping();
/* map type features on fields */
for (Entry<String,Map<String,MapField>> entry : typesAndFeaturesFieldsMap
.entrySet()) {
uimaToSolrMapper.map(entry.getKey(), entry.getValue());
}
} finally {
pool.releaseJCas(jcas);
}
}
}
} catch (Exception e) {
String logField = solrUIMAConfiguration.getLogField();
if (logField == null) {
SchemaField uniqueKeyField = cmd.getReq().getSchema()
.getUniqueKeyField();
if (uniqueKeyField != null) {
logField = uniqueKeyField.getName();
}
}
String optionalFieldInfo = logField == null ? "." : ". " + logField + "=" + cmd.getSolrInputDocument().
getField(logField).getValue() + ", ";
int len;
String debugString;
if (text != null && text.length() > 0) {
len = Math.min(text.length(), 100);
debugString = " text=\"" + text.substring(0, len) + "...\"";
} else {
debugString = " null text";
}
if (solrUIMAConfiguration.isIgnoreErrors()) {
log.warn(
"skip the text processing due to {}",
new StringBuilder().append(e.getLocalizedMessage())
.append(optionalFieldInfo).append(debugString));
} else {
throw new SolrException(ErrorCode.SERVER_ERROR, "processing error " + e.getLocalizedMessage() +
optionalFieldInfo + debugString, e);
}
}
super.processAdd(cmd);
}
/*
* get the texts to analyze from the corresponding fields
*/
private String[] getTextsToAnalyze(SolrInputDocument solrInputDocument) {
String[] fieldsToAnalyze = solrUIMAConfiguration.getFieldsToAnalyze();
boolean merge = solrUIMAConfiguration.isFieldsMerging();
String[] textVals;
if (merge) {
StringBuilder unifiedText = new StringBuilder("");
for (String aFieldsToAnalyze : fieldsToAnalyze) {
if (solrInputDocument.getFieldValues(aFieldsToAnalyze) != null) {
Object[] Values = solrInputDocument.getFieldValues(aFieldsToAnalyze).toArray();
for (Object Value : Values) {
if (unifiedText.length() > 0) {
unifiedText.append(' ');
}
unifiedText.append(Value.toString());
}
}
}
textVals = new String[1];
textVals[0] = unifiedText.toString();
} else {
textVals = new String[fieldsToAnalyze.length];
for (int i = 0; i < fieldsToAnalyze.length; i++) {
if (solrInputDocument.getFieldValues(fieldsToAnalyze[i]) != null) {
Object[] Values = solrInputDocument.getFieldValues(fieldsToAnalyze[i]).toArray();
for (Object Value : Values) {
textVals[i] += Value.toString();
}
}
}
}
return textVals;
}
/*
* process a field value executing UIMA on the JCas containing it as document
* text
*/
private void processText(String textFieldValue, JCas jcas)
throws ResourceInitializationException, AnalysisEngineProcessException {
if (log.isDebugEnabled()) {
log.debug("Analyzing text");
}
jcas.setDocumentText(textFieldValue);
/* perform analysis on text field */
ae.process(jcas);
if (log.isDebugEnabled()) {
log.debug("Text processing completed");
}
}
/**
* @return the configuration object for this request processor
*/
public SolrUIMAConfiguration getConfiguration()
{
return solrUIMAConfiguration;
}
}