package org.apache.solr.uima.processor;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.JCasPool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Update document(s) to be indexed with UIMA extracted information
*
*/
public class UIMAUpdateRequestProcessor extends UpdateRequestProcessor {
private final Logger log = LoggerFactory
.getLogger(UIMAUpdateRequestProcessor.class);
SolrUIMAConfiguration solrUIMAConfiguration;
private AnalysisEngine ae;
private JCasPool pool;
public UIMAUpdateRequestProcessor(UpdateRequestProcessor next,
String coreName, SolrUIMAConfiguration config, AnalysisEngine ae,
JCasPool pool) {
super(next);
this.ae = ae;
this.pool = pool;
solrUIMAConfiguration = config;
}
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
String text = null;
try {
/* get Solr document */
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
/* get the fields to analyze */
String[] texts = getTextsToAnalyze(solrInputDocument);
for (String currentText : texts) {
text = currentText;
if (text != null && text.length() > 0) {
/* create a JCas which contain the text to analyze */
JCas jcas = pool.getJCas(0);
try {
/* process the text value */
processText(text, jcas);
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(
solrInputDocument, jcas);
/* get field mapping from config */
Map<String,Map<String,MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
.getTypesFeaturesFieldsMapping();
/* map type features on fields */
for (Entry<String,Map<String,MapField>> entry : typesAndFeaturesFieldsMap
.entrySet()) {
uimaToSolrMapper.map(entry.getKey(), entry.getValue());
}
} finally {
pool.releaseJCas(jcas);
}
}
}
} catch (Exception e) {
String logField = solrUIMAConfiguration.getLogField();
if (logField == null) {
SchemaField uniqueKeyField = cmd.getReq().getSchema()
.getUniqueKeyField();
if (uniqueKeyField != null) {
logField = uniqueKeyField.getName();
}
}
String optionalFieldInfo = logField == null ? "."
: new StringBuilder(". ")
.append(logField)
.append("=")
.append(
(String) cmd.getSolrInputDocument().getField(logField)
.getValue()).append(", ").toString();
int len;
String debugString;
if (text != null && text.length() > 0) {
len = Math.min(text.length(), 100);
debugString = new StringBuilder(" text=\"")
.append(text.substring(0, len)).append("...\"").toString();
} else {
debugString = " null text";
}
if (solrUIMAConfiguration.isIgnoreErrors()) {
log.warn(
"skip the text processing due to {}",
new StringBuilder().append(e.getLocalizedMessage())
.append(optionalFieldInfo).append(debugString));
} else {
throw new SolrException(ErrorCode.SERVER_ERROR, new StringBuilder(
"processing error ").append(e.getLocalizedMessage())
.append(optionalFieldInfo).append(debugString).toString(), e);
}
}
super.processAdd(cmd);
}
/*
* get the texts to analyze from the corresponding fields
*/
private String[] getTextsToAnalyze(SolrInputDocument solrInputDocument) {
String[] fieldsToAnalyze = solrUIMAConfiguration.getFieldsToAnalyze();
boolean merge = solrUIMAConfiguration.isFieldsMerging();
String[] textVals;
if (merge) {
StringBuilder unifiedText = new StringBuilder("");
for (String aFieldsToAnalyze : fieldsToAnalyze) {
unifiedText.append(String.valueOf(solrInputDocument
.getFieldValue(aFieldsToAnalyze)));
}
textVals = new String[1];
textVals[0] = unifiedText.toString();
} else {
textVals = new String[fieldsToAnalyze.length];
for (int i = 0; i < fieldsToAnalyze.length; i++) {
textVals[i] = String.valueOf(solrInputDocument
.getFieldValue(fieldsToAnalyze[i]));
}
}
return textVals;
}
/*
* process a field value executing UIMA on the JCas containing it as document
* text
*/
private void processText(String textFieldValue, JCas jcas)
throws ResourceInitializationException, AnalysisEngineProcessException {
if (log.isDebugEnabled()) {
log.debug("Analyzing text");
}
jcas.setDocumentText(textFieldValue);
/* perform analysis on text field */
ae.process(jcas);
if (log.isDebugEnabled()) {
log.debug("Text processing completed");
}
}
}