/*
 * Copyright 2015
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.core.io.solr;

import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasConsumer_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import java.io.IOException;

/**
 * This class implements a basic SolrWriter. Specific writers should define a subclass that
 * overwrites the {@code generateSolrDocument()} method to take custom fields into account.
 * <p>
 * The class initializes a {@link SolrClient} instance, calls {@code generateSolrDocument()} for
 * each incoming CAS, and adds the result to the Solr server. A commit is executed when all
 * documents are processed, after which the client is closed.
 */
public abstract class SolrWriter_ImplBase
    extends JCasConsumer_ImplBase
{
    /**
     * Define whether existing documents with same ID are updated (true) or overwritten (false)?
     * Default: true (update).
     * <p>
     * The code in this class does not consult the flag itself; it is exposed through
     * {@link #update()}.
     */
    public static final String PARAM_UPDATE = "update";
    @ConfigurationParameter(name = PARAM_UPDATE, mandatory = true, defaultValue = "true")
    private boolean update;

    /**
     * Solr server URL string in the form {@code <prot>://<host>:<port>/<path>}, e.g.
     * {@code http://localhost:8983/solr/collection1}
     */
    public static final String PARAM_TARGET_LOCATION = ComponentParameters.PARAM_TARGET_LOCATION;
    @ConfigurationParameter(name = PARAM_TARGET_LOCATION, mandatory = true)
    private String targetLocation;

    /**
     * The buffer size before the documents are sent to the server (default: 10000).
     */
    public static final String PARAM_QUEUE_SIZE = "queueSize";
    @ConfigurationParameter(name = PARAM_QUEUE_SIZE, mandatory = true, defaultValue = "10000")
    private int queueSize;

    /**
     * The number of background threads used to empty the queue. Default: 1.
     */
    public static final String PARAM_NUM_THREADS = ComponentParameters.PARAM_NUM_THREADS;
    @ConfigurationParameter(name = PARAM_NUM_THREADS, mandatory = true, defaultValue = "1")
    private int numThreads;

    /**
     * When committing to the index, i.e. when all documents are processed, block until index
     * changes are flushed to disk? Default: true.
     */
    public static final String PARAM_WAIT_FLUSH = "waitFlush";
    @ConfigurationParameter(name = PARAM_WAIT_FLUSH, mandatory = true, defaultValue = "true")
    private boolean waitFlush;

    /**
     * When committing to the index, i.e. when all documents are processed, block until a new
     * searcher is opened and registered as the main query searcher, making the changes visible?
     * Default: true.
     */
    public static final String PARAM_WAIT_SEARCHER = "waitSearcher";
    @ConfigurationParameter(name = PARAM_WAIT_SEARCHER, mandatory = true, defaultValue = "true")
    private boolean waitSearcher;

    /**
     * The name of the text field in the Solr schema (default: "text").
     */
    public static final String PARAM_TEXT_FIELD = "textField";
    @ConfigurationParameter(name = PARAM_TEXT_FIELD, mandatory = true, defaultValue = "text")
    private String textField;

    /**
     * The name of the id field in the Solr schema (default: "id").
     */
    public static final String PARAM_ID_FIELD = "solrIdField";
    @ConfigurationParameter(name = PARAM_ID_FIELD, mandatory = true, defaultValue = "id")
    private String idField;

    /**
     * If set to true, the index is optimized once all documents are uploaded. Default is false.
     */
    public static final String PARAM_OPTIMIZE_INDEX = "optimizeIndex";
    @ConfigurationParameter(name = PARAM_OPTIMIZE_INDEX, mandatory = true, defaultValue = "false")
    private boolean optimizeIndex;

    /** Client created in {@link #initialize(UimaContext)} and closed when processing completes. */
    private SolrClient solrClient;

    /**
     * Creates the Solr client for the configured target location and pings the server once to
     * verify that it is reachable.
     *
     * @param context
     *            the UIMA context passed on to the superclass
     * @throws ResourceInitializationException
     *             if the ping fails or the server answers with a non-zero status
     */
    @Override
    public void initialize(UimaContext context)
        throws ResourceInitializationException
    {
        super.initialize(context);

        getLogger().info(
                String.format("Using Solr server at %s.%nQueue size: %d\tThreads: %d%n",
                        targetLocation, queueSize, numThreads));

        solrClient = new ConcurrentUpdateSolrClient.Builder(targetLocation)
                .withQueueSize(queueSize)
                .withThreadCount(numThreads)
                .build();

        try {
            int status = solrClient.ping().getStatus();
            if (status != 0) {
                // The (String, Object[]) constructor treats its first argument as a
                // resource-bundle message key, not as the message text. Carry the readable
                // message in a cause so that it actually shows up in logs and stack traces.
                throw new ResourceInitializationException(
                        new IllegalStateException("Server error. Response status: " + status));
            }
        }
        catch (SolrServerException | IOException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Converts the given CAS into a Solr document via {@link #generateSolrDocument(JCas)} and
     * hands it to the Solr client.
     *
     * @param aJCas
     *            the CAS to index
     * @throws AnalysisEngineProcessException
     *             if the document cannot be generated or added
     */
    @Override
    public void process(JCas aJCas)
        throws AnalysisEngineProcessException
    {
        try {
            SolrInputDocument solrDocument = generateSolrDocument(aJCas);
            solrClient.add(solrDocument);
        }
        catch (IOException | SolrServerException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Commits all pending documents, optionally optimizes the index, and closes the Solr client.
     * <p>
     * The client is closed even if the commit or the optimization fails.
     *
     * @throws AnalysisEngineProcessException
     *             if committing, optimizing, or closing the client fails
     */
    @Override
    public void collectionProcessComplete()
        throws AnalysisEngineProcessException
    {
        super.collectionProcessComplete();

        // try-with-resources guarantees close() on every path and records a failure of close()
        // as a suppressed exception instead of masking the primary one.
        try (SolrClient client = solrClient) {
            UpdateResponse commitResponse = client.commit(waitFlush, waitSearcher);
            getLogger().info(String.format("Solr server at '%s' responded: %s",
                    targetLocation, commitResponse));

            if (optimizeIndex) {
                getLogger().info("Starting index optimization...");
                // Log the response of the optimize call itself, not the earlier commit response.
                UpdateResponse optimizeResponse = client.optimize(waitFlush, waitSearcher);
                getLogger().info(String.format("Solr server at '%s' responded: %s",
                        targetLocation, optimizeResponse));
            }
        }
        catch (SolrServerException | IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Perform updates if added documents already exist?
     *
     * @return true if updates are to be performed rather than overwriting existing documents
     */
    public boolean update()
    {
        return update;
    }

    /**
     * Returns the configured name of the Solr text field.
     *
     * @return the name of the Solr text field (e.g. "text")
     */
    public String getTextField()
    {
        return textField;
    }

    /**
     * Returns the configured name of the Solr ID field.
     *
     * @return the name of the Solr ID field (e.g. "id")
     */
    public String getIdField()
    {
        return idField;
    }

    /**
     * Returns the client used to talk to the Solr server.
     *
     * @return the SolrClient; {@code null} before {@link #initialize(UimaContext)} has created it
     */
    public SolrClient getSolrClient()
    {
        return solrClient;
    }

    /**
     * Builds the Solr document for one CAS. Called once per CAS from {@link #process(JCas)}.
     *
     * @param aJCas
     *            the CAS to convert
     * @return the document to add to the Solr server
     * @throws AnalysisEngineProcessException
     *             if the document cannot be generated
     */
    protected abstract SolrInputDocument generateSolrDocument(JCas aJCas)
        throws AnalysisEngineProcessException;
}