package org.apache.solr.handler.dataimport;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.http.client.HttpClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
/**
* <p>
* An implementation of {@link EntityProcessor} which fetches values from a
* separate Solr implementation using the SolrJ client library. Yield a row per
* Solr document.
* </p>
* <p>
* Limitations:
* All configuration is evaluated at the beginning;
* Only one query is walked;
* </p>
*/
public class SolrEntityProcessor extends EntityProcessorBase {
private static final Logger LOG = LoggerFactory.getLogger(SolrEntityProcessor.class);
public static final String SOLR_SERVER = "url";
public static final String QUERY = "query";
public static final String TIMEOUT = "timeout";
public static final int TIMEOUT_SECS = 5 * 60; // 5 minutes
public static final int ROWS_DEFAULT = 50;
private SolrServer solrServer = null;
private String queryString;
private int rows = ROWS_DEFAULT;
private String[] filterQueries;
private String[] fields;
private String requestHandler;// 'qt' param
private int timeout = TIMEOUT_SECS;
/**
* Factory method that returns a {@link HttpClient} instance used for interfacing with a source Solr service.
* One can override this method to return a differently configured {@link HttpClient} instance.
* For example configure https and http authentication.
*
* @return a {@link HttpClient} instance used for interfacing with a source Solr service
*/
protected HttpClient getHttpClient() {
return HttpClientUtil.createClient(null);
}
@Override
protected void firstInit(Context context) {
super.firstInit(context);
try {
String serverPath = context.getResolvedEntityAttribute(SOLR_SERVER);
if (serverPath == null) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"SolrEntityProcessor: parameter 'url' is required");
}
// TODO: we should close this client!
HttpClient client = getHttpClient();
URL url = new URL(serverPath);
// (wt="javabin|xml") default is javabin
if ("xml".equals(context.getResolvedEntityAttribute(CommonParams.WT))) {
// TODO: it doesn't matter for this impl when passing a client currently, but we should shutdown this!
solrServer = new HttpSolrServer(url.toExternalForm(), client, new XMLResponseParser());
LOG.info("using XMLResponseParser");
} else {
// TODO: it doesn't matter for this impl when passing a client currently, but we should shutdown this!
solrServer = new HttpSolrServer(url.toExternalForm(), client);
LOG.info("using BinaryResponseParser");
}
} catch (MalformedURLException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, e);
}
}
@Override
public Map<String,Object> nextRow() {
buildIterator();
return getNext();
}
/**
* The following method changes the rowIterator mutable field. It requires
* external synchronization.
*/
private void buildIterator() {
if (rowIterator != null) {
SolrDocumentListIterator documentListIterator = (SolrDocumentListIterator) rowIterator;
if (!documentListIterator.hasNext() && documentListIterator.hasMoreRows()) {
SolrDocumentList solrDocumentList = doQuery(documentListIterator
.getStart() + documentListIterator.getSize());
if (solrDocumentList != null) {
rowIterator = new SolrDocumentListIterator(solrDocumentList);
}
}
} else {
SolrDocumentList solrDocumentList = doQuery(0);
if (solrDocumentList != null) {
rowIterator = new SolrDocumentListIterator(solrDocumentList);
}
return;
}
}
protected SolrDocumentList doQuery(int start) {
this.queryString = context.getResolvedEntityAttribute(QUERY);
if (this.queryString == null) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"SolrEntityProcessor: parameter 'query' is required"
);
}
String rowsP = context.getResolvedEntityAttribute(CommonParams.ROWS);
if (rowsP != null) {
rows = Integer.parseInt(rowsP);
}
String fqAsString = context.getResolvedEntityAttribute(CommonParams.FQ);
if (fqAsString != null) {
this.filterQueries = fqAsString.split(",");
}
String fieldsAsString = context.getResolvedEntityAttribute(CommonParams.FL);
if (fieldsAsString != null) {
this.fields = fieldsAsString.split(",");
}
this.requestHandler = context.getResolvedEntityAttribute(CommonParams.QT);
String timeoutAsString = context.getResolvedEntityAttribute(TIMEOUT);
if (timeoutAsString != null) {
this.timeout = Integer.parseInt(timeoutAsString);
}
SolrQuery solrQuery = new SolrQuery(queryString);
solrQuery.setRows(rows);
solrQuery.setStart(start);
if (fields != null) {
for (String field : fields) {
solrQuery.addField(field);
}
}
solrQuery.setRequestHandler(requestHandler);
solrQuery.setFilterQueries(filterQueries);
solrQuery.setTimeAllowed(timeout * 1000);
QueryResponse response = null;
try {
response = solrServer.query(solrQuery);
} catch (SolrServerException e) {
if (ABORT.equals(onError)) {
wrapAndThrow(SEVERE, e);
} else if (SKIP.equals(onError)) {
wrapAndThrow(DataImportHandlerException.SKIP_ROW, e);
}
}
return response == null ? null : response.getResults();
}
private static class SolrDocumentListIterator implements Iterator<Map<String,Object>> {
private final int start;
private final int size;
private final long numFound;
private final Iterator<SolrDocument> solrDocumentIterator;
public SolrDocumentListIterator(SolrDocumentList solrDocumentList) {
this.solrDocumentIterator = solrDocumentList.iterator();
this.numFound = solrDocumentList.getNumFound();
// SolrQuery has the start field of type int while SolrDocumentList of
// type long. We are always querying with an int so we can't receive a
// long as output. That's the reason why the following cast seems safe
this.start = (int) solrDocumentList.getStart();
this.size = solrDocumentList.size();
}
@Override
public boolean hasNext() {
return solrDocumentIterator.hasNext();
}
@Override
public Map<String,Object> next() {
SolrDocument solrDocument = solrDocumentIterator.next();
HashMap<String,Object> map = new HashMap<>();
Collection<String> fields = solrDocument.getFieldNames();
for (String field : fields) {
Object fieldValue = solrDocument.getFieldValue(field);
map.put(field, fieldValue);
}
return map;
}
public int getStart() {
return start;
}
public int getSize() {
return size;
}
public boolean hasMoreRows() {
return numFound > start + size;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}
}