/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.service.fts.impl;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.eclipse.jetty.client.HttpClient;
import org.eclipse.jetty.client.api.ContentResponse;
import org.eclipse.jetty.client.api.Request;
import org.eclipse.jetty.client.util.FutureResponseListener;
import com.bigdata.service.fts.FTS;
import com.bigdata.service.fts.FulltextSearchException;
import com.bigdata.service.fts.FulltextSearchHit;
import com.bigdata.service.fts.FulltextSearchHiterator;
import com.bigdata.service.fts.IFulltextSearch;
/**
* Implementation based on the built-in keyword search capabilities for bigdata.
*
* @author <a href="mailto:ms@metaphacts.com">Michael Schmidt</a>
* @version $Id$
*/
public class SolrFulltextSearchImpl implements
IFulltextSearch<FulltextSearchHit> {
final private static transient Logger log = Logger
.getLogger(SolrFulltextSearchImpl.class);
@Override
public FulltextSearchHiterator<FulltextSearchHit> search(
com.bigdata.service.fts.IFulltextSearch.FulltextSearchQuery query,
HttpClient client) {
if (query != null) {
try {
FulltextSearchHit[] hits = queryIndex(query, client);
return new FulltextSearchHiterator<FulltextSearchHit>(hits);
} catch (Exception e) {
throw new FulltextSearchException(
"Error execution fulltext search: " + e);
}
}
return new FulltextSearchHiterator<FulltextSearchHit>(
new FulltextSearchHit[] {});
}
@SuppressWarnings("deprecation")
private FulltextSearchHit[] queryIndex(
FulltextSearchQuery query, HttpClient httpClient)
throws Exception {
if (httpClient.isStopped()) {
throw new FulltextSearchException("The client has been stopped");
}
Request request = httpClient.newRequest(query.getEndpoint());
// Limit response content buffer to 512 KiB
FutureResponseListener listener =
new FutureResponseListener(request, 10 * 1024 * 1024); // 100 MB size
request.param("q", query.getQuery());
request.param("wt", "json");
final String searchParams = query.getParams();
if (searchParams!=null && !searchParams.isEmpty()) {
final String[] params = searchParams.split("&");
for (int i=0; i<params.length; i++) {
if (params[i]!=null) {
String kv[] = params[i].split("=");
if (kv.length==2 && kv[0]!=null && !(kv[0].isEmpty())) {
if (!(kv[0].equals("wt"))) {
try {
final String val = kv[1]==null ? "" :
URLDecoder.decode(kv[1], "UTF-8");
request.param(kv[0], val);
} catch (Exception e) {
if (log.isInfoEnabled()) {
log.info("Solr search param: '" + params[i] + "'" +
"' can't be URL decoded. Will be ignored...");
}
}
}
} else {
if (log.isInfoEnabled()) {
log.info("Invalid Solr search param: '" + params[i] + "'");
log.info("Will be ignored...");
}
}
}
}
}
final Integer queryTimeoutSpecified = query.getSearchTimeout();
final Integer queryTimeoutUsed =
queryTimeoutSpecified==null ?
FTS.Options.DEFAULT_TIMEOUT : queryTimeoutSpecified;
request.send(listener);
ContentResponse resp = listener.get(queryTimeoutUsed, TimeUnit.MILLISECONDS);
final int statusCode = resp.getStatus();
if (statusCode != 200) {
throw new FulltextSearchException("Status code != 200 received from "
+ "external fulltext service: " + statusCode);
}
final String jsonStr = resp.getContentAsString();
final JSONObject json = new JSONObject(jsonStr);
return constructFulltextSearchList(json, query);
}
/**
* Constructs a list of fulltext search results from a Solr json result
* string.
*
* @param solrResultsJSON
* @param query
* @return
* @throws JSONException
*/
private FulltextSearchHit[] constructFulltextSearchList(
JSONObject solrResultsJSON, FulltextSearchQuery query)
throws JSONException {
String searchColumn = query.getSearchField();
String snippetColumn = query.getSnippetField();
String scoreColumn = query.getScoreField();
JSONObject resp = solrResultsJSON.getJSONObject("response");
JSONArray docs = resp.getJSONArray("docs");
/**
* Collect results from JSON
*/
List<FulltextSearchHit> searchHits =
new ArrayList<FulltextSearchHit>(docs.length());
for (int i = 0; i < docs.length(); i++) {
JSONObject result = docs.getJSONObject(i);
String search = null;
if (searchColumn!=null && !searchColumn.isEmpty()
&& result.has(searchColumn)) {
search = flattenJsonResult(result.get(searchColumn));
} else {
throw new FulltextSearchException(
"Search field undefined, empty, or does not exist.");
}
String snippet = null;
if (snippetColumn!=null && !snippetColumn.isEmpty()) {
snippet = result.has(snippetColumn) ?
flattenJsonResult(result.get(snippetColumn)) : null;
}
String score = null;
if (scoreColumn!=null && !scoreColumn.isEmpty()) {
score = result.has(scoreColumn) ?
flattenJsonResult(result.get(scoreColumn)) : null;
}
Double scoreAsDouble = null;
if (score!=null) {
try {
scoreAsDouble = Double.valueOf(score);
} catch (NumberFormatException e) {
if (log.isInfoEnabled()) {
log.info("Could not cast score to double: " + score);
}
}
}
if (search!=null && !search.isEmpty()) {
FulltextSearchHit currentHit =
new FulltextSearchHit(search, scoreAsDouble, snippet,
query.getIncomingBindings(), query.getSearchResultType());
searchHits.add(currentHit);
}
}
return searchHits.toArray(new FulltextSearchHit[searchHits.size()]);
}
/**
* Flattens a JSON result item, i.e. if the item is an array, it is
* (non-recursively) flattened, applying toString() to sub items,
* otherwise toString() is called directly.
*
* @param obj the json result item
* @return
*/
String flattenJsonResult(Object obj) {
if (obj instanceof JSONArray) {
StringBuffer buf = new StringBuffer();
final JSONArray arr = (JSONArray)obj;
for (int i=0; i<arr.length(); i++) {
try {
final Object cur = arr.get(i);
if (cur!=null) {
buf.append(cur.toString());
}
} catch (Exception e) {
// ignoring is probably the best we can do here
}
}
return buf.toString();
} else {
return obj.toString();
}
}
}