/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.searcher; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.ToStringUtils; import org.apache.nutch.indexer.solr.SolrWriter; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrQuery.ORDER; import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; public class SolrSearchBean implements SearchBean { public static final Log LOG = LogFactory.getLog(SolrSearchBean.class); private final SolrServer solr; private final QueryFilters filters; public SolrSearchBean(Configuration conf, String solrServer) throws IOException { solr = new CommonsHttpSolrServer(solrServer); filters = new QueryFilters(conf); } public String getExplanation(Query query, Hit hit) throws IOException { return "SOLR backend does not support explanations yet."; } @SuppressWarnings("unchecked") public Hits search(Query query, int numHits, String dedupField, String sortField, boolean reverse) throws IOException { // filter query string final BooleanQuery bQuery = filters.filter(query); final SolrQuery solrQuery = new SolrQuery(stringify(bQuery)); solrQuery.setRows(numHits); if (sortField == null) { solrQuery.setFields(dedupField, "score", "id"); sortField = "score"; } else { solrQuery.setFields(dedupField, sortField, "id"); solrQuery.setSortField(sortField, reverse ? ORDER.asc : ORDER.desc); } QueryResponse response; try { response = solr.query(solrQuery); } catch (final SolrServerException e) { throw SolrWriter.makeIOException(e); } final SolrDocumentList docList = response.getResults(); final Hit[] hitArr = new Hit[docList.size()]; for (int i = 0; i < hitArr.length; i++) { final SolrDocument solrDoc = docList.get(i); final Object raw = solrDoc.getFirstValue(sortField); WritableComparable sortValue; if (raw instanceof Integer) { sortValue = new IntWritable(((Integer)raw).intValue()); } else if (raw instanceof Float) { sortValue = new FloatWritable(((Float)raw).floatValue()); } else if (raw instanceof String) { sortValue = new Text((String)raw); } else if (raw instanceof Long) { sortValue = new LongWritable(((Long)raw).longValue()); } else { throw new RuntimeException("Unknown sort value type!"); } final String dedupValue = (String) solrDoc.getFirstValue(dedupField); final String uniqueKey = (String )solrDoc.getFirstValue("id"); hitArr[i] = new Hit(uniqueKey, sortValue, dedupValue); } return new Hits(docList.getNumFound(), hitArr); } public HitDetails getDetails(Hit hit) throws IOException { QueryResponse response; try { response = solr.query(new SolrQuery("id:\"" + hit.getUniqueKey() + "\"")); } catch (final SolrServerException e) { throw SolrWriter.makeIOException(e); } final SolrDocumentList docList = response.getResults(); if (docList.getNumFound() == 0) { return null; } return buildDetails(docList.get(0)); } public HitDetails[] getDetails(Hit[] hits) throws IOException { final StringBuilder buf = new StringBuilder(); buf.append("("); for (final Hit hit : hits) { buf.append(" id:\""); buf.append(hit.getUniqueKey()); buf.append("\""); } buf.append(")"); QueryResponse response; try { response = solr.query(new SolrQuery(buf.toString())); } catch (final SolrServerException e) { throw SolrWriter.makeIOException(e); } final SolrDocumentList docList = response.getResults(); if (docList.size() < hits.length) { throw new RuntimeException("Missing hit details! Found: " + docList.size() + ", expecting: " + hits.length); } /* Response returned from SOLR server may be out of * order. So we make sure that nth element of HitDetails[] * is the detail of nth hit. */ final Map<String, HitDetails> detailsMap = new HashMap<String, HitDetails>(hits.length); for (final SolrDocument solrDoc : docList) { final HitDetails details = buildDetails(solrDoc); detailsMap.put(details.getValue("id"), details); } final HitDetails[] detailsArr = new HitDetails[hits.length]; for (int i = 0; i < hits.length; i++) { detailsArr[i] = detailsMap.get(hits[i].getUniqueKey()); } return detailsArr; } public boolean ping() throws IOException { try { return solr.ping().getStatus() == 0; } catch (final SolrServerException e) { throw SolrWriter.makeIOException(e); } } public void close() throws IOException { } private static HitDetails buildDetails(SolrDocument solrDoc) { final List<String> fieldList = new ArrayList<String>(); final List<String> valueList = new ArrayList<String>(); for (final String field : solrDoc.getFieldNames()) { for (final Object o : solrDoc.getFieldValues(field)) { fieldList.add(field); valueList.add(o.toString()); } } final String[] fields = fieldList.toArray(new String[fieldList.size()]); final String[] values = valueList.toArray(new String[valueList.size()]); return new HitDetails(fields, values); } /* Hackish solution for stringifying queries. Code from BooleanQuery. * This is necessary because a BooleanQuery.toString produces * statements like feed:http://www.google.com which doesn't work, we * need feed:"http://www.google.com". */ private static String stringify(BooleanQuery bQuery) { final StringBuilder buffer = new StringBuilder(); final boolean needParens=(bQuery.getBoost() != 1.0) || (bQuery.getMinimumNumberShouldMatch()>0) ; if (needParens) { buffer.append("("); } final BooleanClause[] clauses = bQuery.getClauses(); int i = 0; for (final BooleanClause c : clauses) { if (c.isProhibited()) buffer.append("-"); else if (c.isRequired()) buffer.append("+"); final org.apache.lucene.search.Query subQuery = c.getQuery(); if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens buffer.append("("); buffer.append(c.getQuery().toString("")); buffer.append(")"); } else if (subQuery instanceof TermQuery) { final Term term = ((TermQuery) subQuery).getTerm(); buffer.append(term.field()); buffer.append(":\""); buffer.append(term.text()); buffer.append("\""); } else { buffer.append(" "); buffer.append(c.getQuery().toString()); } if (i++ != clauses.length - 1) { buffer.append(" "); } } if (needParens) { buffer.append(")"); } if (bQuery.getMinimumNumberShouldMatch()>0) { buffer.append('~'); buffer.append(bQuery.getMinimumNumberShouldMatch()); } if (bQuery.getBoost() != 1.0f) { buffer.append(ToStringUtils.boost(bQuery.getBoost())); } return buffer.toString(); } }