/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nutch.searcher.response; import java.io.IOException; import javax.servlet.ServletConfig; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.searcher.Hit; import org.apache.nutch.searcher.HitDetails; import org.apache.nutch.searcher.Hits; import org.apache.nutch.searcher.NutchBean; import org.apache.nutch.searcher.Query; import org.apache.nutch.searcher.Summary; import org.apache.nutch.util.NutchConfiguration; /** * Servlet that allows returning search results in multiple different formats * through a ResponseWriter Nutch extension point. * * @see org.apache.nutch.searcher.response.ResponseWriter */ public class SearchServlet extends HttpServlet { public static final Log LOG = LogFactory.getLog(SearchServlet.class); private NutchBean bean; private Configuration conf; private ResponseWriters writers; private String defaultRespType = "xml"; private String defaultLang = null; private int defaultNumRows = 10; private String defaultDedupField = "site"; private int defaultNumDupes = 1; public static final String RESPONSE_TYPE = "rt"; public static final String QUERY = "query"; public static final String LANG = "lang"; public static final String START = "start"; public static final String ROWS = "rows"; public static final String SORT = "sort"; public static final String REVERSE = "reverse"; public static final String DEDUPE = "ddf"; public static final String NUM_DUPES = "dupes"; public static final String SUMMARY = "summary"; public static final String FIELDS = "field"; /** * Initializes servlet configuration default values. Gets NutchBean and * ResponseWriters. */ public void init(ServletConfig config) throws ServletException { // set sensible defaults for response writer values and cache NutchBean. // Also get and cache all ResponseWriter implementations. super.init(config); try { this.conf = NutchConfiguration.get(config.getServletContext()); this.defaultRespType = conf.get("search.response.default.type", "xml"); this.defaultLang = conf.get("search.response.default.lang"); this.defaultNumRows = conf.getInt("search.response.default.numrows", 10); this.defaultDedupField = conf.get("search.response.default.dedupfield", "site"); this.defaultNumDupes = conf.getInt("search.response.default.numdupes", 1); bean = NutchBean.get(config.getServletContext(), this.conf); writers = new ResponseWriters(conf); } catch (IOException e) { throw new ServletException(e); } } /** * Forwards all responses to doGet. */ protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doGet(request, response); } /** * Handles all search requests. Gets parameter input. Does the search and * gets Hits, details, and summaries. Passes off to ResponseWriter classes * to writer different output formats directly to HttpServletResponse. */ protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { if (NutchBean.LOG.isInfoEnabled()) { NutchBean.LOG.info("Query request from " + request.getRemoteAddr()); } // get the response type, used to call the correct ResponseWriter String respType = RequestUtils.getStringParameter(request, RESPONSE_TYPE, defaultRespType); ResponseWriter writer = writers.getResponseWriter(respType); if (writer == null) { throw new IOException("Unknown response type " + respType); } // get the query String query = RequestUtils.getStringParameter(request, QUERY); if (StringUtils.isBlank(query)) { throw new IOException("Query cannot be empty!"); } // get the language from parameter, then request, then finally configuration String lang = RequestUtils.getStringParameter(request, LANG); if (StringUtils.isBlank(lang)) { lang = request.getLocale().getLanguage(); if (StringUtils.isBlank(lang)) { lang = defaultLang; } } // get various other search parameters, fields allows only returning a // given set of fields boolean withSummary = RequestUtils.getBooleanParameter(request, SUMMARY, true); String sort = RequestUtils.getStringParameter(request, SORT); int start = RequestUtils.getIntegerParameter(request, START, 0); int rows = RequestUtils.getIntegerParameter(request, ROWS, defaultNumRows); boolean reverse = RequestUtils.getBooleanParameter(request, REVERSE, false); String dedup = RequestUtils.getStringParameter(request, DEDUPE, defaultDedupField); int numDupes = RequestUtils.getIntegerParameter(request, NUM_DUPES, defaultNumDupes); String[] fields = request.getParameterValues(FIELDS); // parse out the query Query queryObj = Query.parse(query, lang, this.conf); if (NutchBean.LOG.isInfoEnabled()) { NutchBean.LOG.info("query: " + query); NutchBean.LOG.info("lang: " + lang); } // search and return hits Hits hits; try { hits = bean.search(queryObj, start + rows, numDupes, dedup, sort, reverse); } catch (IOException e) { if (NutchBean.LOG.isWarnEnabled()) { NutchBean.LOG.warn("Search Error", e); } hits = new Hits(0, new Hit[0]); } // get the total number of hits, the hits to show, and the hit details long totalHits = hits.getTotal(); int end = (int)Math.min(hits.getLength(), start + rows); int numHits = (end > start) ? (end - start) : 0; Hit[] show = hits.getHits(start, numHits); HitDetails[] details = bean.getDetails(show); // setup the SearchResults object, used in response writing SearchResults results = new SearchResults(); results.setResponseType(respType); results.setQuery(query); results.setLang(lang); results.setSort(sort); results.setReverse(reverse); results.setStart(start); results.setRows(rows); results.setEnd(end); results.setTotalHits(totalHits); results.setHits(show); results.setDetails(details); // are we returning summaries with results, if not avoid network hit if (withSummary) { Summary[] summaries = bean.getSummary(details, queryObj); results.setSummaries(summaries); results.setWithSummary(true); } else { results.setWithSummary(false); } // set return fields if any specified, if not all fields are returned if (fields != null && fields.length > 0) { results.setFields(fields); } // call the response writer to write out content to HttpResponse directly writer.writeResponse(results, request, response); } }