/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.concordance;

import java.util.List;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.concordance.classic.AbstractConcordanceWindowCollector;
import org.apache.lucene.search.concordance.classic.ConcordanceSearcher;
import org.apache.lucene.search.concordance.classic.ConcordanceSortOrder;
import org.apache.lucene.search.concordance.classic.DocMetadataExtractor;
import org.apache.lucene.search.concordance.classic.WindowBuilder;
import org.apache.lucene.search.concordance.classic.impl.ConcordanceWindowCollector;
import org.apache.lucene.search.concordance.classic.impl.DefaultSortKeyBuilder;
import org.apache.lucene.search.concordance.classic.impl.FieldBasedDocIdBuilder;
import org.apache.lucene.search.concordance.classic.impl.SimpleDocMetadataExtractor;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.cloud.RequestThreads;
import org.apache.solr.cloud.RequestWorker;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.QParser;

/**
 * Request handler that returns concordance (keyword-in-context) windows for a query,
 * either from the local index only or merged with the results of other shards.
 *
 * <p>Example configuration:
 *
 * <pre>{@code
 * <requestHandler name="/kwic" class="org.apache.solr.search.concordance.KWICRequestHandler">
 *   <lst name="defaults">
 *     <str name="echoParams">explicit</str>
 *     <str name="f">content_txt</str>
 *     <str name="df">content_txt</str>
 *     <str name="maxWindows">500</str>
 *     <str name="wt">xml</str>
 *
 *     <!-- other parameters:
 *
 *     <str name="debug">false</str>
 *     <str name="fl">metadata field1,metadata field2,metadata field3</str>
 *     <str name="targetOverlaps">true</str>
 *     <str name="contentDisplaySize">42</str>
 *     <str name="targetDisplaySize">42</str>
 *     <str name="tokensAfter">42</str>
 *     <str name="tokensBefore">42</str>
 *     <str name="sortOrder">TARGET_PRE</str> //TODO: add options here: TARGET_POST, PRE, POST
 *
 *     -->
 *   </lst>
 * </requestHandler>
 * }</pre>
 *
 * @author JRROBINSON
 */
//TODO: refactor to extend SearchHandler, and move Concordance logic into ConcordanceSearchComponent
//as planned???
public class KWICRequestHandler extends SolrConcordanceBase {

  /** Handler name passed to {@code getHandlerName} as the default. */
  public static final String DefaultName = "/concordance";

  /** Response key under which the concordance results are added. */
  public static final String NODE = "contextWindows";

  /**
   * Max number of request threads to spawn. Since this service wasn't intended to return
   * ALL possible results, it seems reasonable to cap this at something
   */
  public static final int MAX_THREADS = 25;

  /**
   * Runs a concordance search against the local index only.
   *
   * @param req the current request; supplies the parameters, schema and searcher
   * @return the collected windows as produced by {@code convertToList}
   * @throws Exception if query parsing or the concordance search fails
   */
  public static NamedList doLocalSearch(SolrQueryRequest req) throws Exception {
    return doLocalSearch(null, req);
  }

  /**
   * Runs a concordance search against the local index only.
   *
   * @param filter NOTE(review): currently not used by this method; the filter that is applied
   *               is the one returned by {@code getFilterQuery(req)}
   * @param req    the current request; supplies the parameters, schema and searcher
   * @return the collected windows as produced by {@code convertToList}
   * @throws RuntimeException if the schema field or its type is {@code null}
   * @throws Exception        if query parsing or the concordance search fails
   */
  public static NamedList doLocalSearch(Query filter, SolrQueryRequest req) throws Exception {
    SolrParams params = req.getParams();
    IndexSchema schema = req.getSchema();

    String field = getField(params, schema.getDefaultSearchFieldName());
    String q = params.get(CommonParams.Q);
    String fl = params.get(CommonParams.FL);
    String solrUniqueKeyField = schema.getUniqueKeyField().getName();

    // "fl" is a comma separated list of metadata fields to return with each window
    DocMetadataExtractor metadataExtractor = (fl != null && fl.length() > 0)
        ? new SimpleDocMetadataExtractor(fl.split(","))
        : new SimpleDocMetadataExtractor();

    Filter queryFilter = getFilterQuery(req);

    //TODO remove and only use index
    // Locale.ROOT keeps the comparison below independent of the JVM default locale.
    String anType = params.get("anType", "query").toLowerCase(Locale.ROOT);

    SchemaField sf = schema.getField(field);
    if (sf == null || sf.getType() == null) {
      throw new RuntimeException("No analyzer found for field " + field);
    }
    Analyzer analyzer = anType.equals("query")
        ? sf.getType().getQueryAnalyzer()
        : sf.getType().getIndexAnalyzer();

    Query query = QParser.getParser(q, null, req).parse();
    IndexReader reader = req.getSearcher().getIndexReader();

    ConcordanceConfig config = buildConcordanceConfig(field, solrUniqueKeyField, params);

    // NOTE(review): the literal 100 is presumably the offset gap expected by WindowBuilder - confirm
    WindowBuilder windowBuilder = new WindowBuilder(
        config.getTokensBefore(),
        config.getTokensAfter(),
        100,
        new DefaultSortKeyBuilder(config.getSortOrder()),
        metadataExtractor,
        new FieldBasedDocIdBuilder(solrUniqueKeyField));

    ConcordanceSearcher searcher = new ConcordanceSearcher(windowBuilder);
    AbstractConcordanceWindowCollector collector =
        new ConcordanceWindowCollector(config.getMaxWindows());

    searcher.search(reader, field, query, queryFilter, analyzer, collector);

    return convertToList(solrUniqueKeyField, collector);
  }

  /**
   * Collects worker results into a fresh {@link Results} built from the metadata
   * (the {@code ConcordanceConfig}) attached to {@code threads}.
   *
   * @param threads the submitted shard requests
   * @return the merged results
   */
  public static Results spinWait(RequestThreads<ConcordanceConfig> threads) {
    Results results = new Results(threads.getMetadata());
    return spinWait(threads, results);
  }

  /**
   * Polls the submitted shard requests and merges finished ones into {@code results} until
   * the pool is terminated, no requests are left, or the window maximum has been hit. The pool
   * is then shut down and any remaining requests are drained the same way.
   *
   * <p>NOTE(review): both loops poll without pausing, and only look at the request returned by
   * {@code threads.next()}; they rely on {@code removeLast()} removing that same request.
   *
   * @param threads the submitted shard requests; may be {@code null} or empty
   * @param results accumulator the worker results are added to
   * @return {@code results}
   */
  public static Results spinWait(RequestThreads<ConcordanceConfig> threads, Results results) {
    if (threads == null || threads.empty()) {
      return results;
    }

    while (!threads.isTerminated() && !threads.empty() && !results.hitMax) {
      //TODO: should iterate completed and not last inserted (!Stack)
      RequestWorker req = threads.next();
      if (!req.isRunning()) {
        NamedList nl = req.getResults();
        if (nl != null) {
          results.add(nl, req.getName());
        }
        threads.removeLast();
      }
    }

    //force complete shutdown
    threads.shutdownNow();

    //if not enough hits, check any remaining threads that haven't been collected
    while (!threads.empty() && !results.hitMax) {
      RequestWorker req = threads.next();
      if (req != null && !req.isRunning()) {
        NamedList nl = req.getResults();
        if (nl != null) {
          results.add(nl, req.getName());
        }
        threads.removeLast();
      }
    }

    threads.clear();
    return results;
  }

  /**
   * Submits one request per shard, capped at {@link #MAX_THREADS}.
   *
   * @param shards shard addresses to query
   * @param req    the current request
   * @return the sealed request pool
   */
  public static RequestThreads<ConcordanceConfig> initRequestPump(List<String> shards,
                                                                  SolrQueryRequest req) {
    return initRequestPump(shards, req, MAX_THREADS);
  }

  /**
   * Submits one request per shard, for at most {@code maxThreads} shards, and seals the pool.
   *
   * <p>The pool is sized {@code min(shards.size(), maxThreads)} and carries the request's
   * {@code ConcordanceConfig} as metadata. Every shard is asked for the full
   * {@code maxWindows} rather than a per-shard fraction of it.
   *
   * @param shards     shard addresses to query; an entry that already starts with
   *                   {@code http://} or {@code https://} is used as is, anything else is
   *                   prefixed with {@code http://}
   * @param req        the current request
   * @param maxThreads upper bound on both the pool size and the number of shards queried
   * @return the sealed request pool
   */
  public static RequestThreads<ConcordanceConfig> initRequestPump(List<String> shards,
                                                                  SolrQueryRequest req,
                                                                  int maxThreads) {
    SolrParams params = req.getParams();
    String field = SolrConcordanceBase.getField(params, req.getSchema().getDefaultSearchFieldName());
    String q = params.get(CommonParams.Q);
    ConcordanceConfig config =
        buildConcordanceConfig(field, req.getSchema().getUniqueKeyField().getName(), params);

    RequestThreads<ConcordanceConfig> threads = RequestThreads
        .<ConcordanceConfig>newFixedThreadPool(Math.min(shards.size(), maxThreads))
        .setMetadata(config);

    String handler = getHandlerName(req, DefaultName, KWICRequestHandler.class);

    // not divided by the shard count: Math.round(config.getMaxWindows() / (float) shards.size())
    int windowsForEach = config.getMaxWindows();
    ModifiableSolrParams p = getWorkerParams(field, q, params, windowsForEach);

    int submitted = 0;
    for (String node : shards) {
      // stop at exactly maxThreads workers, matching the pool size chosen above
      if (submitted >= maxThreads) {
        break;
      }
      RequestWorker worker = new RequestWorker(toShardUrl(node), handler, p).setName(node);
      threads.addExecute(worker);
      submitted++;
    }
    threads.seal(); //disallow future requests (& execute

    return threads;
  }

  /** Returns {@code node} as a URL, adding {@code http://} unless a scheme is already present. */
  private static String toShardUrl(String node) {
    if (node.startsWith("http://") || node.startsWith("https://")) {
      return node;
    }
    return "http://" + node;
  }

  /**
   * Builds the parameters sent to each shard: the field, query and window limit, plus the
   * concordance-related parameters copied from the parent request via {@code setParam}.
   */
  private static ModifiableSolrParams getWorkerParams(String field, String q, SolrParams parent,
                                                      int maxWindows) {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("f", field);
    params.set("q", q);
    params.set("maxWindows", maxWindows);
    //TODO false if distrib
    params.set("lq", true); //flag to disallow recursive zoo queries
    //don't need rows of docs if SearchComponent is already returning them
    params.set("rows", 0);

    setParam("anType", params, parent);
    setParam("fq", params, parent);
    setParam("echoParams", params, parent);
    setParam("defType", params, parent);
    setParam("wt", params, parent);
    setParam("debug", params, parent);
    setParam("fl", params, parent);
    setParam("targetOverlaps", params, parent);
    setParam("contentDisplaySize", params, parent);
    setParam("targetDisplaySize", params, parent);
    setParam("tokensAfter", params, parent);
    setParam("tokensBefore", params, parent);
    setParam("sortOrder", params, parent);

    return params;
  }

  /**
   * Creates a {@code ConcordanceConfig} for {@code field} and applies every recognised request
   * parameter that is present and non-empty. Parsing is best effort: a value that cannot be
   * parsed or applied is skipped and the remaining parameters are still processed.
   *
   * @param field   field to run the concordance against
   * @param idField unique key field name (not used by this method)
   * @param params  request parameters
   * @return the populated config
   */
  private static ConcordanceConfig buildConcordanceConfig(String field, String idField,
                                                          SolrParams params) {
    ConcordanceConfig config = new ConcordanceConfig(field);

    String param = params.get("targetOverlaps");
    if (param != null && param.length() > 0) {
      try {
        config.setAllowTargetOverlaps(Boolean.parseBoolean(param));
      } catch (Exception ignored) {
        // best effort: skip a value that cannot be applied
      }
    }

    param = params.get("contentDisplaySize");
    if (param != null && param.length() > 0) {
      try {
        config.setMaxContextDisplaySizeChars(Integer.parseInt(param));
      } catch (Exception ignored) {
        // best effort: skip a value that cannot be parsed or applied
      }
    }

    param = params.get("targetDisplaySize");
    if (param != null && param.length() > 0) {
      try {
        config.setMaxTargetDisplaySizeChars(Integer.parseInt(param));
      } catch (Exception ignored) {
        // best effort: skip a value that cannot be parsed or applied
      }
    }

    param = params.get("maxWindows");
    if (param != null && param.length() > 0) {
      try {
        config.setMaxWindows(Integer.parseInt(param));
      } catch (Exception ignored) {
        // best effort: skip a value that cannot be parsed or applied
      }
    }

    param = params.get("tokensAfter");
    if (param != null && param.length() > 0) {
      try {
        config.setTokensAfter(Integer.parseInt(param));
      } catch (Exception ignored) {
        // best effort: skip a value that cannot be parsed or applied
      }
    }

    param = params.get("tokensBefore");
    if (param != null && param.length() > 0) {
      try {
        config.setTokensBefore(Integer.parseInt(param));
      } catch (Exception ignored) {
        // best effort: skip a value that cannot be parsed or applied
      }
    }

    param = params.get("sortOrder");
    if (param != null && param.length() > 0) {
      try {
        config.setSortOrder(ConcordanceSortOrder.valueOf(param));
      } catch (Exception ignored) {
        // best effort: skip a name that is not a ConcordanceSortOrder constant
      }
    }

    return config;
  }

  /** Delegates to the superclass initialisation. */
  @Override
  public void init(@SuppressWarnings("rawtypes") NamedList args) {
    super.init(args);
  }

  @Override
  public String getDescription() {
    return "Returns concordance results for your query";
  }

  @Override
  public String getSource() {
    return "$Source$";
  }

  @Override
  protected String getHandlerName(SolrQueryRequest req) {
    return getHandlerName(req, DefaultName, this.getClass());
  }

  /**
   * Dispatches to the distributed or the local search depending on
   * {@code isDistributed(req)}.
   */
  @Override
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    if (isDistributed(req)) {
      doZooQuery(req, rsp);
    } else {
      doQuery(req, rsp);
    }
  }

  /** Runs the local search and adds its result to the response under {@link #NODE}. */
  private void doQuery(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    NamedList results = doLocalSearch(req);
    rsp.add(NODE, results);
  }

  /**
   * Starts the shard requests, runs the local search while they execute, merges everything
   * and adds the merged result to the response under {@link #NODE}.
   */
  private void doZooQuery(SolrQueryRequest req, SolrQueryResponse rsp)
      throws SolrServerException, Exception {
    List<String> shards = getShards(req, false);
    RequestThreads<ConcordanceConfig> threads = initRequestPump(shards, req);

    Results results = new Results(threads.getMetadata());

    NamedList nl = doLocalSearch(req);
    results.add(nl, "local");

    results = spinWait(threads, results);

    rsp.add(NODE, results.toNamedList());
  }

  /**
   * Accumulates the counters and windows of several concordance responses.
   */
  static class Results {
    int maxWindows = -1;
    boolean hitMax = false;
    long numDocs = 0;
    int totalDocs = 0;
    int totalWindows = 0;
    int numWindows = 0;
    NamedList windows = new SimpleOrderedMap<Object>();

    Results(int maxWindows) {
      this.maxWindows = maxWindows;
    }

    Results(ConcordanceConfig config) {
      this.maxWindows = config.getMaxWindows();
    }

    /**
     * Merges one response into this accumulator.
     *
     * <p>The counters are read from the {@link #NODE} entry of {@code nl} when present, otherwise
     * from {@code nl} itself. {@code hitMax} is recomputed from the summed {@code numWindows};
     * the windows themselves are appended without truncation.
     *
     * @param nl    response to merge
     * @param extra when non-empty, added to every window as its {@code "source"}
     */
    void add(NamedList nl, String extra) {
      NamedList nlRS = (NamedList) nl.get(NODE);
      if (nlRS == null) {
        nlRS = nl;
      }

      numDocs += getLong("numDocs", nlRS);
      totalDocs += getInt("totalDocs", nlRS);
      totalWindows += getInt("totalWindows", nlRS);
      numWindows += getInt("numWindows", nlRS);

      hitMax = numWindows >= maxWindows;

      Object o = nlRS.get("windows");
      if (o != null) {
        NamedList nlWindows = (NamedList) o;
        List<NamedList> wins = nlWindows.getAll("window");
        for (NamedList nlWin : wins) {
          if (extra != null && extra.length() > 0) {
            nlWin.add("source", extra);
          }
          //TODO: if one wanted to sort this, they'd have to convert it to a class and then sort
          //before returning
          windows.add("window", nlWin);
        }
      }
    }

    /**
     * Reads {@code name} from {@code nl} as an int.
     *
     * @return the value, or 0 when the entry is missing
     * @throws ClassCastException if the entry is not a {@link Number}
     */
    int getInt(String name, NamedList nl) {
      Object o = nl.get(name);
      if (o == null) {
        return 0;
      }
      // go through Number so that any boxed numeric type is accepted, not just Integer
      return ((Number) o).intValue();
    }

    /**
     * Reads {@code name} from {@code nl} as a long.
     *
     * @return the value, or 0 when the entry is missing
     * @throws ClassCastException if the entry is not a {@link Number}
     */
    long getLong(String name, NamedList nl) {
      Object o = nl.get(name);
      if (o == null) {
        return 0;
      }
      // go through Number so that any boxed numeric type is accepted, not just Long
      return ((Number) o).longValue();
    }

    /** Returns the accumulated counters and windows as a new ordered {@code NamedList}. */
    NamedList toNamedList() {
      NamedList nl = new SimpleOrderedMap<>();
      nl.add("hitMax", hitMax);
      nl.add("numDocs", numDocs);
      nl.add("totalDocs", totalDocs);
      nl.add("totalWindows", totalWindows);
      nl.add("numWindows", numWindows);
      nl.add("windows", windows);
      return nl;
    }
  }
}