/* Copyright (2005-2012) Schibsted ASA * This file is part of Possom. * * Possom is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Possom is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Possom. If not, see <http://www.gnu.org/licenses/>. * * AbstractESPFastSearchCommand.java * * Created on 14 March 2006, 19:51 * */ package no.sesat.search.mode.command; import com.fastsearch.esp.search.ConfigurationException; import com.fastsearch.esp.search.SearchEngineException; import com.fastsearch.esp.search.SearchFactory; import com.fastsearch.esp.search.query.BaseParameter; import com.fastsearch.esp.search.query.IQuery; import com.fastsearch.esp.search.query.Query; import com.fastsearch.esp.search.query.SearchParameter; import com.fastsearch.esp.search.result.IDocumentSummary; import com.fastsearch.esp.search.result.IDocumentSummaryField; import com.fastsearch.esp.search.result.IQueryResult; import com.fastsearch.esp.search.view.ISearchView; import java.util.Collection; import no.sesat.search.mode.config.EspFastCommandConfig; import no.sesat.commons.visitor.Visitor; import no.sesat.search.query.XorClause; import no.sesat.search.result.BasicResultList; import no.sesat.search.result.BasicResultItem; import no.sesat.search.result.FastSearchResult; import no.sesat.search.result.ResultItem; import no.sesat.search.result.ResultList; import no.sesat.search.site.config.SiteConfiguration; import org.apache.log4j.Logger; import java.io.IOException; import java.net.SocketTimeoutException; import java.net.URL; import java.util.ArrayList; import java.util.Collections; import java.util.Map; import java.util.Properties; import java.util.concurrent.ConcurrentHashMap; import org.apache.log4j.Level; /** * Base class for commands querying a FAST EPS Server. * See https://dev.sesat.no/confluence/display/TECHDEV/FAST+ESP+5.0+Documentation * * @version $Id$ */ public abstract class AbstractESPFastSearchCommand extends AbstractSearchCommand { // Attributes ---------------------------------------------------- private final EspFastCommandConfig cfg; private final String queryServer; private final ISearchView searchView; private IQueryResult result; // Static -------------------------------------------------------- private static final Map<String, ISearchView> SEARCH_VIEWS = new ConcurrentHashMap<String, ISearchView>(); private final static String FACTORY_PROPERTY = "com.fastsearch.esp.search.SearchFactory"; private final static String HTTP_FACTORY = "com.fastsearch.esp.search.http.HttpSearchFactory"; private final static String QR_SERVER_PROPERTY = "com.fastsearch.esp.search.http.qrservers"; private final static String ENCODER_PROPERTY = "com.fastsearch.esp.search.http.encoderclass"; private final static String ENCODER_CLASS = "com.fastsearch.esp.search.http.DSURLUTF8Encoder"; private final static String COLLAPSE_PARAMETER = "collapse"; private static final Logger LOG = Logger.getLogger(AbstractESPFastSearchCommand.class); private static final String ERR_CALL_SET_VIEW = "setView() must be called prior to calling this method"; private enum ReservedWord { AND("and"), OR("or"), ANDNOT("andnot"), NOT("not"), MAX("max"), MIN("min"), ANY("any"), PHRASE("phrase"), RANK("rank"), NEAR("near"), ONEAR("onear"), INT("int"), FLOAT("float"), DOUBLE("double"), DATETIME("datetime"), RANGE("range"), FILTER("filter"), STARTS_WITH("starts-with"), ENDS_WITH("ends-with"), EQUALS("equals"), COUNT("count"), STRING("string"), ESCAPE_REGEXP(".*[<>=].*"); private String word; ReservedWord(final String word) { this.word = word; } public final String getWord() { return word; } } private static transient Collection<String> RESERVED_WORDS; // Constructors -------------------------------------------------- /** * Creates new instance of search command. * * @param cxt The context to work in. */ public AbstractESPFastSearchCommand(final Context cxt) { super(cxt); cfg = (EspFastCommandConfig) getSearchConfiguration(); final SiteConfiguration siteConf = cxt.getDataModel().getSite().getSiteConfiguration(); queryServer = siteConf.getProperty(cfg.getQueryServer()); searchView = initialiseSearchView(); } // Public -------------------------------------------------------- public ResultList<ResultItem> execute() { try { final StringBuilder filterBuilder = new StringBuilder(); if (getFilter() != null) { filterBuilder.append(getFilter()); filterBuilder.append(' '); } final String transformedQuery = appendFilter(cfg.getFilter(), getTransformedQuery()); LOG.debug("Transformed query is " + transformedQuery); final String collapseId = getParameter(COLLAPSE_PARAMETER); final IQuery query = new Query(transformedQuery); if (isCollapsingEnabled()) { if (collapseId == null || "".equals(collapseId) || !cfg.isExpansionEnabled()) { if (cfg.isCollapsingRemoves()) { query.setParameter(new SearchParameter("collapseon", "batvcollapseid")); } } else { filterBuilder.append("+collapseid:").append(collapseId); } } final String sortBy = getSortBy(); query.setParameter(new SearchParameter("sesat:uniqueId", context.getDataModel().getParameters().getUniqueId())); query.setParameter(new SearchParameter(BaseParameter.OFFSET, getOffset())); query.setParameter(new SearchParameter(BaseParameter.HITS, cfg.getResultsToReturn())); query.setParameter(new SearchParameter(BaseParameter.SORT_BY, sortBy)); query.setParameter(new SearchParameter(BaseParameter.LEMMATIZE, cfg.isLemmatize())); query.setParameter(new SearchParameter(BaseParameter.FILTER, filterBuilder.toString())); if (!isNavigatable()) { query.setParameter(new SearchParameter(BaseParameter.NAVIGATION, 0)); } if (!"".equals(cfg.getQtPipeline())) { query.setParameter(new SearchParameter(BaseParameter.QT_PIPELINE, cfg.getQtPipeline())); } modifyQuery(query); DUMP.info(query); // when the root logger is set to DEBUG do not limit connection times if(Logger.getRootLogger().getLevel().isGreaterOrEqual(Level.INFO)){ query.setParameter(BaseParameter.TIMEOUT, getSearchConfiguration().getTimeout()); } result = searchView.search(query); return createSearchResult(result); } catch (SearchEngineException ex) { LOG.error(ex.getMessage() + ' ' + ex.getCause()); return new BasicResultList<ResultItem>(); } catch (SocketTimeoutException ex) { LOG.warn(this +" timed out. (Timeout=" + cfg.getTimeout() + "ms)"); return new BasicResultList<ResultItem>(); } catch (IOException ex) { throw new SearchCommandException(ex); } } @Override public EspFastCommandConfig getSearchConfiguration() { return (EspFastCommandConfig) super.getSearchConfiguration(); } // Package protected --------------------------------------------- // Protected ----------------------------------------------------- /** * Default collapsing from the configuration, can be overridden in subcommands.. * @return true if collapsing is enabled */ protected boolean isCollapsingEnabled() { return cfg.isCollapsingEnabled(); } /** * Default sortby, can be overridden by subcommands. * @return sortby field */ protected String getSortBy() { String sortBy = cfg.getSortBy(); if (isUserSortable()) { final String userSortBy = getUserSortBy(); LOG.debug("userSortBy " + userSortBy); // TODO move-out to genericno. this is configuration hardcoded. // it would be nice to have a sortBy-fields to allow these types of mappings to be configured easily. // see AbstractFast4SearchCommand.createQuery() if("default".equals(userSortBy)) { sortBy = cfg.getSortBy(); } else if ("alternative".equals(userSortBy) && cfg.getAlternativeSortBy() != null) { sortBy = cfg.getAlternativeSortBy(); } else if ("standard".equals(userSortBy)) { sortBy = "-frontpagename -contentprofile -docdatetime"; } else if ("datetime".equals(userSortBy)) { sortBy = "-frontpagename -docdatetime"; } else{ sortBy = cfg.getSortBy(); } } return sortBy; } /** * Concrete SearchCommand should override if it wants to set custom SearchParameters or do other * modifications to the query before it is run. * * @param query the FAST query to modify */ protected void modifyQuery(IQuery query) { // Doing nothing } /** * Concrete SearchCommand should override if it wants to make custom SearchResult * from the FAST QueryResult. * <p/> * <b>Note:</b> To be sure that we do not break code for subclasses that depends on that this is in fact a * FastSearchResult and not a SearchResult, I made the signature of this return a FastSearchResult. * This, at least, applies to NavigatableESPFastCommand. Geir H. Pettersen - T-Rank. * * @param result the FAST IQueryResult to make a SearchResult from. * @return a searchResult constructed from the supplied IQueryResult. * @throws IOException if something bad happens... Like, an invalid url. (Actually just to not break old code.) */ protected FastSearchResult<ResultItem> createSearchResult(final IQueryResult result) throws IOException { final FastSearchResult<ResultItem> searchResult = new FastSearchResult<ResultItem>(); final int cnt = getOffset(); final int maxIndex = getMaxDocIndex(result, cnt, cfg); searchResult.setHitCount(result.getDocCount()); for (int i = cnt; i < maxIndex; i++) { try { final IDocumentSummary document = result.getDocument(i + 1); searchResult.addResult(createResultItem(document)); } catch (NullPointerException e) { // THe doc count is not 100% accurate. LOG.debug("Error finding document " + e); return searchResult; } } if (isCollapsingEnabled() && cfg.isExpansionEnabled()) { final String collapseId = getParameter(COLLAPSE_PARAMETER); if (collapseId != null && !collapseId.equals("")) { if (searchResult.getResults().size() > 0) { final ResultItem itm = searchResult.getResults().get(0); final URL url = new URL(itm.getField("url")); searchResult.addField("collapsedDomain", url.getHost()); } } } return searchResult; } @Override protected Collection<String> getReservedWords() { if(null == RESERVED_WORDS){ final Collection<String> words = new ArrayList<String>(super.getReservedWords()); for (ReservedWord word : ReservedWord.values()) { words.add(word.getWord()); } RESERVED_WORDS = Collections.unmodifiableCollection(words); } return RESERVED_WORDS; } /** In addition to super.escape() also replaces all ? with whitespace. **/ @Override protected String escape(String word) { return super.escape(word.replace('?', ' ')); } /** * @param clause The clause to examine. */ @Override protected void visitXorClause(final Visitor visitor, final XorClause clause) { switch (clause.getHint()) { case FULLNAME_ON_LEFT: case PHRASE_ON_LEFT: // Web searches should use phrases over separate words. clause.getFirstClause().accept(visitor); break; default: // All other high level clauses are ignored. clause.getSecondClause().accept(visitor); break; } } /** * Returns the fast search result * * @return The fast search result. */ protected IQueryResult getIQueryResult() { return result; } protected boolean isNavigatable() { return false; } // Private ------------------------------------------------------- private ISearchView initialiseSearchView() { final String view = cfg.getView(); if (view == null) { throw new IllegalStateException(ERR_CALL_SET_VIEW); } final String searchViewKey = queryServer + "/" + view; ISearchView searchView = SEARCH_VIEWS.get(searchViewKey); if (null == searchView) { final Properties props = new Properties(); props.setProperty(FACTORY_PROPERTY, HTTP_FACTORY); props.setProperty(QR_SERVER_PROPERTY, queryServer); props.setProperty(ENCODER_PROPERTY, ENCODER_CLASS); try { searchView = SearchFactory.newInstance(props).getSearchView(view); // Force server address since we want to use the hardware load balancer. // This also enables us to do tunneling. final String serverName = queryServer.substring(0, queryServer.indexOf(':')); final String serverPort = queryServer.substring(queryServer.indexOf(':') + 1); searchView.setServerAddress(serverName, Integer.parseInt(serverPort), false); } catch (ConfigurationException e) { throw new SearchCommandException(e); } catch (SearchEngineException e) { throw new SearchCommandException(e); } SEARCH_VIEWS.put(searchViewKey, searchView); } LOG.debug("Using searchView: " + searchViewKey); return searchView; } private int getMaxDocIndex( final IQueryResult iQueryResult, final int cnt, final EspFastCommandConfig fastSearchConfiguration) { return Math.min(cnt + fastSearchConfiguration.getResultsToReturn(), iQueryResult.getDocCount()); } private ResultItem createResultItem(final IDocumentSummary document) { ResultItem item = new BasicResultItem(); for (final Map.Entry<String, String> entry : cfg.getResultFieldMap().entrySet()) { final IDocumentSummaryField summary = document.getSummaryField(entry.getKey()); if (summary != null && !summary.isEmpty()) { item = item.addField(entry.getValue(), summary.getStringValue().trim()); } } if (isCollapsingEnabled()) { final String currCollapseId = getParameter(COLLAPSE_PARAMETER); if (currCollapseId == null || currCollapseId.equals("")) { if (!document.getSummaryField("fcocount").isEmpty() && Integer.parseInt(document.getSummaryField("fcocount").getStringValue()) > 1) { item = item.addField("moreHits", "true") .addField("collapseParameter", COLLAPSE_PARAMETER) .addField("collapseId", document.getSummaryField("collapseid").getStringValue()); } } } return item; } private String appendFilter(final String filter, final String q) { if (q.length() == 0 && filter.length() > 0) { return "filter(" + filter + ")"; } else { return filter.length() > 0 ? "and(" + q + "," + "filter(" + filter + "))" : q; } } // Inner classes ------------------------------------------------- }