/*
* Copyright (2006-2012) Schibsted ASA
* This file is part of Possom.
*
* Possom is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Possom is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Possom. If not, see <http://www.gnu.org/licenses/>.
*/
package no.sesat.search.mode.command;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLEncoder;
import java.text.MessageFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import no.sesat.search.mode.config.YahooIdpCommandConfig;
import no.sesat.search.result.BasicResultList;
import no.sesat.search.result.BasicResultItem;
import no.sesat.search.result.ResultItem;
import no.sesat.search.result.ResultList;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
* Search against Yahoo! Index Data Protocol 2.0.
*
*
* @version $Id$
*/
public class YahooIdpSearchCommand extends AbstractYahooSearchCommand {
// Constants -----------------------------------------------------
private static final Logger LOG = Logger.getLogger(YahooIdpSearchCommand.class);
private static final String ERR_FAILED_CREATING_URL = "Failed to create command url";
private static final String COMMAND_URL_PATTERN =
"/search?Client={0}&Database={1}&DateRange={2}&"
+ "FirstResult={3}&Numresults={4}&"
+ "{5}RegionMix={6}{7}&{8}LanguageMix={9}&"
+ "QueryEncoding={10}&Fields={11}&Unique={12}&Filter={13}&"
+ "Query={14}&"
+ "{15}";
private static final String DATE_PATTERN = "yyyy/MM/dd";
private static final String HEADER_ELEMENT = "HEADER";
private static final String TOTALHITS_ELEMENT ="TOTALHITS";
private static final String DEEPHITS_ELEMENT = "DEEPHITS";
private static final String RESULT_ELEMENT = "RESULT";
private static final String NUMRESULT_ELEMENT = "NUMRESULTS";
private static final String WORDCOUNTS_ELEMENT = "WORDCOUNTS";
// private static final String ALLWORDS = "ALLWORDS("; // not used for now. could be as ANYWORDS optimisation
private static final String ANYWORDS = "ANYWORDS(";
private static final String PHRASEWORDS = "PHRASEWORDS(";
private static final String OMNISEARCH_HACK = "a b c d e f g h i j k l m n o p q r s t u v w x y z";
// Attributes ----------------------------------------------------
// Static --------------------------------------------------------
// Constructors --------------------------------------------------
/**
* Create new overture command.
*
* @param cxt The context to execute in.
*/
public YahooIdpSearchCommand(final Context cxt) {
super(cxt);
setXmlRestful(
new AbstractXmlRestful(cxt) {
public String createRequestURL() {
final YahooIdpCommandConfig conf = (YahooIdpCommandConfig)cxt.getSearchConfiguration();
final String dateRange = '-' + new SimpleDateFormat(DATE_PATTERN).format(new Date());
final String wrappedTransformedQuery = ANYWORDS
// support "*" searches that return everything in the index.
+ ("*".equals(YahooIdpSearchCommand.this.getQuery().getQueryString())
? OMNISEARCH_HACK
: YahooIdpSearchCommand.this.getTransformedQuery()) + ' '
+ YahooIdpSearchCommand.this.getFilter() + ')';
final StringBuilder fields = new StringBuilder();
for (final String field : cxt.getSearchConfiguration().getResultFieldMap().keySet()) {
fields.append(field);
fields.append(',');
}
fields.setLength(fields.length() - 1);
try {
return MessageFormat.format(
COMMAND_URL_PATTERN,
YahooIdpSearchCommand.this.getPartnerId(),
conf.getDatabase(),
URLEncoder.encode(conf.getDateRange().length() >0 ? conf.getDateRange() : dateRange , "UTF-8"),
YahooIdpSearchCommand.this.getOffset(),
conf.getResultsToReturn(),
(0 < conf.getRegion().length() ? "Region=" + conf.getRegion() + '&' : ""),
conf.getRegionMix(),
"enabled".equals(conf.getSpellState()) ? "&SpellState=enabled" : "",
(0 < conf.getLanguage().length() ? "Language=" + conf.getLanguage() + '&' : ""),
conf.getLanguageMix(),
conf.getEncoding(),
fields.toString(),
YahooIdpSearchCommand.this.getParameter("unique") != null ? "" : conf.getUnique(),
conf.getFilter(),
URLEncoder.encode(wrappedTransformedQuery, "UTF-8"),
YahooIdpSearchCommand.this.getAffilDataParameter()
);
} catch (UnsupportedEncodingException ex) {
throw new SearchCommandException(ERR_FAILED_CREATING_URL, ex);
}
}
});
}
// Public --------------------------------------------------------
public ResultList<ResultItem> execute() {
try {
final ResultList<ResultItem> searchResult = new BasicResultList<ResultItem>();
if(getTransformedQuery().trim().length() > 0
|| getFilter().trim().length() > 0
|| "*".equals(getQuery().getQueryString())){
final Document doc = getXmlRestful().getXmlResult();
if (doc != null) {
final Element searchResponseE = doc.getDocumentElement();
final Element headerE = (Element) searchResponseE.getElementsByTagName(HEADER_ELEMENT).item(0);
final Element totalHitsE = (Element) headerE.getElementsByTagName(TOTALHITS_ELEMENT).item(0);
final Element deepHitsE = (Element) headerE.getElementsByTagName(DEEPHITS_ELEMENT).item(0);
final Element numResultsE = (Element) headerE.getElementsByTagName(NUMRESULT_ELEMENT).item(0);
int totalHits;
try {
totalHits = Integer.parseInt(totalHitsE.getFirstChild().getNodeValue());
}catch(NumberFormatException e) {
totalHits = Integer.MAX_VALUE;
}
searchResult.addField("totalhits", String.valueOf(totalHits));
int deepHits;
try {
deepHits = Integer.parseInt(deepHitsE.getFirstChild().getNodeValue());
}catch(NumberFormatException e) {
deepHits = Integer.MAX_VALUE;
}
searchResult.addField("deephits", String.valueOf(deepHits));
searchResult.setHitCount(deepHits);
if(searchResult.getHitCount() > totalHits) {
searchResult.addField("hasMoreHits", "true");
}
int numResults;
try {
numResults = Integer.parseInt(numResultsE.getFirstChild().getNodeValue());
}catch(NumberFormatException e) {
numResults = Integer.MAX_VALUE;
}
searchResult.addField("numResults", String.valueOf(numResults));
// build results
final NodeList list = searchResponseE.getElementsByTagName(RESULT_ELEMENT);
for (int i = 0; i < list.getLength(); ++i) {
final Element listing = (Element) list.item(i);
final BasicResultItem item = createItem(listing);
// HACK to certain hide domains
final String hideDomain = getSearchConfiguration().getHideDomain();
final String host = new URL(item.getField("clickurl")).getHost().replaceAll("/$","");
if(hideDomain.length() == 0 || !host.endsWith(hideDomain)){
searchResult.addResult(item);
} else {
// Improvent of HACK. Keeps the hitcount more accurate. SEARCH-2032
searchResult.setHitCount(searchResult.getHitCount() - 1);
}
}
// build navigators
final NodeList wordCountList = searchResponseE.getElementsByTagName(WORDCOUNTS_ELEMENT);
for (int i = 0; i < wordCountList.getLength(); ++i) {
final Element listing = (Element) wordCountList.item(i);
// TODO make modifiers fast independant!
// final Modifier modifier = new Modifier()
// getRunningQuery().addSource(modifier);
}
}
}
return searchResult;
} catch (SocketTimeoutException ste) {
LOG.error(getSearchConfiguration().getId() + " --> " + ste.getMessage());
return new BasicResultList<ResultItem>();
} catch (IOException e) {
throw new SearchCommandException(e);
} catch (SAXException e) {
throw new SearchCommandException(e);
}
}
/** Assured that associated SearchConfiguration is always of this type. **/
@Override
public YahooIdpCommandConfig getSearchConfiguration() {
return (YahooIdpCommandConfig)super.getSearchConfiguration();
}
@Override
public String getTransformedQuery() {
final String tq = super.getTransformedQuery();
if(tq == null) {
LOG.debug("transformedQuery is null, using \"\"");
return "";
}
return tq;
}
// Z implementation ----------------------------------------------
// Y overrides ---------------------------------------------------
// Package protected ---------------------------------------------
// Protected -----------------------------------------------------
@Override
protected String getFilter() {
return super.getFilter();
}
@Override
protected int getOffset() {
return super.getOffset();
}
@Override
protected String getParameter(String paramName) {
return super.getParameter(paramName);
}
@Override
protected BasicResultItem createItem(final Element result) {
final BasicResultItem item = new BasicResultItem();
for (final Map.Entry<String,String> entry : context.getSearchConfiguration().getResultFieldMap().entrySet()){
final Element fieldE = (Element) result.getElementsByTagName(entry.getKey().toUpperCase()).item(0);
if(fieldE.getChildNodes().getLength() >0){
item.addField(entry.getValue(), fieldE.getFirstChild().getNodeValue());
}
}
return item;
}
// Private -------------------------------------------------------
// Inner classes -------------------------------------------------
}