package info.ephyra.search.searchers;
import info.ephyra.io.MsgPrinter;
import info.ephyra.search.Result;
import java.math.BigInteger;
import com.yahoo.search.SearchClient;
import com.yahoo.search.WebSearchRequest;
import com.yahoo.search.WebSearchResult;
/**
* <p>A <code>KnowledgeMiner</code> that deploys the Yahoo search engine to
* search the Web.</p>
*
* <p>It runs as a separate thread, so several queries can be performed in
* parallel.</p>
*
* <p>This class extends the class <code>KnowledgeMiner</code>.</p>
*
* @author Nico Schlaefer
* @version 2007-05-29
*/
public class YahooKM extends KnowledgeMiner {
/** Yahoo application ID, allows 5,000 queries per day and IP address. */
private static final String YAHOO_ID = "questionanswering";
/** Maximum total number of search results. */
private static final int MAX_RESULTS_TOTAL = 100;
/** Maximum number of search results per query. */
private static final int MAX_RESULTS_PERQUERY = 100;
/** Number of retries if search fails. */
private static final int RETRIES = 50;
/**
* Returns a representation of the query string that is suitable for Yahoo.
*
* @param qs query string
* @return query string for Yahoo
*/
public static String transformQueryString(String qs) {
// drop parentheses
qs = qs.replace("(", "");
qs = qs.replace(")", "");
return qs;
}
/**
* Returns the maximum total number of search results.
*
* @return maximum total number of search results
*/
protected int getMaxResultsTotal() {
return MAX_RESULTS_TOTAL;
}
/**
* Returns the maximum number of search results per query.
*
* @return maximum total number of search results
*/
protected int getMaxResultsPerQuery() {
return MAX_RESULTS_PERQUERY;
}
/**
* Queries the Yahoo search engine and returns an array containing up to
* <code>MAX_RESULTS_PERQUERY</code> search results.
*
* @return Yahoo search results
*/
protected Result[] doSearch() {
SearchClient client = new SearchClient(YAHOO_ID);
// create request
WebSearchRequest request =
new WebSearchRequest(transformQueryString(query.getQueryString()));
request.setLanguage("en"); // search for English pages only
request.setStart(BigInteger.valueOf(firstResult));
request.setResults(maxResults);
// perform search
WebSearchResult[] searchResults = null;
int retries = 0;
while (searchResults == null)
try {
searchResults = client.webSearch(request).listResults();
} catch (Exception e) {
MsgPrinter.printSearchError(e); // print search error message
if (retries == RETRIES) {
MsgPrinter.printErrorMsg("\nSearch failed.");
System.exit(1);
}
retries++;
try {
YahooKM.sleep(1000);
} catch (InterruptedException ie) {}
}
// get snippets and URLs of the corresponding websites
String[] snippets = new String[searchResults.length];
String[] urls = new String[searchResults.length];
String[] cacheUrls = new String[searchResults.length];
for (int i = 0; i < searchResults.length; i++) {
snippets[i] = searchResults[i].getSummary();
urls[i] = searchResults[i].getUrl();
if (searchResults[i].getCache() != null)
cacheUrls[i] = searchResults[i].getCache().getUrl();
}
// set cache URLs and return results
return getResults(snippets, urls, cacheUrls, true);
}
/**
* Returns a new instance of <code>YahooKM</code>. A new instance is created
* for each query.
*
* @return new instance of <code>YahooKM</code>
*/
public KnowledgeMiner getCopy() {
return new YahooKM();
}
}