package info.ephyra.search.searchers;
import info.ephyra.io.MsgPrinter;
import info.ephyra.search.Result;
import com.google.soap.search.GoogleSearch;
import com.google.soap.search.GoogleSearchFault;
import com.google.soap.search.GoogleSearchResult;
import com.google.soap.search.GoogleSearchResultElement;
/**
 * <p>A <code>KnowledgeMiner</code> that deploys the Google search engine to
 * search the Web.</p>
 *
 * <p>It runs as a separate thread, so several queries can be performed in
 * parallel.</p>
 *
 * <p>This class extends the class <code>KnowledgeMiner</code>.</p>
 *
 * <p>Note: if a search still fails after <code>RETRIES</code> retries, the
 * whole JVM is terminated with exit status 1.</p>
 *
 * @author Nico Schlaefer
 * @version 2007-05-29
 */
public class GoogleKM extends KnowledgeMiner {
    /** Google license key. */
    private static final String GOOGLE_KEY = "Enter your Google license key.";
    /** Maximum total number of search results. */
    private static final int MAX_RESULTS_TOTAL = 100;
    /** Maximum number of search results per query. */
    private static final int MAX_RESULTS_PERQUERY = 10;
    /** Number of retries if search fails. */
    private static final int RETRIES = 50;
    /** Pause between two search attempts, in milliseconds. */
    private static final long RETRY_DELAY_MS = 1000;

    /**
     * Returns the maximum total number of search results.
     *
     * @return maximum total number of search results
     */
    protected int getMaxResultsTotal() {
        return MAX_RESULTS_TOTAL;
    }

    /**
     * Returns the maximum number of search results per query.
     *
     * @return maximum number of search results per query
     */
    protected int getMaxResultsPerQuery() {
        return MAX_RESULTS_PERQUERY;
    }

    /**
     * Queries the Google search engine and returns the search results.
     *
     * <p>The request is restricted to English pages, starts at hit position
     * <code>firstResult</code> and asks for at most <code>maxResults</code>
     * hits. The fields <code>query</code>, <code>firstResult</code> and
     * <code>maxResults</code> are not declared in this class; presumably they
     * are inherited from <code>KnowledgeMiner</code>.</p>
     *
     * <p>A failed search is retried up to <code>RETRIES</code> times; if it
     * still fails, the JVM is terminated with exit status 1.</p>
     *
     * @return Google search results
     */
    protected Result[] doSearch() {
        GoogleSearch search = new GoogleSearch();

        // set license key
        search.setKey(GOOGLE_KEY);
        // set search string
        search.setQueryString(query.getQueryString());
        // set language to English only
        search.setLanguageRestricts("English");
        // set hit position of first search result
        search.setStartResult(firstResult);
        // set maximum number of search results
        search.setMaxResults(maxResults);

        // perform search
        GoogleSearchResult googleResult = searchWithRetries(search);

        // get snippets and URLs of the corresponding websites
        GoogleSearchResultElement[] elements = googleResult.getResultElements();
        String[] snippets = new String[elements.length];
        String[] urls = new String[elements.length];
        for (int i = 0; i < elements.length; i++) {
            snippets[i] = elements[i].getSnippet();
            urls[i] = elements[i].getURL();
        }

        // return results (getResults is defined outside this class; the
        // meaning of the third argument is not visible here)
        return getResults(snippets, urls, true);
    }

    /**
     * Executes the given search, retrying after each
     * <code>GoogleSearchFault</code> with a pause of
     * <code>RETRY_DELAY_MS</code> milliseconds in between.
     *
     * <p>Each fault is reported through <code>MsgPrinter</code>. Once
     * <code>RETRIES</code> retries have failed as well, an error message is
     * printed and the JVM is terminated with exit status 1.</p>
     *
     * <p>An interrupt received while pausing does not abort the retries, but
     * the interrupt status of the thread is restored before this method
     * returns so that callers can still observe it.</p>
     *
     * @param search fully configured search request
     * @return result of the first successful search attempt
     */
    private GoogleSearchResult searchWithRetries(GoogleSearch search) {
        GoogleSearchResult googleResult = null;
        int retries = 0;
        boolean interrupted = false;

        try {
            while (googleResult == null) {
                try {
                    googleResult = search.doSearch();
                } catch (GoogleSearchFault e) {
                    MsgPrinter.printSearchError(e);  // print search error message

                    if (retries == RETRIES) {
                        MsgPrinter.printErrorMsg("\nSearch failed.");
                        System.exit(1);
                    }
                    retries++;

                    try {
                        Thread.sleep(RETRY_DELAY_MS);
                    } catch (InterruptedException ie) {
                        // keep retrying, but remember the interrupt so that
                        // it is not lost
                        interrupted = true;
                    }
                }
            }
        } finally {
            // re-assert the interrupt only after the loop: setting the flag
            // earlier would make every following sleep return immediately
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }

        return googleResult;
    }

    /**
     * Returns a new instance of <code>GoogleKM</code>. A new instance is
     * created for each query.
     *
     * @return new instance of <code>GoogleKM</code>
     */
    public KnowledgeMiner getCopy() {
        return new GoogleKM();
    }
}