package ecologylab.bigsemantics.seeding;
import java.io.File;
import ecologylab.bigsemantics.collecting.SemanticsGlobalScope;
import ecologylab.bigsemantics.collecting.SemanticsSessionScope;
import ecologylab.bigsemantics.documentparsers.SearchParser;
import ecologylab.bigsemantics.metametadata.MetaMetadataRepository;
import ecologylab.bigsemantics.metametadata.SearchEngine;
import ecologylab.bigsemantics.model.text.InterestModel;
import ecologylab.net.ParsedURL;
import ecologylab.serialization.SIMPLTranslationException;
import ecologylab.serialization.SimplTypesScope;
import ecologylab.serialization.annotations.simpl_inherit;
import ecologylab.serialization.annotations.simpl_scalar;
import ecologylab.serialization.formatenums.StringFormat;
/**
* {@link Seed Seed} element that directs combinFormation to perform a search.
*
* Starts by providing a basis for specification of search seeds.
* Then, keeps state during processing of the search.
*/
@simpl_inherit
public class SearchState extends Seed
implements SemanticsPrefs
{
private static final short DEFAULT_SEARCH_INTEREST_LEVEL = (short) 2;
public static final int NUM_IMAGE_RESULTS = 40;
/**
* Search engine to use. Currently supported are google, flickr, yahoo, yahoo_image, yahoo_news, yahoo_buzz, delicious.
*/
@simpl_scalar protected String engine;
protected String queryNoPluses;
/**
* For del.icio.us only. Allows querying the delicious tags for a particular user.
*/
@simpl_scalar protected String creator;
/**
* For Yahoo Buzz. Queries to buzz can either be "leaders" (the default) or "movers".
* The latter tend to be more current and interesting.
*/
@simpl_scalar protected boolean isMovers;
private int numResults = NUM_SEARCH_RESULTS.value();
/**
* The result index for the first result in the current search (to be) issued for this Search seed.
* Initially, this is 0.
* Each time a subsequent search is run, it will be incremented by the number of searchResults that
* were requested the previous time.
*/
@simpl_scalar
private int currentFirstResultIndex;
/**
* how many results we have actually seen
*/
private int numResultsFrom = 0;
/**
* Initial level of interest in this seed.
*/
private short interestLevel = DEFAULT_SEARCH_INTEREST_LEVEL;
/**
* Among the searches that were specified (as part of seeding), which one is this?
*/
private int searchNum;
public SearchState()
{
super();
}
public SearchState(String query, String engine)
{
this(query, engine, DEFAULT_SEARCH_INTEREST_LEVEL);
}
public SearchState(String query, String engine, short interestLevel)
{
this(query, engine, interestLevel, engine != null && engine.contains("image") ? NUM_IMAGE_RESULTS : NUM_SEARCH_RESULTS.value());
}
public SearchState(String query, String engine, short interestLevel, int numResults)
{
super();
setQuery(query);
this.engine = engine;
this.interestLevel = interestLevel;
this.numResults = numResults;
}
/**
* this is when we do a buzz, etc search, and seeds are created because they are a result.
* @param query
* @param engine
* @param noAggregator TODO
*/
public SearchState(SemanticsSessionScope infoProcessor, String query, String engine, boolean noAggregator)
{
this(query, engine);
this.noAggregator = noAggregator;
initialize(infoProcessor);
}
/**
* Get the next results from the previous search seed.
*
* @param previousSearch
*/
// public SearchState(SearchState previousSearch)
// {
// this(null, previousSearch.query, previousSearch.numResults, previousSearch.initialIntensity, previousSearch.bias);
// this.firstResult = previousSearch.firstResult + numResults;
// this.engine = previousSearch.engine;
// this.resultDistributer = previousSearch.resultDistributer;
// }
/**
* Check the validity of this seed.
*/
public boolean isActive()
{
if (engine == null)
{
error("Can't process search seed with null engine.");
return false;
}
if (((query == null) || (query.length()<=0) ) && !DeliciousState.DELICIOUS.equals(engine)) // delicious handles empty queries nicely
{
try
{
error("Can't process search seed with null query: " + SimplTypesScope.serialize(this, StringFormat.XML));
} catch (SIMPLTranslationException e)
{
e.printStackTrace();
}
return false;
}
return true;
}
public void eliminatePlusesFromQuery()
{
if (query != null && query.contains("+"))
{
query = query.replace('+', ' ');
}
}
/**
* Bring this seed into the agent or directly into the composition.
*
* @param objectRegistry Context passed between services calls.
* @param infoCollector TODO
*/
@Override
public void performInternalSeedingSteps(SemanticsGlobalScope infoCollector)
{
// InterestModel.expressInterest(query, interestLevel);
//infoCollector.instantiateDocumentType(SEARCH_DOCUMENT_TYPE_REGISTRY, engine, this);
new SearchParser(this);
}
/**
* Do stuff in the DocumentType constructor to setup this search result Container.
* For Search, the extra stuff we do involves setting up the TermVector to match the query,
* and squirting some initial interest into these Terms.
*
* @param container
*/
// public void bindToContainer(Container container)
// {
// super.bindToContainer(container);
// //container.metadata.initializeFromSearch(query);
// if(query != null)
// {
// String queryValue = query.replace('+', ' ');
// container.setQuery(queryValue);
// }
//// container.setMetadataField("query", queryValue);
//// queryField.incrementParticipantInterest(initialIntensity);
//// container.incrementTermVectorInterest((short) initialIntensity);
//
// //container.setIntensity(this.initialIntensity);
// }
/**
* The String the dashboard needs to show.
*
* @return The search query.
*/
public String valueString()
{
return query;
}
/**
* @return Returns the initialIntensity.
*/
public short initialIntensity()
{
return interestLevel;
}
/**
* The requested number of results to retrieve.
*
* @return Returns the numResults.
*/
public int numResults()
{
return numResults;
}
/**
* If multiple searches were specified within the {@link ecologylab.bigsemantics.seeding.SeedSet SeedSet}, this field specifies
* the index of this one.
*
* @return Returns the searchNum.
*/
public int searchNum()
{
return searchNum;
}
protected boolean noAggregator()
{
return this.noAggregator || (seedSet() == null);
}
/**
* @param query The query to set.
*/
public boolean setValue(String query)
{
this.query = query;
return true;
}
/**
* @return Returns the firstResult.
*/
public int currentFirstResultIndex()
{
return currentFirstResultIndex;
}
/**
* Called to specify that the next set of search results will be retrieved for this Search Seed.
*/
@Override
public int nextResultSet()
{
currentFirstResultIndex += numResults;
return currentFirstResultIndex;
}
public void setResultSetNum(int n)
{
currentFirstResultIndex = n * numResults;
}
/**
* @param queueInsteadOfImmediate The queueInsteadOfImmediate to set.
*/
public void setQueueInsteadOfImmediate(boolean queueInsteadOfImmediate)
{
this.queueInsteadOfImmediate = queueInsteadOfImmediate;
}
/**
* @return the creator
*/
public String creator()
{
return creator;
}
/**
* @return true if specified that the user wants to see the most popular tagged
* stuff in del.icio.us
*/
public boolean popular()
{
return false;
}
/**
* Set the seed to specify that the user wants to see the most popular tagged
* stuff in del.icio.us
*
* @param popular the popular to set
*/
public void setPopular(boolean popular)
{
}
public void setSearchNum(int searchNum)
{
this.searchNum = searchNum;
}
/**
* Set query and queryNoPluses.
*
* @param query
* @return Query with any plus characters removed.
*/
public String setQuery(String query)
{
this.query = query;
String result = null;
if (query != null)
{
result = query.replace('+', ' ');
queryNoPluses = result;
}
return result;
}
public String getEngine() {
return engine;
}
public void setEngine(String engine) {
this.engine = engine;
}
/**
* Pass back the array of choices, when an engine takes only a fixed set of them.
*
* @return null in the default implemenation, indicating vocabulary is not controlled.
*/
public String[] controlledVocabulary()
{
return null;
}
/**
* For Yahoo Buzz. Queries to buzz can either be "leaders" (the default) or "movers".
* The latter tend to be more current and interesting.
*/
public boolean isMovers()
{
return isMovers;
}
/**
* For Yahoo Buzz. Queries to buzz can either be "leaders" (the default) or "movers".
* The latter tend to be more current and interesting.
*/
public void setIsMovers(boolean isMovers)
{
this.isMovers = isMovers;
}
public boolean canChangeVisibility()
{
return true;
}
public boolean isDeletable()
{
return true;
}
public boolean isEditable()
{
return true;
}
public boolean isRejectable()
{
return false;
}
@Override
public boolean isHomogenousSeed()
{
return (query != null && query.contains("site:"));
}
static final File testFile = new File("config/preferences/katrinaLocationAware.xml");
// public static void main(String[] a)
// {
// try
// {
// PrefSet searchState = (PrefSet) ElementState.translateFromXML(testFile, CFServicesTranslations.get());
// searchState.translateToXML(System.out);
// } catch (XMLTranslationException e)
// {
// // TODO Auto-generated catch block
// e.printStackTrace();
// }
// }
//
public int numResultsFrom()
{
// TODO Auto-generated method stub
return numResultsFrom;
}
public void incrementNumResultsBy(int i)
{
numResultsFrom += i;
}
/**
* Processing peformed for each Seed in a SeedSet in a loop before performSeeding() will be called in a separate loop.
*
* @return true if this is not an image search, and we are processing it.
*/
@Override
public boolean initializeSeedingSteps(SeedSet seedSet, int searchNum)
{
InterestModel.expressInterest(query, interestLevel);
setSearchNum(searchNum);
setSeedSet(seedSet);
return true;
}
String toString;
public String toString()
{
String result = this.toString;
if (result == null)
{
if ((query != null) && engine != null)
{
StringBuilder buffy = new StringBuilder();
buffy.append("SearchState[").append(engine).append(']').append(' ');
buffy.append(query);
result = buffy.toString();
this.toString = result;
}
else
result = super.toString();
}
return result;
}
/**
* Called after a seed is parsed to prevent it being parsed again later during re-seeding.
* This override does nothing, because Search seeds should remain active.
*
* @param inActive the inActive to set
*/
@Override
public void setActive(boolean value)
{
}
/**
* Form the url for the particular query, engine and resultIndex
*/
public ParsedURL formSearchUrlBasedOnEngine()
{
MetaMetadataRepository repository = infoCollector.getMetaMetadataRepository();
SearchEngine searchEngine = repository.getSearchEngine(engine);
return searchEngine.formSearchUrl(getQuery(), numResults(), currentFirstResultIndex);
}
protected boolean useDistributor()
{
return true;
}
}