package ecologylab.bigsemantics.seeding;
import java.awt.Point;
import java.io.File;
import ecologylab.bigsemantics.collecting.SemanticsGlobalScope;
import ecologylab.bigsemantics.metadata.builtins.Document;
import ecologylab.net.ParsedURL;
import ecologylab.serialization.annotations.simpl_inherit;
import ecologylab.serialization.annotations.simpl_scalar;
/**
* {@link Seed Seed} element used to tell combinFormation to process a document.
*
* @author andruid
*/
@simpl_inherit
public class DocumentState extends Seed
{
private static final String DOCS_FEEDS = "docs_feeds";
/**
* URL of the document or container specified for downloading and processing.
*/
@simpl_scalar ParsedURL url;
Document document;
/**
* If true, then no media should be collected from this document.
* Rather, it will be treated as a collection of links, that will be fed to the focused web crawler agent.
*/
@simpl_scalar boolean justCrawl;
/**
* If true, then no links should be collected from this document and fed to the focused web crawler agent.
* Instead, only collect media to form image and text surrogates.
*/
@simpl_scalar boolean justMedia;
//action is just a placeholder, and doesn't do anything.
private String action;
/**
* This field get's filled out for a Drag and Drop Seed.
*/
private Point dropPoint;
public DocumentState()
{
super();
}
public DocumentState(String purlString, String action)
{
super();
this.action = action;
this.setValue(purlString);
}
public DocumentState(ParsedURL purl)
{
this.url = purl;
}
/**
* Bring this seed into the agent or directly into the composition.
*
* @param objectRegistry Context passed between services calls.
* @param infoCollector TODO
*/
@Override
public void performInternalSeedingSteps(SemanticsGlobalScope infoCollector)
{
if (url != null)
{
// get a Container object associated with this, associate a seed, and initiate download
println("-- processing document seed: " + url);
infoCollector.getSeeding().traversable(url);
// strangely enough, a file document seed can have a parentContainer!
File file = url.file();
document = infoCollector.getOrConstructDocument(url);
if (file != null)
{
File parent = file.getParentFile(); // the directory the file lives in
Document parentDocument = infoCollector.getOrConstructDocument(new ParsedURL(parent));
document.addInlink(parentDocument);
}
if (query != null)
{
//document.setQuery(query);
//InterestModel.expressInterest(query, (short)3);
throw new RuntimeException("not implemented");
}
document.queueDownload();
}
}
/**
* The String the dashboard needs to show.
*
* @return The purl -- as a String.
*/
public String valueString()
{
return (url != null) ? url.toString() : new String("");
}
/**
* Set the value of the purl field, if the String is valid.
*
* @param value
* @return true if the field was set succesfully, or false if the value is invalid.
*/
public boolean setValue(String value)
{
ParsedURL trialValue = ParsedURL.getAbsolute(value, "error parsing from seed");
boolean result = (trialValue != null);
if (result)
url = trialValue;
return result;
}
/**
* @param dropPoint The dropPoint to set.
*/
public void setDropPoint(Point dropPoint)
{
this.dropPoint = dropPoint;
}
/**
* @return Returns the dropPoint.
*/
public Point dropPoint()
{
return dropPoint;
}
/**
* The ParsedURL for this document or feed.
*
* @return
*/
public ParsedURL getUrl()
{
return url;
}
public boolean canChangeVisibility()
{
return true;
}
public boolean isDeletable()
{
return true;
}
public boolean isEditable()
{
return true;
}
public boolean isRejectable()
{
return false;
}
@Override
public boolean isHomogenousSeed()
{
return true;
}
@Override
public boolean isJustCrawl()
{
return justCrawl;
}
@Override
public boolean isJustMedia()
{
return justMedia;
}
}