package ecologylab.bigsemantics.example;
import ecologylab.bigsemantics.collecting.SemanticsSessionScope;
import ecologylab.bigsemantics.cyberneko.CybernekoWrapper;
import ecologylab.bigsemantics.generated.library.RepositoryMetadataTypesScope;
import ecologylab.bigsemantics.metadata.builtins.Document;
import ecologylab.bigsemantics.metadata.builtins.DocumentClosure;
import ecologylab.generic.Continuation;
import ecologylab.net.ParsedURL;
import ecologylab.serialization.SimplTypesScope;
import ecologylab.serialization.formatenums.StringFormat;
/**
* This is the most basic usage example of BigSemantics, that extracts a single metadata object from
* a single URL. Newcomers to BigSemantics should start with this example.
*
* @author quyin
*
*/
public class ExampleOne implements Continuation<DocumentClosure>
{
/**
* The SemanticsSessionScope encompasses all the information BigSemantics needs for accomplishing
* tasks, such as the wrapper repository, document parsers, and download monitors.
*/
SemanticsSessionScope sss;
public ExampleOne()
{
/**
* The metadataTypesScope contains descriptions of the generated metadata classes. These classes
* are used to represent extracted metadata in program.
*/
SimplTypesScope metadataTypesScope = RepositoryMetadataTypesScope.get();
/**
* DOM provider is used to convert a HTML stream into a DOM tree. BigSemantics by default uses
* Cyberneko as its DOM provider, but you can use another one by writing a Wrapper (adapter)
* class for it.
*/
Class<CybernekoWrapper> domProviderClass = CybernekoWrapper.class;
/**
* Creates the SemanticsSessionScope. It will look up the wrapper repository in several places,
* load all the wrapper definitions, and prepare all the objects that will be needed soon.
*/
sss = new SemanticsSessionScope(metadataTypesScope,
domProviderClass);
}
/**
* Extracts the given URL. Note that because of the async nature of network operations, this
* method does not directly return the extracted metadata. Instead, processing of the extracted
* metadata needs to happen in the callback method.
*
* @param url
*/
public void extract(String url)
{
ParsedURL purl = ParsedURL.getAbsolute(url);
Document doc = sss.getOrConstructDocument(purl);
DocumentClosure closure = doc.getOrConstructClosure();
closure.addContinuation(this);
closure.queueDownload();
}
/**
* The method that asynchronously processes extracted metadata objects.
*/
@Override
public void callback(DocumentClosure closure)
{
System.out.println("\n* * * * *\n");
Document doc = closure.getDocument();
System.out.println("The type of the extracted metadata: " + doc.getClass());
SimplTypesScope.serializeOut(doc, "Extracted metadata", StringFormat.XML);
}
/**
* @param args
*/
public static void main(String[] args)
{
ExampleOne example = new ExampleOne();
example.extract("http://dl.acm.org/citation.cfm?id=1871580");
}
}