// ExampleOne.java example
//
// Explorer
// BigSemanticsJava-master
package ecologylab.bigsemantics.example;

import ecologylab.bigsemantics.collecting.SemanticsSessionScope;
import ecologylab.bigsemantics.cyberneko.CybernekoWrapper;
import ecologylab.bigsemantics.generated.library.RepositoryMetadataTypesScope;
import ecologylab.bigsemantics.metadata.builtins.Document;
import ecologylab.bigsemantics.metadata.builtins.DocumentClosure;
import ecologylab.generic.Continuation;
import ecologylab.net.ParsedURL;
import ecologylab.serialization.SimplTypesScope;
import ecologylab.serialization.formatenums.StringFormat;

/**
 * The most basic usage example of BigSemantics: it extracts a single metadata object from a single
 * URL. Newcomers to BigSemantics should start with this example.
 * <p>
 * An instance registers itself as the continuation of the download it queues, so the extracted
 * metadata is handed to {@link #callback(DocumentClosure)} rather than returned by
 * {@link #extract(String)}.
 * 
 * @author quyin
 * 
 */
public class ExampleOne implements Continuation<DocumentClosure>
{

  /**
   * The URL extracted by {@link #main(String[])} when none is given on the command line.
   */
  private static final String DEFAULT_URL = "http://dl.acm.org/citation.cfm?id=1871580";

  /**
   * The SemanticsSessionScope encompasses all the information BigSemantics needs for accomplishing
   * tasks, such as the wrapper repository, document parsers, and download monitors.
   */
  SemanticsSessionScope sss;

  /**
   * Builds the {@link SemanticsSessionScope} used by this example from the generated metadata
   * types scope and the Cyberneko DOM provider.
   */
  public ExampleOne()
  {
    // The metadataTypesScope contains descriptions of the generated metadata classes. These
    // classes are used to represent extracted metadata in the program.
    SimplTypesScope metadataTypesScope = RepositoryMetadataTypesScope.get();

    // The DOM provider converts an HTML stream into a DOM tree. BigSemantics by default uses
    // Cyberneko as its DOM provider, but another one can be used by writing a Wrapper (adapter)
    // class for it.
    Class<CybernekoWrapper> domProviderClass = CybernekoWrapper.class;

    // Creating the SemanticsSessionScope looks up the wrapper repository in several places, loads
    // all the wrapper definitions, and prepares the objects that will be needed soon.
    sss = new SemanticsSessionScope(metadataTypesScope, domProviderClass);
  }

  /**
   * Extracts the given URL. Note that because of the async nature of network operations, this
   * method does not directly return the extracted metadata. Instead, processing of the extracted
   * metadata needs to happen in the callback method, {@link #callback(DocumentClosure)}.
   * 
   * @param url
   *          the absolute address of the document to extract metadata from.
   * @throws IllegalArgumentException
   *           if {@code url} cannot be turned into a {@link ParsedURL}.
   */
  public void extract(String url)
  {
    ParsedURL purl = ParsedURL.getAbsolute(url);
    // NOTE(review): assumes ParsedURL.getAbsolute reports an unparsable address by returning
    // null -- confirm against ParsedURL. Failing here names the offending input instead of
    // letting a null travel into the session scope.
    if (purl == null)
    {
      throw new IllegalArgumentException("Not a valid absolute URL: " + url);
    }

    Document doc = sss.getOrConstructDocument(purl);
    DocumentClosure closure = doc.getOrConstructClosure();
    // Register this object first so that callback() is invoked for the queued download.
    closure.addContinuation(this);
    closure.queueDownload();
  }

  /**
   * The method that asynchronously processes extracted metadata objects. It prints a separator and
   * the runtime class of the closure's document, then serializes the document as XML.
   * 
   * @param closure
   *          the closure whose document is reported.
   */
  @Override
  public void callback(DocumentClosure closure)
  {
    System.out.println("\n* * * * *\n");
    Document doc = closure.getDocument();
    System.out.println("The type of the extracted metadata: " + doc.getClass());
    SimplTypesScope.serializeOut(doc, "Extracted metadata", StringFormat.XML);
  }

  /**
   * Runs the example on one URL.
   * 
   * @param args
   *          optional; when present, {@code args[0]} is the URL to extract. Without arguments the
   *          built-in default URL is used.
   */
  public static void main(String[] args)
  {
    String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;

    ExampleOne example = new ExampleOne();
    example.extract(url);
  }

}