package ecologylab.bigsemantics.example.linking;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import ecologylab.bigsemantics.collecting.SemanticsSessionScope;
import ecologylab.bigsemantics.cyberneko.CybernekoWrapper;
import ecologylab.bigsemantics.generated.library.RepositoryMetadataTypesScope;
import ecologylab.bigsemantics.generated.library.creativeWork.scholarlyArticle.*;
import ecologylab.bigsemantics.metadata.builtins.Document;
import ecologylab.bigsemantics.metadata.builtins.DocumentClosure;
import ecologylab.bigsemantics.metadata.output.HtmlRenderer;
import ecologylab.generic.Continuation;
import ecologylab.net.ParsedURL;
public class LinkingMetadata implements Continuation<DocumentClosure>
{
private List<ScholarlyArticle> collection = new ArrayList<ScholarlyArticle>();
private int count = 0;
private Object countLock = new Object();
HtmlRenderer renderer;
public void collect(List<String> urls) throws IOException
{
String title = "Linking ACM Portal and CiteSeerX";
String header = title;
String styleSheet = "linking_metadata.css";
String javascript = "linking_metadata.js";
renderer = new HtmlRenderer(new File("linking_metadata.html"), title, header, styleSheet, javascript);
if (!renderer.isBad())
{
count = urls.size();
// create the infoCollector
SemanticsSessionScope infoCollector = new SemanticsSessionScope(RepositoryMetadataTypesScope.get(), CybernekoWrapper.class);
// seed start urls
for (String url : urls)
{
ParsedURL seedUrl = ParsedURL.getAbsolute(url);
Document doc = infoCollector.getOrConstructDocument(seedUrl);
doc.queueDownload(this);
}
}
}
@Override
public void callback(DocumentClosure closure)
{
Document doc = closure.getDocument();
if (doc != null)
if (doc.getMetaMetadata().getName().equals("acm_portal"))
collection.add((ScholarlyArticle) doc);
synchronized (countLock)
{
count--;
if (count == 0)
{
for (ScholarlyArticle article : collection)
renderer.appendMetadata(article);
renderer.close();
closure.getSemanticsScope().getDownloadMonitors().stop(false);
}
}
}
public static void main(String[] args) throws IOException, InterruptedException
{
if (args.length <= 0)
{
System.err.println("args: <url-to-a-resource> | -l <path-to-a-text-file-listing-urls>");
System.exit(-1);
return;
}
List<String> urls = new ArrayList<String>();
int i = 0;
while (i < args.length && args[i] != null && args[i].length() > 0)
{
if ("-l".equals(args[i]))
{
++i;
String pathList = args[i];
BufferedReader br = new BufferedReader(new FileReader(pathList));
String line = null;
while ((line = br.readLine()) != null)
{
line = line.trim();
if (line.length() > 0 && !line.startsWith("#"))
urls.add(line.trim());
}
br.close();
}
else
{
urls.add(args[i]);
}
++i;
}
LinkingMetadata lm = new LinkingMetadata();
lm.collect(urls);
}
}