package eu.leads.processor.utils;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.leads.crawler.model.Page;
import eu.leads.processor.execute.ProgressReport;
//import eu.leads.processor.pagerank.graph.LeadsPrGraph;
import org.infinispan.Cache;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Timer;
import java.util.Vector;
/**
* Created by vagvaz on 12/23/13.
*/
public class ImportCrawledPages implements Runnable {
private Thread thread;
private String crawlerCache;
private String destinationCache;
public ImportCrawledPages(String src,String dest){
crawlerCache = src;
destinationCache = dest;
thread = new Thread(this);
}
@Override
public void run() {
Cache<String, String> control = (Cache<String, String>) InfinispanUtils.getOrCreatePersistentMap("processorControl");
if(control.containsKey("service"))
{
System.out.println("Service is running so import is unnecessary");
return;
}
importCrawledPages(crawlerCache,destinationCache);
thread.interrupt();
return;
}
public void start(){
this.thread.start();
}
public void join(){
try {
this.thread.join();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
private void importCrawledPages(String source, String destination) {
AlchemyScore.initialize();
Cache cache = (Cache) InfinispanUtils.getOrCreatePersistentMap(source);
Cache webpagesCache = (Cache)InfinispanUtils.getOrCreatePersistentMap(destination);
ObjectMapper mapper = new ObjectMapper();
ProgressReport report = new ProgressReport("\nImported Tuples ",0,cache.size() );
Timer timer = new Timer();
// timer.scheduleAtFixedRate(report,10,2000);
Vector<LeadsProcessorPage> buffer = new Vector<LeadsProcessorPage>(100);
StdOutputWriter.getInstance().println("Importing " + cache.size() + " crawled pages");
for(Object ob : cache.values()){
Page page = (Page)ob;
LeadsProcessorPage processorPage = new LeadsProcessorPage(page);
Double sentiment = null;
sentiment = AlchemyScore.getScore(page.getUrl().toString());
report.tick();
processorPage.setSentiment(sentiment);
//put negative value for generating the pagerank value during query processing.
//and let the Leads pagerank algorithm to process the webpage.
try {
processorPage.setPagerank((double) Web.pagerank("http://" + page.getUrl().toURI().getHost()));
} catch (URISyntaxException e) {
e.printStackTrace();
}
buffer.add(processorPage);
if(buffer.size() == 100)
{
try {
for(LeadsProcessorPage procpage: buffer)
webpagesCache.put(procpage.getUrl(),mapper.writeValueAsString(procpage));
} catch (JsonProcessingException e) {
e.printStackTrace();
}
buffer.clear();
}
}
try {
for(LeadsProcessorPage procpage: buffer)
webpagesCache.put(procpage.getUrl(),mapper.writeValueAsString(procpage));
} catch (JsonProcessingException e) {
e.printStackTrace();
}
buffer.clear();
// report.run();
report.cancel();
timer.cancel();
}
public void stop() {
thread.interrupt();
thread = null;
}
}