package focusedCrawler.link.linkanalysis;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Vector;
import focusedCrawler.link.BipartiteGraphRepository;
import focusedCrawler.util.parser.BackLinkNeighborhood;
import focusedCrawler.util.parser.LinkNeighborhood;
import focusedCrawler.util.vsm.VSMElement;
import focusedCrawler.util.vsm.VSMElementComparator;
/**
 * Tallies links around a set of relevant sites using the link graph held in a
 * {@link BipartiteGraphRepository} and prints the top entries of each tally
 * to standard output.
 *
 * <p>Two tallies are built by {@link #execute(HashSet)}:
 * <ul>
 *   <li><b>HUB</b>: for every page returned as a backlink of a relevant site,
 *       how many times it was returned.</li>
 *   <li><b>AUTH</b>: for every out-link of those backlink pages that is not
 *       itself in the relevant set, how many times it was encountered.</li>
 * </ul>
 */
public class InOutLinks {

    /** Maximum number of entries printed for each tally. */
    private static final int MAX_PRINTED = 100;

    /** Source of backlink and out-link information. */
    private final BipartiteGraphRepository graphRep;

    /**
     * @param graphRep repository queried for backlinks and out-links
     */
    public InOutLinks(BipartiteGraphRepository graphRep) {
        this.graphRep = graphRep;
    }

    /**
     * Builds the HUB and AUTH tallies for the given sites and prints, for each
     * tally, its size followed by up to {@value #MAX_PRINTED} entries in the
     * order imposed by {@link VSMElementComparator}.
     *
     * @param relSites URLs (as strings) of the relevant sites; each one must be
     *                 parseable by {@link URL#URL(String)}
     * @throws Exception if a site or backlink string is not a valid URL, or if
     *                   the repository fails while being queried
     */
    public void execute(HashSet<String> relSites) throws Exception {
        HashMap<String,VSMElement> hubCounts = new HashMap<String,VSMElement>();
        HashMap<String,VSMElement> authCounts = new HashMap<String,VSMElement>();

        for (String site : relSites) {
            BackLinkNeighborhood[] backlinks = graphRep.getBacklinks(new URL(site));
            if (backlinks == null) {
                continue;
            }
            for (BackLinkNeighborhood backlink : backlinks) {
                String hub = backlink.getLink();
                increment(hubCounts, hub);

                LinkNeighborhood[] outlinks = graphRep.getOutlinks(new URL(hub));
                // NOTE(review): getBacklinks is already treated as nullable above;
                // getOutlinks is presumably nullable too when the page has no
                // stored out-links. Skip the hub instead of failing the whole run.
                if (outlinks == null) {
                    continue;
                }
                for (LinkNeighborhood outlink : outlinks) {
                    if (outlink == null) {
                        continue;
                    }
                    String target = outlink.getLink().toString();
                    // Only pages outside the relevant set are counted as candidates.
                    if (!relSites.contains(target)) {
                        increment(authCounts, target);
                    }
                }
            }
        }

        printTop("HUB:", hubCounts);
        printTop("AUTH:", authCounts);
    }

    /** Adds one to the weight stored under {@code key}, creating the entry at weight 0 first if absent. */
    private static void increment(HashMap<String,VSMElement> counts, String key) {
        VSMElement count = counts.get(key);
        if (count == null) {
            count = new VSMElement(key, 0);
        }
        count.setWeight(count.getWeight()+1);
        counts.put(key, count);
    }

    /** Prints a separator, {@code label} plus the tally size, then the first {@link #MAX_PRINTED} sorted entries. */
    private static void printTop(String label, HashMap<String,VSMElement> counts) {
        System.out.println("------");
        System.out.println(label + counts.size());
        Vector<VSMElement> ranked = new Vector<VSMElement>(counts.values());
        Collections.sort(ranked, new VSMElementComparator());
        for (int i = 0; i < MAX_PRINTED && i < ranked.size(); i++) {
            VSMElement elem = ranked.elementAt(i);
            System.out.println(elem.getWord() + ":" + elem.getWeight());
        }
    }
}