package focusedCrawler.link.classifier; import java.net.MalformedURLException; import java.util.Iterator; import java.util.Map; import focusedCrawler.link.classifier.builder.Instance; import focusedCrawler.link.classifier.builder.LinkNeighborhoodWrapper; import focusedCrawler.link.frontier.LinkRelevance; import focusedCrawler.target.model.Page; import focusedCrawler.util.parser.LinkNeighborhood; import weka.classifiers.Classifier; import weka.core.Instances; /** * This class implements the link classifier for the hub links. * @author lbarbosa * */ public class LinkClassifierHub implements LinkClassifier{ private Classifier classifier; private Instances instances; private LinkNeighborhoodWrapper wrapper; private String[] attributes; public LinkClassifierHub(){ } public LinkClassifierHub(Classifier classifier, Instances instances, LinkNeighborhoodWrapper wrapper,String[] attributes) { this.classifier = classifier; this.instances = instances; this.wrapper = wrapper; this.attributes = attributes; } public LinkRelevance classify(LinkNeighborhood ln) throws LinkClassifierException { LinkRelevance result = null; try { if(classifier == null){ result = new LinkRelevance(ln.getLink(),LinkRelevance.DEFAULT_HUB_RELEVANCE+1); }else{ Map<String, Instance> urlWords = wrapper.extractLinks(ln, attributes); Iterator<String> iter = urlWords.keySet().iterator(); while(iter.hasNext()){ String url = (String)iter.next(); Instance instance = (Instance)urlWords.get(url); double[] values = instance.getValues(); weka.core.Instance instanceWeka = new weka.core.Instance(1, values); instanceWeka.setDataset(instances); double[] prob = classifier.distributionForInstance(instanceWeka); double relevance = LinkRelevance.DEFAULT_HUB_RELEVANCE + prob[0]*100; result = new LinkRelevance(ln.getLink(),relevance); } } } catch (MalformedURLException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } return result; } @Override public LinkRelevance[] classify(Page page) throws LinkClassifierException { // TODO Auto-generated method stub return null; } }