/*
############################################################################
##
## Copyright (C) 2006-2009 University of Utah. All rights reserved.
##
## This file is part of DeepPeep.
##
## This file may be used under the terms of the GNU General Public
## License version 2.0 as published by the Free Software Foundation
## and appearing in the file LICENSE.GPL included in the packaging of
## this file. Please review the following to ensure GNU General Public
## Licensing requirements will be met:
## http://www.opensource.org/licenses/gpl-license.php
##
## If you are unsure which license is appropriate for your use (for
## instance, you are interested in developing a commercial derivative
## of DeepPeep), please contact us at deeppeep@sci.utah.edu.
##
## This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
## WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
##
############################################################################
*/
package focusedCrawler.link.classifier;
import focusedCrawler.link.frontier.LinkRelevance;
import focusedCrawler.target.model.Page;
import focusedCrawler.util.parser.LinkNeighborhood;
/**
 * Link classifier that uses the naive Bayes link classifier to set the
 * priority of each link.
 *
 * <p>Copyright: Copyright (c) 2004</p>
 *
 * @author Luciano Barbosa
 * @version 1.0
 */
public class LinkClassifierImpl implements LinkClassifier {

    /**
     * Weight associated with each class index returned by the underlying
     * classifier: class 0 maps to 2, class 1 to 1 and class 2 to 0.
     */
    private static final int[] WEIGHTS = {2, 1, 0};

    /** Multiplier applied to the class weight; the probability percentage is added on top. */
    private static final int INTERVAL_RANDOM = 100;

    /** Classifier that produces the per-class probabilities for a link neighborhood. */
    private final LNClassifier lnClassifier;

    /**
     * Creates a link classifier backed by the given link-neighborhood classifier.
     *
     * @param lnClassifier classifier used to obtain class probabilities for each link
     */
    public LinkClassifierImpl(LNClassifier lnClassifier) {
        this.lnClassifier = lnClassifier;
    }

    /**
     * Classifies every link neighborhood found in the parsed data of the page.
     *
     * @param page page whose link neighborhoods are classified
     * @return one {@code LinkRelevance} per link neighborhood, in the same order
     * @throws LinkClassifierException if any link fails to classify; the message
     *         names the link being processed (or {@code null} when the failure
     *         happened before any link was reached) and the page URL
     */
    public LinkRelevance[] classify(Page page) throws LinkClassifierException {
        LinkNeighborhood[] lns = page.getParsedData().getLinkNeighborhood();
        // Tracks the link currently being processed so the error message can name it.
        LinkNeighborhood ln = null;
        try {
            LinkRelevance[] linkRelevance = new LinkRelevance[lns.length];
            for (int i = 0; i < lns.length; i++) {
                ln = lns[i];
                linkRelevance[i] = classify(ln);
            }
            return linkRelevance;
        } catch (Exception ex) {
            // Build the message null-safely: 'ln' is still null when the failure
            // occurred before the first iteration, and dereferencing it here would
            // replace the real cause with a NullPointerException.
            throw new LinkClassifierException("Failed to classify link [" + describeLink(ln)
                    + "] from page: " + page.getURL(), ex);
        }
    }

    /**
     * Classifies a single link neighborhood.
     *
     * <p>The class with the highest probability is selected, and the relevance is
     * computed as {@code weight(class) * 100 + probability * 100}.</p>
     *
     * @param ln link neighborhood to classify
     * @return relevance of the link
     * @throws LinkClassifierException if the underlying classifier fails or returns
     *         probabilities from which no supported class can be selected
     */
    public LinkRelevance classify(LinkNeighborhood ln) throws LinkClassifierException {
        try {
            double[] prob = lnClassifier.classify(ln);

            // Pick the index of the most probable class.
            int bestClass = -1;
            double maxProb = -1;
            for (int i = 0; i < prob.length; i++) {
                if (prob[i] > maxProb) {
                    maxProb = prob[i];
                    bestClass = i;
                }
            }

            // An empty (or all-NaN) probability array leaves bestClass at -1, and a
            // classifier with more classes than WEIGHTS would index past the table;
            // report both with a descriptive cause instead of a bare index error.
            if (bestClass < 0 || bestClass >= WEIGHTS.length) {
                throw new IllegalStateException("Cannot select a class: winning index " + bestClass
                        + " out of " + prob.length + " probabilities, but " + WEIGHTS.length
                        + " classes are supported");
            }

            double probability = prob[bestClass] * 100;
            // An exact 100 would make the score equal to the base value of the next
            // weight level, so it is lowered to 99.
            // NOTE(review): this ranks a probability of 1.0 below e.g. 0.995 (99.5);
            // kept as-is to preserve existing scores - confirm whether intended.
            if (probability == 100) {
                probability = 99;
            }

            double result = (WEIGHTS[bestClass] * INTERVAL_RANDOM) + probability;
            return new LinkRelevance(ln.getLink(), result);
        } catch (Exception ex) {
            throw new LinkClassifierException("Failed to classify link: " + describeLink(ln), ex);
        }
    }

    /** Returns a printable form of the link for error messages, tolerating null values. */
    private static String describeLink(LinkNeighborhood ln) {
        return ln == null ? "null" : String.valueOf(ln.getLink());
    }
}