package xyz.anduo.crawler;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
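* HITS (Hyperlink-Induced Topic Search) algorithm: computes hub and authority scores for the pages of a web graph.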
*
* @author anduo
*
*/
public class HITS {
    /** Number of iterations run by {@link #computeHITS()} and by the constructor. */
    private static final int DEFAULT_ITERATIONS = 25;

    /** Data structure holding the web graph. */
    private final WebGraphMemory graph;
    /** Hub score of every page, keyed by node identifier. */
    private final Map<Integer, Double> hubScores; // <id,value>
    /** Authority score of every page, keyed by node identifier. */
    private final Map<Integer, Double> authorityScores; // <id,value>

    /**
     * Creates the scorer, seeds every node with hub and authority score 1 and
     * immediately runs the default number of unweighted iterations.
     *
     * <p>Nodes are addressed by the identifiers {@code 1..graph.numNodes()}.
     *
     * @param graph the web graph to score; must not be {@code null}
     * @throws NullPointerException if {@code graph} is {@code null}
     */
    public HITS(WebGraphMemory graph) {
        if (graph == null) {
            throw new NullPointerException("graph");
        }
        this.graph = graph;
        this.hubScores = new HashMap<Integer, Double>();
        this.authorityScores = new HashMap<Integer, Double>();
        int numNodes = graph.numNodes();
        for (int i = 1; i <= numNodes; i++) {
            Integer id = Integer.valueOf(i);
            hubScores.put(id, Double.valueOf(1.0));
            authorityScores.put(id, Double.valueOf(1.0));
        }
        // Call the private helper, not the public overridable method, so a
        // subclass override never runs against a half-constructed object.
        iterate(DEFAULT_ITERATIONS, false);
    }

    /**
     * Computes the hub and authority scores using the default number of
     * iterations.
     */
    public void computeHITS() {
        computeHITS(DEFAULT_ITERATIONS);
    }

    /**
     * Computes the hub and authority scores, ignoring link weights.
     *
     * @param numIterations number of update rounds; values &lt;= 0 do nothing
     */
    public void computeHITS(int numIterations) {
        iterate(numIterations, false);
    }

    /**
     * Computes the hub and authority scores, multiplying every neighbour's
     * contribution by the value stored for that link in the graph's link map
     * (presumably the link weight; confirm against {@code WebGraphMemory}).
     *
     * @param numIterations number of update rounds; values &lt;= 0 do nothing
     */
    public void computeWeightedHITS(int numIterations) {
        iterate(numIterations, true);
    }

    /**
     * Runs the HITS update rounds. Each round first recomputes every
     * authority score from the current hub scores, then every hub score from
     * the new authority scores, and finally normalizes both maps. Keeping the
     * two passes separate makes the result independent of node numbering.
     */
    private void iterate(int numIterations, boolean weighted) {
        for (int round = 0; round < numIterations; round++) {
            int numNodes = graph.numNodes();

            // Authority pass: reads hub scores only, so in-place writes are safe.
            for (int i = 1; i <= numNodes; i++) {
                Integer id = Integer.valueOf(i);
                double authority = sumLinkedScores(graph.inLinks(id), hubScores, weighted);
                authorityScores.put(id, Double.valueOf(authority));
            }

            // Hub pass: reads the freshly computed authority scores only.
            for (int i = 1; i <= numNodes; i++) {
                Integer id = Integer.valueOf(i);
                double hub = sumLinkedScores(graph.outLinks(id), authorityScores, weighted);
                hubScores.put(id, Double.valueOf(hub));
            }

            normalize(authorityScores);
            normalize(hubScores);
        }
    }

    /**
     * Sums the scores of all nodes named in {@code links}. A {@code null}
     * link map, a neighbour without a stored score, or (in weighted mode) a
     * {@code null} link value contributes nothing instead of raising a
     * {@link NullPointerException}.
     */
    private static double sumLinkedScores(Map<Integer, Double> links,
            Map<Integer, Double> scores, boolean weighted) {
        if (links == null) {
            return 0.0;
        }
        double total = 0.0;
        for (Map.Entry<Integer, Double> link : links.entrySet()) {
            Double score = scores.get(link.getKey());
            if (score == null) {
                continue;
            }
            if (!weighted) {
                total += score.doubleValue();
                continue;
            }
            Double weight = link.getValue();
            if (weight != null) {
                total += score.doubleValue() * weight.doubleValue();
            }
        }
        return total;
    }

    /**
     * Scales the scores so that they sum to 1. When the sum is zero (for
     * example, a graph without links) the map is left unchanged, because
     * dividing would turn every score into NaN.
     */
    private void normalize(Map<Integer, Double> scoreSet) {
        double summation = 0.0;
        for (Double score : scoreSet.values()) {
            summation += score.doubleValue();
        }
        if (summation == 0.0) {
            return;
        }
        for (Map.Entry<Integer, Double> entry : scoreSet.entrySet()) {
            entry.setValue(Double.valueOf(entry.getValue().doubleValue() / summation));
        }
    }

    /**
     * Returns the hub score associated with the given link.
     *
     * @param link URL of the page
     * @return the stored hub score, or {@code null} if none is stored for the
     *         identifier the graph returns for {@code link}
     */
    public Double hubScore(String link) {
        return hubScore(graph.URLToIdentifyer(link));
    }

    /**
     * Returns the hub score stored for the given node identifier, or
     * {@code null} if there is none.
     */
    private Double hubScore(Integer id) {
        return hubScores.get(id);
    }

    /**
     * Sets the hub score associated with the given link. Does nothing when
     * the graph returns no identifier for the link.
     *
     * @param link  URL of the page
     * @param value the hub score to store
     */
    public void initializeHubScore(String link, double value) {
        Integer id = graph.URLToIdentifyer(link);
        if (id != null) {
            hubScores.put(id, Double.valueOf(value));
        }
    }

    /**
     * Sets the hub score associated with the given node identifier. Does
     * nothing when {@code id} is {@code null}.
     *
     * @param id    node identifier
     * @param value the hub score to store
     */
    public void initializeHubScore(Integer id, double value) {
        if (id != null) {
            hubScores.put(id, Double.valueOf(value));
        }
    }

    /**
     * Returns the authority score associated with the given link.
     *
     * @param link URL of the page
     * @return the stored authority score, or {@code null} if none is stored
     *         for the identifier the graph returns for {@code link}
     */
    public Double authorityScore(String link) {
        return authorityScore(graph.URLToIdentifyer(link));
    }

    /**
     * Returns the authority score stored for the given node identifier, or
     * {@code null} if there is none.
     */
    private Double authorityScore(Integer id) {
        return authorityScores.get(id);
    }

    /**
     * Sets the authority score associated with the given link. Does nothing
     * when the graph returns no identifier for the link.
     *
     * @param link  URL of the page
     * @param value the authority score to store
     */
    public void initializeAuthorityScore(String link, double value) {
        Integer id = graph.URLToIdentifyer(link);
        if (id != null) {
            authorityScores.put(id, Double.valueOf(value));
        }
    }

    /**
     * Sets the authority score associated with the given node identifier.
     * Does nothing when {@code id} is {@code null}.
     *
     * @param id    node identifier
     * @param value the authority score to store
     */
    public void initializeAuthorityScore(Integer id, double value) {
        if (id != null) {
            authorityScores.put(id, Double.valueOf(value));
        }
    }
}