package org.wikipedia.miner.model;
import gnu.trove.set.hash.TIntHashSet;
import org.wikipedia.miner.db.struct.DbPage;
import org.wikipedia.miner.db.WEnvironment;
/**
* Represents a redirect in Wikipedia: a link that has been defined to connect a synonym to the correct article
* (e.g. <em>Farming</em> redirects to <em>Agriculture</em>).
*/
public class Redirect extends Page {

    /**
     * Initialises a newly created Redirect so that it represents the page given by <em>id</em>.
     *
     * @param env an active WEnvironment
     * @param id the unique identifier of the redirect page
     */
    public Redirect(WEnvironment env, int id) {
        super(env, id);
    }

    /**
     * Initialises a newly created Redirect from an already available {@link DbPage}, which is
     * handed straight to the {@link Page} constructor.
     *
     * @param env an active WEnvironment
     * @param id the unique identifier of the redirect page
     * @param pd the database record for this page (presumably pre-fetched by the caller so that it
     *           does not need to be looked up again -- confirm against {@link Page})
     */
    protected Redirect(WEnvironment env, int id, DbPage pd) {
        super(env, id, pd);
    }

    /**
     * Returns the Article that this redirect points to. This will continue following redirects until
     * it gets to an article (so it deals with double redirects).
     *
     * <p>{@code null} is returned when no article can be reached, that is when:
     * <ul>
     *   <li>a redirect in the chain has no recorded target (dead end),</li>
     *   <li>a target page does not exist,</li>
     *   <li>the chain of redirects loops back on itself, or</li>
     *   <li>the chain ends on a page that is not an {@link Article}.</li>
     * </ul>
     *
     * @return the equivalent Article for this redirect, or {@code null} if it cannot be resolved.
     */ //TODO: should just resolve double redirects during extraction.
    public Article getTarget() {

        int currId = id;

        // Ids of every redirect visited so far, used to detect loops.
        TIntHashSet redirectsFollowed = new TIntHashSet();

        // add() reports false when the id was already in the set, i.e. we have come full circle.
        while (redirectsFollowed.add(currId)) {

            Integer targetId = env.getDbRedirectTargetBySource().retrieve(currId);
            if (targetId == null) {
                return null;
            }

            Page target = Page.createPage(env, targetId);
            if (!target.exists()) {
                return null;
            }

            if (target.getType() == PageType.redirect) {
                // Double redirect: keep following the chain.
                currId = targetId;
                continue;
            }

            // Only hand back genuine articles; a chain that ends on any other kind of page is
            // treated like a dead end rather than failing with a ClassCastException.
            if (target instanceof Article) {
                return (Article) target;
            }
            return null;
        }

        // Loop of redirects encountered.
        return null;
    }
}