package doser.entitydisambiguation.table.columndisambiguation;
import java.io.File;
import java.io.IOException;
import java.util.Locale;
import org.apache.log4j.Logger;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.jgrapht.graph.DefaultDirectedWeightedGraph;
import org.jgrapht.graph.DefaultWeightedEdge;
import doser.entitydisambiguation.properties.Properties;
import doser.entitydisambiguation.table.logic.Type;
public class InverseDocumentFrequencyFeature extends AbstractTypeDisFeatures {
private IndexReader iReader;
private IndexSearcher iSearcher;
private final static float WEIGHT = 0.005f;
public InverseDocumentFrequencyFeature(
final DefaultDirectedWeightedGraph<Type, DefaultWeightedEdge> grah) {
super(grah);
final File file = new File(Properties.getInstance()
.getTypeLuceneIndex());
Directory dir;
try {
dir = FSDirectory.open(file);
this.iReader = DirectoryReader.open(dir);
this.iSearcher = new IndexSearcher(this.iReader);
} catch (final IOException e) {
Logger.getRootLogger().error(e.getStackTrace());
}
}
@Override
public float computeFeature(final Type type) {
float res = 0;
if (type.getUri() != null) {
final String uri = type.getUri();
String correctedString = "";
if (uri.contains("Category:")) {
final String[] splitter = uri.split(":");
correctedString = "http://yago-knowledge.org/resource/wikicategory_"
+ splitter[2];
} else {
correctedString = uri;
}
final TermQuery tquery = new TermQuery(new Term("Type",
correctedString.toLowerCase(Locale.US)));
TopDocs docs;
try {
docs = this.iSearcher.search(tquery, 1);
final ScoreDoc[] scoredocs = docs.scoreDocs;
if (scoredocs.length == 0) {
return 0;
}
final String number = this.iReader.document(scoredocs[0].doc).get("Number");
final double val1 = (this.iReader.maxDoc() / Double
.parseDouble(number)) + 1;
res = (float) (WEIGHT * Math.sqrt(Math.log(val1)
/ Math.log(2)));
} catch (final IOException e) {
Logger.getRootLogger().error(e.getStackTrace());
}
return 0;
}
return res;
}
}