package doser.word2vec.dbpediaGraphThinning;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import org.apache.log4j.Logger;
import com.hp.hpl.jena.query.QueryException;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import word2vec.tools.Word2VecModel;
/**
 * Samples random pairs of distinct entities that share a DBpedia category and
 * writes them to a tab-separated file.
 *
 * <p>The categories are read from a score file, the entities of a category are
 * looked up with a SPARQL query against a Jena model that is loaded from the
 * article-category dump in the constructor. All file locations are fixed
 * absolute paths.
 */
public class EvaluatePureDbpediaCategories {

    /** Number of entity pairs {@link #evaluate(String[])} tries to write. */
    public static final int RANDOMDRAWS = 50000;

    /** Exclusive upper bound a category score must stay below to be selected. */
    private static final double MAX_CATEGORY_SCORE = 0.04;

    /**
     * Number of unsuccessful draws in a row after which sampling is aborted,
     * so that input without any usable category cannot loop forever.
     */
    private static final int MAX_CONSECUTIVE_FAILED_DRAWS = 100000;

    private static final Logger LOGGER = Logger
            .getLogger(EvaluatePureDbpediaCategories.class);

    private final Random random;

    /** Currently unused: loading of the word2vec model is disabled below. */
    private Word2VecModel w2c;

    /** Model holding the article-category statements that are queried. */
    private final Model m;

    /**
     * Creates the random source and reads the article-category dump into a
     * default Jena model.
     */
    public EvaluatePureDbpediaCategories() {
        super();
        this.random = new Random();
        // this.w2c =
        // Word2VecModel.createWord2VecModel("/mnt/ssd1/disambiguation/word2vec/wikientitymodel_min5.seq");
        this.m = ModelFactory.createDefaultModel();
        // NOTE(review): no syntax is passed here, so Jena has to pick the
        // parser itself - confirm that the .nt file is read as N-Triples.
        this.m.read("/home/zwicklbauer/HDTGeneration/article_categories_en.nt");
    }

    /**
     * Reads the category score file and collects the categories whose score
     * lies strictly between 0 and {@value #MAX_CATEGORY_SCORE}.
     *
     * <p>Each line is expected to hold a score and a category separated by a
     * tab. Blank lines are ignored; lines with an unparsable score, or a
     * selected line without a category column, are skipped with a warning
     * instead of aborting the whole run. The score is parsed as a
     * {@code double} so that the comparison against the threshold is exact (a
     * {@code float} 0.04 widens to a value slightly below 0.04 and would pass
     * the strict check).
     *
     * <p>A summary "selected of positive-score lines" is printed to standard
     * output. I/O errors are logged; whatever was collected up to that point
     * is returned.
     *
     * @return the selected categories, possibly empty, never {@code null}
     */
    public String[] createCategorySet() {
        File file = new File(
                "/home/zwicklbauer/word2vec/MSEDbpediaCategories.txt");
        List<String> selected = new LinkedList<String>();
        int positiveScores = 0;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(file));
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] fields = line.split("\\t");
                double score;
                try {
                    score = Double.parseDouble(fields[0]);
                } catch (NumberFormatException e) {
                    LOGGER.warn("Skipping line with unparsable score: " + line);
                    continue;
                }
                if (score > 0) {
                    positiveScores++;
                    if (score < MAX_CATEGORY_SCORE) {
                        if (fields.length < 2) {
                            LOGGER.warn("Skipping line without category column: "
                                    + line);
                        } else {
                            selected.add(fields[1]);
                        }
                    }
                }
            }
        } catch (FileNotFoundException e) {
            LOGGER.error("Category file not found: " + file, e);
        } catch (IOException e) {
            LOGGER.error("Could not read category file: " + file, e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    LOGGER.error("Could not close category file: " + file, e);
                }
            }
        }
        System.out.println("Category Relation: " + selected.size() + " of "
                + positiveScores);
        return selected.toArray(new String[selected.size()]);
    }

    /**
     * Writes up to {@link #RANDOMDRAWS} lines to the sampling output file. For
     * each line a category is drawn at random and two entities of it are drawn
     * independently; the line is only written when both entities exist and
     * differ (ignoring case). Line format:
     * {@code 0<TAB>entity1<TAB>entity2<TAB>category<TAB>category}.
     *
     * <p>Draws for which no entity could be determined are skipped rather than
     * failing with a {@link NullPointerException}. If
     * {@value #MAX_CONSECUTIVE_FAILED_DRAWS} draws in a row yield no pair,
     * sampling stops early and an error is logged. The writer is always
     * closed, and write errors swallowed by {@link PrintWriter} are reported.
     *
     * @param categories category URIs to sample from
     * @throws IllegalArgumentException if {@code categories} is {@code null}
     *             or empty
     */
    public void evaluate(String[] categories) {
        if (categories == null || categories.length == 0) {
            throw new IllegalArgumentException(
                    "categories must contain at least one category");
        }
        File output = new File("/home/zwicklbauer/samplingoutput.dat");
        PrintWriter writer = null;
        try {
            writer = new PrintWriter(output);
            int written = 0;
            int failedInARow = 0;
            while (written < RANDOMDRAWS) {
                String cat = categories[this.random.nextInt(categories.length)];
                String e1 = queryEntitiesFromCategory(cat);
                String e2 = queryEntitiesFromCategory(cat);
                if (e1 != null && e2 != null && !e1.equalsIgnoreCase(e2)) {
                    writer.println("0\t" + e1 + "\t" + e2 + "\t" + cat + "\t"
                            + cat);
                    written++;
                    failedInARow = 0;
                } else {
                    failedInARow++;
                    if (failedInARow >= MAX_CONSECUTIVE_FAILED_DRAWS) {
                        LOGGER.error("Aborting sampling after " + failedInARow
                                + " consecutive draws without a usable entity pair; "
                                + written + " of " + RANDOMDRAWS + " written");
                        break;
                    }
                }
            }
            // PrintWriter never throws on write failures; ask explicitly.
            if (writer.checkError()) {
                LOGGER.error("Error while writing sampling output: " + output);
            }
        } catch (FileNotFoundException e) {
            LOGGER.error("Could not open sampling output: " + output, e);
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Returns the URI of one randomly chosen entity that has the given
     * category as its {@code dcterms:subject} in the loaded model.
     *
     * <p>The category URI is spliced into the query text as-is; a URI that is
     * not valid inside {@code <...>} makes the query fail, which is logged and
     * reported as {@code null}.
     *
     * @param catUri URI of the category
     * @return an entity URI, or {@code null} if the category has no entity
     *         with a URI or the query failed
     */
    private String queryEntitiesFromCategory(final String catUri) {
        final String query = "SELECT ?entities WHERE{ ?entities <http://purl.org/dc/terms/subject> <"
                + catUri + ">. }";
        QueryExecution qexec = null;
        try {
            final com.hp.hpl.jena.query.Query cquery = QueryFactory
                    .create(query);
            qexec = QueryExecutionFactory.create(cquery, m);
            final ResultSet results = qexec.execSelect();
            final List<String> entities = new LinkedList<String>();
            while (results.hasNext()) {
                final QuerySolution sol = results.nextSolution();
                final String uri = sol.getResource("entities").getURI();
                // getURI() is null for blank nodes; those cannot be written out.
                if (uri != null) {
                    entities.add(uri);
                }
            }
            if (!entities.isEmpty()) {
                return entities.get(this.random.nextInt(entities.size()));
            }
        } catch (final QueryException e) {
            LOGGER.error("Query for category failed: " + catUri, e);
        } finally {
            // Release the resources held by the query execution.
            if (qexec != null) {
                qexec.close();
            }
        }
        return null;
    }

    /**
     * Builds the category set and runs the sampling.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        EvaluatePureDbpediaCategories ev = new EvaluatePureDbpediaCategories();
        ev.evaluate(ev.createCategorySet());
    }
}