package DBpediaCategoryCorrection;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Set;
import org.apache.log4j.Logger;
import com.hp.hpl.jena.query.QueryException;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
public class DBpediaCategoryCorrection {
public static String SKOSHDT;
public static String MAINFILE;
public static String OUTPUTFILE;
private Model categorySkosModel;
private HashSet<String> prominentCategories;
public DBpediaCategoryCorrection(String prominentFile) {
super();
this.categorySkosModel = ModelFactory.createDefaultModel();
this.categorySkosModel.read(SKOSHDT);
// Create ProminentHashSet
this.prominentCategories = new HashSet<String>();
File f = new File(prominentFile);
try {
BufferedReader reader = new BufferedReader(new FileReader(f));
String category = "";
while ((category = reader.readLine()) != null) {
String cat = "http://dbpedia.org/resource/Category:"+category;
prominentCategories.add(cat);
}
reader.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
if (args.length < 4 || args.length > 4) {
System.out
.println("Bitte Parameter korrekt eingeben: Promising Categories - OutputFile - EntityCategories - SKOSCategories");
} else {
DBpediaCategoryCorrection.MAINFILE = args[0];
DBpediaCategoryCorrection.OUTPUTFILE = args[2];
DBpediaCategoryCorrection.SKOSHDT = args[3];
DBpediaCategoryCorrection correction = new DBpediaCategoryCorrection(args[1]);
correction.correctCategories();
}
}
public void correctCategories() {
File outputFile = new File(OUTPUTFILE);
File f = new File(MAINFILE);
try {
PrintWriter writer = new PrintWriter(outputFile);
BufferedReader reader = new BufferedReader(new FileReader(f));
String category = "";
while ((category = reader.readLine()) != null) {
HashSet<String> s = new HashSet<String>();
s.add(category);
Set<String> set = recursiveIteration(s, 0);
for (String str : set) {
writer.println(category+"\t"+str);
}
}
writer.close();
reader.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
private Set<String> recursiveIteration(Set<String> stack, int depth) {
Set<String> result = new HashSet<String>();
for(String s : stack) {
if(prominentCategories.contains(s)) {
result.add(s);
}
}
if(result.size() == 0 && depth < 20) {
Set<String> newStack = new HashSet<String>();
for(String s : stack) {
Set<String> set = querySubCategories(s);
System.out.println("Size: "+set.size());
newStack.addAll(set);
}
return recursiveIteration(newStack, depth + 1);
} else {
return result;
}
}
public Set<String> querySubCategories(final String uri) {
final Set<String> types = new HashSet<String>();
final String query = "SELECT ?sub WHERE { <"+uri+"> <http://www.w3.org/2004/02/skos/core#broader> ?sub }";
try {
final com.hp.hpl.jena.query.Query que = QueryFactory.create(query);
final QueryExecution qexec = QueryExecutionFactory.create(que,
categorySkosModel);
final ResultSet results = qexec.execSelect();
while (results.hasNext()) {
final QuerySolution sol = results.nextSolution();
final String name = sol.getResource("sub").toString();
types.add(new String(name));
}
} catch (final QueryException e) {
Logger.getRootLogger().error(e.getStackTrace());
}
return types;
}
}