package doser.tools.indexcreation; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.LinkedList; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.StmtIterator; import doser.lucene.analysis.DoserIDAnalyzer; public class AddFactsToIndex { public static final String NTFILE = "/home/zwicklbauer/HDTGeneration/mappingbased_properties_cleaned_en.nt"; public static final String OLDINDEX = "/mnt/ssd1/disambiguation/MMapLuceneIndexStandard/"; public static final String NEWINDEX = "/home/zwicklbauer/NewIndexTryout"; public static void main(String[] args) { HashMap<String, LinkedList<String>> map = new HashMap<String, LinkedList<String>>(); Model m = ModelFactory.createDefaultModel(); m.read(NTFILE); StmtIterator it = m.listStatements(); while (it.hasNext()) { Statement s = it.next(); Resource subject = s.getSubject(); Property pra = s.getPredicate(); RDFNode object = s.getObject(); if (object.isResource()) { Resource obj = object.asResource(); if (pra.isResource() && obj.getURI().startsWith( "http://dbpedia.org/resource/")) { if (!map.containsKey(subject.getURI())) { LinkedList<String> list = new LinkedList<String>(); map.put(subject.getURI(), list); } LinkedList<String> l = map.get(subject.getURI()); l.add(pra.getURI().replaceAll( "http://dbpedia.org/ontology/", "dbpediaOnt/") + ":::" + obj.getURI().replaceAll( "http://dbpedia.org/resource/", "dbpediaRes/")); } } } File oldIndexFile = new File(OLDINDEX); File newIndexFile = new File(NEWINDEX); try { final Directory oldDir = FSDirectory.open(oldIndexFile); final Directory newDir = FSDirectory.open(newIndexFile); final IndexWriterConfig config = new IndexWriterConfig( Version.LATEST, new DoserIDAnalyzer()); final IndexReader readerOldIndex = DirectoryReader.open(oldDir); final IndexWriter newIndexWriter = new IndexWriter(newDir, config); int numDocs = readerOldIndex.maxDoc(); for (int i = 0; i < numDocs; i++) { Document doc = readerOldIndex.document(i); String docurl = doc.get("Mainlink"); LinkedList<String> l = map.get(docurl); StringBuilder builder = new StringBuilder(); if (l != null) { for (String str : l) { builder.append(str); builder.append(";;;"); } } String s = builder.toString(); if (s.length() > 0) { s = s.substring(0, s.length() - 3); } doc.add(new TextField("Relations", s, Store.YES)); newIndexWriter.addDocument(doc); } readerOldIndex.close(); newIndexWriter.close(); } catch (IOException e) { e.printStackTrace(); } } }