package org.deri.grefine.reconcile.rdf.executors;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.jena.query.text.EntityDefinition;
import org.apache.jena.query.text.TextDatasetFactory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.json.JSONException;
import org.json.JSONWriter;

import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.DatasetFactory;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;

/**
 * Executes SPARQL queries against dump RDF and supports full-text search via
 * jena-text (the successor of LARQ). As the index is built together with the
 * model, this class can be costly to construct; consider sharing instances.
 * It is thread-safe.
 *
 * @author fadmaa
 */
public class DumpQueryExecutor implements QueryExecutor {

    private static final int DEFAULT_MIN_NGRAM = 3;
    private static final int DEFAULT_MAX_NGRAM = 3;

    private Dataset index;
    private boolean loaded = false;
    // property used for indexing/search (only if one property is used)
    private String propertyUri;

    public DumpQueryExecutor() {
    }

    public DumpQueryExecutor(String propertyUri) {
        this.propertyUri = propertyUri;
    }

    public DumpQueryExecutor(Model m, String propUri) {
        this(m, propUri, false, DEFAULT_MIN_NGRAM, DEFAULT_MAX_NGRAM);
    }

    public DumpQueryExecutor(Model m) {
        this(m, null, false, DEFAULT_MIN_NGRAM, DEFAULT_MAX_NGRAM);
    }

    public DumpQueryExecutor(Model m, String propertyUri, boolean ngramIndex, int minGram, int maxGram) {
        // NOTE: ngramIndex, minGram and maxGram are currently unused; the index
        // is built with the default jena-text analyzer.
        loaded = true;
        this.propertyUri = propertyUri;
        Dataset ds1 = DatasetFactory.createMem();
        // Index the given property: "uri" holds the entity URI, "text" the indexed literal.
        EntityDefinition entDef = new EntityDefinition("uri", "text", m.getResource(propertyUri));
        // Lucene, in memory.
        Directory dir = new RAMDirectory();
        // Join the base dataset and the Lucene index together into one text dataset.
        this.index = TextDatasetFactory.createLucene(ds1, dir, entDef);
        this.index.getDefaultModel().add(m);
        //this.index.commit();
    }

    @Override
    public ResultSet sparql(String sparql) {
        if (!loaded) {
            throw new RuntimeException("Model is not loaded");
        }
        //this.index.begin(ReadWrite.READ);
        Query query = QueryFactory.create(sparql, Syntax.syntaxSPARQL_11);
        QueryExecution qExec = QueryExecutionFactory.create(query, this.index);
        // The QueryExecution is deliberately not closed here: closing it would
        // invalidate the returned ResultSet, so consumption is left to the caller.
        return qExec.execSelect();
    }

    @Override
    public void write(JSONWriter writer) throws JSONException {
        writer.object();
        writer.key("type");
        writer.value("dump");
        if (propertyUri != null) {
            writer.key("propertyUri");
            writer.value(propertyUri);
        }
        writer.endObject();
    }

    public void dispose() {
        this.index.close();
        this.index = null; // free the memory used for the model
    }

    public synchronized void initialize(FileInputStream in) {
        if (loaded) {
            return;
        }
        loaded = true;
        // -- Read and index all literal strings.
        Model model = ModelFactory.createDefaultModel();
        model.read(in, null, "TTL");
        Dataset ds1 = DatasetFactory.createMem();
        EntityDefinition entDef = new EntityDefinition("uri", "text", model.getResource(propertyUri));
        // Lucene, in memory.
        Directory dir = new RAMDirectory();
        // Join the base dataset and the Lucene index together into one text dataset.
        this.index = TextDatasetFactory.createLucene(ds1, dir, entDef);
        this.index.getDefaultModel().add(model);
        this.index.commit();
    }

    @Override
    public void save(String serviceId, FileOutputStream out) throws IOException {
        this.index.getDefaultModel().write(out, "TTL");
        out.close();
    }
}
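
/*
 * Usage sketch (illustrative only, not part of the class above). It assumes a
 * Turtle dump in "dump.ttl" and indexes rdfs:label; both the file name and the
 * property choice are assumptions for the example. With a single indexed
 * property, jena-text's text:query property function can be invoked with just
 * a query string. RDFS here is com.hp.hpl.jena.vocabulary.RDFS.
 *
 *   Model m = ModelFactory.createDefaultModel();
 *   m.read(new FileInputStream("dump.ttl"), null, "TTL");
 *   DumpQueryExecutor executor = new DumpQueryExecutor(m, RDFS.label.getURI());
 *   ResultSet rs = executor.sparql(
 *       "PREFIX text: <http://jena.apache.org/text#> " +
 *       "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> " +
 *       "SELECT ?s ?label WHERE { ?s text:query 'apple' . ?s rdfs:label ?label }");
 *   while (rs.hasNext()) {
 *       System.out.println(rs.next());
 *   }
 *   executor.dispose();
 */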