/*
* This software is Copyright 2005,2006,2007,2008 Langdale Consultants.
* Langdale Consultants can be contacted at: http://www.langdale.com.au
*/
package au.com.langdale.splitmodel;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import au.com.langdale.inference.AsyncModel;
import au.com.langdale.inference.AsyncResult;
import com.hp.hpl.jena.graph.Factory;
import com.hp.hpl.jena.graph.Graph;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.n3.turtle.ParserTurtle;
import com.hp.hpl.jena.reasoner.TriplePattern;
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
/**
* A query processor for split models.
*/
public class SplitReader extends SplitBase implements AsyncModel {
private static class Query {
protected Triple pattern;
protected AsyncResult results;
protected int pending;
public Query(Triple pattern, AsyncResult results, int pending) {
this.pattern = pattern;
this.results = results;
this.pending = pending;
}
public Triple getPattern() {
return pattern;
}
public boolean add(Triple result, boolean subjectResident, boolean objectResident) {
return results.add(result);
}
public void close() {
pending --;
if( pending == 0 )
results.close();
}
public int getPending() {
return pending;
}
}
private static class ObjectQuery extends Query {
public ObjectQuery(Triple pattern, AsyncResult results, int pending) {
super(pattern, results, pending);
}
@Override
public boolean add(Triple result, boolean subjectResident, boolean objectResident) {
if( objectResident )
return results.add(result);
else
return true;
}
}
private static class SubjectQuery extends Query {
public SubjectQuery(Triple pattern, AsyncResult results, int pending) {
super(pattern, results, pending);
}
@Override
public boolean add(Triple result, boolean subjectResident, boolean objectResident) {
if( subjectResident )
return results.add(result);
else
return true;
}
}
private class Bucket {
private Map queries = new LinkedHashMap();
private Graph graph;
private int load_count, query_count, index;
public Bucket(int ix) {
index = ix;
}
public Graph getGraph() {
return graph;
}
public void push(Query query) {
List group = (List) queries.get(query.getPattern());
if( group == null) {
group = new LinkedList();
queries.put(query.getPattern(), group);
}
group.add(query);
query_count++;
}
public boolean remove(Query query) {
List group = (List) queries.get(query.getPattern());
if( group != null && group.remove(query)) {
query_count--;
return true;
}
return false;
}
public List pop() {
Iterator it = queries.values().iterator();
List group = (List) it.next();
it.remove();
query_count -= group.size();
return group;
}
public int size() {
return query_count;
}
public void execute() throws IOException {
load();
List group = pop();
Triple pattern = ((Query)group.get(0)).getPattern();
ExtendedIterator it = graph.find(pattern);
while (it.hasNext()) {
Triple result = (Triple) it.next();
executeGroup(group, result);
if(group.size() == 0) {
it.close();
break;
}
}
for (Iterator iq = group.iterator(); iq.hasNext();) {
Query query = (Query) iq.next();
query.close();
}
}
private void executeGroup(List group, Triple result) {
Node subject = result.getSubject();
Node predicate = result.getPredicate();
Node object = result.getObject();
boolean subjectResident = subject.isURI() && selectBucket(subject) == this;
boolean objectResident = object.isURI() && ! predicate.equals(RDF_TYPE) && selectBucket(object) == this;
for (Iterator iq = group.iterator(); iq.hasNext();) {
Query query = (Query) iq.next();
if(! query.add(result, subjectResident, objectResident)) {
iq.remove();
removeFromOthers(query);
}
}
}
private void removeFromOthers(Query query) {
for( int ib = 0, ip = query.getPending() -1; ip > 0; ip--) {
Bucket bucket = buckets[ib++];
if( bucket == this)
bucket = buckets[ib++];
bucket.remove(query);
}
}
private void load() throws IOException {
if(graph == null) {
graph = read(getFile(index));
load_count++;
// System.out.println("Loaded bucket: " + index + ", loads: " + load_count + ", queries: " + query_count);
cache.add(this);
}
}
public void unload() {
graph = null;
// System.out.println("Unloaded bucket: " + index + ", loads: " + load_count + ", queries: " + query_count);
}
public int getModulus() throws IOException {
load();
Integer m = getInteger(getGraph(), DOCUMENT, MODULUS);
if( m == null || m.intValue() < 2 )
throw new IOException("not a valid split model (modulus undefined): " + destin);
return m.intValue();
}
}
private static class Cache {
private int quota;
private Queue resident = new LinkedList();
public Cache(int quota) {
this.quota = quota;
}
public void add(Bucket bucket) {
evict(quota-1);
resident.add(bucket);
}
public void evict(int goal) {
while( resident.size() > goal) {
Bucket bucket = (Bucket) resident.remove();
bucket.unload();
}
}
public void promote(Bucket bucket) {
if(resident.remove(bucket))
resident.add(bucket);
}
public Bucket findMostQueries() {
Bucket result = null;
for (Iterator it = resident.iterator(); it.hasNext();) {
Bucket cand = (Bucket) it.next();
if( result == null && cand.size() > 0 || result != null && cand.size() > result.size())
result = cand;
}
return result;
}
}
public static final int DEFAULT_QUOTA = 2;
private static final Node RDF_TYPE = Node.createURI(RDF_TYPE_URI);
private Bucket[] buckets;
private boolean running;
private Cache cache;
private Map quotes;
/**
* Access the split model at the given location.
* @param location: the pathname of a split model directory
* @param quote: the maximum number of splits to be resident in memory
* @throws IOException
*/
public SplitReader(String locations, int quota) throws IOException {
this(new File(locations), new Cache(quota));
}
/**
* Access the split model at the given location.
* @param location: the pathname of a split model directory
* @throws IOException
*/
public SplitReader(String location) throws IOException {
this(new File(location), new Cache(DEFAULT_QUOTA));
}
private SplitReader(File destin, Cache cache) throws IOException {
this.destin = destin;
if( ! this.destin.isDirectory())
throw new IOException("not a directory: " + destin);
this.cache = cache;
quotes = new HashMap();
Bucket boot = new Bucket(0);
modulus = boot.getModulus();
createBuckets(boot);
}
/* (non-Javadoc)
* @see au.com.langdale.splitmodel.AsyncModel#getQuote(com.hp.hpl.jena.graph.Node)
*/
public AsyncModel getQuote(Node quote) throws IOException {
AsyncModel result = null;
if( quote.isURI()) {
String name = quote.getLocalName();
result = (AsyncModel) quotes.get(name);
if( result == null) {
File nested = new File(destin, name);
if( nested.exists()) {
result = new SplitReader(nested, cache);
quotes.put(name, result);
}
}
}
return result;
}
/**
* Associate an external model with a node in the current model.
* This is used to link a base model to an difference model, for example.
* @param quote: the node representing (ie quoting) a submodel
* @param location: the pathname of a split model
* @throws IOException
*/
public void assignQuote(Node quote, String location) throws IOException {
assignQuote(quote, new SplitReader(new File(location), cache));
}
private void assignQuote(Node quote, AsyncModel model) {
quotes.put(quote.getLocalName(), model);
}
/* (non-Javadoc)
* @see au.com.langdale.splitmodel.AsyncModel#run()
*/
public void run() throws IOException {
if(running)
return;
running = true;
try {
for(;;)
if( !schedule())
break;
}
finally {
running = false;
cache.evict(0);
}
// printStats();
}
private void printStats() {
System.out.println("-----------------------------");
System.out.println("Bucket: Loads");
int total = 0;
for (int ix = 0; ix < buckets.length; ix++) {
System.out.println(ix + ": " + buckets[ix].load_count);
total += buckets[ix].load_count;
}
System.out.println("Total: " + total);
}
private void createBuckets(Bucket boot) {
buckets = new Bucket[modulus];
buckets[0] = boot;
for (int ix = 1; ix < modulus; ix++) {
buckets[ix] = new Bucket(ix);
}
}
/* (non-Javadoc)
* @see au.com.langdale.splitmodel.AsyncModel#find(com.hp.hpl.jena.reasoner.TriplePattern, au.com.langdale.splitmodel.SplitReader.SplitResult)
*/
public void find(TriplePattern clause, AsyncResult results) {
Triple pattern = new Triple(
var2Any(clause.getSubject()),
var2Any(clause.getPredicate()),
var2Any(clause.getObject()));
Bucket bucket = selectBucket(pattern);
// if( running && (bucket == null || bucket.getGraph() == null))
// System.out.println("Query to non resident bucket: " + pattern);
if( bucket != null) {
bucket.push(new Query(pattern, results, 1));
} else {
Query query;
if(clause.getObject().isVariable() && ! clause.getSubject().isVariable() && ! clause.getPredicate().equals(RDF_TYPE))
query = new ObjectQuery(pattern, results, buckets.length);
else
query = new SubjectQuery(pattern, results, buckets.length);
for (int ix = 0; ix < buckets.length; ix++)
buckets[ix].push(query);
}
}
private Bucket selectBucket(Triple pattern) {
Node subject = pattern.getSubject();
Node object = pattern.getObject();
boolean rdf_type = pattern.getPredicate().equals(RDF_TYPE);
Bucket bucket;
if( subject.isURI() && object.isURI() && ! rdf_type) {
Bucket sbucket = selectBucket(subject);
if( sbucket.getGraph() != null)
bucket = sbucket;
else {
Bucket obucket = selectBucket(object);
if( obucket.getGraph() != null) {
bucket = obucket;
}
else {
if( sbucket.size() > 0 || obucket.size() == 0)
bucket = sbucket;
else
bucket = obucket;
}
}
}
else if(subject.isURI()) {
bucket = selectBucket(subject);
}
else if(object.isURI() && ! rdf_type) {
bucket = selectBucket(object);
}
else {
// broad query
bucket = null;
}
return bucket;
}
private Bucket selectBucket(Node subject) {
return buckets[hashURI(subject.getURI())];
}
/**
* Select a query and a bucket and execute.
*/
private boolean schedule() throws IOException {
Bucket bucket = cache.findMostQueries();
if( bucket != null )
cache.promote(bucket);
else
bucket = findBucketWithMostQueries();
if(bucket.size() == 0)
return false;
bucket.execute();
return true;
}
private Bucket findBucketWithMostQueries() {
Bucket result = findLocalBucketWithMostQueries();
for (Iterator it = quotes.values().iterator(); it.hasNext();) {
SplitReader quote = (SplitReader) it.next();
Bucket cand = quote.findBucketWithMostQueries();
if( cand.size() > result.size())
result = cand;
}
return result;
}
private Bucket findLocalBucketWithMostQueries() {
Bucket result = buckets[0];
for (int ix = 1; ix < buckets.length; ix++) {
Bucket cand = buckets[ix];
if( cand.size() > result.size())
result = cand;
}
return result;
}
public static Node var2Any(Node node) {
return node.isVariable() || node == AsyncModel.WILDCARD ? Node.ANY: node;
}
private static Integer getInteger(Graph data, String subj, String pred) {
Iterator it = data.find(Node.createURI(subj), Node.createURI(pred), Node.ANY);
while (it.hasNext()) {
Triple t = (Triple) it.next();
if( t.getObject().isLiteral() && t.getObject().getLiteralDatatypeURI().equals(XSD_INTEGER_URI)) {
return (Integer) t.getObject().getLiteralValue();
}
}
return null;
}
private static Graph read(File file) throws IOException {
Graph graph = Factory.createDefaultGraph();
if( file.exists()) {
ParserTurtle parser = new ParserTurtle();
parser.parse(graph, file.toURI().toString(), new BufferedInputStream(new FileInputStream(file)));
}
return graph;
}
}