/*******************************************************************************
* Copyright (c) 2007 Cambridge Semantics Incorporated.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* File: $Source$
* Created by: Matthew Roy ( <a href="mailto:mroy@cambridgesemantics.com">mroy@cambridgesemantics.com </a>)
* Created on: Oct 29, 2007
* Revision: $Id$
*
* Contributors:
* Cambridge Semantics Incorporated - initial API and implementation
*******************************************************************************/
package org.openanzo.datasource.nodecentric.internal;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.openanzo.datasource.IDatasource;
import org.openanzo.datasource.nodecentric.indexer.ModelIndexQuery;
import org.openanzo.datasource.nodecentric.indexer.ModelIndexerFactory;
import org.openanzo.datasource.services.BaseIndexService;
import org.openanzo.exceptions.AnzoException;
import org.openanzo.exceptions.ExceptionConstants;
import org.openanzo.glitter.query.TextMatchPredicate.TextMatchQuery;
import org.openanzo.indexer.IQuery;
import org.openanzo.indexer.IResult;
import org.openanzo.indexer.IndexerProperties;
import org.openanzo.indexer.lucene.LuceneConstants;
import org.openanzo.indexer.lucene.LuceneProperties;
import org.openanzo.indexer.lucene.LuceneSearch;
import org.openanzo.jdbc.layout.Quad;
import org.openanzo.jdbc.query.IRdbValue;
import org.openanzo.jdbc.utils.RdbException;
import org.openanzo.rdf.Constants;
import org.openanzo.rdf.IRDFHandler;
import org.openanzo.rdf.Resource;
import org.openanzo.rdf.Statement;
import org.openanzo.rdf.URI;
import org.openanzo.rdf.Value;
import org.openanzo.rdf.utils.Collections;
import org.openanzo.services.IOperationContext;
import org.openanzo.services.Privilege;
/**
* NodeCentric implementation of the IIndexService
*
* @author Matthew Roy ( <a href="mailto:mroy@cambridgesemantics.com">mroy@cambridgesemantics.com </a>)
*
*/
/**
 * NodeCentric implementation of the IIndexService
 *
 * @author Matthew Roy ( <a href="mailto:mroy@cambridgesemantics.com">mroy@cambridgesemantics.com </a>)
 *
 */
public class NodeCentricIndexService extends BaseIndexService {
    //private static final Logger logger = LoggerFactory.getLogger(NodeCentricIndexService.class);

    /** Factory for index queries and searchers; {@code null} until {@link #start()} runs. */
    private ModelIndexerFactory indexerFactory = null;

    /** Searcher over the Lucene text index; {@code null} until {@link #start()} runs. */
    private LuceneSearch search = null;

    /** Parent datasource providing connections, node layout and authorization. */
    private final NodeCentricDatasource datasource;

    /** Filesystem location of the Lucene index, read from the datasource configuration. */
    private String indexLocation = null;

    /**
     * Create a new NodeCentricIndexService
     *
     * @param datasource
     *            parent datasource
     */
    public NodeCentricIndexService(NodeCentricDatasource datasource) {
        this.datasource = datasource;
    }

    /**
     * @return the parent datasource this index service operates over
     */
    public IDatasource getDatasource() {
        return datasource;
    }

    /**
     * Initialize the text index from the datasource's configuration parameters.
     *
     * @throws AnzoException
     *             if the searcher cannot be created from the configured index location
     */
    @Override
    public void start() throws AnzoException {
        indexLocation = (String) datasource.getConfigurationParameters().get(LuceneProperties.KEY_LUCENE_INDEX_LOCATION);
        int pageSize = 0;
        String indexPageSize = (String) datasource.getConfigurationParameters().get(IndexerProperties.KEY_INDEXER_PAGE_SIZE);
        if (indexPageSize != null) {
            // NOTE(review): a malformed value throws NumberFormatException and aborts startup -- confirm intended
            pageSize = Integer.parseInt(indexPageSize);
        }
        indexerFactory = new ModelIndexerFactory();
        search = indexerFactory.createSearchFromLocation(indexLocation);
        search.setPageSize(pageSize);
    }

    /**
     * Query the text indexer for a set of statements that match a text indexer query
     *
     * @param context
     *            context for this operation
     * @param query
     *            query string
     * @param offset
     *            index of first visible result to return (0 based)
     * @param limit
     *            maximum number of results to return, or -1 for no limit
     * @param handler
     *            Call-back handler that handles the results of this query
     * @throws AnzoException
     *             if the index has not been initialized or the query fails
     */
    @Override
    protected void executeIndexQueryInternal(IOperationContext context, String query, int offset, int limit, IRDFHandler handler) throws AnzoException {
        ensureInitialized();
        NodeCentricOperationContext connectionContext = null;
        try {
            handler.startRDF();
            connectionContext = datasource.getQueryContext(context);
            IQuery indexQuery = indexerFactory.createQueryFromDefaultFieldAndText(LuceneConstants.INDEXER_FIELD_OBJECT, query);
            List<IResult> results = search.executeQuery(indexQuery);
            // Cache of graph id -> readable, so ACLs are checked at most once per graph per query.
            HashMap<Long, Boolean> acls = new HashMap<Long, Boolean>();
            if (offset == 0 && limit == -1) {
                // Fast path: no paging, stream every visible result.
                for (IResult result : results) {
                    Statement sw = processResult(connectionContext, result, acls);
                    if (sw != null) {
                        handler.handleStatement(sw);
                    }
                }
            } else {
                int resultCount = 0;
                int returnCount = 0;
                if (offset <= results.size()) {
                    Iterator<IResult> resultsIterator = results.iterator();
                    // BUGFIX: a negative limit means "unbounded" (see the original inner
                    // "limit >= 0 && returnCount >= limit" guard) and must not end the loop.
                    while (resultsIterator.hasNext() && (limit < 0 || returnCount < limit)) {
                        IResult result = resultsIterator.next();
                        Statement sw = processResult(connectionContext, result, acls);
                        if (sw != null) {
                            // Offset and limit count only results visible to the caller.
                            resultCount++;
                            if (resultCount > offset) {
                                handler.handleStatement(sw);
                                returnCount++;
                            }
                        }
                    }
                }
            }
            handler.endRDF();
        } finally {
            if (connectionContext != null) {
                datasource.returnQueryContext(connectionContext);
            }
        }
    }

    /**
     * Turns the results of an index query into a full Statement. Returns null if the containing
     * named graph is not visible to the operation's principal.
     *
     * @param connectionContext
     *            context for this operation, providing the connection and node layout
     * @param result
     *            result to convert
     * @param acls
     *            map of NamedGraph IDs to a boolean specifying if graph is visible for the userid
     * @return statement for result, or null when not readable
     * @throws AnzoException
     *             if node values or authorization roles cannot be resolved
     */
    private Statement processResult(NodeCentricOperationContext connectionContext, IResult result, HashMap<Long, Boolean> acls) throws AnzoException {
        Long ngId = Long.valueOf(result.getFieldValue(LuceneConstants.INDEXER_FIELD_GRAPH_ID));
        URI ngURI = (URI) connectionContext.getNodeLayout().fetchValue(ngId, connectionContext.getConnection());
        if (!canReadGraph(connectionContext, ngId, ngURI, acls)) {
            return null;
        }
        Long subjectId = Long.valueOf(result.getFieldValue(LuceneConstants.INDEXER_FIELD_SUBJECT_ID));
        Long predicateId = Long.valueOf(result.getFieldValue(LuceneConstants.INDEXER_FIELD_PREDICATE_ID));
        Long objId = Long.valueOf(result.getFieldValue(LuceneConstants.INDEXER_FIELD_OBJ_NODE_ID));
        Resource subj = (Resource) connectionContext.getNodeLayout().fetchValue(subjectId, connectionContext.getConnection());
        URI pred = (URI) connectionContext.getNodeLayout().fetchValue(predicateId, connectionContext.getConnection());
        Value obj = connectionContext.getNodeLayout().fetchValue(objId, connectionContext.getConnection());
        return Constants.valueFactory.createStatement(subj, pred, obj, ngURI);
    }

    /**
     * Query the text indexer for a set of statements that match a text indexer query
     *
     * @param context
     *            context for this operation
     * @param query
     *            query string
     * @param offset
     *            index of first visible result to return (0 based)
     * @param limit
     *            maximum number of results to return, or -1 for no limit
     * @return set of statements that match a text indexer query
     * @throws AnzoException
     *             if the index has not been initialized or the query fails
     */
    @Override
    public Collection<Statement> executeIndexQueryInternal(IOperationContext context, String query, int offset, int limit) throws AnzoException {
        return executeIndexQueryInternal(context, query, offset, limit, false);
    }

    /**
     * Query the text indexer for a set of statements that match a text indexer query
     *
     * @param context
     *            context for this operation
     * @param query
     *            query string
     * @param offset
     *            index of first visible result to return (0 based)
     * @param limit
     *            maximum number of results to return, or -1 for no limit
     * @param prepopulateSolutionNodes
     *            should the index query populate the values of solution nodes before returning?
     * @return set of statements that match a text indexer query
     * @throws AnzoException
     *             if the index has not been initialized or the query fails
     */
    private Collection<Statement> executeIndexQueryInternal(IOperationContext context, String query, int offset, int limit, boolean prepopulateSolutionNodes) throws AnzoException {
        ensureInitialized();
        NodeCentricOperationContext connectionContext = null;
        try {
            connectionContext = datasource.getQueryContext(context);
            IQuery indexQuery = indexerFactory.createQueryFromDefaultFieldAndText(LuceneConstants.INDEXER_FIELD_OBJECT, query);
            List<IResult> results = search.executeQuery(indexQuery);
            ArrayList<Statement> resultStatements = pageResults(connectionContext, results, offset, limit);
            if (prepopulateSolutionNodes) {
                populateStatements(resultStatements, connectionContext.getConnection());
            }
            return resultStatements;
        } finally {
            if (connectionContext != null) {
                datasource.returnQueryContext(connectionContext);
            }
        }
    }

    /**
     * Query the text indexer for a set of statements that match a text indexer query. This variant
     * always populates the values of solution nodes before returning.
     *
     * @param context
     *            context for this operation
     * @param query
     *            parsed text-match query (literal query string plus terms)
     * @param offset
     *            index of first visible result to return (0 based)
     * @param limit
     *            maximum number of results to return, or -1 for no limit
     * @return set of statements that match a text indexer query
     * @throws AnzoException
     *             if the index has not been initialized or the query fails
     */
    @Override
    public Collection<Statement> executeIndexQueryInternal(IOperationContext context, TextMatchQuery query, int offset, int limit) throws AnzoException {
        ensureInitialized();
        NodeCentricOperationContext connectionContext = null;
        try {
            connectionContext = datasource.getQueryContext(context);
            ModelIndexQuery indexQuery = indexerFactory.createQueryFromDefaultFieldAndTextAndTerms(LuceneConstants.INDEXER_FIELD_OBJECT, query.literalQuery, query.terms);
            List<IResult> results = search.executeQuery(indexQuery);
            ArrayList<Statement> resultStatements = pageResults(connectionContext, results, offset, limit);
            populateStatements(resultStatements, connectionContext.getConnection());
            return resultStatements;
        } finally {
            if (connectionContext != null) {
                datasource.returnQueryContext(connectionContext);
            }
        }
    }

    /**
     * Turns the results of an index query into a StatementWrapper. Returns null if it is not visible.
     *
     * @param connectionContext
     *            context for this operation, providing the connection and node layout
     * @param result
     *            result to convert
     * @param acls
     *            map of NamedGraph IDs to a boolean specifying if graph is visible for the userid
     * @return statement wrapper for result, or null when not readable
     * @throws AnzoException
     *             if node values or authorization roles cannot be resolved
     */
    protected StatementWrapper processResultToWrapper(NodeCentricOperationContext connectionContext, IResult result, HashMap<Long, Boolean> acls) throws AnzoException {
        Long ngId = Long.valueOf(result.getFieldValue(LuceneConstants.INDEXER_FIELD_GRAPH_ID));
        URI ngURI = (URI) connectionContext.getNodeLayout().fetchValue(ngId, connectionContext.getConnection());
        if (!canReadGraph(connectionContext, ngId, ngURI, acls)) {
            return null;
        }
        Long subjectId = Long.valueOf(result.getFieldValue(LuceneConstants.INDEXER_FIELD_SUBJECT_ID));
        Long predicateId = Long.valueOf(result.getFieldValue(LuceneConstants.INDEXER_FIELD_PREDICATE_ID));
        Long objId = Long.valueOf(result.getFieldValue(LuceneConstants.INDEXER_FIELD_OBJ_NODE_ID));
        return new StatementWrapper(ngId, subjectId, predicateId, objId, null);
    }

    /**
     * Throw if {@link #start()} has not yet initialized the indexer factory and searcher.
     *
     * @throws AnzoException
     *             {@link ExceptionConstants.INDEX#NOT_INITIALIZED} when the service is not started
     */
    private void ensureInitialized() throws AnzoException {
        if (indexerFactory == null || search == null) {
            throw new AnzoException(ExceptionConstants.INDEX.NOT_INITIALIZED);
        }
    }

    /**
     * Check (and cache in {@code acls}) whether the operation's principal may read the given named
     * graph. Sysadmins may always read; other principals must hold one of the graph's READ roles.
     *
     * @param connectionContext
     *            context providing the operation principal
     * @param ngId
     *            internal node id of the named graph (cache key)
     * @param ngURI
     *            URI of the named graph, used for the authorization lookup
     * @param acls
     *            per-query cache of graph id to readability
     * @return true when the principal may read statements from the graph
     * @throws AnzoException
     *             if the authorization roles cannot be resolved
     */
    private boolean canReadGraph(NodeCentricOperationContext connectionContext, Long ngId, URI ngURI, HashMap<Long, Boolean> acls) throws AnzoException {
        if (connectionContext.getOperationPrincipal().isSysadmin()) {
            return true;
        }
        Boolean canRead = acls.get(ngId);
        if (canRead == null) {
            Set<URI> roles = datasource.getAuthorizationService().getRolesForGraph(connectionContext, ngURI, Privilege.READ);
            canRead = Boolean.valueOf(Collections.memberOf(roles, connectionContext.getOperationPrincipal().getRoles()));
            acls.put(ngId, canRead);
        }
        return canRead.booleanValue();
    }

    /**
     * Walk raw index results, apply ACL filtering, and collect the offset/limit page of visible
     * statements. Shared by both collection-returning query variants.
     *
     * @param connectionContext
     *            context for this operation
     * @param results
     *            raw results from the searcher, in index order
     * @param offset
     *            number of visible results to skip
     * @param limit
     *            maximum number of results to collect; negative means unbounded
     * @return the requested page of statements, in index order
     * @throws AnzoException
     *             if a result cannot be converted
     */
    private ArrayList<Statement> pageResults(NodeCentricOperationContext connectionContext, List<IResult> results, int offset, int limit) throws AnzoException {
        ArrayList<Statement> resultStatements = new ArrayList<Statement>();
        // Cache of graph id -> readable, so ACLs are checked at most once per graph per query.
        HashMap<Long, Boolean> acls = new HashMap<Long, Boolean>();
        if (offset == 0 && limit == -1) {
            // Fast path: no paging, collect every visible result.
            for (IResult result : results) {
                StatementWrapper sw = processResultToWrapper(connectionContext, result, acls);
                if (sw != null) {
                    resultStatements.add(toStatement(connectionContext, sw));
                }
            }
        } else {
            int resultCount = 0;
            int returnCount = 0;
            if (offset <= results.size()) {
                Iterator<IResult> resultsIterator = results.iterator();
                // BUGFIX: a negative limit means "unbounded" and must not end the loop.
                while (resultsIterator.hasNext() && (limit < 0 || returnCount < limit)) {
                    IResult result = resultsIterator.next();
                    StatementWrapper sw = processResultToWrapper(connectionContext, result, acls);
                    if (sw != null) {
                        // Offset and limit count only results visible to the caller.
                        resultCount++;
                        if (resultCount > offset) {
                            resultStatements.add(toStatement(connectionContext, sw));
                            returnCount++;
                        }
                    }
                }
            }
        }
        return resultStatements;
    }

    /**
     * Materialize a StatementWrapper's node ids into a full Statement via a Quad lookup.
     *
     * @param connectionContext
     *            context providing the connection and node converter
     * @param sw
     *            wrapper holding the graph/subject/predicate/object node ids
     * @return the materialized statement
     * @throws AnzoException
     *             if the node values cannot be resolved
     */
    private Statement toStatement(NodeCentricOperationContext connectionContext, StatementWrapper sw) throws AnzoException {
        Quad q = new Quad(connectionContext.getConnection(), connectionContext.getNodeLayout().getNodeConverter(), sw.getGraphId(), sw.getSubjectId(), sw.getPredicateId(), sw.getObjectId());
        return q.asStatement();
    }

    /**
     * Populate the database-backed values of every node of every statement in the collection.
     *
     * @param statements
     *            statements whose subject/predicate/object/graph nodes should be populated
     * @param connection
     *            database connection to populate from
     * @throws RdbException
     *             if a node value cannot be loaded
     */
    private void populateStatements(Collection<Statement> statements, Connection connection) throws RdbException {
        for (Statement statement : statements) {
            populateRdbNode(statement.getSubject(), connection);
            populateRdbNode(statement.getPredicate(), connection);
            populateRdbNode(statement.getObject(), connection);
            populateRdbNode(statement.getNamedGraphUri(), connection);
        }
    }

    /**
     * Populate a single node's value from the database when it is a lazily-loaded IRdbValue;
     * any other node type is left untouched.
     *
     * @param node
     *            node to populate (may be any Value implementation)
     * @param connection
     *            database connection to populate from
     * @throws RdbException
     *             if the value cannot be loaded
     */
    private void populateRdbNode(Object node, Connection connection) throws RdbException {
        if (node instanceof IRdbValue) {
            IRdbValue value = (IRdbValue) node;
            value.populate(connection);
        }
    }
}