/*******************************************************************************
* Copyright (c) 2004, 2007 IBM Corporation and Cambridge Semantics Incorporated.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* File: $Source: /cvsroot/slrp/boca/com.ibm.adtech.boca.common/src/com/ibm/adtech/boca/rdb/layout/indexer/Attic/LiteralIndexer.java,v $
* Created by: Wing Yung ( <a href="mailto:wingyung@us.ibm.com">wingyung@us.ibm.com </a>)
* Created on: 10/11/2005
* Revision: $Id: LiteralIndexer.java 178 2007-07-31 14:22:33Z mroy $
*
* Contributors:
* IBM Corporation - initial API and implementation
* Cambridge Semantics Incorporated - Fork to Anzo
*******************************************************************************/
package org.openanzo.jdbc.layout.indexer;
import java.io.IOException;
import java.util.Dictionary;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.openanzo.exceptions.ExceptionConstants;
import org.openanzo.exceptions.LogUtils;
import org.openanzo.indexer.IndexerException;
import org.openanzo.indexer.lucene.LuceneConstants;
import org.openanzo.indexer.lucene.LuceneIndexerBase;
import org.openanzo.jdbc.container.RDBQuadStore;
import org.openanzo.jdbc.container.sql.NodeSQL;
import org.openanzo.jdbc.container.sql.NodeSQL.GetAllLiteralsResult;
import org.openanzo.jdbc.layout.NodeType;
import org.openanzo.jdbc.utils.ClosableIterator;
import org.openanzo.jdbc.utils.RdbException;
import org.openanzo.rdf.Literal;
import org.openanzo.rdf.PlainLiteral;
import org.openanzo.rdf.utils.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Indexer for Anzo Literals based on Lucene.
 * <p>
 * Each indexed literal becomes one Lucene document with two fields: the literal's label
 * (analyzed, stored) and the literal's node ID (not analyzed, stored). Queries are delegated
 * to a {@link LiteralIndexQuery}.
 *
 * @author Wing Yung ( <a href="mailto:wingyung@us.ibm.com">wingyung@us.ibm.com </a>)
 */
public class LiteralIndexer extends LuceneIndexerBase<Pair<Long, Literal>, RDBQuadStore> {
    private static final Logger log = LoggerFactory.getLogger(LiteralIndexer.class);

    /** Query helper that runs searches against this index. */
    private final LiteralIndexQuery searcher;

    /**
     * Create new LiteralIndexer
     */
    public LiteralIndexer() {
        searcher = new LiteralIndexQuery();
    }

    /**
     * Configure the indexer and its query helper from the given properties.
     *
     * @param properties
     *            configuration; must provide the literal index location
     * @throws IndexerException
     *             if the index location is missing, or if the underlying initialization fails
     */
    @Override
    public void initialize(Dictionary<? extends Object, ? extends Object> properties) throws IndexerException {
        location = LiteralIndexerDictionary.getIndexLocation(properties);
        if (location == null) {
            throw new IndexerException(ExceptionConstants.INDEX.INDEX_CONFIG_PARAM_MISSING, LiteralIndexerProperties.KEY_LITERAL_INDEX_LOCATION);
        }
        boolean clear = LiteralIndexerDictionary.getIndexClear(properties);
        boolean removeLock = LiteralIndexerDictionary.getRemoveLockFile(properties);
        boolean rebuildIndex = initialize(location, clear, removeLock);
        searcher.initialize(properties);
        // Only record the rebuild flag once the searcher has been initialized successfully.
        needsIndexRebuild = rebuildIndex;
    }

    @Override
    public boolean needsIndexRebuild() {
        return super.needsIndexRebuild();
    }

    /**
     * Search the indexer for literals that match the query pattern
     *
     * @param queryStr
     *            query string to run
     * @return list of literal IDs that match query
     * @throws IndexerException
     *             if the query helper fails to run the query
     */
    public List<Long> query(String queryStr) throws IndexerException {
        return searcher.query(queryStr);
    }

    /**
     * Close the open searcher (if any) and then clear the index.
     *
     * @throws IndexerException
     *             if the searcher cannot be closed or the index cannot be cleared
     */
    @Override
    public void clear() throws IndexerException {
        try {
            closeSearcher();
        } catch (IOException e) {
            throw new IndexerException(ExceptionConstants.INDEX.FAILED_INDEX_CLEAR, e);
        }
        super.clear();
    }

    /**
     * Close the open searcher (if any) and then close the index.
     *
     * @throws IndexerException
     *             if the searcher or the index cannot be closed
     */
    @Override
    public void close() throws IndexerException {
        try {
            closeSearcher();
        } catch (IOException e) {
            throw new IndexerException(ExceptionConstants.INDEX.FAILED_INDEX_CLOSE, e);
        }
        super.close();
    }

    /**
     * Close and forget the Lucene searcher held by the query helper, if one is open.
     *
     * @throws IOException
     *             if closing the searcher fails; the reference is left in place in that case
     */
    private void closeSearcher() throws IOException {
        if (searcher.searcher != null) {
            searcher.searcher.close();
            searcher.searcher = null;
        }
    }

    /**
     * Add a single literal to the index.
     *
     * @param literal
     *            pair of node ID and literal to index
     * @return true if the literal was handed to the index writer, false if no index writer is open
     * @throws IndexerException
     *             if adding the document fails
     */
    public boolean index(Pair<Long, Literal> literal) throws IndexerException {
        if (indexWriter == null) {
            return false;
        }
        addDocument(createDocument(literal));
        return true;
    }

    /**
     * Remove every document whose node-ID field matches the given literal's ID. Does nothing
     * when no index writer is open.
     *
     * @param literal
     *            pair of node ID and literal to remove; only the ID is used
     * @throws IndexerException
     *             if deleting the documents fails
     */
    public void remove(Pair<Long, Literal> literal) throws IndexerException {
        if (indexWriter != null) {
            deleteDocuments(new Term(LuceneConstants.INDEXER_FIELD_OBJ_NODE_ID, Long.toString(literal.first)));
        }
    }

    /**
     * Create a new document which indexes a Literal and its ID
     *
     * @param pair
     *            literal value
     * @return new Document
     */
    private static Document createDocument(Pair<Long, Literal> pair) {
        // Plain and typed literals are both indexed by their label for now, since dates and
        // numbers are meaningful.
        // TODO: figure out what from lucene can be used to allow for typed fields,
        // like dates to be search-able, users won't be searching for xsd:datetime format
        // strings, they will search using a variety of date string's often locale specific.
        String text = pair.second.getLabel();
        log.trace(LogUtils.RDB_MARKER, "indexing text: {}", text);
        Document doc = new Document();
        doc.add(new Field(LuceneConstants.INDEXER_FIELD_OBJECT, text, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field(LuceneConstants.INDEXER_FIELD_OBJ_NODE_ID, Long.toString(pair.first), Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Clear the index and repopulate it from every literal stored in the given quad store.
     *
     * @param connection
     *            quad store whose literal table is read
     * @return number of literal rows processed
     * @throws IndexerException
     *             if reading the literals or writing to the index fails
     * @throws IllegalStateException
     *             if a row's node ID does not map to a literal node type
     */
    public int rebuild(RDBQuadStore connection) throws IndexerException {
        clear();
        int indexCount = 0;
        int stmtCount = 0;
        ClosableIterator<GetAllLiteralsResult> iter = null;
        preIndex();
        try {
            iter = NodeSQL.getAllLiterals(connection.getStmtProvider(), connection.getConnection(), connection.getContainerName() + "_L", connection.getConfiguration().getOptimizationString());
            while (iter.hasNext()) {
                GetAllLiteralsResult result = iter.next();
                try {
                    long nodeId = result.getId();
                    long modifierId = result.getModifierId();
                    String value = result.getValue();
                    NodeType type = NodeType.getById(nodeId);
                    Literal literal;
                    if (type == NodeType.LITERAL || type == NodeType.LONG_LITERAL) {
                        literal = connection.getNodeLayout().getPlainNodeLiteralLayout().convert(value, modifierId, connection.getConnection());
                    } else if (type == NodeType.TYPED_LITERAL || type == NodeType.TYPED_LONG_LITERAL) {
                        literal = connection.getNodeLayout().getTypedNodeLiteralLayout().convert(value, modifierId, connection.getConnection());
                    } else {
                        throw new IllegalStateException("Node must be a literal type.");
                    }
                    Document doc = LiteralIndexer.createDocument(new Pair<Long, Literal>(nodeId, literal));
                    if (doc != null) {
                        indexWriter.addDocument(doc);
                        // Count only documents actually written so the summary log is accurate.
                        indexCount++;
                    }
                } catch (IOException ioe) {
                    throw new IndexerException(ExceptionConstants.INDEX.FAILED_REBUILD, ioe);
                }
                stmtCount++;
            }
        } catch (RdbException rdbe) {
            throw new IndexerException(ExceptionConstants.INDEX.FAILED_REBUILD, rdbe);
        } finally {
            if (iter != null) {
                iter.close();
            }
        }
        if (log.isInfoEnabled()) {
            log.info(LogUtils.RDB_MARKER, "{} total statements, {} indexed", stmtCount, indexCount);
        }
        postIndex();
        needsIndexRebuild = false;
        return stmtCount;
    }
}