/* * Copyright (c) 2006-2011 Nuxeo SA (http://nuxeo.com/) and others. * * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * * Contributors: */ package org.eclipse.ecr.core.storage.sql.extensions; import java.io.IOException; import java.io.Reader; import java.sql.Clob; import java.sql.Connection; import java.sql.DatabaseMetaData; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.sql.Types; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Hit; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Searcher; import org.apache.lucene.store.LockObtainFailedException; import org.h2.api.CloseListener; import org.h2.message.Message; import org.h2.store.fs.FileSystem; import org.h2.tools.SimpleResultSet; import org.h2.util.IOUtils; import org.h2.util.StringUtils; import org.h2.value.DataType; /** * An optimized Lucene-based fulltext indexing trigger and search. * * @author H2 Group * @author Florent Guillaume */ public class H2Fulltext { private static final Log log = LogFactory.getLog(H2Fulltext.class); private static final Map<String, IndexWriter> indexWriters = new ConcurrentHashMap<String, IndexWriter>(); private static final String FT_SCHEMA = "NXFT"; private static final String FT_TABLE = FT_SCHEMA + ".INDEXES"; private static final String PREFIX = "NXFT_"; private static final String FIELD_KEY = "KEY"; private static final String FIELD_TEXT = "TEXT"; private static final String DEFAULT_INDEX_NAME = "PUBLIC_FULLTEXT_default"; private static final String COL_KEY = "KEY"; // Utility class. private H2Fulltext() { } /** * Initializes fulltext search functionality for this database. This adds * the following Java functions to the database: * <ul> * <li>NXFT_CREATE_INDEX(nameString, schemaString, tableString, * columnListString, analyzerString)</li> * <li>NXFT_REINDEX()</li> * <li>NXFT_DROP_ALL()</li> * <li>NXFT_SEARCH(queryString, limitInt, offsetInt): result set</li> * </ul> * It also adds a schema NXFT to the database where bookkeeping information * is stored. This function may be called from a Java application, or by * using the SQL statements: * * <pre> * CREATE ALIAS IF NOT EXISTS NXFT_INIT FOR * "org.eclipse.ecr.core.storage.sql.extensions.H2Fulltext.init"; * CALL NXFT_INIT(); * </pre> * * @param conn */ public static void init(Connection conn) throws SQLException { Statement st = conn.createStatement(); st.execute("CREATE SCHEMA IF NOT EXISTS " + FT_SCHEMA); st.execute("CREATE TABLE IF NOT EXISTS " + FT_TABLE + "(NAME VARCHAR, SCHEMA VARCHAR, TABLE VARCHAR, COLUMNS VARCHAR, " + "ANALYZER VARCHAR, PRIMARY KEY(NAME))"); // BBB migrate old table without the "NAME" column ResultSet rs = st.executeQuery("SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE " + "TABLE_SCHEMA = '" + FT_SCHEMA + "' AND TABLE_NAME = 'INDEXES' AND COLUMN_NAME = 'NAME'"); if (!rs.next()) { // BBB no NAME column, alter table to create it st.execute("ALTER TABLE " + FT_TABLE + " ADD COLUMN NAME VARCHAR"); st.execute("UPDATE " + FT_TABLE + " SET NAME = '" + DEFAULT_INDEX_NAME + "'"); } String className = H2Fulltext.class.getName(); st.execute("CREATE ALIAS IF NOT EXISTS " + PREFIX + "CREATE_INDEX FOR \"" + className + ".createIndex\""); st.execute("CREATE ALIAS IF NOT EXISTS " + PREFIX + "REINDEX FOR \"" + className + ".reindex\""); st.execute("CREATE ALIAS IF NOT EXISTS " + PREFIX + "DROP_ALL FOR \"" + className + ".dropAll\""); st.execute("CREATE ALIAS IF NOT EXISTS " + PREFIX + "SEARCH FOR \"" + className + ".search\""); } // ----- static methods called directly to initialize fulltext ----- /** * Creates a fulltext index for a table and column list. * <p> * A table may have any number of indexes at a time, but the index name must * be unique. If the index already exists, nothing is done, otherwise the * index is created and populated from existing data. * <p> * Usually called through: * * <pre> * CALL NXFT_CREATE_INDEX('indexname', 'myschema', 'mytable', ('col1', 'col2'), 'lucene.analyzer'); * </pre> * * @param conn the connection * @param indexName the index name * @param schema the schema name of the table * @param table the table name * @param columns the column list * @param analyzer the Lucene fulltext analyzer class */ public static void createIndex(Connection conn, String indexName, String schema, String table, String columns, String analyzer) throws SQLException { if (indexName == null) { indexName = DEFAULT_INDEX_NAME; } columns = columns.replace("(", "").replace(")", "").replace(" ", ""); PreparedStatement ps = conn.prepareStatement("DELETE FROM " + FT_TABLE + " WHERE NAME = ?"); ps.setString(1, indexName); ps.execute(); ps = conn.prepareStatement("INSERT INTO " + FT_TABLE + "(NAME, SCHEMA, TABLE, COLUMNS, ANALYZER) VALUES(?, ?, ?, ?, ?)"); ps.setString(1, indexName); ps.setString(2, schema); ps.setString(3, table); ps.setString(4, columns); ps.setString(5, analyzer); ps.execute(); ps.close(); createTrigger(conn, schema, table); } /** * Re-creates the fulltext index for this database. */ public static void reindex(Connection conn) throws SQLException { removeAllTriggers(conn); removeIndexFiles(conn); Statement st = conn.createStatement(); ResultSet rs = st.executeQuery("SELECT * FROM " + FT_TABLE); Set<String> done = new HashSet<String>(); while (rs.next()) { String schema = rs.getString("SCHEMA"); String table = rs.getString("TABLE"); String key = schema + '.' + table; if (!done.add(key)) { continue; } createTrigger(conn, schema, table); indexExistingRows(conn, schema, table); } st.close(); } private static void indexExistingRows(Connection conn, String schema, String table) throws SQLException { Trigger trigger = new Trigger(); trigger.init(conn, schema, null, table, false, Trigger.INSERT); Statement st = conn.createStatement(); ResultSet rs = st.executeQuery("SELECT * FROM " + StringUtils.quoteIdentifier(schema) + '.' + StringUtils.quoteIdentifier(table)); int n = rs.getMetaData().getColumnCount(); while (rs.next()) { Object[] row = new Object[n]; for (int i = 0; i < n; i++) { row[i] = rs.getObject(i + 1); } trigger.fire(conn, null, row); } st.close(); } /** * Creates a trigger for the indexes on a table. * <p> * Usually called through: * * <pre> * CALL NXFT_CREATE_TRIGGERS('myschema', 'mytable'); * </pre> * * @param conn the connection * @param schema the schema name of the table * @param table the table name */ private static void createTrigger(Connection conn, String schema, String table) throws SQLException { Statement st = conn.createStatement(); schema = StringUtils.quoteIdentifier(schema); String trigger = schema + '.' + StringUtils.quoteIdentifier(PREFIX + table); st.execute("DROP TRIGGER IF EXISTS " + trigger); st.execute(String.format("CREATE TRIGGER %s " + "AFTER INSERT, UPDATE, DELETE ON %s.%s " + "FOR EACH ROW CALL \"%s\"", trigger, schema, StringUtils.quoteIdentifier(table), H2Fulltext.Trigger.class.getName())); st.close(); } private static void removeAllTriggers(Connection conn) throws SQLException { Statement st = conn.createStatement(); ResultSet rs = st.executeQuery("SELECT * FROM INFORMATION_SCHEMA.TRIGGERS"); Statement st2 = conn.createStatement(); while (rs.next()) { String trigger = rs.getString("TRIGGER_NAME"); if (trigger.startsWith(PREFIX)) { st2.execute("DROP TRIGGER " + StringUtils.quoteIdentifier(rs.getString("TRIGGER_SCHEMA")) + "." + trigger); } } st.close(); st2.close(); } /** * Drops all fulltext indexes from the database. */ public static void dropAll(Connection conn) throws SQLException { Statement st = conn.createStatement(); st.execute("DROP SCHEMA IF EXISTS " + FT_SCHEMA); st.close(); removeAllTriggers(conn); removeIndexFiles(conn); } private static String fieldForIndex(String indexName) { if (DEFAULT_INDEX_NAME.equals(indexName)) { return FIELD_TEXT; } else { return FIELD_TEXT + '_' + indexName; } } /** * Searches from the given full text index. The returned result set has a * single ID column which holds the keys for the matching rows. * <p> * Usually called through: * * <pre> * SELECT * FROM NXFT_SEARCH(name, 'text'); * </pre> * * @param conn the connection * @param indexName the index name * @param text the search query * @return the result set */ @SuppressWarnings("unchecked") public static ResultSet search(Connection conn, String indexName, String text) throws SQLException { DatabaseMetaData meta = conn.getMetaData(); if (indexName == null) { indexName = DEFAULT_INDEX_NAME; } // find schema, table and analyzer PreparedStatement ps = conn.prepareStatement("SELECT SCHEMA, TABLE, ANALYZER FROM " + FT_TABLE + " WHERE NAME = ?"); ps.setString(1, indexName); ResultSet res = ps.executeQuery(); if (!res.next()) { throw new SQLException("No such index: " + indexName); } String schema = res.getString(1); String table = res.getString(2); String analyzer = res.getString(3); ps.close(); int type = getPrimaryKeyType(meta, schema, table); SimpleResultSet rs = new SimpleResultSet(); rs.addColumn(COL_KEY, type, 0, 0); if (meta.getURL().startsWith("jdbc:columnlist:")) { // this is just to query the result set columns return rs; } String indexPath = getIndexPath(conn); try { BooleanQuery query = new BooleanQuery(); QueryParser parser = new QueryParser(fieldForIndex(indexName), getAnalyzer(analyzer)); query.add(parser.parse(text), BooleanClause.Occur.MUST); getIndexWriter(indexPath, analyzer).commit(); Searcher searcher = new IndexSearcher(indexPath); Iterator<Hit> it = searcher.search(query).iterator(); for (; it.hasNext();) { Hit hit = it.next(); Object key = asObject(hit.get(FIELD_KEY), type); rs.addRow(new Object[] { key }); } // TODO keep it open if possible searcher.close(); } catch (Exception e) { throw convertException(e); } return rs; } private static int getPrimaryKeyType(DatabaseMetaData meta, String schema, String table) throws SQLException { // find primary key name String primaryKeyName = null; ResultSet rs = meta.getPrimaryKeys(null, schema, table); while (rs.next()) { if (primaryKeyName != null) { throw new SQLException( "Can only index primary keys on one column for " + schema + '.' + table); } primaryKeyName = rs.getString("COLUMN_NAME"); } if (primaryKeyName == null) { throw new SQLException("No primary key for " + schema + '.' + table); } rs.close(); // find primary key type rs = meta.getColumns(null, schema, table, primaryKeyName); if (!rs.next()) { throw new SQLException("Could not find primary key"); } int primaryKeyType = rs.getInt("DATA_TYPE"); rs.close(); return primaryKeyType; } private static Analyzer getAnalyzer(String analyzer) throws SQLException { try { return (Analyzer) Class.forName(analyzer).newInstance(); } catch (Exception e) { throw new SQLException(e.toString()); } } private static String getIndexPath(Connection conn) throws SQLException { Statement st = conn.createStatement(); ResultSet rs = st.executeQuery("CALL DATABASE_PATH()"); rs.next(); String path = rs.getString(1); if (path == null) { throw new SQLException( "Fulltext search for in-memory databases is not supported."); } st.close(); return path + ".lucene"; } private static IndexWriter getIndexWriter(String indexPath, String analyzer) throws SQLException { IndexWriter indexWriter = indexWriters.get(indexPath); if (indexWriter == null) { synchronized (indexWriters) { if (!indexWriters.containsKey(indexPath)) { try { boolean recreate = !IndexReader.indexExists(indexPath); indexWriter = new IndexWriter(indexPath, getAnalyzer(analyzer), recreate); } catch (LockObtainFailedException e) { log.error("Cannot open fulltext index", e); throw convertException(e); } catch (IOException e) { throw convertException(e); } indexWriters.put(indexPath, indexWriter); } } } return indexWriter; } private static void removeIndexFiles(Connection conn) throws SQLException { String path = getIndexPath(conn); IndexWriter index = indexWriters.remove(path); if (index != null) { try { index.commit(); index.close(); } catch (IOException e) { throw convertException(e); } } FileSystem.getInstance(path).deleteRecursive(path); } private static SQLException convertException(Exception e) { SQLException e2 = new SQLException("Error while indexing document"); e2.initCause(e); return e2; } protected static String asString(Object data, int type) throws SQLException { if (data == null) { return ""; } switch (type) { case Types.BIT: case DataType.TYPE_BOOLEAN: case Types.INTEGER: case Types.BIGINT: case Types.DECIMAL: case Types.DOUBLE: case Types.FLOAT: case Types.NUMERIC: case Types.REAL: case Types.SMALLINT: case Types.TINYINT: case Types.DATE: case Types.TIME: case Types.TIMESTAMP: case Types.LONGVARCHAR: case Types.CHAR: case Types.VARCHAR: return data.toString(); case Types.CLOB: try { if (data instanceof Clob) { data = ((Clob) data).getCharacterStream(); } return IOUtils.readStringAndClose((Reader) data, -1); } catch (IOException e) { throw Message.convert(e); } case Types.VARBINARY: case Types.LONGVARBINARY: case Types.BINARY: case Types.JAVA_OBJECT: case Types.OTHER: case Types.BLOB: case Types.STRUCT: case Types.REF: case Types.NULL: case Types.ARRAY: case DataType.TYPE_DATALINK: case Types.DISTINCT: throw new SQLException("Unsupported column data type: " + type); default: return ""; } } // simple cases only, used for primary key private static Object asObject(String string, int type) throws SQLException { switch (type) { case Types.BIGINT: return Long.valueOf(string); case Types.INTEGER: case Types.SMALLINT: case Types.TINYINT: return Integer.valueOf(string); case Types.LONGVARCHAR: case Types.CHAR: case Types.VARCHAR: return string; default: throw new SQLException("Unsupport data type for primary key: " + type); } } /** * Trigger used to update the lucene index upon row change. */ public static class Trigger implements org.h2.api.Trigger, CloseListener { private String indexPath; private IndexWriter indexWriter; /** Starting at 0. */ private int primaryKeyIndex; private int primaryKeyType; private Map<String, int[]> columnTypes; private Map<String, int[]> columnIndices; /** * Trigger interface: initialization. */ @Override public void init(Connection conn, String schema, String triggerName, String table, boolean before, int opType) throws SQLException { indexPath = getIndexPath(conn); DatabaseMetaData meta = conn.getMetaData(); // find primary key name String primaryKeyName = null; ResultSet rs = meta.getPrimaryKeys(null, schema, table); while (rs.next()) { if (primaryKeyName != null) { throw new SQLException( "Can only index primary keys on one column for: " + schema + '.' + table); } primaryKeyName = rs.getString("COLUMN_NAME"); } if (primaryKeyName == null) { throw new SQLException("No primary key for " + schema + '.' + table); } rs.close(); // find primary key type rs = meta.getColumns(null, schema, table, primaryKeyName); if (!rs.next()) { throw new SQLException("No primary key for: " + schema + '.' + table); } primaryKeyType = rs.getInt("DATA_TYPE"); primaryKeyIndex = rs.getInt("ORDINAL_POSITION") - 1; rs.close(); // find all columns info Map<String, Integer> allColumnTypes = new HashMap<String, Integer>(); Map<String, Integer> allColumnIndices = new HashMap<String, Integer>(); rs = meta.getColumns(null, schema, table, null); while (rs.next()) { String name = rs.getString("COLUMN_NAME"); int type = rs.getInt("DATA_TYPE"); int index = rs.getInt("ORDINAL_POSITION") - 1; allColumnTypes.put(name, Integer.valueOf(type)); allColumnIndices.put(name, Integer.valueOf(index)); } rs.close(); // find columns configured for indexing PreparedStatement ps = conn.prepareStatement("SELECT NAME, COLUMNS, ANALYZER FROM " + FT_TABLE + " WHERE SCHEMA = ? AND TABLE = ?"); ps.setString(1, schema); ps.setString(2, table); rs = ps.executeQuery(); columnTypes = new HashMap<String, int[]>(); columnIndices = new HashMap<String, int[]>(); while (rs.next()) { String indexName = rs.getString(1); String columns = rs.getString(2); String analyzer = rs.getString(3); List<String> columnNames = Arrays.asList(columns.split(",")); // find the columns' indices and types int[] types = new int[columnNames.size()]; int[] indices = new int[columnNames.size()]; int i = 0; for (String columnName : columnNames) { types[i] = allColumnTypes.get(columnName).intValue(); indices[i] = allColumnIndices.get(columnName).intValue(); i++; } columnTypes.put(indexName, types); columnIndices.put(indexName, indices); // only one call actually needed for this: indexWriter = getIndexWriter(indexPath, analyzer); } rs.close(); ps.close(); } /** * Trigger interface. */ @Override public void fire(Connection conn, Object[] oldRow, Object[] newRow) throws SQLException { if (indexWriter == null) { throw new SQLException("Fulltext index was not initialized"); } if (oldRow != null) { delete(oldRow); } if (newRow != null) { insert(newRow); } try { // need to flush otherwise some unit tests don't pass indexWriter.commit(); } catch (IOException e) { throw convertException(e); } } private void insert(Object[] row) throws SQLException { Document doc = new Document(); String key = asString(row[primaryKeyIndex], primaryKeyType); doc.add(new Field(FIELD_KEY, key, Field.Store.YES, Field.Index.UN_TOKENIZED)); // add fulltext for all indexes for (String indexName : columnTypes.keySet()) { int[] types = columnTypes.get(indexName); int[] indices = columnIndices.get(indexName); StringBuilder buf = new StringBuilder(); for (int i = 0; i < types.length; i++) { String data = asString(row[indices[i]], types[i]); if (i > 0) { buf.append(' '); } buf.append(data); } doc.add(new Field(fieldForIndex(indexName), buf.toString(), Field.Store.NO, Field.Index.TOKENIZED)); } try { indexWriter.addDocument(doc); } catch (IOException e) { throw convertException(e); } } private void delete(Object[] row) throws SQLException { String primaryKey = asString(row[primaryKeyIndex], primaryKeyType); try { indexWriter.deleteDocuments(new Term(FIELD_KEY, primaryKey)); } catch (IOException e) { throw convertException(e); } } @Override public void close() throws SQLException { if (indexWriter != null) { try { indexWriter.commit(); indexWriter.close(); indexWriter = null; indexWriters.remove(indexPath); } catch (Exception e) { throw convertException(e); } } } @Override public void remove() { // ignore } } }