H2Fulltext.java example

Explorer
org.eclipse.ecr-master
/*
 * Copyright (c) 2006-2011 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 */
package org.eclipse.ecr.core.storage.sql.extensions;

import java.io.IOException;
import java.io.Reader;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hit;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.LockObtainFailedException;
import org.h2.api.CloseListener;
import org.h2.message.Message;
import org.h2.store.fs.FileSystem;
import org.h2.tools.SimpleResultSet;
import org.h2.util.IOUtils;
import org.h2.util.StringUtils;
import org.h2.value.DataType;

/**
 * An optimized Lucene-based fulltext indexing trigger and search.
 *
 * @author H2 Group
 * @author Florent Guillaume
 */
public class H2Fulltext {

    private static final Log log = LogFactory.getLog(H2Fulltext.class);

    private static final Map<String, IndexWriter> indexWriters = new ConcurrentHashMap<String, IndexWriter>();

    private static final String FT_SCHEMA = "NXFT";

    private static final String FT_TABLE = FT_SCHEMA + ".INDEXES";

    private static final String PREFIX = "NXFT_";

    private static final String FIELD_KEY = "KEY";

    private static final String FIELD_TEXT = "TEXT";

    private static final String DEFAULT_INDEX_NAME = "PUBLIC_FULLTEXT_default";

    private static final String COL_KEY = "KEY";

    // Utility class.
    private H2Fulltext() {
    }

    /**
     * Initializes fulltext search functionality for this database. This adds
     * the following Java functions to the database:
     * <ul>
     * <li>NXFT_CREATE_INDEX(nameString, schemaString, tableString,
     * columnListString, analyzerString)</li>
     * <li>NXFT_REINDEX()</li>
     * <li>NXFT_DROP_ALL()</li>
     * <li>NXFT_SEARCH(queryString, limitInt, offsetInt): result set</li>
     * </ul>
     * It also adds a schema NXFT to the database where bookkeeping information
     * is stored. This function may be called from a Java application, or by
     * using the SQL statements:
     *
     * <pre>
     *  CREATE ALIAS IF NOT EXISTS NXFT_INIT FOR
     *      "org.eclipse.ecr.core.storage.sql.extensions.H2Fulltext.init";
     *  CALL NXFT_INIT();
     * </pre>
     *
     * @param conn
     */
    public static void init(Connection conn) throws SQLException {
        Statement st = conn.createStatement();
        st.execute("CREATE SCHEMA IF NOT EXISTS " + FT_SCHEMA);
        st.execute("CREATE TABLE IF NOT EXISTS "
                + FT_TABLE
                + "(NAME VARCHAR, SCHEMA VARCHAR, TABLE VARCHAR, COLUMNS VARCHAR, "
                + "ANALYZER VARCHAR, PRIMARY KEY(NAME))");
        // BBB migrate old table without the "NAME" column
        ResultSet rs = st.executeQuery("SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE "
                + "TABLE_SCHEMA = '"
                + FT_SCHEMA
                + "' AND TABLE_NAME = 'INDEXES' AND COLUMN_NAME = 'NAME'");
        if (!rs.next()) {
            // BBB no NAME column, alter table to create it
            st.execute("ALTER TABLE " + FT_TABLE + " ADD COLUMN NAME VARCHAR");
            st.execute("UPDATE " + FT_TABLE + " SET NAME = '"
                    + DEFAULT_INDEX_NAME + "'");
        }

        String className = H2Fulltext.class.getName();
        st.execute("CREATE ALIAS IF NOT EXISTS " + PREFIX
                + "CREATE_INDEX FOR \"" + className + ".createIndex\"");
        st.execute("CREATE ALIAS IF NOT EXISTS " + PREFIX + "REINDEX FOR \""
                + className + ".reindex\"");
        st.execute("CREATE ALIAS IF NOT EXISTS " + PREFIX + "DROP_ALL FOR \""
                + className + ".dropAll\"");
        st.execute("CREATE ALIAS IF NOT EXISTS " + PREFIX + "SEARCH FOR \""
                + className + ".search\"");
    }

    // ----- static methods called directly to initialize fulltext -----

    /**
     * Creates a fulltext index for a table and column list.
     * <p>
     * A table may have any number of indexes at a time, but the index name must
     * be unique. If the index already exists, nothing is done, otherwise the
     * index is created and populated from existing data.
     * <p>
     * Usually called through:
     *
     * <pre>
     *   CALL NXFT_CREATE_INDEX('indexname', 'myschema', 'mytable', ('col1', 'col2'), 'lucene.analyzer');
     * </pre>
     *
     * @param conn the connection
     * @param indexName the index name
     * @param schema the schema name of the table
     * @param table the table name
     * @param columns the column list
     * @param analyzer the Lucene fulltext analyzer class
     */
    public static void createIndex(Connection conn, String indexName,
            String schema, String table, String columns, String analyzer)
            throws SQLException {
        if (indexName == null) {
            indexName = DEFAULT_INDEX_NAME;
        }
        columns = columns.replace("(", "").replace(")", "").replace(" ", "");
        PreparedStatement ps = conn.prepareStatement("DELETE FROM " + FT_TABLE
                + " WHERE NAME = ?");
        ps.setString(1, indexName);
        ps.execute();
        ps = conn.prepareStatement("INSERT INTO "
                + FT_TABLE
                + "(NAME, SCHEMA, TABLE, COLUMNS, ANALYZER) VALUES(?, ?, ?, ?, ?)");
        ps.setString(1, indexName);
        ps.setString(2, schema);
        ps.setString(3, table);
        ps.setString(4, columns);
        ps.setString(5, analyzer);
        ps.execute();
        ps.close();
        createTrigger(conn, schema, table);
    }

    /**
     * Re-creates the fulltext index for this database.
     */
    public static void reindex(Connection conn) throws SQLException {
        removeAllTriggers(conn);
        removeIndexFiles(conn);
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("SELECT * FROM " + FT_TABLE);
        Set<String> done = new HashSet<String>();
        while (rs.next()) {
            String schema = rs.getString("SCHEMA");
            String table = rs.getString("TABLE");
            String key = schema + '.' + table;
            if (!done.add(key)) {
                continue;
            }
            createTrigger(conn, schema, table);
            indexExistingRows(conn, schema, table);
        }
        st.close();
    }

    private static void indexExistingRows(Connection conn, String schema,
            String table) throws SQLException {
        Trigger trigger = new Trigger();
        trigger.init(conn, schema, null, table, false, Trigger.INSERT);
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("SELECT * FROM "
                + StringUtils.quoteIdentifier(schema) + '.'
                + StringUtils.quoteIdentifier(table));
        int n = rs.getMetaData().getColumnCount();
        while (rs.next()) {
            Object[] row = new Object[n];
            for (int i = 0; i < n; i++) {
                row[i] = rs.getObject(i + 1);
            }
            trigger.fire(conn, null, row);
        }
        st.close();
    }

    /**
     * Creates a trigger for the indexes on a table.
     * <p>
     * Usually called through:
     *
     * <pre>
     *   CALL NXFT_CREATE_TRIGGERS('myschema', 'mytable');
     * </pre>
     *
     * @param conn the connection
     * @param schema the schema name of the table
     * @param table the table name
     */
    private static void createTrigger(Connection conn, String schema,
            String table) throws SQLException {
        Statement st = conn.createStatement();
        schema = StringUtils.quoteIdentifier(schema);
        String trigger = schema + '.'
                + StringUtils.quoteIdentifier(PREFIX + table);
        st.execute("DROP TRIGGER IF EXISTS " + trigger);
        st.execute(String.format("CREATE TRIGGER %s "
                + "AFTER INSERT, UPDATE, DELETE ON %s.%s "
                + "FOR EACH ROW CALL \"%s\"", trigger, schema,
                StringUtils.quoteIdentifier(table),
                H2Fulltext.Trigger.class.getName()));
        st.close();
    }

    private static void removeAllTriggers(Connection conn) throws SQLException {
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("SELECT * FROM INFORMATION_SCHEMA.TRIGGERS");
        Statement st2 = conn.createStatement();
        while (rs.next()) {
            String trigger = rs.getString("TRIGGER_NAME");
            if (trigger.startsWith(PREFIX)) {
                st2.execute("DROP TRIGGER "
                        + StringUtils.quoteIdentifier(rs.getString("TRIGGER_SCHEMA"))
                        + "." + trigger);
            }
        }
        st.close();
        st2.close();
    }

    /**
     * Drops all fulltext indexes from the database.
     */
    public static void dropAll(Connection conn) throws SQLException {
        Statement st = conn.createStatement();
        st.execute("DROP SCHEMA IF EXISTS " + FT_SCHEMA);
        st.close();
        removeAllTriggers(conn);
        removeIndexFiles(conn);
    }

    private static String fieldForIndex(String indexName) {
        if (DEFAULT_INDEX_NAME.equals(indexName)) {
            return FIELD_TEXT;
        } else {
            return FIELD_TEXT + '_' + indexName;
        }
    }

    /**
     * Searches from the given full text index. The returned result set has a
     * single ID column which holds the keys for the matching rows.
     * <p>
     * Usually called through:
     *
     * <pre>
     *   SELECT * FROM NXFT_SEARCH(name, 'text');
     * </pre>
     *
     * @param conn the connection
     * @param indexName the index name
     * @param text the search query
     * @return the result set
     */
    @SuppressWarnings("unchecked")
    public static ResultSet search(Connection conn, String indexName,
            String text) throws SQLException {
        DatabaseMetaData meta = conn.getMetaData();
        if (indexName == null) {
            indexName = DEFAULT_INDEX_NAME;
        }

        // find schema, table and analyzer
        PreparedStatement ps = conn.prepareStatement("SELECT SCHEMA, TABLE, ANALYZER FROM "
                + FT_TABLE + " WHERE NAME = ?");
        ps.setString(1, indexName);
        ResultSet res = ps.executeQuery();
        if (!res.next()) {
            throw new SQLException("No such index: " + indexName);
        }
        String schema = res.getString(1);
        String table = res.getString(2);
        String analyzer = res.getString(3);
        ps.close();

        int type = getPrimaryKeyType(meta, schema, table);
        SimpleResultSet rs = new SimpleResultSet();
        rs.addColumn(COL_KEY, type, 0, 0);

        if (meta.getURL().startsWith("jdbc:columnlist:")) {
            // this is just to query the result set columns
            return rs;
        }

        String indexPath = getIndexPath(conn);
        try {
            BooleanQuery query = new BooleanQuery();
            QueryParser parser = new QueryParser(fieldForIndex(indexName),
                    getAnalyzer(analyzer));
            query.add(parser.parse(text), BooleanClause.Occur.MUST);

            getIndexWriter(indexPath, analyzer).commit();
            Searcher searcher = new IndexSearcher(indexPath);
            Iterator<Hit> it = searcher.search(query).iterator();
            for (; it.hasNext();) {
                Hit hit = it.next();
                Object key = asObject(hit.get(FIELD_KEY), type);
                rs.addRow(new Object[] { key });
            }
            // TODO keep it open if possible
            searcher.close();
        } catch (Exception e) {
            throw convertException(e);
        }
        return rs;
    }

    private static int getPrimaryKeyType(DatabaseMetaData meta, String schema,
            String table) throws SQLException {
        // find primary key name
        String primaryKeyName = null;
        ResultSet rs = meta.getPrimaryKeys(null, schema, table);
        while (rs.next()) {
            if (primaryKeyName != null) {
                throw new SQLException(
                        "Can only index primary keys on one column for "
                                + schema + '.' + table);
            }
            primaryKeyName = rs.getString("COLUMN_NAME");
        }
        if (primaryKeyName == null) {
            throw new SQLException("No primary key for " + schema + '.' + table);
        }
        rs.close();

        // find primary key type
        rs = meta.getColumns(null, schema, table, primaryKeyName);
        if (!rs.next()) {
            throw new SQLException("Could not find primary key");
        }
        int primaryKeyType = rs.getInt("DATA_TYPE");
        rs.close();

        return primaryKeyType;
    }

    private static Analyzer getAnalyzer(String analyzer) throws SQLException {
        try {
            return (Analyzer) Class.forName(analyzer).newInstance();
        } catch (Exception e) {
            throw new SQLException(e.toString());
        }
    }

    private static String getIndexPath(Connection conn) throws SQLException {
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("CALL DATABASE_PATH()");
        rs.next();
        String path = rs.getString(1);
        if (path == null) {
            throw new SQLException(
                    "Fulltext search for in-memory databases is not supported.");
        }
        st.close();
        return path + ".lucene";
    }

    private static IndexWriter getIndexWriter(String indexPath, String analyzer)
            throws SQLException {
        IndexWriter indexWriter = indexWriters.get(indexPath);
        if (indexWriter == null) {
            synchronized (indexWriters) {
                if (!indexWriters.containsKey(indexPath)) {
                    try {
                        boolean recreate = !IndexReader.indexExists(indexPath);
                        indexWriter = new IndexWriter(indexPath,
                                getAnalyzer(analyzer), recreate);
                    } catch (LockObtainFailedException e) {
                        log.error("Cannot open fulltext index", e);
                        throw convertException(e);
                    } catch (IOException e) {
                        throw convertException(e);
                    }
                    indexWriters.put(indexPath, indexWriter);
                }
            }
        }
        return indexWriter;
    }

    private static void removeIndexFiles(Connection conn) throws SQLException {
        String path = getIndexPath(conn);
        IndexWriter index = indexWriters.remove(path);
        if (index != null) {
            try {
                index.commit();
                index.close();
            } catch (IOException e) {
                throw convertException(e);
            }
        }
        FileSystem.getInstance(path).deleteRecursive(path);
    }

    private static SQLException convertException(Exception e) {
        SQLException e2 = new SQLException("Error while indexing document");
        e2.initCause(e);
        return e2;
    }

    protected static String asString(Object data, int type) throws SQLException {
        if (data == null) {
            return "";
        }
        switch (type) {
        case Types.BIT:
        case DataType.TYPE_BOOLEAN:
        case Types.INTEGER:
        case Types.BIGINT:
        case Types.DECIMAL:
        case Types.DOUBLE:
        case Types.FLOAT:
        case Types.NUMERIC:
        case Types.REAL:
        case Types.SMALLINT:
        case Types.TINYINT:
        case Types.DATE:
        case Types.TIME:
        case Types.TIMESTAMP:
        case Types.LONGVARCHAR:
        case Types.CHAR:
        case Types.VARCHAR:
            return data.toString();
        case Types.CLOB:
            try {
                if (data instanceof Clob) {
                    data = ((Clob) data).getCharacterStream();
                }
                return IOUtils.readStringAndClose((Reader) data, -1);
            } catch (IOException e) {
                throw Message.convert(e);
            }
        case Types.VARBINARY:
        case Types.LONGVARBINARY:
        case Types.BINARY:
        case Types.JAVA_OBJECT:
        case Types.OTHER:
        case Types.BLOB:
        case Types.STRUCT:
        case Types.REF:
        case Types.NULL:
        case Types.ARRAY:
        case DataType.TYPE_DATALINK:
        case Types.DISTINCT:
            throw new SQLException("Unsupported column data type: " + type);
        default:
            return "";
        }
    }

    // simple cases only, used for primary key
    private static Object asObject(String string, int type) throws SQLException {
        switch (type) {
        case Types.BIGINT:
            return Long.valueOf(string);
        case Types.INTEGER:
        case Types.SMALLINT:
        case Types.TINYINT:
            return Integer.valueOf(string);
        case Types.LONGVARCHAR:
        case Types.CHAR:
        case Types.VARCHAR:
            return string;
        default:
            throw new SQLException("Unsupport data type for primary key: "
                    + type);
        }
    }

    /**
     * Trigger used to update the lucene index upon row change.
     */
    public static class Trigger implements org.h2.api.Trigger, CloseListener {

        private String indexPath;

        private IndexWriter indexWriter;

        /** Starting at 0. */
        private int primaryKeyIndex;

        private int primaryKeyType;

        private Map<String, int[]> columnTypes;

        private Map<String, int[]> columnIndices;

        /**
         * Trigger interface: initialization.
         */
        @Override
        public void init(Connection conn, String schema, String triggerName,
                String table, boolean before, int opType) throws SQLException {
            indexPath = getIndexPath(conn);
            DatabaseMetaData meta = conn.getMetaData();

            // find primary key name
            String primaryKeyName = null;
            ResultSet rs = meta.getPrimaryKeys(null, schema, table);
            while (rs.next()) {
                if (primaryKeyName != null) {
                    throw new SQLException(
                            "Can only index primary keys on one column for: "
                                    + schema + '.' + table);
                }
                primaryKeyName = rs.getString("COLUMN_NAME");
            }
            if (primaryKeyName == null) {
                throw new SQLException("No primary key for " + schema + '.'
                        + table);
            }
            rs.close();

            // find primary key type
            rs = meta.getColumns(null, schema, table, primaryKeyName);
            if (!rs.next()) {
                throw new SQLException("No primary key for: " + schema + '.'
                        + table);
            }
            primaryKeyType = rs.getInt("DATA_TYPE");
            primaryKeyIndex = rs.getInt("ORDINAL_POSITION") - 1;
            rs.close();

            // find all columns info
            Map<String, Integer> allColumnTypes = new HashMap<String, Integer>();
            Map<String, Integer> allColumnIndices = new HashMap<String, Integer>();
            rs = meta.getColumns(null, schema, table, null);
            while (rs.next()) {
                String name = rs.getString("COLUMN_NAME");
                int type = rs.getInt("DATA_TYPE");
                int index = rs.getInt("ORDINAL_POSITION") - 1;
                allColumnTypes.put(name, Integer.valueOf(type));
                allColumnIndices.put(name, Integer.valueOf(index));
            }
            rs.close();

            // find columns configured for indexing
            PreparedStatement ps = conn.prepareStatement("SELECT NAME, COLUMNS, ANALYZER FROM "
                    + FT_TABLE + " WHERE SCHEMA = ? AND TABLE = ?");
            ps.setString(1, schema);
            ps.setString(2, table);
            rs = ps.executeQuery();
            columnTypes = new HashMap<String, int[]>();
            columnIndices = new HashMap<String, int[]>();
            while (rs.next()) {
                String indexName = rs.getString(1);
                String columns = rs.getString(2);
                String analyzer = rs.getString(3);
                List<String> columnNames = Arrays.asList(columns.split(","));

                // find the columns' indices and types
                int[] types = new int[columnNames.size()];
                int[] indices = new int[columnNames.size()];
                int i = 0;
                for (String columnName : columnNames) {
                    types[i] = allColumnTypes.get(columnName).intValue();
                    indices[i] = allColumnIndices.get(columnName).intValue();
                    i++;
                }
                columnTypes.put(indexName, types);
                columnIndices.put(indexName, indices);
                // only one call actually needed for this:
                indexWriter = getIndexWriter(indexPath, analyzer);
            }
            rs.close();
            ps.close();
        }

        /**
         * Trigger interface.
         */
        @Override
        public void fire(Connection conn, Object[] oldRow, Object[] newRow)
                throws SQLException {
            if (indexWriter == null) {
                throw new SQLException("Fulltext index was not initialized");
            }
            if (oldRow != null) {
                delete(oldRow);
            }
            if (newRow != null) {
                insert(newRow);
            }
            try {
                // need to flush otherwise some unit tests don't pass
                indexWriter.commit();
            } catch (IOException e) {
                throw convertException(e);
            }
        }

        private void insert(Object[] row) throws SQLException {
            Document doc = new Document();
            String key = asString(row[primaryKeyIndex], primaryKeyType);
            doc.add(new Field(FIELD_KEY, key, Field.Store.YES,
                    Field.Index.UN_TOKENIZED));

            // add fulltext for all indexes
            for (String indexName : columnTypes.keySet()) {
                int[] types = columnTypes.get(indexName);
                int[] indices = columnIndices.get(indexName);
                StringBuilder buf = new StringBuilder();
                for (int i = 0; i < types.length; i++) {
                    String data = asString(row[indices[i]], types[i]);
                    if (i > 0) {
                        buf.append(' ');
                    }
                    buf.append(data);
                }
                doc.add(new Field(fieldForIndex(indexName), buf.toString(),
                        Field.Store.NO, Field.Index.TOKENIZED));
            }

            try {
                indexWriter.addDocument(doc);
            } catch (IOException e) {
                throw convertException(e);
            }
        }

        private void delete(Object[] row) throws SQLException {
            String primaryKey = asString(row[primaryKeyIndex], primaryKeyType);
            try {
                indexWriter.deleteDocuments(new Term(FIELD_KEY, primaryKey));
            } catch (IOException e) {
                throw convertException(e);
            }
        }

        @Override
        public void close() throws SQLException {
            if (indexWriter != null) {
                try {
                    indexWriter.commit();
                    indexWriter.close();
                    indexWriter = null;
                    indexWriters.remove(indexPath);
                } catch (Exception e) {
                    throw convertException(e);
                }
            }
        }

        @Override
        public void remove() {
            // ignore
        }
    }

}