FullTextSearch.java example

Explorer
Piggydb-master
- src
- standalone
  - src
    - main
      - java
        marubinotto
        piggydb
        standalone
        PiggydbServer.java
        PiggydbSplashScreen.java
        Settings.java
package marubinotto.h2.fulltext;

import static marubinotto.h2.fulltext.InternalUtils.*;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.h2.api.Trigger;
import org.h2.jdbc.JdbcConnection;
import org.h2.message.DbException;
import org.h2.tools.SimpleResultSet;
import org.h2.util.New;
import org.h2.util.StringUtils;

public class FullTextSearch {
	
	private static Log logger = LogFactory.getLog(FullTextSearch.class);
	
	public static final String SCHEMA = "FT";
	private static final String TRIGGER_PREFIX = "FT_";

    public static void init(Connection conn) throws SQLException {
        Statement stat = conn.createStatement();
        
        stat.execute("CREATE SCHEMA IF NOT EXISTS " + SCHEMA);
        
        // the structure of the index: word -> word ID -> row ID -> row info -> table info (IndexedTableInfo)
        
        // indexed table info
        //   COLUMNS: a list of column names (column separated) to index; null -> all columns are indexed
        stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA
        	+ ".INDEXES(" +
        		"ID INT AUTO_INCREMENT PRIMARY KEY, " +
        		"SCHEMA VARCHAR, " +
        		"TABLE VARCHAR, " +
        		"COLUMNS VARCHAR, " +
        		"UNIQUE(SCHEMA, TABLE))");
        
        // word -> word ID
        stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA
        	+ ".WORDS(" +
        		"ID INT AUTO_INCREMENT PRIMARY KEY, " +
        		"NAME VARCHAR, " +
        		"UNIQUE(NAME))");
        
        // indexed row
        //   KEY: condition(where) SQL by the key values for selecting the row
        stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA
        	+ ".ROWS(" +
        		"ID IDENTITY, " +
        		"HASH INT, " +
        		"INDEXID INT, " +
        		"KEY VARCHAR, " +
        		"UNIQUE(HASH, INDEXID, KEY))");
        
        // word ID -> indexed row ID
        stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA
        	+ ".MAP(" +
        		"ROWID INT, " +
        		"WORDID INT, " +
        		"PRIMARY KEY(WORDID, ROWID))");
        
        // NOTE: Should re-create alias because Java's name could be changed by refactoring
        stat.execute("DROP ALIAS IF EXISTS FT_CREATE_INDEX");
        stat.execute("DROP ALIAS IF EXISTS FT_DROP_INDEX");
        stat.execute("DROP ALIAS IF EXISTS FT_SEARCH");
        stat.execute("DROP ALIAS IF EXISTS FT_SEARCH_DATA");
        stat.execute("DROP ALIAS IF EXISTS FT_REINDEX");
        stat.execute("DROP ALIAS IF EXISTS FT_DROP_ALL");
        
        // aliases for static method of this class
        stat.execute("CREATE ALIAS IF NOT EXISTS FT_CREATE_INDEX FOR \"" + FullTextSearch.class.getName() + ".createIndex\"");
        stat.execute("CREATE ALIAS IF NOT EXISTS FT_DROP_INDEX FOR \"" + FullTextSearch.class.getName() + ".dropIndex\"");
        stat.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH FOR \"" + FullTextSearch.class.getName() + ".search\"");
        stat.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH_DATA FOR \"" + FullTextSearch.class.getName() + ".searchData\"");
        stat.execute("CREATE ALIAS IF NOT EXISTS FT_REINDEX FOR \"" + FullTextSearch.class.getName() + ".reindex\"");
        stat.execute("CREATE ALIAS IF NOT EXISTS FT_DROP_ALL FOR \"" + FullTextSearch.class.getName() + ".dropAll\"");
        
        // NOTE: Should re-create triggers because Java's name could be changed by refactoring
        recreateTriggers(conn);
        
        FullTextSearchContext context = FullTextSearchContext.getContext(conn);
        
        ResultSet rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".WORDS");
        Map<String, Integer> map = context.getWordList();
        map.clear();
        while (rs.next()) {
            String word = rs.getString("NAME");
            int id = rs.getInt("ID");
            word = context.convertWord(word);
            if (word != null) map.put(word, id);
        }
        
        logger.info("Initialized: (words " + map.size() + ")");
    }
    
    public static void createIndex(Connection conn, String schema, String table, String columnList) throws SQLException {
        init(conn);
        
        // Add an entry to INDEXES
        PreparedStatement prep = conn.prepareStatement("INSERT INTO " + SCHEMA
        	+ ".INDEXES(SCHEMA, TABLE, COLUMNS) VALUES(?, ?, ?)");
        prep.setString(1, schema);
        prep.setString(2, table);
        prep.setString(3, columnList);
        prep.execute();
        
        createTrigger(conn, schema, table);
        
        indexExistingRows(conn, schema, table);
    }
    
    /**
     * Re-creates the full text index for this database. Calling this method is
     * usually not needed, as the index is kept up-to-date automatically.
     */
    public static void reindex(Connection conn) throws SQLException {
        init(conn);
	
        removeAllTriggers(conn);
	
        // Clear the index cache
        FullTextSearchContext context = FullTextSearchContext.getContext(conn);
        context.getWordList().clear();
	
        // Delete all the indexes
        Statement stat = conn.createStatement();
        stat.execute("TRUNCATE TABLE " + SCHEMA + ".WORDS");
        stat.execute("TRUNCATE TABLE " + SCHEMA + ".ROWS");
        stat.execute("TRUNCATE TABLE " + SCHEMA + ".MAP");
	
        // Recreate triggers and indexes
        ResultSet rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".INDEXES");
        while (rs.next()) {
            String schema = rs.getString("SCHEMA");
            String table = rs.getString("TABLE");
            createTrigger(conn, schema, table);
            indexExistingRows(conn, schema, table);
        }
    }
    
    /**
     * Searches from the full text index for this database.
     * The returned result set has the following column:
     * <ul><li>QUERY (varchar): the query to use to get the data.
     * The query does not include 'SELECT * FROM '. Example:
     * PUBLIC.TEST WHERE ID = 1
     * </li><li>SCORE (float) the relevance score. This value is always 1.0
     * for the native fulltext search.
     * </li></ul>
     *
     * @param conn the connection
     * @param text the search query
     * @param limit the maximum number of rows or 0 for no limit
     * @param offset the offset or 0 for no offset
     * @return the result set
     */
    public static ResultSet search(Connection conn, String text, int limit, int offset) throws SQLException {
        try {
            return search(conn, text, limit, offset, false);
        } 
        catch (DbException e) {
            throw DbException.toSQLException(e);
        }
    }

    /**
     * Searches from the full text index for this database. The result contains
     * the primary key data as an array. The returned result set has the
     * following columns:
     * <ul>
     * <li>SCHEMA (varchar): the schema name. Example: PUBLIC </li>
     * <li>TABLE (varchar): the table name. Example: TEST </li>
     * <li>COLUMNS (array of varchar): comma separated list of quoted column
     * names. The column names are quoted if necessary. Example: (ID) </li>
     * <li>KEYS (array of values): comma separated list of values. Example: (1)
     * </li>
     * <li>SCORE (float) the relevance score. This value is always 1.0
     * for the native fulltext search.
     * </li>
     * </ul>
     *
     * @param conn the connection, which must be JdbcConnection
     * @param text the search query
     * @param limit the maximum number of rows or 0 for no limit
     * @param offset the offset or 0 for no offset
     * @return the result set
     */
    public static ResultSet searchData(Connection conn, String text, int limit, int offset) throws SQLException {
        try {
            return search(conn, text, limit, offset, true);
        } 
        catch (DbException e) {
            throw DbException.toSQLException(e);
        }
    }

    /**
     * Drop an existing full text index for a table. This method returns
     * silently if no index for this table exists.
     *
     * @param conn the connection
     * @param schema the schema name of the table (case sensitive)
     * @param table the table name (case sensitive)
     */
    public static void dropIndex(Connection conn, String schema, String table) throws SQLException {
        init(conn);
        
        // Delete the table info from INDEXES
        PreparedStatement prep = conn.prepareStatement(
        	"SELECT ID FROM " + SCHEMA + ".INDEXES WHERE SCHEMA=? AND TABLE=?");
        prep.setString(1, schema);
        prep.setString(2, table);
        ResultSet rs = prep.executeQuery();
        if (!rs.next()) {
            return;
        }
        int indexId = rs.getInt(1);
        prep = conn.prepareStatement("DELETE FROM " + SCHEMA  + ".INDEXES WHERE ID=?");
        prep.setInt(1, indexId);
        prep.execute();
        
        // Delete the trigger
        createOrDropTrigger(conn, schema, table, false);
        
        // Delete the entries in ROWS
        prep = conn.prepareStatement(
        	"DELETE FROM " + SCHEMA + ".ROWS WHERE INDEXID=? AND ROWNUM<10000");
        while (true) {
            prep.setInt(1, indexId);
            int deleted = prep.executeUpdate();
            if (deleted == 0) {
                break;
            }
        }
        
        // Delete the entries in MAP
        prep = conn.prepareStatement(
        	"DELETE FROM " + SCHEMA + ".MAP M " +
                "WHERE NOT EXISTS (SELECT * FROM " + SCHEMA + ".ROWS R WHERE R.ID=M.ROWID) AND ROWID<10000");
        while (true) {
            int deleted = prep.executeUpdate();
            if (deleted == 0) {
                break;
            }
        }
    }
    
    /**
     * Drops all full text indexes from the database.
     *
     * @param conn the connection
     */
    public static void dropAll(Connection conn) throws SQLException {
        init(conn);
        Statement stat = conn.createStatement();
        stat.execute("DROP SCHEMA IF EXISTS " + SCHEMA);
        removeAllTriggers(conn);
        FullTextSearchContext.getContext(conn).clearAll();
    }

    
// Internals
    
    protected static void indexExistingRows(Connection conn, String schema, String table) throws SQLException {
    	IndexUpdateTrigger existing = new IndexUpdateTrigger();
        existing.init(conn, schema, null, table, false, Trigger.INSERT);
        
        String sql = "SELECT * FROM " + StringUtils.quoteIdentifier(schema) + "." + StringUtils.quoteIdentifier(table);
        ResultSet rs = conn.createStatement().executeQuery(sql);
        int columnCount = rs.getMetaData().getColumnCount();
        while (rs.next()) {
            Object[] row = new Object[columnCount];
            for (int i = 0; i < columnCount; i++) {
                row[i] = rs.getObject(i + 1);
            }
            existing.fire(conn, null, row);
        }
    }

    protected static void createTrigger(Connection conn, String schema, String table) throws SQLException {
        createOrDropTrigger(conn, schema, table, true);
    }

    private static void createOrDropTrigger(Connection conn, String schema, String table, boolean create) 
    throws SQLException {
        Statement stat = conn.createStatement();
        String trigger = StringUtils.quoteIdentifier(schema) + "."
        	+ StringUtils.quoteIdentifier(TRIGGER_PREFIX + table);
        stat.execute("DROP TRIGGER IF EXISTS " + trigger);
        if (create) {
            StringBuilder buff = new StringBuilder("CREATE TRIGGER IF NOT EXISTS ");
            buff.append(trigger).
                append(" AFTER INSERT, UPDATE, DELETE ON ").
                append(StringUtils.quoteIdentifier(schema)).
                append('.').
                append(StringUtils.quoteIdentifier(table)).
                append(" FOR EACH ROW CALL \"").
                append(IndexUpdateTrigger.class.getName()).
                append("\"");
            stat.execute(buff.toString());
        }
    }
    
    protected static void removeAllTriggers(Connection conn) throws SQLException {
        Statement selectAllTriggers = conn.createStatement();
        ResultSet triggers = selectAllTriggers.executeQuery("SELECT * FROM INFORMATION_SCHEMA.TRIGGERS");
        Statement dropTrigger = conn.createStatement();
        while (triggers.next()) {
            String schema = triggers.getString("TRIGGER_SCHEMA");
            String name = triggers.getString("TRIGGER_NAME");
            if (name.startsWith(TRIGGER_PREFIX)) {
                name = StringUtils.quoteIdentifier(schema) + "." + StringUtils.quoteIdentifier(name);
                dropTrigger.execute("DROP TRIGGER " + name);
            }
        }
    }
    
    protected static void recreateTriggers(Connection conn) throws SQLException {
    	removeAllTriggers(conn);
    	
    	Statement stat = conn.createStatement();
    	ResultSet indexedTables = stat.executeQuery("SELECT * FROM " + SCHEMA + ".INDEXES");
        while (indexedTables.next()) {
            String schema = indexedTables.getString("SCHEMA");
            String table = indexedTables.getString("TABLE");
            createTrigger(conn, schema, table);
        }
    }
    
    
    //
    // search internals
    //
    
    private static final String SELECT_MAP_BY_WORD_ID = "SELECT ROWID FROM " + SCHEMA + ".MAP WHERE WORDID=?";
    private static final String SELECT_ROW_BY_ID = "SELECT KEY, INDEXID FROM " + SCHEMA + ".ROWS WHERE ID=?";
   
    /**
     * if asKeyValues is true, then conn must be JdbcConnection
     */
    protected static ResultSet search(Connection conn, String text, int limit, int offset, boolean asKeyValues) 
    throws SQLException {
        SimpleResultSet result = newSearchResultSet(asKeyValues);
        if (conn.getMetaData().getURL().startsWith("jdbc:columnlist:")) {
            // this is just to query the result set columns
            return result;
        }
        if (text == null || text.trim().length() == 0) {
            return result;
        }
        
        FullTextSearchContext context = FullTextSearchContext.getContext(conn);
        Set<String> inputWords = New.hashSet();
        context.splitIntoWords(text, inputWords);

        Map<String, Integer> indexedWords = context.getWordList();

        // Select the rows that contain all the words
        Set<Integer> rowIds = null, lastRowIds = null;	
        PreparedStatement prepSelectMapByWordId = context.prepare(conn, SELECT_MAP_BY_WORD_ID);
        for (String word : inputWords) {
            lastRowIds = rowIds;
            rowIds = New.hashSet();
            
            // Word ID
            Integer wordId = indexedWords.get(word);
            if (wordId == null) continue;
            
            // MAP: Word ID -> ROW IDs
            prepSelectMapByWordId.setInt(1, wordId.intValue());
            ResultSet maps = prepSelectMapByWordId.executeQuery();
            while (maps.next()) {
                Integer rowId = maps.getInt(1);
                if (lastRowIds == null || lastRowIds.contains(rowId)) {		// add a row that contains all the previous words
                    rowIds.add(rowId);
                }
            }
        }
        if (rowIds == null || rowIds.size() == 0) {
            return result;
        }
        
        // Get each of the rows by key values or condition SQL
        PreparedStatement prepSelectRowById = context.prepare(conn, SELECT_ROW_BY_ID);
        int rowCount = 0;
        for (int rowId : rowIds) {
            prepSelectRowById.setInt(1, rowId);
            ResultSet rs = prepSelectRowById.executeQuery();
            if (!rs.next()) continue;	// Missing the row corresponding to the ID in MAP
            
            if (offset > 0) {	// Skip rows until during the offset 
                offset--;
            } 
            else {
                String conditionSql = rs.getString(1);
                int tableInfoId = rs.getInt(2);
                IndexedTableInfo tableInfo = context.getIndexedTableInfo(tableInfoId);
                if (asKeyValues) {
                	// NOTE: the conn must be JdbcConnection
                    Object[][] columnAndValue = 
                    	parseConditionSqlToColumnsAndValues(conditionSql, (JdbcConnection)conn);
                    result.addRow(
                    	tableInfo.schema,
                    	tableInfo.table,
                        columnAndValue[0],
                        columnAndValue[1],
                        1.0);
                } 
                else {
                    String query = StringUtils.quoteIdentifier(tableInfo.schema) +
                        "." + StringUtils.quoteIdentifier(tableInfo.table) +
                        " WHERE " + conditionSql;
                    result.addRow(query, 1.0);
                }
                rowCount++;
                if (limit > 0 && rowCount >= limit) {
                    break;
                }
            }
        }
        return result;
    }
    
    protected static final String FIELD_SCORE = "SCORE";
    // A column name of the result set returned by the searchData method.
    protected static final String FIELD_SCHEMA = "SCHEMA";
    protected static final String FIELD_TABLE = "TABLE";
    protected static final String FIELD_COLUMNS = "COLUMNS";		// column names for keys
    protected static final String FIELD_KEYS = "KEYS";					// key values
    // The column name of the result set returned by the search method.
    private static final String FIELD_QUERY = "QUERY";
    
    protected static SimpleResultSet newSearchResultSet(boolean asKeyValues) {
        SimpleResultSet result = new SimpleResultSet();
        if (asKeyValues) {
            result.addColumn(FIELD_SCHEMA, Types.VARCHAR, 0, 0);
            result.addColumn(FIELD_TABLE, Types.VARCHAR, 0, 0);
            result.addColumn(FIELD_COLUMNS, Types.ARRAY, 0, 0);
            result.addColumn(FIELD_KEYS, Types.ARRAY, 0, 0);
        } 
        else {
            result.addColumn(FIELD_QUERY, Types.VARCHAR, 0, 0);
        }
        result.addColumn(FIELD_SCORE, Types.FLOAT, 0, 0);
        return result;
    }
}