package marubinotto.h2.fulltext; import static marubinotto.h2.fulltext.InternalUtils.*; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.sql.Types; import java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.h2.api.Trigger; import org.h2.jdbc.JdbcConnection; import org.h2.message.DbException; import org.h2.tools.SimpleResultSet; import org.h2.util.New; import org.h2.util.StringUtils; public class FullTextSearch { private static Log logger = LogFactory.getLog(FullTextSearch.class); public static final String SCHEMA = "FT"; private static final String TRIGGER_PREFIX = "FT_"; public static void init(Connection conn) throws SQLException { Statement stat = conn.createStatement(); stat.execute("CREATE SCHEMA IF NOT EXISTS " + SCHEMA); // the structure of the index: word -> word ID -> row ID -> row info -> table info (IndexedTableInfo) // indexed table info // COLUMNS: a list of column names (column separated) to index; null -> all columns are indexed stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + ".INDEXES(" + "ID INT AUTO_INCREMENT PRIMARY KEY, " + "SCHEMA VARCHAR, " + "TABLE VARCHAR, " + "COLUMNS VARCHAR, " + "UNIQUE(SCHEMA, TABLE))"); // word -> word ID stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + ".WORDS(" + "ID INT AUTO_INCREMENT PRIMARY KEY, " + "NAME VARCHAR, " + "UNIQUE(NAME))"); // indexed row // KEY: condition(where) SQL by the key values for selecting the row stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + ".ROWS(" + "ID IDENTITY, " + "HASH INT, " + "INDEXID INT, " + "KEY VARCHAR, " + "UNIQUE(HASH, INDEXID, KEY))"); // word ID -> indexed row ID stat.execute("CREATE TABLE IF NOT EXISTS " + SCHEMA + ".MAP(" + "ROWID INT, " + "WORDID INT, " + "PRIMARY KEY(WORDID, ROWID))"); // NOTE: Should re-create alias because Java's name could be changed by refactoring stat.execute("DROP ALIAS IF EXISTS FT_CREATE_INDEX"); stat.execute("DROP ALIAS IF EXISTS FT_DROP_INDEX"); stat.execute("DROP ALIAS IF EXISTS FT_SEARCH"); stat.execute("DROP ALIAS IF EXISTS FT_SEARCH_DATA"); stat.execute("DROP ALIAS IF EXISTS FT_REINDEX"); stat.execute("DROP ALIAS IF EXISTS FT_DROP_ALL"); // aliases for static method of this class stat.execute("CREATE ALIAS IF NOT EXISTS FT_CREATE_INDEX FOR \"" + FullTextSearch.class.getName() + ".createIndex\""); stat.execute("CREATE ALIAS IF NOT EXISTS FT_DROP_INDEX FOR \"" + FullTextSearch.class.getName() + ".dropIndex\""); stat.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH FOR \"" + FullTextSearch.class.getName() + ".search\""); stat.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH_DATA FOR \"" + FullTextSearch.class.getName() + ".searchData\""); stat.execute("CREATE ALIAS IF NOT EXISTS FT_REINDEX FOR \"" + FullTextSearch.class.getName() + ".reindex\""); stat.execute("CREATE ALIAS IF NOT EXISTS FT_DROP_ALL FOR \"" + FullTextSearch.class.getName() + ".dropAll\""); // NOTE: Should re-create triggers because Java's name could be changed by refactoring recreateTriggers(conn); FullTextSearchContext context = FullTextSearchContext.getContext(conn); ResultSet rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".WORDS"); Map<String, Integer> map = context.getWordList(); map.clear(); while (rs.next()) { String word = rs.getString("NAME"); int id = rs.getInt("ID"); word = context.convertWord(word); if (word != null) map.put(word, id); } logger.info("Initialized: (words " + map.size() + ")"); } public static void createIndex(Connection conn, String schema, String table, String columnList) throws SQLException { init(conn); // Add an entry to INDEXES PreparedStatement prep = conn.prepareStatement("INSERT INTO " + SCHEMA + ".INDEXES(SCHEMA, TABLE, COLUMNS) VALUES(?, ?, ?)"); prep.setString(1, schema); prep.setString(2, table); prep.setString(3, columnList); prep.execute(); createTrigger(conn, schema, table); indexExistingRows(conn, schema, table); } /** * Re-creates the full text index for this database. Calling this method is * usually not needed, as the index is kept up-to-date automatically. */ public static void reindex(Connection conn) throws SQLException { init(conn); removeAllTriggers(conn); // Clear the index cache FullTextSearchContext context = FullTextSearchContext.getContext(conn); context.getWordList().clear(); // Delete all the indexes Statement stat = conn.createStatement(); stat.execute("TRUNCATE TABLE " + SCHEMA + ".WORDS"); stat.execute("TRUNCATE TABLE " + SCHEMA + ".ROWS"); stat.execute("TRUNCATE TABLE " + SCHEMA + ".MAP"); // Recreate triggers and indexes ResultSet rs = stat.executeQuery("SELECT * FROM " + SCHEMA + ".INDEXES"); while (rs.next()) { String schema = rs.getString("SCHEMA"); String table = rs.getString("TABLE"); createTrigger(conn, schema, table); indexExistingRows(conn, schema, table); } } /** * Searches from the full text index for this database. * The returned result set has the following column: * <ul><li>QUERY (varchar): the query to use to get the data. * The query does not include 'SELECT * FROM '. Example: * PUBLIC.TEST WHERE ID = 1 * </li><li>SCORE (float) the relevance score. This value is always 1.0 * for the native fulltext search. * </li></ul> * * @param conn the connection * @param text the search query * @param limit the maximum number of rows or 0 for no limit * @param offset the offset or 0 for no offset * @return the result set */ public static ResultSet search(Connection conn, String text, int limit, int offset) throws SQLException { try { return search(conn, text, limit, offset, false); } catch (DbException e) { throw DbException.toSQLException(e); } } /** * Searches from the full text index for this database. The result contains * the primary key data as an array. The returned result set has the * following columns: * <ul> * <li>SCHEMA (varchar): the schema name. Example: PUBLIC </li> * <li>TABLE (varchar): the table name. Example: TEST </li> * <li>COLUMNS (array of varchar): comma separated list of quoted column * names. The column names are quoted if necessary. Example: (ID) </li> * <li>KEYS (array of values): comma separated list of values. Example: (1) * </li> * <li>SCORE (float) the relevance score. This value is always 1.0 * for the native fulltext search. * </li> * </ul> * * @param conn the connection, which must be JdbcConnection * @param text the search query * @param limit the maximum number of rows or 0 for no limit * @param offset the offset or 0 for no offset * @return the result set */ public static ResultSet searchData(Connection conn, String text, int limit, int offset) throws SQLException { try { return search(conn, text, limit, offset, true); } catch (DbException e) { throw DbException.toSQLException(e); } } /** * Drop an existing full text index for a table. This method returns * silently if no index for this table exists. * * @param conn the connection * @param schema the schema name of the table (case sensitive) * @param table the table name (case sensitive) */ public static void dropIndex(Connection conn, String schema, String table) throws SQLException { init(conn); // Delete the table info from INDEXES PreparedStatement prep = conn.prepareStatement( "SELECT ID FROM " + SCHEMA + ".INDEXES WHERE SCHEMA=? AND TABLE=?"); prep.setString(1, schema); prep.setString(2, table); ResultSet rs = prep.executeQuery(); if (!rs.next()) { return; } int indexId = rs.getInt(1); prep = conn.prepareStatement("DELETE FROM " + SCHEMA + ".INDEXES WHERE ID=?"); prep.setInt(1, indexId); prep.execute(); // Delete the trigger createOrDropTrigger(conn, schema, table, false); // Delete the entries in ROWS prep = conn.prepareStatement( "DELETE FROM " + SCHEMA + ".ROWS WHERE INDEXID=? AND ROWNUM<10000"); while (true) { prep.setInt(1, indexId); int deleted = prep.executeUpdate(); if (deleted == 0) { break; } } // Delete the entries in MAP prep = conn.prepareStatement( "DELETE FROM " + SCHEMA + ".MAP M " + "WHERE NOT EXISTS (SELECT * FROM " + SCHEMA + ".ROWS R WHERE R.ID=M.ROWID) AND ROWID<10000"); while (true) { int deleted = prep.executeUpdate(); if (deleted == 0) { break; } } } /** * Drops all full text indexes from the database. * * @param conn the connection */ public static void dropAll(Connection conn) throws SQLException { init(conn); Statement stat = conn.createStatement(); stat.execute("DROP SCHEMA IF EXISTS " + SCHEMA); removeAllTriggers(conn); FullTextSearchContext.getContext(conn).clearAll(); } // Internals protected static void indexExistingRows(Connection conn, String schema, String table) throws SQLException { IndexUpdateTrigger existing = new IndexUpdateTrigger(); existing.init(conn, schema, null, table, false, Trigger.INSERT); String sql = "SELECT * FROM " + StringUtils.quoteIdentifier(schema) + "." + StringUtils.quoteIdentifier(table); ResultSet rs = conn.createStatement().executeQuery(sql); int columnCount = rs.getMetaData().getColumnCount(); while (rs.next()) { Object[] row = new Object[columnCount]; for (int i = 0; i < columnCount; i++) { row[i] = rs.getObject(i + 1); } existing.fire(conn, null, row); } } protected static void createTrigger(Connection conn, String schema, String table) throws SQLException { createOrDropTrigger(conn, schema, table, true); } private static void createOrDropTrigger(Connection conn, String schema, String table, boolean create) throws SQLException { Statement stat = conn.createStatement(); String trigger = StringUtils.quoteIdentifier(schema) + "." + StringUtils.quoteIdentifier(TRIGGER_PREFIX + table); stat.execute("DROP TRIGGER IF EXISTS " + trigger); if (create) { StringBuilder buff = new StringBuilder("CREATE TRIGGER IF NOT EXISTS "); buff.append(trigger). append(" AFTER INSERT, UPDATE, DELETE ON "). append(StringUtils.quoteIdentifier(schema)). append('.'). append(StringUtils.quoteIdentifier(table)). append(" FOR EACH ROW CALL \""). append(IndexUpdateTrigger.class.getName()). append("\""); stat.execute(buff.toString()); } } protected static void removeAllTriggers(Connection conn) throws SQLException { Statement selectAllTriggers = conn.createStatement(); ResultSet triggers = selectAllTriggers.executeQuery("SELECT * FROM INFORMATION_SCHEMA.TRIGGERS"); Statement dropTrigger = conn.createStatement(); while (triggers.next()) { String schema = triggers.getString("TRIGGER_SCHEMA"); String name = triggers.getString("TRIGGER_NAME"); if (name.startsWith(TRIGGER_PREFIX)) { name = StringUtils.quoteIdentifier(schema) + "." + StringUtils.quoteIdentifier(name); dropTrigger.execute("DROP TRIGGER " + name); } } } protected static void recreateTriggers(Connection conn) throws SQLException { removeAllTriggers(conn); Statement stat = conn.createStatement(); ResultSet indexedTables = stat.executeQuery("SELECT * FROM " + SCHEMA + ".INDEXES"); while (indexedTables.next()) { String schema = indexedTables.getString("SCHEMA"); String table = indexedTables.getString("TABLE"); createTrigger(conn, schema, table); } } // // search internals // private static final String SELECT_MAP_BY_WORD_ID = "SELECT ROWID FROM " + SCHEMA + ".MAP WHERE WORDID=?"; private static final String SELECT_ROW_BY_ID = "SELECT KEY, INDEXID FROM " + SCHEMA + ".ROWS WHERE ID=?"; /** * if asKeyValues is true, then conn must be JdbcConnection */ protected static ResultSet search(Connection conn, String text, int limit, int offset, boolean asKeyValues) throws SQLException { SimpleResultSet result = newSearchResultSet(asKeyValues); if (conn.getMetaData().getURL().startsWith("jdbc:columnlist:")) { // this is just to query the result set columns return result; } if (text == null || text.trim().length() == 0) { return result; } FullTextSearchContext context = FullTextSearchContext.getContext(conn); Set<String> inputWords = New.hashSet(); context.splitIntoWords(text, inputWords); Map<String, Integer> indexedWords = context.getWordList(); // Select the rows that contain all the words Set<Integer> rowIds = null, lastRowIds = null; PreparedStatement prepSelectMapByWordId = context.prepare(conn, SELECT_MAP_BY_WORD_ID); for (String word : inputWords) { lastRowIds = rowIds; rowIds = New.hashSet(); // Word ID Integer wordId = indexedWords.get(word); if (wordId == null) continue; // MAP: Word ID -> ROW IDs prepSelectMapByWordId.setInt(1, wordId.intValue()); ResultSet maps = prepSelectMapByWordId.executeQuery(); while (maps.next()) { Integer rowId = maps.getInt(1); if (lastRowIds == null || lastRowIds.contains(rowId)) { // add a row that contains all the previous words rowIds.add(rowId); } } } if (rowIds == null || rowIds.size() == 0) { return result; } // Get each of the rows by key values or condition SQL PreparedStatement prepSelectRowById = context.prepare(conn, SELECT_ROW_BY_ID); int rowCount = 0; for (int rowId : rowIds) { prepSelectRowById.setInt(1, rowId); ResultSet rs = prepSelectRowById.executeQuery(); if (!rs.next()) continue; // Missing the row corresponding to the ID in MAP if (offset > 0) { // Skip rows until during the offset offset--; } else { String conditionSql = rs.getString(1); int tableInfoId = rs.getInt(2); IndexedTableInfo tableInfo = context.getIndexedTableInfo(tableInfoId); if (asKeyValues) { // NOTE: the conn must be JdbcConnection Object[][] columnAndValue = parseConditionSqlToColumnsAndValues(conditionSql, (JdbcConnection)conn); result.addRow( tableInfo.schema, tableInfo.table, columnAndValue[0], columnAndValue[1], 1.0); } else { String query = StringUtils.quoteIdentifier(tableInfo.schema) + "." + StringUtils.quoteIdentifier(tableInfo.table) + " WHERE " + conditionSql; result.addRow(query, 1.0); } rowCount++; if (limit > 0 && rowCount >= limit) { break; } } } return result; } protected static final String FIELD_SCORE = "SCORE"; // A column name of the result set returned by the searchData method. protected static final String FIELD_SCHEMA = "SCHEMA"; protected static final String FIELD_TABLE = "TABLE"; protected static final String FIELD_COLUMNS = "COLUMNS"; // column names for keys protected static final String FIELD_KEYS = "KEYS"; // key values // The column name of the result set returned by the search method. private static final String FIELD_QUERY = "QUERY"; protected static SimpleResultSet newSearchResultSet(boolean asKeyValues) { SimpleResultSet result = new SimpleResultSet(); if (asKeyValues) { result.addColumn(FIELD_SCHEMA, Types.VARCHAR, 0, 0); result.addColumn(FIELD_TABLE, Types.VARCHAR, 0, 0); result.addColumn(FIELD_COLUMNS, Types.ARRAY, 0, 0); result.addColumn(FIELD_KEYS, Types.ARRAY, 0, 0); } else { result.addColumn(FIELD_QUERY, Types.VARCHAR, 0, 0); } result.addColumn(FIELD_SCORE, Types.FLOAT, 0, 0); return result; } }