/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.cf.taste.impl.model.jdbc;
import javax.sql.DataSource;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.jdbc.AbstractJDBCComponent;
/**
* <p>
* A {@link org.apache.mahout.cf.taste.model.JDBCDataModel} backed by a MySQL database and
* accessed via JDBC. It may work with other JDBC databases. By default, this class
* assumes that there is a {@link DataSource} available under the JNDI name
* "jdbc/taste", which gives access to a database with a "taste_preferences" table with the following schema:
* </p>
*
* <table>
* <tr>
* <th>user_id</th>
* <th>item_id</th>
* <th>preference</th>
* </tr>
* <tr>
* <td>987</td>
* <td>123</td>
* <td>0.9</td>
* </tr>
* <tr>
* <td>987</td>
* <td>456</td>
* <td>0.1</td>
* </tr>
* <tr>
* <td>654</td>
* <td>123</td>
* <td>0.2</td>
* </tr>
* <tr>
* <td>654</td>
* <td>789</td>
* <td>0.3</td>
* </tr>
* </table>
*
* <p>
* {@code preference} must have a type compatible with the Java {@code float} type.
* {@code user_id} and {@code item_id} should be compatible with long type (BIGINT). For example,
* the following command sets up a suitable table in MySQL, complete with primary key and indexes:
* </p>
*
* <p>
*
* <pre>
* CREATE TABLE taste_preferences (
* user_id BIGINT NOT NULL,
* item_id BIGINT NOT NULL,
* preference FLOAT NOT NULL,
* PRIMARY KEY (user_id, item_id),
* INDEX (user_id),
* INDEX (item_id)
* )
* </pre>
*
* </p>
*
* <p>The table may optionally have a {@code timestamp} column whose type is compatible with Java
* {@code long}.</p>
*
* <h3>Performance Notes</h3>
*
* <p>
* See the notes in {@link AbstractJDBCDataModel} regarding using connection pooling. It's pretty vital to
* performance.
* </p>
*
* <p>
* Some experimentation suggests that MySQL's InnoDB engine is faster than MyISAM for these kinds of
* applications. While MyISAM is the default and, I believe, generally considered the lighter-weight and
* faster of the two engines, my guess is the row-level locking of InnoDB helps here. Your mileage may vary.
* </p>
*
* <p>
* Here are some key settings that can be tuned for MySQL, and suggested size for a data set of around 1
* million elements:
* </p>
*
* <ul>
* <li>innodb_buffer_pool_size=64M</li>
* <li>myisam_sort_buffer_size=64M</li>
* <li>query_cache_limit=64M</li>
* <li>query_cache_min_res_unit=512K</li>
* <li>query_cache_type=1</li>
* <li>query_cache_size=64M</li>
* </ul>
*
* <p>
* Also consider setting some parameters on the MySQL Connector/J driver:
* </p>
*
* <pre>
* cachePreparedStatements = true
* cachePrepStmts = true
* cacheResultSetMetadata = true
* alwaysSendSetIsolation = false
* elideSetAutoCommits = true
* </pre>
*
* <p>
* Thanks to Amila Jayasooriya for contributing MySQL notes above as part of Google Summer of Code 2007.
* </p>
*/
public class MySQLJDBCDataModel extends AbstractJDBCDataModel {
/**
* <p>
* Creates a using the default {@link DataSource} (named
* {@link #DEFAULT_DATASOURCE_NAME} and default table/column names.
* </p>
*
* @throws TasteException
* if {@link DataSource} can't be found
*/
public MySQLJDBCDataModel() throws TasteException {
this(DEFAULT_DATASOURCE_NAME);
}
/**
* <p>
* Creates a using the default {@link DataSource} found under the given name, and
* using default table/column names.
* </p>
*
* @param dataSourceName
* name of {@link DataSource} to look up
* @throws TasteException
* if {@link DataSource} can't be found
*/
public MySQLJDBCDataModel(String dataSourceName) throws TasteException {
this(AbstractJDBCComponent.lookupDataSource(dataSourceName),
DEFAULT_PREFERENCE_TABLE,
DEFAULT_USER_ID_COLUMN,
DEFAULT_ITEM_ID_COLUMN,
DEFAULT_PREFERENCE_COLUMN,
DEFAULT_PREFERENCE_TIME_COLUMN);
}
/**
* <p>
* Creates a using the given {@link DataSource} and default table/column names.
* </p>
*
* @param dataSource
* {@link DataSource} to use
*/
public MySQLJDBCDataModel(DataSource dataSource) {
this(dataSource,
DEFAULT_PREFERENCE_TABLE,
DEFAULT_USER_ID_COLUMN,
DEFAULT_ITEM_ID_COLUMN,
DEFAULT_PREFERENCE_COLUMN,
DEFAULT_PREFERENCE_TIME_COLUMN);
}
/**
* <p>
* Creates a using the given {@link DataSource} and default table/column names.
* </p>
*
* @param dataSource
* {@link DataSource} to use
* @param preferenceTable
* name of table containing preference data
* @param userIDColumn
* user ID column name
* @param itemIDColumn
* item ID column name
* @param preferenceColumn
* preference column name
* @param timestampColumn timestamp column name (may be null)
*/
public MySQLJDBCDataModel(DataSource dataSource,
String preferenceTable,
String userIDColumn,
String itemIDColumn,
String preferenceColumn,
String timestampColumn) {
super(dataSource, preferenceTable, userIDColumn, itemIDColumn, preferenceColumn,
// getPreferenceSQL
"SELECT " + preferenceColumn + " FROM " + preferenceTable + " WHERE " + userIDColumn + "=? AND "
+ itemIDColumn + "=?",
// getPreferenceTimeSQL
"SELECT " + timestampColumn + " FROM " + preferenceTable + " WHERE " + userIDColumn + "=? AND "
+ itemIDColumn + "=?",
// getUserSQL
"SELECT DISTINCT " + userIDColumn + ", " + itemIDColumn + ", " + preferenceColumn + " FROM " + preferenceTable
+ " WHERE " + userIDColumn + "=? ORDER BY " + itemIDColumn,
// getAllUsersSQL
"SELECT DISTINCT " + userIDColumn + ", " + itemIDColumn + ", " + preferenceColumn + " FROM " + preferenceTable
+ " ORDER BY " + userIDColumn + ", " + itemIDColumn,
// getNumItemsSQL
"SELECT COUNT(DISTINCT " + itemIDColumn + ") FROM " + preferenceTable,
// getNumUsersSQL
"SELECT COUNT(DISTINCT " + userIDColumn + ") FROM " + preferenceTable,
// setPreferenceSQL
"INSERT INTO " + preferenceTable + '(' + userIDColumn + ',' + itemIDColumn + ',' + preferenceColumn
+ ") VALUES (?,?,?) ON DUPLICATE KEY UPDATE " + preferenceColumn + "=?",
// removePreference SQL
"DELETE FROM " + preferenceTable + " WHERE " + userIDColumn + "=? AND " + itemIDColumn + "=?",
// getUsersSQL
"SELECT DISTINCT " + userIDColumn + " FROM " + preferenceTable + " ORDER BY " + userIDColumn,
// getItemsSQL
"SELECT DISTINCT " + itemIDColumn + " FROM " + preferenceTable + " ORDER BY " + itemIDColumn,
// getPrefsForItemSQL
"SELECT DISTINCT " + userIDColumn + ", " + itemIDColumn + ", " + preferenceColumn + " FROM " + preferenceTable
+ " WHERE " + itemIDColumn + "=? ORDER BY " + userIDColumn,
// getNumPreferenceForItemSQL
"SELECT COUNT(1) FROM " + preferenceTable + " WHERE " + itemIDColumn + "=?",
// getNumPreferenceForItemsSQL
"SELECT COUNT(1) FROM " + preferenceTable + " tp1 JOIN " + preferenceTable + " tp2 " + "USING ("
+ userIDColumn + ") WHERE tp1." + itemIDColumn + "=? and tp2." + itemIDColumn + "=?",
"SELECT MAX(" + preferenceColumn + ") FROM " + preferenceTable,
"SELECT MIN(" + preferenceColumn + ") FROM " + preferenceTable);
}
@Override
protected int getFetchSize() {
// Need to return this for MySQL Connector/J to make it use streaming mode
return Integer.MIN_VALUE;
}
}