/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.cf.taste.impl.recommender.slopeone.jdbc;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.jdbc.AbstractJDBCDataModel;
/**
* <p>
* MySQL-specific implementation. Should be used in conjunction with a
* {@link org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel}. This
* implementation stores item-item diffs in a MySQL database and encapsulates some other slope-one-specific
* operations that are needed on the preference data in the database. It assumes the database has a schema
* like:
* </p>
*
* <table>
* <tr>
* <th>item_id_a</th>
* <th>item_id_b</th>
* <th>average_diff</th>
* <th>standard_deviation</th>
* <th>count</th>
* </tr>
* <tr>
* <td>123</td>
* <td>234</td>
* <td>0.5</td>
* <td>0.12</td>
* <td>5</td>
* </tr>
* <tr>
* <td>123</td>
* <td>789</td>
* <td>-1.33</td>
* <td>0.2</td>
* <td>3</td>
* </tr>
* <tr>
* <td>234</td>
* <td>789</td>
* <td>2.1</td>
* <td>1.03</td>
* <td>1</td>
* </tr>
* </table>
*
* <p>
* {@code item_id_a} and {@code item_id_b} should have types compatible with the long primitive
* type. {@code average_diff} and {@code standard_deviation} must be compatible with
* {@code float} and {@code count} must be compatible with {@code int}.
* </p>
*
* <p>
* The following command sets up a suitable table in MySQL:
* </p>
*
* <p>
*
* <pre>
* CREATE TABLE taste_slopeone_diffs (
* item_id_a BIGINT NOT NULL,
* item_id_b BIGINT NOT NULL,
* average_diff FLOAT NOT NULL,
* standard_deviation FLOAT NOT NULL,
* count INT NOT NULL,
* PRIMARY KEY (item_id_a, item_id_b),
* INDEX (item_id_a),
* INDEX (item_id_b)
* )
* </pre>
*
* </p>
*/
public final class MySQLJDBCDiffStorage extends AbstractJDBCDiffStorage {
private static final int DEFAULT_MIN_DIFF_COUNT = 2;
public MySQLJDBCDiffStorage(AbstractJDBCDataModel dataModel) throws TasteException {
this(dataModel,
DEFAULT_DIFF_TABLE,
DEFAULT_ITEM_A_COLUMN,
DEFAULT_ITEM_B_COLUMN,
DEFAULT_COUNT_COLUMN,
DEFAULT_AVERAGE_DIFF_COLUMN,
DEFAULT_STDEV_COLUMN,
DEFAULT_MIN_DIFF_COUNT);
}
public MySQLJDBCDiffStorage(AbstractJDBCDataModel dataModel,
String diffsTable,
String itemIDAColumn,
String itemIDBColumn,
String countColumn,
String avgColumn,
String stdevColumn,
int minDiffCount) throws TasteException {
super(dataModel,
// getDiffSQL
"SELECT " + countColumn + ", " + avgColumn + ", " + stdevColumn + " FROM "
+ diffsTable + " WHERE " + itemIDAColumn + "=? AND " + itemIDBColumn + "=?",
// getDiffsSQL
"(SELECT " + countColumn + ", " + avgColumn + ", " + stdevColumn + ", " + itemIDAColumn
+ " FROM " + diffsTable + ", "
+ dataModel.getPreferenceTable() + " WHERE " + itemIDBColumn + "=? AND " + itemIDAColumn + " = "
+ dataModel.getItemIDColumn() + " AND " + dataModel.getUserIDColumn() + "=? ORDER BY "
+ itemIDAColumn + ") UNION ("
+ "SELECT " + countColumn + ", -" + avgColumn + ", " + stdevColumn + ", " + itemIDBColumn
+ " FROM " + diffsTable + ", "
+ dataModel.getPreferenceTable() + " WHERE " + itemIDAColumn + "=? AND " + itemIDBColumn + " = "
+ dataModel.getItemIDColumn() + " AND " + dataModel.getUserIDColumn() + "=? ORDER BY "
+ itemIDBColumn + ')',
// getAverageItemPrefSQL
"SELECT COUNT(1), AVG(" + dataModel.getPreferenceColumn() + ") FROM "
+ dataModel.getPreferenceTable() + " WHERE " + dataModel.getItemIDColumn() + "=?",
// getDiffsAffectedByUserSQL
"SELECT diffs." + countColumn + ", diffs." + avgColumn + ", diffs." + itemIDAColumn
+ ", diffs." + itemIDBColumn + ", prefs." + dataModel.getPreferenceColumn()
+ " FROM " + diffsTable + " AS diffs, " + dataModel.getPreferenceTable() + " AS prefs WHERE prefs."
+ dataModel.getUserIDColumn() + "=? AND (prefs." + dataModel.getItemIDColumn()
+ " = diffs." + itemIDAColumn + " OR prefs." + dataModel.getItemIDColumn()
+ " = diffs." + itemIDBColumn + ')',
// updateDiffSQLs
new String[] {
"UPDATE " + diffsTable + " SET "
+ avgColumn + " = " + avgColumn + " - (? / " + countColumn
+ ") WHERE " + itemIDAColumn + "=?",
"UPDATE " + diffsTable + " SET "
+ avgColumn + " = " + avgColumn + " + (? / " + countColumn
+ ") WHERE " + itemIDBColumn + "=?"},
// updateOneDiffSQL
"UPDATE " + diffsTable + " SET " + countColumn + "=?, " + avgColumn + "=? WHERE "
+ itemIDAColumn + "=? AND " + itemIDBColumn + "=?",
// addDiffSQL
"INSERT INTO " + diffsTable + " (" + itemIDAColumn + ", " + itemIDBColumn + ", " + avgColumn
+ ", " + stdevColumn + ", " + countColumn + ") VALUES (?,?,?,0,1)",
// removeDiffSQL
"DELETE FROM " + diffsTable + " WHERE " + itemIDAColumn + "=? AND " + itemIDBColumn + "=?",
// getRecommendableItemsSQL
"SELECT id FROM " + "(SELECT " + itemIDAColumn + " AS id FROM " + diffsTable + ", "
+ dataModel.getPreferenceTable() + " WHERE " + itemIDBColumn + " = "
+ dataModel.getItemIDColumn() + " AND " + dataModel.getUserIDColumn() + "=? UNION DISTINCT"
+ " SELECT " + itemIDBColumn + " AS id FROM " + diffsTable + ", "
+ dataModel.getPreferenceTable() + " WHERE " + itemIDAColumn + " = "
+ dataModel.getItemIDColumn() + " AND " + dataModel.getUserIDColumn() + "=?) "
+ "possible_item_ids WHERE id NOT IN (SELECT " + dataModel.getItemIDColumn() + " FROM "
+ dataModel.getPreferenceTable() + " WHERE " + dataModel.getUserIDColumn() + "=?)",
// deleteDiffsSQL
"TRUNCATE " + diffsTable,
// createDiffsSQL
"INSERT INTO " + diffsTable + " (" + itemIDAColumn + ", " + itemIDBColumn + ", " + avgColumn
+ ", " + stdevColumn + ", " + countColumn + ") SELECT prefsA." + dataModel.getItemIDColumn()
+ ", prefsB." + dataModel.getItemIDColumn() + ", AVG(prefsB." + dataModel.getPreferenceColumn()
+ " - prefsA." + dataModel.getPreferenceColumn() + "), STDDEV_POP(prefsB."
+ dataModel.getPreferenceColumn() + " - prefsA." + dataModel.getPreferenceColumn()
+ "), COUNT(1) AS count FROM " + dataModel.getPreferenceTable() + " prefsA, "
+ dataModel.getPreferenceTable() + " prefsB WHERE prefsA." + dataModel.getUserIDColumn()
+ " = prefsB." + dataModel.getUserIDColumn() + " AND prefsA." + dataModel.getItemIDColumn()
+ " < prefsB." + dataModel.getItemIDColumn() + ' ' + " GROUP BY prefsA."
+ dataModel.getItemIDColumn() + ", prefsB." + dataModel.getItemIDColumn()
+ " HAVING count >= ?",
// diffsExistSQL
"SELECT COUNT(1) FROM " + diffsTable,
minDiffCount);
}
/**
* @see org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel#getFetchSize()
*/
@Override
protected int getFetchSize() {
return Integer.MIN_VALUE;
}
}