/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package hivemall.mf; import hivemall.mf.FactorizedModel.RankInitScheme; import hivemall.utils.lang.mutable.MutableInt; import java.io.File; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.MapredContextAccessor; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.Collector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.junit.Assert; import org.junit.Test; public class MatrixFactorizationSGDUDTFTest { private static final boolean DEBUG_PRINT = false; private static void print(String msg) { if (DEBUG_PRINT) System.out.print(msg); } private static void println(String msg) { if (DEBUG_PRINT) System.out.println(msg); } private static void println() { if (DEBUG_PRINT) System.out.println(); } @Test public void testDefaultInit() throws HiveException { println("--------------------------\n testGaussian()"); OnlineMatrixFactorizationUDTF mf = new MatrixFactorizationSGDUDTF(); ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector; ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector; //ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector, new String("-factor 3 -eta 0.0002")); ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector( PrimitiveObjectInspectorFactory.javaStringObjectInspector, new String("-factor 3")); ObjectInspector[] argOIs = new ObjectInspector[] {intOI, intOI, floatOI, param}; mf.initialize(argOIs); Assert.assertTrue(mf.rankInit == RankInitScheme.random); float[][] rating = { {5, 3, 0, 1}, {4, 0, 0, 1}, {1, 1, 0, 5}, {1, 0, 0, 4}, {0, 1, 5, 4}}; Object[] args = new Object[3]; final int num_iters = 100; for (int iter = 0; iter < num_iters; iter++) { for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { //print(row + "," + col + ","); args[0] = row; args[1] = col; args[2] = (float) rating[row][col]; //println((float) rating[row][col]); mf.process(args); } } } for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { double predicted = mf.predict(row, col); print(rating[row][col] + "[" + predicted + "]\t"); Assert.assertEquals(rating[row][col], predicted, 0.2d); } println(); } } @Test public void testRandInit() throws HiveException { println("--------------------------\n testRandInit()"); OnlineMatrixFactorizationUDTF mf = new MatrixFactorizationSGDUDTF(); ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector; ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector; ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector( PrimitiveObjectInspectorFactory.javaStringObjectInspector, new String( "-factor 3 -rankinit random")); ObjectInspector[] argOIs = new ObjectInspector[] {intOI, intOI, floatOI, param}; mf.initialize(argOIs); Assert.assertTrue(mf.rankInit == RankInitScheme.random); float[][] rating = { {5, 3, 0, 1}, {4, 0, 0, 1}, {1, 1, 0, 5}, {1, 0, 0, 4}, {0, 1, 5, 4}}; Object[] args = new Object[3]; final int num_iters = 100; for (int iter = 0; iter < num_iters; iter++) { for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { args[0] = row; args[1] = col; args[2] = (float) rating[row][col]; mf.process(args); } } } for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { double predicted = mf.predict(row, col); print(rating[row][col] + "[" + predicted + "]\t"); Assert.assertEquals(rating[row][col], predicted, 0.2d); } println(); } } @Test public void testGaussianInit() throws HiveException { println("--------------------------\n testGaussianInit()"); OnlineMatrixFactorizationUDTF mf = new MatrixFactorizationSGDUDTF(); ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector; ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector; ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector( PrimitiveObjectInspectorFactory.javaStringObjectInspector, new String( "-factor 3 -rankinit gaussian")); ObjectInspector[] argOIs = new ObjectInspector[] {intOI, intOI, floatOI, param}; mf.initialize(argOIs); Assert.assertTrue(mf.rankInit == RankInitScheme.gaussian); float[][] rating = { {5, 3, 0, 1}, {4, 0, 0, 1}, {1, 1, 0, 5}, {1, 0, 0, 4}, {0, 1, 5, 4}}; Object[] args = new Object[3]; final int num_iters = 100; for (int iter = 0; iter < num_iters; iter++) { for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { args[0] = row; args[1] = col; args[2] = (float) rating[row][col]; mf.process(args); } } } for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { double predicted = mf.predict(row, col); print(rating[row][col] + "[" + predicted + "]\t"); Assert.assertEquals(rating[row][col], predicted, 0.2d); } println(); } } @Test public void testIterationsWithoutFile() throws HiveException { println("--------------------------\n testIterationsWithoutFile()"); OnlineMatrixFactorizationUDTF mf = new MatrixFactorizationSGDUDTF(); ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector; ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector; int iters = 100; ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector( PrimitiveObjectInspectorFactory.javaStringObjectInspector, new String( "-factor 3 -iterations " + iters)); ObjectInspector[] argOIs = new ObjectInspector[] {intOI, intOI, floatOI, param}; MapredContext mrContext = MapredContextAccessor.create(true, null); mf.configure(mrContext); mf.initialize(argOIs); Assert.assertTrue(mf.rankInit == RankInitScheme.random); float[][] rating = { {5, 3, 0, 1}, {4, 0, 0, 1}, {1, 1, 0, 5}, {1, 0, 0, 4}, {0, 1, 5, 4}}; Object[] args = new Object[3]; for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { args[0] = row; args[1] = col; args[2] = (float) rating[row][col]; mf.process(args); } } mf.runIterativeTraining(iters); for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { double predicted = mf.predict(row, col); print(rating[row][col] + "[" + predicted + "]\t"); Assert.assertEquals(rating[row][col], predicted, 0.2d); } println(); } } @Test public void testIterationsCloseWithoutFile() throws HiveException { println("--------------------------\n testIterationsCloseWithoutFile()"); OnlineMatrixFactorizationUDTF mf = new MatrixFactorizationSGDUDTF(); ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector; ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector; int iters = 3; ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector( PrimitiveObjectInspectorFactory.javaStringObjectInspector, new String( "-factor 3 -iterations " + iters)); ObjectInspector[] argOIs = new ObjectInspector[] {intOI, intOI, floatOI, param}; MapredContext mrContext = MapredContextAccessor.create(true, null); mf.configure(mrContext); mf.initialize(argOIs); final MutableInt numCollected = new MutableInt(0); mf.setCollector(new Collector() { @Override public void collect(Object input) throws HiveException { numCollected.addValue(1); } }); Assert.assertTrue(mf.rankInit == RankInitScheme.random); float[][] rating = { {5, 3, 0, 1}, {4, 0, 0, 1}, {1, 1, 0, 5}, {1, 0, 0, 4}, {0, 1, 5, 4}}; Object[] args = new Object[3]; final int num_iters = 100; int trainingExamples = 0; for (int iter = 0; iter < num_iters; iter++) { for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { args[0] = row; args[1] = col; args[2] = (float) rating[row][col]; mf.process(args); trainingExamples++; } } } mf.close(); Assert.assertEquals(trainingExamples * iters, mf.count); Assert.assertEquals(5, numCollected.intValue()); } @Test public void testFileBackedIterationsCloseWithConverge() throws HiveException { println("--------------------------\n testFileBackedIterationsCloseWithConverge()"); OnlineMatrixFactorizationUDTF mf = new MatrixFactorizationSGDUDTF(); ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector; ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector; int iters = 10; ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector( PrimitiveObjectInspectorFactory.javaStringObjectInspector, new String( "-factor 3 -iterations " + iters)); ObjectInspector[] argOIs = new ObjectInspector[] {intOI, intOI, floatOI, param}; MapredContext mrContext = MapredContextAccessor.create(true, null); mf.configure(mrContext); mf.initialize(argOIs); final MutableInt numCollected = new MutableInt(0); mf.setCollector(new Collector() { @Override public void collect(Object input) throws HiveException { numCollected.addValue(1); } }); Assert.assertTrue(mf.rankInit == RankInitScheme.random); float[][] rating = { {5, 3, 0, 1}, {4, 0, 0, 1}, {1, 1, 0, 5}, {1, 0, 0, 4}, {0, 1, 5, 4}}; Object[] args = new Object[3]; final int num_iters = 500; int trainingExamples = 0; for (int iter = 0; iter < num_iters; iter++) { for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { args[0] = row; args[1] = col; args[2] = (float) rating[row][col]; mf.process(args); trainingExamples++; } } } File tmpFile = mf.fileIO.getFile(); mf.close(); Assert.assertTrue(mf.count < trainingExamples * iters); Assert.assertEquals(5, numCollected.intValue()); Assert.assertFalse(tmpFile.exists()); } @Test public void testFileBackedIterationsCloseNoConverge() throws HiveException { println("--------------------------\n testFileBackedIterationsCloseNoConverge()"); OnlineMatrixFactorizationUDTF mf = new MatrixFactorizationSGDUDTF(); ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector; ObjectInspector floatOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector; int iters = 5; ObjectInspector param = ObjectInspectorUtils.getConstantObjectInspector( PrimitiveObjectInspectorFactory.javaStringObjectInspector, new String( "-disable_cv -factor 3 -iterations " + iters)); ObjectInspector[] argOIs = new ObjectInspector[] {intOI, intOI, floatOI, param}; MapredContext mrContext = MapredContextAccessor.create(true, null); mf.configure(mrContext); mf.initialize(argOIs); final MutableInt numCollected = new MutableInt(0); mf.setCollector(new Collector() { @Override public void collect(Object input) throws HiveException { numCollected.addValue(1); } }); Assert.assertTrue(mf.rankInit == RankInitScheme.random); float[][] rating = { {5, 3, 0, 1}, {4, 0, 0, 1}, {1, 1, 0, 5}, {1, 0, 0, 4}, {0, 1, 5, 4}}; Object[] args = new Object[3]; final int num_iters = 500; int trainingExamples = 0; for (int iter = 0; iter < num_iters; iter++) { for (int row = 0; row < rating.length; row++) { for (int col = 0, size = rating[row].length; col < size; col++) { args[0] = row; args[1] = col; args[2] = (float) rating[row][col]; mf.process(args); trainingExamples++; } } } File tmpFile = mf.fileIO.getFile(); mf.close(); Assert.assertEquals(trainingExamples * iters, mf.count); Assert.assertEquals(5, numCollected.intValue()); Assert.assertFalse(tmpFile.exists()); } }