package quickml.supervised.classifier.logRegression;

import com.beust.jcommander.internal.Lists;
import it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import quickml.MathUtils;
import quickml.data.AttributesMap;
import quickml.supervised.classifier.logisticRegression.InstanceTransformerUtils;
import quickml.supervised.classifier.logisticRegression.SparseSGD;
import quickml.supervised.classifier.logisticRegression.SparseClassifierInstance;

import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import static org.junit.Assert.*;

/**
 * Created by chrisreeves on 10/13/15.
 */
public class SparseSGDTest {
    private static final Logger logger = LoggerFactory.getLogger(SparseSGDTest.class);

    @Test
    public void testMinimizeNoRegularization() throws Exception {
        List<SparseClassifierInstance> instances = new ArrayList<>();
        AttributesMap attributesMap = new AttributesMap();
        attributesMap.put("feature1", 1.0);
        Map<String, Integer> nameToIndexMap = new HashMap<>();
        nameToIndexMap.put(InstanceTransformerUtils.BIAS_TERM, 0);
        nameToIndexMap.put("feature1", 1);
        // one positive and three negative instances, all with identical attributes
        instances.add(new SparseClassifierInstance(attributesMap, 1.0, nameToIndexMap));
        instances.add(new SparseClassifierInstance(attributesMap, 0.0, nameToIndexMap));
        instances.add(new SparseClassifierInstance(attributesMap, 0.0, nameToIndexMap));
        instances.add(new SparseClassifierInstance(attributesMap, 0.0, nameToIndexMap));

        SparseSGD sgd = new SparseSGD()
                .maxEpochs(2000)
                .minEpochs(800)
                .costConvergenceThreshold(0.001)
                .weightConvergenceThreshold(0.00001)
                .learningRate(0.05)
                .minibatchSize(4)
                .useBoldDriver(false);

        double[] result = sgd.minimize(instances, 2);
        // at the unregularized optimum, sigmoid(bias + w1) = 1/4, so bias + w1 = ln(1/3) ≈ -1.098612
        Assert.assertEquals(-1.098612, result[0] + result[1], 1E-3);
        // TODO: verify results
        int j = 0;
        for (double value : result) {
            logger.info("value at index {}, {}", j, value);
            j++;
        }
    }

    @Test
    public void testMinimizeVarWithRidge() throws Exception {
        List<SparseClassifierInstance> instances = new ArrayList<>();
        AttributesMap attributesMap = new AttributesMap();
        attributesMap.put("feature1", 1.0);
        Map<String, Integer> nameToIndexMap = new HashMap<>();
        nameToIndexMap.put(InstanceTransformerUtils.BIAS_TERM, 0);
        nameToIndexMap.put("feature1", 1);
        instances.add(new SparseClassifierInstance(attributesMap, 1.0, nameToIndexMap));
        instances.add(new SparseClassifierInstance(attributesMap, 0.0, nameToIndexMap));
        instances.add(new SparseClassifierInstance(attributesMap, 0.0, nameToIndexMap));
        instances.add(new SparseClassifierInstance(attributesMap, 0.0, nameToIndexMap));

        SparseSGD sgd = new SparseSGD()
                .maxEpochs(4000)
                .minEpochs(300)
                .costConvergenceThreshold(0.001)
                .weightConvergenceThreshold(0.00001)
                .learningRate(0.05)
                .minibatchSize(4)
                .useBoldDriver(false)
                .ridgeRegularizationConstant(1)
                .sparseParallelization(true);

        double[] result = sgd.minimize(instances, 2);
        // at the ridge-regularized optimum, the partial derivative of the cost with respect
        // to the feature weight vanishes: 4 * sigmoid(bias + w1) - 1 + w1 = 0
        double derivativeOfCostFunction = 4 * MathUtils.sigmoid(result[0] + result[1]) - 1 + result[1];
        Assert.assertEquals(0.0, derivativeOfCostFunction, 1E-3);
        // TODO: verify results
        int j = 0;
        for (double value : result) {
            logger.info("value at index {}, {}", j, value);
            j++;
        }
    }
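    // Added illustration (not part of the original suite): a worked check of the closed-form
    // expectation asserted in testMinimizeNoRegularization, assuming the standard log-loss.
    // With one positive and three negative instances sharing x = (1, 1), the unregularized
    // loss is minimized where sigmoid(bias + w1) = 1/4, i.e. bias + w1 = ln(1/3) ≈ -1.098612.
    // This hypothetical test exercises only that arithmetic, not SparseSGD itself.
    @Test
    public void testClosedFormOptimumArithmetic() {
        double expectedSum = Math.log(1.0 / 3.0); // ln(1/3)
        Assert.assertEquals(-1.098612, expectedSum, 1E-5);
        // at that optimum the predicted probability is exactly 1/4
        Assert.assertEquals(0.25, MathUtils.sigmoid(expectedSum), 1E-6);
    }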
    @Test
    public void testIsConverged() throws Exception {
        double[] weights = new double[3];
        double convergenceThreshold = 0.1;
        weights[0] = 1;
        weights[1] = 1;
        weights[2] = 1;

        double[] newWeights = Arrays.copyOf(weights, 3);
        assertTrue(SparseSGD.weightsConverged(weights, newWeights, convergenceThreshold));

        newWeights[0] = weights[0] - convergenceThreshold;
        newWeights[1] = weights[1] - convergenceThreshold;
        newWeights[2] = weights[2] - convergenceThreshold;
        assertTrue(SparseSGD.weightsConverged(weights, newWeights, convergenceThreshold));

        newWeights[0] = weights[0] - convergenceThreshold * 2;
        newWeights[1] = weights[1] - convergenceThreshold * 2;
        newWeights[2] = weights[2] - convergenceThreshold * 2;
        assertFalse(SparseSGD.weightsConverged(weights, newWeights, convergenceThreshold));
    }

    @Test
    public void testGetGradient() throws Exception {
        // TODO: not yet implemented
    }

    @Test
    public void testGetCurrentMiniBatchSize() {
        int totalNumInstances = 13;
        int miniBatchSize = 5;
        int finalMiniBatchStartIndex = miniBatchSize * 2;
        // the final mini-batch holds the 3 remaining instances (13 - 10)
        Assert.assertEquals(3, SparseSGD.getCurrentMiniBatchSize(miniBatchSize, totalNumInstances, finalMiniBatchStartIndex));
    }

    @Test
    public void testGetThreadStartIndices() {
        int miniBatchStartIndex = 10;
        int minibatchSize = 3;
        int executorThreadCount = 2;
        int minInstancesForParallelization = 0;
        int[] startIndices = SparseSGD.getThreadStartIndices(miniBatchStartIndex, minibatchSize, executorThreadCount, minInstancesForParallelization);
        // 3 instances in the mini-batch: the last thread should be assigned 2 instances (at indices 11 and 12)
        Assert.assertEquals(10, startIndices[0]);
        Assert.assertEquals(11, startIndices[1]);

        // test no parallelization: a single thread covers the whole mini-batch,
        // so the next boundary is the end of the mini-batch
        minInstancesForParallelization = 100;
        startIndices = SparseSGD.getThreadStartIndices(miniBatchStartIndex, minibatchSize, executorThreadCount, minInstancesForParallelization);
        Assert.assertEquals(10, startIndices[0]);
        Assert.assertEquals(13, startIndices[1]);

        // test executor thread count equal to the mini-batch size: one instance per thread,
        // with the final entry marking the end of the mini-batch
        minInstancesForParallelization = 0;
        executorThreadCount = minibatchSize;
        startIndices = SparseSGD.getThreadStartIndices(miniBatchStartIndex, minibatchSize, executorThreadCount, minInstancesForParallelization);
        Assert.assertEquals(10, startIndices[0]);
        Assert.assertEquals(11, startIndices[1]);
        Assert.assertEquals(13, startIndices[3]);
    }

    @Test
    public void testApplyMaxGradientNorm() throws Exception {
        double[] gradient = new double[4];
        gradient[0] = 0.5;
        gradient[1] = 0.75;
        gradient[2] = 0.5;
        gradient[3] = 0.25;
        // ||g||^2 = 1.125 > 0.1, so each component is rescaled: 0.5 * sqrt(0.1 / 1.125) ≈ 0.15
        SparseSGD.applyMaxGradientNorm(0.1, gradient);
        assertEquals(0.15, gradient[0], 0.01);
    }

    @Test
    public void testGetWorkerContributionToTheGradient() {
        List<SparseClassifierInstance> instances = Lists.newArrayList();
        HashMap<String, Integer> nameToIndexMap = new HashMap<>();
        nameToIndexMap.put(InstanceTransformerUtils.BIAS_TERM, 0);
        nameToIndexMap.put("first", 1);
        nameToIndexMap.put("second", 2);
        AttributesMap attributes = AttributesMap.newHashMap();
        attributes.put("first", 1.0);
        attributes.put("second", 1.0);
        instances.add(new SparseClassifierInstance(attributes, 1.0, nameToIndexMap));

        double[] weights = new double[3];
        weights[0] = 1.0;
        weights[1] = 1.0;
        weights[2] = 1.0;

        // expected gradient is -(1.0 - 1/(1 + e^-3)) * 1 = -0.04742587317
        double expectedDerivative = -0.04742587317;
        double[] workerContributionToGradient = SparseSGD.getWorkerContributionToTheGradient(instances, weights);
        Assert.assertEquals(expectedDerivative, workerContributionToGradient[0], 1E-5);
        Assert.assertEquals(expectedDerivative, workerContributionToGradient[1], 1E-5);
        Assert.assertEquals(expectedDerivative, workerContributionToGradient[2], 1E-5);
    }
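    // Added illustration (not part of the original suite): the expected values in the
    // gradient tests above and below follow from the per-instance log-loss gradient
    // -(label - sigmoid(w . x)) * x, as the inline comments note. This hypothetical test
    // checks only that arithmetic for the two weight/attribute combinations used,
    // not SparseSGD itself.
    @Test
    public void testGradientPrefactorArithmetic() {
        // w = (1, 1, 1), x = (1, 1, 1) => w . x = 3
        Assert.assertEquals(-0.04742587317, -(1.0 - MathUtils.sigmoid(3.0)), 1E-9);
        // w = (0, -1, 0.5), x = (1, 1, 0.5) => w . x = -0.75
        Assert.assertEquals(-0.67917869917, -(1.0 - MathUtils.sigmoid(-0.75)), 1E-9);
    }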
    @Test
    public void testGetWorkerContributionToTheGradient2() {
        List<SparseClassifierInstance> instances = Lists.newArrayList();
        HashMap<String, Integer> nameToIndexMap = new HashMap<>();
        nameToIndexMap.put(InstanceTransformerUtils.BIAS_TERM, 0);
        nameToIndexMap.put("first", 1);
        nameToIndexMap.put("second", 2);
        AttributesMap attributes = AttributesMap.newHashMap();
        attributes.put("first", 1.0);
        attributes.put("second", 0.5);
        instances.add(new SparseClassifierInstance(attributes, 1.0, nameToIndexMap));

        double[] weights = new double[3];
        weights[0] = 0.0;
        weights[1] = -1.0;
        weights[2] = 0.5;

        // expected gradient prefactor is -(1.0 - 1/(1 + e^0.75)) = -0.67917869917;
        // the "second" component is scaled by its attribute value of 0.5
        double expectedDerivativePrefactor = -0.67917869917;
        double[] workerContributionToGradient = SparseSGD.getWorkerContributionToTheGradient(instances, weights);
        Assert.assertEquals(expectedDerivativePrefactor, workerContributionToGradient[0], 1E-5);
        Assert.assertEquals(expectedDerivativePrefactor, workerContributionToGradient[1], 1E-5);
        Assert.assertEquals(0.5 * expectedDerivativePrefactor, workerContributionToGradient[2], 1E-5);
    }

    @Test
    public void testSparseGetWorkerContributionToTheGradient() {
        List<SparseClassifierInstance> instances = Lists.newArrayList();
        HashMap<String, Integer> nameToIndexMap = new HashMap<>();
        nameToIndexMap.put(InstanceTransformerUtils.BIAS_TERM, 0);
        nameToIndexMap.put("first", 1);
        nameToIndexMap.put("second", 2);
        AttributesMap attributes = AttributesMap.newHashMap();
        attributes.put("first", 1.0);
        attributes.put("second", 1.0);
        instances.add(new SparseClassifierInstance(attributes, 1.0, nameToIndexMap));

        double[] weights = new double[3];
        weights[0] = 1.0;
        weights[1] = 1.0;
        weights[2] = 1.0;

        // expected gradient is -(1.0 - 1/(1 + e^-3)) * 1 = -0.04742587317
        double expectedDerivative = -0.04742587317;
        Int2DoubleOpenHashMap workerContributionToGradient = SparseSGD.getSparseWorkerContributionToTheGradient(instances, weights, 0);
        Assert.assertEquals(expectedDerivative, workerContributionToGradient.get(0), 1E-5);
        Assert.assertEquals(expectedDerivative, workerContributionToGradient.get(1), 1E-5);
        Assert.assertEquals(expectedDerivative, workerContributionToGradient.get(2), 1E-5);
    }

    @Test
    public void testSparseGetWorkerContributionToTheGradient2() {
        List<SparseClassifierInstance> instances = Lists.newArrayList();
        HashMap<String, Integer> nameToIndexMap = new HashMap<>();
        nameToIndexMap.put(InstanceTransformerUtils.BIAS_TERM, 0);
        nameToIndexMap.put("first", 1);
        nameToIndexMap.put("second", 2);
        AttributesMap attributes = AttributesMap.newHashMap();
        attributes.put("first", 1.0);
        attributes.put("second", 0.5);
        instances.add(new SparseClassifierInstance(attributes, 1.0, nameToIndexMap));

        double[] weights = new double[3];
        weights[0] = 0.0;
        weights[1] = -1.0;
        weights[2] = 0.5;

        // expected gradient prefactor is -(1.0 - 1/(1 + e^0.75)) = -0.67917869917
        double expectedDerivativePrefactor = -0.67917869917;
        Int2DoubleOpenHashMap workerContributionToGradient = SparseSGD.getSparseWorkerContributionToTheGradient(instances, weights, 0);
        Assert.assertEquals(expectedDerivativePrefactor, workerContributionToGradient.get(0), 1E-5);
        Assert.assertEquals(expectedDerivativePrefactor, workerContributionToGradient.get(1), 1E-5);
        Assert.assertEquals(0.5 * expectedDerivativePrefactor, workerContributionToGradient.get(2), 1E-5);
    }
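    // Added illustration (not part of the original suite): the dense and sparse code paths
    // are asserted against the same expected values above, so they should produce identical
    // per-index contributions for the same instance. This hypothetical test cross-checks
    // getWorkerContributionToTheGradient against getSparseWorkerContributionToTheGradient
    // using the second weight/attribute combination.
    @Test
    public void testDenseAndSparseContributionsAgree() {
        List<SparseClassifierInstance> instances = Lists.newArrayList();
        HashMap<String, Integer> nameToIndexMap = new HashMap<>();
        nameToIndexMap.put(InstanceTransformerUtils.BIAS_TERM, 0);
        nameToIndexMap.put("first", 1);
        nameToIndexMap.put("second", 2);
        AttributesMap attributes = AttributesMap.newHashMap();
        attributes.put("first", 1.0);
        attributes.put("second", 0.5);
        instances.add(new SparseClassifierInstance(attributes, 1.0, nameToIndexMap));

        double[] weights = {0.0, -1.0, 0.5};
        double[] dense = SparseSGD.getWorkerContributionToTheGradient(instances, weights);
        Int2DoubleOpenHashMap sparse = SparseSGD.getSparseWorkerContributionToTheGradient(instances, weights, 0);
        for (int i = 0; i < weights.length; i++) {
            Assert.assertEquals(dense[i], sparse.get(i), 1E-9);
        }
    }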
    @Test
    public void testSparseReductionToTheGradient() {
        double[] gradient = new double[2];
        List<Future<Int2DoubleOpenHashMap>> contributions = Lists.newArrayList();

        Int2DoubleOpenHashMap contribution1 = new Int2DoubleOpenHashMap();
        contribution1.put(1, 1.0);
        contributions.add(new FakeFuture<Int2DoubleOpenHashMap>(contribution1));

        Int2DoubleOpenHashMap contribution2 = new Int2DoubleOpenHashMap();
        contribution2.put(1, 0.75);
        contribution2.put(0, 0.5);
        contributions.add(new FakeFuture<Int2DoubleOpenHashMap>(contribution2));

        SparseSGD.sparseReductionToTheGradient(gradient, contributions);
        // the reduction sums each worker's sparse contribution into the dense gradient
        Assert.assertEquals(0.5, gradient[0], 1E-5);
        Assert.assertEquals(1.75, gradient[1], 1E-5);
    }

    /**
     * Minimal Future stub that returns a precomputed value immediately; used to feed
     * worker contributions to the reduction methods without an executor.
     */
    public static class FakeFuture<T> implements Future<T> {
        private final T value;

        public FakeFuture(T value) {
            this.value = value;
        }

        @Override
        public boolean cancel(boolean mayInterruptIfRunning) {
            return false;
        }

        @Override
        public boolean isCancelled() {
            return false;
        }

        @Override
        public boolean isDone() {
            return true;
        }

        @Override
        public T get() throws InterruptedException, ExecutionException {
            return value;
        }

        @Override
        public T get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
            return value;
        }
    }

    @Test
    public void testReductionToTheGradient() {
        double[] gradient = new double[2];
        List<Future<double[]>> contributions = Lists.newArrayList();

        double[] grad1 = new double[2];
        grad1[1] = 1.0;
        contributions.add(new FakeFuture<double[]>(grad1));

        double[] grad2 = new double[2];
        grad2[1] = 0.75;
        grad2[0] = 0.5;
        contributions.add(new FakeFuture<double[]>(grad2));

        SparseSGD.reductionToTheGradient(gradient, contributions);
        Assert.assertEquals(0.5, gradient[0], 1E-5);
        Assert.assertEquals(1.75, gradient[1], 1E-5);
    }
}