/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.ml; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.testng.annotations.Test; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import static com.facebook.presto.ml.TestUtils.getDataset; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; public class TestFeatureTransformations { @Test public void testUnitNormalizer() throws Exception { FeatureTransformation transformation = new FeatureUnitNormalizer(); Dataset dataset = getDataset(); boolean valueGreaterThanOne = false; for (FeatureVector vector : dataset.getDatapoints()) { for (double value : vector.getFeatures().values()) { if (value > 1) { valueGreaterThanOne = true; break; } } } // Make sure there is a feature that needs to be normalized assertTrue(valueGreaterThanOne); transformation.train(dataset); for (FeatureVector vector : transformation.transform(dataset).getDatapoints()) { for (double value : vector.getFeatures().values()) { assertTrue(value <= 1); } } } @Test public void testUnitNormalizerSimple() throws Exception { FeatureTransformation transformation = new FeatureUnitNormalizer(); List<Double> labels = new ArrayList<>(); List<FeatureVector> features = new ArrayList<>(); for (int i = 0; i < 3; i++) { labels.add(0.0); features.add(new FeatureVector(0, (double) i)); } Dataset dataset = new Dataset(labels, features, ImmutableMap.of()); transformation.train(dataset); Set<Double> featureValues = new HashSet<>(); for (FeatureVector vector : transformation.transform(dataset).getDatapoints()) { for (double value : vector.getFeatures().values()) { featureValues.add(value); } } assertEquals(featureValues, ImmutableSet.of(0.0, 0.5, 1.0)); } }