/*
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 * <p>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.dkpro.core.api.embeddings.text;

import org.dkpro.core.api.embeddings.Vectorizer;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

/**
 * Tests loading embedding models through {@link TextFormatVectorizer#load(File)} and querying
 * them via the {@link Vectorizer} interface, using the fixture files under
 * {@code src/test/resources}.
 */
public class TextFormatVectorizerTest
{
    /**
     * Tolerance passed to {@code assertArrayEquals}: zero, so that a looked-up vector only
     * passes when no component deviates from the expected value.
     */
    private static final float NO_TOLERANCE = 0.0f;

    /**
     * Expected vector for the token {@code "partiality"}. Both tests in this class expect the
     * very same values for this token, so the literal is kept in one place.
     */
    private static final float[] EXPECTED_VECTOR_PARTIALITY = new float[] {
            0.003056f, -0.004063f, 0.008095f, 0.008563f, -0.004409f,
            -0.000555f, 0.002892f, -0.003428f, -0.009526f, 0.005398f,
            0.005198f, 0.000784f, 0.000739f, -0.002909f, -0.000911f,
            0.001754f, 0.000432f, -0.000036f, 0.008569f, 0.009337f,
            -0.005302f, 0.002052f, -0.002820f, 0.002569f, 0.001306f,
            0.008049f, 0.007594f, -0.001033f, 0.005302f, 0.003549f,
            0.009340f, -0.007415f, -0.007822f, 0.003608f, 0.000588f,
            -0.005675f, 0.001786f, -0.004505f, -0.009239f, -0.009723f,
            -0.004875f, -0.000646f, -0.005204f, 0.004283f, 0.009239f,
            0.002467f, -0.003054f, 0.009439f, -0.008374f, -0.007085f };

    /**
     * Loads {@code dummy.vec} and checks its size, its dimensionality, that it is not reported
     * as caseless, and the vectors returned for {@code "Her"} and {@code "partiality"}.
     *
     * @throws IOException
     *             if the vectorizer signals an I/O problem while loading or looking up vectors.
     */
    @Test
    public void testVectorizer()
            throws IOException
    {
        File modelFile = new File("src/test/resources/dummy.vec");
        Vectorizer vectorizer = TextFormatVectorizer.load(modelFile);

        int expectedSize = 699;
        int expectedDimensions = 50;
        float[] expectedVectorHer = new float[] {
                -0.003060f, 0.003507f, -0.008743f, -0.002152f, -0.004767f,
                -0.007613f, 0.004302f, 0.002171f, -0.002029f, 0.001279f,
                0.002584f, 0.002896f, 0.006834f, 0.000398f, 0.005685f,
                -0.006861f, -0.005104f, -0.006102f, 0.001795f, -0.005347f,
                0.006562f, -0.009437f, -0.005975f, -0.007835f, 0.000151f,
                0.008032f, -0.004748f, 0.006110f, -0.008335f, -0.005110f,
                -0.004147f, 0.005215f, -0.009278f, -0.008693f, -0.004793f,
                -0.006631f, 0.005200f, 0.003343f, -0.002542f, 0.006161f,
                0.009828f, -0.001308f, 0.004804f, 0.001710f, 0.005781f,
                0.002312f, -0.002556f, 0.007643f, 0.003270f, -0.000747f };

        assertEquals(expectedSize, vectorizer.size());
        assertEquals(expectedDimensions, vectorizer.dimensions());
        assertFalse(vectorizer.isCaseless());
        // assertArrayEquals names the first differing index and both values when it fails.
        assertArrayEquals(expectedVectorHer, vectorizer.vectorize("Her"), NO_TOLERANCE);
        assertArrayEquals(EXPECTED_VECTOR_PARTIALITY, vectorizer.vectorize("partiality"),
                NO_TOLERANCE);
    }

    /**
     * Loads {@code dummy_lowercased.vec} and checks its size, its dimensionality, that it is
     * reported as caseless, and the vectors returned for {@code "extensive"} and
     * {@code "partiality"}.
     *
     * @throws IOException
     *             if the vectorizer signals an I/O problem while loading or looking up vectors.
     */
    @Test
    public void testCaseless()
            throws IOException
    {
        File modelFile = new File("src/test/resources/dummy_lowercased.vec");
        Vectorizer vectorizer = TextFormatVectorizer.load(modelFile);

        int expectedSize = 575;
        int expectedDimensions = 50;
        float[] expectedVectorExtensive = new float[] {
                0.006224f, -0.001446f, -0.006190f, -0.006054f, 0.000934f,
                0.007808f, -0.008502f, 0.004742f, -0.008128f, 0.003936f,
                0.009614f, 0.009580f, -0.008128f, 0.008639f, -0.006202f,
                -0.002507f, -0.009479f, -0.007713f, 0.006366f, 0.005287f,
                0.008215f, 0.001309f, 0.006467f, -0.009070f, -0.003769f,
                -0.000971f, 0.006644f, 0.002931f, 0.009900f, -0.009535f,
                -0.009741f, 0.007459f, 0.002521f, -0.008924f, -0.001111f,
                -0.009039f, 0.001334f, 0.007053f, 0.006536f, 0.000227f,
                -0.006283f, 0.000452f, 0.008366f, -0.005902f, -0.008318f,
                -0.003674f, 0.005740f, 0.001463f, -0.004165f, -0.009005f };

        assertEquals(expectedSize, vectorizer.size());
        assertEquals(expectedDimensions, vectorizer.dimensions());
        assertTrue(vectorizer.isCaseless());
        // assertArrayEquals names the first differing index and both values when it fails.
        assertArrayEquals(expectedVectorExtensive, vectorizer.vectorize("extensive"),
                NO_TOLERANCE);
        assertArrayEquals(EXPECTED_VECTOR_PARTIALITY, vectorizer.vectorize("partiality"),
                NO_TOLERANCE);
    }
}