/*
 * Copyright 2011
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package de.tudarmstadt.ukp.dkpro.core.api.frequency.util;

import static org.junit.Assert.*;

import java.io.File;
import java.util.Arrays;
import java.util.List;

import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

/**
 * Unit tests for {@link FrequencyDistribution}: counting of samples, clearing,
 * a save/load round trip through a temporary file, and samples that consist of
 * punctuation and whitespace characters.
 */
public class FrequencyDistributionTest {

    /**
     * Fixture sentence shared by several tests. Split on single spaces it
     * yields 11 tokens, 8 of them distinct; "a", "first" and "test" each
     * occur twice, every other token once.
     */
    private static final String SAMPLE_SENTENCE =
            "This is a first test that contains a first test example";

    /** Supplies the scratch file used by the save/load round-trip test. */
    @Rule
    public TemporaryFolder folder = new TemporaryFolder();

    /**
     * Creates a fresh distribution and feeds it every space-separated token
     * of {@link #SAMPLE_SENTENCE}.
     *
     * @return a distribution populated with the fixture tokens
     */
    private static FrequencyDistribution<String> newSampleDistribution() {
        List<String> words = Arrays.asList(SAMPLE_SENTENCE.split(" "));
        FrequencyDistribution<String> distribution = new FrequencyDistribution<String>();
        distribution.incAll(words);
        return distribution;
    }

    /**
     * Checks the totals, individual counts, maximum frequency and the most
     * frequent samples reported for the fixture sentence.
     */
    @Test
    public void fdTest() {
        FrequencyDistribution<String> distribution = newSampleDistribution();

        System.out.println(distribution);

        assertEquals(11, distribution.getN());
        assertEquals(8, distribution.getB());

        // A sample that was never added is reported with a count of zero.
        assertEquals(0, distribution.getCount("humpelgrumpf"));
        assertEquals(1, distribution.getCount("This"));
        assertEquals(2, distribution.getCount("test"));

        assertEquals(2, distribution.getMaxFreq());
        assertEquals("a", distribution.getSampleWithMaxFreq());

        // The three tokens occurring twice are expected to form the top three.
        List<String> topThree = distribution.getMostFrequentSamples(3);
        assertEquals(3, topThree.size());
        assertTrue(topThree.contains("first"));
        assertTrue(topThree.contains("a"));
        assertTrue(topThree.contains("test"));
    }

    /**
     * Bug in PCJ: see http://sourceforge.net/p/pcj/bugs/15/
     * <p>
     * Uses a key whose hash code equals {@link Integer#MIN_VALUE} and
     * increments it once; the test passes when that call completes without
     * throwing.
     */
    @Test
    public void testMaxIntHash() {
        String minHashKey = "'s_'s_not_noticed";

        // Guard the precondition: the key really hashes to Integer.MIN_VALUE.
        assertEquals(Integer.MIN_VALUE, minHashKey.hashCode());

        FrequencyDistribution<String> distribution = new FrequencyDistribution<String>();
        distribution.inc(minHashKey);
    }

    /**
     * Checks that after {@code clear()} the maximum frequency is zero and no
     * sample is reported as the most frequent one.
     */
    @Test
    public void clearTest() {
        FrequencyDistribution<String> distribution = newSampleDistribution();

        distribution.clear();

        assertEquals(0L, distribution.getMaxFreq());
        assertNull(distribution.getSampleWithMaxFreq());
    }

    /**
     * Saves a populated distribution to a temporary file, loads it into a new
     * instance and checks that the loaded instance reports the same counts.
     *
     * @throws Exception if creating the file, saving or loading fails
     */
    @Test
    public void saveAndLoadFdTest() throws Exception {
        FrequencyDistribution<String> original = newSampleDistribution();

        File target = folder.newFile();
        original.save(target);

        FrequencyDistribution<String> restored = new FrequencyDistribution<String>();
        restored.load(target);

        assertEquals(11, restored.getN());
        assertEquals(8, restored.getB());

        assertEquals(0, restored.getCount("humpelgrumpf"));
        assertEquals(1, restored.getCount("This"));
        assertEquals(2, restored.getCount("test"));

        assertEquals("a", restored.getSampleWithMaxFreq());
    }

    /**
     * Checks that samples made of commas, periods, spaces and tabs are kept
     * apart from one another and each counted exactly once.
     */
    @Test
    public void fdTest_specialToken() {
        String[] specialTokens = { ", ", ". ", ".", ",", "\t", ",\t" };

        FrequencyDistribution<String> distribution = new FrequencyDistribution<String>();
        for (String token : specialTokens) {
            distribution.inc(token);
        }

        System.out.println(distribution);

        assertEquals(6, distribution.getN());
        assertEquals(6, distribution.getB());

        assertEquals(0, distribution.getCount("humpelgrumpf"));
        assertEquals(1, distribution.getCount(", "));
        assertEquals(1, distribution.getCount(","));
        assertEquals(1, distribution.getCount(". "));
        assertEquals(1, distribution.getCount("."));
        assertEquals(1, distribution.getCount("\t"));
        assertEquals(1, distribution.getCount(",\t"));
    }
}