/* * Copyright 2013 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.performance; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine; import static org.apache.uima.fit.factory.CollectionReaderFactory.createReader; import static org.apache.uima.fit.util.JCasUtil.getType; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Random; import org.apache.commons.lang.RandomStringUtils; import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.CAS; import org.apache.uima.cas.Type; import org.apache.uima.collection.CollectionException; import org.apache.uima.collection.CollectionReader; import org.apache.uima.collection.CollectionReaderDescription; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceConfigurationException; import org.apache.uima.resource.ResourceInitializationException; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; public final class PerformanceTestUtil { private PerformanceTestUtil() { // No instances } public static SummaryStatistics measureWritePerformance(AnalysisEngineDescription aWriterDesc, Iterable<JCas> aTestData) throws ResourceInitializationException, AnalysisEngineProcessException { AnalysisEngine writer = createEngine(aWriterDesc); SummaryStatistics stats = new SummaryStatistics(); for (JCas jcas : aTestData) { long begin = System.currentTimeMillis(); writer.process(jcas); stats.addValue(System.currentTimeMillis() - begin); } writer.collectionProcessComplete(); writer.destroy(); return stats; } public static SummaryStatistics measureReadPerformance( CollectionReaderDescription aReaderDesc, JCas aJCas, int aIterations) throws ResourceInitializationException, CollectionException, IOException, ResourceConfigurationException { CollectionReader reader = createReader(aReaderDesc); SummaryStatistics stats = new SummaryStatistics(); CAS cas = aJCas.getCas(); for (int i = 0; i < aIterations; i++) { long begin = System.currentTimeMillis(); reader.getNext(cas); stats.addValue(System.currentTimeMillis() - begin); reader.reconfigure(); cas.reset(); } reader.close(); reader.destroy(); return stats; } /** * Initializes a CAS with random text, tokens, and sentences. * * @param aJCas the CAS * @param aTextSize the length of the text to be generated. * @param aAnnotationCount the number of annotations to be generated. * @param aSeed the random seed to allow for repeatable randomness. */ public static void initRandomCas(JCas aJCas, int aTextSize, int aAnnotationCount, long aSeed) { List<Type> types = new ArrayList<Type>(); types.add(getType(aJCas, Token.class)); types.add(getType(aJCas, Sentence.class)); // Iterator<Type> i = aJCas.getTypeSystem().getTypeIterator(); // while (i.hasNext()) { // Type t = i.next(); // if (t.isArray() || t.isPrimitive()) { // continue; // } // if (aJCas.getDocumentAnnotationFs().getType().getName().equals(t.getName())) { // continue; // } // types.add(t); // } // Initialize randomizer Random rnd = new Random(aSeed); // Shuffle the types for (int n = 0; n < 10; n++) { Type t = types.remove(rnd.nextInt(types.size())); types.add(t); } // Generate random text aJCas.setDocumentText(RandomStringUtils.random(aTextSize)); // Generate random annotations CAS cas = aJCas.getCas(); for (int n = 0; n < aAnnotationCount; n++) { Type t = types.get(n % types.size()); int length = rnd.nextInt(30); int begin = rnd.nextInt(aTextSize); int end = begin + length; if (end > aTextSize) { n--; // Skip and extend loop by one continue; } cas.addFsToIndexes(cas.createAnnotation(t, begin, end)); } } public static <T> Iterable<T> repeat(final T aObject, final int aCount) { return new Iterable<T>() { @Override public Iterator<T> iterator() { return new Iterator<T>() { private int i = 0; @Override public boolean hasNext() { return i < aCount; } @Override public T next() { i++; return aObject; } @Override public void remove() { throw new UnsupportedOperationException(); } }; } }; } }