/*
* Copyright 2014
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.textnormalizer.transformation;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.factory.CollectionReaderFactory.createReaderDescription;
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.pipeline.SimplePipeline;
import org.apache.uima.jcas.JCas;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.transform.JCasTransformer_ImplBase;
import de.tudarmstadt.ukp.dkpro.core.io.text.StringReader;
import de.tudarmstadt.ukp.dkpro.core.textnormalizer.util.JCasHolder;
/**
 * Tests which annotations end up in the output of a {@link JCasTransformer_ImplBase}
 * subclass depending on the value of {@code PARAM_TYPES_TO_COPY}.
 * <p>
 * Every test runs the same pipeline: a {@link StringReader} supplying a fixed text, the
 * {@link TestAnnotator} adding two tokens and one sentence, the {@link TestTransformer}
 * under test, and a {@link JCasHolder} from which the resulting CAS is fetched.
 */
public class JCasTransformer_ImplBaseTest
{
    /** Document text fed into every pipeline run. */
    private static final String DOCUMENT_TEXT = "test";

    /**
     * Transformer without any configured types to copy: neither tokens nor sentences are
     * expected in the result, only a single {@link DocumentMetaData}.
     */
    @Test
    public void testCopyNoAnnotations()
        throws UIMAException, IOException
    {
        JCas result = runThroughPipeline(createEngineDescription(TestTransformer.class));

        assertAnnotationCounts(result, 0, 0, 1);
    }

    /**
     * Transformer configured to copy both {@link Token} and {@link Sentence}: both tokens
     * and the sentence created by {@link TestAnnotator} are expected in the result.
     */
    @Test
    public void testAllTypesToCopy()
        throws UIMAException, IOException
    {
        String[] copiedTypes = new String[] { Token.class.getName(), Sentence.class.getName() };

        JCas result = runThroughPipeline(createEngineDescription(TestTransformer.class,
                TestTransformer.PARAM_TYPES_TO_COPY, copiedTypes));

        assertAnnotationCounts(result, 2, 1, 1);
    }

    /**
     * Transformer configured to copy only {@link Token}: the two tokens are expected in the
     * result, the sentence is not.
     */
    @Test
    public void testOneTypeToCopy()
        throws UIMAException, IOException
    {
        String[] copiedTypes = new String[] { Token.class.getName() };

        JCas result = runThroughPipeline(createEngineDescription(TestTransformer.class,
                TestTransformer.PARAM_TYPES_TO_COPY, copiedTypes));

        assertAnnotationCounts(result, 2, 0, 1);
    }

    /**
     * Runs reader, {@link TestAnnotator}, the given transformer and {@link JCasHolder} as one
     * pipeline over {@link #DOCUMENT_TEXT}.
     *
     * @param aTransformer
     *            description of the transformer to place between annotator and holder.
     * @return the JCas obtained from {@link JCasHolder#get()} after the pipeline has run.
     */
    private static JCas runThroughPipeline(AnalysisEngineDescription aTransformer)
        throws UIMAException, IOException
    {
        CollectionReaderDescription textReader = createReaderDescription(StringReader.class,
                StringReader.PARAM_DOCUMENT_TEXT, DOCUMENT_TEXT,
                StringReader.PARAM_LANGUAGE, "en");
        AnalysisEngineDescription annotationProducer = createEngineDescription(
                TestAnnotator.class);
        AnalysisEngineDescription casHolder = createEngineDescription(JCasHolder.class);

        SimplePipeline.runPipeline(textReader, annotationProducer, aTransformer, casHolder);

        return JCasHolder.get();
    }

    /**
     * Asserts the number of tokens, sentences and document metadata annotations in a JCas,
     * checked in that order.
     *
     * @param aJCas
     *            the CAS to inspect.
     * @param aTokens
     *            expected number of {@link Token} annotations.
     * @param aSentences
     *            expected number of {@link Sentence} annotations.
     * @param aDocumentMetaData
     *            expected number of {@link DocumentMetaData} annotations.
     */
    private static void assertAnnotationCounts(JCas aJCas, int aTokens, int aSentences,
            int aDocumentMetaData)
    {
        assertEquals(aTokens, select(aJCas, Token.class).size());
        assertEquals(aSentences, select(aJCas, Sentence.class).size());
        assertEquals(aDocumentMetaData, select(aJCas, DocumentMetaData.class).size());
    }

    /**
     * Adds a fixed set of annotations to each CAS: tokens at [0,1] and [2,3] and one sentence
     * at [0,3].
     */
    public static class TestAnnotator
        extends JCasAnnotator_ImplBase
    {
        @Override
        public void process(JCas aJCas)
            throws AnalysisEngineProcessException
        {
            // Tokens are indexed first, in offset order, followed by the sentence.
            int[][] tokenSpans = { { 0, 1 }, { 2, 3 } };
            for (int[] span : tokenSpans) {
                Token token = new Token(aJCas);
                token.setBegin(span[0]);
                token.setEnd(span[1]);
                token.addToIndexes(aJCas);
            }

            Sentence wholeSpan = new Sentence(aJCas);
            wholeSpan.setBegin(0);
            wholeSpan.setEnd(3);
            wholeSpan.addToIndexes(aJCas);
        }
    }

    /**
     * Minimal transformer that passes the document text through unchanged.
     */
    public static class TestTransformer
        extends JCasTransformer_ImplBase
    {
        @Override
        public void process(JCas aInput, JCas aOutput)
            throws AnalysisEngineProcessException
        {
            // Only the text is transferred here. DocumentMetaData has already been copied
            // and the TYPES_TO_COPY will be copied when this method returns.
            String text = aInput.getDocumentText();
            aOutput.setDocumentText(text);
        }
    }
}