/*******************************************************************************
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.dkpro.core.api.io.sequencegenerator;
import de.tudarmstadt.ukp.dkpro.core.api.featurepath.FeaturePathException;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.LexicalPhrase;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import org.apache.uima.UIMAException;
import org.apache.uima.jcas.JCas;
import org.junit.Assert;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import static de.tudarmstadt.ukp.dkpro.core.api.io.sequencegenerator.StringSequenceGeneratorTest.*;
import static org.junit.Assert.assertEquals;
/**
 * Unit tests for {@link PhraseSequenceGenerator}.
 * <p>
 * Each test builds a generator through {@link PhraseSequenceGenerator.Builder}, runs it on a
 * small fixture CAS and checks the resulting {@link LexicalPhrase} sequences. The fixture
 * factories ({@code jCasWithTokens()}, {@code jCasWithLemmas()}, {@code jCasWithSentence()}) are
 * not defined in this class; presumably they are provided by the static import of
 * {@code StringSequenceGeneratorTest}.
 */
public class PhraseSequenceGeneratorTest
{
    /**
     * Uses the plain {@link Token} type as feature path and expects the first sequence to hold
     * the two token texts.
     */
    @Test
    public void testTokenSequences()
            throws Exception
    {
        String featurePath = Token.class.getName();

        JCas jCas = jCasWithTokens();
        PhraseSequenceGenerator sequenceGenerator = new PhraseSequenceGenerator.Builder()
                .featurePath(featurePath)
                .build();

        LexicalPhrase[] sequence = sequenceGenerator.tokenSequences(jCas).get(0);

        assertFirstAndLast(sequence, 2, "Token1", "Token2");
    }

    /**
     * Uses the feature path {@code Token/lemma/value} and expects the first sequence to hold the
     * two lemma values instead of the token texts.
     */
    @Test
    public void testLemmaSequences()
            throws IOException, FeaturePathException, UIMAException
    {
        String featurePath = Token.class.getName() + "/lemma/value";

        JCas jCas = jCasWithLemmas();
        PhraseSequenceGenerator sequenceGenerator = new PhraseSequenceGenerator.Builder()
                .featurePath(featurePath)
                .build();

        LexicalPhrase[] sequence = sequenceGenerator.tokenSequences(jCas).get(0);

        assertFirstAndLast(sequence, 2, "lemma1", "lemma2");
    }

    /**
     * Restricts generation to {@link Sentence} as covering type and expects exactly one sequence
     * containing both tokens.
     */
    @Test
    public void testGenerateSequenceFeaturePathCovering()
            throws FeaturePathException, UIMAException, IOException
    {
        String featurePath = Token.class.getName();
        String covering = Sentence.class.getTypeName();

        JCas jCas = jCasWithSentence();
        PhraseSequenceGenerator sequenceGenerator = new PhraseSequenceGenerator.Builder()
                .featurePath(featurePath)
                .lowercase(false)
                .coveringType(covering)
                .build();

        List<LexicalPhrase[]> sequences = sequenceGenerator.tokenSequences(jCas);

        assertEquals(1, sequences.size());
        assertFirstAndLast(sequences.get(0), 2, "Token1", "Token2");
    }

    /**
     * Supplies the stopword list as a classpath {@link URL}.
     */
    @Test
    public void testGenerateSequenceStopwordsURL()
            throws FeaturePathException, UIMAException, IOException
    {
        URL stopwordsFile = this.getClass().getResource("/stopwords.txt");
        // getResource() yields null for a missing resource; fail with a readable message here
        // instead of handing null to the builder.
        Assert.assertNotNull("Test resource /stopwords.txt not found on the classpath",
                stopwordsFile);

        JCas jCas = jCasWithTokens();
        PhraseSequenceGenerator sequenceGenerator = new PhraseSequenceGenerator.Builder()
                .stopwordsURL(stopwordsFile)
                .lowercase(false)
                .build();

        assertStopwordFiltered(sequenceGenerator.tokenSequences(jCas));
    }

    /**
     * Supplies the stopword list as a {@link File}; the path is relative to the working
     * directory.
     */
    @Test
    public void testGenerateSequenceStopwordsFile()
            throws FeaturePathException, UIMAException, IOException
    {
        File stopwordsFile = new File("src/test/resources/stopwords.txt");

        JCas jCas = jCasWithTokens();
        PhraseSequenceGenerator sequenceGenerator = new PhraseSequenceGenerator.Builder()
                .stopwordsFile(stopwordsFile)
                .lowercase(false)
                .build();

        assertStopwordFiltered(sequenceGenerator.tokenSequences(jCas));
    }

    /**
     * Supplies the stopword list as a path string; the path is relative to the working
     * directory.
     */
    @Test
    public void testGenerateSequenceStopwordsFileString()
            throws FeaturePathException, UIMAException, IOException
    {
        String stopwordsFile = "src/test/resources/stopwords.txt";

        JCas jCas = jCasWithTokens();
        PhraseSequenceGenerator sequenceGenerator = new PhraseSequenceGenerator.Builder()
                .stopwordsFile(stopwordsFile)
                .lowercase(false)
                .build();

        assertStopwordFiltered(sequenceGenerator.tokenSequences(jCas));
    }

    /**
     * Checks the length of a sequence and the text of its first and last phrase.
     *
     * @param sequence
     *            the generated sequence
     * @param expectedSize
     *            the expected number of phrases
     * @param expectedFirst
     *            the expected text of the first phrase
     * @param expectedLast
     *            the expected text of the last phrase
     */
    private static void assertFirstAndLast(LexicalPhrase[] sequence, int expectedSize,
            String expectedFirst, String expectedLast)
    {
        assertEquals(expectedSize, sequence.length);
        assertEquals(expectedFirst, sequence[0].getText());
        assertEquals(expectedLast, sequence[sequence.length - 1].getText());
    }

    /**
     * Shared verification for the stopword tests: exactly one sequence, still two phrases long,
     * whose first phrase has an empty text.
     * <p>
     * NOTE(review): the unchanged length together with the empty text suggests that a filtered
     * token is blanked rather than dropped - confirm against PhraseSequenceGenerator.
     *
     * @param sequences
     *            the sequences produced for the token fixture
     */
    private static void assertStopwordFiltered(List<LexicalPhrase[]> sequences)
    {
        assertEquals(1, sequences.size());
        LexicalPhrase[] sequence = sequences.get(0);
        assertEquals(2, sequence.length);
        assertEquals("", sequence[0].getText());
    }
}