/**
* Copyright 2007-2014
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package de.tudarmstadt.ukp.dkpro.core.arktools;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.util.JCasUtil.select;
import static de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations.*;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.jcas.JCas;
import org.junit.Rule;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.testing.DkproTestContext;
/**
 * Tests the {@link ArktweetTokenizer} / {@link ArktweetPosTagger} pipeline on a handful of
 * English tweets by comparing the produced tokens and part-of-speech annotations against
 * hand-written expectations.
 */
public class ArktweetTaggerTest
{
    /**
     * Runs the tokenizer and the tagger over several example tweets and checks the resulting
     * tokens, POS tag values and POS classes.
     *
     * @throws Exception
     *             if an engine cannot be created or processing fails.
     */
    @Test
    public void arktweetTaggerTest()
        throws Exception
    {
        runTest("en",
                "@Gunservatively obozo will go nuts when PA elects a Republican Governor next Tue. Can you say redistricting?",
                new String[] { "@Gunservatively", "obozo", "will", "go", "nuts", "when", "PA",
                        "elects", "a", "Republican", "Governor", "next", "Tue", ".", "Can", "you",
                        "say", "redistricting", "?" },
                new String[] { "@", "^", "V", "V", "A", "R", "^", "V", "D", "A", "N", "P", "^", ",",
                        "V", "O", "V", "V", "," },
                new String[] { "AT", "PROPN", "VERB", "VERB", "ADJ", "ADV", "PROPN", "VERB", "DET",
                        "ADJ", "NOUN", "ADP", "PROPN", "PUNCT", "VERB", "PRON", "VERB", "VERB",
                        "PUNCT" }
        );

        runTest("en",
                "Spending the day withhh mommma !",
                new String[] { "Spending", "the", "day", "withhh", "mommma", "!" },
                new String[] { "V", "D", "N", "P", "N", "," },
                new String[] { "VERB", "DET", "NOUN", "ADP", "NOUN", "PUNCT" }
        );

        runTest("en",
                "lmao ... s/o to the cool ass asian officer 4 #1 not runnin my license and #2 not takin dru boo to jail . Thank u God . #amen",
                new String[] { "lmao", "...", "s/o", "to", "the", "cool", "ass", "asian", "officer",
                        "4", "#1", "not", "runnin", "my", "license", "and", "#2", "not", "takin",
                        "dru", "boo", "to", "jail", ".", "Thank", "u", "God", ".", "#amen" },
                new String[] { "!", ",", "V", "P", "D", "A", "N", "A", "N", "P", "$", "R", "V", "D",
                        "N", "&", "$", "R", "V", "N", "N", "P", "N", ",", "V", "O", "^", ",", "#" },
                new String[] { "INT", "PUNCT", "VERB", "ADP", "DET", "ADJ", "NOUN", "ADJ", "NOUN",
                        "ADP", "NUM", "ADV", "VERB", "DET", "NOUN", "CONJ", "NUM", "ADV", "VERB",
                        "NOUN", "NOUN", "ADP", "NOUN", "PUNCT", "VERB", "PRON", "PROPN", "PUNCT",
                        "HASH" }
        );

        runTest("en",
                "Different smiley styles :) :-) (^_^) ^o #smiley",
                new String[] { "Different", "smiley", "styles", ":)", ":-)", "(^_^)", "^o",
                        "#smiley" },
                new String[] { "A", "A", "N", "E", "E", "E", "E", "#" },
                new String[] { "ADJ", "ADJ", "NOUN", "EMO", "EMO", "EMO", "EMO", "HASH" }
        );
    }

    // Disabled regression test for issue 335 (kept for reference).
    // @Test
    // public void bugTest() throws Exception {
    // runTest("en",
    // "company's mo",
    // new String[] { "company's", "mo" },
    // new String[] { "S", "N", },
    // new String[] { "NN", "NN" }
    // );
    // }

    /**
     * Tokenizes and tags a single document and asserts the outcome.
     * <p>
     * A fresh tokenizer and a fresh tagger (variant {@code "default"}) are created for every
     * call. Both engines are destroyed before this method returns, even if processing or one of
     * the assertions fails, so that the resources they hold are released.
     *
     * @param language
     *            document language set on the CAS before processing.
     * @param testDocument
     *            document text to be processed.
     * @param tokens
     *            expected token texts, checked with {@code assertToken}.
     * @param tags
     *            expected POS tag values, passed as the second argument of {@code assertPOS}.
     * @param tagClasses
     *            expected POS classes, passed as the first argument of {@code assertPOS}
     *            (presumably the mapped POS type names - confirm against AssertAnnotations).
     * @return the processed CAS.
     * @throws Exception
     *             if an engine cannot be created or processing fails.
     */
    private JCas runTest(String language, String testDocument, String[] tokens, String[] tags,
            String[] tagClasses)
        throws Exception
    {
        AnalysisEngine tokenizer = createEngine(
                ArktweetTokenizer.class
        );
        try {
            AnalysisEngine tagger = createEngine(
                    ArktweetPosTagger.class,
                    ArktweetPosTagger.PARAM_VARIANT, "default"
            );
            try {
                JCas aJCas = tagger.newJCas();
                aJCas.setDocumentLanguage(language);
                aJCas.setDocumentText(testDocument);

                // The tagger works on the tokens created by the tokenizer, so order matters.
                tokenizer.process(aJCas);
                tagger.process(aJCas);

                assertToken(tokens, select(aJCas, Token.class));
                assertPOS(tagClasses, tags, select(aJCas, POS.class));

                return aJCas;
            }
            finally {
                // Release the tagger even when an assertion above fails.
                tagger.destroy();
            }
        }
        finally {
            // Release the tokenizer even when the tagger could not be created.
            tokenizer.destroy();
        }
    }

    /** JUnit rule providing the DKPro test context for each test method. */
    @Rule
    public DkproTestContext testContext = new DkproTestContext();
}