/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.uby.uima.annotator.test;

import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.apache.uima.fit.factory.ExternalResourceFactory.createExternalResourceDescription;
import static org.apache.uima.fit.util.JCasUtil.select;

import java.io.File;
import java.io.FileNotFoundException;

import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.testing.factory.TokenBuilder;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.dom4j.DocumentException;
import org.junit.Ignore;
import org.junit.Test;

import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.ADJ;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.NN;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.V;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceObjectProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemanticField;
import de.tudarmstadt.ukp.dkpro.core.testing.AssertAnnotations;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
import de.tudarmstadt.ukp.lmf.transform.LMFDBUtils;
import de.tudarmstadt.ukp.lmf.transform.XMLToDBTransformer;
import de.tudarmstadt.ukp.uby.resource.UbyResource;
import de.tudarmstadt.ukp.uby.resource.UbySemanticFieldResource;
import de.tudarmstadt.ukp.uby.uima.annotator.UbySemanticFieldAnnotator;

public class UbySemanticFieldAnnotatorTest
{
    @Test
    public void testUbySemanticFieldAnnotatorOnInMemDb()
        throws Exception
    {
        String[] semanticFields = new String[] { "communication", "communication", "UNKNOWN",
                "communication", "UNKNOWN" };

        runAnnotatorTestOnInMemDb("en", "Answers question most questions .",
                new String[] { "answer", "question", "most", "question", "." },
                new String[] { "NN", "V", "NOT_RELEVANT", "NN", "$." },
                semanticFields);

        // botnet: Wiktionary domain=question (just for the test case)
        // -> question: WordNet semantic field = communication
        runAnnotatorTestOnInMemDb("en", "Botnets question most questions .",
                new String[] { "botnet", "question", "most", "question", "." },
                new String[] { "NN", "V", "NOT_RELEVANT", "NN", "$." },
                semanticFields);
    }
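    /**
     * Runs the annotator against a full UBY database. This test assumes a local MySQL instance
     * hosting the {@code uby_lite_0_6_0} database (see {@link #runAnnotatorTestOnMySqlDb}) and is
     * therefore excluded from the regular build via {@code @Ignore}.
     */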
    @Ignore
    @Test
    public void testUbySemanticFieldAnnotatorOnMySqlDb()
        throws Exception
    {
        String[] lemmas = new String[] { "vanilla", "in", "the", "blue", "sky", "prefer",
                "braveness", "over", "jumpiness", "." };
        String[] pos = new String[] { "NN", "NOT_RELEVANT", "NOT_RELEVANT", "ADJ", "NN", "V", "NN",
                "NOT_RELEVANT", "NN", "$." };
        String[] semanticFields = new String[] { "plant", "UNKNOWN", "UNKNOWN", "all", "object",
                "emotion", "attribute", "UNKNOWN", "feeling", "UNKNOWN" };

        runAnnotatorTestOnMySqlDb("en",
                "Vanilla in the blue sky prefers braveness over jumpiness .",
                lemmas, pos, semanticFields);

        String[] lemmas2 = new String[] { "vanilla", "in", "the", "distantGalaxyBehindJupiter",
                "prefer", "braveness", "over", "jumpiness", "." };
        String[] pos2 = new String[] { "NN", "NOT_RELEVANT", "NOT_RELEVANT", "NN", "V", "NN",
                "NOT_RELEVANT", "NN", "$." };
        String[] semanticFields2 = new String[] { "plant", "UNKNOWN", "UNKNOWN", "UNKNOWN",
                "emotion", "attribute", "UNKNOWN", "feeling", "UNKNOWN" };

        runAnnotatorTestOnMySqlDb("en",
                "Vanilla in the distantGalaxyBehindJupiter prefers braveness over jumpiness .",
                lemmas2, pos2, semanticFields2);
    }

    /**
     * Test fixture using an embedded in-memory H2 database into which the test lexicon
     * {@code UbyTestLexicon.xml} is imported before the annotator is run on the given test
     * document.
     */
    private void runAnnotatorTestOnInMemDb(String language, String testDocument,
            String[] documentLemmas, String[] documentPosTags, String[] documentUbySemanticFields)
        throws UIMAException, FileNotFoundException, DocumentException, IllegalArgumentException
    {
        // Create the UBY tables in an in-memory H2 database and import the test lexicon
        DBConfig dbConfig = new DBConfig("jdbc:h2:mem:test;DB_CLOSE_DELAY=-1", "org.h2.Driver",
                "h2", "root", "pass", false);
        LMFDBUtils.createTables(dbConfig);

        XMLToDBTransformer transformer = new XMLToDBTransformer(dbConfig);
        transformer.transform(new File("src/test/resources/UbyTestLexicon.xml"), "UbyTest");

        AnalysisEngineDescription processor = createEngineDescription(
                createEngineDescription(UbySemanticFieldAnnotator.class,
                        UbySemanticFieldAnnotator.PARAM_UBY_SEMANTIC_FIELD_RESOURCE,
                        createExternalResourceDescription(UbySemanticFieldResource.class,
                                UbySemanticFieldResource.RES_UBY,
                                createExternalResourceDescription(UbyResource.class,
                                        UbyResource.PARAM_MODEL_LOCATION,
                                        ResourceObjectProviderBase.NOT_REQUIRED,
                                        UbyResource.PARAM_URL, "jdbc:h2:mem:test;DB_CLOSE_DELAY=-1",
                                        UbyResource.PARAM_DRIVER, "org.h2.Driver",
                                        UbyResource.PARAM_DIALECT, "h2",
                                        UbyResource.PARAM_USERNAME, "root",
                                        UbyResource.PARAM_PASSWORD, "pass"))));

        AnalysisEngine engine = createEngine(processor);
        JCas aJCas = engine.newJCas();
        aJCas.setDocumentLanguage(language);

        TokenBuilder<Token, Sentence> tb = new TokenBuilder<Token, Sentence>(Token.class,
                Sentence.class);
        tb.buildTokens(aJCas, testDocument);

        // Attach the expected POS and lemma annotations to each token
        int offset = 0;
        for (Token token : JCasUtil.select(aJCas, Token.class)) {
            if (documentPosTags[offset].matches("NN")) {
                NN nn = new NN(aJCas, token.getBegin(), token.getEnd());
                nn.setPosValue(documentPosTags[offset]);
                nn.addToIndexes();
                token.setPos(nn);
            }
            else if (documentPosTags[offset].matches("V")) {
                V v = new V(aJCas, token.getBegin(), token.getEnd());
                v.setPosValue(documentPosTags[offset]);
                v.addToIndexes();
                token.setPos(v);
            }
            else if (documentPosTags[offset].matches("ADJ")) {
                ADJ adj = new ADJ(aJCas, token.getBegin(), token.getEnd());
                adj.setPosValue(documentPosTags[offset]);
                adj.addToIndexes();
                token.setPos(adj);
            }
            else {
                POS pos = new POS(aJCas, token.getBegin(), token.getEnd());
                pos.setPosValue(documentPosTags[offset]);
                pos.addToIndexes();
                token.setPos(pos);
            }

            Lemma lemma = new Lemma(aJCas, token.getBegin(), token.getEnd());
            lemma.setValue(documentLemmas[offset]);
            lemma.addToIndexes();
            token.setLemma(lemma);

            offset++;
        }

        engine.process(aJCas);

        AssertAnnotations.assertSemanticField(documentUbySemanticFields,
                select(aJCas, SemanticField.class));
    }
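    /**
     * Same fixture as {@link #runAnnotatorTestOnInMemDb}, but wired against an external MySQL UBY
     * database ({@code localhost/uby_lite_0_6_0}) instead of the embedded H2 database.
     */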
    private void runAnnotatorTestOnMySqlDb(String language, String testDocument,
            String[] documentLemmas, String[] documentPosTags, String[] documentUbySemanticFields)
        throws UIMAException
    {
        AnalysisEngineDescription processor = createEngineDescription(
                createEngineDescription(UbySemanticFieldAnnotator.class,
                        UbySemanticFieldAnnotator.PARAM_UBY_SEMANTIC_FIELD_RESOURCE,
                        createExternalResourceDescription(UbySemanticFieldResource.class,
                                UbySemanticFieldResource.RES_UBY,
                                createExternalResourceDescription(UbyResource.class,
                                        UbyResource.PARAM_MODEL_LOCATION,
                                        ResourceObjectProviderBase.NOT_REQUIRED,
                                        UbyResource.PARAM_URL, "localhost/uby_lite_0_6_0",
                                        UbyResource.PARAM_DRIVER, "com.mysql.jdbc.Driver",
                                        UbyResource.PARAM_DIALECT, "mysql",
                                        UbyResource.PARAM_USERNAME, "root",
                                        UbyResource.PARAM_PASSWORD, "pass"))));

        AnalysisEngine engine = createEngine(processor);
        JCas aJCas = engine.newJCas();
        aJCas.setDocumentLanguage(language);

        TokenBuilder<Token, Sentence> tb = new TokenBuilder<Token, Sentence>(Token.class,
                Sentence.class);
        tb.buildTokens(aJCas, testDocument);

        // Attach the expected POS and lemma annotations to each token
        int offset = 0;
        for (Token token : JCasUtil.select(aJCas, Token.class)) {
            if (documentPosTags[offset].matches("NN")) {
                NN nn = new NN(aJCas, token.getBegin(), token.getEnd());
                nn.setPosValue(documentPosTags[offset]);
                nn.addToIndexes();
                token.setPos(nn);
            }
            else if (documentPosTags[offset].matches("V")) {
                V v = new V(aJCas, token.getBegin(), token.getEnd());
                v.setPosValue(documentPosTags[offset]);
                v.addToIndexes();
                token.setPos(v);
            }
            else if (documentPosTags[offset].matches("ADJ")) {
                ADJ adj = new ADJ(aJCas, token.getBegin(), token.getEnd());
                adj.setPosValue(documentPosTags[offset]);
                adj.addToIndexes();
                token.setPos(adj);
            }
            else {
                POS pos = new POS(aJCas, token.getBegin(), token.getEnd());
                pos.setPosValue(documentPosTags[offset]);
                pos.addToIndexes();
                token.setPos(pos);
            }

            Lemma lemma = new Lemma(aJCas, token.getBegin(), token.getEnd());
            lemma.setValue(documentLemmas[offset]);
            lemma.addToIndexes();
            token.setLemma(lemma);

            offset++;
        }

        engine.process(aJCas);

        AssertAnnotations.assertSemanticField(documentUbySemanticFields,
                select(aJCas, SemanticField.class));
    }
}