/**
* Copyright 2007-2017
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.flextag;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.Type;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.dkpro.tc.api.type.TextClassificationOutcome;
import org.dkpro.tc.ml.uima.TcAnnotator;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.parameter.ComponentParameters;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProviderFactory;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ModelProviderBase;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
/**
 * Flexible part-of-speech tagger.
 *
 * <p>
 * This component wraps a {@link TcAnnotator} engine that is configured with {@link Sentence} as
 * the sequence annotation and {@link Token} as the unit annotation. For every CAS, the wrapped
 * engine is run first; afterwards the {@link TextClassificationOutcome} annotations found in the
 * CAS are paired with the {@link Token} annotations by index and one {@link POS} annotation is
 * created and attached per token.
 * </p>
 */
public class FlexTagPosTagger
    extends JCasAnnotator_ImplBase
{
    /**
     * Optional override for the model location. When unset, the location is resolved from the
     * default pattern configured on the model provider.
     */
    public static final String PARAM_MODEL_LOCATION = ComponentParameters.PARAM_MODEL_LOCATION;
    @ConfigurationParameter(name = PARAM_MODEL_LOCATION, mandatory = false)
    private String modelLocation;

    /**
     * Optional override for the language used when resolving the model and the tag mapping.
     */
    public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE;
    @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false)
    private String language;

    /**
     * Optional override for the model variant used when resolving the model.
     */
    public static final String PARAM_VARIANT = ComponentParameters.PARAM_VARIANT;
    @ConfigurationParameter(name = PARAM_VARIANT, mandatory = false)
    private String variant;

    /**
     * Optional location of the part-of-speech tag mapping handed to the mapping provider.
     */
    public static final String PARAM_POS_MAPPING_LOCATION = ComponentParameters.PARAM_POS_MAPPING_LOCATION;
    @ConfigurationParameter(name = PARAM_POS_MAPPING_LOCATION, mandatory = false)
    private String posMappingLocation;

    /** Delegate engine that produces the classification outcomes. */
    private AnalysisEngine flexTagEngine = null;
    /** Resolves the model location to a folder on the local file system. */
    private ModelProviderBase<File> modelProvider = null;
    /** Maps tag strings emitted by the model to POS annotation types. */
    private MappingProvider mappingProvider = null;

    /**
     * Sets up the model provider, the tag mapping provider and the delegate tagging engine.
     *
     * @param context
     *            the UIMA context of this component.
     * @throws ResourceInitializationException
     *             if the model cannot be resolved or the delegate engine cannot be created.
     */
    @Override
    public void initialize(final UimaContext context)
        throws ResourceInitializationException
    {
        super.initialize(context);

        initModelProvider();

        mappingProvider = MappingProviderFactory.createPosMappingProvider(posMappingLocation,
                language, modelProvider);

        flexTagEngine = AnalysisEngineFactory.createEngine(TcAnnotator.class,
                TcAnnotator.PARAM_TC_MODEL_LOCATION, modelProvider.getResource(),
                TcAnnotator.PARAM_NAME_SEQUENCE_ANNOTATION, Sentence.class.getName(),
                TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    }

    /**
     * Runs the delegate engine on the CAS and converts its outcomes into POS annotations.
     *
     * @param aJCas
     *            the CAS to tag.
     * @throws AnalysisEngineProcessException
     *             if the delegate engine fails or if it produced fewer outcomes than there are
     *             tokens in the CAS.
     */
    @Override
    public void process(JCas aJCas)
        throws AnalysisEngineProcessException
    {
        mappingProvider.configure(aJCas.getCas());

        flexTagEngine.process(aJCas);

        annotateTaggingResultsLinkToTokens(aJCas);
    }

    /**
     * Releases the delegate engine created in {@link #initialize(UimaContext)} before this
     * component itself is destroyed.
     */
    @Override
    public void destroy()
    {
        try {
            // The engine is null if initialization failed before it could be created.
            if (flexTagEngine != null) {
                flexTagEngine.destroy();
                flexTagEngine = null;
            }
        }
        finally {
            super.destroy();
        }
    }

    /**
     * Pairs the i-th token with the i-th outcome, creates a POS annotation for the pair and
     * attaches it to the token.
     *
     * @param aJCas
     *            the CAS holding tokens and outcomes.
     * @throws AnalysisEngineProcessException
     *             if there are fewer outcomes than tokens, in which case no annotation is created.
     */
    private void annotateTaggingResultsLinkToTokens(JCas aJCas)
        throws AnalysisEngineProcessException
    {
        List<Token> tokens = getTokens(aJCas);
        List<TextClassificationOutcome> outcomes = getPredictions(aJCas);

        // Index-based pairing needs at least one outcome per token. Fail up-front with a
        // meaningful message instead of running into an IndexOutOfBoundsException after the CAS
        // has already been partially annotated. Surplus outcomes are ignored, as before.
        if (outcomes.size() < tokens.size()) {
            throw new AnalysisEngineProcessException(new IllegalStateException(
                    "Expected one classification outcome per token but found ["
                            + outcomes.size() + "] outcomes for [" + tokens.size()
                            + "] tokens"));
        }

        for (int i = 0; i < tokens.size(); i++) {
            Token token = tokens.get(i);
            String outcome = outcomes.get(i).getOutcome();
            POS pos = createPartOfSpeechAnnotationFromOutcome(aJCas, token.getBegin(),
                    token.getEnd(), outcome);
            token.setPos(pos);
        }
    }

    /**
     * Creates a POS annotation of the type the mapping provider assigns to the given tag, and adds
     * it to the CAS indexes.
     *
     * @param aJCas
     *            the CAS in which the annotation is created.
     * @param begin
     *            begin offset of the annotation.
     * @param end
     *            end offset of the annotation.
     * @param aOutcome
     *            the tag predicted for the span; stored as the POS value.
     * @return the newly created and indexed annotation.
     */
    private POS createPartOfSpeechAnnotationFromOutcome(JCas aJCas, int begin, int end,
            String aOutcome)
    {
        Type posTag = mappingProvider.getTagType(aOutcome);
        POS posAnno = (POS) aJCas.getCas().createAnnotation(posTag, begin, end);
        posAnno.setPosValue(aOutcome);

        // Only subtypes of POS carry a coarse value (their short type name); an annotation of the
        // generic POS type itself gets none.
        posAnno.setCoarseValue(posAnno.getClass().equals(POS.class) ? null
                : posAnno.getType().getShortName().intern());
        posAnno.addToIndexes();
        return posAnno;
    }

    /**
     * @param aJCas
     *            the CAS to read from.
     * @return a list copy of all classification outcomes selected from the CAS.
     */
    private List<TextClassificationOutcome> getPredictions(JCas aJCas)
    {
        return new ArrayList<TextClassificationOutcome>(
                JCasUtil.select(aJCas, TextClassificationOutcome.class));
    }

    /**
     * @param aJCas
     *            the CAS to read from.
     * @return a list copy of all tokens selected from the CAS.
     */
    private List<Token> getTokens(JCas aJCas)
    {
        return new ArrayList<Token>(JCasUtil.select(aJCas, Token.class));
    }

    /**
     * Creates and configures the provider that resolves the model to a local folder.
     *
     * @throws ResourceInitializationException
     *             if configuring the provider fails with an I/O error.
     */
    private void initModelProvider()
        throws ResourceInitializationException
    {
        modelProvider = new ModelProviderBase<File>()
        {
            {
                setContextObject(FlexTagPosTagger.this);
                setDefault(ARTIFACT_ID, "${groupId}.flextag-model-${language}-${variant}");
                setDefault(LOCATION,
                        "classpath:/${package}/lib/tagger-${language}-${variant}.properties");

                // Component parameters take precedence over the defaults above.
                setOverride(LOCATION, modelLocation);
                setOverride(LANGUAGE, language);
                setOverride(VARIANT, variant);
            }

            @Override
            protected File produceResource(URL aUrl)
                throws IOException
            {
                // NOTE(review): the boolean flag presumably enables caching of the extracted
                // folder - confirm against ResourceUtils.getClasspathAsFolder.
                return ResourceUtils.getClasspathAsFolder(aUrl.toString(), true);
            }
        };

        try {
            modelProvider.configure();
        }
        catch (IOException e) {
            throw new ResourceInitializationException(e);
        }
    }
}