/*
* Copyright 2013
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.opennlp.internal;
import static java.util.Collections.singletonMap;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.TagsetBase;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceClassificationModel;
import opennlp.tools.parser.ParserModel;
public class OpenNlpParserTagsetDescriptionProvider
extends TagsetBase
{
private String name;
private String layer;
private ParserModel model;
private Properties metadata;
public OpenNlpParserTagsetDescriptionProvider(String aName, Class<?> aLayer, ParserModel aModel,
Properties aMetadata)
{
name = aName;
layer = aLayer.getName();
model = aModel;
metadata = aMetadata;
}
@Override
public Map<String, String> getLayers()
{
return singletonMap(layer, name);
}
public ParserModel getModel()
{
return model;
}
@Override
public Set<String> listTags(String aLayer, String aTagsetName)
{
Set<String> tagSet = new TreeSet<String>();
SequenceClassificationModel<String> seqModel = model.getParserChunkerModel()
.getChunkerSequenceModel();
collect(seqModel.getOutcomes(), tagSet);
if (model.getBuildModel() != null) {
collect(model.getBuildModel(), tagSet);
}
return tagSet;
}
private void collect(MaxentModel aMaxEnt, Set<String> aTagSet)
{
String[] tags = new String[aMaxEnt.getNumOutcomes()];
for (int i = 0; i < aMaxEnt.getNumOutcomes(); i++) {
tags[i] = aMaxEnt.getOutcome(i);
}
collect(tags, aTagSet);
}
private void collect(String[] aOutcomes, Set<String> aTagSet)
{
for (String tag : aOutcomes) {
String t = tag;
if (tag.startsWith("C-") || tag.startsWith("S-")) {
t = tag.substring(2);
}
if (metadata.containsKey("constituent.tag.map." + t)) {
t = metadata.getProperty("constituent.tag.map." + t);
}
aTagSet.add(t);
}
}
}