package de.berlin.hu.banner.util;
import banner.eval.BANNER;
import banner.eval.dataset.Dataset;
import banner.postprocessing.LocalAbbreviationPostProcessor;
import banner.postprocessing.ParenthesisPostProcessor;
import banner.postprocessing.PostProcessor;
import banner.postprocessing.SequentialPostProcessor;
import banner.tagging.TagFormat;
import banner.tagging.dictionary.DictionaryTagger;
import banner.tokenization.Tokenizer;
import banner.types.Mention.MentionType;
import banner.types.Sentence.OverlapOption;
import dragon.nlp.tool.HeppleTagger;
import dragon.nlp.tool.MedPostTagger;
import dragon.nlp.tool.lemmatiser.EngLemmatiser;
import org.apache.commons.configuration.HierarchicalConfiguration;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;
/**
 * Static helpers that read BANNER settings from a {@link HierarchicalConfiguration}
 * and build the corresponding objects (dataset, tokenizer, dictionary tagger,
 * post-processor, POS tagger, lemmatiser, ...).
 *
 * <p>Every method looks its parameters up in the sub-configuration whose key is
 * the package name of {@link BANNER}.
 */
public class ConfigUtil {

    /**
     * Instantiates the dataset class named by {@code datasetName}, hands it the
     * configured tokenizer and loads it from {@code config}.
     *
     * @param config the full configuration
     * @return the loaded dataset
     * @throws RuntimeException if {@code datasetName} or {@code tokenizer} is
     *         missing, or if either class cannot be instantiated
     */
    public static Dataset getDataset(HierarchicalConfiguration config)
    {
        Tokenizer tokenizer = getTokenizer(config);
        String datasetName = getRequiredString(getLocalConfig(config), "datasetName");
        Dataset dataset = instantiate(datasetName, Dataset.class);
        dataset.setTokenizer(tokenizer);
        dataset.load(config);
        return dataset;
    }

    /**
     * Reads the {@code tagFormat} parameter and converts it with
     * {@link TagFormat#valueOf(String)}.
     *
     * @param config the full configuration
     * @return the configured tag format
     * @throws RuntimeException if {@code tagFormat} is missing
     */
    public static TagFormat getTagFormat(HierarchicalConfiguration config)
    {
        return TagFormat.valueOf(getRequiredString(getLocalConfig(config), "tagFormat"));
    }

    /**
     * Instantiates the tokenizer class named by the {@code tokenizer} parameter
     * using its no-argument constructor.
     *
     * @param config the full configuration
     * @return a new tokenizer instance
     * @throws RuntimeException if {@code tokenizer} is missing or the class
     *         cannot be instantiated
     */
    public static Tokenizer getTokenizer(HierarchicalConfiguration config)
    {
        String tokenizerName = getRequiredString(getLocalConfig(config), "tokenizer");
        return instantiate(tokenizerName, Tokenizer.class);
    }

    /**
     * Instantiates, configures and loads the dictionary tagger named by the
     * optional {@code dictionaryTagger} parameter.
     *
     * @param config the full configuration
     * @return the loaded dictionary tagger, or {@code null} if
     *         {@code dictionaryTagger} is not set
     * @throws RuntimeException if the tagger (or the tokenizer it needs) cannot
     *         be created, configured or loaded
     */
    public static DictionaryTagger getDictionary(HierarchicalConfiguration config)
    {
        String dictionaryName = getLocalConfig(config).getString("dictionaryTagger");
        if (dictionaryName == null)
        {
            // Nothing to build, so do not create a tokenizer that would be thrown away.
            return null;
        }
        Tokenizer tokenizer = getTokenizer(config);
        DictionaryTagger dictionary = instantiate(dictionaryName, DictionaryTagger.class);
        try
        {
            dictionary.configure(config, tokenizer);
            dictionary.load(config);
        }
        catch (Exception e)
        {
            throw new RuntimeException(e);
        }
        return dictionary;
    }

    /**
     * Builds a {@link SequentialPostProcessor} containing the post-processors
     * enabled by the boolean parameters {@code useParenthesisPostProcessing} and
     * {@code useLocalAbbreviationPostProcessing}. An absent parameter counts as
     * {@code false}.
     *
     * @param config the full configuration
     * @return the post-processor chain; never {@code null}, possibly with no
     *         post-processors added
     */
    public static PostProcessor getPostProcessor(HierarchicalConfiguration config)
    {
        HierarchicalConfiguration localConfig = getLocalConfig(config);
        SequentialPostProcessor postProcessor = new SequentialPostProcessor();
        if (localConfig.getBoolean("useParenthesisPostProcessing", false))
        {
            postProcessor.addPostProcessor(new ParenthesisPostProcessor());
        }
        if (localConfig.getBoolean("useLocalAbbreviationPostProcessing", false))
        {
            postProcessor.addPostProcessor(new LocalAbbreviationPostProcessor());
        }
        return postProcessor;
    }

    /**
     * Reads the integer parameter {@code crfOrder}.
     *
     * @param config the full configuration
     * @return the configured CRF order
     */
    public static int getCRFOrder(HierarchicalConfiguration config)
    {
        return getLocalConfig(config).getInt("crfOrder");
    }

    /**
     * Creates the POS tagger named by the optional {@code posTagger} parameter.
     * Only {@link HeppleTagger} and {@link MedPostTagger} are supported; both are
     * constructed with the value of {@code posTaggerDataDirectory}.
     *
     * @param config the full configuration
     * @return the tagger, or {@code null} if {@code posTagger} is not set
     * @throws IllegalArgumentException if {@code posTaggerDataDirectory} is
     *         missing or the tagger class name is not one of the supported ones
     */
    public static dragon.nlp.tool.Tagger getPosTagger(HierarchicalConfiguration config)
    {
        HierarchicalConfiguration localConfig = getLocalConfig(config);
        String posTagger = localConfig.getString("posTagger");
        if (posTagger == null)
        {
            return null;
        }
        String posTaggerDataDirectory = localConfig.getString("posTaggerDataDirectory");
        if (posTaggerDataDirectory == null)
        {
            throw new IllegalArgumentException("Must specify data directory for POS tagger");
        }
        if (posTagger.equals(HeppleTagger.class.getName()))
        {
            return new HeppleTagger(posTaggerDataDirectory);
        }
        if (posTagger.equals(MedPostTagger.class.getName()))
        {
            return new MedPostTagger(posTaggerDataDirectory);
        }
        throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
    }

    /**
     * Creates an {@link EngLemmatiser} from the optional
     * {@code lemmatiserDataDirectory} parameter.
     *
     * @param config the full configuration
     * @return the lemmatiser, or {@code null} if {@code lemmatiserDataDirectory}
     *         is not set
     */
    public static EngLemmatiser getLemmatiser(HierarchicalConfiguration config)
    {
        String lemmatiserDataDirectory = getLocalConfig(config).getString("lemmatiserDataDirectory");
        if (lemmatiserDataDirectory == null)
        {
            return null;
        }
        return new EngLemmatiser(lemmatiserDataDirectory, false, true);
    }

    /**
     * Parses the whitespace-separated {@code mentionTypes} parameter into a set
     * of {@link MentionType} constants.
     *
     * @param config the full configuration
     * @return the configured mention types (at least one)
     * @throws RuntimeException if {@code mentionTypes} is missing
     * @throws IllegalArgumentException if the value is blank or contains a name
     *         that is not a {@link MentionType} constant
     */
    public static Set<MentionType> getMentionTypes(HierarchicalConfiguration config)
    {
        String mentionTypesStr = getRequiredString(getLocalConfig(config), "mentionTypes");
        // Trim first: split() would otherwise yield an empty leading token for
        // values that start with whitespace, which valueOf() rejects.
        String trimmed = mentionTypesStr.trim();
        if (trimmed.length() == 0)
        {
            throw new IllegalArgumentException(
                    "Configuration parameter \"mentionTypes\" must name at least one mention type");
        }
        // Fill the EnumSet directly; EnumSet.copyOf() on an empty non-EnumSet
        // collection throws, so the intermediate HashSet is avoided.
        Set<MentionType> mentionTypes = EnumSet.noneOf(MentionType.class);
        for (String mentionTypeName : trimmed.split("\\s+"))
        {
            mentionTypes.add(MentionType.valueOf(mentionTypeName));
        }
        return mentionTypes;
    }

    /**
     * Reads the {@code sameTypeOverlapOption} parameter.
     *
     * @param config the full configuration
     * @return the configured overlap option
     * @throws RuntimeException if {@code sameTypeOverlapOption} is missing
     */
    public static OverlapOption getSameTypeOverlapOption(HierarchicalConfiguration config)
    {
        return OverlapOption.valueOf(getRequiredString(getLocalConfig(config), "sameTypeOverlapOption"));
    }

    /**
     * Reads the {@code differentTypeOverlapOption} parameter.
     *
     * @param config the full configuration
     * @return the configured overlap option
     * @throws RuntimeException if {@code differentTypeOverlapOption} is missing
     */
    public static OverlapOption getDifferentTypeOverlapOption(HierarchicalConfiguration config)
    {
        return OverlapOption.valueOf(getRequiredString(getLocalConfig(config), "differentTypeOverlapOption"));
    }

    /** Returns the sub-configuration keyed by the package name of {@link BANNER}. */
    private static HierarchicalConfiguration getLocalConfig(HierarchicalConfiguration config)
    {
        return config.configurationAt(BANNER.class.getPackage().getName());
    }

    /** Returns the string value of {@code key}, or throws a RuntimeException naming the missing parameter. */
    private static String getRequiredString(HierarchicalConfiguration localConfig, String key)
    {
        String value = localConfig.getString(key);
        if (value == null)
        {
            throw new RuntimeException("Configuration must contain parameter \"" + key + "\"");
        }
        return value;
    }

    /** Creates an instance of {@code className} via its no-argument constructor, wrapping any failure in a RuntimeException. */
    private static <T> T instantiate(String className, Class<T> expectedType)
    {
        try
        {
            return expectedType.cast(Class.forName(className).newInstance());
        }
        catch (Exception e)
        {
            throw new RuntimeException("Unable to instantiate " + className, e);
        }
    }
}