package de.berlin.hu.uima.ae.tagger.banner;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.net.URL;
import java.util.zip.GZIPInputStream;

import banner.tagging.CRFTagger;
import banner.tagging.FeatureSet;
import banner.tagging.Tagger;
import cc.mallet.fst.CRF;
import dragon.nlp.tool.Lemmatiser;
/**
 * A {@link CRFTagger} that can be loaded from a {@link URL} rather than a file, and whose model's data alphabet is
 * frozen as soon as the tagger is constructed.
 */
public class CRFWrapper extends CRFTagger {

    /**
     * Loads a {@link CRFTagger} from the specified URL. The resource is read as a gzip-compressed Java object
     * stream containing, in this order, the {@link CRF} model, the {@link FeatureSet} and the order as an int.
     * As the lemmatiser and part-of-speech tagger both require data, these cannot be written to disk and must be
     * passed in new. Each helper that is non-null is installed on the loaded feature set; a null helper leaves
     * whatever the deserialized feature set already holds untouched.
     * <p>
     * The stream is always closed before this method returns, whether loading succeeds or fails.
     * <p>
     * NOTE(review): this uses native Java deserialization, so {@code f} must point to a trusted resource.
     *
     * @param f
     *        The location to load the CRFTagger from, in the format produced by the tagger's {@code write()} method.
     * @param lemmatiser
     *        The {@link Lemmatiser} to use, or null to keep the deserialized feature set's setting
     * @param posTagger
     *        The part-of-speech {@link dragon.nlp.tool.Tagger} to use, or null to keep the deserialized setting
     * @param preTagger
     *        The pre-tagging {@link Tagger} to use, or null to keep the deserialized setting
     * @throws IOException
     *         If the resource cannot be opened, is not valid gzip/object-stream data, or cannot be closed
     * @throws RuntimeException
     *         If a class referenced by the serialized data cannot be found; the cause is the original
     *         {@link ClassNotFoundException}
     * @return A new instance of the CRFTagger contained in the specified resource
     */
    public static CRFTagger load(URL f, Lemmatiser lemmatiser, dragon.nlp.tool.Tagger posTagger, Tagger preTagger) throws IOException
    {
        // 'in' always refers to the outermost stream opened so far, so the finally block releases everything
        // that was successfully opened even when one of the wrapping constructors throws.
        InputStream in = f.openStream();
        try
        {
            in = new GZIPInputStream(in);
            ObjectInputStream ois = new ObjectInputStream(in);
            in = ois;

            CRF model = (CRF) ois.readObject();
            // TODO Test this
            FeatureSet featureSet = (FeatureSet) ois.readObject();
            if (lemmatiser != null)
            {
                featureSet.setLemmatiser(lemmatiser);
            }
            if (posTagger != null)
            {
                featureSet.setPosTagger(posTagger);
            }
            if (preTagger != null)
            {
                featureSet.setPreTagger(preTagger);
            }
            int order = ois.readInt();
            return new CRFWrapper(model, featureSet, order);
        }
        catch (ClassNotFoundException e)
        {
            throw new RuntimeException("Could not deserialize CRF tagger from " + f, e);
        }
        finally
        {
            in.close();
        }
    }

    /**
     * Creates the wrapper by delegating to the {@link CRFTagger} constructor and then stops the growth of the data
     * alphabet of the model's input pipe.
     *
     * @param model
     *        The {@link CRF} model to tag with
     * @param featureSet
     *        The {@link FeatureSet} used with the model
     * @param order
     *        The order value passed through to {@link CRFTagger}
     */
    protected CRFWrapper(CRF model, FeatureSet featureSet, int order) {
        super(model, featureSet, order);
        // Presumably prevents unseen features from being added to the alphabet while tagging -- TODO confirm.
        model.getInputPipe().getDataAlphabet().stopGrowth();
    }
}