package edu.stanford.nlp.trees.international.pennchinese;
import java.io.Reader;
import edu.stanford.nlp.trees.*;
/**
* The {@code CTBTreeReaderFactory} is a factory for creating a
* TreeReader suitable for the Penn Chinese Treebank (CTB).
* It knows how to ignore the SGML tags in those files.
* The default reader doesn't delete empty nodes, but an
* additional static class is provided whose default constructor
* does give a TreeReader that deletes empty nodes in trees.
*
* @author Christopher Manning
*/
public class CTBTreeReaderFactory implements TreeReaderFactory {
private final TreeNormalizer tn;
private final boolean discardFrags;
public CTBTreeReaderFactory() {
this(new TreeNormalizer());
}
public CTBTreeReaderFactory(TreeNormalizer tn) {
this(tn, false);
}
public CTBTreeReaderFactory(TreeNormalizer tn, boolean discardFrags) {
this.tn = tn;
this.discardFrags = discardFrags;
}
/**
* Create a new {@code TreeReader} using the provided
* {@code Reader}.
*
* @param in The {@code Reader} to build on
* @return The new TreeReader
*/
@Override
public TreeReader newTreeReader(Reader in) {
if (discardFrags) {
return new FragDiscardingPennTreeReader(in, new LabeledScoredTreeFactory(), tn, new CHTBTokenizer(in));
} else {
return new PennTreeReader(in, new LabeledScoredTreeFactory(), tn, new CHTBTokenizer(in));
}
}
public static class NoEmptiesCTBTreeReaderFactory extends CTBTreeReaderFactory {
public NoEmptiesCTBTreeReaderFactory() {
super(new BobChrisTreeNormalizer());
}
} // end static class NoEmptiesCTBTreeReaderFactory
}