package cc.mallet.util;
import cc.mallet.types.*;
import java.util.logging.*;
import java.io.*;
/**
 * Command-line tool that loads an instance list from a file and prints the
 * size of each instance's FeatureSequence, one value per line, on standard
 * output.
 */
public class DocumentLengths {

    protected static Logger logger = MalletLogger.getLogger(DocumentLengths.class.getName());

    /**
     * Command option "input": filename of the instance list to read.
     * It is declared with a null default, so {@code value} must be checked
     * before use.
     */
    static cc.mallet.util.CommandOption.String inputFile = new cc.mallet.util.CommandOption.String
        (DocumentLengths.class, "input", "FILENAME", true, null,
         "Filename for the input instance list", null);

    /**
     * Processes the command-line options, loads the instance list named by
     * the "input" option and prints the size of each instance's data.
     *
     * <p>The process exits with status 1 if no input file was given, or as
     * soon as an instance is found whose data is not a FeatureSequence
     * (sizes of earlier instances will already have been printed).
     *
     * @param args command-line arguments, handed to CommandOption.process
     * @throws Exception propagated from option processing or from loading the
     *         instance list
     */
    public static void main(String[] args) throws Exception {
        CommandOption.setSummary (DocumentLengths.class,
                                  "Print the length of FeatureSequences in an instance list");
        CommandOption.process (DocumentLengths.class, args);

        // new File((String) null) throws a bare NullPointerException; report
        // the missing option explicitly instead of dumping a stack trace.
        if (inputFile.value == null) {
            System.err.println("No input file specified (use --input FILENAME)");
            System.exit(1);
        }

        InstanceList instances = InstanceList.load (new File(inputFile.value));

        for (Instance instance: instances) {
            Object data = instance.getData();

            if (! (data instanceof FeatureSequence)) {
                System.err.println("DocumentLengths is only applicable to FeatureSequence objects (use --keep-sequence when importing)");
                System.exit(1);
            }

            FeatureSequence words = (FeatureSequence) data;
            System.out.println(words.size());
        }
    }
}