package kea.main;
/*
* KEAKeyphraseExtractor.java
* Copyright (C) 2001-2006 Eibe Frank, Olena Medelyan
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;
import edu.unc.ils.mrc.hive.api.SKOSScheme;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Utils;
import kea.filters.KEAFilter;
import kea.filters.KEAPhraseFilter;
import kea.stemmers.SremovalStemmer;
import kea.stemmers.Stemmer;
import kea.stopwords.Stopwords;
import kea.stopwords.StopwordsEnglish;
import kea.util.Counter;
import kea.vocab.Vocabulary;
import kea.vocab.VocabularyH2;
import kea.vocab.VocabularySesame;
/**
* Extracts keyphrases from the documents in a given directory. Assumes that the
* file names for the documents end with ".txt". Puts extracted keyphrases into
* corresponding files ending with ".key" (if those are not already present).
* Optionally an encoding for the documents/keyphrases can be defined (e.g. for
* Chinese text). Documents for which ".key" exists, are used for evaluation.
*
* Valid options are:
* <p>
*
* -l "directory name"<br>
* Specifies name of directory.
* <p>
*
* -m "model name"<br>
* Specifies name of model.
* <p>
*
* -v "vocabulary name"<br>
* Specifies name of vocabulary.
* <p>
*
* -f "vocabulary format"<br>
* Specifies format of vocabulary (text or skos).
* <p>
*
* -i "document language" <br>
* Specifies document language (en, es, de, fr).
* <p>
*
* -e "encoding"<br>
* Specifies encoding.
* <p>
*
* -n <br>
* Specifies number of phrases to be output (default: 5).
* <p>
*
* -t "name of class implementing stemmer"<br>
* Sets stemmer to use (default: SremovalStemmer).
* <p>
*
* -s "name of class implementing stopwords"<br>
* Sets stopwords to use (default: StopwordsEnglish).
* <p>
*
* -d<br>
* Turns debugging mode on.
* <p>
*
* -b<br>
* Build global dictionaries from the test set.
* <p>
*
* -a<br>
* Also write stemmed phrase and score into ".key" file.
* <p>
*
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
* @version 1.0
*/
public class KEAKeyphraseExtractor implements OptionHandler {
/**
 * Stopwords path.
 * NOTE(review): never read or written anywhere in this file — confirm
 * whether it is still needed.
 */
String m_stopwordsPath;
/** Name of the directory that holds the ".txt" documents (and ".key" files). */
String m_dirName = null;
/** Path of the serialized model file read by loadModel(). */
String m_modelName = null;
/** Name of vocabulary; the literal value "none" disables the vocabulary feature. */
String m_vocabulary = null;
/** Vocabulary format ("skos" or "text"); the constructor sets it to "skos". */
String m_vocabularyFormat = null;
/** Document language code handed to the filter and the vocabulary (initially "en"). */
String m_documentLanguage = "en";
/**
 * Character encoding used for reading documents and writing key files.
 * The literal "default" means: do not pass an explicit charset to the
 * reader/writer.
 */
String m_encoding = "default";
/** Debugging mode? When true, progress details are printed to System.err. */
boolean m_debug = false;
/** The KEA filter object (created in the constructor, replaced by loadModel()). */
private KEAFilter m_KEAFilter = null;
/**
 * The number of phrases to extract. Starts at 10; setOptions() resets it
 * to 5 when no -n option is supplied.
 */
int m_numPhrases = 10;
/** The stemmer to be used */
private Stemmer m_Stemmer = new SremovalStemmer();
/** The list of stop words to be used; stays null until a setStopwords(...) overload is called. */
private Stopwords m_Stopwords;
/**
 * NOTE(review): not referenced anywhere in this file; the field actually
 * used is {@code schema} below — confirm whether this can be removed.
 */
private SKOSScheme m_Scheme;
/** Also write stemmed phrase and score into .key file. */
boolean m_AdditionalInfo = false;
/** Build global dictionaries from the test set. */
boolean m_buildGlobal = false;
/** SKOS scheme passed to the constructor; may be null (main() passes null). */
private SKOSScheme schema;
/** Vocabulary opened by the constructor from the scheme; null if that failed. */
private Vocabulary vocabulary;
/**
 * Creates an extractor bound to the given SKOS scheme.
 * <p>
 * A fresh {@link KEAFilter} is created and the vocabulary format is set to
 * "skos". When a scheme is supplied, the H2 vocabulary is opened from the
 * directory that contains the scheme's RDF file. If that fails, the stack
 * trace is printed and the vocabulary stays {@code null}.
 *
 * @param schema
 *            the SKOS scheme to extract against; may be {@code null} (as
 *            done by {@link #main(String[])}), in which case no vocabulary
 *            is opened
 */
public KEAKeyphraseExtractor(SKOSScheme schema) {
    this.m_KEAFilter = new KEAFilter();
    this.schema = schema;
    m_vocabularyFormat = "skos";
    if (schema == null) {
        // Nothing to open. Previously this path relied on catching the
        // NullPointerException below and dumped a stack trace.
        return;
    }
    try {
        String h2path = new File(schema.getRdfPath()).getParentFile().getAbsolutePath();
        this.vocabulary = new VocabularyH2(schema.getName(), h2path, m_documentLanguage,
                schema.getManager());
    } catch (Exception e) {
        // Best effort: the extractor stays usable without a vocabulary.
        e.printStackTrace();
    }
}
/**
 * Announces the scheme on standard output, hands the current stemmer,
 * stopwords and vocabulary to the KEA filter so it can load the thesaurus,
 * and then sets the filter's vocabulary name to the lower-cased scheme name.
 */
public void loadThesaurus() {
    final String banner = "SCHEMA LOADED IN KEYPHRASE EXTRACTOR " + schema.getLongName();
    System.out.println(banner);

    final KEAFilter filter = this.m_KEAFilter;
    filter.loadThesaurus(this.m_Stemmer, this.m_Stopwords, this.vocabulary);

    final String vocabularyName = schema.getName().toLowerCase();
    filter.setVocabulary(vocabularyName);
}
/**
 * Tells whether the stemmed phrase and its score are written to the
 * ".key" file in addition to the phrase itself.
 *
 * @return the current AdditionalInfo flag
 */
public boolean getAdditionalInfo() {
    return this.m_AdditionalInfo;
}
/**
 * Chooses whether the stemmed phrase and its score are written to the
 * ".key" file in addition to the phrase itself.
 *
 * @param newAdditionalInfo
 *            the new AdditionalInfo flag
 */
public void setAdditionalInfo(boolean newAdditionalInfo) {
    this.m_AdditionalInfo = newAdditionalInfo;
}
/**
 * Tells whether the global dictionaries are to be rebuilt from the test
 * collection.
 *
 * @return the current BuildGlobal flag
 */
public boolean getBuildGlobal() {
    return this.m_buildGlobal;
}
/**
 * Chooses whether the global dictionaries are to be rebuilt from the test
 * collection.
 *
 * @param newBuildGlobal
 *            the new BuildGlobal flag
 */
public void setBuildGlobal(boolean newBuildGlobal) {
    this.m_buildGlobal = newBuildGlobal;
}
/**
 * Returns how many phrases are extracted per document.
 *
 * @return the current number of phrases
 */
public int getNumPhrases() {
    return this.m_numPhrases;
}
/**
 * Returns the stemmer in use.
 *
 * @return the current stemmer
 */
public Stemmer getStemmer() {
    return this.m_Stemmer;
}
/**
 * Replaces the stemmer in use.
 *
 * @param newStemmer
 *            the stemmer to use from now on
 */
public void setStemmer(Stemmer newStemmer) {
    m_Stemmer = newStemmer;
}
/**
 * Returns the stopword list in use.
 *
 * @return the current stopwords; {@code null} if none has been set yet
 */
public Stopwords getStopwords() {
    return this.m_Stopwords;
}
/**
 * Forwards the minimum-occurrence setting to the current KEA filter.
 * Note that loadModel() replaces the filter object, so a value set before
 * loading a model applies to the filter that gets discarded.
 *
 * @param newMinNumOccur
 *            value passed on to the filter's setMinNumOccur
 */
public void setMinNumOccur(int newMinNumOccur) {
    final KEAFilter filter = m_KEAFilter;
    filter.setMinNumOccur(newMinNumOccur);
}
/**
 * Installs an English stopword list constructed from the given path.
 *
 * @param stopwordsPath
 *            argument handed to the StopwordsEnglish constructor
 */
public void setStopwords(String stopwordsPath) {
    final Stopwords english = new StopwordsEnglish(stopwordsPath);
    m_Stopwords = english;
}
/**
 * Replaces the stopword list in use.
 *
 * @param newStopwords
 *            the stopwords to use from now on
 */
public void setStopwords(Stopwords newStopwords) {
    m_Stopwords = newStopwords;
}
/**
 * Sets how many phrases are extracted per document.
 *
 * @param newnumPhrases
 *            the new number of phrases
 */
public void setNumPhrases(int newnumPhrases) {
    this.m_numPhrases = newnumPhrases;
}
/**
 * Tells whether debugging output is enabled.
 *
 * @return the current debug flag
 */
public boolean getDebug() {
    return this.m_debug;
}
/**
 * Switches debugging output on or off.
 *
 * @param newdebug
 *            the new debug flag
 */
public void setDebug(boolean newdebug) {
    this.m_debug = newdebug;
}
/**
 * Returns the encoding name used for documents and key files.
 *
 * @return the current encoding name
 */
public String getEncoding() {
    return this.m_encoding;
}
/**
 * Sets the encoding name used for documents and key files.
 *
 * @param newencoding
 *            the new encoding name
 */
public void setEncoding(String newencoding) {
    this.m_encoding = newencoding;
}
/**
 * Returns the vocabulary name.
 *
 * @return the current vocabulary name
 */
public String getVocabulary() {
    return this.m_vocabulary;
}
/**
 * Sets the vocabulary name.
 *
 * @param newvocabulary
 *            the new vocabulary name
 */
public void setVocabulary(String newvocabulary) {
    this.m_vocabulary = newvocabulary;
}
/**
 * Returns the vocabulary format.
 *
 * @return the current vocabulary format
 */
public String getVocabularyFormat() {
    return this.m_vocabularyFormat;
}
/**
 * Sets the vocabulary format.
 *
 * @param newvocabularyFormat
 *            the new vocabulary format
 */
public void setVocabularyFormat(String newvocabularyFormat) {
    this.m_vocabularyFormat = newvocabularyFormat;
}
/**
 * Returns the document language code.
 *
 * @return the current document language
 */
public String getDocumentLanguage() {
    return this.m_documentLanguage;
}
/**
 * Sets the document language code.
 *
 * @param newdocumentLanguage
 *            the new document language
 */
public void setDocumentLanguage(String newdocumentLanguage) {
    this.m_documentLanguage = newdocumentLanguage;
}
/**
 * Returns the name of the model file.
 *
 * @return the current model name
 */
public String getModelName() {
    return this.m_modelName;
}
/**
 * Sets the name of the model file.
 *
 * @param newmodelName
 *            the new model name
 */
public void setModelName(String newmodelName) {
    this.m_modelName = newmodelName;
}
/**
 * Returns the name of the document directory.
 *
 * @return the current directory name
 */
public String getDirName() {
    return this.m_dirName;
}
/**
 * Sets the name of the document directory.
 *
 * @param newdirName
 *            the new directory name
 */
public void setDirName(String newdirName) {
    this.m_dirName = newdirName;
}
/**
 * Configures this object from a Weka-style option array.
 * <p>
 * Recognised options:
 * <ul>
 * <li>{@code -l <directory name>} &ndash; document directory (required)</li>
 * <li>{@code -m <model name>} &ndash; model file (required)</li>
 * <li>{@code -v <vocabulary name>} &ndash; vocabulary name (required; use
 * "none" for no controlled vocabulary)</li>
 * <li>{@code -f <vocabulary format>} &ndash; "skos" or "text"; required
 * unless the vocabulary is "none"</li>
 * <li>{@code -e <encoding>} &ndash; encoding; "default" when absent</li>
 * <li>{@code -i <document language>} &ndash; "en" when absent</li>
 * <li>{@code -n <number>} &ndash; number of phrases; 5 when absent</li>
 * <li>{@code -t <stemmer class>} &ndash; class name inside the
 * {@code kea.stemmers} package</li>
 * <li>{@code -s <stopwords class>} &ndash; class name inside the
 * {@code kea.stopwords} package</li>
 * <li>{@code -d} &ndash; debugging mode</li>
 * <li>{@code -b} &ndash; build global dictionaries from the test
 * collection</li>
 * <li>{@code -a} &ndash; also write stemmed phrase and score into the
 * ".key" file</li>
 * </ul>
 *
 * @param options
 *            the list of options as an array of strings
 * @exception Exception
 *                if a required option is missing, the vocabulary format is
 *                unsupported, a class cannot be instantiated, or
 *                unrecognised options remain
 */
public void setOptions(String[] options) throws Exception {
    // --- required: directory ---
    final String dirArg = Utils.getOption('l', options);
    if (dirArg.length() == 0) {
        setDirName(null);
        throw new Exception("Name of directory required argument.");
    }
    setDirName(dirArg);

    // --- required: model ---
    final String modelArg = Utils.getOption('m', options);
    if (modelArg.length() == 0) {
        setModelName(null);
        throw new Exception("Name of model required argument.");
    }
    setModelName(modelArg);

    // --- required: vocabulary ---
    final String vocabArg = Utils.getOption('v', options);
    if (vocabArg.length() == 0) {
        setVocabulary(null);
        throw new Exception("Name of vocabulary required argument.");
    }
    setVocabulary(vocabArg);

    // --- vocabulary format: only meaningful with a controlled vocabulary ---
    final String formatArg = Utils.getOption('f', options);
    if (getVocabulary().equals("none")) {
        setVocabularyFormat(null);
    } else if (formatArg.length() == 0) {
        setVocabularyFormat(null);
        throw new Exception(
                "If a controlled vocabulary is used, format of vocabulary required argument (skos or text).");
    } else if (!formatArg.equals("skos") && !formatArg.equals("text")) {
        throw new Exception(
                "Unsupported format of vocabulary. It should be either \"skos\" or \"text\".");
    } else {
        setVocabularyFormat(formatArg);
    }

    // --- optional values with fall-backs ---
    final String encodingArg = Utils.getOption('e', options);
    setEncoding(encodingArg.length() > 0 ? encodingArg : "default");

    final String languageArg = Utils.getOption('i', options);
    setDocumentLanguage(languageArg.length() > 0 ? languageArg : "en");

    final String countArg = Utils.getOption('n', options);
    setNumPhrases(countArg.length() > 0 ? Integer.parseInt(countArg) : 5);

    // --- optional pluggable classes, resolved relative to their packages ---
    final String stemmerArg = Utils.getOption('t', options);
    if (stemmerArg.length() > 0) {
        final Class stemmerClass = Class.forName("kea.stemmers." + stemmerArg);
        setStemmer((Stemmer) stemmerClass.newInstance());
    }

    final String stopwordsArg = Utils.getOption('s', options);
    if (stopwordsArg.length() > 0) {
        final Class stopwordsClass = Class.forName("kea.stopwords." + stopwordsArg);
        setStopwords((Stopwords) stopwordsClass.newInstance());
    }

    // --- flags ---
    setDebug(Utils.getFlag('d', options));
    setBuildGlobal(Utils.getFlag('b', options));
    setAdditionalInfo(Utils.getFlag('a', options));

    Utils.checkForRemainingOptions(options);
}
/**
 * Gets the current option settings.
 * <p>
 * The stemmer and stopwords classes are reported without their
 * {@code kea.stemmers.} / {@code kea.stopwords.} package prefix, because
 * setOptions prepends that prefix itself. Reporting the fully-qualified
 * name would make the result unusable as input to setOptions. When no
 * stemmer or no stopwords object is set, the corresponding option is
 * omitted instead of failing with a NullPointerException.
 * <p>
 * The returned array always has 21 entries; unused trailing entries are
 * empty strings.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String[] getOptions() {
    String[] options = new String[21];
    int current = 0;
    options[current++] = "-l";
    options[current++] = "" + getDirName();
    options[current++] = "-m";
    options[current++] = "" + getModelName();
    options[current++] = "-v";
    options[current++] = "" + getVocabulary();
    options[current++] = "-f";
    options[current++] = "" + getVocabularyFormat();
    options[current++] = "-e";
    options[current++] = "" + getEncoding();
    options[current++] = "-i";
    options[current++] = "" + getDocumentLanguage();
    options[current++] = "-n";
    options[current++] = "" + getNumPhrases();

    Stemmer stemmer = getStemmer();
    if (stemmer != null) {
        String stemmerPackage = "kea.stemmers.";
        String stemmerName = stemmer.getClass().getName();
        if (stemmerName.startsWith(stemmerPackage)) {
            stemmerName = stemmerName.substring(stemmerPackage.length());
        }
        options[current++] = "-t";
        options[current++] = stemmerName;
    }

    // Stopwords have no default in this class, so they may legitimately be null.
    Stopwords stopwords = getStopwords();
    if (stopwords != null) {
        String stopwordsPackage = "kea.stopwords.";
        String stopwordsName = stopwords.getClass().getName();
        if (stopwordsName.startsWith(stopwordsPackage)) {
            stopwordsName = stopwordsName.substring(stopwordsPackage.length());
        }
        options[current++] = "-s";
        options[current++] = stopwordsName;
    }

    if (getDebug()) {
        options[current++] = "-d";
    }
    if (getBuildGlobal()) {
        options[current++] = "-b";
    }
    if (getAdditionalInfo()) {
        options[current++] = "-a";
    }
    // Pad to the fixed length with empty strings.
    while (current < options.length) {
        options[current++] = "";
    }
    return options;
}
/**
 * Describes every command-line option this class understands.
 *
 * @return an enumeration of {@link Option} objects, one per option, in the
 *         order l, m, v, f, e, i, n, t, s, d, b, a
 */
public Enumeration listOptions() {
    final Option[] descriptions = {
        new Option("\tSpecifies name of directory.", "l", 1, "-l <directory name>"),
        new Option("\tSpecifies name of model.", "m", 1, "-m <model name>"),
        new Option("\tSpecifies vocabulary name.", "v", 1, "-v <vocabulary name>"),
        new Option("\tSpecifies vocabulary format.", "f", 1, "-f <vocabulary format>"),
        new Option("\tSpecifies encoding.", "e", 1, "-e <encoding>"),
        new Option("\tSpecifies document language (en (default), es, de, fr).", "i", 1,
                "-i <document language>"),
        new Option("\tSpecifies number of phrases to be output (default: 5).", "n", 1, "-n"),
        new Option("\tSet the stemmer to use (default: SremovalStemmer).", "t", 1,
                "-t <name of stemmer class>"),
        new Option("\tSet the stopwords class to use (default: EnglishStopwords).", "s", 1,
                "-s <name of stopwords class>"),
        new Option("\tTurns debugging mode on.", "d", 0, "-d"),
        new Option("\tBuilds global dictionaries for computing TFIDF from the test collection.",
                "b", 0, "-b"),
        new Option("\tAlso write stemmed phrase and score into \".key\" file.", "a", 0, "-a")
    };

    final Vector collected = new Vector(13);
    for (int k = 0; k < descriptions.length; k++) {
        collected.addElement(descriptions[k]);
    }
    return collected.elements();
}
/**
 * Collects the stems of the file names, i.e. the names of all ".txt" files
 * in the configured directory with the ".txt" suffix removed.
 *
 * @return a table mapping each stem to a {@code Double} of value 0
 * @throws Exception
 *             with the message "Problem opening directory ..." if no
 *             directory name is set, the path is not a readable directory,
 *             or access is denied
 */
public Hashtable collectStems() throws Exception {
    if (m_dirName == null) {
        throw new Exception("Problem opening directory " + m_dirName);
    }

    String[] files;
    try {
        files = new File(m_dirName).list();
    } catch (SecurityException e) {
        // Keep the original message but preserve the cause for diagnosis.
        throw new Exception("Problem opening directory " + m_dirName, e);
    }
    // File.list() signals "not a directory" or an I/O error by returning null.
    if (files == null) {
        throw new Exception("Problem opening directory " + m_dirName);
    }

    Hashtable stems = new Hashtable();
    for (int i = 0; i < files.length; i++) {
        String name = files[i];
        if (name.endsWith(".txt")) {
            String stem = name.substring(0, name.length() - 4);
            if (!stems.containsKey(stem)) {
                stems.put(stem, new Double(0));
            }
        }
    }
    return stems;
}
/**
 * Extracts keyphrases for every document stem and reports how well they
 * match any keyphrases that already exist.
 * <p>
 * For each stem, "&lt;dir&gt;/&lt;stem&gt;.txt" is read as the document and
 * "&lt;dir&gt;/&lt;stem&gt;.key" (if readable) as the existing keyphrases.
 * Either one is recorded as a missing value when it cannot be read. The
 * document is pushed through the KEA filter and the top-ranked phrases are
 * collected. If no ".key" file exists yet, the phrases are written to a
 * new one. Finally the mean and standard deviation of the number of
 * correct phrases per document are printed to standard output.
 *
 * @param stems
 *            table whose keys are the document stems (file names without
 *            ".txt"), e.g. as produced by collectStems()
 * @throws Exception
 *             if {@code stems} is empty, if building the global
 *             dictionaries fails, if the filter fails, or if a new ".key"
 *             file cannot be written
 */
public synchronized void extractKeyphrases(Hashtable stems) throws Exception {
    Vector stats = new Vector();

    // Check whether there is actually any data
    if (stems.size() == 0) {
        throw new Exception("Couldn't find any data!");
    }

    this.m_KEAFilter.setNumPhrases(m_numPhrases);
    this.m_KEAFilter.setVocabulary(m_vocabulary);
    this.m_KEAFilter.setVocabularyFormat(m_vocabularyFormat);
    this.m_KEAFilter.setDocumentLanguage(getDocumentLanguage());
    this.m_KEAFilter.setStemmer(m_Stemmer);
    this.m_KEAFilter.setStopwords(m_Stopwords);
    // Constant-first comparison: the vocabulary name may be null when this
    // object was configured programmatically rather than via setOptions.
    if ("none".equals(getVocabulary())) {
        this.m_KEAFilter.m_NODEfeature = false;
    }
    // Otherwise the thesaurus is expected to have been loaded beforehand
    // (see loadThesaurus()).

    // NOTE(review): three attributes are declared, but each instance built
    // below carries only two values — confirm the filter never reads
    // "filename".
    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("doc", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    atts.addElement(new Attribute("filename", (String) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    if (this.m_KEAFilter.m_Dictionary == null) {
        buildGlobalDictionaries(stems);
    }

    System.out.println("-- Extracting Keyphrases... ");

    // Enumeration over all files in the directory (now in the hash):
    Enumeration elem = stems.keys();
    while (elem.hasMoreElements()) {
        String str = (String) elem.nextElement();
        double[] newInst = new double[2];

        // Document text
        try {
            String text = readFileContents(new File(m_dirName + "/" + str + ".txt"));
            newInst[0] = (double) data.attribute(0).addStringValue(text);
        } catch (Exception e) {
            if (m_debug) {
                System.err.println("Can't read document " + str + ".txt");
            }
            newInst[0] = Instance.missingValue();
        }

        // Existing keyphrases, if any. KEA assumes these were assigned by
        // the author and evaluates the extracted keyphrases against them.
        try {
            String keys = readFileContents(new File(m_dirName + "/" + str + ".key"));
            newInst[1] = (double) data.attribute(1).addStringValue(keys);
        } catch (Exception e) {
            if (m_debug) {
                System.err.println("No existing keyphrases for stem " + str + ".");
            }
            newInst[1] = Instance.missingValue();
        }

        data.add(new Instance(1.0, newInst));
        this.m_KEAFilter.input(data.instance(0), vocabulary);
        data = data.stringFreeStructure();
        if (m_debug) {
            System.err.println("-- Document: " + str);
        }

        // Collect the filter output, keeping only ranks 1..m_numPhrases.
        Instance[] topRankedInstances = new Instance[m_numPhrases];
        Instance inst;
        while ((inst = this.m_KEAFilter.output()) != null) {
            int index = (int) inst.value(this.m_KEAFilter.getRankIndex()) - 1;
            // The lower bound protects against a rank below 1, which would
            // otherwise abort the whole run with an out-of-bounds write.
            if (index >= 0 && index < topRankedInstances.length) {
                topRankedInstances[index] = inst;
            }
        }
        if (m_debug) {
            System.err.println("-- Keyphrases and feature values:");
        }

        // Only write a ".key" file when none exists yet.
        PrintWriter printer = null;
        File key = new File(m_dirName + "/" + str + ".key");
        if (!key.exists()) {
            FileOutputStream out = new FileOutputStream(m_dirName + "/" + str + ".key");
            try {
                if (!m_encoding.equals("default")) {
                    printer = new PrintWriter(new OutputStreamWriter(out, m_encoding));
                } else {
                    printer = new PrintWriter(out);
                }
            } finally {
                // An unsupported encoding must not leak the open file.
                if (printer == null) {
                    out.close();
                }
            }
        }

        double numExtracted = 0, numCorrect = 0;
        try {
            for (int i = 0; i < topRankedInstances.length; i++) {
                Instance ranked = topRankedInstances[i];
                if (ranked == null) {
                    continue;
                }
                int lastIndex = ranked.numAttributes() - 1;
                if (!ranked.isMissing(lastIndex)) {
                    numExtracted += 1.0;
                }
                if ((int) ranked.value(lastIndex) == 1) {
                    numCorrect += 1.0;
                }
                if (printer != null) {
                    printer.print(ranked.stringValue(this.m_KEAFilter.getUnstemmedPhraseIndex()));
                    if (m_AdditionalInfo) {
                        printer.print("\t");
                        printer.print(ranked.stringValue(this.m_KEAFilter.getStemmedPhraseIndex()));
                        printer.print("\t");
                        printer.print(Utils.doubleToString(
                                ranked.value(this.m_KEAFilter.getProbabilityIndex()), 4));
                    }
                    printer.println();
                }
                if (m_debug) {
                    System.err.println(ranked);
                }
            }
        } finally {
            // Closing the writer also closes the underlying file stream.
            if (printer != null) {
                printer.flush();
                printer.close();
            }
        }

        if (numExtracted > 0) {
            if (m_debug) {
                System.err.println("-- " + numCorrect + " correct");
            }
            stats.addElement(new Double(numCorrect));
        }
    }

    double[] st = new double[stats.size()];
    for (int i = 0; i < stats.size(); i++) {
        st[i] = ((Double) stats.elementAt(i)).doubleValue();
    }
    double avg = Utils.mean(st);
    double stdDev = Math.sqrt(Utils.variance(st));
    System.out.println("Avg. number of matching keyphrases compared to existing ones : "
            + Utils.doubleToString(avg, 2)
            + " +/- "
            + Utils.doubleToString(stdDev, 2));
    System.out.println("Based on " + stats.size() + " documents");
}

/**
 * Reads a whole file into a string using the configured encoding (no
 * explicit charset is passed when the encoding is "default"). The
 * underlying stream is always closed.
 */
private String readFileContents(File file) throws Exception {
    FileInputStream in = new FileInputStream(file);
    try {
        InputStreamReader reader;
        if (!m_encoding.equals("default")) {
            reader = new InputStreamReader(in, m_encoding);
        } else {
            reader = new InputStreamReader(in);
        }
        StringBuffer text = new StringBuffer();
        char[] buffer = new char[8192];
        int count;
        while ((count = reader.read(buffer)) != -1) {
            text.append(buffer, 0, count);
        }
        return text.toString();
    } finally {
        in.close();
    }
}
/**
 * Rebuilds the filter's phrase dictionary from the documents named by
 * {@code stems}.
 * <p>
 * Each "&lt;dir&gt;/&lt;stem&gt;.txt" file is read, tokenized with a
 * KEAPhraseFilter and turned into a phrase map by the KEA filter. For every
 * phrase key of that map, the dictionary either receives a new Counter
 * (first sighting) or has its existing Counter incremented, so each
 * document contributes at most once per phrase.
 *
 * @param stems
 *            table whose keys are the document stems
 * @throws Exception
 *             if a document cannot be read or the filter fails
 */
private void buildGlobalDictionaries(Hashtable stems) throws Exception {
    System.err.println("--- Building global dictionaries from the test collection.. ");

    // Build dictionary of n-grams with associated document frequencies
    this.m_KEAFilter.m_Dictionary = new HashMap();

    // Enumeration over all files in the directory (now in the hash):
    Enumeration elem = stems.keys();
    while (elem.hasMoreElements()) {
        String str = (String) elem.nextElement();
        File txt = new File(m_dirName + "/" + str + ".txt");

        StringBuffer txtStr = new StringBuffer();
        FileInputStream fileIn = new FileInputStream(txt);
        try {
            InputStreamReader is;
            if (!m_encoding.equals("default")) {
                is = new InputStreamReader(fileIn, m_encoding);
            } else {
                is = new InputStreamReader(fileIn);
            }
            char[] buffer = new char[8192];
            int count;
            while ((count = is.read(buffer)) != -1) {
                txtStr.append(buffer, 0, count);
            }
        } finally {
            // Previously the reader was never closed, leaking one file
            // handle per document.
            fileIn.close();
        }

        KEAPhraseFilter kpf = new KEAPhraseFilter();
        HashMap hash = this.m_KEAFilter.getPhrasesForDictionary(
                kpf.tokenize(txtStr.toString()), this.vocabulary);

        Iterator it = hash.keySet().iterator();
        while (it.hasNext()) {
            String phrase = (String) it.next();
            Counter counter = (Counter) this.m_KEAFilter.m_Dictionary.get(phrase);
            if (counter == null) {
                this.m_KEAFilter.m_Dictionary.put(phrase, new Counter());
            } else {
                counter.increment();
            }
        }
    }
}
/**
 * Loads the extraction model from the file named by the model name and
 * installs it as the KEA filter, replacing the filter object held so far.
 * When global dictionaries are to be built from the test collection, the
 * loaded filter's dictionary is cleared so that extractKeyphrases()
 * rebuilds it.
 * <p>
 * SECURITY NOTE: the model is read with Java object deserialization, so
 * the model file must come from a trusted source.
 *
 * @throws Exception
 *             if the file cannot be opened or does not contain a
 *             serialized KEAFilter
 */
public void loadModel() throws Exception {
    BufferedInputStream inStream = new BufferedInputStream(
            new FileInputStream(m_modelName));
    try {
        System.out.println("This is the model that has been loaded -------------->" + m_modelName);
        ObjectInputStream in = new ObjectInputStream(inStream);
        this.m_KEAFilter = (KEAFilter) in.readObject();
    } finally {
        // Closes the file even if the stream header or the object graph
        // turns out to be invalid.
        inStream.close();
    }

    // If TFxIDF values are to be computed from the test corpus
    if (m_buildGlobal) {
        if (m_debug) {
            System.err.println("-- The global dictionaries will be built from this test collection..");
        }
        this.m_KEAFilter.m_Dictionary = null;
    }
}
/**
 * Command-line entry point: parses the options, echoes them to standard
 * error, loads the model and extracts keyphrases for every ".txt" file in
 * the selected directory. On any failure the stack trace, the message and
 * the list of supported options are printed to standard error.
 *
 * @param ops
 *            the command-line options (see setOptions)
 */
public static void main(String[] ops) {
    final KEAKeyphraseExtractor extractor = new KEAKeyphraseExtractor(null);
    try {
        // Validate and apply the user's options.
        extractor.setOptions(ops);

        // Echo the effective settings.
        System.err.print("Extracting keyphrases with options: ");
        final String[] settings = extractor.getOptions();
        int position = 0;
        while (position < settings.length) {
            System.err.print(settings[position] + " ");
            position++;
        }
        System.err.println();

        // Load the selected model.
        System.err.println("-- Loading the Model... ");
        extractor.loadModel();

        // A stem is the name of a document file without ".txt".
        final Hashtable documentStems = extractor.collectStems();
        extractor.extractKeyphrases(documentStems);
    } catch (Exception e) {
        e.printStackTrace();
        System.err.println(e.getMessage());
        System.err.println("\nOptions:\n");
        for (Enumeration en = extractor.listOptions(); en.hasMoreElements();) {
            final Option option = (Option) en.nextElement();
            System.err.println(option.synopsis());
            System.err.println(option.description());
        }
    }
}
}