//AmbiguityClasses -- StanfordMaxEnt, A Maximum Entropy Toolkit
//Copyright (c) 2002-2008 Leland Stanford Junior University
//This program is free software; you can redistribute it and/or
//modify it under the terms of the GNU General Public License
//as published by the Free Software Foundation; either version 2
//of the License, or (at your option) any later version.
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//For more information, bug reports, fixes, contact:
//Christopher Manning
//Dept of Computer Science, Gates 1A
//Stanford CA 94305-9010
//USA
//Support/Questions: java-nlp-user@lists.stanford.edu
//Licensing: java-nlp-support@lists.stanford.edu
//http://www-nlp.stanford.edu/software/tagger.shtml
package edu.stanford.nlp.tagger.maxent;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.HashIndex;
/**
* A collection of Ambiguity Class.
* <i>The code currently here is rotted and would need to be revived.</i>
*
* @author Kristina Toutanova
* @version 1.0
*/
// TODO: if it's rotted and not used anywhere, can we just get rid of it all? [CDM: It would be nice to keep and revive someday. It is a nice and sometimes useful idea.]
public class AmbiguityClasses {
private final Index<AmbiguityClass> classes;
private static final String naWord = "NA";
// TODO: this isn't used anywhere, either
// protected final AmbiguityClass naClass = new AmbiguityClass(null, false, null, null);
public AmbiguityClasses(TTags ttags) {
classes = new HashIndex<>();
// naClass.init(naWord, ttags);
}
private int add(AmbiguityClass a) {
if(classes.contains(a)) {
return classes.indexOf(a);
}
classes.add(a);
return classes.indexOf(a);
}
protected int getClass(String word, Dictionary dict, int veryCommonWordThresh, TTags ttags) {
if (word.equals(naWord)) {
return -2;
}
if (dict.isUnknown(word)) {
return -1;
}
boolean veryCommon = dict.sum(word) > veryCommonWordThresh;
AmbiguityClass a = new AmbiguityClass(word, veryCommon, dict, ttags);
// TODO: surely it would be faster and not too expensive to cache
// the results of creating a whole bunch of these, since we're
// probably constructing the same AmbiguityClass multiple times
// for each word. Furthermore, the separation of having two
// constructors here is pretty awful, quite frankly.
return add(a);
}
/*
public void print() {
Object[] arrClasses = classes.objectsList().toArray();//s.keySet().toArray();
System.out.println(arrClasses.length);
// System.out.println("Number of ambiguity classes is " + arrClasses.length);
// for (int i = 0; i < arrClasses.length; i++) {
// ((AmbiguityClass) arrClasses[i]).print();
// }
}
public void save(String filename) {
try {
DataOutputStream rf = IOUtils.getDataOutputStream(filename);
Object[] arrClasses = classes.objectsList().toArray();//s.keySet().toArray();
// System.out.println("Number of ambiguity classes is " + arrClasses.length);
// rf.writeInt(arrClasses.length);
// for (int i = 0; i < arrClasses.length; i++) {
//rf.writeUTF(((AmbiguityClass) (arrClasses[i])).getWord());
// }
rf.close();
} catch (Exception e) {
e.printStackTrace();
}
}// save
public void save(DataOutputStream file) {
try {
Object[] arrClasses = classes.objectsList().toArray();//s.keySet().toArray();
// System.out.println("Number of ambiguity classes is " + arrClasses.length);
// file.writeInt(arrClasses.length);
for (int i = 0; i < arrClasses.length; i++) {
//rf.writeUTF(((AmbiguityClass) (arrClasses[i])).getWord());
AmbiguityClass cur = (AmbiguityClass) arrClasses[i];
file.writeBoolean(cur.single);
file.writeUTF(cur.getWord());
}
} catch (Exception e) {
e.printStackTrace();
}
}// save
public void read(String filename) {
try {
InDataStreamFile rf = new InDataStreamFile(filename);
int len = rf.readInt();//this is the number of ambiguity classes
for (int i = 0; i < len; i++) {
boolean singleton = rf.readBoolean();
// int len_buff = rf.readInt();
// byte[] buff = new byte[len_buff];
// rf.read(buff);
String word = rf.readUTF();//new String(buff);
word = TestSentence.toNice(word);
add(new AmbiguityClass(word, singleton));
//init();
}//i
rf.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public void read(InDataStreamFile file) {
try {
int len = file.readInt();//this is the number of ambiguity classes
for (int i = 0; i < len; i++) {
boolean singleton = file.readBoolean();
String word = file.readUTF();//new String(buff);
word = TestSentence.toNice(word);
add(new AmbiguityClass(word, singleton));
}//i
} catch (IOException e) {
e.printStackTrace();
}
}
*/
}