package edu.stanford.nlp.tagger.maxent;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.FileInputStream;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import edu.stanford.nlp.util.Pair;
/** Maintains a set of feature extractors for a maxent POS tagger and applies them.
*
* @author Kristina Toutanova
* @version 1.0
*/
public class Extractors implements Serializable {
/** A logger for this class */
private static final Redwood.RedwoodChannels log = Redwood.channels(Extractors.class);
private final Extractor[] v;
private static final boolean DEBUG = false;
transient List<Pair<Integer,Extractor>>
local, // extractors only looking at current word
localContext, // extractors only looking at words, except those in "local"
dynamic; // extractors depending on class labels
/**
* Set the extractors from an array.
*
* @param extrs The array of extractors. It is copied in this init.
*/
public Extractors(Extractor[] extrs) {
v = new Extractor[extrs.length];
System.arraycopy(extrs, 0, v, 0, extrs.length);
initTypes();
}
/**
* Determine type of each feature extractor.
*/
void initTypes() {
local = new ArrayList<>();
localContext = new ArrayList<>();
dynamic = new ArrayList<>();
for(int i=0; i<v.length; ++i) {
Extractor e = v[i];
if(e.isLocal() && e.isDynamic())
throw new RuntimeException("Extractors can't both be local and dynamic!");
if(e.isLocal()) {
local.add(Pair.makePair(i,e));
//localContext.put(i,e);
} else if(e.isDynamic()) {
dynamic.add(Pair.makePair(i,e));
} else {
localContext.add(Pair.makePair(i,e));
}
}
if(DEBUG) {
log.info("Extractors: " + this);
log.info("Local: " + local.size() + " extractors");
log.info("Local context: " + localContext.size() + " extractors");
log.info("Dynamic: " + dynamic.size() + " extractors");
}
}
/**
* Extract using the i'th extractor.
* @param i The extractor to use
* @param h The history to extract from
* @return String The feature value
*/
String extract(int i, History h) {
return v[i].extract(h);
}
boolean equals(History h, History h1) {
for (Extractor extractor : v) {
if ( ! (extractor.extract(h).equals(extractor.extract(h1)))) {
return false;
}
}
return true;
}
/** Find maximum left context of extractors. Used in TagInference to decide windows for dynamic programming.
* @return The maximum of the left contexts used by all extractors.
*/
int leftContext() {
int max = 0;
for (Extractor extractor : v) {
int lf = extractor.leftContext();
if (lf > max) {
max = lf;
}
}
return max;
}
/** Find maximum right context of extractors. Used in TagInference to decide windows for dynamic programming.
* @return The maximum of the right contexts used by all extractors.
*/
int rightContext() {
int max = 0;
for (Extractor extractor : v) {
int lf = extractor.rightContext();
if (lf > max) {
max = lf;
}
}
return max;
}
public int size() {
return v.length;
}
protected void setGlobalHolder(MaxentTagger tagger) {
for (Extractor extractor : v) {
extractor.setGlobalHolder(tagger);
}
}
/*
public void save(String filename) {
try {
DataOutputStream rf = IOUtils.getDataOutputStream(filename);
rf.writeInt(v.length);
for (Extractor extr : v) {
rf.writeBytes(extr.toString());
}
rf.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public void read(String filename) {
try {
InDataStreamFile rf = new InDataStreamFile(filename);
int len = rf.readInt();
v = new Extractor[len];
//GlobalHolder.init();
} catch (IOException e) {
e.printStackTrace();
}
}
*/
Extractor get(int index) {
return v[index];
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("Extractors[");
for (int i = 0; i < v.length; i++) {
sb.append(v[i]);
if (i < v.length - 1) {
sb.append(", ");
}
}
sb.append(']');
return sb.toString();
}
/**
* Prints out the pair of {@code Extractors} objects found in the
* file that is the first and only argument.
* @param args Filename of extractors file (standardly written with
* {@code .ex} extension)
*/
public static void main(String[] args) {
try {
ObjectInputStream in = new ObjectInputStream(new FileInputStream(args[0]));
Extractors extrs = (Extractors) in.readObject();
Extractors extrsRare = (Extractors) in.readObject();
in.close();
System.out.println("All words: " + extrs);
System.out.println("Rare words: " + extrsRare);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private static final long serialVersionUID = -4777107742414749890L;
}