package edu.stanford.nlp.international.morph;
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification.MorphoFeatureType;
import edu.stanford.nlp.util.Generics;
/**
* Holds a set of morphosyntactic features for a given surface form.
*
* @author Spence Green
*
*/
public class MorphoFeatures implements Serializable {

  private static final long serialVersionUID = -3893316324305154940L;

  /** Separator placed between a feature name and its value in the string form. */
  public static final String KEY_VAL_DELIM = ":";

  /** Active features, keyed by feature type. */
  protected final Map<MorphoFeatureType,String> fSpec;

  /** Alternate tag; {@code null} unless set via {@link #setAltTag(String)} or copied from another instance. */
  protected String altTag;

  /**
   * Creates an instance with no features and no alternate tag.
   */
  public MorphoFeatures() {
    fSpec = Generics.newHashMap();
  }

  /**
   * Copy constructor: copies every feature/value pair and the alternate tag
   * of {@code other} into a new, independent feature map.
   *
   * @param other the instance to copy
   */
  public MorphoFeatures(MorphoFeatures other) {
    this();
    this.fSpec.putAll(other.fSpec);
    this.altTag = other.altTag;
  }

  /**
   * Sets the value of a feature, replacing any value previously stored for it.
   *
   * @param feat the feature type
   * @param val the feature value
   */
  public void addFeature(MorphoFeatureType feat, String val) {
    fSpec.put(feat, val);
  }

  /**
   * @param feat the feature type to look up
   * @return true if a value has been stored for {@code feat}
   */
  public boolean hasFeature(MorphoFeatureType feat) {
    return fSpec.containsKey(feat);
  }

  /**
   * Returns the value stored for a feature.
   *
   * @param feat the feature type to look up
   * @return the stored value, or the empty string if the feature is not present
   */
  public String getValue(MorphoFeatureType feat) {
    if (hasFeature(feat)) {
      return fSpec.get(feat);
    }
    return "";
  }

  /**
   * Counts the features of this object that {@code other} also has with an
   * equal value.
   *
   * @param other the feature set to compare against
   * @return the number of features present in both objects with equal values
   */
  public int numFeatureMatches(MorphoFeatures other) {
    int nMatches = 0;
    for (Map.Entry<MorphoFeatureType, String> fPair : fSpec.entrySet()) {
      MorphoFeatureType feat = fPair.getKey();
      if ( ! other.hasFeature(feat)) {
        continue;
      }
      // addFeature() accepts null values, so compare null-safely rather than
      // calling equals() on a value that may be null.
      String mine = fPair.getValue();
      String theirs = other.getValue(feat);
      boolean same = (theirs == null) ? (mine == null) : theirs.equals(mine);
      if (same) {
        nMatches++;
      }
    }
    return nMatches;
  }

  /**
   * @return the number of features that currently have a value
   */
  public int numActiveFeatures() {
    return fSpec.size();
  }

  /**
   * Build a POS tag consisting of a base category plus inflectional features.
   * The result is {@code baseTag} followed by the output of {@link #toString()}.
   *
   * @param baseTag the base category that prefixes the feature string
   * @return the tag
   */
  public String getTag(String baseTag) {
    return baseTag + toString();
  }

  /**
   * Sets the alternate tag returned by {@link #getAltTag()}.
   *
   * @param tag the alternate tag
   */
  public void setAltTag(String tag) {
    altTag = tag;
  }

  /**
   * An alternate tag form than the one produced by getTag(). Subclasses
   * may want to use this form to implement someone else's tagset (e.g., CC, ERTS, etc.)
   *
   * @return the tag, or {@code null} if none has been set
   */
  public String getAltTag() {
    return altTag;
  }

  /**
   * Assumes that the tag string has been formed using a call to getTag(). The
   * string is split on "-", and each piece is split on {@link #KEY_VAL_DELIM};
   * pieces that do not yield exactly a name and a value (such as the basic
   * category) are skipped, so only the features end up in the result.
   * <p>
   * Note that this method returns a <b>new</b> MorphoFeatures object. As a result, it
   * behaves like a static method, but is non-static so that subclasses can override
   * this method.
   *
   * @param str the tag string to parse
   * @return a new object holding the features found in {@code str}
   * @throws IllegalArgumentException if a feature name does not correspond to a
   *         {@code MorphoFeatureType} constant (raised by {@code valueOf})
   */
  public MorphoFeatures fromTagString(String str) {
    MorphoFeatures mFeats = new MorphoFeatures();
    for (String fPair : str.split("\\-")) {
      String[] keyValue = fPair.split(KEY_VAL_DELIM);
      if (keyValue.length != 2) {
        // Not a name/value pair, e.g. manual state split annotations
        continue;
      }
      MorphoFeatureType fName = MorphoFeatureType.valueOf(keyValue[0].trim());
      mFeats.addFeature(fName, keyValue[1].trim());
    }
    return mFeats;
  }

  /**
   * Renders each active feature as "-name:value", concatenated.
   * <p>
   * values() returns the values in the order in which they are declared. Thus we will not have
   * the case where two feature types can yield two strings:
   *    -feat1:A-feat2:B
   *    -feat2:B-feat1:A
   *
   * @return the feature string, empty if there are no active features
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    for (MorphoFeatureType feat : MorphoFeatureType.values()) {
      if (fSpec.containsKey(feat)) {
        // Plain appends give the same text as the "-%s%s%s" format without
        // parsing a format string for every feature.
        sb.append('-').append(feat.toString()).append(KEY_VAL_DELIM).append(fSpec.get(feat));
      }
    }
    return sb.toString();
  }
}