/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
*/
package edu.nd.nina.types;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.logging.Logger;
import java.util.regex.Pattern;
/**
 * A conjunction of features drawn from a single {@link Alphabet}, in which
 * each conjunct may be required to be present or required to be absent.
 *
 * <p>
 * The conjuncts are stored as Alphabet indices in {@link #features}. The
 * parallel array {@link #negations} says, for each conjunct, whether the
 * feature must be present ({@code true}) or must be absent ({@code false});
 * a {@code null} array means every conjunct must be present.
 *
 * <p>
 * The name of a conjunction is the names of its conjuncts joined by
 * {@code "_&_"}; a conjunct that must be absent is prefixed with {@code "!"}.
 */
public class FeatureConjunction {

    private static final Logger logger = Logger
            .getLogger(FeatureConjunction.class.getName());

    /** Separator placed between conjunct names in a conjunction name. */
    static private final String conjunctionString = "_&_";
    /** Prefix marking a conjunct that must be absent. */
    static private final String negationString = "!";
    /** Splits a conjunction name back into its conjunct names. */
    static private final Pattern conjunctionPattern = Pattern
            .compile(conjunctionString);

    /** Name under which this conjunction is looked up in an Alphabet. */
    String name;
    /** Alphabet that the entries of {@link #features} index into. */
    Alphabet dictionary;
    /** Alphabet indices of the conjuncts. */
    int[] features;
    /**
     * Parallel to {@link #features}: {@code true} means the feature must be
     * present, {@code false} means it must be absent. May be {@code null},
     * meaning all features must be present.
     */
    boolean[] negations;
    /** -1 if this conjunction isn't yet part of an Alphabet. */
    int index = -1;

    /**
     * Creates a conjunction over the given feature indices.
     *
     * <p>
     * If {@code negations[i]} is true, insist that the feature has non-zero
     * value; if false, insist that it has zero value. If {@code negations} is
     * null, all conjuncts are treated as if their entry were true. Note: does
     * not check whether this conjunction has already been added to the
     * Alphabet.
     *
     * @param name
     *            name of the conjunction, or {@code null} to derive one from
     *            the conjunct names in {@code dictionary}
     * @param dictionary
     *            Alphabet the feature indices refer to
     * @param features
     *            Alphabet indices of the conjuncts
     * @param negations
     *            per-conjunct presence requirement, or {@code null}; when
     *            non-null it must have the same length as {@code features}
     * @param checkSorted
     *            if true, sort the stored features (and negations, in
     *            parallel) by index and reject duplicates
     * @param copyFeatures
     *            if true, store a copy of {@code features}; otherwise the
     *            caller's array is stored (and sorted in place when
     *            {@code checkSorted} is true)
     * @param copyNegations
     *            same as {@code copyFeatures}, for {@code negations}
     * @throws IllegalArgumentException
     *             if {@code checkSorted} is true and the same feature index
     *             occurs twice
     */
    public FeatureConjunction(String name, Alphabet dictionary, int[] features,
            boolean[] negations, boolean checkSorted, boolean copyFeatures,
            boolean copyNegations) {
        assert (negations == null || features.length == negations.length);
        this.dictionary = dictionary;
        this.features = copyFeatures ? features.clone() : features;
        this.negations = (copyNegations && negations != null)
                ? negations.clone()
                : negations;
        if (checkSorted) {
            // Operate on the stored arrays so that the ordering established
            // here is the one this object actually keeps.
            sortAndRejectDuplicates(this.features, this.negations);
        }
        this.name = (name != null)
                ? name
                : buildDefaultName(dictionary, this.features, this.negations);
    }

    /**
     * Same as the seven-argument constructor, copying both arrays.
     */
    public FeatureConjunction(String name, Alphabet dictionary, int[] features,
            boolean[] negations, boolean checkSorted) {
        this(name, dictionary, features, negations, checkSorted, true, true);
    }

    /**
     * Same as the seven-argument constructor, sorting the features and
     * copying both arrays.
     */
    public FeatureConjunction(String name, Alphabet dictionary, int[] features,
            boolean[] negations) {
        this(name, dictionary, features, negations, true);
    }

    /**
     * Creates a conjunction named by
     * {@link #getName(Alphabet, int[], boolean[])}, sorting the features and
     * copying both arrays.
     */
    public FeatureConjunction(Alphabet dictionary, int[] features,
            boolean[] negations) {
        this(getName(dictionary, features, negations), dictionary, features,
                negations, true);
    }

    /**
     * Creates an un-negated conjunction named by
     * {@link #getName(Alphabet, int[], boolean[])}, sorting and copying the
     * features.
     */
    public FeatureConjunction(Alphabet dictionary, int[] features) {
        this(getName(dictionary, features, null), dictionary, features, null,
                true, true, false);
    }

    /**
     * Sorts {@code features} into ascending order, applying the same
     * permutation to {@code negations} when it is non-null, then rejects
     * duplicate indices.
     */
    private static void sortAndRejectDuplicates(int[] features,
            boolean[] negations) {
        // Insertion sort: the arrays are short and the two must move together.
        for (int i = 1; i < features.length; i++) {
            int feature = features[i];
            boolean negation = negations != null && negations[i];
            int j = i - 1;
            while (j >= 0 && features[j] > feature) {
                features[j + 1] = features[j];
                if (negations != null) {
                    negations[j + 1] = negations[j];
                }
                j--;
            }
            features[j + 1] = feature;
            if (negations != null) {
                negations[j + 1] = negation;
            }
        }
        for (int i = 1; i < features.length; i++) {
            if (features[i - 1] == features[i]) {
                throw new IllegalArgumentException(
                        "Same Feature cannot occur twice.");
            }
        }
    }

    /**
     * Builds the name used when the caller supplies none: conjunct names in
     * array order joined by the conjunction separator, with the negation
     * prefix on every conjunct that must be absent. Unlike
     * {@link #getName(Alphabet, int[])}, conjuncts that are themselves
     * conjunctions are not split apart.
     */
    private static String buildDefaultName(Alphabet dictionary, int[] features,
            boolean[] negations) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < features.length; i++) {
            if (i > 0) {
                sb.append(conjunctionString);
            }
            if (negations != null && !negations[i]) {
                sb.append(negationString);
            }
            sb.append(dictionary.lookupObject(features[i]).toString());
        }
        return sb.toString();
    }

    /**
     * Returns true if the indices are strictly ascending, i.e. sorted and
     * free of duplicates.
     */
    public static boolean isValidConjunction(int[] features) {
        for (int i = 1; i < features.length; i++) {
            if (features[i - 1] >= features[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the name of the conjunction of the given features, always in
     * "Alphabet index" order.
     *
     * <p>
     * Negated conjuncts are not supported yet, because the check for
     * duplicates among sub-constituents does not take negation into account.
     *
     * @param negations
     *            per-conjunct presence requirement, or {@code null}; every
     *            entry must be {@code true} (feature must be present)
     * @throws UnsupportedOperationException
     *             if any conjunct is negated, i.e. any entry of
     *             {@code negations} is {@code false}
     * @throws IllegalArgumentException
     *             under the same conditions as
     *             {@link #getName(Alphabet, int[])}
     */
    public static String getName(Alphabet dictionary, int[] features,
            boolean[] negations) {
        if (negations != null) {
            for (boolean mustBePresent : negations) {
                if (!mustBePresent) {
                    throw new UnsupportedOperationException(
                            "Doesn't yet check for sub-duplicates with negations.");
                }
            }
        }
        return getName(dictionary, features);
    }

    /**
     * Returns the name of the conjunction of the given features, always in
     * "Alphabet index" order.
     *
     * <p>
     * Any feature whose own name is a conjunction is first split into its
     * constituents (after which the indices are re-sorted); duplicate indices
     * are then collapsed. The argument array is never modified.
     *
     * @throws IllegalArgumentException
     *             if no feature needed splitting and the indices are not in
     *             ascending order
     */
    public static String getName(Alphabet dictionary, int[] features) {
        // Split apart the first feature that is itself a conjunction, then
        // start over on the expanded array.
        for (int i = 0; i < features.length; i++) {
            int[] parts = getFeatureIndices(dictionary,
                    dictionary.lookupObject(features[i]).toString());
            if (parts.length > 1) {
                int[] expanded = new int[features.length - 1 + parts.length];
                System.arraycopy(features, 0, expanded, 0, i);
                System.arraycopy(parts, 0, expanded, i, parts.length);
                System.arraycopy(features, i + 1, expanded, i + parts.length,
                        features.length - i - 1);
                Arrays.sort(expanded);
                return getName(dictionary, expanded);
            }
        }
        // Make sure the features are sorted, and remove any duplicates.
        for (int i = 1; i < features.length; i++) {
            if (features[i - 1] == features[i]) {
                // Remove the duplicate and try again.
                int[] deduped = new int[features.length - 1];
                System.arraycopy(features, 0, deduped, 0, i);
                System.arraycopy(features, i + 1, deduped, i,
                        features.length - i - 1);
                return getName(dictionary, deduped);
            }
            if (features[i - 1] > features[i]) {
                throw new IllegalArgumentException("feature indices not sorted");
            }
        }
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < features.length; i++) {
            if (i > 0) {
                sb.append(conjunctionString);
            }
            sb.append(dictionary.lookupObject(features[i]).toString());
        }
        return sb.toString();
    }

    /**
     * Returns the name of the conjunction of two features, always in
     * "Alphabet index" order regardless of argument order.
     */
    public static String getName(Alphabet dictionary, int feature1, int feature2) {
        if (feature1 < feature2) {
            return getName(dictionary, new int[] { feature1, feature2 });
        }
        return getName(dictionary, new int[] { feature2, feature1 });
    }

    /**
     * Returns true if the two features are the same feature or, when split
     * into their constituent features, have at least one constituent in
     * common.
     */
    public static boolean featuresOverlap(Alphabet dictionary, int feature1,
            int feature2) {
        if (feature1 == feature2) {
            return true;
        }
        int[] fis1 = getFeatureIndices(dictionary,
                dictionary.lookupObject(feature1).toString());
        int[] fis2 = getFeatureIndices(dictionary,
                dictionary.lookupObject(feature2).toString());
        // Both arrays come back sorted, so walk them in step. Bounds are
        // checked on both sides because either array may be empty.
        int i = 0;
        int j = 0;
        while (i < fis1.length && j < fis2.length) {
            if (fis1[i] == fis2[j]) {
                return true;
            }
            if (fis1[i] < fis2[j]) {
                i++;
            } else {
                j++;
            }
        }
        return false;
    }

    /**
     * Splits a conjunction name into its conjunct names and returns their
     * Alphabet indices in ascending order.
     *
     * <p>
     * The conjunct names are looked up without adding them to the Alphabet.
     * Negated conjuncts and names missing from the Alphabet are only guarded
     * against by assertions.
     *
     * @param featureConjunctionName
     *            a feature name, possibly a conjunction of several names
     * @return sorted indices, one per conjunct name; may be empty, because
     *         splitting drops trailing empty strings
     */
    public static int[] getFeatureIndices(Alphabet dictionary,
            String featureConjunctionName) {
        String[] featureNames = conjunctionPattern
                .split(featureConjunctionName);
        int[] ret = new int[featureNames.length];
        // Hoisted so the message is not built when fine logging is off.
        boolean logFine = logger.isLoggable(java.util.logging.Level.FINE);
        for (int i = 0; i < featureNames.length; i++) {
            assert (!featureNames[i].startsWith(negationString));
            ret[i] = dictionary.lookupIndex(featureNames[i], false);
            if (logFine) {
                logger.fine(i + "th feature: " + featureNames[i] + " in "
                        + featureConjunctionName);
            }
            assert (ret[i] != -1) : "Couldn't find index for " + i
                    + "th feature: " + featureNames[i] + " in "
                    + featureConjunctionName;
        }
        Arrays.sort(ret);
        return ret;
    }

    /**
     * Tests whether the feature vector satisfies every conjunct: features
     * that must be present have a non-zero value, and features that must be
     * absent are missing or zero.
     *
     * <p>
     * The walk through the vector only moves forward, so it relies on both
     * this conjunction's features and the vector's locations being in
     * ascending index order.
     *
     * @throws IllegalArgumentException
     *             if the vector's Alphabet is not this conjunction's Alphabet
     */
    public boolean satisfiedBy(FeatureVector fv) {
        if (fv.getAlphabet() != dictionary) {
            throw new IllegalArgumentException("Vocabularies do not match.");
        }
        int fvsize = fv.numLocations();
        int fvl = 0;
        for (int fcl = 0; fcl < features.length; fcl++) {
            int fcli = features[fcl];
            while (fvl < fvsize && fv.indexAtLocation(fvl) < fcli) {
                fvl++;
            }
            boolean present = fvl < fvsize && fv.indexAtLocation(fvl) == fcli
                    && fv.valueAtLocation(fvl) != 0;
            boolean mustBePresent = negations == null || negations[fcl];
            if (present != mustBePresent) {
                // Either a required feature is missing or a negated feature
                // is present, so the conjunction is not satisfied.
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the index most recently obtained by an {@code addTo} call on a
     * satisfying vector, or -1 if there has been none.
     */
    public int getIndex() {
        return index;
    }

    /**
     * If the vector satisfies this conjunction, looks up the conjunction's
     * name in the vector's Alphabet, records the resulting index in this
     * object, marks it in {@code fs}, and adds it to the vector with the
     * given value unless the vector already has a positive value there.
     *
     * @param fs
     *            feature selection to add the index to, or {@code null}
     */
    public void addTo(AugmentableFeatureVector fv, double value,
            FeatureSelection fs) {
        // xxx This could be simplified for the special case of a
        // FeatureConjunction with only one conjunct
        if (!this.satisfiedBy(fv)) {
            return;
        }
        index = fv.getAlphabet().lookupIndex(name);
        // Checked before touching fs so that a failed lookup is caught before
        // -1 is recorded as a selected feature.
        assert (index != -1);
        // Make sure that this feature is selected
        if (fs != null) {
            fs.add(index);
        }
        if (index >= 0 && fv.value(index) > 0) {
            // Don't add features that are already there
            return;
        }
        fv.add(index, value);
    }

    /** Same as the three-argument form, without a feature selection. */
    public void addTo(AugmentableFeatureVector fv, double value) {
        addTo(fv, value, null);
    }

    /** Same as the three-argument form, with value 1.0 and no feature selection. */
    public void addTo(AugmentableFeatureVector fv) {
        this.addTo(fv, 1.0);
    }

    /**
     * An ordered collection of conjunctions that all share one Alphabet.
     */
    public static class List {
        ArrayList<FeatureConjunction> conjunctions;

        public List() {
            this.conjunctions = new ArrayList<FeatureConjunction>();
        }

        /** Returns the number of conjunctions in this list. */
        public int size() {
            return conjunctions.size();
        }

        /** Returns the conjunction at position {@code i}. */
        public FeatureConjunction get(int i) {
            return conjunctions.get(i);
        }

        /**
         * Appends a conjunction.
         *
         * @throws IllegalArgumentException
         *             if its Alphabet differs from that of the first
         *             conjunction already in the list
         */
        public void add(FeatureConjunction fc) {
            if (!conjunctions.isEmpty()
                    && fc.dictionary != conjunctions.get(0).dictionary) {
                throw new IllegalArgumentException("Alphabet does not match.");
            }
            conjunctions.add(fc);
        }

        /**
         * Applies every conjunction, in list order, to the vector.
         *
         * @param fs
         *            feature selection passed to each conjunction, or
         *            {@code null}
         */
        public void addTo(AugmentableFeatureVector fv, double value,
                FeatureSelection fs) {
            // xxx Make this more efficient
            for (FeatureConjunction fc : conjunctions) {
                fc.addTo(fv, value, fs);
            }
        }

        /** Same as the three-argument form, without a feature selection. */
        public void addTo(AugmentableFeatureVector fv, double value) {
            addTo(fv, value, null);
        }
    }
}