/* Copyright (C) 2011 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
package cc.mallet.fst.semi_supervised;
import java.io.BufferedReader;
import java.io.Reader;
import java.util.Arrays;
import java.util.HashMap;
import cc.mallet.types.InstanceList;
/**
* Expectation constraint utilities for fst package.
*
* @author Gregory Druck
*/
public class FSTConstraintUtil {

  /**
   * Loads GE constraints from a whitespace-delimited text source.
   *
   * <p>Each non-blank line has the form
   * <pre>
   *   featureName label:prob label:lower,upper ...
   * </pre>
   * The feature name must not contain whitespace and must already be present
   * in the data alphabet of {@code data}. Every following token names a label
   * from the target alphabet of {@code data}, followed by a colon and either a
   * single value (used as both lower and upper bound) or a
   * {@code lower,upper} pair. Blank lines are skipped and leading or trailing
   * whitespace on a line is ignored.
   *
   * <p>As a side effect, every entry of the target alphabet is printed to
   * {@code System.err} before parsing starts.
   *
   * <p>Any failure while reading or parsing (I/O error, unknown feature,
   * unknown label, malformed token, unparsable number) prints the stack trace
   * and terminates the JVM with exit status 1.
   *
   * <p>The supplied reader is wrapped in a {@link BufferedReader} and read to
   * the end; it is not closed by this method.
   *
   * @param fileReader source of the constraint lines
   * @param data instance list whose data and target alphabets are used to
   *        resolve feature names and label names to indices
   * @return map from feature index to an array with one row per target label;
   *         column 0 holds the lower bound and column 1 the upper bound. Rows
   *         of labels that are not mentioned for a feature hold
   *         {@code Double.NEGATIVE_INFINITY} in both columns. If a feature
   *         appears on several lines, the last line wins.
   */
  public static HashMap<Integer,double[][]> loadGEConstraints(Reader fileReader, InstanceList data) {
    HashMap<Integer,double[][]> constraints = new HashMap<Integer,double[][]>();

    for (int li = 0; li < data.getTargetAlphabet().size(); li++) {
      System.err.println(data.getTargetAlphabet().lookupObject(li));
    }

    try {
      BufferedReader reader = new BufferedReader(fileReader);
      for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        // Trim first: splitting a line with leading whitespace would otherwise
        // produce an empty first token that is mistaken for the feature name.
        String trimmed = line.trim();
        if (trimmed.length() == 0) {
          continue;
        }

        String[] split = trimmed.split("\\s+");
        // assume the feature name has no spaces
        String featureName = split[0];
        int featureIndex = data.getDataAlphabet().lookupIndex(featureName, false);
        if (featureIndex == -1) {
          throw new RuntimeException("Feature " + featureName + " not found in the alphabet!");
        }

        constraints.put(featureIndex, parseLabelBounds(split, data));
      }
    }
    catch (Exception e) {
      e.printStackTrace();
      System.exit(1);
    }
    return constraints;
  }

  /**
   * Parses the label tokens of one constraint line (everything after the
   * feature name at index 0) into a per-label array of lower/upper bounds.
   *
   * @param tokens whitespace-split tokens of the line; index 0 is the feature name
   * @param data instance list whose target alphabet resolves label names
   * @return array with one {@code [lower, upper]} row per target label
   * @throws IllegalArgumentException if a token is malformed or names a label
   *         that is not in the target alphabet
   */
  private static double[][] parseLabelBounds(String[] tokens, InstanceList data) {
    double[][] probs = new double[data.getTargetAlphabet().size()][2];
    for (int i = 0; i < probs.length; i++) {
      // Negative infinity marks "no constraint given for this label".
      Arrays.fill(probs[i], Double.NEGATIVE_INFINITY);
    }

    for (int index = 1; index < tokens.length; index++) {
      String token = tokens[index];
      String[] labelSplit = token.split(":");
      if (labelSplit.length < 2) {
        throw new IllegalArgumentException(
            "Malformed constraint \"" + token + "\": expected label:prob or label:lower,upper");
      }

      int li = data.getTargetAlphabet().lookupIndex(labelSplit[0], false);
      // Explicit check rather than an assert: assertions are disabled by
      // default, and -1 would otherwise be used as an array index below.
      if (li == -1) {
        throw new IllegalArgumentException(
            "Label " + labelSplit[0] + " not found in the alphabet!");
      }

      if (labelSplit[1].contains(",")) {
        String[] rangeSplit = labelSplit[1].split(",");
        if (rangeSplit.length < 2) {
          throw new IllegalArgumentException(
              "Malformed range in constraint \"" + token + "\": expected lower,upper");
        }
        probs[li][0] = Double.parseDouble(rangeSplit[0]);
        probs[li][1] = Double.parseDouble(rangeSplit[1]);
      }
      else {
        double prob = Double.parseDouble(labelSplit[1]);
        probs[li][0] = prob;
        probs[li][1] = prob;
      }
    }
    return probs;
  }
}