package joshua.discriminative.training.risk_annealer;

/**
 * We assume the objective function is
 *   gainFactor * gain(\theta, scalingFactor) + temperature * Entropy(p(y)),
 * where p(y) is the model's probability distribution over outputs.
 */
public abstract class GradientComputer {

  /** Affects the probability distribution p(y) itself. */
  protected double scalingFactor;
  /** Affects the gradient and function value, but not the probability distribution itself. */
  protected double temperature;
  /**
   * Controls how much we want to optimize the expected gain instead of the entropy.
   * If the average BLEU score on the dev set is around 30%, this should be 1.0; otherwise
   * use gainFactor = 0.3 / avgBleu. The larger this value, the more the optimization focuses
   * on the gain instead of the regularization; so the more regularization you want, the
   * smaller the gainFactor should be.
   */
  protected double gainFactor = 1.0;

  protected boolean shouldComputeGradientForScalingFactor = false;
  protected double[] gradientsForTheta;
  protected double gradientForScalingFactor = 0;
  /** functionValue = sumExpectedGain * gainFactor + sumEntropy * temperature */
  protected double functionValue;
  /** Number of features, *excluding* the possible scaling feature. */
  protected int numFeatures;
  public GradientComputer(int numFeatures, double gainFactor, double scalingFactor,
      double temperature, boolean shouldComputeGradientForScalingFactor) {
    this.numFeatures = numFeatures;
    this.gainFactor = gainFactor;
    this.scalingFactor = scalingFactor;
    this.temperature = temperature;
    this.shouldComputeGradientForScalingFactor = shouldComputeGradientForScalingFactor;
    this.gradientsForTheta = new double[numFeatures];
  }
  /**
   * Uses the latest theta, scalingFactor, and temperature to recompute the gradient and the
   * function value. If shouldComputeGradientForScalingFactor is true, the first position of
   * theta is the weight for the scalingFactor. The results are saved in gradientsForTheta,
   * gradientForScalingFactor, and functionValue.
   */
  public abstract void reComputeFunctionValueAndGradient(double[] theta);

  /** Prints statistics from the latest computation. */
  public abstract void printLastestStatistics();
  /**
   * Returns the latest gradient. If shouldComputeGradientForScalingFactor is true, the first
   * position holds the gradient for the scalingFactor, followed by the gradients for theta.
   */
  public final double[] getLatestGradient() {
    double[] res = null;
    if (shouldComputeGradientForScalingFactor) {
      double[] gradients2 = new double[gradientsForTheta.length + 1];
      gradients2[0] = gradientForScalingFactor; // first position is the gradient of the scalingFactor
      for (int i = 0; i < gradientsForTheta.length; i++)
        gradients2[i + 1] = gradientsForTheta[i];
      res = gradients2;
    } else {
      res = gradientsForTheta;
    }

    //=== sanity check
    for (int i = 0; i < res.length; i++) {
      if (Double.isNaN(res[i])) {
        System.out.println("gradient value isNaN");
        System.exit(1);
      }
    }
    return res;
  }
  public final double getLatestFunctionValue() {
    //=== sanity check
    if (Double.isNaN(functionValue)) {
      System.out.println("func_val isNaN");
      System.exit(1);
    }
    return functionValue;
  }
  public final void setScalingFactor(double scalingFactor) {
    this.scalingFactor = scalingFactor;
  }

  public final double getScalingFactor() {
    return scalingFactor;
  }

  public final void setTemperature(double temperature) {
    this.temperature = temperature;
  }

  public final double getTemperature() {
    return temperature;
  }

  public final void setGainFactor(double gainFactor) {
    this.gainFactor = gainFactor;
  }

  public final double getGainFactor() {
    return gainFactor;
  }

  public final void setComputeGradientForScalingFactor(boolean shouldComputeGradientForScalingFactor) {
    this.shouldComputeGradientForScalingFactor = shouldComputeGradientForScalingFactor;
  }

  public final boolean isComputeGradientForScalingFactor() {
    return shouldComputeGradientForScalingFactor;
  }
}
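
/*
 * A minimal sketch, not part of the original class: a hypothetical subclass
 * (ToyQuadraticGradientComputer, with a made-up quadratic "gain" and a constant "entropy")
 * illustrating how reComputeFunctionValueAndGradient is expected to fill gradientsForTheta,
 * gradientForScalingFactor, and functionValue, and how the scalingFactor weight occupies
 * position 0 of theta when shouldComputeGradientForScalingFactor is true. An optimizer would
 * repeatedly call reComputeFunctionValueAndGradient(theta) and then read
 * getLatestFunctionValue() and getLatestGradient(). The real gradient computers in this
 * package derive the expected gain and entropy from the model distribution p(y) instead.
 */
class ToyQuadraticGradientComputer extends GradientComputer {

  /** Toy objective: gain(theta, scalingFactor) = -sum_i (scalingFactor*theta_i - target_i)^2. */
  private final double[] target;

  public ToyQuadraticGradientComputer(double[] target, double gainFactor, double scalingFactor,
      double temperature, boolean shouldComputeGradientForScalingFactor) {
    super(target.length, gainFactor, scalingFactor, temperature, shouldComputeGradientForScalingFactor);
    this.target = target;
  }

  @Override
  public void reComputeFunctionValueAndGradient(double[] theta) {
    // When the scalingFactor gradient is requested, theta[0] carries the scalingFactor weight
    // and the feature weights start at theta[1]; otherwise theta holds the feature weights directly.
    int offset = 0;
    if (shouldComputeGradientForScalingFactor) {
      scalingFactor = theta[0];
      offset = 1;
    }

    double gain = 0;
    gradientForScalingFactor = 0;
    for (int i = 0; i < numFeatures; i++) {
      double diff = scalingFactor * theta[i + offset] - target[i];
      gain -= diff * diff;
      // Gradients of gainFactor*gain with respect to theta_i and to the scalingFactor.
      gradientsForTheta[i] = gainFactor * (-2.0 * diff * scalingFactor);
      gradientForScalingFactor += gainFactor * (-2.0 * diff * theta[i + offset]);
    }

    double entropy = 1.0; // toy constant; a real computer would derive this from p(y)
    functionValue = gainFactor * gain + temperature * entropy;
  }

  @Override
  public void printLastestStatistics() {
    System.out.println("functionValue=" + functionValue + "; scalingFactor=" + scalingFactor
        + "; temperature=" + temperature);
  }
}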