package joshua.discriminative.training.risk_annealer;

/**
 * We assume the objective function is:
 *   gainFactor * gain(theta, scalingFactor) + temperature * entropy(p(y))
 */
public abstract class GradientComputer {

  /** Affects the probability distribution itself. */
  protected double scalingFactor;

  /** Affects the gradient and function value, but not the probability distribution itself. */
  protected double temperature;

  /**
   * Controls how much we optimize the expected gain relative to the entropy term.
   * If the average BLEU score on the dev set is around 30%, this should be 1.0;
   * otherwise use gainFactor = 0.3 / avgBleu. The larger this value, the more the
   * optimization focuses on the gain rather than on the regularization; so the more
   * regularization you want, the smaller the gain factor should be.
   */
  protected double gainFactor = 1.0;

  protected boolean shouldComputeGradientForScalingFactor = false;

  protected double[] gradientsForTheta;
  protected double gradientForScalingFactor = 0;

  /** functionValue = expectedGain * gainFactor + entropy * temperature */
  protected double functionValue;

  /** Number of features, *excluding* the possible scaling feature. */
  protected int numFeatures;

  public GradientComputer(int numFeatures, double gainFactor, double scalingFactor,
      double temperature, boolean shouldComputeGradientForScalingFactor) {
    this.numFeatures = numFeatures;
    this.gainFactor = gainFactor;
    this.scalingFactor = scalingFactor;
    this.temperature = temperature;
    this.shouldComputeGradientForScalingFactor = shouldComputeGradientForScalingFactor;
    this.gradientsForTheta = new double[numFeatures];
  }

  /**
   * Uses the latest theta, scalingFactor, and temperature to recompute the gradient
   * and the function value. If shouldComputeGradientForScalingFactor is true, the
   * first position of theta is the weight for the scalingFactor. Results are saved
   * in gradientsForTheta, gradientForScalingFactor, and functionValue.
   */
  public abstract void reComputeFunctionValueAndGradient(double[] theta);

  public abstract void printLastestStatistics();

  /**
   * If shouldComputeGradientForScalingFactor is true, the first position holds the
   * gradient for the scalingFactor.
   */
  public final double[] getLatestGradient() {
    double[] res;
    if (shouldComputeGradientForScalingFactor) {
      double[] gradients2 = new double[gradientsForTheta.length + 1];
      gradients2[0] = gradientForScalingFactor; // first position is the gradient for scalingFactor
      for (int i = 0; i < gradientsForTheta.length; i++)
        gradients2[i + 1] = gradientsForTheta[i];
      res = gradients2;
    } else {
      res = gradientsForTheta;
    }

    //=== sanity check
    for (int i = 0; i < res.length; i++) {
      if (Double.isNaN(res[i])) {
        System.out.println("gradient value isNaN");
        System.exit(1);
      }
    }
    return res;
  }

  public final double getLatestFunctionValue() {
    //=== sanity check
    if (Double.isNaN(functionValue)) {
      System.out.println("functionValue isNaN");
      System.exit(1);
    }
    return functionValue;
  }

  public final void setScalingFactor(double scalingFactor) {
    this.scalingFactor = scalingFactor;
  }

  public final double getScalingFactor() {
    return scalingFactor;
  }

  public final void setTemperature(double temperature) {
    this.temperature = temperature;
  }

  public final double getTemperature() {
    return temperature;
  }

  public final void setGainFactor(double gainFactor) {
    this.gainFactor = gainFactor;
  }

  public final double getGainFactor() {
    return gainFactor;
  }

  public final void setComputeGradientForScalingFactor(boolean in) {
    shouldComputeGradientForScalingFactor = in;
  }

  public final boolean isComputeGradientForScalingFactor() {
    return shouldComputeGradientForScalingFactor;
  }
}
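
/*
 * Illustrative sketch only, not part of the original Joshua code: a minimal concrete
 * GradientComputer showing which fields a subclass is expected to fill in. The class
 * name and the toy quadratic "gain" are hypothetical; it assumes
 * shouldComputeGradientForScalingFactor == false (so theta.length == numFeatures)
 * and ignores the entropy term. A typical optimizer loop would drive it as:
 *   computer.reComputeFunctionValueAndGradient(theta);
 *   double value  = computer.getLatestFunctionValue();
 *   double[] grad = computer.getLatestGradient();
 */
class ToyQuadraticGradientComputer extends GradientComputer {

  public ToyQuadraticGradientComputer(int numFeatures, double gainFactor) {
    // no gradient for the scaling feature; scalingFactor and temperature left at neutral values
    super(numFeatures, gainFactor, 1.0, 0.0, false);
  }

  @Override
  public void reComputeFunctionValueAndGradient(double[] theta) {
    // toy objective: gainFactor * ( -0.5 * ||theta||^2 ); entropy term omitted
    functionValue = 0;
    for (int i = 0; i < numFeatures; i++) {
      functionValue += gainFactor * (-0.5 * theta[i] * theta[i]);
      gradientsForTheta[i] = gainFactor * (-theta[i]);
    }
    // the toy gain does not depend on scalingFactor, so its gradient stays zero
    gradientForScalingFactor = 0;
  }

  @Override
  public void printLastestStatistics() {
    System.out.println("functionValue=" + functionValue);
  }
}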