package quickml.experiments;
import com.google.common.collect.Lists;
import quickml.data.*;
import quickml.data.instances.Instance;
import quickml.data.instances.InstanceImpl;
import quickml.supervised.ensembles.randomForest.randomDecisionForest.RandomDecisionForest;
import java.io.Serializable;
import java.util.List;
import java.util.Random;
/**
 * Generates synthetic click / no-click training data for experiments.
 *
 * <p>Each generated instance has one numeric attribute per name in
 * {@code bidRequestAttributes}. Every attribute is derived from an independent
 * uniform draw on [0, 1); the mean of those draws is kept as a latent variable,
 * and the click probability of the instance is
 * {@code maxProbabilityOfClick * (1 - exp(-decayConstant * latentVariable))}.
 *
 * <p>The class is stateful: generating attributes overwrites the latent variable
 * that the probability computation subsequently reads, so instances of this
 * class must not be shared between threads without external synchronization.
 *
 * TODO: This should probably be removed or replaced
 */
public class TrainingDataGenerator2 {
    private final Random uniformRandom;
    private final int instances;
    private final int numPredictiveAttributes;
    // The higher the value, the less likely a no-click event becomes.
    private final double decayConstantForNoClickEvent;
    private final double maxProbabilityOfClick;
    private final String[] bidRequestAttributes;
    // Latent variable controlling the click classification; rewritten by every
    // call to getAttributesForAnInstance().
    private double latentVariable;

    /**
     * Creates a generator and prints the derived decay constant to standard output.
     *
     * @param instances             number of instances produced by {@link #createTrainingData()}
     * @param maxProbabilityOfClick scale factor applied to the click probability
     * @param bidRequestAttributes  attribute names, one per predictive attribute
     * @throws NullPointerException if {@code bidRequestAttributes} is null
     */
    public TrainingDataGenerator2(int instances, double maxProbabilityOfClick, String[] bidRequestAttributes) {
        if (bidRequestAttributes == null) {
            throw new NullPointerException("bidRequestAttributes must not be null");
        }
        this.uniformRandom = new Random();
        this.instances = instances;
        this.maxProbabilityOfClick = maxProbabilityOfClick;
        this.bidRequestAttributes = bidRequestAttributes;
        this.numPredictiveAttributes = bidRequestAttributes.length;
        this.decayConstantForNoClickEvent = computeDecayConstant(this.numPredictiveAttributes);
        System.out.println("decay constant " + decayConstantForNoClickEvent);
    }

    /**
     * Prints the average relative deviation between the true click probability of
     * freshly generated instances and the probability predicted by the forest.
     *
     * <p>Instances are generated until {@code samples} of them have a true click
     * probability strictly above the threshold; only those contribute to the average.
     *
     * @param samples                                 number of accepted instances to average over
     * @param onlyConsiderSamplesAboveThisProbability acceptance threshold on the true click probability
     * @param randomDecisionForest                    model whose probability for the label {@code 1.0} is evaluated
     * @throws IllegalArgumentException if {@code samples > 0} and the threshold can never be
     *                                  exceeded by a generated instance (previously this looped forever)
     */
    public void getAverageDeviationInPredictedProbabilities(int samples, double onlyConsiderSamplesAboveThisProbability, RandomDecisionForest randomDecisionForest) {
        if (samples > 0) {
            // The latent variable is a mean of draws from [0, 1), so the click
            // probability never reaches maxProbabilityOfClick * (1 - exp(-decay)).
            // Math.max covers a negative maxProbabilityOfClick, where 0 is the supremum.
            double leastUpperBound = Math.max(0.0,
                    maxProbabilityOfClick * (1 - Math.exp(-decayConstantForNoClickEvent)));
            // Written as a negated '<' so that NaN on either side is rejected as well.
            if (!(onlyConsiderSamplesAboveThisProbability < leastUpperBound)) {
                throw new IllegalArgumentException(
                        "onlyConsiderSamplesAboveThisProbability (" + onlyConsiderSamplesAboveThisProbability
                                + ") must be below the highest reachable click probability (" + leastUpperBound + ")");
            }
            // NOTE(review): a threshold only slightly below the bound is accepted but may
            // still need a very large number of draws before enough samples qualify.
        }

        double deviation = 0;
        int acceptedSamples = 0;
        while (acceptedSamples < samples) {
            AttributesMap attributes = getAttributesForAnInstance();
            double actualClickProbability = getClickProbabilityFromLatentVariableValue();
            if (actualClickProbability > onlyConsiderSamplesAboveThisProbability) {
                double predictedProb = randomDecisionForest.getProbability(attributes, 1.0);
                deviation += Math.abs(actualClickProbability - predictedProb) / actualClickProbability;
                acceptedSamples++;
            }
        }
        System.out.println("average deviation" + deviation / samples);
    }

    /**
     * Builds the attribute map of one new instance and, as a side effect, resets
     * and recomputes {@code latentVariable} for that instance.
     */
    private AttributesMap getAttributesForAnInstance() {
        AttributesMap attributes = AttributesMap.newHashMap();
        latentVariable = 0;
        for (int j = 0; j < numPredictiveAttributes; j++) {
            double attributeValue = drawAttributeValueAndAccumulateLatent();
            attributes.put(bidRequestAttributes[j], attributeValue);
        }
        return attributes;
    }

    /**
     * Returns the click probability implied by the current {@code latentVariable},
     * i.e. by the instance most recently produced by {@link #getAttributesForAnInstance()}.
     */
    private double getClickProbabilityFromLatentVariableValue() {
        return maxProbabilityOfClick * (1 - Math.exp(-decayConstantForNoClickEvent * latentVariable));
    }

    /**
     * Computes the decay constant as the reciprocal of a point 16 standard deviations
     * above the mean of the average of {@code numPredictiveAttributes} uniform [0, 1) variables.
     */
    private static double computeDecayConstant(int numPredictiveAttributes) {
        double standardDeviationOfUniformVariableOn0to1 = Math.sqrt(1.0 / 12);
        double meanOfUniformVariableOn0to1 = 0.5;
        // Make this number higher if you want more instances that have high click probabilities.
        int stdsAboveTheMeanForRelevance = 16;
        double standardDeviationOfClassificationVariable =
                standardDeviationOfUniformVariableOn0to1 / Math.sqrt(numPredictiveAttributes);
        return 1 / (meanOfUniformVariableOn0to1
                + stdsAboveTheMeanForRelevance * standardDeviationOfClassificationVariable);
    }

    /**
     * Generates {@code instances} labelled instances.
     *
     * @return a new list in which every instance carries a freshly drawn attribute map
     *         and a label of {@code 1.0} (click) or {@code 0.0} (no click)
     */
    public List<Instance<AttributesMap, Serializable>> createTrainingData() {
        List<Instance<AttributesMap, Serializable>> trainingData = Lists.newArrayList();
        for (int i = 0; i < instances; i++) {
            // Order matters: the attributes must be drawn first because that call
            // sets the latent variable the label is sampled from.
            AttributesMap attributes = getAttributesForAnInstance();
            Double clickClassification = sampleClickLabel();
            trainingData.add(new InstanceImpl<AttributesMap, Serializable>(attributes, clickClassification));
        }
        return trainingData;
    }

    /**
     * Samples a label for the current instance: {@code 1.0} with the current click
     * probability, otherwise {@code 0.0}.
     */
    private double sampleClickLabel() {
        double clickProbability = getClickProbabilityFromLatentVariableValue();
        return uniformRandom.nextDouble() < clickProbability ? 1.0 : 0.0;
    }

    /**
     * Draws one uniform value, adds its share to {@code latentVariable}, and returns
     * the value scaled by 10000 and truncated to a whole number for use as an attribute.
     */
    private double drawAttributeValueAndAccumulateLatent() {
        double attributeVal = uniformRandom.nextDouble();
        latentVariable += attributeVal / numPredictiveAttributes;
        return (int) (attributeVal * 10000);
    }
}