/*
* avenir: Predictive analytic based on Hadoop Map Reduce
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.avenir.reinforce;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.chombo.util.ConfigUtility;
import org.chombo.util.Utility;
/**
 * Sampson sampler probabilistic matching reinforcement learning.
 *
 * <p>For every action with recorded rewards, one reward value is drawn, either
 * from the rewards observed so far for that action or, while too few rewards
 * have been observed, at random below the configured maximum reward. The
 * action with the highest drawn value is selected.
 *
 * <p>NOTE(review): the name presumably refers to Thompson sampling; confirm.
 *
 * @author pranab
 *
 */
public class SampsonSamplerLearner extends ReinforcementLearner {
    /** Rewards observed so far, keyed by action ID, in the order they were reported. */
    protected Map<String, List<Integer>> rewardDistr = new HashMap<String, List<Integer>>();

    /** An action's own reward history is sampled only when it holds more than this many entries. */
    private int minSampleSize;

    /** Exclusive upper bound of the random reward drawn for actions with too little history. */
    private int maxReward;

    /**
     * Records a reward for an action: the value is appended to the action's
     * reward history (created on first use) and also passed on to the
     * corresponding {@code Action} object.
     *
     * @param actionID ID of the action that earned the reward
     * @param reward reward value
     */
    @Override
    public void setReward(String actionID, int reward) {
        List<Integer> rewards = rewardDistr.get(actionID);
        if (null == rewards) {
            rewards = new ArrayList<Integer>();
            rewardDistr.put(actionID, rewards);
        }
        rewards.add(reward);
        findAction(actionID).reward(reward);
    }

    /**
     * Selects the next action and increments the trial count.
     *
     * <p>Only actions present in {@link #rewardDistr} are candidates. For each
     * candidate a reward is drawn: through {@code Utility.selectRandom} from
     * its recorded rewards (then passed through {@link #enforce(String, int)})
     * when it has more than {@code minSampleSize} entries, otherwise as
     * {@code (int) (Math.random() * maxReward)}. The candidate with the highest
     * drawn reward wins; on ties the candidate encountered first is kept.
     *
     * @return the selected action, after {@code select()} has been called on it
     */
    @Override
    public Action nextAction() {
        String selectedActionID = null;
        int maxRewardCurrent = 0;
        ++totalTrialCount;

        for (Map.Entry<String, List<Integer>> entry : rewardDistr.entrySet()) {
            String actionID = entry.getKey();
            List<Integer> rewards = entry.getValue();

            int reward;
            if (rewards.size() > minSampleSize) {
                // enough history: sample from this action's observed rewards
                reward = Utility.selectRandom(rewards);
                reward = enforce(actionID, reward);
            } else {
                // too little history: random draw below the configured maximum
                reward = (int) (Math.random() * maxReward);
            }

            // The first candidate always seeds the maximum. Comparing against an
            // initial 0 with a strict '>' would leave nothing selected whenever
            // every drawn reward is 0 (or negative).
            if (null == selectedActionID || reward > maxRewardCurrent) {
                selectedActionID = actionID;
                maxRewardCurrent = reward;
            }
        }

        // NOTE(review): if no reward has been recorded for any action yet there
        // are no candidates and selectedActionID is still null here; the outcome
        // then depends on how findAction treats a null ID. Verify against callers.
        Action selAction = findAction(selectedActionID);
        selAction.select();
        return selAction;
    }

    /**
     * Hook applied to a reward sampled from an action's history before it takes
     * part in the comparison. This implementation returns the reward unchanged.
     *
     * @param actionID ID of the action the reward was sampled for
     * @param reward sampled reward
     * @return reward to use in the comparison
     */
    public int enforce(String actionID, int reward) {
        return reward;
    }

    /**
     * Initializes the base learner and then reads this learner's settings,
     * {@code min.sample.size} and {@code max.reward}, from the configuration.
     *
     * @param config configuration parameters
     */
    @Override
    public void initialize(Map<String, Object> config) {
        super.initialize(config);
        minSampleSize = ConfigUtility.getInt(config, "min.sample.size");
        maxReward = ConfigUtility.getInt(config, "max.reward");
    }
}