package AgentProvider.Implementation.Agents;
import AgentSystemPluginAPI.Contract.IStateActionGenerator;
import AgentSystemPluginAPI.Contract.StateAction;
import AgentSystemPluginAPI.Services.IAgent;
import EnvironmentPluginAPI.Exceptions.TechnicalException;
import ZeroTypes.Exceptions.ErrorMessages;
import java.util.Random;
import java.util.Set;
/**
 * The abstract implementation of an epsilon-greedy agent.
 * <p>
 * The agent normally picks the action with the highest stored Q-value, but with
 * probability {@code epsilon} (and only if there is more than one possible action)
 * it picks one of the other actions uniformly at random.
 * <p>
 * Note that the agent's learning parameters (alpha, epsilon, gamma, lambda) are all
 * {@code 0.0f} by default, because they are plain {@code float} fields that the
 * constructor does not initialise.
 */
abstract class EpsilonGreedyAgent implements IAgent {

    // agent settings
    private float epsilon;
    private float lambda;
    private float gamma;
    private float alpha;
    private final String name;

    // epsilon greedy implementation
    protected final IDictionary qValues;
    private final IStateActionGenerator stateActionGenerator;

    // technical
    private final IAgentSettingUpdatedListener settingUpdatedListener;

    // source of randomness for the exploration decision and the random action choice
    private final Random random = new Random();

    /**
     * Creates a new agent. All learning parameters start at {@code 0.0f}.
     *
     * @param name                   the agent's name, returned by {@link #getName()}
     * @param qValues                the store the action values are read from
     * @param stateActionGenerator   supplies all possible actions for a given state
     * @param settingUpdatedListener notified whenever one of the learning parameters is set
     */
    public EpsilonGreedyAgent(String name, IDictionary qValues, IStateActionGenerator stateActionGenerator,
                              IAgentSettingUpdatedListener settingUpdatedListener) {
        this.name = name;
        this.stateActionGenerator = stateActionGenerator;
        this.settingUpdatedListener = settingUpdatedListener;
        this.qValues = qValues;
    }

    /**
     * Returns the possible action with the highest value in {@link #qValues}.
     * <p>
     * Ties are resolved in favour of the action that is iterated last.
     *
     * @param state the state to find the best action for
     * @return the best action, or {@code null} if the generator yields no actions
     *         (or no value compares {@code >=} negative infinity, e.g. NaN)
     * @throws TechnicalException propagated from the state-action generator or the value store
     */
    protected StateAction getBestAction(StateAction state) throws TechnicalException {
        float bestValue = Float.NEGATIVE_INFINITY;
        StateAction best = null;
        for (StateAction action : stateActionGenerator.getAllPossibleActions(state)) {
            float candidateValue = qValues.getValue(action);
            if (candidateValue >= bestValue) {
                bestValue = candidateValue;
                best = action;
            }
        }
        return best;
    }

    /**
     * Chooses the next action epsilon-greedily.
     * <p>
     * If there is more than one possible action, then with probability {@code epsilon}
     * one of the non-best actions is chosen uniformly at random; otherwise the best
     * action (see {@link #getBestAction(StateAction)}) is returned.
     *
     * @param state the state to choose an action for
     * @return the chosen action; may be {@code null} if there are no possible actions
     * @throws RuntimeException   if the generator returns a {@code null} action or an action
     *                            without a compressed representation
     * @throws TechnicalException propagated from the state-action generator or the value store
     */
    protected StateAction getEpsilonInfluencedAction(StateAction state) throws TechnicalException {
        // get all possible actions and test their validity
        Set<StateAction> possibleActions = stateActionGenerator.getAllPossibleActions(state);
        for (StateAction sa : possibleActions) {
            if (sa == null || sa.getCompressedRepresentation() == null) {
                throw new RuntimeException(ErrorMessages.get("erroneousStateActionGenerator"));
            }
        }

        // Explore only if there is an alternative. nextFloat() is in [0.0, 1.0), so the strict
        // comparison guarantees that epsilon == 0 never explores and epsilon == 1 always does.
        boolean explore = possibleActions.size() > 1 && random.nextFloat() < epsilon;

        StateAction best = getBestAction(state);
        if (!explore) {
            return best;
        }

        // Collect the alternatives into a local array instead of removing the best action from
        // the generator's set: that set may be shared, cached or unmodifiable.
        StateAction[] alternatives = new StateAction[possibleActions.size()];
        int count = 0;
        for (StateAction action : possibleActions) {
            if (!action.equals(best)) {
                alternatives[count++] = action;
            }
        }
        if (count == 0) {
            // Nothing but the best action is available after filtering.
            return best;
        }

        StateAction chosen = alternatives[random.nextInt(count)];
        // The explored action is re-created from the given state's description and the chosen
        // action's description, exactly as before.
        return new StateAction(state.getStateDescription(), chosen.getActionDescription());
    }

    /**
     * Performs one step of the agent; the concrete behaviour is defined by subclasses.
     *
     * @param rewardForLastStep the reward for the previous step
     * @param newState          the new state
     * @return the state-action selected by the subclass
     * @throws TechnicalException if the subclass encounters a technical problem
     */
    public abstract StateAction step(float rewardForLastStep, StateAction newState) throws TechnicalException;

    /**
     * Notifies the listener that the given setting of this agent now has the given value.
     */
    private void fireSettingChangedEvent(AgentSettingName setting, float value) {
        settingUpdatedListener.onAgentSettingUpdated(this, setting, value);
    }

    /** Sets alpha and notifies the setting-updated listener. */
    public void setAlpha(float alpha) {
        this.alpha = alpha;
        fireSettingChangedEvent(AgentSettingName.ALPHA, this.alpha);
    }

    /** @return the current alpha value */
    public float getAlpha() {
        return alpha;
    }

    /** Sets epsilon (the exploration probability) and notifies the setting-updated listener. */
    public void setEpsilon(float epsilon) {
        this.epsilon = epsilon;
        fireSettingChangedEvent(AgentSettingName.EPSILON, this.epsilon);
    }

    /** @return the current epsilon value */
    public float getEpsilon() {
        return epsilon;
    }

    /** Sets gamma and notifies the setting-updated listener. */
    public void setGamma(float gamma) {
        this.gamma = gamma;
        fireSettingChangedEvent(AgentSettingName.GAMMA, this.gamma);
    }

    /** @return the current gamma value */
    public float getGamma() {
        return gamma;
    }

    /** Sets lambda and notifies the setting-updated listener. */
    public void setLambda(float lambda) {
        this.lambda = lambda;
        fireSettingChangedEvent(AgentSettingName.LAMBDA, this.lambda);
    }

    /** @return the current lambda value */
    public float getLambda() {
        return lambda;
    }

    /** @return the name this agent was created with */
    public String getName() {
        return name;
    }
}