I'm working on the code below for a self-driving smartcab program, and I'm having trouble with my choose_action function. In the step below, the agent is supposed to pick an action at random from among the choices with the highest Q-value, i.e. randomize the selection of the best action:

"else: action = maxQaction"

However, the way it's written now it just picks the same action every time (as far as I can tell, max() simply returns the first action it finds with the top Q-value, so ties always break the same way). Can anyone suggest how to randomize the choice among the actions tied for the highest Q-value? Maybe a list is the trick; I've sketched the kind of approach I have in mind after the code.
Code:
import random
import math
from environment import Agent, Environment
from planner import RoutePlanner
from simulator import Simulator
import itertools
class LearningAgent(Agent):
    """ An agent that learns to drive in the Smartcab world.
        This is the object you will be modifying. """

    def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
        super(LearningAgent, self).__init__(env)     # Set the agent in the environment
        self.planner = RoutePlanner(self.env, self)  # Create a route planner
        self.valid_actions = self.env.valid_actions  # The set of valid actions

        # Set parameters of the learning agent
        self.learning = learning  # Whether the agent is expected to learn
        self.Q = dict()           # Create a Q-table which will be a dictionary of tuples
        self.epsilon = epsilon    # Random exploration factor
        self.alpha = alpha        # Learning factor

        ###########
        ## TO DO ##
        ###########
        # Set any additional class parameters as needed
        self.states = [
            ['red', 'green'],                    # light
            ['left', 'right', 'forward', None],  # vehicle left
            ['left', 'right', 'forward', None],  # vehicle right
            ['left', 'right', 'forward', None],  # vehicle oncoming
            ['left', 'right', 'forward']         # waypoint
        ]
        self.x = 0
        random.seed(42)
        # Template of default Q-values (0.0) for every valid action
        self.q_maker = dict((k, 0.0) for k in self.valid_actions)
        # Pre-populate the Q-table with every possible state combination
        for prod_state in itertools.product(*self.states):
            self.Q[prod_state] = self.q_maker.copy()
    def reset(self, destination=None, testing=False):
        """ The reset function is called at the beginning of each trial.
            'testing' is set to True if testing trials are being used
            once training trials have completed. """

        # Select the destination as the new location to route to
        self.planner.route_to(destination)

        ###########
        ## TO DO ##
        ###########
        # Update epsilon using a decay function of your choice
        # Update additional class parameters as needed
        # If 'testing' is True, set epsilon and alpha to 0

        # Added for Question 6
        # self.x = self.x + 1
        if testing:
            self.epsilon = 0.0
            self.alpha = 0.0
        else:
            # self.epsilon = self.epsilon - 0.05  # for question 6
            self.x += 1
            self.epsilon = math.exp(-self.alpha * self.x)
            # self.epsilon = math.fabs(math.cos(self.alpha * self.x))
            # self.epsilon = 1.0 / (self.x ** 2)
            # self.epsilon = self.alpha ** self.x

        return None
    def build_state(self):
        """ The build_state function is called when the agent requests data from the
            environment. The next waypoint, the intersection inputs, and the deadline
            are all features available to the agent. """

        # Collect data about the environment
        waypoint = self.planner.next_waypoint()  # The next waypoint
        inputs = self.env.sense(self)            # Visual input - intersection light and traffic
        deadline = self.env.get_deadline(self)   # Remaining deadline

        ###########
        ## TO DO ##
        ###########
        # Set 'state' as a tuple of relevant data for the agent
        # state = (waypoint, inputs['light'], inputs['left'], inputs['right'], inputs['oncoming'])  # modified for "Update the Driving Agent State"
        state = (inputs['light'], inputs['left'], inputs['right'], inputs['oncoming'], waypoint)
        return state
    def get_maxQ(self, state):
        """ The get_maxQ function is called when the agent is asked to find the
            maximum Q-value of all actions based on the 'state' the smartcab is in. """

        ###########
        ## TO DO ##
        ###########
        # Calculate the maximum Q-value of all actions for a given state
        action_selections = self.Q[state]
        maxQ = max(action_selections.items(), key=lambda x: x[1])[1]
        return maxQ
    def createQ(self, state):
        """ The createQ function is called when a state is generated by the agent. """

        ###########
        ## TO DO ##
        ###########
        # When learning, check if the 'state' is not in the Q-table
        # If it is not, create a new dictionary for that state
        # Then, for each action available, set the initial Q-value to 0.0
        if not self.learning:
            return
        if state not in self.Q:
            self.Q[state] = self.q_maker.copy()
        return
    def choose_action(self, state):
        """ The choose_action function is called when the agent is asked to choose
            which action to take, based on the 'state' the smartcab is in. """

        # Set the agent state and default action
        self.state = state
        self.next_waypoint = self.planner.next_waypoint()
        action = random.choice([None, 'forward', 'left', 'right'])  # Modified from None for question 3
        # action = None  # added after first submission

        ###########
        ## TO DO ##
        ###########
        # When not learning, choose a random action
        # When learning, choose a random action with 'epsilon' probability
        # Otherwise, choose an action with the highest Q-value for the current state
        action_selections = self.Q[state]
        maxQaction = max(action_selections.items(), key=lambda x: x[1])[0]
        if self.learning:
            choose_using_epsilon = random.random() < 1 - self.epsilon
            if not choose_using_epsilon:
                # Explore: pick at random from the actions other than the current max-Q action
                valid_actions = filter(lambda x: x != maxQaction,
                                       Environment.valid_actions)
                action = random.choice(valid_actions)
            else:
                # Exploit: always returns the same action when several are tied for the max
                action = maxQaction
        else:
            action = random.choice(Environment.valid_actions)

        return action
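
This is the kind of thing I had in mind, in place of the "else: action = maxQaction" branch (a rough, untested sketch; it assumes self.Q[state] is the dict of action: Q-value pairs built in createQ above): collect every action tied for the maximum Q-value into a list and let random.choice pick among them.

                # Untested sketch: break ties among the highest-Q actions at random
                action_selections = self.Q[state]       # {action: Q-value} for the current state
                maxQ = max(action_selections.values())  # highest Q-value in this state
                best_actions = [a for a, q in action_selections.items() if q == maxQ]
                action = random.choice(best_actions)    # random pick among the tied actions

That way, when several actions share the top Q-value, the agent wouldn't always fall back to whichever one max() happens to return first.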
Thanks. – user3476463