2017-09-27 15 views
1

私は自己運転用の自動車プログラムのための下のコードで作業しています。私は私のchoose_action関数に問題があります。エージェントは、以下のステップで最高のQ値を持つ行動の選択肢からランダムに行動を選択する必要があります。最適な行動選択をランダム化

「else: action = maxQaction」

しかし、現在の書き方では、毎回同じ行動が選択されてしまいます。最高のQ値を持つ行動の選択を無作為化する方法を、どなたか提案していただけないでしょうか。おそらくリストを使えばよいのではないかと考えています。トリックをした

Code: 
import random 
import math 
from environment import Agent, Environment 
from planner import RoutePlanner 
from simulator import Simulator 
import itertools 

class LearningAgent(Agent):
    """An agent that learns to drive in the Smartcab world.

    The agent keeps a Q-table (``self.Q``) mapping each state tuple to a
    dictionary of ``{action: Q-value}`` and picks actions epsilon-greedily
    when ``learning`` is enabled.
    """

    def __init__(self, env, learning=False, epsilon=1.0, alpha=0.5):
        """Set up the planner, the learning parameters and the Q-table.

        Args:
            env: The environment the agent lives in; passed to the base class.
            learning: Whether the agent is expected to learn.
            epsilon: Random exploration factor.
            alpha: Learning factor (also used as the epsilon decay rate in
                ``reset``).
        """
        super(LearningAgent, self).__init__(env)     # Set the agent in the environment
        self.planner = RoutePlanner(self.env, self)  # Create a route planner
        self.valid_actions = self.env.valid_actions  # The set of valid actions

        # Parameters of the learning agent
        self.learning = learning  # Whether the agent is expected to learn
        self.Q = dict()           # Q-table: state tuple -> {action: Q-value}
        self.epsilon = epsilon    # Random exploration factor
        self.alpha = alpha        # Learning factor

        # Possible values of each state feature, in the same order as the
        # tuple produced by build_state().
        self.states = [
            ['red', 'green'],                     # light
            ['left', 'right', 'forward', None],   # vehicle on the left
            ['left', 'right', 'forward', None],   # vehicle on the right
            ['left', 'right', 'forward', None],   # oncoming vehicle
            ['left', 'right', 'forward'],         # waypoint
        ]

        self.x = 0       # Number of training trials started (drives epsilon decay)
        random.seed(42)  # NOTE: seeds the module-level RNG for the whole process

        # Template row of the Q-table: every valid action starts at 0.0.
        self.q_maker = {action: 0.0 for action in self.valid_actions}

        # Pre-populate the Q-table with every combination of feature values.
        for prod_state in itertools.product(*self.states):
            self.Q[prod_state] = self.q_maker.copy()

    def reset(self, destination=None, testing=False):
        """Prepare the agent for a new trial.

        Args:
            destination: The location to route to for this trial.
            testing: True once training trials have completed; epsilon and
                alpha are then set to 0.

        Returns:
            None.
        """
        # Select the destination as the new location to route to
        self.planner.route_to(destination)

        if testing:
            self.epsilon = 0.0
            self.alpha = 0.0
        else:
            # Exponential decay of the exploration factor with trial count.
            self.x += 1
            self.epsilon = math.exp(-self.alpha * self.x)

        return None

    def build_state(self):
        """Build the state tuple from the current environment readings.

        Returns:
            A tuple ``(light, left, right, oncoming, waypoint)``, ordered like
            ``self.states`` so it can be used as a key of ``self.Q``.
        """
        # Collect data about the environment
        waypoint = self.planner.next_waypoint()  # The next waypoint
        inputs = self.env.sense(self)            # Intersection light and traffic
        # Remaining deadline: queried as in the original code, but deliberately
        # not part of the state.
        deadline = self.env.get_deadline(self)

        state = (inputs['light'], inputs['left'], inputs['right'],
                 inputs['oncoming'], waypoint)

        return state

    def get_maxQ(self, state):
        """Return the maximum Q-value over all actions for ``state``.

        Raises:
            KeyError: If ``state`` is not in the Q-table.
        """
        return max(self.Q[state].values())

    def createQ(self, state):
        """Add ``state`` to the Q-table if learning and it is not there yet.

        Every action of a newly added state starts with a Q-value of 0.0.
        """
        if not self.learning:
            return

        if state not in self.Q:
            self.Q[state] = self.q_maker.copy()

        return

    def choose_action(self, state):
        """Choose the action to take in ``state``.

        When not learning, a uniformly random valid action is returned.
        When learning, a uniformly random valid action is returned with
        probability ``epsilon``; otherwise one of the actions sharing the
        highest Q-value for ``state`` is picked at random, so ties are not
        always resolved in favour of the same action.

        Args:
            state: The state tuple the agent is currently in.

        Returns:
            One of ``self.valid_actions``.
        """
        # Set the agent state and next waypoint
        self.state = state
        self.next_waypoint = self.planner.next_waypoint()

        # Explore: not learning at all, or the epsilon draw says so.
        if not self.learning or random.random() < self.epsilon:
            return random.choice(self.valid_actions)

        # Exploit: collect every action tied for the best Q-value and pick
        # one of them at random. Exact equality is safe here because maxQ is
        # taken from these very values.
        maxQ = self.get_maxQ(state)
        best_actions = [action for action, q_value in self.Q[state].items()
                        if q_value == maxQ]
        return random.choice(best_actions)

答えて

1
if not self.learning or random.random() < self.epsilon:
    # Explore: pick any valid action uniformly at random.
    action = random.choice(self.valid_actions)
else:
    # Exploit: find the best Q-value, then build the list of actions that
    # match it so ties are broken randomly.
    maxQ = self.get_maxQ(state)
    maxQaction = [act for act in self.Q[state] if self.Q[state][act] == maxQ]
    action = random.choice(maxQaction)  # choose one randomly
+0

感謝。 – user3476463

関連する問題