2017-08-24 13 views
0

強化学習を使用して2048を再生するようにNNをトレーニングしています。少なくとも、私はこれが初めてだからだと思う。ValueError:図形(9、)と(4)が整列していません

これはNeuralNetwork.pyは、次のようになります。

import random 
import numpy as np 

def nonlin(x, deriv=False): 
    if(deriv==True): 
     return x * (1-x) 
    return 1/(1+np.exp(-x)) 


np.random.seed(1) 


class NeuralNetwork: 

    next_ID = 0 

    def __init__(self, HyperParams): 
     self.synapses = [] 
     for synapse in range(len(HyperParams)-1): 
      self.synapses.append(2*np.random.random((HyperParams[synapse], HyperParams[synapse+1]))-1) 
     self.score = 0 
     # self.name = words[random.randint(0, len(words))].strip() 
     self.name = str(NeuralNetwork.next_ID) 
     NeuralNetwork.next_ID += 1 


    def train_batch(self, epoch, state, outcome): 
     for i in range(epoch): 
      self.layers = [] 
      self.layers.append(state) 
      for j in range(len(self.synapses)): 
       self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j]))) 

      error = outcome - self.layers[-1] 
      if (i % 1000) == 0: print(str(np.mean(np.abs(error)))) 

      for j in range(1,1+len(self.synapses)): 
       delta = error * nonlin(self.layers[-j], True) 
       error = delta.dot(self.synapses[-j].T) 
       self.synapses[-j] += self.layers[-(j+1)].T.dot(delta) 


    def train(self, state, outcome): 
     self.layers = [] 
     self.layers.append(state) 
     for j in range(len(self.synapses)): 
      self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j]))) 

     error = outcome - self.layers[-1] 
     print("error: ", error.shape) 
     for j in range(1,1+len(self.synapses)): 
      delta = error * nonlin(self.layers[-j], True) 
      print("delta: ", delta.shape) 
      error = delta.dot(self.synapses[-j].T) 
      print("layer: ", self.layers[-(j+1)].shape) 
      print("layer.T: ", self.layers[-(j+1)].T.shape) 

      # this is the issue 
      print("dot: ", self.layers[-(j+1)].T.dot(delta).shape) 
      self.synapses[-j] += self.layers[-(j+1)].T.dot(delta) 


    def next_gen(self): 
     child = NeuralNetwork([1]) 
     for synapse in self.synapses: 
      # add variation 
      child.synapses.append(synapse + 0.1*np.random.random(synapse.shape)-0.05) 
     # child.name += " son of " + self.name 
     child.name += "<-" + self.name 
     return child 

    def feed(self, state): 
     self.layers = [] 
     self.layers.append(state) 
     for j in range(len(self.synapses)): 
      self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j]))) 
     return self.layers[-1] 

これは2048.pyは、次のようになります。私はそれが2に遭遇したときnumpyのは、何をすべきか知っているだろうと言われた

import random 
import os 
import sys 
import math 
import numpy as np 
from NeuralNetwork import * 

# global vars, constants and setup 
board = {} 
row_size = 4 
random.seed(1) 
HP = (16,9,4) 

# set up game board 
for i in range(row_size): # row 
    for j in range(row_size): #column 
     board[(i,j)] = 0 


# display function 
def display(): 
    for i in range(row_size): 
     print('\t'.join([str(board[(i,j)]) for j in range(row_size)])) 
    print() 


# logic function 
def logic(move, NN): 
    """ 
    char move is the move, one of any in "asdw" 
    NN is a NeuralNetwork object 
    """ 
    # print("mov", move) 
    score = 0 
    if move == 's': 
     for j in range(row_size): # columns 
      row_pointer = row_size-1 
      for i in reversed(range(row_size-1)): # go up the rows 
       if board[(i, j)] != 0: 
        # if there is a non-empty square above, and this is a zero #check 
        if board[(row_pointer, j)] == 0: 
         board[(row_pointer, j)] = board[(i, j)] 
         board[(i, j)] = 0 
         # row_pointer -= 1 # This is the new block to focus on 

        # if there is a non-empty square above, and they are not equivalent 
        elif board[(i, j)] != board[(row_pointer, j)]: 
         # while this intuitively is not a swap, without it I would need to zero board[(i,j)] 
         # that zero would cause problems if row_pointer-1 == i 
         board[(row_pointer-1, j)], board[(i, j)] = board[(i, j)], board[(row_pointer-1, j)] 
         row_pointer -= 1 # This is the new block to focus on 

        # if there is a non-empty square above, and they are the same 
        elif board[(i, j)] == board[(row_pointer, j)]: 
         board[(row_pointer, j)] += board[(i, j)] 
         board[(i, j)] = 0 
         score += board[(row_pointer, j)] + math.log(board[(row_pointer, j)], 2) 
    elif move == 'w': 
     for j in range(row_size): # columns 
      row_pointer = 0 
      for i in range(1, row_size): # go down the rows 
       if board[(i, j)] != 0: 
        # if there is a non-empty square above, and this is a zero 
        if board[(row_pointer, j)] == 0: 
         board[(row_pointer, j)] = board[(i, j)] 
         board[(i, j)] = 0 

        # if there is a non-empty square above, and they are not equivalent 
        elif board[(i, j)] != board[(row_pointer, j)]: 
         board[(row_pointer+1, j)], board[(i, j)] = board[(i, j)], board[(row_pointer+1, j)] 
         row_pointer += 1 # This is the new block to focus on 

        # if there is a non-empty square above, and they are the same 
        elif board[(i, j)] == board[(row_pointer, j)]: 
         board[(row_pointer, j)] += board[(i, j)] 
         board[(i, j)] = 0 
         score += board[(row_pointer, j)] + math.log(board[(row_pointer, j)], 2) 
    elif move == 'a': 
     for i in range(row_size): # rows 
      column_pointer = 0 
      for j in range(1, row_size): # go right through the columns 
       if board[(i, j)] != 0: 
        # if there is a non-empty square above, and this is a zero 
        if board[(i, column_pointer)] == 0: 
         board[(i, column_pointer)] = board[(i, j)] 
         board[(i, j)] = 0 

        # if there is a non-empty square above, and they are not equivalent 
        elif board[(i, j)] != board[(i, column_pointer)]: 
         board[(i, column_pointer+1)], board[(i, j)] = board[(i, j)], board[(i, column_pointer+1)] 
         column_pointer += 1 # This is the new block to focus on 

        # if there is a non-empty square above, and they are the same 
        elif board[(i, j)] == board[(i, column_pointer)]: 
         board[(i, column_pointer)] += board[(i, j)] 
         board[(i, j)] = 0 
         score += board[(i, column_pointer)] + math.log(board[(i, column_pointer)], 2) 
    elif move == 'd': 
     for i in range(row_size): # rows 
      column_pointer = row_size-1 
      for j in reversed(range(row_size-1)): # go left through the columns 
       if board[(i, j)] != 0: 
        # if there is a non-empty square above, and this is a zero 
        if board[(i, column_pointer)] == 0: 
         board[(i, column_pointer)] = board[(i, j)] 
         board[(i, j)] = 0 

        # if there is a non-empty square above, and they are not equivalent 
        elif board[(i, j)] != board[(i, column_pointer)]: 
         board[(i, column_pointer-1)], board[(i, j)] = board[(i, j)], board[(i, column_pointer-1)] 
         column_pointer -= 1 # This is the new block to focus on 

        # if there is a non-empty square above, and they are the same 
        elif board[(i, j)] == board[(i, column_pointer)]: 
         board[(i, column_pointer)] += board[(i, j)] 
         board[(i, j)] = 0 
         score += board[(i, column_pointer)] + math.log(board[(i, column_pointer)], 2) 

    else: 
     print("something is wrong") 
    NN.score += score 
    return score 


# checks to see whether there are any valid moves in a full board with no 0's 
def is_game_over(): 
    # check the top-left square 
    for i in range(row_size-1): 
     for j in range(row_size-1): 
      if board[(i,j)] in [board[(i+1,j)], board[(i,j+1)]]: # check the one below and to the right 
       return False 
    # Check the right-most column 
    for j in range(row_size-1): 
     if board[(row_size-1,j)] == board[(row_size-1,j+1)]: 
      return False 
    # Check the bottom row 
    for i in range(row_size-1): 
     if board[(i,row_size-1)] == board[(i+1,row_size-1)]: 
      return False 
    # There is no way to combine, game over 
    return True 


# NN controls 
NN = NeuralNetwork(HP) 

for step in range(10): 
    # set up game board 
    for i in range(row_size): # row 
     for j in range(row_size): #column 
      board[(i,j)] = 0 

    previous_board = [] 
    quit = False 
    # game loop 
    while not quit: 
     # set a new empty tile to a 2 
     while True: 
      i = random.randint(0,row_size-1) 
      j = random.randint(0,row_size-1) 
      # print(i,j,board[(i,j)]) 
      if board[(i,j)] != 0: continue 
      else: board[(i,j)] = 2 ; break 


     # View 
     # display() 


     # normalize data and make a guess with nn 
     state = np.array([board[(i,j)] for j in range(row_size) for i in range(row_size)]) 
     state[state==0] = 1 
     state = np.log2(state) 
     state = state/np.max(state) 
     # print('\n'.join(['\t'.join([str(state[j*row_size+i]) for j in range(row_size)])for i in range(row_size)])) 
     move = NN.feed(state) 


     # move 
     reward = 0 
     previous_board = list(board.values()) 
     while True: 
      if len(move[move == 0]) == 4: 
       if is_game_over(): 
        # print("Game Over") 
        quit = True 
        break 
      reward = logic("asdw"[move.argmax()], NN) 
      if previous_board == list(board.values()): move[move.argmax()] = 0 ; continue 
      else: break 

     if reward: 
      reward = nonlin(math.log2(reward)-math.log2(2048)) 
      move[np.argmax(move)] += reward 
      NN.train(state, move) 
    display() 


    print("score: " + str(NN.score)) 

    NN.score = 0 

1-D配列は点滅しますが、それは起こっていません。これらの配列を2次元にして、内部の次元を1にする必要がありますか?手伝ってくれる?あなたが見ることができるように、彼らは両方の1Dベクトルで

Traceback (most recent call last): 
    File "2048.py", line 195, in <module> 
    NN.train(state, move) 
    File "/home/jeff/Programs/grad_descent/NeuralNetwork.py", line 71, in train 
    print("dot: ", self.layers[-(j+1)].T.dot(delta).shape) 
ValueError: shapes (9,) and (4,) not aligned: 9 (dim 0) != 4 (dim 0) 

ので、numpyのは、ちょうどそれらに点在する必要があります

は、ここに完全なエラーです。

+0

ベクターは、まだ製品にそれらをドットするために、同じ大きさを持っている必要があります。あなたはそれらの製品の内側/点にしようとしていますか、それとも外側の製品ですか(あなたは結果が単一の数字か9x4のマトリックスであると思いますか? –

+0

はい、私はドットから9x4 ndarrayを作成しようとしています。 – Jeff

+1

それから内製品ではなく、外製品(https://docs.scipy.org/doc/numpy/reference/generated/numpy.outer.html)が必要です。 –

答えて

0

np.newaxisを使用して、明示的な1次元列表現を指定すると機能します。

注:スカラー出力を検索する場合、2つのベクトルはequal lengthである必要があります。 OPのエラーメッセージは、長さが9、長さが4ベクトルの内積を取ろうとしていることを示しています。私はあなたが実際に.dot()外の商品を返すことを望んでいると仮定しています。そうでない場合は、内製品は機能しません。その場合は、2つの等しい長さのベクトルが表示されることが予想される場所にない理由を調べてください。

付:

a = np.array([1,2,3]) 
b = np.array([2,3,4,5]) 

abの形状は、それぞれ、(3,)(4,)ある:

try: 
    print(a.shape) 
    print(b.shape) 
    print("a.b: \n{}".format(np.dot(a,b.T))) 
except ValueError as e: 
    print("failed: {}".format(e)) 

出力:newaxis

(3,) 
(4,) 
failed: shapes (3,) and (4,) not aligned: 3 (dim 0) != 4 (dim 0) 

、形状はとなりますと(4,1)

aa = a[:, np.newaxis] 
bb = b[:, np.newaxis] 

try: 
    print(aa.shape) 
    print(bb.shape) 
    print("aa.bb: \n{}".format(np.dot(aa,bb.T))) 
except ValueError as e: 
    print("failed: {}".format(e)) 

出力:

(3, 1) 
(4, 1) 
aa.bb: 
[[ 2 3 4 5] 
[ 4 6 8 10] 
[ 6 9 12 15]] 
関連する問題