強化学習を使用して2048を再生するようにNNをトレーニングしています。少なくとも、私はこれが初めてだからだと思う。ValueError:図形(9、)と(4)が整列していません
これはNeuralNetwork.pyは、次のようになります。
import random
import numpy as np
def nonlin(x, deriv=False):
if(deriv==True):
return x * (1-x)
return 1/(1+np.exp(-x))
np.random.seed(1)
class NeuralNetwork:
next_ID = 0
def __init__(self, HyperParams):
self.synapses = []
for synapse in range(len(HyperParams)-1):
self.synapses.append(2*np.random.random((HyperParams[synapse], HyperParams[synapse+1]))-1)
self.score = 0
# self.name = words[random.randint(0, len(words))].strip()
self.name = str(NeuralNetwork.next_ID)
NeuralNetwork.next_ID += 1
def train_batch(self, epoch, state, outcome):
for i in range(epoch):
self.layers = []
self.layers.append(state)
for j in range(len(self.synapses)):
self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j])))
error = outcome - self.layers[-1]
if (i % 1000) == 0: print(str(np.mean(np.abs(error))))
for j in range(1,1+len(self.synapses)):
delta = error * nonlin(self.layers[-j], True)
error = delta.dot(self.synapses[-j].T)
self.synapses[-j] += self.layers[-(j+1)].T.dot(delta)
def train(self, state, outcome):
self.layers = []
self.layers.append(state)
for j in range(len(self.synapses)):
self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j])))
error = outcome - self.layers[-1]
print("error: ", error.shape)
for j in range(1,1+len(self.synapses)):
delta = error * nonlin(self.layers[-j], True)
print("delta: ", delta.shape)
error = delta.dot(self.synapses[-j].T)
print("layer: ", self.layers[-(j+1)].shape)
print("layer.T: ", self.layers[-(j+1)].T.shape)
# this is the issue
print("dot: ", self.layers[-(j+1)].T.dot(delta).shape)
self.synapses[-j] += self.layers[-(j+1)].T.dot(delta)
def next_gen(self):
child = NeuralNetwork([1])
for synapse in self.synapses:
# add variation
child.synapses.append(synapse + 0.1*np.random.random(synapse.shape)-0.05)
# child.name += " son of " + self.name
child.name += "<-" + self.name
return child
def feed(self, state):
self.layers = []
self.layers.append(state)
for j in range(len(self.synapses)):
self.layers.append(nonlin(np.dot(self.layers[-1], self.synapses[j])))
return self.layers[-1]
これは2048.pyは、次のようになります。私はそれが2に遭遇したときnumpyのは、何をすべきか知っているだろうと言われた
import random
import os
import sys
import math
import numpy as np
from NeuralNetwork import *
# global vars, constants and setup
board = {}
row_size = 4
random.seed(1)
HP = (16,9,4)
# set up game board
for i in range(row_size): # row
for j in range(row_size): #column
board[(i,j)] = 0
# display function
def display():
for i in range(row_size):
print('\t'.join([str(board[(i,j)]) for j in range(row_size)]))
print()
# logic function
def logic(move, NN):
"""
char move is the move, one of any in "asdw"
NN is a NeuralNetwork object
"""
# print("mov", move)
score = 0
if move == 's':
for j in range(row_size): # columns
row_pointer = row_size-1
for i in reversed(range(row_size-1)): # go up the rows
if board[(i, j)] != 0:
# if there is a non-empty square above, and this is a zero #check
if board[(row_pointer, j)] == 0:
board[(row_pointer, j)] = board[(i, j)]
board[(i, j)] = 0
# row_pointer -= 1 # This is the new block to focus on
# if there is a non-empty square above, and they are not equivalent
elif board[(i, j)] != board[(row_pointer, j)]:
# while this intuitively is not a swap, without it I would need to zero board[(i,j)]
# that zero would cause problems if row_pointer-1 == i
board[(row_pointer-1, j)], board[(i, j)] = board[(i, j)], board[(row_pointer-1, j)]
row_pointer -= 1 # This is the new block to focus on
# if there is a non-empty square above, and they are the same
elif board[(i, j)] == board[(row_pointer, j)]:
board[(row_pointer, j)] += board[(i, j)]
board[(i, j)] = 0
score += board[(row_pointer, j)] + math.log(board[(row_pointer, j)], 2)
elif move == 'w':
for j in range(row_size): # columns
row_pointer = 0
for i in range(1, row_size): # go down the rows
if board[(i, j)] != 0:
# if there is a non-empty square above, and this is a zero
if board[(row_pointer, j)] == 0:
board[(row_pointer, j)] = board[(i, j)]
board[(i, j)] = 0
# if there is a non-empty square above, and they are not equivalent
elif board[(i, j)] != board[(row_pointer, j)]:
board[(row_pointer+1, j)], board[(i, j)] = board[(i, j)], board[(row_pointer+1, j)]
row_pointer += 1 # This is the new block to focus on
# if there is a non-empty square above, and they are the same
elif board[(i, j)] == board[(row_pointer, j)]:
board[(row_pointer, j)] += board[(i, j)]
board[(i, j)] = 0
score += board[(row_pointer, j)] + math.log(board[(row_pointer, j)], 2)
elif move == 'a':
for i in range(row_size): # rows
column_pointer = 0
for j in range(1, row_size): # go right through the columns
if board[(i, j)] != 0:
# if there is a non-empty square above, and this is a zero
if board[(i, column_pointer)] == 0:
board[(i, column_pointer)] = board[(i, j)]
board[(i, j)] = 0
# if there is a non-empty square above, and they are not equivalent
elif board[(i, j)] != board[(i, column_pointer)]:
board[(i, column_pointer+1)], board[(i, j)] = board[(i, j)], board[(i, column_pointer+1)]
column_pointer += 1 # This is the new block to focus on
# if there is a non-empty square above, and they are the same
elif board[(i, j)] == board[(i, column_pointer)]:
board[(i, column_pointer)] += board[(i, j)]
board[(i, j)] = 0
score += board[(i, column_pointer)] + math.log(board[(i, column_pointer)], 2)
elif move == 'd':
for i in range(row_size): # rows
column_pointer = row_size-1
for j in reversed(range(row_size-1)): # go left through the columns
if board[(i, j)] != 0:
# if there is a non-empty square above, and this is a zero
if board[(i, column_pointer)] == 0:
board[(i, column_pointer)] = board[(i, j)]
board[(i, j)] = 0
# if there is a non-empty square above, and they are not equivalent
elif board[(i, j)] != board[(i, column_pointer)]:
board[(i, column_pointer-1)], board[(i, j)] = board[(i, j)], board[(i, column_pointer-1)]
column_pointer -= 1 # This is the new block to focus on
# if there is a non-empty square above, and they are the same
elif board[(i, j)] == board[(i, column_pointer)]:
board[(i, column_pointer)] += board[(i, j)]
board[(i, j)] = 0
score += board[(i, column_pointer)] + math.log(board[(i, column_pointer)], 2)
else:
print("something is wrong")
NN.score += score
return score
# checks to see whether there are any valid moves in a full board with no 0's
def is_game_over():
# check the top-left square
for i in range(row_size-1):
for j in range(row_size-1):
if board[(i,j)] in [board[(i+1,j)], board[(i,j+1)]]: # check the one below and to the right
return False
# Check the right-most column
for j in range(row_size-1):
if board[(row_size-1,j)] == board[(row_size-1,j+1)]:
return False
# Check the bottom row
for i in range(row_size-1):
if board[(i,row_size-1)] == board[(i+1,row_size-1)]:
return False
# There is no way to combine, game over
return True
# NN controls
NN = NeuralNetwork(HP)
for step in range(10):
# set up game board
for i in range(row_size): # row
for j in range(row_size): #column
board[(i,j)] = 0
previous_board = []
quit = False
# game loop
while not quit:
# set a new empty tile to a 2
while True:
i = random.randint(0,row_size-1)
j = random.randint(0,row_size-1)
# print(i,j,board[(i,j)])
if board[(i,j)] != 0: continue
else: board[(i,j)] = 2 ; break
# View
# display()
# normalize data and make a guess with nn
state = np.array([board[(i,j)] for j in range(row_size) for i in range(row_size)])
state[state==0] = 1
state = np.log2(state)
state = state/np.max(state)
# print('\n'.join(['\t'.join([str(state[j*row_size+i]) for j in range(row_size)])for i in range(row_size)]))
move = NN.feed(state)
# move
reward = 0
previous_board = list(board.values())
while True:
if len(move[move == 0]) == 4:
if is_game_over():
# print("Game Over")
quit = True
break
reward = logic("asdw"[move.argmax()], NN)
if previous_board == list(board.values()): move[move.argmax()] = 0 ; continue
else: break
if reward:
reward = nonlin(math.log2(reward)-math.log2(2048))
move[np.argmax(move)] += reward
NN.train(state, move)
display()
print("score: " + str(NN.score))
NN.score = 0
1-D配列は点滅しますが、それは起こっていません。これらの配列を2次元にして、内部の次元を1にする必要がありますか?手伝ってくれる?あなたが見ることができるように、彼らは両方の1Dベクトルで
Traceback (most recent call last):
File "2048.py", line 195, in <module>
NN.train(state, move)
File "/home/jeff/Programs/grad_descent/NeuralNetwork.py", line 71, in train
print("dot: ", self.layers[-(j+1)].T.dot(delta).shape)
ValueError: shapes (9,) and (4,) not aligned: 9 (dim 0) != 4 (dim 0)
ので、numpyのは、ちょうどそれらに点在する必要があります
は、ここに完全なエラーです。
ベクターは、まだ製品にそれらをドットするために、同じ大きさを持っている必要があります。あなたはそれらの製品の内側/点にしようとしていますか、それとも外側の製品ですか(あなたは結果が単一の数字か9x4のマトリックスであると思いますか? –
はい、私はドットから9x4 ndarrayを作成しようとしています。 – Jeff
それから内製品ではなく、外製品(https://docs.scipy.org/doc/numpy/reference/generated/numpy.outer.html)が必要です。 –