以下のコードを貼り付けています。コードの 'forward'部分は、 "assert root_emb == 1 + emb [0] * emb [1]"の合格によって動作するようです。ただし、トレーニングステップ(アサート後の行)を実行すると、ウィルスループ中に書き込まれたTensorArrayの問題を示唆する奇妙なエラーが表示されます。 tf.while_loopでTensorArrayのグラデーションをとるときに奇妙なエラーが発生する
tensorflow.python.framework.errors.InvalidArgumentError: TensorArray [email protected]: Could not read from TensorArray index 2 because it has not yet been written to. [[Node: gradients/while/TensorArrayWrite_grad/TensorArrayRead = TensorArrayRead[_class=["loc:@TensorArray"], dtype=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/while/TensorArrayWrite_grad/TensorArrayGrad/TensorArrayGrad, gradients/while/TensorArrayWrite_grad/TensorArrayRead/StackPop, gradients/while/TensorArrayWrite_grad/TensorArrayGrad/gradient_flow)]] Caused by op u'gradients/while/TensorArrayWrite_grad/TensorArrayRead', defined at: File "minimal.py", line 82, in model = TreeRNN(8, 1, 1, degree=2) File "minimal.py", line 61, in init self.grad = tf.gradients(self.loss, self.params) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gradients.py", line 481, in gradients in_grads = _AsList(grad_fn(op, *out_grads)) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_grad.py", line 115, in _TensorArrayWriteGrad grad = g.read(index) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py", line 177, in read dtype=self._dtype, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 781, in _tensor_array_read flow_in=flow_in, dtype=dtype, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 694, in apply_op op_def=op_def) File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py", line 2154, in create_op original_op=self._default_original_op, op_def=op_def) File "/Library/Python/2.7/site-packages/tensorflow/python/framework/ops.py", line 1154, in init self._traceback = _extract_stack()
...which was originally created as op u'while/TensorArrayWrite', defined at: File "minimal.py", line 82, in model = TreeRNN(8, 1, 1, degree=2) File "minimal.py", line 50, in init loop_vars=(self.time, node_emb, tf.zeros([1]))) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1681, in While back_prop=back_prop, swap_memory=swap_memory, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1671, in while_loop result = context.BuildLoop(cond, body, loop_vars) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1572, in BuildLoop body_result = body(*vars_for_body_with_tensor_arrays) File "minimal.py", line 43, in _recurrence new_node_emb = node_emb.write(children_and_parent[-1], parent_emb) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/tensor_array_ops.py", line 200, in write name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 875, in _tensor_array_write value=value, flow_in=flow_in, name=name) File "/Library/Python/2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 694, in apply_op op_def=op_def)
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import tensor_array_ops, control_flow_ops
class TreeRNN(object):
def __init__(self, num_emb, emb_dim, output_dim, degree=2, learning_rate=0.01):
self.num_emb = num_emb
self.emb_dim = emb_dim
self.output_dim = output_dim
self.degree= degree
self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
self.embeddings = tf.Variable(self.init_matrix([self.num_emb, self.emb_dim]))
self.recursive_unit = self.create_recursive_unit()
self.W_out = tf.Variable(self.init_matrix([self.output_dim, self.emb_dim]))
self.b_out = tf.Variable(self.init_vector([self.output_dim]))
self.x = tf.placeholder(tf.int32, shape=[None]) # word indices
self.tree = tf.placeholder(tf.int32, shape=[None, self.degree + 1])
self.y = tf.placeholder(tf.float32, shape=[self.output_dim])
num_words, = tf.unpack(tf.shape(self.x), 1) # also num leaves
emb_x = tf.gather(self.embeddings, self.x)
node_emb = tensor_array_ops.TensorArray(
dtype=tf.float32, size=num_words - 1, dynamic_size=True,
clear_after_read=False)
node_emb = node_emb.unpack(emb_x)
num_nodes, _ = tf.unpack(tf.shape(self.tree), 2) # num internal nodes
tree_traversal = tensor_array_ops.TensorArray(
dtype=tf.int32, size=num_nodes)
tree_traversal = tree_traversal.unpack(self.tree)
def _recurrence(t, node_emb, _):
node_info = tree_traversal.read(t)
children_and_parent = tf.unpack(node_info, self.degree + 1)
child_emb = []
for i in xrange(self.degree):
child_emb.append(node_emb.read(children_and_parent[i]))
parent_emb = self.recursive_unit(child_emb)
new_node_emb = node_emb.write(children_and_parent[-1], parent_emb)
return t + 1, new_node_emb, parent_emb
self.time = tf.constant(0, dtype=tf.int32, name='time')
_, _, final_emb = control_flow_ops.While(
cond=lambda t, _1, _2: t < num_nodes,
body=_recurrence,
loop_vars=(self.time, node_emb, tf.zeros([1])))
self.final_state = final_emb
self.pred_y = self.activation(
tf.matmul(self.W_out, tf.reshape(self.final_state, [self.emb_dim, 1]))
+ self.b_out)
self.loss = self.loss_fn(self.y, self.pred_y)
self.params = tf.trainable_variables()
opt = tf.train.GradientDescentOptimizer(self.learning_rate)
self.grad = tf.gradients(self.loss, self.params)
self.updates = opt.apply_gradients(zip(self.grad, self.params))
def init_matrix(self, shape):
return tf.random_normal(shape, stddev=0.1)
def init_vector(self, shape):
return tf.zeros(shape)
def create_recursive_unit(self):
def unit(child_emb): # very simple
return 1 + child_emb[0] * child_emb[1]
return unit
def activation(self, inp):
return tf.sigmoid(inp)
def loss_fn(self, y, pred_y):
return tf.reduce_sum(tf.square(y - pred_y))
model = TreeRNN(8, 1, 1, degree=2)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
root_emb = sess.run([model.final_state],
feed_dict={model.x: np.array([0, 1]), model.tree: np.array([[0, 1, 2]])})
emb, = sess.run([model.embeddings])
assert root_emb == 1 + emb[0] * emb[1]
out = sess.run([model.updates, model.loss],
feed_dict={model.x: np.array([0, 1]),
model.tree: np.array([[0, 1, 2]]),
model.y: np.array([0])})
それが財産をトリアージすることができるよう、多分githubの上の問題を提出、バグのような音? –
OK - 行います。その間に可能な回避策をご存知ですか?コードはwhileループ(反復を書き出す)なしで動作することができるので、私ができることの1つになるかもしれません。 –