Splitting an ndarray gives unexpected results (TensorFlow RNN tutorial)

I am following the TensorFlow RNN tutorial and have a question about the input format.

They take raw_x (a one-hot vector) and first cut it up by batch_size (200) to form data_x. So far so good. Then they further cut data_x into pieces of length 5 (num_steps, or the graph width):


for i in range(epoch_size): 
    x = data_x[:, i * num_steps:(i + 1) * num_steps] 
    y = data_y[:, i * num_steps:(i + 1) * num_steps] 
    yield (x, y) 

However, when I look at the data, the slices of x do not match data_x. The first slice does, but after that they diverge.

Am I misunderstanding the code above? I would like to understand how x is created and what it is supposed to look like.
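
To make the slicing concrete, here is a minimal numpy sketch with small toy sizes of my own (a sequence of length 40, batch_size 4, num_steps 5; these are not the tutorial's constants):

import numpy as np

# toy stand-ins for the tutorial's constants
raw_x = np.arange(40)
batch_size, num_steps = 4, 5
batch_partition_length = len(raw_x) // batch_size  # 10

# each row of data_x is one contiguous partition of raw_x
data_x = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
for i in range(batch_size):
    data_x[i] = raw_x[batch_partition_length * i:batch_partition_length * (i + 1)]

# the first yielded x is the first num_steps COLUMNS of every row
x0 = data_x[:, 0:num_steps]
print(x0)
# [[ 0  1  2  3  4]   <- raw_x[0:5]
#  [10 11 12 13 14]   <- raw_x[10:15], not raw_x[5:10]
#  [20 21 22 23 24]
#  [30 31 32 33 34]]

# the second yielded x picks up where each row left off
x1 = data_x[:, num_steps:2 * num_steps]
print(x1[0])  # [5 6 7 8 9] -> raw_x[5:10] only appears one iteration later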

(screenshots of data_x and of the yielded x slices)

I expected the second row of x to be 0 1 0 1 0.

Also, I thought an epoch was one complete pass through the data, but here it looks like they split the data into 1000 pieces (epoch_size)?
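
With the constants in the code below, the sizes work out like this (a quick check of my own, using the same names as the tutorial):

size, batch_size, num_steps = 1000000, 200, 5
batch_partition_length = size // batch_size        # 5000 columns per row of data_x
epoch_size = batch_partition_length // num_steps   # 1000 (x, y) batches per pass

If I read gen_batch correctly, epoch_size is the number of (x, y) batches one full pass through the data yields, not a split into 1000 independent datasets.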

In case it helps, here is my full code. I am trying to figure out what is happening to x, around line 48:

import numpy as np 
import tensorflow as tf 
# %matplotlib inline 
import matplotlib.pyplot as plt 

# Global config variables 
num_steps = 5 # number of truncated backprop steps ('n' in the discussion above) 
batch_size = 200 
num_classes = 2 
state_size = 4 
learning_rate = 0.1 

def gen_data(size=1000000):
    print('generating data')
    X = np.array(np.random.choice(2, size=(size,)))
    Y = []
    for i in range(size):
        threshold = 0.5
        # Y[i] depends on X three and eight steps back
        # (negative indices wrap around for the first few i)
        if X[i - 3] == 1:
            threshold += 0.5
        if X[i - 8] == 1:
            threshold -= 0.25
        if np.random.rand() > threshold:
            Y.append(0)
        else:
            Y.append(1)
    return X, np.array(Y)

# adapted from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py 
def gen_batch(raw_data, batch_size, num_steps):
    print('generating batches')
    raw_x, raw_y = raw_data
    data_length = len(raw_x)

    # partition raw data into batches and stack them vertically in a data matrix
    batch_partition_length = data_length // batch_size
    data_x = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    data_y = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    for i in range(batch_size):
        data_x[i] = raw_x[batch_partition_length * i:batch_partition_length * (i + 1)]
        data_y[i] = raw_y[batch_partition_length * i:batch_partition_length * (i + 1)]
    # further divide batch partitions into num_steps for truncated backprop
    epoch_size = batch_partition_length // num_steps

    for i in range(epoch_size):
        x = data_x[:, i * num_steps:(i + 1) * num_steps]
        y = data_y[:, i * num_steps:(i + 1) * num_steps]
        yield (x, y)

def gen_epochs(n, num_steps):
    for i in range(n):
        yield gen_batch(gen_data(), batch_size, num_steps)


""" 
Placeholders 
""" 

x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder') 
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder') 
init_state = tf.zeros([batch_size, state_size]) 

""" 
RNN Inputs 
""" 

# Turn our x placeholder into a list of one-hot tensors: 
# rnn_inputs is a list of num_steps tensors with shape [batch_size, num_classes] 
x_one_hot = tf.one_hot(x, num_classes) 
rnn_inputs = tf.unstack(x_one_hot, axis=1) 
""" 
Definition of rnn_cell 

This is very similar to the __call__ method on Tensorflow's BasicRNNCell. See: 
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py 
""" 
with tf.variable_scope('rnn_cell'): 
    W = tf.get_variable('W', [num_classes + state_size, state_size]) 
    b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0)) 

def rnn_cell(rnn_input, state):
    with tf.variable_scope('rnn_cell', reuse=True):
        W = tf.get_variable('W', [num_classes + state_size, state_size])
        b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
    return tf.tanh(tf.matmul(tf.concat(axis=1, values=[rnn_input, state]), W) + b)
""" 
Adding rnn_cells to graph 

This is a simplified version of the "rnn" function from Tensorflow's api. See: 
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py 
""" 
state = init_state 
rnn_outputs = [] 
for rnn_input in rnn_inputs: 
    state = rnn_cell(rnn_input, state) 
    rnn_outputs.append(state) 
final_state = rnn_outputs[-1] 
""" 
Predictions, loss, training step 

Losses and total_loss are similar to the "sequence_loss_by_example" and "sequence_loss" 
functions, respectively, from Tensorflow's api. See: 
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py 
""" 

#logits and predictions 
with tf.variable_scope('softmax'): 
    W = tf.get_variable('W', [state_size, num_classes]) 
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0)) 
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs] 
predictions = [tf.nn.softmax(logit) for logit in logits] 

# Turn our y placeholder into a list of labels
y_as_list = [tf.squeeze(i, axis=[1]) for i in tf.split(axis=1, num_or_size_splits=num_steps, value=y)] 

#losses and train_step 
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=label)
          for logit, label in zip(logits, y_as_list)]
total_loss = tf.reduce_mean(losses) 
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss) 
""" 
Function to train the network 
""" 

def train_network(num_epochs, num_steps, state_size=4, verbose=True):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
            training_loss = 0
            training_state = np.zeros((batch_size, state_size))
            if verbose:
                print("\nEPOCH", idx)
            for step, (X, Y) in enumerate(epoch):
                tr_losses, training_loss_, training_state, _ = \
                    sess.run([losses,
                              total_loss,
                              final_state,
                              train_step],
                             feed_dict={x: X, y: Y, init_state: training_state})
                training_loss += training_loss_
                if step % 100 == 0 and step > 0:
                    if verbose:
                        print("Average loss at step", step,
                              "for last 100 steps:", training_loss / 100)
                    training_losses.append(training_loss / 100)
                    training_loss = 0

    return training_losses

training_losses = train_network(1, num_steps)
plt.plot(training_losses) 

Answer


It looks like the batches are effectively transposed.

So the first row of the x matrix (200 x 5) corresponds to the first five elements of raw_x.

Only on the next iteration does the next chunk, raw_x[5:10], appear (again in the first row of x).
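
A quick way to verify this, reusing gen_data and gen_batch from the question (my own check, not from the tutorial):

raw_x, raw_y = gen_data()
batches = gen_batch((raw_x, raw_y), batch_size, num_steps)
x, y = next(batches)

bpl = len(raw_x) // batch_size  # batch_partition_length, 5000 here
print(np.array_equal(x[0], raw_x[:num_steps]))           # True: row 0 starts at raw_x[0]
print(np.array_equal(x[1], raw_x[bpl:bpl + num_steps]))  # True: row 1 starts one partition later

x2, y2 = next(batches)
print(np.array_equal(x2[0], raw_x[num_steps:2 * num_steps]))  # True: raw_x[5:10] shows up in the next iteration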
