TensorFlowを使って単純なニューラルネットワークを学習させようとしています。同じようなネットワークをMNISTデータセットで実行したときは問題ありませんでしたが、コードを自分のデータ向けに書き換えてGPUマシンで実行すると、メモリ不足になります。 すでに試したこと: - バッチサイズ(batch_size)を小さくする - エポック数を減らして学習する - DataSetクラスの一部をコメントアウトする - 画像を少数に絞る(約75K枚の代わりに10枚)。AmazonのP2.xlarge GPUインスタンスで実行しても、同じメモリエラーが発生します。


from __future__ import absolute_import 
from __future__ import division 
from __future__ import print_function 
import math 
import numpy as np 
import random 
import tensorflow as tf 
import gzip 
import os 
import random 
import glob 
import csv 
import numpy as np 
from six.moves import urllib 
from six.moves import xrange # pylint: disable=redefined-builtin 

class DataSet(object):
    """In-memory images/labels pair that is served in sequential mini-batches.

    The arrays are stored exactly as passed in: no flattening and no
    conversion to float32 in [0.0, 1.0] is performed here, because doing that
    on the full (~75k images of shape (224, 224, 3)) array exhausted memory.
    """

    def __init__(self, images, labels, fake_data=False, one_hot=False):
        """Construct a DataSet.

        Args:
            images: array indexed by example along axis 0 (unused content when
                `fake_data` is true).
            labels: array with one row per image along axis 0.
            fake_data: if true, skip the shape check and pretend to hold
                10000 examples; batches must then be requested with
                `next_batch(..., fake_data=True)`.
            one_hot: only used for fake data; selects the fake label format.

        Raises:
            AssertionError: if `images` and `labels` disagree on the number
                of examples (real data only).
        """
        # Stored unconditionally so next_batch(fake_data=True) never hits a
        # missing attribute on a dataset built from real arrays.
        self.one_hot = one_hot
        if fake_data:
            self._num_examples = 10000
        else:
            assert images.shape[0] == labels.shape[0], (
                'images.shape: %s labels.shape: %s' % (images.shape,
                                                       labels.shape))
            self._num_examples = images.shape[0]
        self._images = images
        self._labels = labels
        self._epochs_completed = 0
        self._index_in_epoch = 0

    @property
    def images(self):
        """The image array in its current (possibly reshuffled) order."""
        return self._images

    @property
    def labels(self):
        """The label array, kept in the same order as `images`."""
        return self._labels

    @property
    def num_examples(self):
        """Number of examples in one epoch."""
        return self._num_examples

    @property
    def epochs_completed(self):
        """How many times `next_batch` has wrapped around the data."""
        return self._epochs_completed

    def next_batch(self, batch_size, fake_data=False):
        """Return the next `batch_size` examples from this data set.

        Args:
            batch_size: number of examples to return.
            fake_data: if true, return constant dummy images and labels
                instead of slicing the stored arrays.

        Returns:
            A tuple `(images, labels)`. When the remaining examples cannot
            fill a whole batch, the leftover tail is dropped, the data is
            reshuffled and the batch is taken from the start of the new epoch.

        Raises:
            AssertionError: if `batch_size` exceeds the number of examples
                when an epoch boundary is crossed.
        """
        if fake_data:
            fake_image = [1] * 784
            fake_label = ([1] + [0] * 9) if self.one_hot else 0
            return ([fake_image for _ in range(batch_size)],
                    [fake_label for _ in range(batch_size)])

        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            # Finished epoch
            self._epochs_completed += 1
            # Shuffle the data. Images and labels are indexed with the same
            # permutation so every image keeps its label. Indexing with an
            # index array builds a reordered copy of each array.
            perm = np.arange(self._num_examples)
            np.random.shuffle(perm)
            self._images = self._images[perm]
            self._labels = self._labels[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_examples
        end = self._index_in_epoch
        return self._images[start:end], self._labels[start:end]

def read_data_sets(train_data, train_labels, test_data, test_labels,
                   fake_data=False, one_hot=False):
    """Bundle the train and test arrays into a holder of DataSet objects.

    Args:
        train_data: training images.
        train_labels: training labels, one row per training image.
        test_data: test images.
        test_labels: test labels, one row per test image.
        fake_data: if true, ignore the arrays and return fake `train`,
            `validation` and `test` data sets.
        one_hot: forwarded to the fake data sets only.

    Returns:
        An object with `train` and `test` attributes. A `validation`
        attribute is set only when `fake_data` is true.
    """
    class DataSets(object):
        """Plain attribute holder for the individual splits."""

    data_sets = DataSets()
    if fake_data:
        data_sets.train = DataSet([], [], fake_data=True, one_hot=one_hot)
        data_sets.validation = DataSet([], [], fake_data=True, one_hot=one_hot)
        data_sets.test = DataSet([], [], fake_data=True, one_hot=one_hot)
        return data_sets
    data_sets.train = DataSet(train_data, train_labels)
    data_sets.test = DataSet(test_data, test_labels)
    return data_sets

def randomize(a, b):
    """Shuffle two equally long arrays with one shared random permutation.

    Row i of the first result and row i of the second result come from the
    same original position, so paired data stays paired.

    Returns:
        A tuple `(shuffled_a, shuffled_b)` of reordered copies.
    """
    assert len(a) == len(b)
    # One index array drives both lookups, which keeps the pairs aligned.
    order = np.random.permutation(a.shape[0])
    return a[order], b[order]

# Load the pre-built arrays of the small data set from disk.
training_images = np.load('data_small/training_images.npy')
training_labels = np.load('data_small/training_labels.npy')
test_images = np.load('data_small/test_images.npy')
test_labels = np.load('data_small/test_labels.npy')

# Shuffle images and labels together once before training.
training_images, training_labels = randomize(training_images, training_labels)

avec = read_data_sets(training_images, training_labels, test_images, test_labels)

batch_size = 1 #53
print('The batch size is: ', batch_size)

# Images are fed directly as [batch, 224, 224, 3]. An unknown dimension in a
# placeholder shape is written as None; -1 is only understood by tf.reshape,
# which is why a [-1, 224, 224, 3] placeholder raised an error. Previously the
# name `images` was rebound to the output of a tf.reshape, so the feed dict
# was overriding an intermediate tensor rather than feeding a placeholder.
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
labels = tf.placeholder(tf.float32, [None, 1])
# Keep probability handed to the dropout layers of the network.
keep_rate = 0.8
keep_prob = tf.placeholder(tf.float32)

def conv2d(x, W):
    """Convolve `x` with the filter `W` using unit strides and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def maxpool2d(x):
    """Max-pool `x` over 2x2 windows moved in steps of 2, with SAME padding."""
    window_size = [1, 2, 2, 1]  # size of window
    window_step = [1, 2, 2, 1]  # movement of window
    return tf.nn.max_pool(x, ksize=window_size, strides=window_step, padding='SAME')

# Trainable weights of the network, keyed by layer name. The seven conv
# entries are 3x3 filters whose last two dimensions are the input and output
# channel counts; 'W_fc', 'W_fc2' and 'reg' are [inputs, outputs] matrices.
# NOTE(review): 'W_fc' is [28*28*256, 4096] = 822,083,584 values, roughly
# 3.3 GB if stored as float32 (confirm dtype). Its gradient and any
# per-variable optimizer state would each be the same size again, so this one
# matrix is presumably what exhausts GPU memory even at batch size 1.
# TODO confirm with a memory profile.
weights = {'W_conv1':tf.Variable(tf.truncated_normal([3,3,3,64], stddev=1e-4)),
      'W_conv2':tf.Variable(tf.truncated_normal([3,3,64,64], stddev=1e-4)),
      'W_conv3':tf.Variable(tf.truncated_normal([3,3,64,128], stddev=1e-4)),
      'W_conv4':tf.Variable(tf.truncated_normal([3,3,128,128], stddev=1e-4)),
      'W_conv5':tf.Variable(tf.truncated_normal([3,3,128,256], stddev=1e-4)),
      'W_conv6':tf.Variable(tf.truncated_normal([3,3,256,256], stddev=1e-4)),
      'W_conv7':tf.Variable(tf.truncated_normal([3,3,256,256], stddev=1e-4)),
      'W_fc':tf.Variable(tf.truncated_normal([28*28*256,4096], stddev=1e-4)),
      'W_fc2':tf.Variable(tf.truncated_normal([4096,2622], stddev=1e-4)),
      'reg':tf.Variable(tf.truncated_normal([2622,1], stddev=1e-4))}

# One bias vector per layer, initialised to 0.1 and sized to match that
# layer's output width.
biases = {'b_conv1':tf.Variable(tf.constant(0.1, shape=[64])),
      'b_conv2':tf.Variable(tf.constant(0.1, shape=[64])),
      'b_conv3':tf.Variable(tf.constant(0.1, shape=[128])),
      'b_conv4':tf.Variable(tf.constant(0.1, shape=[128])),
      'b_conv5':tf.Variable(tf.constant(0.1, shape=[256])),
      'b_conv6':tf.Variable(tf.constant(0.1, shape=[256])),
      'b_conv7':tf.Variable(tf.constant(0.1, shape=[256])),
      'b_fc':tf.Variable(tf.constant(0.1, shape=[4096])),
      'b_fc2':tf.Variable(tf.constant(0.1, shape=[2622])),
      'b_reg':tf.Variable(tf.constant(0.1, shape=[1]))}

# Convolutional stack: conv + bias + ReLU per layer, with a 2x2 max-pool
# after conv2, conv4 and conv7. Each layer output is routed through tf.Print
# with a "convN: " label for debugging.
conv1 = tf.nn.relu(conv2d(images, weights['W_conv1']) + biases['b_conv1'])

conv1 = tf.Print(conv1, [conv1], "conv1: ")
conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv2 = maxpool2d(conv2)
conv2 = tf.Print(conv2, [conv2], "conv2: ")

conv3 = tf.nn.relu(conv2d(conv2, weights['W_conv3']) + biases['b_conv3'])
conv3 = tf.Print(conv3, [conv3], "conv3: ")

conv4 = tf.nn.relu(conv2d(conv3, weights['W_conv4']) + biases['b_conv4'])
conv4 = maxpool2d(conv4)
conv4 = tf.Print(conv4, [conv4], "conv4: ")

conv5 = tf.nn.relu(conv2d(conv4, weights['W_conv5']) + biases['b_conv5'])
conv5 = tf.Print(conv5, [conv5], "conv5: ")

conv6 = tf.nn.relu(conv2d(conv5, weights['W_conv6']) + biases['b_conv6'])
conv6 = tf.Print(conv6, [conv6], "conv6: ")

conv7 = tf.nn.relu(conv2d(conv6, weights['W_conv7']) + biases['b_conv7'])
conv7 = maxpool2d(conv7)
conv7 = tf.Print(conv7, [conv7], "conv7: ")

# Flatten the pooled feature map for the fully connected layers. 28*28*256 is
# a 224x224 input halved by the three poolings, times conv7's 256 channels.
fc = tf.reshape(conv7,[-1, 28*28*256])
fc = tf.nn.relu(tf.matmul(fc, weights['W_fc'])+biases['b_fc'])
# Dropout is driven by the Python constant keep_rate, not by the keep_prob
# placeholder, so it cannot be switched off through the feed dict.
fc = tf.nn.dropout(fc, keep_rate)

# Second fully connected layer: linear (no ReLU), followed by dropout.
fc2 = tf.matmul(fc, weights['W_fc2'])+biases['b_fc2']
fc2 = tf.nn.dropout(fc2, keep_rate)

# Single linear output unit per example.
pred = tf.add(tf.matmul(fc2, weights['reg']), biases['b_reg'])

# Mean squared error between prediction and label, minimised with RMSProp at
# a learning rate of 0.001.
loss = tf.reduce_mean(tf.square(pred-labels))
opt = tf.train.RMSPropOptimizer(0.001)
train_op = opt.minimize(loss)
hm_epochs = 5
print ('Total epochs: ', hm_epochs)
saver = tf.train.Saver()
init_op = tf.global_variables_initializer()

# Training driver: initialise the variables, then run `hm_epochs` passes over
# the training set, printing the mean batch loss after each pass.
with tf.Session() as sess:
    print('Begin session')
    sess.run(init_op)  # initialize all variables
    # Whole batches per epoch; a trailing partial batch is not run.
    num_batches = int(avec.train.num_examples / batch_size)
    for epoch in range(hm_epochs):
        print('Begin epoch:', epoch)
        epoch_loss = 0
        for _ in range(num_batches):
            # Fetch the next batch of images and labels.
            np_images, np_labels = avec.train.next_batch(batch_size)
            print('np_images shape:', np_images.shape)
            print('np_labels shape:', np_labels.shape)
            # Put the batch into the feed dict.
            feed = {images: np_images, labels: np_labels}
            # The training step: evaluate loss and pred and apply train_op,
            # with the batch supplied through the feed dict.
            np_loss, np_pred, _ = sess.run([loss, pred, train_op], feed_dict=feed)

            epoch_loss += np_loss
        # Average over the batches actually run; max() guards an epoch with
        # no batches.
        print('Epoch', epoch+1, 'completed out of', hm_epochs, 'loss: ', epoch_loss/max(num_batches, 1))
    # Checkpointing is currently disabled. NOTE(review): the original
    # indentation was lost, so placing it after the epoch loop is an
    # assumption.
    #save_path = saver.save(sess, "model1.ckpt")
    #print("Model saved in file: %s" % save_path)


W tensorflow/core/common_runtime/bfc_allocator.cc:274] *****************************************************************************************xxxxxxxxxxx 
W tensorflow/core/common_runtime/bfc_allocator.cc:275] Ran out of memory trying to allocate 1.0KiB. See logs for memory state. 
W tensorflow/core/framework/op_kernel.cc:993] Resource exhausted: OOM when allocating tensor with shape[256] 
珍しいですね。原因を突き止めるには、GPUを無効にして(CUDA_VISIBLE_DEVICES= を設定)、バッチサイズ1でCPU上で実行し、メモリをプロファイリングしてどのテンソルが大きすぎるのかを調べてみてください。 –




INPUT: [224x224x3] memory: 224*224*3=150K weights: 0 
CONV1: [224x224x64] memory: 224*224*64=3.2M weights: (3*3*3)*64 = 1,728 
CONV2: [224x224x64] memory: 224*224*64=3.2M weights: (3*3*64)*64 = 36,864 
POOL2: [112x112x64] memory: 112*112*64=800K weights: 0 

CONV3: [112x112x128] memory: 112*112*128=1.6M weights: (3*3*64)*128 = 73,728 
CONV4: [112x112x128] memory: 112*112*128=1.6M weights: (3*3*128)*128 = 147,456 
POOL4: [56x56x128] memory: 56*56*128=400K  weights: 0 

CONV5: [56x56x256] memory: 56*56*256=800K weights: (3*3*128)*256 = 294,912 
CONV6: [56x56x256] memory: 56*56*256=800K weights: (3*3*256)*256 = 589,824 
CONV7: [56x56x256] memory: 56*56*256=800K weights: (3*3*256)*256 = 589,824 
POOL7: [28x28x256] memory: 28*28*256=200K weights: 0 

FC1: [1x1x4096]  memory: 4096 weights: 28*28*256*4096 = 822,083,584 
FC2: [1x1x2622]  memory: 2622 weights: 4096*2622 = 10,739,712 

TOTAL memory: 14M * 4 bytes ~= 60MB/image (only forward) 
TOTAL memory: 2 * 14M * 4 bytes ~= 120MB/image (forward + backward) 

Batch_Size = 128 
TOTAL memory: 128 * 14M * 4 bytes ~= 8GB (batch-forward) 




私もそう思いました。メモリをいくつかのチャンクに分けて割り当てた結果、すべてのメモリを使い果たしているようです。コンソールに出力されたエラーを投稿に追記しました。このエラーはすべてのレイヤーで繰り返し発生します。 –


ファイルサイズは以下のとおりです。 data_small/training_images.npy - 1.1 MB、 data_small/testing_images.npy - 452 B。 最終的に使用したいのは次のファイルです: data/training_images.npy - 11.28 GB、 data/testing_images.npy - 5.47 GB –


ヤロスラフの提案を試しましたか? – hars
