2017-11-15 9 views
0

次のスクリプトで、CNNをmnistデータでトレーニングします。 データはdataset_dirにあり、すべての画像は.pngです。ラベルはイメージのフォルダです。 イメージパスとラベルを取得するジェネレータでCNNを訓練したいと思います。私は同じことをしようとしたが、その後、発電機にそれらを与えること(だけのパスを与えていない)の前に、すべてのイメージのロードをそのはるかに良い場合ケラス:ジェネレータの画像をロードします。ジェネレータの外の画像をロードします。

from __future__ import print_function 
import cv2, numpy as np 
import random, os, glob, time 
import keras 
from keras.models import Sequential 
from keras.layers import Dense, Dropout, Flatten 
from keras.layers import Conv2D, MaxPooling2D 

dataset_dir = '/home/viktor/PycharmProjects/Datasets/mnist_png/training/**/*.png' #lies alle bilder ein, egal ob train oder test set, wir splitten spaeter 
NUM_CLASSES = 10 
BATCH_SIZE = 128 
NUM_EPOCHS = 1 

def paths_and_labels(dataset_dir): 
    # dataset_dir = '/home/viktor/PycharmProjects/Datasets/mnist_png/training/**/*.png' # dataset_dir soll so aussehen, die zwei ** sind die klassennamen, ordner** ist also ordner_auto, ordner_person,... 
    paths = glob.glob(dataset_dir) 
    labels = [] 
    labels.append([os.path.basename(os.path.dirname(path)) for path in paths]) 
    labels = labels[0] #labels als string 

    class_names = list(set(labels)) # welche klassen gibt es, string 
    labels_classes = np.zeros([len(labels)], dtype=int) 
    for i, class_name in enumerate(class_names): 
     class_index = [j for j, x in enumerate(labels) if x == class_name] 
     labels_classes[class_index] = i # labels als int, um die klasse als string zu bekommen, mach = class_names[labels_class[nr]] 

    paths_and_labels_classes = list(zip(paths, labels_classes)) 
    random.shuffle(paths_and_labels_classes) 
    paths, labels_classes = zip(*paths_and_labels_classes) 

    train_paths = paths[0:int(0.6 * len(paths))] 
    train_labels = labels_classes[0:int(0.6 * len(labels_classes))] 

    test_paths = paths[int(0.8 * len(paths)):] 
    test_labels = labels_classes[int(0.8 * len(labels_classes)):] 

    val_paths = paths[int(0.6 * len(paths)):int(0.8 * len(paths))] 
    val_labels = labels_classes[int(0.6 * len(paths)):int(0.8 * len(paths))] 

    return train_paths, train_labels, test_paths, test_labels, val_paths, val_labels, class_names 

def generator(image_paths, labels, batch_size): 
    batch_features = np.zeros((batch_size, 28, 28, 1)) #oder 224,224,3 
    batch_labels = np.zeros((batch_size, NUM_CLASSES)) 
    while True: 
     for i in range(batch_size): 
      # choose random index of one image in image_paths 
      index = np.random.choice(len(image_paths), 1, replace=False)[0] 
      im = cv2.resize(cv2.imread(image_paths[index],0), (28, 28)).astype(np.float32) 
      im = np.expand_dims(im, axis=2) # weil es ein CHANNEL ist, spaeter muss das glaube ich weg 
      batch_features[i] = im 
      batch_labels[i][labels[index]] = 1 
     batch_features = batch_features.astype('float32') 
     batch_features /= 255 
     yield (batch_features, batch_labels) 

train_paths, train_labels, test_paths, test_labels, val_paths, val_labels, c_names = paths_and_labels(dataset_dir) 
input_shape = (28, 28, 1) 

model = Sequential() 
model.add(Conv2D(32, kernel_size=(3, 3), 
       activation='relu', 
       input_shape=input_shape)) 
model.add(Conv2D(64, (3, 3), activation='relu')) 
model.add(MaxPooling2D(pool_size=(2, 2))) 
model.add(Dropout(0.25)) 
model.add(Flatten()) 
model.add(Dense(128, activation='relu')) 
model.add(Dropout(0.5)) 
model.add(Dense(NUM_CLASSES, activation='softmax')) 

model.compile(loss=keras.losses.categorical_crossentropy, 
       optimizer=keras.optimizers.Adadelta(), 
       metrics=['accuracy']) 
model.summary() 
#my_callback = [keras.callbacks.TensorBoard(log_dir='/home/viktor/PycharmProjects/CNN_Object_Classification/logs2', histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)] 
start_time = time.clock() 
model.fit_generator(generator=generator(train_paths, train_labels, BATCH_SIZE), steps_per_epoch=int(len(train_paths)/BATCH_SIZE), epochs=NUM_EPOCHS)#,callbacks=my_callback) 
elapsed_time = time.clock() - start_time 
print('elapsed time: ', elapsed_time) 

結果は

1/281 [..............................] - ETA: 493s - loss: 16.0029 - acc: 0.1250 
    2/281 [..............................] - ETA: 255s - loss: 15.9644 - acc: 0.0820 
    3/281 [..............................] - ETA: 175s - loss: 39.3005 - acc: 0.0729 
    4/281 [..............................] - ETA: 135s - loss: 49.8761 - acc: 0.0742 
    5/281 [..............................] - ETA: 113s - loss: 55.0494 - acc: 0.0703 

277/281 [============================>.] - ETA: 0s - loss: 25.6649 - acc: 0.0346 
278/281 [============================>.] - ETA: 0s - loss: 25.6554 - acc: 0.0345 
279/281 [============================>.] - ETA: 0s - loss: 25.6460 - acc: 0.0343 
280/281 [============================>.] - ETA: 0s - loss: 25.6367 - acc: 0.0342 
281/281 [==============================] - 22s - loss: 25.6274 - acc: 0.0341 
elapsed time: 37.915506 

良いものではありません。

from __future__ import print_function 
import cv2, numpy as np 
import random, os, glob, time 
import keras 
from keras.models import Sequential 
from keras.layers import Dense, Dropout, Flatten 
from keras.layers import Conv2D, MaxPooling2D 

dataset_dir = '/home/viktor/PycharmProjects/Datasets/mnist_png/training/**/*.png' #lies alle bilder ein, egal ob train oder test set, wir splitten spaeter 
NUM_CLASSES = 10 
BATCH_SIZE = 128 
NUM_EPOCHS = 1 

def paths_and_labels(dataset_dir): 
    # dataset_dir = '/home/viktor/PycharmProjects/Datasets/mnist_png/training/**/*.png' # dataset_dir soll so aussehen, die zwei ** sind die klassennamen, ordner** ist also ordner_auto, ordner_person,... 
    paths = glob.glob(dataset_dir) 
    labels = [] 
    labels.append([os.path.basename(os.path.dirname(path)) for path in paths]) 
    labels = labels[0] #labels als string 

    class_names = list(set(labels)) # welche klassen gibt es, string 
    labels_classes = np.zeros([len(labels)], dtype=int) 
    for i, class_name in enumerate(class_names): 
     class_index = [j for j, x in enumerate(labels) if x == class_name] 
     labels_classes[class_index] = i # labels als int, um die klasse als string zu bekommen, mach = class_names[labels_class[nr]] 

    paths_and_labels_classes = list(zip(paths, labels_classes)) 
    random.shuffle(paths_and_labels_classes) 
    paths, labels_classes = zip(*paths_and_labels_classes) 

    train_paths = paths[0:int(0.6 * len(paths))] 
    train_labels = labels_classes[0:int(0.6 * len(labels_classes))] 

    test_paths = paths[int(0.8 * len(paths)):] 
    test_labels = labels_classes[int(0.8 * len(labels_classes)):] 

    val_paths = paths[int(0.6 * len(paths)):int(0.8 * len(paths))] 
    val_labels = labels_classes[int(0.6 * len(paths)):int(0.8 * len(paths))] 

    train_images = np.zeros((len(train_paths), 28, 28, 1)) # oder 224,224,3 
    train_labels_bin = np.zeros((len(train_paths), NUM_CLASSES)) 

    for i in range(len(train_paths)): 
     im = cv2.resize(cv2.imread(train_paths[i], 0), (28, 28)).astype(np.float32) 
     im = np.expand_dims(im, axis=2) # weil es ein CHANNEL ist, spaeter muss das glaube ich weg 
     train_images[i] = im 
     train_labels_bin[i][train_labels[i]] = 1 
    train_images = train_images.astype('float32') 
    train_images /= 255 

    return train_images, train_labels_bin 

def generator(images, labels, batch_size): 
    batch_features = np.zeros((batch_size, 28, 28, 1),np.float32) #oder 224,224,3 
    batch_labels = np.zeros((batch_size, NUM_CLASSES)) 
    while True: 
     for i in range(batch_size): 
      # choose random index of one image in image_paths 
      index = np.random.choice(len(images), 1, replace=False)[0] 
      batch_features[i] = images[index] 
      batch_labels[i] = labels[index] 
     yield (batch_features, batch_labels) 

train_images, train_labels_bin = paths_and_labels(dataset_dir) 
input_shape = (28, 28, 1) 

model = Sequential() 
model.add(Conv2D(32, kernel_size=(3, 3), 
       activation='relu', 
       input_shape=input_shape)) 
model.add(Conv2D(64, (3, 3), activation='relu')) 
model.add(MaxPooling2D(pool_size=(2, 2))) 
model.add(Dropout(0.25)) 
model.add(Flatten()) 
model.add(Dense(128, activation='relu')) 
model.add(Dropout(0.5)) 
model.add(Dense(NUM_CLASSES, activation='softmax')) 

model.compile(loss=keras.losses.categorical_crossentropy, 
       optimizer=keras.optimizers.Adadelta(), 
       metrics=['accuracy']) 
model.summary() 
#my_callback = [keras.callbacks.TensorBoard(log_dir='/home/viktor/PycharmProjects/CNN_Object_Classification/logs2', histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)] 
start_time = time.clock() 
model.fit_generator(generator=generator(train_images, train_labels_bin, BATCH_SIZE), steps_per_epoch=int(len(train_images)/BATCH_SIZE), epochs=NUM_EPOCHS)#,callbacks=my_callback) 
elapsed_time = time.clock() - start_time 
print('elapsed time: ', elapsed_time) 

結果:発電機の出力が同じであれば

1/281 [..............................] - ETA: 461s - loss: 2.3143 - acc: 0.0859 
    2/281 [..............................] - ETA: 238s - loss: 2.3017 - acc: 0.1133 
    3/281 [..............................] - ETA: 164s - loss: 2.2830 - acc: 0.1536 
    4/281 [..............................] - ETA: 128s - loss: 2.2632 - acc: 0.1953 
    5/281 [..............................] - ETA: 106s - loss: 2.2439 - acc: 0.2219 

277/281 [============================>.] - ETA: 0s - loss: 0.4778 - acc: 0.8619 
278/281 [============================>.] - ETA: 0s - loss: 0.4766 - acc: 0.8623 
279/281 [============================>.] - ETA: 0s - loss: 0.4759 - acc: 0.8626 
280/281 [============================>.] - ETA: 0s - loss: 0.4747 - acc: 0.8629 
281/281 [==============================] - 23s - loss: 0.4735 - acc: 0.8632 
elapsed time: 37.089643 

は、私はすでに確認しました。それはそうです。 私の質問は、最初のスクリプトでなぜその損失が悪いのですか?すべてが同じです。唯一の違いは、最初のスクリプトでは、ジェネレータ関数のデータをロードするということです。 2番目のスクリプトでは、ジェネレータ関数の外部にデータをロードします。

答えて

0

私は私のミス

FKTがあるべき権利ジェネレータが見つかりました:

def generator(image_paths, labels, batch_size): 

    while True: 
     batch_features = np.zeros((batch_size, 28, 28, 1)) # oder 224,224,3 
     batch_labels = np.zeros((batch_size, NUM_CLASSES)) 
     for i in range(batch_size): 
      # choose random index of one image in image_paths 
      index = np.random.choice(len(image_paths), 1, replace=False)[0] 
      im = cv2.resize(cv2.imread(image_paths[index],0), (28, 28)).astype(np.float32) 
      im = np.expand_dims(im, axis=2) # weil es ein CHANNEL ist, spaeter muss das glaube ich weg 
      batch_features[i] = im 
      batch_labels[i][labels[index]] = 1 
     batch_features = batch_features.astype('float32') 
     batch_features /= 255 
     yield (batch_features, batch_labels) 
関連する問題