import numpy as np
import tensorflow as tf
from random import randint

wordsList = np.load('training_data/wordsList.npy')
wordsList = wordsList.tolist()  # Originally loaded as a numpy array
wordsList = [word.decode('UTF-8') for word in wordsList]  # Decode words from UTF-8 bytes
wordVectors = np.load('training_data/wordVectors.npy')
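
As a quick sanity check on what was loaded (the shapes are an assumption based on the 400,000-word, 50-dimensional GloVe files this kind of tutorial typically ships with; they would explain the numDimensions used below and the 399999 unknown-word index used later):

print(len(wordsList))      # e.g. 400000 words
print(wordVectors.shape)   # e.g. (400000, 50), so numDimensions = 50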

How can I predict sentiment (an output of 1 or 0 for a given sentence) with an LSTM model in TensorFlow using python3? Here are my training and test methods:

def getTrainBatch():
    labels = []
    arr = np.zeros([batchSize, maxSeqLength])
    for i in range(batchSize):
        if (i % 2 == 0):
            num = randint(1, 11499)        # positive reviews
            labels.append([1, 0])
        else:
            num = randint(13499, 24999)    # negative reviews
            labels.append([0, 1])
        arr[i] = ids[num-1:num]
    return arr, labels

def getTestBatch():
    labels = []
    arr = np.zeros([batchSize, maxSeqLength])
    for i in range(batchSize):
        num = randint(11499, 13499)        # held-out reviews
        if (num <= 12499):
            labels.append([1, 0])          # positive
        else:
            labels.append([0, 1])          # negative
        arr[i] = ids[num-1:num]
    return arr, labels
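
For example, each call returns a (batchSize, maxSeqLength) array of word ids together with batchSize one-hot labels, where [1, 0] marks a positive review and [0, 1] a negative one:

batch, batchLabels = getTrainBatch()
print(batch.shape)      # (batchSize, maxSeqLength), e.g. (24, 250)
print(batchLabels[0])   # [1, 0] -> positive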

with tf.device('/gpu:0'):
    batchSize = 24
    lstmUnits = 64
    numClasses = 2
    iterations = 100000

    tf.reset_default_graph()

    labels = tf.placeholder(tf.float32, [batchSize, numClasses])
    input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength])

    # Note: this Variable is immediately overwritten by the embedding lookup below
    data = tf.Variable(tf.zeros([batchSize, maxSeqLength, numDimensions]), dtype=tf.float32)
    data = tf.nn.embedding_lookup(wordVectors, input_data)

    lstmCell = tf.contrib.rnn.BasicLSTMCell(lstmUnits)
    lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=0.75)
    value, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)

with tf.device('/gpu:0'):
    weight = tf.Variable(tf.truncated_normal([lstmUnits, numClasses]))
    bias = tf.Variable(tf.constant(0.1, shape=[numClasses]))
    value = tf.transpose(value, [1, 0, 2])
    # Take the output of the last time step
    last = tf.gather(value, int(value.get_shape()[0]) - 1)
    prediction = (tf.matmul(last, weight) + bias)

correctPred = tf.equal(tf.argmax(prediction,1), tf.argmax(labels,1)) 
accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32)) 

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=labels)) 
optimizer = tf.train.AdamOptimizer().minimize(loss) 

sess = tf.InteractiveSession() 
saver = tf.train.Saver() 
sess.run(tf.global_variables_initializer()) 

with tf.device('/gpu:0'):
    for i in range(iterations):
        nextBatch, nextBatchLabels = getTrainBatch()
        sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels})

iterations = 10
for i in range(iterations):
    nextBatch, nextBatchLabels = getTestBatch()
    print(sess.run(accuracy, {input_data: nextBatch, labels: nextBatchLabels}))

Variables:

with tf.device('/gpu:0'):
    ids = np.zeros((numFiles, maxSeqLength), dtype='int32')
    fileCounter = 0
    for pf in positiveFiles:
        with open(pf, "r") as f:
            indexCounter = 0
            line = f.readline()
            cleanedLine = cleanSentences(line)
            split = cleanedLine.split()
            for word in split:
                try:
                    ids[fileCounter][indexCounter] = wordsList.index(word)
                except ValueError:
                    ids[fileCounter][indexCounter] = 399999  # Vector for unknown words
                indexCounter = indexCounter + 1
                if indexCounter >= maxSeqLength:
                    break
            fileCounter = fileCounter + 1

    for nf in negativeFiles:
        with open(nf, "r") as f:
            indexCounter = 0
            line = f.readline()
            cleanedLine = cleanSentences(line)
            split = cleanedLine.split()
            for word in split:
                try:
                    ids[fileCounter][indexCounter] = wordsList.index(word)
                except ValueError:
                    ids[fileCounter][indexCounter] = 399999  # Vector for unknown words
                indexCounter = indexCounter + 1
                if indexCounter >= maxSeqLength:
                    break
            fileCounter = fileCounter + 1
    # Pass into the embedding function and see if it evaluates.

np.save('idsMatrix', ids) 
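
Since np.save appends the .npy extension, the matrix can be reloaded later without rebuilding it:

ids = np.load('idsMatrix.npy')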

batchSize = 24 
I load some positiveFiles and negativeFiles into it. After restoring the model from a checkpoint as below, how would I test whether a given sentence is positive (1) or negative (0)?

new_saver = tf.train.import_meta_graph('models/pretrained....') 
new_saver.restore(sess, tf.train.latest_checkpoint('models/./')) 

Please help.

Answer


Name your input and output tensors, then retrieve them from the graph to make predictions:

with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('/path/to/model.meta')
    new_saver.restore(sess, '/path/to/model')
    g = tf.get_default_graph()
    inputs = g.get_tensor_by_name('inputs:0')
    prediction = g.get_tensor_by_name('prediction:0')
    prediction_ = sess.run(prediction, {inputs: your_inputs})
Use your own relative/absolute paths to model.meta and the model checkpoint here. So that the prediction can be restored by name, I suggest the following changes to your code:

...
input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength], name='inputs')
...
prediction = (tf.matmul(last, weight) + bias)
# you may use softmax if you want probabilities for prediction, but not for calculating the loss
# prediction = tf.nn.softmax(prediction)
prediction = tf.identity(prediction, name='prediction')
...
with tf.device('/gpu:0'):
    for i in range(iterations):
        nextBatch, nextBatchLabels = getTrainBatch()
        sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels})
    saver.save(sess, 'model')

These are the changes and additions needed to get your code on track.
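
Putting the pieces together, here is a minimal inference sketch under the same assumptions (wordsList, cleanSentences, batchSize and maxSeqLength from the question are in scope; sentenceToMatrix is a hypothetical helper, and 'model.meta'/'model' are the paths saved above):

def sentenceToMatrix(sentence):
    # Hypothetical helper (not part of the original code): map one sentence to
    # a [batchSize, maxSeqLength] ids matrix; only row 0 is meaningful, the
    # remaining rows stay zero-padded to satisfy the fixed placeholder shape.
    arr = np.zeros([batchSize, maxSeqLength], dtype='int32')
    for i, word in enumerate(cleanSentences(sentence).split()[:maxSeqLength]):
        try:
            arr[0][i] = wordsList.index(word)
        except ValueError:
            arr[0][i] = 399999  # unknown word
    return arr

with tf.Session() as sess:
    new_saver = tf.train.import_meta_graph('model.meta')
    new_saver.restore(sess, 'model')
    g = tf.get_default_graph()
    inputs = g.get_tensor_by_name('inputs:0')
    prediction = g.get_tensor_by_name('prediction:0')
    logits = sess.run(prediction, {inputs: sentenceToMatrix("a great movie")})
    # Row 0 holds the sentence; argmax 0 -> [1, 0] (positive), argmax 1 -> [0, 1] (negative)
    print(1 if np.argmax(logits[0]) == 0 else 0)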
