ヤロスラフさんのおかげです:
# Benchmark setup: the same matrix product, reduced to a scalar, is built once
# on the GPU and once on the CPU so both placements can be timed.
shape = (6000, 6000)

with tf.device("/gpu:0"):
    # Despite the name, the matrix is all zeros (tf.zeros), not random.
    random_matrix_gpu = tf.zeros(shape)
    result_op_gpu = tf.reduce_sum(
        tf.matmul(random_matrix_gpu, tf.transpose(random_matrix_gpu)))

with tf.device("/cpu:0"):
    random_matrix_cpu = tf.zeros(shape)
    # The assignment and its right-hand side have to be one statement; a bare
    # line break directly after "=" is a syntax error.
    result_op_cpu = tf.reduce_sum(
        tf.matmul(random_matrix_cpu, tf.transpose(random_matrix_cpu)))

# Session whose graph optimizer level is set to OptimizerOptions.L0.
# NOTE(review): L0 presumably disables graph optimizations such as constant
# folding - confirm against the TensorFlow documentation.
config = tf.ConfigProto(
    graph_options=tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0)))
sess = tf.Session(config=config)
def profile(op, msg):
    """Run `op` once in the module-level `sess` and print its wall-clock time.

    Args:
        op: The graph operation (or tensor) handed to `sess.run`.
        msg: Label printed in front of the elapsed time.
    """
    began = time.time()
    sess.run(op)
    elapsed = time.time() - began
    print(msg, elapsed)
# Each op is timed twice back to back; the numeric suffix in the label tells
# the first run of an op apart from the second one.
profile(op=result_op_cpu, msg="cpu1")
profile(op=result_op_cpu, msg="cpu2")
profile(op=result_op_gpu, msg="gpu1")
profile(op=result_op_gpu, msg="gpu2")
私もこれを確認できました!誰かが興味を持った場合に備えて、実行したコードと結果を掲載します。コードを試す場合は、数分間お待ちください。
コード:
import sys
import numpy as np
import tensorflow as tf
from datetime import datetime
# Devices the benchmark graph is placed on, in the order they are timed.
device_names = [
    "/cpu:0",
    "/gpu:0",
    "/gpu:1",
]
# Square matrix shapes to benchmark, smallest first.
shapes = [(side, side) for side in (3000, 6000, 9000, 12000)]
# Report entries; timing_run() appends to this list and it is printed at the end.
messages = ["RESULTS\n"]
def _timed_session_run(session, op):
    """Run `op` once in `session` and return `(result, elapsed_seconds)`."""
    started = datetime.now()
    result = session.run(op)
    elapsed = datetime.now() - started
    return result, elapsed.total_seconds()


def timing_run(matrix_type, config_name, warmup):
    """Time reduce_sum(M x M^T) for every device/shape pair and log the results.

    For each entry of the module-level `device_names` and `shapes`, a square
    matrix is created on that device, multiplied by its own transpose and
    summed. The elapsed time and result of each run are appended to the
    module-level `messages` list; nothing is returned.

    Args:
        matrix_type: "random_uniform" builds the matrix with
            `tf.random_uniform` (values in [0, 1), seed 1234); any other
            value builds it with `tf.zeros`.
        config_name: Key selecting the session config: "simple" (default
            options, device placement logging off) or "optim" (optimizer
            `opt_level` set to `OptimizerOptions.L0`).
        warmup: If true, the op is run a second time in the same session and
            that run is logged with a "*****WARMED UP*****" marker.

    Raises:
        KeyError: If `config_name` is not one of the known config keys.
    """
    configs = {
        "simple": tf.ConfigProto(log_device_placement=False),
        # NOTE(review): L0 presumably means "no graph optimizations", which
        # would make the key name "optim" misleading - confirm in the TF docs.
        "optim": tf.ConfigProto(
            graph_options=tf.GraphOptions(
                optimizer_options=tf.OptimizerOptions(
                    opt_level=tf.OptimizerOptions.L0))),
    }
    messages.append("matrix={}+config={}+warmup={}".format(
        matrix_type, config_name, warmup))
    tf.set_random_seed(1234)

    for device_name in device_names:
        for shape in shapes:
            # Build the graph for this device/shape combination.
            with tf.device(device_name):
                if matrix_type == "random_uniform":
                    random_matrix = tf.random_uniform(shape=shape,
                                                      minval=0,
                                                      maxval=1,
                                                      seed=1234)
                else:
                    random_matrix = tf.zeros(shape)
                result_op = tf.reduce_sum(
                    tf.matmul(random_matrix, tf.transpose(random_matrix)))

            session = tf.Session(config=configs[config_name])
            try:
                # First run in a fresh session (the "warm up" run).
                first_result, first_seconds = _timed_session_run(
                    session, result_op)
                messages.append((device_name,
                                 "shape = {}".format(shape),
                                 "times = {} seconds".format(first_seconds),
                                 "result = {}".format(first_result)))
                if warmup:
                    # Second run in the same, already warmed-up session.
                    second_result, second_seconds = _timed_session_run(
                        session, result_op)
                    # Report the second run's own result; the original
                    # formatted the first run's result here by mistake.
                    messages.append((device_name,
                                     "shape = {}".format(shape),
                                     "times = {} seconds".format(second_seconds),
                                     "result = {}".format(second_result),
                                     "*****WARMED UP*****"))
            finally:
                # Release the session even if a run raises.
                session.close()
        # Separator after all shapes of one device.
        messages.append("++++++++++++++++++++++++++++++++++++++++++++++++++++")
    messages.append("\n\n")
if __name__ == "__main__":
    # The same three (config, warmup) combinations are benchmarked for each
    # matrix type: first random_uniform, then zeros.
    for matrix_type in ("random_uniform", "zeros"):
        for config_name, warmup in (("simple", False),
                                    ("simple", True),
                                    ("optim", False)):
            timing_run(matrix_type=matrix_type,
                       config_name=config_name,
                       warmup=warmup)

    # print timings
    for entry in messages:
        print(entry)
概要:ヤロスラフさんのご提案を受けて
matrix=random_uniform+config=simple+warmup=False
('/cpu:0', 'shape = (3000, 3000)', 'times = 0.428429 seconds', 'result = 6754431488.0')
('/cpu:0', 'shape = (6000, 6000)', 'times = 2.806464 seconds', 'result = 54023852032.0')
('/cpu:0', 'shape = (9000, 9000)', 'times = 9.36232 seconds', 'result = 184425938944.0')
('/cpu:0', 'shape = (12000, 12000)', 'times = 22.376751 seconds', 'result = 439655661568.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:0', 'shape = (3000, 3000)', 'times = 0.392957 seconds', 'result = 6754390016.0')
('/gpu:0', 'shape = (6000, 6000)', 'times = 0.082889 seconds', 'result = 54006833152.0')
('/gpu:0', 'shape = (9000, 9000)', 'times = 0.221844 seconds', 'result = 182251814912.0')
('/gpu:0', 'shape = (12000, 12000)', 'times = 0.438476 seconds', 'result = 431995879424.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:1', 'shape = (3000, 3000)', 'times = 0.483864 seconds', 'result = 6754393088.0')
('/gpu:1', 'shape = (6000, 6000)', 'times = 0.097571 seconds', 'result = 54006833152.0')
('/gpu:1', 'shape = (9000, 9000)', 'times = 0.250176 seconds', 'result = 182252044288.0')
('/gpu:1', 'shape = (12000, 12000)', 'times = 0.473314 seconds', 'result = 431996567552.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
matrix=random_uniform+config=simple+warmup=True
('/cpu:0', 'shape = (3000, 3000)', 'times = 0.475717 seconds', 'result = 6754431488.0')
('/cpu:0', 'shape = (3000, 3000)', 'times = 0.379542 seconds', 'result = 6754431488.0', '*****WARMED UP*****')
('/cpu:0', 'shape = (6000, 6000)', 'times = 2.856803 seconds', 'result = 54023852032.0')
('/cpu:0', 'shape = (6000, 6000)', 'times = 2.798967 seconds', 'result = 54023852032.0', '*****WARMED UP*****')
('/cpu:0', 'shape = (9000, 9000)', 'times = 9.447787 seconds', 'result = 184425938944.0')
('/cpu:0', 'shape = (9000, 9000)', 'times = 9.385646 seconds', 'result = 184425938944.0', '*****WARMED UP*****')
('/cpu:0', 'shape = (12000, 12000)', 'times = 21.752967 seconds', 'result = 439655661568.0')
('/cpu:0', 'shape = (12000, 12000)', 'times = 21.832136 seconds', 'result = 439655661568.0', '*****WARMED UP*****')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:0', 'shape = (3000, 3000)', 'times = 0.067066 seconds', 'result = 6754394624.0')
('/gpu:0', 'shape = (3000, 3000)', 'times = 0.008072 seconds', 'result = 6754394624.0', '*****WARMED UP*****')
('/gpu:0', 'shape = (6000, 6000)', 'times = 0.123611 seconds', 'result = 54006833152.0')
('/gpu:0', 'shape = (6000, 6000)', 'times = 0.057391 seconds', 'result = 54006833152.0', '*****WARMED UP*****')
('/gpu:0', 'shape = (9000, 9000)', 'times = 0.248432 seconds', 'result = 182251913216.0')
('/gpu:0', 'shape = (9000, 9000)', 'times = 0.18535 seconds', 'result = 182251913216.0', '*****WARMED UP*****')
('/gpu:0', 'shape = (12000, 12000)', 'times = 0.48081 seconds', 'result = 431996043264.0')
('/gpu:0', 'shape = (12000, 12000)', 'times = 0.412447 seconds', 'result = 431996043264.0', '*****WARMED UP*****')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:1', 'shape = (3000, 3000)', 'times = 0.105071 seconds', 'result = 6754395648.0')
('/gpu:1', 'shape = (3000, 3000)', 'times = 0.008107 seconds', 'result = 6754395648.0', '*****WARMED UP*****')
('/gpu:1', 'shape = (6000, 6000)', 'times = 0.137264 seconds', 'result = 54006849536.0')
('/gpu:1', 'shape = (6000, 6000)', 'times = 0.064462 seconds', 'result = 54006849536.0', '*****WARMED UP*****')
('/gpu:1', 'shape = (9000, 9000)', 'times = 0.280302 seconds', 'result = 182251831296.0')
('/gpu:1', 'shape = (9000, 9000)', 'times = 0.191399 seconds', 'result = 182251831296.0', '*****WARMED UP*****')
('/gpu:1', 'shape = (12000, 12000)', 'times = 0.509208 seconds', 'result = 431996534784.0')
('/gpu:1', 'shape = (12000, 12000)', 'times = 0.4263 seconds', 'result = 431996534784.0', '*****WARMED UP*****')
++++++++++++++++++++++++++++++++++++++++++++++++++++
matrix=random_uniform+config=optim+warmup=False
('/cpu:0', 'shape = (3000, 3000)', 'times = 0.552631 seconds', 'result = 6754431488.0')
('/cpu:0', 'shape = (6000, 6000)', 'times = 2.894024 seconds', 'result = 54023852032.0')
('/cpu:0', 'shape = (9000, 9000)', 'times = 9.394226 seconds', 'result = 184425938944.0')
('/cpu:0', 'shape = (12000, 12000)', 'times = 21.870817 seconds', 'result = 439655661568.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:0', 'shape = (3000, 3000)', 'times = 0.107416 seconds', 'result = 6754392576.0')
('/gpu:0', 'shape = (6000, 6000)', 'times = 0.163633 seconds', 'result = 54006804480.0')
('/gpu:0', 'shape = (9000, 9000)', 'times = 0.304741 seconds', 'result = 182251667456.0')
('/gpu:0', 'shape = (12000, 12000)', 'times = 0.526494 seconds', 'result = 431995944960.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:1', 'shape = (3000, 3000)', 'times = 0.119625 seconds', 'result = 6754394624.0')
('/gpu:1', 'shape = (6000, 6000)', 'times = 0.203158 seconds', 'result = 54006800384.0')
('/gpu:1', 'shape = (9000, 9000)', 'times = 0.317646 seconds', 'result = 182251978752.0')
('/gpu:1', 'shape = (12000, 12000)', 'times = 0.544184 seconds', 'result = 431996076032.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
matrix=zeros+config=simple+warmup=False
('/cpu:0', 'shape = (3000, 3000)', 'times = 0.632157 seconds', 'result = 0.0')
('/cpu:0', 'shape = (6000, 6000)', 'times = 2.901679 seconds', 'result = 0.0')
('/cpu:0', 'shape = (9000, 9000)', 'times = 9.345713 seconds', 'result = 0.0')
('/cpu:0', 'shape = (12000, 12000)', 'times = 21.707619 seconds', 'result = 0.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:0', 'shape = (3000, 3000)', 'times = 0.498451 seconds', 'result = 0.0')
('/gpu:0', 'shape = (6000, 6000)', 'times = 2.900121 seconds', 'result = 0.0')
('/gpu:0', 'shape = (9000, 9000)', 'times = 9.4296 seconds', 'result = 0.0')
('/gpu:0', 'shape = (12000, 12000)', 'times = 21.750406 seconds', 'result = 0.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:1', 'shape = (3000, 3000)', 'times = 0.523286 seconds', 'result = 0.0')
('/gpu:1', 'shape = (6000, 6000)', 'times = 2.887522 seconds', 'result = 0.0')
('/gpu:1', 'shape = (9000, 9000)', 'times = 9.377383 seconds', 'result = 0.0')
('/gpu:1', 'shape = (12000, 12000)', 'times = 21.639043 seconds', 'result = 0.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
matrix=zeros+config=simple+warmup=True
('/cpu:0', 'shape = (3000, 3000)', 'times = 0.520212 seconds', 'result = 0.0')
('/cpu:0', 'shape = (3000, 3000)', 'times = 0.000172 seconds', 'result = 0.0', '*****WARMED UP*****')
('/cpu:0', 'shape = (6000, 6000)', 'times = 2.914485 seconds', 'result = 0.0')
('/cpu:0', 'shape = (6000, 6000)', 'times = 0.000166 seconds', 'result = 0.0', '*****WARMED UP*****')
('/cpu:0', 'shape = (9000, 9000)', 'times = 9.346122 seconds', 'result = 0.0')
('/cpu:0', 'shape = (9000, 9000)', 'times = 0.000207 seconds', 'result = 0.0', '*****WARMED UP*****')
('/cpu:0', 'shape = (12000, 12000)', 'times = 21.715376 seconds', 'result = 0.0')
('/cpu:0', 'shape = (12000, 12000)', 'times = 0.0002 seconds', 'result = 0.0', '*****WARMED UP*****')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:0', 'shape = (3000, 3000)', 'times = 0.556841 seconds', 'result = 0.0')
('/gpu:0', 'shape = (3000, 3000)', 'times = 0.000234 seconds', 'result = 0.0', '*****WARMED UP*****')
('/gpu:0', 'shape = (6000, 6000)', 'times = 2.936608 seconds', 'result = 0.0')
('/gpu:0', 'shape = (6000, 6000)', 'times = 0.000244 seconds', 'result = 0.0', '*****WARMED UP*****')
('/gpu:0', 'shape = (9000, 9000)', 'times = 9.34956 seconds', 'result = 0.0')
('/gpu:0', 'shape = (9000, 9000)', 'times = 0.000246 seconds', 'result = 0.0', '*****WARMED UP*****')
('/gpu:0', 'shape = (12000, 12000)', 'times = 21.634354 seconds', 'result = 0.0')
('/gpu:0', 'shape = (12000, 12000)', 'times = 0.000221 seconds', 'result = 0.0', '*****WARMED UP*****')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:1', 'shape = (3000, 3000)', 'times = 0.562244 seconds', 'result = 0.0')
('/gpu:1', 'shape = (3000, 3000)', 'times = 0.000255 seconds', 'result = 0.0', '*****WARMED UP*****')
('/gpu:1', 'shape = (6000, 6000)', 'times = 2.961658 seconds', 'result = 0.0')
('/gpu:1', 'shape = (6000, 6000)', 'times = 0.000237 seconds', 'result = 0.0', '*****WARMED UP*****')
('/gpu:1', 'shape = (9000, 9000)', 'times = 9.308582 seconds', 'result = 0.0')
('/gpu:1', 'shape = (9000, 9000)', 'times = 0.000239 seconds', 'result = 0.0', '*****WARMED UP*****')
('/gpu:1', 'shape = (12000, 12000)', 'times = 21.707127 seconds', 'result = 0.0')
('/gpu:1', 'shape = (12000, 12000)', 'times = 0.000261 seconds', 'result = 0.0', '*****WARMED UP*****')
++++++++++++++++++++++++++++++++++++++++++++++++++++
matrix=zeros+config=optim+warmup=False
('/cpu:0', 'shape = (3000, 3000)', 'times = 0.560451 seconds', 'result = 0.0')
('/cpu:0', 'shape = (6000, 6000)', 'times = 2.978946 seconds', 'result = 0.0')
('/cpu:0', 'shape = (9000, 9000)', 'times = 9.3279 seconds', 'result = 0.0')
('/cpu:0', 'shape = (12000, 12000)', 'times = 21.694664 seconds', 'result = 0.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:0', 'shape = (3000, 3000)', 'times = 0.249778 seconds', 'result = 0.0')
('/gpu:0', 'shape = (6000, 6000)', 'times = 0.365332 seconds', 'result = 0.0')
('/gpu:0', 'shape = (9000, 9000)', 'times = 0.663667 seconds', 'result = 0.0')
('/gpu:0', 'shape = (12000, 12000)', 'times = 1.032716 seconds', 'result = 0.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
('/gpu:1', 'shape = (3000, 3000)', 'times = 0.299856 seconds', 'result = 0.0')
('/gpu:1', 'shape = (6000, 6000)', 'times = 0.294592 seconds', 'result = 0.0')
('/gpu:1', 'shape = (9000, 9000)', 'times = 0.55067 seconds', 'result = 0.0')
('/gpu:1', 'shape = (12000, 12000)', 'times = 0.806868 seconds', 'result = 0.0')
++++++++++++++++++++++++++++++++++++++++++++++++++++
ありがとうございます。各サイズごとにウォームアップを試して、結果を報告します。このウォームアップの効果は、例えばtf.zerosかtf.random_uniformかといった違いに依存しますか? – Prabu
あなたの計測結果から判断すると、それはオプションに依存しているようです –