私はpyopenclとopenclを初めて使ったので、私は正式に立ち往生しているこのプロジェクトのポイントに驚いたことはありません。このコードを実行すると、タイトルエラーが発生するのはなぜですか?pyopencl.cffi_cl.RuntimeError:clenqueuendrangekernelが失敗しました:OUT_OF_RESOURCES
コード:
from __future__ import division
import numpy as np
import pyopencl as cl
from time import time
import sys
from math import sqrt
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
prg = cl.Program(ctx, """
__kernel void rmse(__global const float* x,
__global const float* y_true,
__global const float* params,
const ushort numPoints,
__global float* result)
{
__local scratch[512];
int numPointsPerThread = (512 + numPoints - 1)/512;
int start = 512*numPointsPerThread;
int end = (start + numPointsPerThread < numPoints) ? start+numPointsPerThread
: numPoints;
for(int i=start; i<end; i++)
{
float y_i = -y_true[i]; float x_i = x[i];
for(int k=0; k<100; k++)
y_i += sqrt(pow(x_i, params[k]) + params[k]);
scratch[i] = y_i*y_i;
}
barrier(CLK_LOCAL_MEM_FENCE);
for(int numThreads = 256; numThreads > 1; numThreads >>= 1)
{
if(get_local_id(0) < numThreads)
scratch[get_local_id(0)] += scratch[get_local_id(0)*2+1];
barrier(CLK_LOCAL_MEM_FENCE);
}
if (get_local_id(0) == 0) *result = sqrt(scratch[0]/(float)numPoints);
}
""").build()
# Run the experiment
if len(sys.argv) != 2:
print ("Wrong number of arguments!")
print ("Usage: GPU #RMSE_evaluations")
print ("(#data_points is taken from test.csv)")
sys.exit(1)
numEvals = int(sys.argv[1])
#data = np.genfromtxt("test.csv", delimiter=',', dtype=np.float32)
data = np.full((1000,2), 42, dtype=np.float32)
x = np.array(data[:,0],dtype=np.float32)
x_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=x)
y_true = np.array(data[:,1],dtype=np.float32)
y_true_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=y_true)
result_g = cl.Buffer(ctx, mf.WRITE_ONLY, 4)
start = time()
param_indices = np.arange(0,100,1, dtype=np.float32)
RMSE = np.empty(numEvals, dtype=np.float32)
for i in xrange(numEvals):
params = np.sin(param_indices*i)+1
params_g = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=params)
prg.rmse(queue, (512,), (512,),
x_g,
y_true_g,
params_g,
np.uint16(len(x)),
result_g)
cl.enqueue_copy(queue, RMSE[i], result_g).wait()
params_g.release()
end = time()
print("Sum RMSE:", np.sum(RMSE))
print("Time elapsed (s):", end-start)
print("Time per RMSE evaluation (s):", (end-start)/numEvals)