JCuda。既に使用しているポインタを再利用

JCudaを使った作業で問題が発生しています。CUFFTライブラリを使用して1D FFTを実行し、その結果を2倍にするという課題があります。そこで、CUFFT_R2Cタイプの1D FFTを実行することにしました。これを担当するクラスは次のとおりです：

/**
 * Runs a 1D cuFFT transform over a host float array.
 *
 * <p>The constructor allocates two device buffers: one holding a copy of the
 * input samples and one for the transform output. Call {@link #transform()}
 * to execute the FFT, {@link #getFFTResult()} to copy the output back to the
 * host, and {@link #releaseGPUResources()} when the buffers are no longer
 * needed.
 *
 * <p>The status codes returned by the JCuda/JCufft calls are not inspected by
 * this class.
 */
public class FFTTransformer {

    /** Device buffer that receives a copy of {@link #inputData}. */
    private final Pointer inputDataPointer;

    /** Device buffer the transform writes into. */
    private final Pointer outputDataPointer;

    /**
     * Selects the transform path in {@link #transform()} and is also passed
     * as the direction argument of the complex-to-complex execution.
     */
    private int fftType;

    /** Host-side input samples, as passed to the constructor (not copied). */
    private final float[] inputData;

    /** Host-side copy of the last result fetched by {@link #getFFTResult()}. */
    private float[] outputData;

    /** Batch count handed to {@code cufftPlan1d}. */
    private int batchSize = 1;

    /**
     * Allocates the device buffers and uploads the input samples.
     *
     * @param type      value stored as the FFT type; see {@link #transform()}
     * @param inputData host samples; {@code inputData.length} floats are
     *                  copied to the device
     */
    public FFTTransformer(int type, float[] inputData) {
        this.fftType = type;
        this.inputData = inputData;

        // CUdeviceptr (rather than a plain Pointer) is kept on purpose:
        // HilbertSpectrumTicksKernelInvoker.multiplyOn2 casts the pointer it
        // receives to CUdeviceptr.
        inputDataPointer = new CUdeviceptr();
        JCuda.cudaMalloc(inputDataPointer, inputData.length * Sizeof.FLOAT);
        JCuda.cudaMemcpy(inputDataPointer, Pointer.to(inputData),
                inputData.length * Sizeof.FLOAT, cudaMemcpyKind.cudaMemcpyHostToDevice);

        // Output buffer is sized as inputData.length + 2 floats.
        // NOTE(review): this size does not take batchSize into account;
        // confirm it is sufficient when batchSize is changed from 1.
        outputDataPointer = new CUdeviceptr();
        JCuda.cudaMalloc(outputDataPointer, (inputData.length + 2) * Sizeof.FLOAT);
    }

    /** @return the device buffer holding the uploaded input samples */
    public Pointer getInputDataPointer() {
        return inputDataPointer;
    }

    /** @return the device buffer the transform writes into */
    public Pointer getOutputDataPointer() {
        return outputDataPointer;
    }

    /** @return the currently configured FFT type value */
    public int getFftType() {
        return fftType;
    }

    /** @param fftType new FFT type value; see {@link #transform()} */
    public void setFftType(int fftType) {
        this.fftType = fftType;
    }

    /** @return the host input array passed to the constructor */
    public float[] getInputData() {
        return inputData;
    }

    /** @return the batch count used when creating cuFFT plans */
    public int getBatchSize() {
        return batchSize;
    }

    /** @param batchSize batch count to use for subsequently created plans */
    public void setBatchSize(int batchSize) {
        this.batchSize = batchSize;
    }

    /**
     * @return the host array filled by the most recent
     *         {@link #getFFTResult()} call, or {@code null} if it has not
     *         been called yet
     */
    public float[] getOutputData() {
        return outputData;
    }

    /** Creates a real-to-complex plan, executes it once and destroys the plan. */
    private void R2CTransform() {
        cufftHandle plan = new cufftHandle();
        JCufft.cufftPlan1d(plan, inputData.length, cufftType.CUFFT_R2C, batchSize);
        JCufft.cufftExecR2C(plan, inputDataPointer, outputDataPointer);
        JCufft.cufftDestroy(plan);
    }

    /**
     * Creates a complex-to-complex plan, executes it once with
     * {@link #fftType} as the direction, and destroys the plan.
     */
    private void C2CTransform() {
        cufftHandle plan = new cufftHandle();
        // NOTE(review): the plan is created for inputData.length points while
        // the device buffers were sized as inputData.length and
        // inputData.length + 2 floats; confirm the buffers are large enough
        // for a complex-to-complex transform of that size.
        JCufft.cufftPlan1d(plan, inputData.length, cufftType.CUFFT_C2C, batchSize);
        JCufft.cufftExecC2C(plan, inputDataPointer, outputDataPointer, fftType);
        JCufft.cufftDestroy(plan);
    }

    /**
     * Executes the transform: the real-to-complex path when the FFT type
     * equals {@code JCufft.CUFFT_FORWARD}, the complex-to-complex path for
     * any other value.
     */
    public void transform() {
        if (fftType == JCufft.CUFFT_FORWARD) {
            R2CTransform();
        } else {
            C2CTransform();
        }
    }

    /**
     * Copies the output device buffer into a newly allocated host array of
     * {@code inputData.length + 2} floats.
     *
     * @return the freshly filled host array (also retained for
     *         {@link #getOutputData()})
     */
    public float[] getFFTResult() {
        outputData = new float[inputData.length + 2];
        JCuda.cudaMemcpy(Pointer.to(outputData), outputDataPointer,
                outputData.length * Sizeof.FLOAT, cudaMemcpyKind.cudaMemcpyDeviceToHost);
        return outputData;
    }

    /** Calls {@code cudaFree} on both device buffers. */
    public void releaseGPUResources() {
        JCuda.cudaFree(inputDataPointer);
        JCuda.cudaFree(outputDataPointer);
    }

    /**
     * Demo entry point: transforms 65536 samples of {@code sin(i)}, copies
     * the result to the host, then hands the output device pointer to
     * {@code HilbertSpectrumTicksKernelInvoker.multiplyOn2}.
     */
    public static void main(String... args) {
        float[] inputData = new float[65536];
        for (int i = 0; i < inputData.length; i++) {
            inputData[i] = (float) Math.sin(i);
        }

        FFTTransformer transformer = new FFTTransformer(JCufft.CUFFT_FORWARD, inputData);
        try {
            transformer.transform();
            // Result is fetched as in the original flow; it stays available
            // through getOutputData().
            transformer.getFFTResult();

            HilbertSpectrumTicksKernelInvoker.multiplyOn2(
                    transformer.getOutputDataPointer(), inputData.length + 2);
        } finally {
            // Free the device buffers even when the kernel invocation throws,
            // so a failed run does not leak GPU memory.
            transformer.releaseGPUResources();
        }
    }
}

次に、CUDAカーネル関数を呼び出すメソッドです。Javaメソッドのコード：

/**
 * Launches the "calcSpectrumSamples" kernel on the given device pointer and
 * copies {@code dataSize} floats back from it to the host.
 *
 * NOTE(review): only part of this method is visible; its closing lines are
 * missing from the listing, so what happens after the final copy is unknown.
 *
 * @param inputDataPointer device pointer handed to the kernel; it is cast to
 *                         CUdeviceptr below, so the caller must pass a
 *                         CUdeviceptr instance
 * @param dataSize         number of floats passed to the kernel and copied back
 */
public static void multiplyOn2(Pointer inputDataPointer, int dataSize){ 

     // Enable exceptions and omit all subsequent error checks 
     JCudaDriver.setExceptionsEnabled(true); 

     // Create the PTX file by calling the NVCC 
     String ptxFileName = null; 
     try { 
      ptxFileName = FileService.preparePtxFile("resources\\HilbertSpectrumTicksKernel.cu"); 
     } catch (IOException e) { 
      // The failure is only printed; ptxFileName stays null and is still
      // passed to cuModuleLoad below.
      e.printStackTrace(); 
     } 

     // Initialize the driver and create a context for the first device. 
     // NOTE(review): a new context is created on every call and is not
     // destroyed in the visible part of this method. inputDataPointer was
     // allocated by the caller before this context existed - confirm it is
     // valid in the new context; an invalid pointer here would be consistent
     // with the CUDA_ERROR_UNKNOWN reported at cuCtxSynchronize.
     cuInit(0); 
     CUdevice device = new CUdevice(); 
     cuDeviceGet(device, 0); 
     CUcontext context = new CUcontext(); 
     cuCtxCreate(context, 0, device); 

     // Load the ptx file. 
     CUmodule module = new CUmodule(); 
     cuModuleLoad(module, ptxFileName); 

     // Obtain a function pointer to the "calcSpectrumSamples" kernel. 
     CUfunction function = new CUfunction(); 
     cuModuleGetFunction(function, module, "calcSpectrumSamples"); 

     // Set up the kernel parameters: A pointer to an array 
     // of pointers which point to the actual values. 
     // N and pair are derived from dataSize and passed as the third and
     // fourth kernel arguments.
     int N = (dataSize + 1)/2 + 1; 
     int pair = (dataSize + 1) % 2 > 0 ? 1 : -1; 

     Pointer kernelParameters = Pointer.to(Pointer.to(inputDataPointer), 
       Pointer.to(new int[] { dataSize }), 
       Pointer.to(new int[] { N }), Pointer.to(new int[] { pair })); 

     // Call the kernel function. 
     // The grid size is dataSize / blockSizeX rounded up, so that at least
     // dataSize threads are launched.
     int blockSizeX = 128; 
     int gridSizeX = (int) Math.ceil((double) dataSize/blockSizeX); 
     cuLaunchKernel(function, gridSizeX, 1, 1, // Grid dimension 
       blockSizeX, 1, 1, // Block dimension 
       0, null, // Shared memory size and stream 
       kernelParameters, null // Kernel- and extra parameters 
     ); 
     cuCtxSynchronize(); 

     // Allocate host output memory and copy the device output 
     // to the host. 
     float freq[] = new float[dataSize]; 
     cuMemcpyDtoH(Pointer.to(freq), (CUdeviceptr)inputDataPointer, dataSize 
       * Sizeof.FLOAT);

カーネル関数は次のとおりです：

// Doubles, in place, each of the first dataSize floats of complexData.
// Each thread handles the single element at its global x index.
// N and pair are accepted but not read by this kernel.
extern "C"

__global__ void calcSpectrumSamples(float* complexData, int dataSize, int N, int pair) {

    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads whose index falls at or beyond dataSize do nothing.
    if (idx < dataSize) {
        complexData[idx] *= 2;
    }
}

しかし、（デバイスメモリ上の）FFTの結果を指すポインタをmultiplyOn2メソッドに渡そうとすると、cuCtxSynchronize()の呼び出しで例外がスローされます。例外：

Exception in thread "main" jcuda.CudaException: CUDA_ERROR_UNKNOWN 
    at jcuda.driver.JCudaDriver.checkResult(JCudaDriver.java:263) 
    at jcuda.driver.JCudaDriver.cuCtxSynchronize(JCudaDriver.java:1709) 
    at com.ifntung.cufft.HilbertSpectrumTicksKernelInvoker.multiplyOn2(HilbertSpectrumTicksKernelInvoker.java:73) 
    at com.ifntung.cufft.FFTTransformer.main(FFTTransformer.java:123)

Visual Studio C++を使用して同じことを試したところ、問題はありませんでした。助けていただけないでしょうか。

P.S. 私はこの問題を解決できますが、デバイスメモリからホストメモリにデータをコピーし、新しいcuda関数を呼び出す前に毎回新しいポインタを作成してコピーバックする必要があり、プログラムの実行が遅くなります。

出典

2012-05-13 user1392147