ホスト構造体内のデバイス配列へのポインタ

-1

ホストとデバイスの両方の配列を1か所に保持する構造体を作成しようとしています。後でこれをリンクリストの要素へ拡張するつもりです。基本的な構造体は次のようになります（*h はホスト上の double 配列を、*d はデバイス上の double 配列を指します）。

/* Keeps a host array and its device counterpart together in one record. */
struct Data {
    double *h;  /* array of doubles on the host */
    double *d;  /* array of doubles on the device */
};
typedef struct Data Data;

基本的な構造体は上のとおりで、*h はホスト上の double 配列を、*d はデバイス上の double 配列を指します。

構造体全体をデバイスにコピーする方法（「CUDA cudaMemcpy Struct of Arrays」など）については様々な回答がありますが、どれも私が必要としているものとは少し異なります。以下のコードを書きましたが、不正なメモリアクセスのエラーが発生し続けます。

#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 
#include "cuda.h" 

/* 
* CUDA Error stuff 
*/ 

/*
 * Terminates the program when a CUDA runtime call reports failure.
 *
 * err  - status returned by the CUDA call being checked
 * file - source file name to show in the diagnostic
 * line - source line number to show in the diagnostic
 *
 * On cudaSuccess this returns without doing anything. Otherwise it prints the
 * CUDA error string together with file/line on stdout and exits with
 * EXIT_FAILURE.
 */
static void HandleError(cudaError_t err,
                        const char *file,
                        int line) {
    if (err == cudaSuccess) {
        return;
    }

    const char *reason = cudaGetErrorString(err);
    printf("%s in %s at line %d\n", reason, file, line);
    exit(EXIT_FAILURE);
}
/* Checks a CUDA call's status, tagging a failure with the call site. */
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))


/*
 * Terminates the program with a file/line diagnostic when `a` is NULL.
 * Intended for checking the result of a host allocation.
 *
 * The argument is parenthesized so that any pointer expression (for example
 * a conditional expression) is compared against NULL as a whole. Each
 * continuation backslash must be the last character on its line.
 * The brace-block form is kept so existing call sites stay valid.
 */
#define HANDLE_NULL(a) {if ((a) == NULL) { \
                            printf("Host memory failed in %s at line %d\n", \
                                   __FILE__, __LINE__); \
                            exit(EXIT_FAILURE);}}

// Reports `message` on stdout (followed by a newline) and hands `errorCode`
// back unchanged, so a caller can print and return a status in one expression.
int errMsg(const char *message, int errorCode)
{
    fprintf(stdout, "%s\n", message);
    return errorCode;
}

/* Keeps a host buffer and its device counterpart together in one record. */
struct Data {
    double *h;  /* host buffer; main() obtains it from malloc */
    double *d;  /* device buffer; main() obtains it from cudaMalloc */
};
typedef struct Data Data;

/*
 * Stores 2 into the first 100 elements of the array reached through d->d.
 *
 * Launch layout: one-dimensional; each thread handles the element at its flat
 * global index, and threads whose index is 100 or more do nothing.
 * The struct is read through `d` from device code, so `d` has to be an
 * address that is valid on the device.
 */
__global__ void kernel(Data *d)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= 100) {
        return;
    }
    d->d[idx] = 2;
}

/*
 * Reproduction case from the question (intentionally left as posted).
 * Fills a 100-element host array with 0..99, copies it to a device array,
 * launches `kernel`, then copies the device array back and prints element 99.
 * The launch below hands the kernel a host address, which is what produces the
 * "illegal memory access" output quoted after this listing.
 */
int main()
{
    Data *d;
    // The Data record itself is allocated with malloc, i.e. in host memory.
    d = (Data*)malloc(sizeof(Data));

    d->h = (double*)malloc(sizeof(double)*100);
    // Only the array behind d->d is allocated on the device.
    HANDLE_ERROR(cudaMalloc((void**) &(d->d), 100*sizeof(double)));

    for(int i=0; i<100; i++){
     d->h[i] = i;
    }

    HANDLE_ERROR(cudaMemcpy(d->d, d->h, 100*sizeof(double), cudaMemcpyHostToDevice));

    printf("%f\n", d->h[1]);

    // NOTE(review): `d` is the malloc'ed host pointer from above, yet the
    // kernel dereferences it (d->d[tid]) in device code.
    kernel<<<1, 102>>>(d);

    printf("done\n");

    {
    // A failure is only reported here; execution continues to the copy below.
    cudaError_t cudaerr = cudaDeviceSynchronize();
    if (cudaerr != cudaSuccess)
     printf("kernel launch failed with error \"%s\"->\n",
       cudaGetErrorString(cudaerr));
    }

    // HANDLE_ERROR exits the program if this copy reports an error.
    HANDLE_ERROR(cudaMemcpy(d->h, d->d, 100*sizeof(double), cudaMemcpyDeviceToHost));
    printf("%f\n", d->h[99]);


    return 0;
}

得られる出力は次のとおりです。

1.000000 
done 
kernel launch failed with error "an illegal memory access was encountered"-> 
an illegal memory access was encountered in linkedListGPU.cu at line 77

おそらく、ポインタの扱いをどこかで間違えているのだと思います。エラー処理コードはWileyのCUDA入門書から引用したものです。ここにそのコードを掲載することが許可されていない場合は削除します。

ありがとうございました。

kernel<<<1, 102>>>(d); 
       ^
        this is a pointer to memory on the host

問題は d そのものです。上のスニペットが示すとおり、カーネルに渡している d はホスト上のメモリを指すポインタです。

出典

2017-04-01 Tom Wilshaw

ホストポインタをデバイスに渡し、カーネル内でそのポインタを参照しようとしています。これは明らかに動作しません – talonmies

ありがとうございます。カーネル呼び出しの引数を (d->d) に変更し、それに合わせてカーネルコードを修正したら解決しました。すみません、デバイス上の構造体については今のところまだ混乱しています。 –

d 自体が、ホスト上に割り当てられた構造体（d と h の2つのポインタを含む）へのポインタです。この構造体ポインタ d を上記のようにカーネルへ渡すと、デバイスコードは次の箇所でそのポインタを逆参照しようとします。

d->...; 
    ^
    This operator dereferences the pointer to the left of it

その結果、不正なメモリアクセスが発生します。

これを修正するには、少なくとも2つの明白な方法があります。

構造体をポインタではなく値で渡します。

以下はその一例です。

$ cat t1311.cu 
#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 
#include "cuda.h" 

/* 
* CUDA Error stuff 
*/ 

/*
 * Terminates the program when a CUDA runtime call reports failure.
 *
 * err  - status returned by the CUDA call being checked
 * file - source file name to show in the diagnostic
 * line - source line number to show in the diagnostic
 *
 * On cudaSuccess this returns without doing anything. Otherwise it prints the
 * CUDA error string together with file/line on stdout and exits with
 * EXIT_FAILURE.
 */
static void HandleError(cudaError_t err,
                        const char *file,
                        int line) {
    if (err == cudaSuccess) {
        return;
    }

    const char *reason = cudaGetErrorString(err);
    printf("%s in %s at line %d\n", reason, file, line);
    exit(EXIT_FAILURE);
}
/* Checks a CUDA call's status, tagging a failure with the call site. */
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))


/*
 * Terminates the program with a file/line diagnostic when `a` is NULL.
 * Intended for checking the result of a host allocation.
 *
 * The argument is parenthesized so that any pointer expression (for example
 * a conditional expression) is compared against NULL as a whole. Each
 * continuation backslash must be the last character on its line.
 * The brace-block form is kept so existing call sites stay valid.
 */
#define HANDLE_NULL(a) {if ((a) == NULL) { \
                            printf("Host memory failed in %s at line %d\n", \
                                   __FILE__, __LINE__); \
                            exit(EXIT_FAILURE);}}

// Reports `message` on stdout (followed by a newline) and hands `errorCode`
// back unchanged, so a caller can print and return a status in one expression.
int errMsg(const char *message, int errorCode)
{
    fprintf(stdout, "%s\n", message);
    return errorCode;
}

/* Keeps a host buffer and its device counterpart together in one record. */
struct Data {
    double *h;  /* host buffer; main() obtains it from malloc */
    double *d;  /* device buffer; main() obtains it from cudaMalloc */
};
typedef struct Data Data;

/*
 * Stores 2 into the first 100 elements of the array d.d.
 *
 * The Data record arrives by value, so the kernel works on its own copy of
 * the two pointers and only follows the `d` member.
 * Launch layout: one-dimensional; each thread handles the element at its flat
 * global index, and threads whose index is 100 or more do nothing.
 */
__global__ void kernel(Data d)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= 100) {
        return;
    }
    d.d[idx] = 2;
}

/*
 * Demo driver for the pass-by-value variant.
 *
 * Fills a host array with 0..99, copies it to a device array, launches
 * `kernel` with the Data record passed by value, copies the device array back
 * and prints element 99.
 *
 * Returns 0 on success. A failed host allocation or CUDA call terminates the
 * process through HANDLE_NULL / HANDLE_ERROR.
 */
int main()
{
    // Must match the bound hard-coded in kernel (tid < 100).
    const int count = 100;
    const size_t bytes = count * sizeof(double);

    Data d;

    d.h = (double*)malloc(bytes);
    HANDLE_NULL(d.h);
    HANDLE_ERROR(cudaMalloc((void**) &(d.d), bytes));

    for (int i = 0; i < count; i++) {
        d.h[i] = i;
    }

    HANDLE_ERROR(cudaMemcpy(d.d, d.h, bytes, cudaMemcpyHostToDevice));

    printf("%f\n", d.h[1]);

    // The record is copied into the launch arguments, so the kernel never
    // dereferences a host address; it only follows the device pointer d.d.
    kernel<<<1, 102>>>(d);
    // A launch does not return a status; configuration errors show up here.
    HANDLE_ERROR(cudaGetLastError());

    printf("done\n");

    {
        // Faults raised while the kernel runs are reported by the sync.
        cudaError_t cudaerr = cudaDeviceSynchronize();
        if (cudaerr != cudaSuccess)
            printf("kernel launch failed with error \"%s\"->\n",
                   cudaGetErrorString(cudaerr));
    }

    HANDLE_ERROR(cudaMemcpy(d.h, d.d, bytes, cudaMemcpyDeviceToHost));
    printf("%f\n", d.h[99]);

    // Release both buffers; the original listing leaked them.
    HANDLE_ERROR(cudaFree(d.d));
    free(d.h);

    return 0;
}
$ nvcc -arch=sm_35 -o t1311 t1311.cu 
$ cuda-memcheck ./t1311 
========= CUDA-MEMCHECK 
1.000000 
done 
2.000000 
========= ERROR SUMMARY: 0 errors 
$

ホストポインタ d が指す構造体のコピーをデバイス上に作成し、そのデバイス側のポインタをカーネルに渡します。

以下はその例です。

$ cat t1311.cu 
#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 
#include "cuda.h" 

/* 
* CUDA Error stuff 
*/ 

/*
 * Terminates the program when a CUDA runtime call reports failure.
 *
 * err  - status returned by the CUDA call being checked
 * file - source file name to show in the diagnostic
 * line - source line number to show in the diagnostic
 *
 * On cudaSuccess this returns without doing anything. Otherwise it prints the
 * CUDA error string together with file/line on stdout and exits with
 * EXIT_FAILURE.
 */
static void HandleError(cudaError_t err,
                        const char *file,
                        int line) {
    if (err == cudaSuccess) {
        return;
    }

    const char *reason = cudaGetErrorString(err);
    printf("%s in %s at line %d\n", reason, file, line);
    exit(EXIT_FAILURE);
}
/* Checks a CUDA call's status, tagging a failure with the call site. */
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))


/*
 * Terminates the program with a file/line diagnostic when `a` is NULL.
 * Intended for checking the result of a host allocation.
 *
 * The argument is parenthesized so that any pointer expression (for example
 * a conditional expression) is compared against NULL as a whole. Each
 * continuation backslash must be the last character on its line.
 * The brace-block form is kept so existing call sites stay valid.
 */
#define HANDLE_NULL(a) {if ((a) == NULL) { \
                            printf("Host memory failed in %s at line %d\n", \
                                   __FILE__, __LINE__); \
                            exit(EXIT_FAILURE);}}

// Reports `message` on stdout (followed by a newline) and hands `errorCode`
// back unchanged, so a caller can print and return a status in one expression.
int errMsg(const char *message, int errorCode)
{
    fprintf(stdout, "%s\n", message);
    return errorCode;
}

/* Keeps a host buffer and its device counterpart together in one record. */
struct Data {
    double *h;  /* host buffer; main() obtains it from malloc */
    double *d;  /* device buffer; main() obtains it from cudaMalloc */
};
typedef struct Data Data;

/*
 * Stores 2 into the first 100 elements of the array reached through d->d.
 *
 * Launch layout: one-dimensional; each thread handles the element at its flat
 * global index, and threads whose index is 100 or more do nothing.
 * The struct is read through `d` from device code, so `d` has to be an
 * address that is valid on the device (main() passes its cudaMalloc'ed copy).
 */
__global__ void kernel(Data *d)
{
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if (idx >= 100) {
        return;
    }
    d->d[idx] = 2;
}

/*
 * Demo driver for the device-copy variant.
 *
 * Builds a Data record on the host, mirrors that record into device memory
 * (dev_d) and passes the device copy to `kernel`. The host array is filled
 * with 0..99 and copied to the device first; after the kernel runs, the device
 * array is copied back and element 99 is printed.
 *
 * Returns 0 on success. A failed host allocation or CUDA call terminates the
 * process through HANDLE_NULL / HANDLE_ERROR.
 */
int main()
{
    // Must match the bound hard-coded in kernel (tid < 100).
    const int count = 100;
    const size_t bytes = count * sizeof(double);

    Data *d, *dev_d;
    d = (Data*)malloc(sizeof(Data));
    HANDLE_NULL(d);
    HANDLE_ERROR(cudaMalloc(&dev_d, sizeof(Data)));
    d->h = (double*)malloc(bytes);
    HANDLE_NULL(d->h);
    HANDLE_ERROR(cudaMalloc((void**) &(d->d), bytes));

    for (int i = 0; i < count; i++) {
        d->h[i] = i;
    }

    HANDLE_ERROR(cudaMemcpy(d->d, d->h, bytes, cudaMemcpyHostToDevice));
    // Mirror the record only after d->d holds its device address, so the
    // device copy carries a pointer the kernel can follow.
    HANDLE_ERROR(cudaMemcpy(dev_d, d, sizeof(Data), cudaMemcpyHostToDevice));
    printf("%f\n", d->h[1]);

    // Pass the device copy of the record, not the host pointer d.
    kernel<<<1, 102>>>(dev_d);
    // A launch does not return a status; configuration errors show up here.
    HANDLE_ERROR(cudaGetLastError());

    printf("done\n");

    {
        // Faults raised while the kernel runs are reported by the sync.
        cudaError_t cudaerr = cudaDeviceSynchronize();
        if (cudaerr != cudaSuccess)
            printf("kernel launch failed with error \"%s\"->\n",
                   cudaGetErrorString(cudaerr));
    }

    HANDLE_ERROR(cudaMemcpy(d->h, d->d, bytes, cudaMemcpyDeviceToHost));
    printf("%f\n", d->h[99]);

    // Release device and host allocations; the original listing leaked them.
    HANDLE_ERROR(cudaFree(d->d));
    HANDLE_ERROR(cudaFree(dev_d));
    free(d->h);
    free(d);

    return 0;
}
$ nvcc -arch=sm_35 -o t1311 t1311.cu 
$ cuda-memcheck ./t1311 
========= CUDA-MEMCHECK 
1.000000 
done 
2.000000 
========= ERROR SUMMARY: 0 errors 
$

余談ですが、こちら（here）で説明されている方法を使っていれば、デバッグをもう少し先まで進められたはずです。

出典

2017-04-01 19:49:14

ホスト構造体内のデバイス配列へのポインタ

答えて

関連する問題