0
ベクトルに行列を乗算するコードを記述します。私はMPIを使用します。行列は、行からなるチャンクの分布です。チャンクのサイズは必ずしも等しいとは限りません。チャンクは正しく動作しますが、これを実行しようとすると、空のベクトルが半分になります。しかし、私は完全なベクトルを受け取ると予想された。添付のコードを見てください。私は問題がMPI_Gatherv関数にあると思う。実行した後MPI_Gathervが正しく動作しない
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#define COLUMN 4
#define ROW 7
#define dp 100.0f
// Local start
#define chunk_low(commrank, commsize, nvert) \
((commrank) * (nvert)/(commsize))
// Local end
#define chunk_height(commrank, commsize, nvert) \
(chunk_low((commrank) + 1, commsize, nvert) - 1)
// Local size
#define chunk_size(commrank, commsize, nvert) \
(chunk_height(commrank, commsize, nvert) - \
chunk_low(commrank, commsize, nvert) + 1)
// Matrix initialization function
void init_matrix(int column, int row, float *matrix)
{
int j, i;
printf("\nMatrix\n");
for(i=0; i < row; i++){
for(j=0; j < column; j++){
matrix[i*column+j] = i * column + j; // (float)rand()/RAND_MAX * dp *2.0f - dp;
printf(" %f ", matrix[i * column + j]);
}
printf("\n");
}
printf("\n");
}
int main(int argc, char **argv)
{
int rank, size;
int i, j;
float *vm, *local_matrix, *result, *vector;
double time1, time2;
int *displs, *rcounts, *scounts;
vm = (float *)calloc(ROW * COLUMN, sizeof(float));
vector = malloc(COLUMN * sizeof(float));
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
/* Process 0 - master */
if (rank==0)
{
printf("\nNumbers of proccesses %d. \nElements in vector %d.\n", size, COLUMN);
/* Init vector vA */
init_matrix(COLUMN, ROW, vm);
for (i = 0; i < COLUMN; i++) {
vector[i] = (11 * 5) + (11 * i);
}
result = (float *)calloc(ROW, sizeof(float));
//Time begining calculating of programm
time1=MPI_Wtime();
}
/* End of work process 0 */
displs = (int *)malloc(sizeof(int) * size);
scounts = (int *)malloc(sizeof(int) * size);
rcounts = (int *)malloc(sizeof(int) * size);
for (i = 0; i < size; i++) {
displs[i] = chunk_low(i, size, ROW) * COLUMN; // Position initialization
rcounts[i] = scounts[i] = chunk_size(i, size, ROW) * COLUMN;
}
local_matrix = (float *)calloc(chunk_size(rank, size, ROW) * COLUMN, sizeof(float));
MPI_Bcast(vector, COLUMN, MPI_DOUBLE, 0, MPI_COMM_WORLD);
MPI_Scatterv(vm, scounts, displs, MPI_FLOAT, local_matrix,
rcounts[rank], MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
int local_row = scounts[rank]/COLUMN;
float *local_result = (float *)calloc(local_row, sizeof(float));;
for(i = 0; i < local_row; i++) {
for (j = 0; j < COLUMN; j++) {
local_result[i] += local_matrix[i * COLUMN + j] * vector[j];
}
}
MPI_Gatherv(local_result, local_row, MPI_FLOAT, result, rcounts, displs, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
/* Only master-process */
if (rank==0)
{
//Time ending programm
time2=MPI_Wtime();
printf("\nTime parallel calculation = %f s.\n",time2-time1);
for (i = 0; i < ROW; i++)
{
printf(" %f\n", result[i]);
}
}
// End work of master-process
/* Delete storage arrays of process */
free(displs);
free(scounts);
free(rcounts);
free(local_matrix);
MPI_Finalize();
return 0;
}
私が期待されたこのコード: 484.000000 1628.000000 2772.000000 3916.000000 5060.000000 6204.000000 7348.000000
しかし、この結果を得る: 484.000000 1628.000000 0.000000 0.000000 0.000000 0.000000 0.000000