Matrix Transposition Using HyperCube Network (Algorithm in C++)

Matrix Transposition Using HyperCube Network (Algorithm in C++)
[code lang=”cpp”]// hyper_rmta.cpp : Defines the entry point for the console application.
//#include < stdio.h >
#include “/opt/mpich/gnu/include/mpi.h”
#include < malloc.h >
#include < math.h >
#include < sys/times.h >
#include < stdlib.h >
#include < unistd.h>// hyper_rmta.cpp : Defines the entry point for the console application.
//int const ROWS=1024;
int const CALS=1024;
int A[ROWS][CALS];
int AT[CALS][ROWS];
int Atmp[ROWS][CALS];int decide_quarter(int sqrt_prcnt,int sqprcnt_2,int me);
int ** prpare_members(int prcnt,int sqrt_prcnt);
MPI_Comm * create_comms(MPI_Comm comm,int prcnt);
void rmta(MPI_Comm comm);
void local_transpose();
void print_transposed();
int log_base2(int p);
int main(int argc, char* argv[])
{
int my_rank;
int num_proc;
int dest= 0;double t1, t2,t3,t4;
struct tms tb1, tb2;
double ticspersec;MPI_Init(&argc, &argv);
/* get my_rank */
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
/* find out how many processes there are */
MPI_Comm_size(MPI_COMM_WORLD, &num_proc);MPI_Errhandler_set(MPI_COMM_WORLD,MPI_ERRORS_RETURN);

MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
if (my_rank==0){
printf(“start…\n”);
ticspersec = (double) sysconf(_SC_CLK_TCK);
t1=(double)times(&tb1);
}
for (int i=0;i < ROWS;i++){
for (int j=0;j < CALS;j++){
A[i][j]=my_rank*100+i*10+j;
}
}
printf(“my_rank: %d\n”,my_rank);
rmta(MPI_COMM_WORLD);

MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
if (my_rank==0){
t2=(double)times(&tb2);
printf(“Total Time: %lf sec \n”,(t2 – t1) / ticspersec);
printf(“finito…\n”);
}

MPI_Finalize();
return 0;
}

MPI_Comm * create_comms(MPI_Comm comm,int prcnt){
int tag= 0;
MPI_Status status;
MPI_Comm * comms=(MPI_Comm *)malloc(sizeof(MPI_Comm)*4);
MPI_Group orig_group,group;
MPI_Comm_group(comm, &orig_group);
int my_rank,resultlen;
char err_buffer[MPI_MAX_ERROR_STRING];
int sqrt_prcnt=sqrt(prcnt);
int quarter_size=prcnt/4;
int ** members=prpare_members(prcnt,sqrt_prcnt);
for(int i=0;i < 4;i++){
MPI_Group_incl(orig_group, quarter_size, members[i], &group);
MPI_Comm_create(comm, group, &comms[i]);
}
// RMTA transpose
for (int j=0;j < 4;j++){
int ierr=MPI_Comm_rank(comms[j], &my_rank);
int ierr_w=0;
int recved=0;
if (ierr==0){
ierr_w=0;
switch(j){
case 0:
ierr_w=MPI_Recv(&Atmp,CALS*ROWS,MPI_INT,members[2][my_rank],tag,comm,&status);
ierr_w=MPI_Send(&Atmp,CALS*ROWS,MPI_INT,members[1][my_rank],tag,comm);
break;
case 1:
ierr_w=MPI_Send(&A,CALS*ROWS,MPI_INT,members[3][my_rank],tag,comm);
ierr_w=MPI_Recv(&A,CALS*ROWS,MPI_INT,members[0][my_rank],tag,comm,&status);
break;
case 2:
ierr_w=MPI_Send(&A,CALS*ROWS,MPI_INT,members[0][my_rank],tag,comm);
ierr_w=MPI_Recv(&A,CALS*ROWS,MPI_INT,members[3][my_rank],tag,comm,&status);
break;
case 3:
ierr_w=MPI_Recv(&Atmp,CALS*ROWS,MPI_INT,members[1][my_rank],tag,comm,&status);
ierr_w=MPI_Send(&Atmp,CALS*ROWS,MPI_INT,members[2][my_rank],tag,comm);
break;
}
MPI_Barrier(comm);
if (ierr_w!=0){
MPI_Error_string(ierr_w,err_buffer,&resultlen);
printf(“ERROR: “);printf(err_buffer);printf(“\n”);
}
rmta(comms[j]);
}
}
printf(“\n”);

return comms;
}

int ** prpare_members(int prcnt, int sqrt_prcnt){
int prcnt_4=prcnt/4;
int sqprcnt_2=sqrt_prcnt/2;
int **members=(int **)malloc(sizeof(int)*prcnt);
*members=new int[4];
for (int j=0;j < 4;j++){
members[j]=new int[prcnt_4];
}
int *cnt;cnt = new int[4];
cnt[0]=0;cnt[1]=0;cnt[2]=0;cnt[3]=0;
int quarter;
for (int i=0;i < prcnt;i++){
quarter=decide_quarter(sqrt_prcnt,sqprcnt_2,i);
members[quarter-1][cnt[quarter-1]]=i;cnt[quarter-1]++;
}
return members;
}

int decide_quarter(int sqrt_prcnt,int sqprcnt_2,int me){
int row=me/sqrt_prcnt;
int col=me % sqrt_prcnt;
if (row > =sqprcnt_2){
if (col > =sqprcnt_2){
return 4;
}else{
return 3;
}
}else{
if (col > =sqprcnt_2){
return 2;
}else{
return 1;
}
}
}

void rmta(MPI_Comm comm){
int num_proc;
int my_rank;
MPI_Group grp1,grp2,orig_group;
MPI_Comm comm_tmp1,comm_tmp2;
int ierr=0;

ierr=MPI_Comm_size(comm, &num_proc);
int stages=0;
if (ierr==0){
stages=log_base2(num_proc);
}else{
num_proc=1;
}

printf(“stages: %d\t num_proc:%d\t ierr: %d\n”,stages,num_proc,ierr);

if (stages%2==0){
if (num_proc > 1){
MPI_Comm * comms=create_comms(comm,num_proc);

}else{
local_transpose();
print_transposed();
}
}else{

MPI_Comm_group(comm, &orig_group);
int * members=(int *)malloc(sizeof(int)*num_proc/2);
int nr=0;
for(int p=0;p < num_proc/2;p++){
members[nr]=p;
nr++;
}
MPI_Group_incl(orig_group, num_proc/2, members, &grp1);
MPI_Comm_create(comm, grp1, &comm_tmp1);
MPI_Comm_rank(comm, &my_rank);
if (my_rank < num_proc/2){
rmta(comm_tmp1);
}
nr=0;
for(int p_1=num_proc/2;p_1 < num_proc;p_1++){
members[nr]=p_1;
nr++;
}
MPI_Group_incl(orig_group, num_proc/2, members, &grp2);
MPI_Comm_create(comm, grp2, &comm_tmp2);
if (!(my_rank < num_proc/2)){
rmta(comm_tmp2);
}
}
}
void local_transpose(){
for (int i=0;i < ROWS;i++){
for (int j=0;j < CALS;j++){
AT[j][i]=A[i][j];
}
}
}
void print_transposed(){
for (int i=0;i < CALS;i++){
for (int j=0;j < ROWS;j++){
printf(“%d\t”,AT[i][j]);
}
printf(“\n”);
}
}
int log_base2(int p) {

int return_val = 0;
unsigned q;

q = (unsigned) p;
while(q != 1) {
q = q > > 1;
return_val++;
}
return return_val;
} /* log_base2 */
[/code]

2 thoughts on “Matrix Transposition Using HyperCube Network (Algorithm in C++)

  1. aurimas.norkevicius Post author

    I feel the need to annonce this blog on comp.lang.c; Because the post “Matrix Transposition Using HyperCube Network (Algorithm in C++)”, suits your
    forum theme and are useful for people who would like to know more about c++/c programming in conjunction with MPI function library. The analysed theme is unuique over the internet and i couldn’t find publicated the same algorithm.

Leave a Reply