Merge release-4-5-patches into release-4-6

author Roland Schulz <roland@utk.edu>

Sun, 29 Apr 2012 01:45:00 +0000 (21:45 -0400)

committer Roland Schulz <roland@utk.edu>

Sun, 29 Apr 2012 01:47:06 +0000 (21:47 -0400)
author Roland Schulz <roland@utk.edu>
Sun, 29 Apr 2012 01:45:00 +0000 (21:45 -0400)
committer Roland Schulz <roland@utk.edu>
Sun, 29 Apr 2012 01:47:06 +0000 (21:47 -0400)
diff --cc CMakeLists.txt
Simple merge
diff --cc src/mdlib/fft5d.c

index f070e6de3ccee67737edfd2efe3c8c992a7f5fdb,86091776d80050aedb1e9dcb8d740ddb7d6664c0..7f99a6598b9de8e5febb59ffdd1f5eba80076774
--- 1/src/mdlib/fft5d.c
--- 2/src/mdlib/fft5d.c
+++ b/src/mdlib/fft5d.c
@@@ -871,207 -851,86 +871,207 @@@ void fft5d_execute(fft5d_plan plan,int 
       }
   #endif
   
+ +        s=0;
+ +
       /*lin: x,y,z*/
- -    if (plan->flags&FFT5D_DEBUG) print_localdata(lin, "%d %d: copy in lin\n", s, plan);
- -    for (s=0;s<2;s++) {
- -        if (times!=0)
- -            time=MPI_Wtime();
- -        
- -        if ((plan->flags&FFT5D_REALCOMPLEX) && !(plan->flags&FFT5D_BACKWARD) && s==0) {
- -            gmx_fft_many_1d_real(p1d[s],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin,lout);
- -        } else {
- -            gmx_fft_many_1d(     p1d[s],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD,               lin,lout);
+ +        if (plan->flags&FFT5D_DEBUG && thread == 0)
+ +        {
+ +            print_localdata(lin, "%d %d: copy in lin\n", s, plan);
           }
- -        if (times!=0)
- -            time_fft+=MPI_Wtime()-time;
- -    
- -        if (plan->flags&FFT5D_DEBUG) print_localdata(lout, "%d %d: FFT %d\n", s, plan);
- -        
+ +
+ +        for (s=0;s<2;s++) {  /*loop over first two FFT steps (corner rotations)*/
+ +
   #ifdef GMX_MPI
-         if (GMX_PARALLEL_ENV_INITIALIZED && cart[s] !=0 && P[s]>1 )
+         if (GMX_PARALLEL_ENV_INITIALIZED && cart[s]!=MPI_COMM_NULL && P[s]>1)
           {
- -            if (times!=0)
- -                time=MPI_Wtime(); 
- -            /*prepare for AllToAll
- -              1. (most outer) axes (x) is split into P[s] parts of size N[s] 
- -              for sending*/
- -            splitaxes(lin,lout,N[s],M[s],K[s], pN[s],pM[s],pK[s],P[s],C[s],iNout[s],oNout[s]);
+ +            bParallelDim = 1;
+ +        }
+ +        else
+ +#endif
+ +        {
+ +            bParallelDim = 0;
+ +        }
   
- -            if (times!=0)
+ +        /* ---------- START FFT ------------ */
+ +#ifdef NOGMX
+ +        if (times!=0 && thread == 0)
+ +        {
+ +            time=MPI_Wtime();
+ +        }
+ +#endif
+ +
+ +        if (bParallelDim) {
+ +            fftout = lout;
+ +        }
+ +        else
+ +        {
+ +            if (s==0)
+ +            {
+ +                fftout = lout3;
+ +            } else
+ +            {
+ +                fftout = lout2;
+ +            }
+ +        }
+ +
+ +        tstart = (thread*pM[s]*pK[s]/plan->nthreads)*C[s];
+ +        if ((plan->flags&FFT5D_REALCOMPLEX) && !(plan->flags&FFT5D_BACKWARD) && s==0)
+ +        {
+ +            gmx_fft_many_1d_real(p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin+tstart,fftout+tstart);
+ +        } else
+ +        {
+ +            gmx_fft_many_1d(     p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD,               lin+tstart,fftout+tstart);
+ +
+ +        }
+ +
+ +#ifdef NOGMX
+ +        if (times != NULL && thread == 0)
+ +        {
+ +            time_fft+=MPI_Wtime()-time;
+ +        }
+ +#endif
+ +        if (plan->flags&FFT5D_DEBUG && thread == 0)
+ +        {
+ +            print_localdata(lout, "%d %d: FFT %d\n", s, plan);
+ +        }
+ +        /* ---------- END FFT ------------ */
+ +
+ +        /* ---------- START SPLIT + TRANSPOSE------------ (if parallel in in this dimension)*/
+ +        if (bParallelDim) {
+ +#ifdef NOGMX
+ +            if (times != NULL && thread == 0)
               {
- -                time_local+=MPI_Wtime()-time;
- -            
- -                /*send, recv*/
                   time=MPI_Wtime();
               }
+ +#endif
+ +            /*prepare for A
+ +llToAll
+ +              1. (most outer) axes (x) is split into P[s] parts of size N[s]
+ +              for sending*/
+ +            if (pM[s]>0)
+ +            {
+ +                tend = ((thread+1)*pM[s]*pK[s]/plan->nthreads);
+ +                tstart/=C[s];
+ +                splitaxes(lout2,lout,N[s],M[s],K[s], pN[s],pM[s],pK[s],P[s],C[s],iNout[s],oNout[s],tstart%pM[s],tstart/pM[s],tend%pM[s],tend/pM[s]);
+ +            }
+ +#pragma omp barrier /*barrier required before AllToAll (all input has to be their) - before timing to make timing more acurate*/
+ +#ifdef NOGMX
+ +            if (times != NULL && thread == 0)
+ +            {
+ +                time_local+=MPI_Wtime()-time;
+ +            }
+ +#endif
   
+ +        /* ---------- END SPLIT , START TRANSPOSE------------ */
+ +
+ +            if (thread == 0)
+ +            {
+ +#ifdef NOGMX
+ +                if (times!=0)
+ +                {
+ +                    time=MPI_Wtime();
+ +                }
+ +#else
+ +                wallcycle_start(times,ewcPME_FFTCOMM);
+ +#endif
   #ifdef FFT5D_MPI_TRANSPOSE
- -            FFTW(execute)(mpip[s]);  
+ +                FFTW(execute)(mpip[s]);
   #else
- -            if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ))) 
- -                MPI_Alltoall(lin,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
- -            else
- -                MPI_Alltoall(lin,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
- -#endif /*FFT5D_MPI_TRANSPOSE*/
- -            if (times!=0)
- -                time_mpi[s]=MPI_Wtime()-time;
- -        }
+ +#ifdef GMX_MPI
+ +                if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ)))
+ +                    MPI_Alltoall(lout2,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout3,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
+ +                else
+ +                    MPI_Alltoall(lout2,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout3,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
+ +#else
+ +                gmx_incons("fft5d MPI call without MPI configuration");
   #endif /*GMX_MPI*/
+ +#endif /*FFT5D_MPI_TRANSPOSE*/
+ +#ifdef NOGMX
+ +                if (times!=0)
+ +                {
+ +                    time_mpi[s]=MPI_Wtime()-time;
+ +                }
+ +#else
+ +                wallcycle_stop(times,ewcPME_FFTCOMM);
+ +#endif
+ +            }  /*master*/
+ +        }  /* bPrallelDim */
+ +#pragma omp barrier  /*both needed for parallel and non-parallel dimension (either have to wait on data from AlltoAll or from last FFT*/
   
- -    
- -        if (times!=0)
+ +        /* ---------- END SPLIT + TRANSPOSE------------ */
+ +
+ +        /* ---------- START JOIN ------------ */
+ +#ifdef NOGMX
+ +        if (times != NULL && thread == 0)
+ +        {
               time=MPI_Wtime();
+ +        }
+ +#endif
+ +
+ +        if (bParallelDim) {
+ +            joinin = lout3;
+ +        } else {
+ +            joinin = fftout;
+ +        }
           /*bring back in matrix form 
             thus make  new 1. axes contiguos
- -          also local transpose 1 and 2/3 */
- -        if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ))) 
- -            joinAxesTrans13(lin,lout,N[s],pM[s],K[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1]);
- -        else 
- -            joinAxesTrans12(lin,lout,N[s],M[s],pK[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1]);    
- -        if (times!=0)
+ +          also local transpose 1 and 2/3 
+ +          runs on thread used for following FFT (thus needing a barrier before but not afterwards)
+ +        */
+ +        if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ))) {
+ +            if (pM[s]>0)
+ +            {
+ +                tstart = ( thread   *pM[s]*pN[s]/plan->nthreads);
+ +                tend   = ((thread+1)*pM[s]*pN[s]/plan->nthreads);
+ +                joinAxesTrans13(lin,joinin,N[s],pM[s],K[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1],tstart%pM[s],tstart/pM[s],tend%pM[s],tend/pM[s]);
+ +            }
+ +        }
+ +        else {
+ +            if (pN[s]>0)
+ +            {
+ +                tstart = ( thread   *pK[s]*pN[s]/plan->nthreads);
+ +                tend   = ((thread+1)*pK[s]*pN[s]/plan->nthreads);
+ +                joinAxesTrans12(lin,joinin,N[s],M[s],pK[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1],tstart%pN[s],tstart/pN[s],tend%pN[s],tend/pN[s]);
+ +            }
+ +        }
+ +
+ +#ifdef NOGMX
+ +        if (times != NULL && thread == 0)
+ +        {
               time_local+=MPI_Wtime()-time;
- -    
- -        if (plan->flags&FFT5D_DEBUG) print_localdata(lin, "%d %d: tranposed %d\n", s+1, plan);
- -                
+ +        }
+ +#endif
+ +        if (plan->flags&FFT5D_DEBUG && thread == 0)
+ +        {
+ +            print_localdata(lin, "%d %d: tranposed %d\n", s+1, plan);
+ +        }
+ +        /* ---------- END JOIN ------------ */
+ +
           /*if (debug) print_localdata(lin, "%d %d: transposed x-z\n", N1, M0, K, ZYX, coor);*/
- -    }    
- -    
- -    if (times!=0)
- -        time=MPI_Wtime();
- -    if (plan->flags&FFT5D_INPLACE) lout=lin;
+ +    }  /* for(s=0;s<2;s++) */
+ +#ifdef NOGMX
+ +        if (times != NULL && thread == 0)
+ +        {
+ +            time=MPI_Wtime();
+ +        }
+ +#endif
+ +
+ +    if (plan->flags&FFT5D_INPLACE) lout=lin; /*in place currently not supported*/
+ +
+ +    /*  ----------- FFT ----------- */
+ +    tstart = (thread*pM[s]*pK[s]/plan->nthreads)*C[s];
       if ((plan->flags&FFT5D_REALCOMPLEX) && (plan->flags&FFT5D_BACKWARD)) {
- -        gmx_fft_many_1d_real(p1d[s],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin,lout);
+ +        gmx_fft_many_1d_real(p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin+tstart,lout+tstart);
       } else {
- -        gmx_fft_many_1d(     p1d[s],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD,               lin,lout);
+ +        gmx_fft_many_1d(     p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD,               lin+tstart,lout+tstart);
       }
+ +    /* ------------ END FFT ---------*/
   
- -    if (times!=0)
+ +#ifdef NOGMX
+ +    if (times != NULL && thread == 0)
+ +    {
           time_fft+=MPI_Wtime()-time;
- -    if (plan->flags&FFT5D_DEBUG) print_localdata(lout, "%d %d: FFT %d\n", s, plan);
- -    /*if (debug) print_localdata(lout, "%d %d: FFT in y\n", N1, M, K0, YZX, coor);*/
- -    
- -    if (times!=0) {
+ +
           times->fft+=time_fft;
           times->local+=time_local;
           times->mpi2+=time_mpi[1];
diff --cc src/mdlib/minimize.c
Simple merge
diff --cc src/mdlib/pme.c

index ed45fbfb4310a7feec78d608989c24071303c15d,735c0e878ae5f68e226540f6712e8a854155f178..02cea6e459f911ca99b665b8aa37c8ecb374e9ce
--- 1/src/mdlib/pme.c
--- 2/src/mdlib/pme.c
+++ b/src/mdlib/pme.c
@@@ -2944,10 -2057,10 +2944,10 @@@ int gmx_pme_init(gmx_pme_t *         pm
       pme->nnodes_minor        = nnodes_minor;
   
   #ifdef GMX_MPI
- -    if (nnodes_major*nnodes_minor > 1 && PAR(cr)) 
+ +    if (nnodes_major*nnodes_minor > 1)
       {
-         pme->mpi_comm        = cr->mpi_comm_mygroup;
+         pme->mpi_comm = cr->mpi_comm_mygroup;
- -        
+ +
           MPI_Comm_rank(pme->mpi_comm,&pme->nodeid);
           MPI_Comm_size(pme->mpi_comm,&pme->nnodes);
           if (pme->nnodes != nnodes_major*nnodes_minor)
author	Roland Schulz <roland@utk.edu>
	Sun, 29 Apr 2012 01:45:00 +0000 (21:45 -0400)
committer	Roland Schulz <roland@utk.edu>
	Sun, 29 Apr 2012 01:47:06 +0000 (21:47 -0400)
		1	2
CMakeLists.txt	patch \|	diff1 \|	diff2 \|	blob \| history
src/mdlib/fft5d.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/mdlib/minimize.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/mdlib/pme.c	patch \|	diff1 \|	diff2 \|	blob \| history