Merge release-4-5-patches into release-4-6
authorRoland Schulz <roland@utk.edu>
Sun, 29 Apr 2012 01:45:00 +0000 (21:45 -0400)
committerRoland Schulz <roland@utk.edu>
Sun, 29 Apr 2012 01:47:06 +0000 (21:47 -0400)
Conflicts:
src/mdlib/pme.c

Change-Id: I0a77a63162168d3a07ab957a9bf7b367a9f05eac

1  2 
CMakeLists.txt
src/mdlib/fft5d.c
src/mdlib/minimize.c
src/mdlib/pme.c

diff --cc CMakeLists.txt
Simple merge
index f070e6de3ccee67737edfd2efe3c8c992a7f5fdb,86091776d80050aedb1e9dcb8d740ddb7d6664c0..7f99a6598b9de8e5febb59ffdd1f5eba80076774
@@@ -871,207 -851,86 +871,207 @@@ void fft5d_execute(fft5d_plan plan,int 
      }
  #endif
  
 +        s=0;
 +
      /*lin: x,y,z*/
 -    if (plan->flags&FFT5D_DEBUG) print_localdata(lin, "%d %d: copy in lin\n", s, plan);
 -    for (s=0;s<2;s++) {
 -        if (times!=0)
 -            time=MPI_Wtime();
 -        
 -        if ((plan->flags&FFT5D_REALCOMPLEX) && !(plan->flags&FFT5D_BACKWARD) && s==0) {
 -            gmx_fft_many_1d_real(p1d[s],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin,lout);
 -        } else {
 -            gmx_fft_many_1d(     p1d[s],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD,               lin,lout);
 +        if (plan->flags&FFT5D_DEBUG && thread == 0)
 +        {
 +            print_localdata(lin, "%d %d: copy in lin\n", s, plan);
          }
 -        if (times!=0)
 -            time_fft+=MPI_Wtime()-time;
 -    
 -        if (plan->flags&FFT5D_DEBUG) print_localdata(lout, "%d %d: FFT %d\n", s, plan);
 -        
 +
 +        for (s=0;s<2;s++) {  /*loop over first two FFT steps (corner rotations)*/
 +
  #ifdef GMX_MPI
-         if (GMX_PARALLEL_ENV_INITIALIZED && cart[s] !=0 && P[s]>1 )
+         if (GMX_PARALLEL_ENV_INITIALIZED && cart[s]!=MPI_COMM_NULL && P[s]>1)
          {
 -            if (times!=0)
 -                time=MPI_Wtime(); 
 -            /*prepare for AllToAll
 -              1. (most outer) axes (x) is split into P[s] parts of size N[s] 
 -              for sending*/
 -            splitaxes(lin,lout,N[s],M[s],K[s], pN[s],pM[s],pK[s],P[s],C[s],iNout[s],oNout[s]);
 +            bParallelDim = 1;
 +        }
 +        else
 +#endif
 +        {
 +            bParallelDim = 0;
 +        }
  
 -            if (times!=0)
 +        /* ---------- START FFT ------------ */
 +#ifdef NOGMX
 +        if (times!=0 && thread == 0)
 +        {
 +            time=MPI_Wtime();
 +        }
 +#endif
 +
 +        if (bParallelDim) {
 +            fftout = lout;
 +        }
 +        else
 +        {
 +            if (s==0)
 +            {
 +                fftout = lout3;
 +            } else
 +            {
 +                fftout = lout2;
 +            }
 +        }
 +
 +        tstart = (thread*pM[s]*pK[s]/plan->nthreads)*C[s];
 +        if ((plan->flags&FFT5D_REALCOMPLEX) && !(plan->flags&FFT5D_BACKWARD) && s==0)
 +        {
 +            gmx_fft_many_1d_real(p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin+tstart,fftout+tstart);
 +        } else
 +        {
 +            gmx_fft_many_1d(     p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD,               lin+tstart,fftout+tstart);
 +
 +        }
 +
 +#ifdef NOGMX
 +        if (times != NULL && thread == 0)
 +        {
 +            time_fft+=MPI_Wtime()-time;
 +        }
 +#endif
 +        if (plan->flags&FFT5D_DEBUG && thread == 0)
 +        {
 +            print_localdata(lout, "%d %d: FFT %d\n", s, plan);
 +        }
 +        /* ---------- END FFT ------------ */
 +
 +        /* ---------- START SPLIT + TRANSPOSE------------ (if parallel in in this dimension)*/
 +        if (bParallelDim) {
 +#ifdef NOGMX
 +            if (times != NULL && thread == 0)
              {
 -                time_local+=MPI_Wtime()-time;
 -            
 -                /*send, recv*/
                  time=MPI_Wtime();
              }
 +#endif
 +            /*prepare for A
 +llToAll
 +              1. (most outer) axes (x) is split into P[s] parts of size N[s]
 +              for sending*/
 +            if (pM[s]>0)
 +            {
 +                tend = ((thread+1)*pM[s]*pK[s]/plan->nthreads);
 +                tstart/=C[s];
 +                splitaxes(lout2,lout,N[s],M[s],K[s], pN[s],pM[s],pK[s],P[s],C[s],iNout[s],oNout[s],tstart%pM[s],tstart/pM[s],tend%pM[s],tend/pM[s]);
 +            }
 +#pragma omp barrier /*barrier required before AllToAll (all input has to be their) - before timing to make timing more acurate*/
 +#ifdef NOGMX
 +            if (times != NULL && thread == 0)
 +            {
 +                time_local+=MPI_Wtime()-time;
 +            }
 +#endif
  
 +        /* ---------- END SPLIT , START TRANSPOSE------------ */
 +
 +            if (thread == 0)
 +            {
 +#ifdef NOGMX
 +                if (times!=0)
 +                {
 +                    time=MPI_Wtime();
 +                }
 +#else
 +                wallcycle_start(times,ewcPME_FFTCOMM);
 +#endif
  #ifdef FFT5D_MPI_TRANSPOSE
 -            FFTW(execute)(mpip[s]);  
 +                FFTW(execute)(mpip[s]);
  #else
 -            if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ))) 
 -                MPI_Alltoall(lin,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
 -            else
 -                MPI_Alltoall(lin,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
 -#endif /*FFT5D_MPI_TRANSPOSE*/
 -            if (times!=0)
 -                time_mpi[s]=MPI_Wtime()-time;
 -        }
 +#ifdef GMX_MPI
 +                if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ)))
 +                    MPI_Alltoall(lout2,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout3,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
 +                else
 +                    MPI_Alltoall(lout2,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout3,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
 +#else
 +                gmx_incons("fft5d MPI call without MPI configuration");
  #endif /*GMX_MPI*/
 +#endif /*FFT5D_MPI_TRANSPOSE*/
 +#ifdef NOGMX
 +                if (times!=0)
 +                {
 +                    time_mpi[s]=MPI_Wtime()-time;
 +                }
 +#else
 +                wallcycle_stop(times,ewcPME_FFTCOMM);
 +#endif
 +            }  /*master*/
 +        }  /* bPrallelDim */
 +#pragma omp barrier  /*both needed for parallel and non-parallel dimension (either have to wait on data from AlltoAll or from last FFT*/
  
 -    
 -        if (times!=0)
 +        /* ---------- END SPLIT + TRANSPOSE------------ */
 +
 +        /* ---------- START JOIN ------------ */
 +#ifdef NOGMX
 +        if (times != NULL && thread == 0)
 +        {
              time=MPI_Wtime();
 +        }
 +#endif
 +
 +        if (bParallelDim) {
 +            joinin = lout3;
 +        } else {
 +            joinin = fftout;
 +        }
          /*bring back in matrix form 
            thus make  new 1. axes contiguos
 -          also local transpose 1 and 2/3 */
 -        if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ))) 
 -            joinAxesTrans13(lin,lout,N[s],pM[s],K[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1]);
 -        else 
 -            joinAxesTrans12(lin,lout,N[s],M[s],pK[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1]);    
 -        if (times!=0)
 +          also local transpose 1 and 2/3 
 +          runs on thread used for following FFT (thus needing a barrier before but not afterwards)
 +        */
 +        if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ))) {
 +            if (pM[s]>0)
 +            {
 +                tstart = ( thread   *pM[s]*pN[s]/plan->nthreads);
 +                tend   = ((thread+1)*pM[s]*pN[s]/plan->nthreads);
 +                joinAxesTrans13(lin,joinin,N[s],pM[s],K[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1],tstart%pM[s],tstart/pM[s],tend%pM[s],tend/pM[s]);
 +            }
 +        }
 +        else {
 +            if (pN[s]>0)
 +            {
 +                tstart = ( thread   *pK[s]*pN[s]/plan->nthreads);
 +                tend   = ((thread+1)*pK[s]*pN[s]/plan->nthreads);
 +                joinAxesTrans12(lin,joinin,N[s],M[s],pK[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1],tstart%pN[s],tstart/pN[s],tend%pN[s],tend/pN[s]);
 +            }
 +        }
 +
 +#ifdef NOGMX
 +        if (times != NULL && thread == 0)
 +        {
              time_local+=MPI_Wtime()-time;
 -    
 -        if (plan->flags&FFT5D_DEBUG) print_localdata(lin, "%d %d: tranposed %d\n", s+1, plan);
 -                
 +        }
 +#endif
 +        if (plan->flags&FFT5D_DEBUG && thread == 0)
 +        {
 +            print_localdata(lin, "%d %d: tranposed %d\n", s+1, plan);
 +        }
 +        /* ---------- END JOIN ------------ */
 +
          /*if (debug) print_localdata(lin, "%d %d: transposed x-z\n", N1, M0, K, ZYX, coor);*/
 -    }    
 -    
 -    if (times!=0)
 -        time=MPI_Wtime();
 -    if (plan->flags&FFT5D_INPLACE) lout=lin;
 +    }  /* for(s=0;s<2;s++) */
 +#ifdef NOGMX
 +        if (times != NULL && thread == 0)
 +        {
 +            time=MPI_Wtime();
 +        }
 +#endif
 +
 +    if (plan->flags&FFT5D_INPLACE) lout=lin; /*in place currently not supported*/
 +
 +    /*  ----------- FFT ----------- */
 +    tstart = (thread*pM[s]*pK[s]/plan->nthreads)*C[s];
      if ((plan->flags&FFT5D_REALCOMPLEX) && (plan->flags&FFT5D_BACKWARD)) {
 -        gmx_fft_many_1d_real(p1d[s],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin,lout);
 +        gmx_fft_many_1d_real(p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin+tstart,lout+tstart);
      } else {
 -        gmx_fft_many_1d(     p1d[s],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD,               lin,lout);
 +        gmx_fft_many_1d(     p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD,               lin+tstart,lout+tstart);
      }
 +    /* ------------ END FFT ---------*/
  
 -    if (times!=0)
 +#ifdef NOGMX
 +    if (times != NULL && thread == 0)
 +    {
          time_fft+=MPI_Wtime()-time;
 -    if (plan->flags&FFT5D_DEBUG) print_localdata(lout, "%d %d: FFT %d\n", s, plan);
 -    /*if (debug) print_localdata(lout, "%d %d: FFT in y\n", N1, M, K0, YZX, coor);*/
 -    
 -    if (times!=0) {
 +
          times->fft+=time_fft;
          times->local+=time_local;
          times->mpi2+=time_mpi[1];
Simple merge
diff --cc src/mdlib/pme.c
index ed45fbfb4310a7feec78d608989c24071303c15d,735c0e878ae5f68e226540f6712e8a854155f178..02cea6e459f911ca99b665b8aa37c8ecb374e9ce
@@@ -2944,10 -2057,10 +2944,10 @@@ int gmx_pme_init(gmx_pme_t *         pm
      pme->nnodes_minor        = nnodes_minor;
  
  #ifdef GMX_MPI
 -    if (nnodes_major*nnodes_minor > 1 && PAR(cr)) 
 +    if (nnodes_major*nnodes_minor > 1)
      {
-         pme->mpi_comm        = cr->mpi_comm_mygroup;
+         pme->mpi_comm = cr->mpi_comm_mygroup;
 -        
 +
          MPI_Comm_rank(pme->mpi_comm,&pme->nodeid);
          MPI_Comm_size(pme->mpi_comm,&pme->nnodes);
          if (pme->nnodes != nnodes_major*nnodes_minor)