mark_as_advanced(USE_VERSION_H)
option(GMX_DEFAULT_SUFFIX "Use default GROMACS suffixes" ON)
-set(GMX_BINARY_SUFFIX "" CACHE STRING "Suffix for GROMACS binaries (default: _d for double).")
+set(GMX_BINARY_SUFFIX "" CACHE STRING "Suffix for GROMACS binaries (default: _d for double, _mpi for mpi, _mpi_d for mpi and double).")
set(GMX_LIBS_SUFFIX ""
CACHE STRING "Suffix for GROMACS libs (default: _d for double, _mpi for mpi, _mpi_d for mpi and double).")
if (GMX_DEFAULT_SUFFIX)
+ set(GMX_BINARY_SUFFIX "")
+ set(GMX_LIBS_SUFFIX "")
if (GMX_MPI)
+ set(GMX_BINARY_SUFFIX "_mpi")
set(GMX_LIBS_SUFFIX "_mpi")
endif(GMX_MPI)
if (GMX_DOUBLE)
- set (GMX_BINARY_SUFFIX "_d")
+ set (GMX_BINARY_SUFFIX "${GMX_BINARY_SUFFIX}_d")
set (GMX_LIBS_SUFFIX "${GMX_LIBS_SUFFIX}_d")
endif(GMX_DOUBLE)
mark_as_advanced(FORCE GMX_BINARY_SUFFIX GMX_LIBS_SUFFIX)
+ message(STATUS "Using default binary suffix: \"${GMX_BINARY_SUFFIX}\"")
+ message(STATUS "Using default library suffix: \"${GMX_LIBS_SUFFIX}\"")
else(GMX_DEFAULT_SUFFIX)
- mark_as_advanced(CLEAR GMX_BINARY_SUFFIX GMX_LIBS_SUFFIX)
+ mark_as_advanced(CLEAR GMX_BINARY_SUFFIX GMX_LIBS_SUFFIX)
+ message(STATUS "Using manually set binary suffix: \"${GMX_BINARY_SUFFIX}\"")
+ message(STATUS "Using manually set library suffix: \"${GMX_LIBS_SUFFIX}\"")
endif(GMX_DEFAULT_SUFFIX)
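# A sketch of the manual route (the suffix values below are examples only):
#   cmake . -DGMX_DEFAULT_SUFFIX=OFF -DGMX_BINARY_SUFFIX=_test -DGMX_LIBS_SUFFIX=_test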
set(PKG_CFLAGS "")
[ implicit_genborn_params ]
; atype sar st pi gbr hct
-;H0 0.1 1 1 0.125 0.85 ; H
C 0.172 1 1.554 0.1875 0.72 ; C
CA 0.18 1 1.037 0.1875 0.72 ; C
CB 0.172 0.012 1.554 0.1875 0.72 ; C
C* 0.172 0.012 1.554 0.1875 0.72 ; C
H 0.1 1 1 0.115 0.85 ; H
HC 0.1 1 1 0.125 0.85 ; H
+H0 0.1 1 1 0.125 0.85 ; H
H1 0.1 1 1 0.125 0.85 ; H
HA 0.1 1 1 0.125 0.85 ; H
H4 0.1 1 1 0.115 0.85 ; H
CT3 OC MCH3
CT3 OH1 MCH3
CT3 OS MCH3
- CT3 S MCH3
- CT3 SM MCH3
+ CT3 S MCH3S
CTL3 CL MCH3
CTL3 OSL MCH3
CTL3 CTL1 MCH3
MWT4 0.0000 ; TIP4P
MNH3 0.0000 ; vsite (rigid tetrahedral NH3 group)
MNH2 0.0000 ; vsite
-MCH3 0.0000 ; vsite (rigid CH3 group)
+MCH3 0.0000 ; vsite (rigid CH3 group connected to carbons)
+MCH3S 0.0000 ; vsite (rigid CH3 group connected to S)
; DNA and RNA section
P 30.974 ; Phosphorus ### For DNA
P2 30.974000 ; pyrophosphate phosphorus (see toppar_all27_na_nad_ppi.str) ### For DNA
MCH3 CT1 2 0.206892
MCH3 CT2 2 0.206000
MCH3 CT3 2 0.206179
-MCH3 MCH3 2 0.185689
-MCH3 S 2 0.233335
+MCH3 MCH3 2 0.187164
+MCH3S S 2 0.233335
+MCH3S MCH3S 2 0.185689
#else
; no heavy hydrogens.
; constraints for the rigid CH3 groups
MCH3 CT2 2 0.167162
MCH3 CT3 2 0.167354
MCH3 MCH3 2 0.093582
-MCH3 S 2 0.195314
+MCH3S S 2 0.195314
+MCH3S MCH3S 2 0.092844
#endif
; angle-derived constraints for OH and SH groups in proteins
; The constraint A-C is calculated from the angle A-B-C and bonds A-B, B-C.
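; (law of cosines: dAC = sqrt(dAB^2 + dBC^2 - 2*dAB*dBC*cos(angle A-B-C)))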
; special dummy-type particles
MNH3 0 0.000000 0.00 A 0.0 0.0
MNH2 0 0.000000 0.00 A 0.0 0.0
+; CH3 bound to carbons
MCH3 0 0.000000 0.00 A 0.0 0.0
+; CH3 bound to sulfur, type S only
+MCH3S 0 0.000000 0.00 A 0.0 0.0
; Ions and noble gases (useful for tutorials)
Cu2+ 29 63.54600 2.00 A 2.08470e-01 4.76976e+00
Ar 18 39.94800 0.00 A 3.41000e-01 2.74580e-02
return ndiv;
}
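+/* Checks whether n (assumed > 1) is prime by trial division up to sqrt(n) */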
+static gmx_bool is_prime(int n)
+{
+ int i;
+
+ i = 2;
+ while (i*i <= n)
+ {
+ if (n % i == 0)
+ {
+ return FALSE;
+ }
+ i++;
+ }
+
+ return TRUE;
+}
+
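+/* Despite the name, this returns the largest (greatest) common divisor
+ * of n1 and n2, found by trial division; the result is always >= 1.
+ */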
+static int lcd(int n1,int n2)
+{
+ int d,i;
+
+ d = 1;
+ for(i=2; (i<=n1 && i<=n2); i++)
+ {
+ if (n1 % i == 0 && n2 % i == 0)
+ {
+ d = i;
+ }
+ }
+
+ return d;
+}
+
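/* TRUE when the PME node fraction npme/nnodes exceeds 95% of the requested ratio */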
static gmx_bool fits_pme_ratio(int nnodes,int npme,float ratio)
{
return ((double)npme/(double)nnodes > 0.95*ratio);
int nnodes,int npme,float ratio)
{
int ndiv,*div,*mdiv,ldiv;
+ int npp_root3,npme_root2;
ndiv = factorize(nnodes-npme,&div,&mdiv);
ldiv = div[ndiv-1];
sfree(div);
sfree(mdiv);
+
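+ /* Rounded cube root of the PP node count and rounded square root
+ * of the PME node count, used in the divisibility checks below.
+ */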
+ npp_root3 = (int)(pow(nnodes-npme,1.0/3.0) + 0.5);
+ npme_root2 = (int)(sqrt(npme) + 0.5);
+
/* The check below gives a reasonable division:
* factor 5 allowed at 5 or more PP nodes,
* factor 7 allowed at 49 or more PP nodes.
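* (e.g. at 5 PP nodes, round(5^(1/3)) = 2, so the limit is 3+2 = 5
* and a largest factor of 5 is just accepted)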
*/
- if (ldiv > 3 + (int)(pow(nnodes-npme,1.0/3.0) + 0.5))
+ if (ldiv > 3 + npp_root3)
+ {
+ return FALSE;
+ }
+
+ /* Check whether the numbers of PP and PME nodes have a reasonably
+ * sized common divisor, such that we can use 2D PME decomposition
+ * when required (which requires nx_pp == nx_pme).
+ * The factor of 2 allows for a maximum ratio of 2^2=4
+ * between nx_pme and ny_pme.
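+ * Example: 63 PP and 16 PME nodes share only the divisor 1, and
+ * 1*2 < round(sqrt(16)) = 4, so that split would be rejected.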
+ */
+ if (lcd(nnodes-npme,npme)*2 < npme_root2)
{
return FALSE;
}
return npme;
}
-static int lcd(int n1,int n2)
-{
- int d,i;
-
- d = 1;
- for(i=2; (i<=n1 && i<=n2); i++)
- {
- if (n1 % i == 0 && n2 % i == 0)
- {
- d = i;
- }
- }
-
- return d;
-}
-
static int div_up(int n,int f)
{
return (n + f - 1)/f;
if (MASTER(cr))
{
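+ /* A large prime node count leaves almost no decomposition choice:
+ * without separate PME nodes only a 1 x 1 x nnodes grid fits.
+ */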
+ if (cr->nnodes > 12 && is_prime(cr->nnodes))
+ {
+ gmx_fatal(FARGS,"The number of nodes you selected (%d) is a large prime. In most cases this will lead to bad performance. Choose a non-prime number, or set the decomposition (option -dd) manually.",cr->nnodes);
+ }
+
if (EEL_PME(ir->coulombtype))
{
if (cr->npmenodes >= 0)
NG, MG, KG are the sizes of the global data*/
static void splitaxes(t_complex* lout,const t_complex* lin,
int maxN,int maxM,int maxK, int pN, int pM, int pK,
- int P,int NG,int *N, int* oN) {
+ int P,int NG,int *N, int* oN)
+{
int x,y,z,i;
int in_i,out_i,in_z,out_z,in_y,out_y;
- for (i=0;i<P;i++) { /*index cube along long axis*/
- in_i = i*maxN*maxM*maxK;
- out_i = oN[i];
#ifdef FFT5D_THREADS
-#pragma omp parallel for private(in_z,out_z,y,in_y,out_y,x)
+ int zi;
+
+ /* In the thread parallel case we want to loop over z and i
+ * in a single for loop to allow for better load balancing.
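+ * The flattened index runs as zi = z*P + i and is decomposed
+ * below via z = zi/P and i = zi - z*P.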
+ */
+#pragma omp parallel for private(z,in_z,out_z,i,in_i,out_i,y,in_y,out_y,x) schedule(static)
+ for (zi=0; zi<pK*P; zi++)
+ {
+ z = zi/P;
+ i = zi - z*P;
+#else
+ for (z=0; z<pK; z++) /*3. z l*/
+ {
+#endif
+ in_z = z*maxN*maxM;
+ out_z = z*NG*pM;
+
+#ifndef FFT5D_THREADS
+ for (i=0; i<P; i++) /*index cube along long axis*/
#endif
- for (z=0;z<pK;z++) { /*3. z l*/
- in_z = in_i + z*maxN*maxM;
- out_z = out_i + z*NG*pM;
+ {
+ in_i = in_z + i*maxN*maxM*maxK;
+ out_i = out_z + oN[i];
for (y=0;y<pM;y++) { /*2. y k*/
- in_y = in_z + y*maxN;
- out_y = out_z + y*NG;
+ in_y = in_i + y*maxN;
+ out_y = out_i + y*NG;
for (x=0;x<N[i];x++) { /*1. x j*/
lout[in_y+x] = lin[out_y+x];
/*after split important that each processor chunk i has size maxN*maxM*maxK and thus being the same size*/
int i,x,y,z;
int in_i,out_i,in_x,out_x,in_z,out_z;
- for (i=0;i<P;i++) { /*index cube along long axis*/
- in_i = oK[i];
- out_i = i*maxM*maxN*maxK;
#ifdef FFT5D_THREADS
-#pragma omp parallel for private(in_x,out_x,z,in_z,out_z,y)
+ int xi;
+
+ /* In the thread parallel case we want to loop over x and i
+ * in a single for loop to allow for better load balancing.
+ */
+#pragma omp parallel for private(x,in_x,out_x,i,in_i,out_i,z,in_z,out_z,y) schedule(static)
+ for (xi=0; xi<pN*P; xi++)
+ {
+ x = xi/P;
+ i = xi - x*P;
+#else
+ for (x=0;x<pN;x++) /*1.j*/
+ {
#endif
- for (x=0;x<pN;x++) { /*1.j*/
- in_x = in_i + x*KG*pM;
- out_x = out_i + x;
- for (z=0;z<K[i];z++) { /*3.l*/
- in_z = in_x + z;
- out_z = out_x + z*maxM*maxN;
+ in_x = x*KG*pM;
+ out_x = x;
+
+#ifndef FFT5D_THREADS
+ for (i=0;i<P;i++) /*index cube along long axis*/
+#endif
+ {
+ in_i = in_x + oK[i];
+ out_i = out_x + i*maxM*maxN*maxK;
+ for (z=0;z<K[i];z++) /*3.l*/
+ {
+ in_z = in_i + z;
+ out_z = out_i + z*maxM*maxN;
for (y=0;y<pM;y++) { /*2.k*/
lin[in_z+y*KG] = lout[out_z+y*maxN];
}
int i,z,y,x;
int in_i,out_i,in_z,out_z,in_x,out_x;
- for (i=0;i<P;i++) { /*index cube along long axis*/
- in_i = oM[i];
- out_i = i*maxM*maxN*maxK;
#ifdef FFT5D_THREADS
-#pragma omp parallel for private(in_z,out_z,in_x,out_x,x,y)
+ int zi;
+
+ /* In the thread parallel case we want to loop over z and i
+ * in a single for loop to allow for better load balancing.
+ */
+#pragma omp parallel for private(i,in_i,out_i,z,in_z,out_z,in_x,out_x,x,y) schedule(static)
+ for (zi=0; zi<pK*P; zi++)
+ {
+ z = zi/P;
+ i = zi - z*P;
+#else
+ for (z=0; z<pK; z++)
+ {
#endif
- for (z=0;z<pK;z++) {
- in_z = in_i + z*MG*pN;
- out_z = out_i + z*maxM*maxN;
+ in_z = z*MG*pN;
+ out_z = z*maxM*maxN;
+
+#ifndef FFT5D_THREADS
+ for (i=0; i<P; i++) /*index cube along long axis*/
+#endif
+ {
+ in_i = in_z + oM[i];
+ out_i = out_z + i*maxM*maxN*maxK;
for (x=0;x<pN;x++) {
- in_x = in_z + x*MG;
- out_x = out_z + x;
+ in_x = in_i + x*MG;
+ out_x = out_i + x;
for (y=0;y<M[i];y++) {
lin[in_x+y] = lout[out_x+y*maxN];
}
"[PAR]",
"Option [TT]-xpma[tt] writes the atomic covariance matrix to an xpm file,",
"i.e. for each atom pair the sum of the xx, yy and zz covariances is",
- "written."
+ "written.",
+ "[PAR]",
+ "Note that the diagonalization of a matrix requires memory and time",
+ "that will increase at least as fast as than the square of the number",
+ "of atoms involved. It is easy to run out of memory, in which",
+ "case this tool will probably exit with a 'Segmentation fault'. You",
+ "should consider carefully whether a reduced set of atoms will meet",
+ "your needs for lower costs."
};
static gmx_bool bFit=TRUE,bRef=FALSE,bM=FALSE,bPBC=TRUE;
static int end=-1;