set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
-set(CMAKE_INSTALL_PREFIX "/usr/local/gromacs" CACHE STRING "" FORCE)
+set(CMAKE_INSTALL_PREFIX "/usr/local/gromacs" CACHE STRING "Installation prefix (installation will need write permissions here)" FORCE)
endif()
if(NOT CMAKE_BUILD_TYPE)
########################################################################
# User input options #
########################################################################
-option(GMX_DOUBLE "Use double precision. Much slower, use only if needed!)" OFF)
+option(GMX_DOUBLE "Use double precision (much slower, use only if you really need it)" OFF)
option(GMX_MPI "Build a parallel (message-passing) version of GROMACS" OFF)
-option(GMX_THREADS "Build a parallel (thread-based) version of GROMACS)" ON)
+option(GMX_THREADS "Build a parallel (thread-based) version of GROMACS (cannot be combined with MPI yet)" ON)
option(GMX_SOFTWARE_INVSQRT "Use GROMACS software 1/sqrt" ON)
mark_as_advanced(GMX_SOFTWARE_INVSQRT)
option(GMX_FAHCORE "Build a library with mdrun functionality" OFF)
mark_as_advanced(GMX_FAHCORE)
-option(GMX_OPENMM "Accelerated execution on GPUs through the OpenMM library" OFF)
+option(GMX_OPENMM "Accelerated execution on GPUs through the OpenMM library (rerun cmake after changing to see relevant options)" OFF)
set(GMX_ACCELERATION "auto"
CACHE STRING "Accelerated kernels. Pick one of: auto, none, SSE, BlueGene, Power6, ia64, altivec")
option(USE_VERSION_H "Generate development version string/information" ON)
mark_as_advanced(USE_VERSION_H)
-option(GMX_DEFAULT_SUFFIX "Use default GROMACS suffixes" ON)
-set(GMX_BINARY_SUFFIX "" CACHE STRING "Suffix for GROMACS binaries (default: _d for double).")
+option(GMX_DEFAULT_SUFFIX "Use default suffixes for GROMACS binaries and libs (_d for double, _mpi for MPI; rerun cmake after changing to see relevant options)" ON)
+set(GMX_BINARY_SUFFIX "" CACHE STRING "Suffix for GROMACS binaries (default: _d for double, _mpi for MPI, _mpi_d for MPI and double).")
set(GMX_LIBS_SUFFIX ""
- CACHE STRING "Suffix for GROMACS libs (default: _d for double, _mpi for mpi, _mpi_d for mpi and double).")
+ CACHE STRING "Suffix for GROMACS libs (default: _d for double, _mpi for MPI, _mpi_d for MPI and double).")
if (GMX_DEFAULT_SUFFIX)
+ set(GMX_BINARY_SUFFIX "")
+ set(GMX_LIBS_SUFFIX "")
if (GMX_MPI)
+ set(GMX_BINARY_SUFFIX "_mpi")
set(GMX_LIBS_SUFFIX "_mpi")
endif(GMX_MPI)
if (GMX_DOUBLE)
- set (GMX_BINARY_SUFFIX "_d")
+ set (GMX_BINARY_SUFFIX "${GMX_BINARY_SUFFIX}_d")
set (GMX_LIBS_SUFFIX "${GMX_LIBS_SUFFIX}_d")
endif(GMX_DOUBLE)
mark_as_advanced(FORCE GMX_BINARY_SUFFIX GMX_LIBS_SUFFIX)
+ message(STATUS "Using default binary suffix: \"${GMX_BINARY_SUFFIX}\"")
+ message(STATUS "Using default library suffix: \"${GMX_LIBS_SUFFIX}\"")
else(GMX_DEFAULT_SUFFIX)
- mark_as_advanced(CLEAR GMX_BINARY_SUFFIX GMX_LIBS_SUFFIX)
+ mark_as_advanced(CLEAR GMX_BINARY_SUFFIX GMX_LIBS_SUFFIX)
+ message(STATUS "Using manually set binary suffix: \"${GMX_BINARY_SUFFIX}\"")
+ message(STATUS "Using manually set library suffix: \"${GMX_LIBS_SUFFIX}\"")
endif(GMX_DEFAULT_SUFFIX)
set(PKG_CFLAGS "")
# this should at some point become VERSION_LESS
if(NOT Git_FOUND OR Git_VERSION STRLESS "1.5.1")
message("No compatible git version found, won't be able to generate proper development version information.")
+ set(USE_VERSION_H OFF)
endif()
endif()
else()
CFLAGS="$CFLAGS $PTHREAD_CFLAGS -I$srcdir/include"
CXXFLAGS="$CXXFLAGS $PTHREAD_CFLAGS -I$srcdir/include"
CC="$PTHREAD_CC "
- AC_DEFINE(THREAD_PTHREADS,,[Use pthreads for thread_mpi multithreading])
+ AC_DEFINE(THREAD_PTHREADS,,[Use pthreads for multithreading])
# profiling
AC_ARG_ENABLE(tmpi-profiling,
}
+static int gmx_mm_check_and_reset_overflow(void)
+{
+ int MXCSR;
+ int sse_overflow;
+
+ MXCSR = _mm_getcsr();
+ /* The overflow flag is bit 3 in the register */
+ if (MXCSR & 0x0008)
+ {
+ sse_overflow = 1;
+ /* Set the overflow flag to zero */
+ MXCSR = MXCSR & 0xFFF7;
+ _mm_setcsr(MXCSR);
+ }
+ else
+ {
+ sse_overflow = 0;
+ }
+
+ return sse_overflow;
+}
+
static inline __m128d
gmx_mm_invsqrt_pd(__m128d x)
}
+static int gmx_mm_check_and_reset_overflow(void)
+{
+ int MXCSR;
+ int sse_overflow;
+
+ MXCSR = _mm_getcsr();
+ /* The overflow flag is bit 3 in the register */
+ if (MXCSR & 0x0008)
+ {
+ sse_overflow = 1;
+ /* Set the overflow flag to zero */
+ MXCSR = MXCSR & 0xFFF7;
+ _mm_setcsr(MXCSR);
+ }
+ else
+ {
+ sse_overflow = 0;
+ }
+
+ return sse_overflow;
+}
+
+
/************************
* *
* Simple math routines *
These functions set/read the energyhistory_t structure
that is written to checkpoints in checkpoint.c */
-/* initialize the energyhistory_t data structure */
-void init_energyhistory(energyhistory_t * enerhist);
-
/* Set the energyhistory_t data structure from a mdebin structure */
void update_energyhistory(energyhistory_t * enerhist,t_mdebin * mdebin);
void init_mtop(gmx_mtop_t *mtop);
void init_top (t_topology *top);
void init_inputrec(t_inputrec *ir);
+void init_energyhistory(energyhistory_t * enerhist);
+void done_energyhistory(energyhistory_t * enerhist);
void init_gtc_state(t_state *state,int ngtc, int nnhpres, int nhchainlength);
void init_state(t_state *state,int natoms,int ngtc, int nnhpres, int nhchainlength);
[ implicit_genborn_params ]
; atype sar st pi gbr hct
-;H0 0.1 1 1 0.125 0.85 ; H
C 0.172 1 1.554 0.1875 0.72 ; C
CA 0.18 1 1.037 0.1875 0.72 ; C
CB 0.172 0.012 1.554 0.1875 0.72 ; C
C* 0.172 0.012 1.554 0.1875 0.72 ; C
H 0.1 1 1 0.115 0.85 ; H
HC 0.1 1 1 0.125 0.85 ; H
+H0 0.1 1 1 0.125 0.85 ; H
H1 0.1 1 1 0.125 0.85 ; H
HA 0.1 1 1 0.125 0.85 ; H
H4 0.1 1 1 0.115 0.85 ; H
aminoacids.n.tdb dna.rtp forcefield.itp spc.itp \
aminoacids.r2b ffbonded.itp gb.itp tip3p.itp \
aminoacids.rtp ffnabonded.itp ions.itp tip4p.itp \
-spce.itp tips3p.itp watermodels.dat
+spce.itp tips3p.itp watermodels.dat tip5p.itp
EXTRA_DIST = ${topol_DATA}
CT3 OC MCH3
CT3 OH1 MCH3
CT3 OS MCH3
- CT3 S MCH3
- CT3 SM MCH3
+ CT3 S MCH3S
CTL3 CL MCH3
CTL3 OSL MCH3
CTL3 CTL1 MCH3
OWT4 15.9994 ; TIP4P
HWT4 1.0080 ; TIP4P
MWT4 0.0000 ; TIP4P
+OWT5 15.9994 ; TIP5P
+HWT5 1.0080 ; TIP5P
+MWT5 0.0000 ; TIP5P
MNH3 0.0000 ; vsite (rigid tetrahedrical NH3 group)
MNH2 0.0000 ; vsite
-MCH3 0.0000 ; vsite (rigid CH3 group)
+MCH3 0.0000 ; vsite (rigid CH3 group connected to carbons)
+MCH3S 0.0000 ; vsite (rigid CH3 group connected to S)
; DNA and RNA section
P 30.974 ; Phosphorus ### For DNA
P2 30.974000 ; pyrophosphate phosphorus (see toppar_all27_na_nad_ppi.str) ### For DNA
MCH3 CT1 2 0.206892
MCH3 CT2 2 0.206000
MCH3 CT3 2 0.206179
-MCH3 MCH3 2 0.185689
-MCH3 S 2 0.233335
+MCH3 MCH3 2 0.187164
+MCH3S S 2 0.233335
+MCH3S MCH3S 2 0.185689
#else
; no heavy hydrogens.
; constraints for the rigid NH3 groups
MCH3 CT2 2 0.167162
MCH3 CT3 2 0.167354
MCH3 MCH3 2 0.093582
-MCH3 S 2 0.195314
+MCH3S S 2 0.195314
+MCH3S MCH3S 2 0.092844
#endif
; angle-derived constraints for OH and SH groups in proteins
; The constraint A-C is calculated from the angle A-B-C and bonds A-B, B-C.
OWT4 8 9.951400 0.0 A 3.15365e-01 6.48520e-01 ; TIP4p O
HWT4 1 4.032000 0.52 A 0.0 0.0 ; TIP4p H
MWT4 0 0.000000 -1.04 A 0.0 0.0 ; TIP 4p vsite
+OWT5 8 9.951400 0.0 A 3.12000e-01 6.69440e-01 ; TIP5p O
+HWT5 1 4.032000 0.241 A 0.0 0.0 ; TIP5p H
+MWT5 0 0.000000 -0.241 A 0.0 0.0 ; TIP5p vsite
OW 8 9.951400 -0.82 A 3.16557e-01 6.50194e-01 ; SPC 0
HW 1 4.032000 0.41 A 0.0 0.0; SPC H
#else
OWT4 8 15.999400 0.0 A 3.15365e-01 6.48520e-01 ; TIP4p O
HWT4 1 1.008000 0.52 A 0.0 0.0 ; TIP4p H
MWT4 0 0.000000 -1.04 A 0.0 0.0 ; TIP4p vsite
+OWT5 8 15.999400 0.0 A 3.12000e-01 6.69440e-01 ; TIP5p O
+HWT5 1 1.008000 0.241 A 0.0 0.0 ; TIP5p H
+MWT5 0 0.000000 -0.241 A 0.0 0.0 ; TIP5p vsite
OW 8 15.999400 -0.82 A 3.16557e-01 6.50194e-01 ; SPC O
HW 1 1.008000 0.41 A 0.0 0.0 ; SPC H
#endif
; special dummy-type particles
MNH3 0 0.000000 0.00 A 0.0 0.0
MNH2 0 0.000000 0.00 A 0.0 0.0
+; CH3 bound to carbons
MCH3 0 0.000000 0.00 A 0.0 0.0
+; CH3 bound to sulfur, type S only
+MCH3S 0 0.000000 0.00 A 0.0 0.0
; Ions and noble gases (useful for tutorials)
Cu2+ 29 63.54600 2.00 A 2.08470e-01 4.76976e+00
Ar 18 39.94800 0.00 A 3.41000e-01 2.74580e-02
--- /dev/null
+[ moleculetype ]
+; molname nrexcl
+SOL 2
+
+[ atoms ]
+; id at type res nr res name at name cg nr charge mass
+ 1 OWT5 1 SOL OW 1 0 15.99940
+ 2 HWT5 1 SOL HW1 1 0.241 1.00800
+ 3 HWT5 1 SOL HW2 1 0.241 1.00800
+ 4 MWT5 1 SOL LP1 1 -0.241 0.00000
+ 5 MWT5 1 SOL LP2 1 -0.241 0.00000
+
+#ifndef FLEXIBLE
+
+[ settles ]
+; i funct doh dhh
+1 1 0.09572 0.15139
+
+#else
+
+[ bonds ]
+; i j funct length force.c.
+1 2 1 0.09572 502416.0 0.09572 502416.0
+1 3 1 0.09572 502416.0 0.09572 502416.0
+
+[ angles ]
+; i j k funct angle force.c.
+2 1 3 1 104.52 628.02 104.52 628.02
+
+#endif
+
+
+[ virtual_sites3 ]
+; Vsite from funct a b c
+4 1 2 3 4 -0.344908262 -0.34490826 -6.4437903493
+5 1 2 3 4 -0.344908262 -0.34490826 6.4437903493
+
+
+[ exclusions ]
+1 2 3 4 5
+2 1 3 4 5
+3 1 2 4 5
+4 1 2 3 5
+5 1 2 3 4
+
+
+; The positions of the vsites are computed as follows:
+;
+; LP1 LP2
+;
+; O
+;
+; H1 H2
+;
+; angle A (H1-O-H2) = 104.52
+; angle B (M1-O-M2) = 109.47
+; dist C (H-O) = 0.09572 nm
+; dist D (M-O) = 0.070 nm
+;
+;atom x y z
+;O 0.0 0.0 0.0
+;H1 0.585882276 0.756950327 0.0
+;H2 0.585882276 -0.756950327 0.0
+;M1 -0.404151276 0.0 0.571543301
+;M2 -0.404151276 0.0 -0.571543301
+; Dummy pos x4 = x1 + a4*(x2-x1) + b4*(x3-x1) + c4*((x2-x1) x (x3-x1))
+; Dummy pos x5 = x1 + a5*(x2-x1) + b5*(x3-x1) + c5*((x2-x1) x (x3-x1))
+; a4 = b4 = a5 = b5 = (D*cos(B/2)) / (2*C*cos(A/2)) = -0.34490826
+; c5 = -c4 = (D * sin(B/2))/ (C^2 * sin(A)) = 6.4437903
}
/* remove a t_fileio into the list. We assume the fio is locked, and we leave
- it locked. */
-static void gmx_fio_remove(t_fileio *fio, gmx_bool global_lock)
+ it locked.
+ NOTE: We also assume that the open_file_mutex has been locked */
+static void gmx_fio_remove(t_fileio *fio)
{
t_fileio *prev;
-#ifdef GMX_THREADS
- /* first lock the big open_files mutex. */
- /* We don't want two processes operating on this list at the same time */
- if (global_lock)
- tMPI_Thread_mutex_lock(&open_file_mutex);
-#endif
-
/* lock prev, because we're changing it */
gmx_fio_lock(fio->prev);
/* and make sure we point nowhere in particular */
fio->next=fio->prev=fio;
-
-#ifdef GMX_THREADS
- /* now unlock the big open_files mutex. */
- if (global_lock)
- tMPI_Thread_mutex_unlock(&open_file_mutex);
-#endif
-
}
{
int rc = 0;
+#ifdef GMX_THREADS
+ /* first lock the big open_files mutex. */
+ /* We don't want two processes operating on the list at the same time */
+ tMPI_Thread_mutex_lock(&open_file_mutex);
+#endif
+
gmx_fio_lock(fio);
/* first remove it from the list */
- gmx_fio_remove(fio, TRUE);
+ gmx_fio_remove(fio);
rc=gmx_fio_close_locked(fio);
gmx_fio_unlock(fio);
sfree(fio);
+#ifdef GMX_THREADS
+ tMPI_Thread_mutex_unlock(&open_file_mutex);
+#endif
+
return rc;
}
if (cur->fp == fp)
{
rc=gmx_fio_close_locked(cur);
- gmx_fio_remove(cur,FALSE);
+ gmx_fio_remove(cur);
gmx_fio_stop_getting_next(cur);
break;
}
grid_index(gmx_ana_nbsearch_t *d, const ivec cell)
{
return cell[XX] + cell[YY] * d->ncelldim[XX]
- + cell[ZZ] * d->ncelldim[YY] * d->ncelldim[ZZ];
+ + cell[ZZ] * d->ncelldim[XX] * d->ncelldim[YY];
}
/*! \brief
eks->mvcos = 0;
}
-static void init_energyhistory(energyhistory_t *enh)
+void init_energyhistory(energyhistory_t * enerhist)
{
- enh->ener_ave = NULL;
- enh->ener_sum = NULL;
- enh->ener_sum_sim = NULL;
- enh->nener = 0;
+ enerhist->nener = 0;
+
+ enerhist->ener_ave = NULL;
+ enerhist->ener_sum = NULL;
+ enerhist->ener_sum_sim = NULL;
+ enerhist->dht = NULL;
+
+ enerhist->nsteps = 0;
+ enerhist->nsum = 0;
+ enerhist->nsteps_sim = 0;
+ enerhist->nsum_sim = 0;
+
+ enerhist->dht = NULL;
+}
+
+static void done_delta_h_history(delta_h_history_t *dht)
+{
+ int i;
+
+ for(i=0; i<dht->nndh; i++)
+ {
+ sfree(dht->dh[i]);
+ }
+ sfree(dht->dh);
+ sfree(dht->ndh);
+}
+
+void done_energyhistory(energyhistory_t * enerhist)
+{
+ sfree(enerhist->ener_ave);
+ sfree(enerhist->ener_sum);
+ sfree(enerhist->ener_sum_sim);
+ sfree(enerhist->dht);
+
+ if (enerhist->dht != NULL)
+ {
+ done_delta_h_history(enerhist->dht);
+ sfree(enerhist->dht);
+ }
}
void init_gtc_state(t_state *state, int ngtc, int nnhpres, int nhchainlength)
}
}
-static char *fgets3(FILE *fp,char ptr[],int *len)
+/* reads a line into ptr, adjusting len and renewing ptr if neccesary */
+static char *fgets3(FILE *fp,char **ptr,int *len, int maxlen)
{
char *p;
- int slen;
+ int len_remaining=*len; /* remaining amount of allocated bytes in buf */
+ int curp=0; /* current position in buf to read into */
- if (fgets(ptr,*len-1,fp) == NULL)
+ do
{
- return NULL;
- }
- p = ptr;
- while ((strchr(ptr,'\n') == NULL) && (!feof(fp)))
- {
- /* This line is longer than len characters, let's increase len! */
- *len += STRLEN;
- p += STRLEN;
- srenew(ptr,*len);
- if (fgets(p-1,STRLEN,fp) == NULL)
+ if (len_remaining < 2)
{
- break;
+ if ( *len + STRLEN < maxlen)
+ {
+ /* This line is longer than len characters, let's increase len! */
+ *len += STRLEN;
+ len_remaining += STRLEN;
+ srenew(*ptr, *len);
+ }
+ else
+ {
+ /*something is wrong, we'll just keep reading and return NULL*/
+ len_remaining = STRLEN;
+ curp=0;
+ }
}
+ if (fgets(*ptr + curp, len_remaining, fp)==NULL)
+ {
+ /* if last line, skip */
+ return NULL;
+ }
+ curp += len_remaining-1; /* overwrite the nul char in next iteration */
+ len_remaining = 1;
+ }
+ while ((strchr(*ptr,'\n') == NULL) && (!feof(fp)));
+
+ if ( *len + STRLEN >= maxlen )
+ {
+ return NULL; /* this line was too long */
}
+
if (feof(fp))
{
/* We reached EOF before '\n', skip this last line. */
return NULL;
}
- slen = strlen(ptr);
- if (ptr[slen-1] == '\n')
{
- ptr[slen-1] = '\0';
+ /* now remove newline */
+ int slen = strlen(*ptr);
+ if ((*ptr)[slen-1] == '\n')
+ {
+ (*ptr)[slen-1] = '\0';
+ }
}
- return ptr;
+ return *ptr;
}
static int wordcount(char *ptr)
*legend = NULL;
}
- while ((ptr = fgets3(fp,tmpbuf,&len)) != NULL && ptr[0]!='&') {
+ while ((ptr = fgets3(fp,&tmpbuf,&len,10*STRLEN)) != NULL && ptr[0]!='&') {
line++;
trim(ptr);
if (ptr[0] == '@') {
if (MASTER(cr))
{
- /* Update mdebin with energy history if appending to output files */
- if ( Flags & MD_APPENDFILES )
+ if (opt2bSet("-cpi",nfile,fnm))
{
- restore_energyhistory_from_state(mdebin,&state_global->enerhist);
+ /* Update mdebin with energy history if appending to output files */
+ if ( Flags & MD_APPENDFILES )
+ {
+ restore_energyhistory_from_state(mdebin,&state_global->enerhist);
+ }
+ else
+ {
+ /* We might have read an energy history from checkpoint,
+ * free the allocated memory and reset the counts.
+ */
+ done_energyhistory(&state_global->enerhist);
+ init_energyhistory(&state_global->enerhist);
+ }
}
- /* Set the initial energy history in state to zero by updating once */
+ /* Set the initial energy history in state by updating once */
update_energyhistory(&state_global->enerhist,mdebin);
}
static int factorize(int n,int **fac,int **mfac)
{
- int d,ndiv;
-
- /* Decompose n in factors */
- snew(*fac,n/2);
- snew(*mfac,n/2);
- d = 2;
- ndiv = 0;
- while (n > 1)
- {
- while (n % d == 0)
- {
- if (ndiv == 0 || (*fac)[ndiv-1] != d)
- {
- ndiv++;
- (*fac)[ndiv-1] = d;
- }
- (*mfac)[ndiv-1]++;
- n /= d;
- }
- d++;
- }
+ int d,ndiv;
+
+ /* Decompose n in factors */
+ snew(*fac,n/2);
+ snew(*mfac,n/2);
+ d = 2;
+ ndiv = 0;
+ while (n > 1)
+ {
+ while (n % d == 0)
+ {
+ if (ndiv == 0 || (*fac)[ndiv-1] != d)
+ {
+ ndiv++;
+ (*fac)[ndiv-1] = d;
+ }
+ (*mfac)[ndiv-1]++;
+ n /= d;
+ }
+ d++;
+ }
- return ndiv;
+ return ndiv;
+}
+
+static gmx_bool largest_divisor(int n)
+{
+ int ndiv,*div,*mdiv,ldiv;
+
+ ndiv = factorize(n,&div,&mdiv);
+ ldiv = div[ndiv-1];
+ sfree(div);
+ sfree(mdiv);
+
+ return ldiv;
+}
+
+static int lcd(int n1,int n2)
+{
+ int d,i;
+
+ d = 1;
+ for(i=2; (i<=n1 && i<=n2); i++)
+ {
+ if (n1 % i == 0 && n2 % i == 0)
+ {
+ d = i;
+ }
+ }
+
+ return d;
}
static gmx_bool fits_pme_ratio(int nnodes,int npme,float ratio)
int nnodes,int npme,float ratio)
{
int ndiv,*div,*mdiv,ldiv;
+ int npp_root3,npme_root2;
ndiv = factorize(nnodes-npme,&div,&mdiv);
ldiv = div[ndiv-1];
sfree(div);
sfree(mdiv);
+
+ npp_root3 = (int)(pow(nnodes-npme,1.0/3.0) + 0.5);
+ npme_root2 = (int)(sqrt(npme) + 0.5);
+
/* The check below gives a reasonable division:
* factor 5 allowed at 5 or more PP nodes,
* factor 7 allowed at 49 or more PP nodes.
*/
- if (ldiv > 3 + (int)(pow(nnodes-npme,1.0/3.0) + 0.5))
+ if (ldiv > 3 + npp_root3)
+ {
+ return FALSE;
+ }
+
+ /* Check if the number of PP and PME nodes have a reasonable sized
+ * denominator in common, such that we can use 2D PME decomposition
+ * when required (which requires nx_pp == nx_pme).
+ * The factor of 2 allows for a maximum ratio of 2^2=4
+ * between nx_pme and ny_pme.
+ */
+ if (lcd(nnodes-npme,npme)*2 < npme_root2)
{
return FALSE;
}
return npme;
}
-static int lcd(int n1,int n2)
-{
- int d,i;
-
- d = 1;
- for(i=2; (i<=n1 && i<=n2); i++)
- {
- if (n1 % i == 0 && n2 % i == 0)
- {
- d = i;
- }
- }
-
- return d;
-}
-
static int div_up(int n,int f)
{
return (n + f - 1)/f;
gmx_bool bInterCGBondeds,gmx_bool bInterCGMultiBody)
{
int npme,nkx,nky;
+ int ldiv;
real limit;
if (MASTER(cr))
{
+ if (cr->nnodes > 12)
+ {
+ ldiv = largest_divisor(cr->nnodes);
+ /* Check if the largest divisor is more than nnodes^2/3 */
+ if (ldiv*ldiv*ldiv > cr->nnodes*cr->nnodes)
+ {
+ gmx_fatal(FARGS,"The number of nodes you selected (%d) contains a large prime factor %d. In most cases this will lead to bad performance. Choose a number with smaller prime factors or set the decomposition (option -dd) manually.",
+ cr->nnodes,ldiv);
+ }
+ }
+
if (EEL_PME(ir->coulombtype))
{
if (cr->npmenodes >= 0)
NG, MG, KG is size of global data*/
static void splitaxes(t_complex* lout,const t_complex* lin,
int maxN,int maxM,int maxK, int pN, int pM, int pK,
- int P,int NG,int *N, int* oN) {
+ int P,int NG,int *N, int* oN)
+{
int x,y,z,i;
int in_i,out_i,in_z,out_z,in_y,out_y;
- for (i=0;i<P;i++) { /*index cube along long axis*/
- in_i = i*maxN*maxM*maxK;
- out_i = oN[i];
#ifdef FFT5D_THREADS
-#pragma omp parallel for private(in_z,out_z,y,in_y,out_y,x)
+ int zi;
+
+ /* In the thread parallel case we want to loop over z and i
+ * in a single for loop to allow for better load balancing.
+ */
+#pragma omp parallel for private(z,in_z,out_z,i,in_i,out_i,y,in_y,out_y,x) schedule(static)
+ for (zi=0; zi<pK*P; zi++)
+ {
+ z = zi/P;
+ i = zi - z*P;
+#else
+ for (z=0; z<pK; z++) /*3. z l*/
+ {
+#endif
+ in_z = z*maxN*maxM;
+ out_z = z*NG*pM;
+
+#ifndef FFT5D_THREADS
+ for (i=0; i<P; i++) /*index cube along long axis*/
#endif
- for (z=0;z<pK;z++) { /*3. z l*/
- in_z = in_i + z*maxN*maxM;
- out_z = out_i + z*NG*pM;
+ {
+ in_i = in_z + i*maxN*maxM*maxK;
+ out_i = out_z + oN[i];
for (y=0;y<pM;y++) { /*2. y k*/
- in_y = in_z + y*maxN;
- out_y = out_z + y*NG;
+ in_y = in_i + y*maxN;
+ out_y = out_i + y*NG;
for (x=0;x<N[i];x++) { /*1. x j*/
lout[in_y+x] = lin[out_y+x];
/*after split important that each processor chunk i has size maxN*maxM*maxK and thus being the same size*/
int i,x,y,z;
int in_i,out_i,in_x,out_x,in_z,out_z;
- for (i=0;i<P;i++) { /*index cube along long axis*/
- in_i = oK[i];
- out_i = i*maxM*maxN*maxK;
#ifdef FFT5D_THREADS
-#pragma omp parallel for private(in_x,out_x,z,in_z,out_z,y)
+ int xi;
+
+ /* In the thread parallel case we want to loop over x and i
+ * in a single for loop to allow for better load balancing.
+ */
+#pragma omp parallel for private(x,in_x,out_x,i,in_i,out_i,z,in_z,out_z,y) schedule(static)
+ for (xi=0; xi<pN*P; xi++)
+ {
+ x = xi/P;
+ i = xi - x*P;
+#else
+ for (x=0;x<pN;x++) /*1.j*/
+ {
#endif
- for (x=0;x<pN;x++) { /*1.j*/
- in_x = in_i + x*KG*pM;
- out_x = out_i + x;
- for (z=0;z<K[i];z++) { /*3.l*/
- in_z = in_x + z;
- out_z = out_x + z*maxM*maxN;
+ in_x = x*KG*pM;
+ out_x = x;
+
+#ifndef FFT5D_THREADS
+ for (i=0;i<P;i++) /*index cube along long axis*/
+#endif
+ {
+ in_i = in_x + oK[i];
+ out_i = out_x + i*maxM*maxN*maxK;
+ for (z=0;z<K[i];z++) /*3.l*/
+ {
+ in_z = in_i + z;
+ out_z = out_i + z*maxM*maxN;
for (y=0;y<pM;y++) { /*2.k*/
lin[in_z+y*KG] = lout[out_z+y*maxN];
}
int i,z,y,x;
int in_i,out_i,in_z,out_z,in_x,out_x;
- for (i=0;i<P;i++) { /*index cube along long axis*/
- in_i = oM[i];
- out_i = i*maxM*maxN*maxK;
#ifdef FFT5D_THREADS
-#pragma omp parallel for private(in_z,out_z,in_x,out_x,x,y)
+ int zi;
+
+ /* In the thread parallel case we want to loop over z and i
+ * in a single for loop to allow for better load balancing.
+ */
+#pragma omp parallel for private(i,in_i,out_i,z,in_z,out_z,in_x,out_x,x,y) schedule(static)
+ for (zi=0; zi<pK*P; zi++)
+ {
+ z = zi/P;
+ i = zi - z*P;
+#else
+ for (z=0; z<pK; z++)
+ {
#endif
- for (z=0;z<pK;z++) {
- in_z = in_i + z*MG*pN;
- out_z = out_i + z*maxM*maxN;
+ in_z = z*MG*pN;
+ out_z = z*maxM*maxN;
+
+#ifndef FFT5D_THREADS
+ for (i=0; i<P; i++) /*index cube along long axis*/
+#endif
+ {
+ in_i = in_z + oM[i];
+ out_i = out_z + i*maxM*maxN*maxK;
for (x=0;x<pN;x++) {
- in_x = in_z + x*MG;
- out_x = out_z + x;
+ in_x = in_i + x*MG;
+ out_x = out_i + x;
for (y=0;y<M[i];y++) {
lin[in_x+y] = lout[out_x+y*maxN];
}
}
-void init_energyhistory(energyhistory_t * enerhist)
-{
- enerhist->nener = 0;
-
- enerhist->ener_ave = NULL;
- enerhist->ener_sum = NULL;
- enerhist->ener_sum_sim = NULL;
- enerhist->dht = NULL;
-
- enerhist->nsteps = 0;
- enerhist->nsum = 0;
- enerhist->nsteps_sim = 0;
- enerhist->nsum_sim = 0;
-}
-
void update_energyhistory(energyhistory_t * enerhist,t_mdebin * mdebin)
{
int i;
pme->nky <= pme->pme_order*(pme->nnodes_minor > 1 ? 2 : 1) ||
pme->nkz <= pme->pme_order)
{
- gmx_fatal(FARGS,"The pme grid dimensions need to be larger than pme_order (%d) and in parallel larger than 2*pme_ordern for x and/or y",pme->pme_order);
+ gmx_fatal(FARGS,"The pme grid dimensions need to be larger than pme_order (%d) and in parallel larger than 2*pme_order for x and/or y",pme->pme_order);
}
if (pme->nnodes > 1) {
snew(td->v,td->nx);
snew(td->f,td->nx);
}
- for(i=td->nx0; (i<td->nx); i++)
+ for(i=0; (i<td->nx); i++)
td->x[i] = i/tabscale;
}
td->f[i] = Ftab;
}
+ /* Continue the table linearly from nx0 to 0.
+ * These values are only required for energy minimization with overlap or TPI.
+ */
+ for(i=td->nx0-1; i>=0; i--) {
+ td->v[i] = td->v[i+1] + td->f[i+1]*(td->x[i+1] - td->x[i]);
+ td->f[i] = td->f[i+1];
+ }
+
#ifdef DEBUG_SWITCH
gmx_fio_fclose(fp);
#endif
#include "pme.h"
#include "gbutil.h"
+#if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_X86_64_SSE2) )
+#if defined(GMX_DOUBLE)
+#include "gmx_sse2_double.h"
+#else
+#include "gmx_sse2_single.h"
+#endif
+#endif
+
static void global_max(t_commrec *cr,int *n)
{
int nbin;
double invbinw,*bin,refvolshift,logV,bUlogV;
real dvdl,prescorr,enercorr,dvdlcorr;
+ gmx_bool bEnergyOutOfBounds;
const char *tpid_leg[2]={"direct","reweighted"};
/* Since numerical problems can lead to extreme negative energies
enerd->term[F_DISPCORR] = enercorr;
enerd->term[F_EPOT] += enercorr;
enerd->term[F_PRES] += prescorr;
- enerd->term[F_DVDL] += dvdlcorr;
-
+ enerd->term[F_DVDL] += dvdlcorr;
+
+ epot = enerd->term[F_EPOT];
+ bEnergyOutOfBounds = FALSE;
+#if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_X86_64_SSE2) )
+ /* With SSE the energy can overflow, check for this */
+ if (gmx_mm_check_and_reset_overflow())
+ {
+ if (debug)
+ {
+ fprintf(debug,"Found an SSE overflow, assuming the energy is out of bounds\n");
+ }
+ bEnergyOutOfBounds = TRUE;
+ }
+#endif
/* If the compiler doesn't optimize this check away
* we catch the NAN energies. With tables extreme negative
* energies might occur close to r=0.
*/
- epot = enerd->term[F_EPOT];
if (epot != epot || epot*beta < bU_neg_limit)
+ {
+ bEnergyOutOfBounds = TRUE;
+ }
+ if (bEnergyOutOfBounds)
{
if (debug)
{
"[PAR]",
"Option [TT]-xpma[tt] writes the atomic covariance matrix to an xpm file,",
"i.e. for each atom pair the sum of the xx, yy and zz covariances is",
- "written."
+ "written.",
+ "[PAR]",
+ "Note that the diagonalization of a matrix requires memory and time",
+ "that will increase at least as fast as than the square of the number",
+ "of atoms involved. It is easy to run out of memory, in which",
+ "case this tool will probably exit with a 'Segmentation fault'. You",
+ "should consider carefully whether a reduced set of atoms will meet",
+ "your needs for lower costs."
};
static gmx_bool bFit=TRUE,bRef=FALSE,bM=FALSE,bPBC=TRUE;
static int end=-1;
if (MASTER(cr))
{
- /* Update mdebin with energy history if appending to output files */
- if ( Flags & MD_APPENDFILES )
+ if (opt2bSet("-cpi",nfile,fnm))
{
- restore_energyhistory_from_state(mdebin,&state_global->enerhist);
+ /* Update mdebin with energy history if appending to output files */
+ if ( Flags & MD_APPENDFILES )
+ {
+ restore_energyhistory_from_state(mdebin,&state_global->enerhist);
+ }
+ else
+ {
+ /* We might have read an energy history from checkpoint,
+ * free the allocated memory and reset the counts.
+ */
+ done_energyhistory(&state_global->enerhist);
+ init_energyhistory(&state_global->enerhist);
+ }
}
- /* Set the initial energy history in state to zero by updating once */
+ /* Set the initial energy history in state by updating once */
update_energyhistory(&state_global->enerhist,mdebin);
}