#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
-
+#ifdef __linux
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/syscall.h>
+#endif
#include <signal.h>
#include <stdlib.h>
-#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
-/* _isnan() */
-#include <float.h>
-#endif
-
#include "typedefs.h"
#include "smalloc.h"
#include "sysstuff.h"
#include "disre.h"
#include "orires.h"
#include "dihre.h"
-#include "pppm.h"
#include "pme.h"
#include "mdatoms.h"
#include "repl_ex.h"
#include "sighandler.h"
#include "tpxio.h"
#include "txtdump.h"
-
+#include "pull_rotation.h"
#include "md_openmm.h"
#ifdef GMX_LIB_MPI
#include <mpi.h>
#endif
-#ifdef GMX_THREADS
+#ifdef GMX_THREAD_MPI
#include "tmpi.h"
#endif
#include "md_openmm.h"
#endif
+#ifdef GMX_OPENMP
+#include <omp.h>
+#endif
+
typedef struct {
gmx_integrator_t *func;
gmx_large_int_t deform_init_init_step_tpx;
matrix deform_init_box_tpx;
-#ifdef GMX_THREADS
+#ifdef GMX_THREAD_MPI
tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
#endif
-#ifdef GMX_THREADS
+#ifdef GMX_THREAD_MPI
struct mdrunner_arglist
{
FILE *fplog;
}
-/* get the number of threads based on how many there were requested,
- which algorithms we're using, and how many particles there are. */
-static int get_nthreads(int nthreads_requested, t_inputrec *inputrec,
- gmx_mtop_t *mtop)
+/* Get the number of threads to use for thread-MPI based on how many
+ * were requested, which algorithms we're using,
+ * and how many particles there are.
+ */
+static int get_nthreads_mpi(int nthreads_requested, t_inputrec *inputrec,
+ gmx_mtop_t *mtop)
{
int nthreads,nthreads_new;
int min_atoms_per_thread;
gmx_large_int_t reset_counters;
gmx_edsam_t ed=NULL;
t_commrec *cr_old=cr;
- int nthreads=1;
+ int nthreads_mpi=1;
+ int nthreads_pme=1;
/* CAUTION: threads may be started later on in this function, so
cr doesn't reflect the final parallel state right now */
read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
/* NOW the threads will be started: */
-#ifdef GMX_THREADS
- nthreads = get_nthreads(nthreads_requested, inputrec, mtop);
+#ifdef GMX_THREAD_MPI
+ nthreads_mpi = get_nthreads_mpi(nthreads_requested, inputrec, mtop);
- if (nthreads > 1)
+ if (nthreads_mpi > 1)
{
/* now start the threads. */
- cr=mdrunner_start_threads(nthreads, fplog, cr_old, nfile, fnm,
+ cr=mdrunner_start_threads(nthreads_mpi, fplog, cr_old, nfile, fnm,
oenv, bVerbose, bCompact, nstglobalcomm,
ddxyz, dd_node_order, rdd, rconstr,
dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
#ifndef GMX_MPI
"but mdrun was compiled without threads or MPI enabled"
#else
-#ifdef GMX_THREADS
+#ifdef GMX_THREAD_MPI
"but the number of threads (option -nt) is 1"
#else
"but mdrun was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"
if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
{
+ if (cr->npmenodes > 0)
+ {
+ if (!EEL_PME(inputrec->coulombtype))
+ {
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "PME nodes are requested, but the system does not use PME electrostatics");
+ }
+ if (Flags & MD_PARTDEC)
+ {
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "PME nodes are requested, but particle decomposition does not support separate PME nodes");
+ }
+ }
+
cr->npmenodes = 0;
}
* This should be thread safe, since they are only written once
* and with identical values.
*/
-#ifdef GMX_THREADS
+#ifdef GMX_THREAD_MPI
tMPI_Thread_mutex_lock(&deform_init_box_mutex);
#endif
deform_init_init_step_tpx = inputrec->init_step;
copy_mat(box,deform_init_box_tpx);
-#ifdef GMX_THREADS
+#ifdef GMX_THREAD_MPI
tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
#endif
}
}
if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
-#ifdef GMX_THREADS
+#ifdef GMX_THREAD_MPI
/* With thread MPI only the master node/thread exists in mdrun.c,
* therefore non-master nodes need to open the "seppot" log file here.
*/
gmx_setup_nodecomm(fplog,cr);
}
- wcycle = wallcycle_init(fplog,resetstep,cr);
+ /* get number of OpenMP/PME threads
+ * env variable should be read only on one node to make sure it is identical everywhere */
+#ifdef GMX_OPENMP
+ if (EEL_PME(inputrec->coulombtype))
+ {
+ if (MASTER(cr))
+ {
+ char *ptr;
+ if ((ptr=getenv("GMX_PME_NTHREADS")) != NULL)
+ {
+ sscanf(ptr,"%d",&nthreads_pme);
+ }
+ if (fplog != NULL && nthreads_pme > 1)
+ {
+ fprintf(fplog,"Using %d threads for PME\n",nthreads_pme);
+ }
+ }
+ if (PAR(cr))
+ {
+ gmx_bcast_sim(sizeof(nthreads_pme),&nthreads_pme,cr);
+ }
+ }
+#endif
+
+ wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pme);
if (PAR(cr))
{
/* Master synchronizes its value of reset_counters with all nodes
fr = mk_forcerec();
init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
opt2fn("-table",nfile,fnm),
+ opt2fn("-tabletf",nfile,fnm),
opt2fn("-tablep",nfile,fnm),
opt2fn("-tableb",nfile,fnm),FALSE,pforce);
/* version for PCA_NOT_READ_NODE (see md.c) */
/*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
- "nofile","nofile","nofile",FALSE,pforce);
+ "nofile","nofile","nofile","nofile",FALSE,pforce);
*/
fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
}
}
- /* Initiate PPPM if necessary */
- if (fr->eeltype == eelPPPM)
- {
- if (mdatoms->nChargePerturbed)
- {
- gmx_fatal(FARGS,"Free energy with %s is not implemented",
- eel_names[fr->eeltype]);
- }
- status = gmx_pppm_init(fplog,cr,oenv,FALSE,TRUE,box,
- getenv("GMXGHAT"),inputrec, (Flags & MD_REPRODUCIBLE));
- if (status != 0)
- {
- gmx_fatal(FARGS,"Error %d initializing PPPM",status);
- }
- }
-
if (EEL_PME(fr->eeltype))
{
ewaldcoeff = fr->ewaldcoeff;
/* The PME only nodes need to know nChargePerturbed */
gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
}
+
+
+ /* Set CPU affinity. Can be important for performance.
+ On some systems (e.g. Cray) CPU Affinity is set by default.
+ But default assigning doesn't work (well) with only some ranks
+ having threads. This causes very low performance.
+ External tools have cumbersome syntax for setting affinity
+ in the case that only some ranks have threads.
+ Thus it is important that GROMACS sets the affinity internally at
+ if only PME is using threads.
+ */
+
+#ifdef GMX_OPENMP
+#ifdef __linux
+#ifdef GMX_LIB_MPI
+ {
+ int core;
+ MPI_Comm comm_intra; /* intra communicator (but different to nc.comm_intra includes PME nodes) */
+ MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),gmx_node_rank(),&comm_intra);
+ int local_omp_nthreads = (cr->duty & DUTY_PME) ? nthreads_pme : 1; /* threads on this node */
+ MPI_Scan(&local_omp_nthreads,&core, 1, MPI_INT, MPI_SUM, comm_intra);
+ core-=local_omp_nthreads; /* make exclusive scan */
+#pragma omp parallel firstprivate(core) num_threads(local_omp_nthreads)
+ {
+ cpu_set_t mask;
+ CPU_ZERO(&mask);
+ core+=omp_get_thread_num();
+ CPU_SET(core,&mask);
+ sched_setaffinity((pid_t) syscall (SYS_gettid),sizeof(cpu_set_t),&mask);
+ }
+ }
+#endif /*GMX_MPI*/
+#endif /*__linux*/
+#endif /*GMX_OPENMP*/
+
if (cr->duty & DUTY_PME)
{
status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
mtop ? mtop->natoms : 0,nChargePerturbed,
- (Flags & MD_REPRODUCIBLE));
+ (Flags & MD_REPRODUCIBLE),nthreads_pme);
if (status != 0)
{
gmx_fatal(FARGS,"Error %d initializing PME",status);
init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv,
EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
}
+
+ if (inputrec->bRot)
+ {
+ /* Initialize enforced rotation code */
+ init_rot(fplog,inputrec,nfile,fnm,cr,state->x,state->box,mtop,oenv,
+ bVerbose,Flags);
+ }
constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
{
finish_pull(fplog,inputrec->pull);
}
+
+ if (inputrec->bRot)
+ {
+ finish_rot(fplog,inputrec->rot);
+ }
+
}
else
{
rc=(int)gmx_get_stop_condition();
-#ifdef GMX_THREADS
+#ifdef GMX_THREAD_MPI
/* we need to join all threads. The sub-threads join when they
exit this function, but the master thread needs to be told to
wait for that. */