 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2006, The GROMACS development team,
 * check out http://www.gromacs.org for more information.
 * Copyright (c) 2012, by the GROMACS development team, led by
 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
 * others, as listed in the AUTHORS file in the top-level source
 * directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
#include "gmx_fatal.h"
/* qsort comparator: orders two atom indices by the x coordinate of the
 * atoms they refer to, read through the file-scope pointer xptr.
 */
static int comp_xptr(const void *a,const void *b)
  if ((dx = (xptr[va][XX] - xptr[vb][XX])) < 0)
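/* Illustration only (not part of this tool): comp_xptr together with the
 * qsort call in do_my_pme below uses the classic index-array pattern - the
 * coordinates are never moved, only an array of atom indices is reordered
 * through a file-scope pointer. A minimal standalone sketch of that pattern,
 * with hypothetical names (coords, cmp_by_x, sort_by_x):
 */
#if 0
#include <stdlib.h>

static const double (*coords)[3];  /* set just before qsort, cleared after */

static int cmp_by_x(const void *a,const void *b)
{
  double dx = coords[*(const int *)a][0] - coords[*(const int *)b][0];
  return (dx < 0) ? -1 : (dx > 0) ? 1 : 0;
}

static void sort_by_x(int n,const double x[][3],int idx[])
{
  int i;
  for(i=0; (i<n); i++)
    idx[i] = i;                    /* start from the identity permutation */
  coords = x;
  qsort(idx,n,sizeof(idx[0]),cmp_by_x);
  coords = NULL;                   /* trap unintentional use of the ptr   */
}
#endif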
static void do_my_pme(FILE *fp,real tm,gmx_bool bVerbose,t_inputrec *ir,
                      rvec x[],rvec xbuf[],rvec f[],
                      real charge[],real qbuf[],real qqbuf[],
                      matrix box,gmx_bool bSort,
                      t_commrec *cr,t_nsborder *nsb,t_nrnb *nrnb,
                      t_block *excl,real qtot,
                      t_forcerec *fr,int index[],FILE *fp_xvg,
                      int ngroups,unsigned short cENER[])
  real   ener,vcorr,q,xx,dvdl=0,vdip,vcharge;
  tensor vir,vir_corr,vir_tot;
  /* Initialize local variables */
  fprintf(fp,"There are %d energy groups\n",ngroups);
  for(i=0; (i<ngroups); i++)
    snew(epme[i],ngroups);

  /* Put x in the box; this part needs to be parallelized properly */
  /*put_atoms_in_box(box,nsb->natoms,x);*/
  /* Here the sorting of x (and q) is done.
   * Alternatively, one could just put the atoms in one of the
   * cr->nnodes slabs. That is much cheaper than sorting.
   */
  for(i=0; (i<nsb->natoms); i++)
    index[i] = i;
  xptr = x;
  qsort(index,nsb->natoms,sizeof(index[0]),comp_xptr);
  xptr = NULL; /* To trap unintentional use of the ptr */
  /* After sorting we only need the part that is to be computed on
   * this processor. We also compute mu_tot here (the system dipole).
   */
  clear_rvec(mu_tot[0]);
  for(i=START(nsb); (i<START(nsb)+HOMENR(nsb)); i++) {
    for(m=0; (m<DIM); m++) {
      mu_tot[0][m] += q*xx;
  copy_rvec(mu_tot[0],mu_tot[1]);
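  /* The loop above accumulates the dipole of the home atoms,
   * mu_tot[m] = sum_i q_i * x_i[m] (in e nm), which is passed to
   * ewald_LRcorrection below for the surface/dipole correction term;
   * copy_rvec keeps both entries of mu_tot[] identical here.
   */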
  pr_rvec(debug,0,"qbuf",qbuf,nsb->natoms,TRUE);
  pr_rvecs(debug,0,"xbuf",xbuf,nsb->natoms);
  pr_rvecs(debug,0,"box",box,DIM);
  for(ig=0; (ig<ngroups); ig++) {
    for(jg=ig; (jg<ngroups); jg++) {
      for(i=START(nsb); (i<START(nsb)+HOMENR(nsb)); i++) {
        if ((cENER[i] == ig) || (cENER[i] == jg))
      ener  = do_pme(fp,bVerbose,ir,xbuf,f,qptr,qptr,box,cr,
                     nsb,nrnb,vir,fr->ewaldcoeff,FALSE,0,&dvdl,FALSE);
      vcorr = ewald_LRcorrection(fp,nsb,cr,fr,qptr,qptr,excl,xbuf,box,mu_tot,
                                 ir->ewald_geometry,ir->epsilon_surface,
                                 0,&dvdl,&vdip,&vcharge);
      gmx_sum(1,&vcorr,cr);
      epme[ig][jg] = ener+vcorr;
  fprintf(fp_xvg,"%10.3f",tm);
  for(ig=0; (ig<ngroups); ig++) {
    for(jg=ig; (jg<ngroups); jg++) {
        epme[ig][jg] -= epme[ig][ig]+epme[jg][jg];
      fprintf(fp_xvg," %12.5e",epme[ig][jg]);
  fprintf(fp_xvg,"\n");
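  /* The group loops above build half an energy matrix: for each pair of
   * energy groups (ig,jg) the PME energy of the combined groups is computed
   * first, and subtracting the diagonal (intra-group) terms then leaves the
   * pairwise interaction energy,
   *
   *   E_int(ig,jg) = E(ig U jg) - E(ig,ig) - E(jg,jg)
   *
   * so with two groups A and B, for example, epme[A][B] starts out as
   * E(A+B) and ends up holding only the A-B interaction energy.
   */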
  fprintf(fp,"Time: %10.3f Energy: %12.5e Correction: %12.5e Total: %12.5e\n",
          tm,ener,vcorr,ener+vcorr);
  fprintf(fp_xvg,"%10.3f %12.5e %12.5e %12.5e\n",tm,ener+vcorr,vdip,vcharge);
  m_add(vir,vir_corr,vir_tot);
  gmx_sum(9,vir_tot[0],cr);
  pr_rvecs(fp,0,"virial",vir_tot,DIM);
int main(int argc,char *argv[])
  static char *desc[] = {
    "The [TT]pmetest[tt] program tests the scaling of the PME code. When only given",
    "a [TT].tpr[tt] file it will compute PME for one frame. When given a trajectory",
    "it will do so for all the frames in the trajectory. Before the PME",
    "routine is called the coordinates are sorted along the X-axis.[PAR]",
    "As an extra service to the public the program can also compute",
    "long-range Coulomb energies for components of the system. When the",
    "[TT]-groups[tt] flag is given to the program the energy groups",
    "from the [TT].tpr[tt] file will be read, and half an energy matrix computed."
  };
  static t_filenm fnm[] = {
    { efTPX, NULL,      NULL,       ffREAD  },
    { efTRN, "-o",      NULL,       ffWRITE },
    { efLOG, "-g",      "pme",      ffWRITE },
    { efTRX, "-f",      NULL,       ffOPTRD },
    { efXVG, "-x",      "ener-pme", ffWRITE }
  };
#define NFILE asize(fnm)
  /* Command line options */
  static gmx_bool bVerbose=FALSE;
  static gmx_bool bOptFFT=FALSE;
  static gmx_bool bSort=FALSE;
  static int      ewald_geometry=eewg3D;
  static int      nnodes=1;
  static int      pme_order=0;
  static rvec     grid = { -1, -1, -1 };
  static real     rc   = 0.0;
  static real     dtol = 0.0;
  static gmx_bool bGroups = FALSE;
  static t_pargs pa[] = {
237 { "-np", FALSE, etINT, {&nnodes},
238 "Number of nodes, must be the same as used for [TT]grompp[tt]" },
239 { "-v", FALSE, etBOOL,{&bVerbose},
240 "Be loud and noisy" },
241 { "-sort", FALSE, etBOOL,{&bSort},
242 "Sort coordinates. Crucial for domain decomposition." },
243 { "-grid", FALSE, etRVEC,{&grid},
244 "Number of grid cells in X, Y, Z dimension (if -1 use from [TT].tpr[tt])" },
245 { "-order", FALSE, etINT, {&pme_order},
246 "Order of the PME spreading algorithm" },
247 { "-groups", FALSE, etBOOL, {&bGroups},
248 "Compute half an energy matrix based on the energy groups in your [TT].tpr[tt] file" },
249 { "-rc", FALSE, etREAL, {&rc},
250 "Rcoulomb for Ewald summation" },
251 { "-tol", FALSE, etREAL, {&dtol},
252 "Tolerance for Ewald summation" }
  int    natoms,step,status,i,ncg,root;
  real   t,lambda,ewaldcoeff,qtot;
  real   *charge,*qbuf,*qqbuf;
  /* Start the actual parallel code if necessary */
  cr = init_par(&argc,&argv);

  CopyRight(stderr,argv[0]);

  /* Parse command line on all processors, arguments are passed on in
   * init_par (see above).
   */
  parse_common_args(&argc,argv,
                    PCA_KEEP_ARGS | PCA_NOEXIT_ON_ARGS | PCA_BE_NICE |
                    PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET),
                    NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL);

  gmx_fatal(FARGS,"GROMACS compiled without MPI support - can't do parallel runs");
  /* Open log files on all processors */
  open_log(ftp2fn(efLOG,NFILE,fnm),cr);

  /* Read tpr file etc. */
  read_tpxheader(ftp2fn(efTPX,NFILE,fnm),&tpx,FALSE,NULL,NULL);
  read_tpx(ftp2fn(efTPX,NFILE,fnm),&step,&t,&lambda,ir,
           box,&natoms,x,NULL,NULL,&top);
  for(i=0; (i<natoms); i++) {
    charge[i] = top.atoms.atom[i].q;
  if (opt2parg_bSet("-grid",asize(pa),pa)) {

  /* Check command line parameters for consistency */
  if ((ir->nkx <= 0) || (ir->nky <= 0) || (ir->nkz <= 0))
    gmx_fatal(FARGS,"PME grid = %d %d %d",ir->nkx,ir->nky,ir->nkz);
  if (opt2parg_bSet("-rc",asize(pa),pa))
    ir->rcoulomb = rc;
  if (ir->rcoulomb <= 0)
    gmx_fatal(FARGS,"rcoulomb should be > 0 (not %f)",ir->rcoulomb);
  if (opt2parg_bSet("-order",asize(pa),pa))
    ir->pme_order = pme_order;
  if (ir->pme_order <= 0)
    gmx_fatal(FARGS,"pme_order should be > 0 (not %d)",ir->pme_order);
  if (opt2parg_bSet("-tol",asize(pa),pa))
    ir->ewald_rtol = dtol;
  if (ir->ewald_rtol <= 0)
    gmx_fatal(FARGS,"ewald_tol should be > 0 (not %f)",ir->ewald_rtol);
  /* Add parallelization code here */
  ncg = top.blocks[ebCGS].multinr[0];
  for(i=0; (i<cr->nnodes-1); i++)
    top.blocks[ebCGS].multinr[i] = min(ncg,(ncg*(i+1))/cr->nnodes);
  for( ; (i<MAXNODES); i++)
    top.blocks[ebCGS].multinr[i] = ncg;
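  /* The division above gives every node an (almost) equal share of the
   * charge groups; multinr[i] holds the end of node i's range. For example
   * (illustrative numbers), with ncg = 10 and cr->nnodes = 4 the boundaries
   * become 2, 5, 7, 10, i.e. the nodes get 2, 3, 2 and 3 charge groups;
   * the remaining entries up to MAXNODES are padded with ncg.
   */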
  /* Set some variables to zero to avoid core dumps */
  ir->opts.ngtc = ir->opts.ngacc = ir->opts.ngfrz = ir->opts.ngener = 0;

  /* Distribute the data over processors */
  MPI_Bcast(&natoms,1,MPI_INT,root,MPI_COMM_WORLD);
  MPI_Bcast(ir,sizeof(*ir),MPI_BYTE,root,MPI_COMM_WORLD);
  MPI_Bcast(&qtot,1,GMX_MPI_REAL,root,MPI_COMM_WORLD);

  /* Call some dedicated communication routines, master sends n-1 times */
  for(i=1; (i<cr->nnodes); i++) {
    mv_block(i,&(top.blocks[ebCGS]));
    mv_block(i,&(top.atoms.excl));
  ld_block(root,&(top.blocks[ebCGS]));
  ld_block(root,&(top.atoms.excl));

  MPI_Bcast(charge,natoms,GMX_MPI_REAL,root,MPI_COMM_WORLD);
  ewaldcoeff = calc_ewaldcoeff(ir->rcoulomb,ir->ewald_rtol);
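  /* calc_ewaldcoeff picks the Ewald splitting parameter from the cut-off
   * and the requested tolerance; roughly, beta is chosen such that
   * erfc(beta*rcoulomb) ~ ewald_rtol, i.e. the real-space part of the
   * Coulomb interaction has decayed to the tolerance at the cut-off.
   */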
  pr_inputrec(stdlog,0,"Inputrec",ir);
  /* Allocate memory for temp arrays etc. */

  /* Initialize the PME code */
  init_pme(stdlog,cr,ir->nkx,ir->nky,ir->nkz,ir->pme_order,
           natoms,FALSE,bOptFFT,ewald_geometry);

  /* MFlops accounting */

  /* Initialize the work division */
  calc_nsb(stdlog,&(top.blocks[ebCGS]),cr->nnodes,nsb,0);
  nsb->nodeid = cr->nodeid;
  print_nsb(stdlog,"pmetest",nsb);
  /* Initialize the forcerec */
  mdatoms = atoms2md(stdlog,&top.atoms,ir->opts.nFreeze,ir->eI,
                     ir->delta_t,0,ir->opts.tau_t,FALSE,FALSE);
  init_forcerec(stdlog,fr,ir,&top,cr,mdatoms,nsb,box,FALSE,NULL,NULL,FALSE);
  /* First do PME based on the coordinates in the tpr file, send them to
   * other processors if needed.
   */
  fprintf(stdlog,"-----\n"
          "Results based on tpr file %s\n",ftp2fn(efTPX,NFILE,fnm));

  MPI_Bcast(x[0],natoms*DIM,GMX_MPI_REAL,root,MPI_COMM_WORLD);
  MPI_Bcast(box[0],DIM*DIM,GMX_MPI_REAL,root,MPI_COMM_WORLD);
  MPI_Bcast(&t,1,GMX_MPI_REAL,root,MPI_COMM_WORLD);

  do_my_pme(stdlog,0,bVerbose,ir,x,xbuf,f,charge,qbuf,qqbuf,box,bSort,
            cr,nsb,&nrnb,&(top.atoms.excl),qtot,fr,index,NULL,
            bGroups ? ir->opts.ngener : 1,mdatoms->cENER);
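  /* Without -groups the whole system is treated as a single energy group,
   * so do_my_pme computes just one total PME energy here.
   */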
  /* If we have a trajectory file, we will read the frames in it and compute
   * the PME energy for each of them.
   */
  if (ftp2bSet(efTRX,NFILE,fnm)) {
    fprintf(stdlog,"-----\n"
            "Results based on trx file %s\n",ftp2fn(efTRX,NFILE,fnm));
    natoms = read_first_x(&status,ftp2fn(efTRX,NFILE,fnm),&t,&x,box);
    if (natoms != top.atoms.nr)
      gmx_fatal(FARGS,"natoms in trx = %d, in tpr = %d",natoms,top.atoms.nr);
    fp = xvgropen(ftp2fn(efXVG,NFILE,fnm),"PME Energy","Time (ps)","E (kJ/mol)");
      /* Send coordinates, box and time to the other nodes */
      MPI_Bcast(x[0],natoms*DIM,GMX_MPI_REAL,root,MPI_COMM_WORLD);
      MPI_Bcast(box[0],DIM*DIM,GMX_MPI_REAL,root,MPI_COMM_WORLD);
      MPI_Bcast(&t,1,GMX_MPI_REAL,root,MPI_COMM_WORLD);

      rm_pbc(&top.idef,nsb->natoms,box,x,x);
      /* Call the PME wrapper function */
      do_my_pme(stdlog,t,bVerbose,ir,x,xbuf,f,charge,qbuf,qqbuf,box,bSort,cr,
                nsb,&nrnb,&(top.atoms.excl),qtot,fr,index,fp,
                bGroups ? ir->opts.ngener : 1,mdatoms->cENER);
      /* Only the master processor reads more data */
      bCont = read_next_x(status,&t,natoms,x,box);
      /* Check whether we need to continue */
      MPI_Bcast(&bCont,1,MPI_INT,root,MPI_COMM_WORLD);
  /* Finish I/O, close files */

  /* Do some final I/O about performance, might be useful in debugging */
  fprintf(stdlog,"-----\n");
  print_nrnb(stdlog,&nrnb);

  /* Finish the parallel stuff */
  if (gmx_parallel_env_initialized())

  /* Thank the audience, as usual */