2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/math/utilities.h"
51 #include "gmx_fatal.h"
52 #include "gmx_fatal_collective.h"
56 #include "nonbonded.h"
65 #include "md_support.h"
66 #include "md_logging.h"
71 #include "mtop_util.h"
72 #include "nbnxn_search.h"
73 #include "nbnxn_atomdata.h"
74 #include "nbnxn_consts.h"
75 #include "gmx_omp_nthreads.h"
76 #include "gmx_detect_hardware.h"
79 /* MSVC definition for __cpuid() */
83 #include "types/nbnxn_cuda_types_ext.h"
84 #include "gpu_utils.h"
85 #include "nbnxn_cuda_data_mgmt.h"
86 #include "pmalloc_cuda.h"
88 t_forcerec *mk_forcerec(void)
/* Debug helper: prints the non-bonded parameter matrix nbfp for every
 * atom-type pair — Buckingham (a, b, c) when bBHAM, LJ (c6, c12) otherwise.
 * The /6.0 and /12.0 undo the derivative prefactors stored in nbfp
 * (see mk_nbfp). NOTE(review): interior lines are elided in this listing. */
98 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
102 for (i = 0; (i < atnr); i++)
104 for (j = 0; (j < atnr); j++)
106 fprintf(fp, "%2d - %2d", i, j);
109 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
110 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
114 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
115 C12(nbfp, atnr, i, j)/12.0);
/* Builds the flat non-bonded parameter matrix from the force-field
 * parameters: 3 reals per pair (a,b,c) for Buckingham, 2 (c6,c12) for LJ.
 * The 6.0/12.0 derivative prefactors are folded in here so the kernels
 * avoid per-step multiplications. Caller owns the snew'd array.
 * NOTE(review): interior lines are elided in this listing. */
122 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
130 snew(nbfp, 3*atnr*atnr);
131 for (i = k = 0; (i < atnr); i++)
133 for (j = 0; (j < atnr); j++, k++)
135 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
136 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
137 /* nbfp now includes the 6.0 derivative prefactor */
138 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
144 snew(nbfp, 2*atnr*atnr);
145 for (i = k = 0; (i < atnr); i++)
147 for (j = 0; (j < atnr); j++, k++)
149 /* nbfp now includes the 6.0/12.0 derivative prefactors */
150 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
151 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
/* Builds an LJ parameter matrix from per-type diagonal parameters using a
 * combination rule: geometric mean of c6/c12 by default, or Lorentz-Berthelot
 * (arithmetic sigma, geometric epsilon) when comb_rule == eCOMB_ARITHMETIC
 * and both c6 and c12 are non-zero. Used for the LJ-PME grid part.
 * NOTE(review): interior lines are elided in this listing. */
159 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
163 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
167 snew(nbfp, 2*atnr*atnr);
168 for (i = 0; i < atnr; ++i)
170 for (j = 0; j < atnr; ++j)
172 c6i = idef->iparams[i*(atnr+1)].lj.c6;
173 c12i = idef->iparams[i*(atnr+1)].lj.c12;
174 c6j = idef->iparams[j*(atnr+1)].lj.c6;
175 c12j = idef->iparams[j*(atnr+1)].lj.c12;
176 c6 = sqrt(c6i * c6j);
177 c12 = sqrt(c12i * c12j);
178 if (comb_rule == eCOMB_ARITHMETIC
179 && !gmx_numzero(c6) && !gmx_numzero(c12))
/* Lorentz-Berthelot: sigma_ij = (sigma_i+sigma_j)/2, eps_ij = sqrt(eps_i*eps_j)
 * (the eps product appears unrooted here because c6/c12 carry eps once each) */
181 sigmai = pow(c12i / c6i, 1.0/6.0);
182 sigmaj = pow(c12j / c6j, 1.0/6.0);
183 epsi = c6i * c6i / c12i;
184 epsj = c6j * c6j / c12j;
185 c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
186 c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
/* Store with the same 6.0/12.0 derivative prefactors as mk_nbfp */
188 C6(nbfp, atnr, i, j) = c6*6.0;
189 C12(nbfp, atnr, i, j) = c12*12.0;
195 /* This routine sets fr->solvent_opt to the most common solvent in the
196 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
197 * the fr->solvent_type array with the correct type (or esolNO).
199 * Charge groups that fulfill the conditions but are not identical to the
200 * most common one will be marked as esolNO in the solvent_type array.
202 * TIP3p is identical to SPC for these purposes, so we call it
203 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
205 * NOTE: QM particle should not
206 * become an optimized solvent. Not even if there is only one charge
216 } solvent_parameters_t;
/* Classifies one charge group as a candidate SPC (3-atom) or TIP4P (4-atom)
 * solvent, matching it against previously seen solvent parameter sets or
 * appending a new set. Writes the parameter-set index to *cg_sp and updates
 * the shared solvent_parameters list (realloc'd via srenew; ownership stays
 * with the caller through *solvent_parameters_p).
 * NOTE(review): interior lines are elided in this listing. */
219 check_solvent_cg(const gmx_moltype_t *molt,
222 const unsigned char *qm_grpnr,
223 const t_grps *qm_grps,
225 int *n_solvent_parameters,
226 solvent_parameters_t **solvent_parameters_p,
230 const t_blocka * excl;
241 solvent_parameters_t *solvent_parameters;
243 /* We use a list with parameters for each solvent type.
244 * Every time we discover a new molecule that fulfills the basic
245 * conditions for a solvent we compare with the previous entries
246 * in these lists. If the parameters are the same we just increment
247 * the counter for that type, and otherwise we create a new type
248 * based on the current molecule.
250 * Once we've finished going through all molecules we check which
251 * solvent is most common, and mark all those molecules while we
252 * clear the flag on all others.
255 solvent_parameters = *solvent_parameters_p;
257 /* Mark the cg first as non optimized */
260 /* Check if this cg has no exclusions with atoms in other charge groups
261 * and all atoms inside the charge group excluded.
262 * We only have 3 or 4 atom solvent loops.
264 if (GET_CGINFO_EXCL_INTER(cginfo) ||
265 !GET_CGINFO_EXCL_INTRA(cginfo))
270 /* Get the indices of the first atom in this charge group */
271 j0 = molt->cgs.index[cg0];
272 j1 = molt->cgs.index[cg0+1];
274 /* Number of atoms in our molecule */
280 "Moltype '%s': there are %d atoms in this charge group\n",
284 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
287 if (nj < 3 || nj > 4)
292 /* Check if we are doing QM on this group */
294 if (qm_grpnr != NULL)
296 for (j = j0; j < j1 && !qm; j++)
298 qm = (qm_grpnr[j] < qm_grps->nr - 1);
301 /* Cannot use solvent optimization with QM */
307 atom = molt->atoms.atom;
309 /* Still looks like a solvent, time to check parameters */
311 /* If it is perturbed (free energy) we can't use the solvent loops,
312 * so then we just skip to the next molecule.
316 for (j = j0; j < j1 && !perturbed; j++)
318 perturbed = PERTURBED(atom[j]);
326 /* Now it's only a question if the VdW and charge parameters
327 * are OK. Before doing the check we compare and see if they are
328 * identical to a possible previous solvent type.
329 * First we assign the current types and charges.
331 for (j = 0; j < nj; j++)
333 tmp_vdwtype[j] = atom[j0+j].type;
334 tmp_charge[j] = atom[j0+j].q;
337 /* Does it match any previous solvent type? */
338 for (k = 0; k < *n_solvent_parameters; k++)
343 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
344 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
345 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
350 /* Check that types & charges match for all atoms in molecule */
351 for (j = 0; j < nj && match == TRUE; j++)
353 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
357 if (tmp_charge[j] != solvent_parameters[k].charge[j])
364 /* Congratulations! We have a matched solvent.
365 * Flag it with this type for later processing.
368 solvent_parameters[k].count += nmol;
370 /* We are done with this charge group */
375 /* If we get here, we have a tentative new solvent type.
376 * Before we add it we must check that it fulfills the requirements
377 * of the solvent optimized loops. First determine which atoms have
380 for (j = 0; j < nj; j++)
383 tjA = tmp_vdwtype[j];
385 /* Go through all other types and see if any have non-zero
386 * VdW parameters when combined with this one.
388 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
390 /* We already checked that the atoms weren't perturbed,
391 * so we only need to check state A now.
395 has_vdw[j] = (has_vdw[j] ||
396 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
397 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
398 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
403 has_vdw[j] = (has_vdw[j] ||
404 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
405 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
410 /* Now we know all we need to make the final check and assignment. */
414 * For this we require that all atoms have charge,
415 * the charges on atom 2 & 3 should be the same, and only
416 * atom 1 might have VdW.
418 if (has_vdw[1] == FALSE &&
419 has_vdw[2] == FALSE &&
420 tmp_charge[0] != 0 &&
421 tmp_charge[1] != 0 &&
422 tmp_charge[2] == tmp_charge[1])
424 srenew(solvent_parameters, *n_solvent_parameters+1);
425 solvent_parameters[*n_solvent_parameters].model = esolSPC;
426 solvent_parameters[*n_solvent_parameters].count = nmol;
427 for (k = 0; k < 3; k++)
429 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
430 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
433 *cg_sp = *n_solvent_parameters;
434 (*n_solvent_parameters)++;
439 /* Or could it be a TIP4P?
440 * For this we require that atoms 2,3,4 have charge, but not atom 1.
441 * Only atom 1 might have VdW.
443 if (has_vdw[1] == FALSE &&
444 has_vdw[2] == FALSE &&
445 has_vdw[3] == FALSE &&
446 tmp_charge[0] == 0 &&
447 tmp_charge[1] != 0 &&
448 tmp_charge[2] == tmp_charge[1] &&
451 srenew(solvent_parameters, *n_solvent_parameters+1);
452 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
453 solvent_parameters[*n_solvent_parameters].count = nmol;
454 for (k = 0; k < 4; k++)
456 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
457 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
460 *cg_sp = *n_solvent_parameters;
461 (*n_solvent_parameters)++;
465 *solvent_parameters_p = solvent_parameters;
/* Scans every charge group in the topology via check_solvent_cg, picks the
 * most common solvent parameter set, stores its model in fr->solvent_opt,
 * and marks matching charge groups with SET_CGINFO_SOLOPT (others get
 * esolNO). NOTE(review): interior lines are elided in this listing. */
469 check_solvent(FILE * fp,
470 const gmx_mtop_t * mtop,
472 cginfo_mb_t *cginfo_mb)
475 const t_block * mols;
476 const gmx_moltype_t *molt;
477 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
478 int n_solvent_parameters;
479 solvent_parameters_t *solvent_parameters;
485 fprintf(debug, "Going to determine what solvent types we have.\n");
490 n_solvent_parameters = 0;
491 solvent_parameters = NULL;
492 /* Allocate temporary array for solvent type */
493 snew(cg_sp, mtop->nmolblock);
497 for (mb = 0; mb < mtop->nmolblock; mb++)
499 molt = &mtop->moltype[mtop->molblock[mb].type];
501 /* Here we have to loop over all individual molecules
502 * because we need to check for QMMM particles.
504 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
505 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
506 nmol = mtop->molblock[mb].nmol/nmol_ch;
507 for (mol = 0; mol < nmol_ch; mol++)
510 am = mol*cgs->index[cgs->nr];
511 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
513 check_solvent_cg(molt, cg_mol, nmol,
514 mtop->groups.grpnr[egcQMMM] ?
515 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
516 &mtop->groups.grps[egcQMMM],
518 &n_solvent_parameters, &solvent_parameters,
519 cginfo_mb[mb].cginfo[cgm+cg_mol],
520 &cg_sp[mb][cgm+cg_mol]);
523 cg_offset += cgs->nr;
524 at_offset += cgs->index[cgs->nr];
527 /* Puh! We finished going through all charge groups.
528 * Now find the most common solvent model.
531 /* Most common solvent this far */
533 for (i = 0; i < n_solvent_parameters; i++)
536 solvent_parameters[i].count > solvent_parameters[bestsp].count)
544 bestsol = solvent_parameters[bestsp].model;
/* DISABLE_WATER_NLIST is a compile-time escape hatch to turn the
 * water-loop optimization off entirely */
551 #ifdef DISABLE_WATER_NLIST
556 for (mb = 0; mb < mtop->nmolblock; mb++)
558 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
559 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
560 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
562 if (cg_sp[mb][i] == bestsp)
564 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
569 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
576 if (bestsol != esolNO && fp != NULL)
578 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
580 solvent_parameters[bestsp].count);
583 sfree(solvent_parameters);
584 fr->solvent_opt = bestsol;
588 acNONE = 0, acCONSTRAINT, acSETTLE
/* Builds the per-molblock charge-group info array (cginfo_mb): per-cg flags
 * for energy group id, constraint/SETTLE participation, intra/inter-cg
 * exclusions, presence of VdW/charge, and cg size. Also runs the solvent
 * optimizer (unless bNoSolvOpt or GMX_NO_SOLV_OPT) and reports via
 * *bExcl_IntraCGAll_InterCGNone whether all exclusions are intra-cg.
 * NOTE(review): interior lines are elided in this listing. */
591 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
592 t_forcerec *fr, gmx_bool bNoSolvOpt,
593 gmx_bool *bExcl_IntraCGAll_InterCGNone)
596 const t_blocka *excl;
597 const gmx_moltype_t *molt;
598 const gmx_molblock_t *molb;
599 cginfo_mb_t *cginfo_mb;
602 int cg_offset, a_offset, cgm, am;
603 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
607 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
609 ncg_tot = ncg_mtop(mtop);
610 snew(cginfo_mb, mtop->nmolblock);
/* Precompute, per atom type, whether any pair involving it has non-zero
 * VdW parameters */
612 snew(type_VDW, fr->ntype);
613 for (ai = 0; ai < fr->ntype; ai++)
615 type_VDW[ai] = FALSE;
616 for (j = 0; j < fr->ntype; j++)
618 type_VDW[ai] = type_VDW[ai] ||
620 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
621 C12(fr->nbfp, fr->ntype, ai, j) != 0;
625 *bExcl_IntraCGAll_InterCGNone = TRUE;
628 snew(bExcl, excl_nalloc);
631 for (mb = 0; mb < mtop->nmolblock; mb++)
633 molb = &mtop->molblock[mb];
634 molt = &mtop->moltype[molb->type];
638 /* Check if the cginfo is identical for all molecules in this block.
639 * If so, we only need an array of the size of one molecule.
640 * Otherwise we make an array of #mol times #cgs per molecule.
644 for (m = 0; m < molb->nmol; m++)
646 am = m*cgs->index[cgs->nr];
647 for (cg = 0; cg < cgs->nr; cg++)
650 a1 = cgs->index[cg+1];
651 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
652 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
656 if (mtop->groups.grpnr[egcQMMM] != NULL)
658 for (ai = a0; ai < a1; ai++)
660 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
661 mtop->groups.grpnr[egcQMMM][a_offset +ai])
670 cginfo_mb[mb].cg_start = cg_offset;
671 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
672 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
673 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
674 cginfo = cginfo_mb[mb].cginfo;
676 /* Set constraints flags for constrained atoms */
677 snew(a_con, molt->atoms.nr);
678 for (ftype = 0; ftype < F_NRE; ftype++)
680 if (interaction_function[ftype].flags & IF_CONSTRAINT)
685 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
689 for (a = 0; a < nral; a++)
691 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
692 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
698 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
701 am = m*cgs->index[cgs->nr];
702 for (cg = 0; cg < cgs->nr; cg++)
705 a1 = cgs->index[cg+1];
707 /* Store the energy group in cginfo */
708 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
709 SET_CGINFO_GID(cginfo[cgm+cg], gid);
711 /* Check the intra/inter charge group exclusions */
712 if (a1-a0 > excl_nalloc)
714 excl_nalloc = a1 - a0;
715 srenew(bExcl, excl_nalloc);
717 /* bExclIntraAll: all intra cg interactions excluded
718 * bExclInter: any inter cg interactions excluded
720 bExclIntraAll = TRUE;
724 for (ai = a0; ai < a1; ai++)
726 /* Check VDW and electrostatic interactions */
727 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
728 type_VDW[molt->atoms.atom[ai].typeB]);
729 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
730 molt->atoms.atom[ai].qB != 0);
732 /* Clear the exclusion list for atom ai */
733 for (aj = a0; aj < a1; aj++)
735 bExcl[aj-a0] = FALSE;
737 /* Loop over all the exclusions of atom ai */
738 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
741 if (aj < a0 || aj >= a1)
750 /* Check if ai excludes a0 to a1 */
751 for (aj = a0; aj < a1; aj++)
755 bExclIntraAll = FALSE;
762 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
765 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
773 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
777 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
779 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
781 /* The size in cginfo is currently only read with DD */
782 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
786 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
790 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
792 /* Store the charge group size */
793 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
795 if (!bExclIntraAll || bExclInter)
797 *bExcl_IntraCGAll_InterCGNone = FALSE;
804 cg_offset += molb->nmol*cgs->nr;
805 a_offset += molb->nmol*cgs->index[cgs->nr];
809 /* the solvent optimizer is called after the QM is initialized,
810 * because we don't want to have the QM subsystem to become an
814 check_solvent(fplog, mtop, fr, cginfo_mb);
816 if (getenv("GMX_NO_SOLV_OPT"))
820 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
821 "Disabling all solvent optimization\n");
823 fr->solvent_opt = esolNO;
827 fr->solvent_opt = esolNO;
829 if (!fr->solvent_opt)
831 for (mb = 0; mb < mtop->nmolblock; mb++)
833 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
835 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
/* Expands the compact per-molblock cginfo (one copy per identical molecule)
 * into a flat per-charge-group array covering the whole system; the modulo
 * maps each global cg index back onto the stored template entries.
 * NOTE(review): interior lines are elided in this listing. */
843 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
848 ncg = cgi_mb[nmb-1].cg_end;
851 for (cg = 0; cg < ncg; cg++)
853 while (cg >= cgi_mb[mb].cg_end)
858 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
/* Computes system-wide sums of charge (qsum), squared charge (q2sum) and a
 * c6-based sum (c6sum) for topology state A, and for state B when free
 * energy is on (otherwise B copies A). Results go into fr->qsum/q2sum/c6sum
 * and the total charge is logged. NOTE(review): interior lines are elided
 * in this listing; sqrc6 is presumably sqrt(c6) — confirm against full source. */
864 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
866 /* This now calculates the sums for q and c6 */
867 double qsum, q2sum, q, c6sum, c6, sqrc6;
869 const t_atoms *atoms;
874 for (mb = 0; mb < mtop->nmolblock; mb++)
876 nmol = mtop->molblock[mb].nmol;
877 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
878 for (i = 0; i < atoms->nr; i++)
880 q = atoms->atom[i].q;
883 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
885 c6sum += nmol*sqrc6*sqrc6;
889 fr->q2sum[0] = q2sum;
890 fr->c6sum[0] = c6sum/12;
892 if (fr->efep != efepNO)
897 for (mb = 0; mb < mtop->nmolblock; mb++)
899 nmol = mtop->molblock[mb].nmol;
900 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
901 for (i = 0; i < atoms->nr; i++)
903 q = atoms->atom[i].qB;
906 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
908 c6sum += nmol*sqrc6*sqrc6;
911 fr->q2sum[1] = q2sum;
912 fr->c6sum[1] = c6sum/12;
917 fr->qsum[1] = fr->qsum[0];
918 fr->q2sum[1] = fr->q2sum[0];
919 fr->c6sum[1] = fr->c6sum[0];
923 if (fr->efep == efepNO)
925 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
929 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
930 fr->qsum[0], fr->qsum[1]);
/* Refreshes box-dependent force-record parameters; for generalized
 * reaction-field electrostatics this recomputes kappa, k_rf and c_rf.
 * NOTE(review): interior lines are elided in this listing. */
935 void update_forcerec(t_forcerec *fr, matrix box)
937 if (fr->eeltype == eelGRF)
939 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
940 fr->rcoulomb, fr->temp, fr->zsquare, box,
941 &fr->kappa, &fr->k_rf, &fr->c_rf);
/* Computes the system-average C6 and C12 parameters (fr->avcsix/avctwelve)
 * used for the long-range dispersion correction, per free-energy state.
 * Counts all type pairs, subtracts excluded pairs, and applies special
 * handling for test-particle insertion (TPI) and LJ-PME (where the
 * correction is for the difference to the combination-rule C6).
 * NOTE(review): interior lines are elided in this listing. */
945 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
947 const t_atoms *atoms, *atoms_tpi;
948 const t_blocka *excl;
949 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
/* Pair counts can exceed 32 bits for large systems; use 64-bit integers
 * when available, double otherwise */
950 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
951 long long int npair, npair_ij, tmpi, tmpj;
953 double npair, npair_ij, tmpi, tmpj;
955 double csix, ctwelve;
959 real *nbfp_comb = NULL;
965 /* For LJ-PME, we want to correct for the difference between the
966 * actual C6 values and the C6 values used by the LJ-PME based on
967 * combination rules. */
969 if (EVDW_PME(fr->vdwtype))
971 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
972 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
973 for (tpi = 0; tpi < ntp; ++tpi)
975 for (tpj = 0; tpj < ntp; ++tpj)
977 C6(nbfp_comb, ntp, tpi, tpj) =
978 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
979 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
/* q = 0: state A; q = 1: state B (only with free energy) */
984 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
992 /* Count the types so we avoid natoms^2 operations */
993 snew(typecount, ntp);
994 gmx_mtop_count_atomtypes(mtop, q, typecount);
996 for (tpi = 0; tpi < ntp; tpi++)
998 for (tpj = tpi; tpj < ntp; tpj++)
1000 tmpi = typecount[tpi];
1001 tmpj = typecount[tpj];
1004 npair_ij = tmpi*tmpj;
1008 npair_ij = tmpi*(tmpi - 1)/2;
1012 /* nbfp now includes the 6.0 derivative prefactor */
1013 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1017 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1018 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1019 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1025 /* Subtract the excluded pairs.
1026 * The main reason for subtracting exclusions is that in some cases
1027 * some combinations might never occur and the parameters could have
1028 * any value. These unused values should not influence the dispersion
1031 for (mb = 0; mb < mtop->nmolblock; mb++)
1033 nmol = mtop->molblock[mb].nmol;
1034 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1035 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1036 for (i = 0; (i < atoms->nr); i++)
1040 tpi = atoms->atom[i].type;
1044 tpi = atoms->atom[i].typeB;
1046 j1 = excl->index[i];
1047 j2 = excl->index[i+1];
1048 for (j = j1; j < j2; j++)
1055 tpj = atoms->atom[k].type;
1059 tpj = atoms->atom[k].typeB;
1063 /* nbfp now includes the 6.0 derivative prefactor */
1064 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1068 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1069 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1070 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1080 /* Only correct for the interaction of the test particle
1081 * with the rest of the system.
1084 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1087 for (mb = 0; mb < mtop->nmolblock; mb++)
1089 nmol = mtop->molblock[mb].nmol;
1090 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1091 for (j = 0; j < atoms->nr; j++)
1094 /* Remove the interaction of the test charge group
1097 if (mb == mtop->nmolblock-1)
1101 if (mb == 0 && nmol == 1)
1103 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1108 tpj = atoms->atom[j].type;
1112 tpj = atoms->atom[j].typeB;
1114 for (i = 0; i < fr->n_tpi; i++)
1118 tpi = atoms_tpi->atom[i].type;
1122 tpi = atoms_tpi->atom[i].typeB;
1126 /* nbfp now includes the 6.0 derivative prefactor */
1127 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1131 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1132 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1133 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1140 if (npair - nexcl <= 0 && fplog)
1142 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
/* Normalize the sums to averages over the non-excluded pairs */
1148 csix /= npair - nexcl;
1149 ctwelve /= npair - nexcl;
1153 fprintf(debug, "Counted %d exclusions\n", nexcl);
1154 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1155 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1157 fr->avcsix[q] = csix;
1158 fr->avctwelve[q] = ctwelve;
1161 if (EVDW_PME(fr->vdwtype))
1168 if (fr->eDispCorr == edispcAllEner ||
1169 fr->eDispCorr == edispcAllEnerPres)
1171 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1172 fr->avcsix[0], fr->avctwelve[0]);
1176 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
/* Scans all atom-type pairs actually present in the topology and records
 * the largest Buckingham b parameter in fr->bham_b_max (used to size the
 * exponential tables); also tracks the minimum for the log message.
 * NOTE(review): interior lines are elided in this listing. */
1182 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1183 const gmx_mtop_t *mtop)
1185 const t_atoms *at1, *at2;
1186 int mt1, mt2, i, j, tpi, tpj, ntypes;
1192 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1199 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1201 at1 = &mtop->moltype[mt1].atoms;
1202 for (i = 0; (i < at1->nr); i++)
1204 tpi = at1->atom[i].type;
/* Sanity check: an atom type index beyond ntypes means a corrupt topology */
1207 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1210 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1212 at2 = &mtop->moltype[mt2].atoms;
1213 for (j = 0; (j < at2->nr); j++)
1215 tpj = at2->atom[j].type;
1218 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1220 b = BHAMB(nbfp, ntypes, tpi, tpj);
1221 if (b > fr->bham_b_max)
1225 if ((b < bmin) || (bmin == -1))
1235 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1236 bmin, fr->bham_b_max);
/* Loads a user non-bonded table (xvg) for the energy-group pair eg1/eg2 —
 * the pair names are spliced into the filename before the extension — then
 * splits the combined elec+vdw table into separate, aligned electrostatics
 * (4 values/point) and VdW (8 values/point) tables for cache efficiency.
 * NOTE(review): interior lines are elided in this listing. */
1240 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1241 t_forcerec *fr, real rtab,
1242 const t_commrec *cr,
1243 const char *tabfn, char *eg1, char *eg2,
1253 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1258 sprintf(buf, "%s", tabfn);
1261 /* Append the two energy group names */
1262 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1263 eg1, eg2, ftp2ext(efXVG));
1265 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1266 /* Copy the contents of the table to separate coulomb and LJ tables too,
1267 * to improve cache performance.
1269 /* For performance reasons we want
1270 * the table data to be aligned to 16-byte. The pointers could be freed
1271 * but currently aren't.
1273 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1274 nbl->table_elec.format = nbl->table_elec_vdw.format;
1275 nbl->table_elec.r = nbl->table_elec_vdw.r;
1276 nbl->table_elec.n = nbl->table_elec_vdw.n;
1277 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1278 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1279 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1280 nbl->table_elec.ninteractions = 1;
1281 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1282 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1284 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1285 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1286 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1287 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1288 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1289 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1290 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1291 nbl->table_vdw.ninteractions = 2;
1292 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1293 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
/* Combined table has 12 values per point: 4 elec followed by 8 vdw */
1295 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1297 for (j = 0; j < 4; j++)
1299 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1301 for (j = 0; j < 8; j++)
1303 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
/* Counts how many distinct bonded table numbers are referenced by
 * interactions of type ftype1/ftype2 across all molecule types, growing
 * *count (indexed by table number) as needed; *ncount tracks its length.
 * NOTE(review): interior lines are elided in this listing. */
1308 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1309 int *ncount, int **count)
1311 const gmx_moltype_t *molt;
1313 int mt, ftype, stride, i, j, tabnr;
1315 for (mt = 0; mt < mtop->nmoltype; mt++)
1317 molt = &mtop->moltype[mt];
1318 for (ftype = 0; ftype < F_NRE; ftype++)
1320 if (ftype == ftype1 || ftype == ftype2)
1322 il = &molt->ilist[ftype];
1323 stride = 1 + NRAL(ftype);
1324 for (i = 0; i < il->nr; i += stride)
1326 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1329 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1331 if (tabnr >= *ncount)
1333 srenew(*count, tabnr+1);
1334 for (j = *ncount; j < tabnr+1; j++)
/* Reads one bonded table file per table number used by ftype1/ftype2;
 * each filename is basefn with "_<tabext><i>" spliced in before the xvg
 * extension. Returns the snew'd array of tables (caller owns it).
 * NOTE(review): interior lines are elided in this listing. */
1347 static bondedtable_t *make_bonded_tables(FILE *fplog,
1348 int ftype1, int ftype2,
1349 const gmx_mtop_t *mtop,
1350 const char *basefn, const char *tabext)
1352 int i, ncount, *count;
1360 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1365 for (i = 0; i < ncount; i++)
1369 sprintf(tabfn, "%s", basefn);
1370 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1371 tabext, i, ftp2ext(efXVG));
1372 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
/* Stores the current charge-group/atom counts in the force record and
 * grows the force buffers (f_twin, f_novirsum) when the constrained-atom
 * count exceeds the current allocation (over-allocated for DD).
 * NOTE(review): interior lines are elided in this listing. */
1381 void forcerec_set_ranges(t_forcerec *fr,
1382 int ncg_home, int ncg_force,
1384 int natoms_force_constr, int natoms_f_novirsum)
1389 /* fr->ncg_force is unused in the standard code,
1390 * but it can be useful for modified code dealing with charge groups.
1392 fr->ncg_force = ncg_force;
1393 fr->natoms_force = natoms_force;
1394 fr->natoms_force_constr = natoms_force_constr;
1396 if (fr->natoms_force_constr > fr->nalloc_force)
1398 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1402 srenew(fr->f_twin, fr->nalloc_force);
1406 if (fr->bF_NoVirSum)
1408 fr->f_novirsum_n = natoms_f_novirsum;
1409 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1411 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1412 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1417 fr->f_novirsum_n = 0;
/* Maps a cut-off of 0 (meaning "no cut-off") to the large sentinel value
 * GMX_CUTOFF_INF so range comparisons work uniformly.
 * NOTE(review): interior lines are elided in this listing. */
1421 static real cutoff_inf(real cutoff)
1425 cutoff = GMX_CUTOFF_INF;
/* Loads one AdResS thermoforce table per thermoforce energy group; the
 * group name is spliced into the table filename ("tf_<grp>.xvg"). Fails
 * fatally when no -tabletf file was supplied.
 * NOTE(review): interior lines are elided in this listing. */
1431 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1432 t_forcerec *fr, const t_inputrec *ir,
1433 const char *tabfn, const gmx_mtop_t *mtop,
1441 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1445 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1447 sprintf(buf, "%s", tabfn);
1448 for (i = 0; i < ir->adress->n_tf_grps; i++)
1450 j = ir->adress->tf_table_index[i]; /* get energy group index */
1451 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1452 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1455 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1457 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
/* Decides whether the accelerated all-vs-all kernels can be used: requires
 * infinite cut-offs, no PBC, plain cut-off elec/vdw, no free energy, a
 * supported (or no) implicit solvent, and GMX_NO_ALLVSALL unset. Also
 * requires a single energy monitor group; otherwise a performance note is
 * printed when bPrintNote. NOTE(review): interior lines are elided here. */
1462 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1469 ir->rcoulomb == 0 &&
1471 ir->ePBC == epbcNONE &&
1472 ir->vdwtype == evdwCUT &&
1473 ir->coulombtype == eelCUT &&
1474 ir->efep == efepNO &&
1475 (ir->implicit_solvent == eisNO ||
1476 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1477 ir->gb_algorithm == egbHCT ||
1478 ir->gb_algorithm == egbOBC))) &&
1479 getenv("GMX_NO_ALLVSALL") == NULL
1482 if (bAllvsAll && ir->opts.ngener > 1)
1484 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1490 fprintf(stderr, "\n%s\n", note);
1494 fprintf(fp, "\n%s\n", note);
1500 if (bAllvsAll && fp && MASTER(cr))
1502 fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
/* Allocates per-thread force/shift-force/energy-group accumulation buffers
 * for bonded interactions when more than one OpenMP bonded thread is used;
 * thread 0 reuses the global arrays, so allocation starts at t = 1.
 * NOTE(review): interior lines are elided in this listing. */
1509 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1513 /* These thread local data structures are used for bondeds only */
1514 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1516 if (fr->nthreads > 1)
1518 snew(fr->f_t, fr->nthreads);
1519 /* Thread 0 uses the global force and energy arrays */
1520 for (t = 1; t < fr->nthreads; t++)
1522 fr->f_t[t].f = NULL;
1523 fr->f_t[t].f_nalloc = 0;
1524 snew(fr->f_t[t].fshift, SHIFTS);
1525 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1526 for (i = 0; i < egNR; i++)
1528 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
/* Selects the CPU nbnxn kernel flavour (plain C, SIMD 4xN, or SIMD 2xNN)
 * and the Ewald exclusion-correction method (table vs analytical) based on
 * compile-time SIMD support, the electrostatics type, and the override
 * environment variables GMX_NBNXN_SIMD_4XN / _2XNN / GMX_NBNXN_EWALD_TABLE /
 * _ANALYTICAL. NOTE(review): interior lines are elided in this listing. */
1535 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1539 *kernel_type = nbnxnk4x4_PlainC;
1540 *ewald_excl = ewaldexclTable;
1542 #ifdef GMX_NBNXN_SIMD
1544 #ifdef GMX_NBNXN_SIMD_4XN
1545 *kernel_type = nbnxnk4xN_SIMD_4xN;
1547 #ifdef GMX_NBNXN_SIMD_2XNN
1548 /* We expect the 2xNN kernels to be faster in most cases */
1549 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1552 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
1553 if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1555 /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1556 * 10% with HT, 50% without HT, but extra zeros interactions
1557 * can compensate. As we currently don't detect the actual use
1558 * of HT, switch to 4x8 to avoid a potential performance hit.
1560 *kernel_type = nbnxnk4xN_SIMD_4xN;
1563 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1565 #ifdef GMX_NBNXN_SIMD_4XN
1566 *kernel_type = nbnxnk4xN_SIMD_4xN;
1568 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1571 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1573 #ifdef GMX_NBNXN_SIMD_2XNN
1574 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1576 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1580 /* Analytical Ewald exclusion correction is only an option in
1581 * the SIMD kernel. On BlueGene/Q, this is faster regardless
1582 * of precision. In single precision, this is faster on
1583 * Bulldozer, and slightly faster on Sandy Bridge.
1585 #if ((defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE) || (defined GMX_CPU_ACCELERATION_IBM_QPX)
1586 *ewald_excl = ewaldexclAnalytical;
1588 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1590 *ewald_excl = ewaldexclTable;
1592 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1594 *ewald_excl = ewaldexclAnalytical;
1598 #endif /* GMX_NBNXN_SIMD */
1602 const char *lookup_nbnxn_kernel_name(int kernel_type)
1604 const char *returnvalue = NULL;
1605 switch (kernel_type)
1608 returnvalue = "not set";
1610 case nbnxnk4x4_PlainC:
1611 returnvalue = "plain C";
1613 case nbnxnk4xN_SIMD_4xN:
1614 case nbnxnk4xN_SIMD_2xNN:
1615 #ifdef GMX_NBNXN_SIMD
1617 /* We have x86 SSE2 compatible SIMD */
1618 #ifdef GMX_X86_AVX_128_FMA
1619 returnvalue = "AVX-128-FMA";
1621 #if defined GMX_X86_AVX_256 || defined __AVX__
1622 /* x86 SIMD intrinsics can be converted to SSE or AVX depending
1623 * on compiler flags. As we use nearly identical intrinsics,
1624 * compiling for AVX without an AVX macros effectively results
1626 * For gcc we check for __AVX__
1627 * At least a check for icc should be added (if there is a macro)
1629 #if defined GMX_X86_AVX_256 && !defined GMX_NBNXN_HALF_WIDTH_SIMD
1630 returnvalue = "AVX-256";
1632 returnvalue = "AVX-128";
1635 #ifdef GMX_X86_SSE4_1
1636 returnvalue = "SSE4.1";
1638 returnvalue = "SSE2";
1642 #else /* GMX_X86_SSE2 */
1643 /* not GMX_X86_SSE2, but other SIMD */
1644 returnvalue = "SIMD";
1645 #endif /* GMX_X86_SSE2 */
1646 #else /* GMX_NBNXN_SIMD */
1647 returnvalue = "not available";
1648 #endif /* GMX_NBNXN_SIMD */
1650 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1651 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1655 gmx_fatal(FARGS, "Illegal kernel type selected");
1662 static void pick_nbnxn_kernel(FILE *fp,
1663 const t_commrec *cr,
1664 gmx_bool use_cpu_acceleration,
1666 gmx_bool bEmulateGPU,
1667 const t_inputrec *ir,
1670 gmx_bool bDoNonbonded)
1672 assert(kernel_type);
1674 *kernel_type = nbnxnkNotSet;
1675 *ewald_excl = ewaldexclTable;
1679 *kernel_type = nbnxnk8x8x8_PlainC;
1683 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1688 *kernel_type = nbnxnk8x8x8_CUDA;
1691 if (*kernel_type == nbnxnkNotSet)
1693 if (use_cpu_acceleration)
1695 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1699 *kernel_type = nbnxnk4x4_PlainC;
1703 if (bDoNonbonded && fp != NULL)
1705 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1706 lookup_nbnxn_kernel_name(*kernel_type),
1707 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1708 nbnxn_kernel_to_cj_size(*kernel_type));
1712 static void pick_nbnxn_resources(const t_commrec *cr,
1713 const gmx_hw_info_t *hwinfo,
1714 gmx_bool bDoNonbonded,
1716 gmx_bool *bEmulateGPU,
1717 const gmx_gpu_opt_t *gpu_opt)
1719 gmx_bool bEmulateGPUEnvVarSet;
1720 char gpu_err_str[STRLEN];
1724 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1726 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1727 * GPUs (currently) only handle non-bonded calculations, we will
1728 * automatically switch to emulation if non-bonded calculations are
1729 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1730 * way to turn off GPU initialization, data movement, and cleanup.
1732 * GPU emulation can be useful to assess the performance one can expect by
1733 * adding GPU(s) to the machine. The conditional below allows this even
1734 * if mdrun is compiled without GPU acceleration support.
1735 * Note that you should freezing the system as otherwise it will explode.
1737 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1739 gpu_opt->ncuda_dev_use > 0));
1741 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1743 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1745 /* Each PP node will use the intra-node id-th device from the
1746 * list of detected/selected GPUs. */
1747 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1748 &hwinfo->gpu_info, gpu_opt))
1750 /* At this point the init should never fail as we made sure that
1751 * we have all the GPUs we need. If it still does, we'll bail. */
1752 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1754 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1755 cr->rank_pp_intranode),
1759 /* Here we actually turn on hardware GPU acceleration */
1764 gmx_bool uses_simple_tables(int cutoff_scheme,
1765 nonbonded_verlet_t *nbv,
1768 gmx_bool bUsesSimpleTables = TRUE;
1771 switch (cutoff_scheme)
1774 bUsesSimpleTables = TRUE;
1777 assert(NULL != nbv && NULL != nbv->grp);
1778 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1779 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1782 gmx_incons("unimplemented");
1784 return bUsesSimpleTables;
1787 static void init_ewald_f_table(interaction_const_t *ic,
1788 gmx_bool bUsesSimpleTables,
1793 if (bUsesSimpleTables)
1795 /* With a spacing of 0.0005 we are at the force summation accuracy
1796 * for the SSE kernels for "normal" atomistic simulations.
1798 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1801 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1802 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1806 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1807 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1808 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1811 sfree_aligned(ic->tabq_coul_FDV0);
1812 sfree_aligned(ic->tabq_coul_F);
1813 sfree_aligned(ic->tabq_coul_V);
1815 /* Create the original table data in FDV0 */
1816 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1817 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1818 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1819 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1820 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1823 void init_interaction_const_tables(FILE *fp,
1824 interaction_const_t *ic,
1825 gmx_bool bUsesSimpleTables,
1830 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1832 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1836 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1837 1/ic->tabq_scale, ic->tabq_size);
1842 static void init_interaction_const(FILE *fp,
1843 const t_commrec gmx_unused *cr,
1844 interaction_const_t **interaction_const,
1845 const t_forcerec *fr,
1848 interaction_const_t *ic;
1849 gmx_bool bUsesSimpleTables = TRUE;
1853 /* Just allocate something so we can free it */
1854 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1855 snew_aligned(ic->tabq_coul_F, 16, 32);
1856 snew_aligned(ic->tabq_coul_V, 16, 32);
1858 ic->rlist = fr->rlist;
1859 ic->rlistlong = fr->rlistlong;
1862 ic->rvdw = fr->rvdw;
1863 if (fr->vdw_modifier == eintmodPOTSHIFT)
1865 ic->sh_invrc6 = pow(ic->rvdw, -6.0);
1872 /* Electrostatics */
1873 ic->eeltype = fr->eeltype;
1874 ic->rcoulomb = fr->rcoulomb;
1875 ic->epsilon_r = fr->epsilon_r;
1876 ic->epsfac = fr->epsfac;
1879 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
1880 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1882 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1884 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
1891 /* Reaction-field */
1892 if (EEL_RF(ic->eeltype))
1894 ic->epsilon_rf = fr->epsilon_rf;
1895 ic->k_rf = fr->k_rf;
1896 ic->c_rf = fr->c_rf;
1900 /* For plain cut-off we might use the reaction-field kernels */
1901 ic->epsilon_rf = ic->epsilon_r;
1903 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1905 ic->c_rf = 1/ic->rcoulomb;
1915 fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1916 sqr(ic->sh_invrc6), ic->sh_invrc6);
1917 if (ic->eeltype == eelCUT)
1919 fprintf(fp, ", Coulomb %.3f", ic->c_rf);
1921 else if (EEL_PME(ic->eeltype))
1923 fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
1928 *interaction_const = ic;
1930 if (fr->nbv != NULL && fr->nbv->bUseGPU)
1932 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
1934 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
1935 * also sharing texture references. To keep the code simple, we don't
1936 * treat texture references as shared resources, but this means that
1937 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
1938 * Hence, to ensure that the non-bonded kernels don't start before all
1939 * texture binding operations are finished, we need to wait for all ranks
1940 * to arrive here before continuing.
1942 * Note that we could omit this barrier if GPUs are not shared (or
1943 * texture objects are used), but as this is initialization code, there
1944 * is not point in complicating things.
1946 #ifdef GMX_THREAD_MPI
1951 #endif /* GMX_THREAD_MPI */
1954 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
1955 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
1958 static void init_nb_verlet(FILE *fp,
1959 nonbonded_verlet_t **nb_verlet,
1960 const t_inputrec *ir,
1961 const t_forcerec *fr,
1962 const t_commrec *cr,
1963 const char *nbpu_opt)
1965 nonbonded_verlet_t *nbv;
1968 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
1970 nbnxn_alloc_t *nb_alloc;
1971 nbnxn_free_t *nb_free;
1975 pick_nbnxn_resources(cr, fr->hwinfo,
1983 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
1984 for (i = 0; i < nbv->ngrp; i++)
1986 nbv->grp[i].nbl_lists.nnbl = 0;
1987 nbv->grp[i].nbat = NULL;
1988 nbv->grp[i].kernel_type = nbnxnkNotSet;
1990 if (i == 0) /* local */
1992 pick_nbnxn_kernel(fp, cr, fr->use_cpu_acceleration,
1993 nbv->bUseGPU, bEmulateGPU, ir,
1994 &nbv->grp[i].kernel_type,
1995 &nbv->grp[i].ewald_excl,
1998 else /* non-local */
2000 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2002 /* Use GPU for local, select a CPU kernel for non-local */
2003 pick_nbnxn_kernel(fp, cr, fr->use_cpu_acceleration,
2005 &nbv->grp[i].kernel_type,
2006 &nbv->grp[i].ewald_excl,
2009 bHybridGPURun = TRUE;
2013 /* Use the same kernel for local and non-local interactions */
2014 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2015 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2022 /* init the NxN GPU data; the last argument tells whether we'll have
2023 * both local and non-local NB calculation on GPU */
2024 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2025 &fr->hwinfo->gpu_info, fr->gpu_opt,
2026 cr->rank_pp_intranode,
2027 (nbv->ngrp > 1) && !bHybridGPURun);
2029 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2033 nbv->min_ci_balanced = strtol(env, &end, 10);
2034 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2036 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2041 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2042 nbv->min_ci_balanced);
2047 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2050 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2051 nbv->min_ci_balanced);
2057 nbv->min_ci_balanced = 0;
2062 nbnxn_init_search(&nbv->nbs,
2063 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2064 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2065 gmx_omp_nthreads_get(emntNonbonded));
2067 for (i = 0; i < nbv->ngrp; i++)
2069 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2071 nb_alloc = &pmalloc;
2080 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2081 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2082 /* 8x8x8 "non-simple" lists are ATM always combined */
2083 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2087 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2089 snew(nbv->grp[i].nbat, 1);
2090 nbnxn_atomdata_init(fp,
2092 nbv->grp[i].kernel_type,
2093 fr->ntype, fr->nbfp,
2095 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2100 nbv->grp[i].nbat = nbv->grp[0].nbat;
2105 void init_forcerec(FILE *fp,
2106 const output_env_t oenv,
2109 const t_inputrec *ir,
2110 const gmx_mtop_t *mtop,
2111 const t_commrec *cr,
2117 const char *nbpu_opt,
2118 gmx_bool bNoSolvOpt,
2121 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2126 gmx_bool bGenericKernelOnly;
2127 gmx_bool bTab, bSep14tab, bNormalnblists;
2129 int *nm_ind, egp_flags;
2131 if (fr->hwinfo == NULL)
2133 /* Detect hardware, gather information.
2134 * In mdrun, hwinfo has already been set before calling init_forcerec.
2135 * Here we ignore GPUs, as tools will not use them anyhow.
2137 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2140 /* By default we turn acceleration on, but it might be turned off further down... */
2141 fr->use_cpu_acceleration = TRUE;
2143 fr->bDomDec = DOMAINDECOMP(cr);
2145 natoms = mtop->natoms;
2147 if (check_box(ir->ePBC, box))
2149 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2152 /* Test particle insertion ? */
2155 /* Set to the size of the molecule to be inserted (the last one) */
2156 /* Because of old style topologies, we have to use the last cg
2157 * instead of the last molecule type.
2159 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2160 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2161 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2163 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2171 /* Copy AdResS parameters */
2174 fr->adress_type = ir->adress->type;
2175 fr->adress_const_wf = ir->adress->const_wf;
2176 fr->adress_ex_width = ir->adress->ex_width;
2177 fr->adress_hy_width = ir->adress->hy_width;
2178 fr->adress_icor = ir->adress->icor;
2179 fr->adress_site = ir->adress->site;
2180 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2181 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2184 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2185 for (i = 0; i < ir->adress->n_energy_grps; i++)
2187 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2190 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2191 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2192 for (i = 0; i < fr->n_adress_tf_grps; i++)
2194 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2196 copy_rvec(ir->adress->refs, fr->adress_refs);
2200 fr->adress_type = eAdressOff;
2201 fr->adress_do_hybridpairs = FALSE;
2204 /* Copy the user determined parameters */
2205 fr->userint1 = ir->userint1;
2206 fr->userint2 = ir->userint2;
2207 fr->userint3 = ir->userint3;
2208 fr->userint4 = ir->userint4;
2209 fr->userreal1 = ir->userreal1;
2210 fr->userreal2 = ir->userreal2;
2211 fr->userreal3 = ir->userreal3;
2212 fr->userreal4 = ir->userreal4;
2215 fr->fc_stepsize = ir->fc_stepsize;
2218 fr->efep = ir->efep;
2219 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2220 if (ir->fepvals->bScCoul)
2222 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2223 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2227 fr->sc_alphacoul = 0;
2228 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2230 fr->sc_power = ir->fepvals->sc_power;
2231 fr->sc_r_power = ir->fepvals->sc_r_power;
2232 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2234 env = getenv("GMX_SCSIGMA_MIN");
2238 sscanf(env, "%lf", &dbl);
2239 fr->sc_sigma6_min = pow(dbl, 6);
2242 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2246 fr->bNonbonded = TRUE;
2247 if (getenv("GMX_NO_NONBONDED") != NULL)
2249 /* turn off non-bonded calculations */
2250 fr->bNonbonded = FALSE;
2251 md_print_warn(cr, fp,
2252 "Found environment variable GMX_NO_NONBONDED.\n"
2253 "Disabling nonbonded calculations.\n");
2256 bGenericKernelOnly = FALSE;
2258 /* We now check in the NS code whether a particular combination of interactions
2259 * can be used with water optimization, and disable it if that is not the case.
2262 if (getenv("GMX_NB_GENERIC") != NULL)
2267 "Found environment variable GMX_NB_GENERIC.\n"
2268 "Disabling all interaction-specific nonbonded kernels, will only\n"
2269 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2271 bGenericKernelOnly = TRUE;
2274 if (bGenericKernelOnly == TRUE)
2279 if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2281 fr->use_cpu_acceleration = FALSE;
2285 "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
2286 "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
2290 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2292 /* Check if we can/should do all-vs-all kernels */
2293 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2294 fr->AllvsAll_work = NULL;
2295 fr->AllvsAll_workgb = NULL;
2297 /* All-vs-all kernels have not been implemented in 4.6, and
2298 * the SIMD group kernels are also buggy in this case. Non-accelerated
2299 * group kernels are OK. See Redmine #1249. */
2302 fr->bAllvsAll = FALSE;
2303 fr->use_cpu_acceleration = FALSE;
2307 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2308 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2309 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2310 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2311 "we are proceeding by disabling all CPU architecture-specific\n"
2312 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2313 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2317 /* Neighbour searching stuff */
2318 fr->cutoff_scheme = ir->cutoff_scheme;
2319 fr->bGrid = (ir->ns_type == ensGRID);
2320 fr->ePBC = ir->ePBC;
2322 /* Determine if we will do PBC for distances in bonded interactions */
2323 if (fr->ePBC == epbcNONE)
2325 fr->bMolPBC = FALSE;
2329 if (!DOMAINDECOMP(cr))
2331 /* The group cut-off scheme and SHAKE assume charge groups
2332 * are whole, but not using molpbc is faster in most cases.
2334 if (fr->cutoff_scheme == ecutsGROUP ||
2335 (ir->eConstrAlg == econtSHAKE &&
2336 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2337 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2339 fr->bMolPBC = ir->bPeriodicMols;
2344 if (getenv("GMX_USE_GRAPH") != NULL)
2346 fr->bMolPBC = FALSE;
2349 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2356 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2359 fr->bGB = (ir->implicit_solvent == eisGBSA);
2361 fr->rc_scaling = ir->refcoord_scaling;
2362 copy_rvec(ir->posres_com, fr->posres_com);
2363 copy_rvec(ir->posres_comB, fr->posres_comB);
2364 fr->rlist = cutoff_inf(ir->rlist);
2365 fr->rlistlong = cutoff_inf(ir->rlistlong);
2366 fr->eeltype = ir->coulombtype;
2367 fr->vdwtype = ir->vdwtype;
2368 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2370 fr->coulomb_modifier = ir->coulomb_modifier;
2371 fr->vdw_modifier = ir->vdw_modifier;
2373 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2374 switch (fr->eeltype)
2377 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2383 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2387 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2388 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2397 case eelPMEUSERSWITCH:
2398 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2403 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2407 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2411 /* Vdw: Translate from mdp settings to kernel format */
2412 switch (fr->vdwtype)
2418 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2422 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2429 case evdwENCADSHIFT:
2430 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2434 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2438 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2439 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2440 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2442 fr->bTwinRange = fr->rlistlong > fr->rlist;
2443 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2445 fr->reppow = mtop->ffparams.reppow;
2447 if (ir->cutoff_scheme == ecutsGROUP)
2449 fr->bvdwtab = (fr->vdwtype != evdwCUT ||
2450 !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2451 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2452 fr->bcoultab = !(fr->eeltype == eelCUT ||
2453 fr->eeltype == eelEWALD ||
2454 fr->eeltype == eelPME ||
2455 fr->eeltype == eelRF ||
2456 fr->eeltype == eelRF_ZERO);
2458 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2459 * going to be faster to tabulate the interaction than calling the generic kernel.
2461 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2463 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2465 fr->bcoultab = TRUE;
2468 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2469 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2470 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2471 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2473 if (fr->rcoulomb != fr->rvdw)
2475 fr->bcoultab = TRUE;
2479 if (getenv("GMX_REQUIRE_TABLES"))
2482 fr->bcoultab = TRUE;
2487 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2488 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2491 if (fr->bvdwtab == TRUE)
2493 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2494 fr->nbkernel_vdw_modifier = eintmodNONE;
2496 if (fr->bcoultab == TRUE)
2498 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2499 fr->nbkernel_elec_modifier = eintmodNONE;
2503 if (ir->cutoff_scheme == ecutsVERLET)
2505 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2507 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2509 fr->bvdwtab = FALSE;
2510 fr->bcoultab = FALSE;
2513 /* Tables are used for direct ewald sum */
2516 if (EEL_PME(ir->coulombtype))
2520 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2522 if (ir->coulombtype == eelP3M_AD)
2524 please_cite(fp, "Hockney1988");
2525 please_cite(fp, "Ballenegger2012");
2529 please_cite(fp, "Essmann95a");
2532 if (ir->ewald_geometry == eewg3DC)
2536 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2538 please_cite(fp, "In-Chul99a");
2541 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2542 init_ewald_tab(&(fr->ewald_table), ir, fp);
2545 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2546 1/fr->ewaldcoeff_q);
2550 if (EVDW_PME(ir->vdwtype))
2554 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2556 please_cite(fp, "Essman95a");
2557 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2560 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2561 1/fr->ewaldcoeff_lj);
2565 /* Electrostatics */
2566 fr->epsilon_r = ir->epsilon_r;
2567 fr->epsilon_rf = ir->epsilon_rf;
2568 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2569 fr->rcoulomb_switch = ir->rcoulomb_switch;
2570 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2572 /* Parameters for generalized RF */
2576 if (fr->eeltype == eelGRF)
2578 init_generalized_rf(fp, mtop, ir, fr);
2581 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2582 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2583 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2584 IR_ELEC_FIELD(*ir) ||
2585 (fr->adress_icor != eAdressICOff)
2588 if (fr->cutoff_scheme == ecutsGROUP &&
2589 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2591 /* Count the total number of charge groups */
2592 fr->cg_nalloc = ncg_mtop(mtop);
2593 srenew(fr->cg_cm, fr->cg_nalloc);
2595 if (fr->shift_vec == NULL)
2597 snew(fr->shift_vec, SHIFTS);
2600 if (fr->fshift == NULL)
2602 snew(fr->fshift, SHIFTS);
2605 if (fr->nbfp == NULL)
2607 fr->ntype = mtop->ffparams.atnr;
2608 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2611 /* Copy the energy group exclusions */
2612 fr->egp_flags = ir->opts.egp_flags;
2614 /* Van der Waals stuff */
2615 fr->rvdw = cutoff_inf(ir->rvdw);
2616 fr->rvdw_switch = ir->rvdw_switch;
2617 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2619 if (fr->rvdw_switch >= fr->rvdw)
2621 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2622 fr->rvdw_switch, fr->rvdw);
2626 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2627 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2628 fr->rvdw_switch, fr->rvdw);
2632 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2634 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2637 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2639 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2644 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2645 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2648 fr->eDispCorr = ir->eDispCorr;
2649 if (ir->eDispCorr != edispcNO)
2651 set_avcsixtwelve(fp, fr, mtop);
2656 set_bham_b_max(fp, fr, mtop);
2659 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2661 /* Copy the GBSA data (radius, volume and surftens for each
2662 * atomtype) from the topology atomtype section to forcerec.
2664 snew(fr->atype_radius, fr->ntype);
2665 snew(fr->atype_vol, fr->ntype);
2666 snew(fr->atype_surftens, fr->ntype);
2667 snew(fr->atype_gb_radius, fr->ntype);
2668 snew(fr->atype_S_hct, fr->ntype);
2670 if (mtop->atomtypes.nr > 0)
2672 for (i = 0; i < fr->ntype; i++)
2674 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2676 for (i = 0; i < fr->ntype; i++)
2678 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2680 for (i = 0; i < fr->ntype; i++)
2682 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2684 for (i = 0; i < fr->ntype; i++)
2686 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2688 for (i = 0; i < fr->ntype; i++)
2690 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2694 /* Generate the GB table if needed */
2698 fr->gbtabscale = 2000;
2700 fr->gbtabscale = 500;
2704 fr->gbtab = make_gb_table(oenv, fr);
2706 init_gb(&fr->born, cr, fr, ir, mtop, ir->gb_algorithm);
2708 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2709 if (!DOMAINDECOMP(cr))
2711 make_local_gb(cr, fr->born, ir->gb_algorithm);
2715 /* Set the charge scaling */
2716 if (fr->epsilon_r != 0)
2718 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2722 /* eps = 0 is infinite dieletric: no coulomb interactions */
2726 /* Reaction field constants */
2727 if (EEL_RF(fr->eeltype))
2729 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2730 fr->rcoulomb, fr->temp, fr->zsquare, box,
2731 &fr->kappa, &fr->k_rf, &fr->c_rf);
2734 /*This now calculates sum for q and c6*/
2735 set_chargesum(fp, fr, mtop);
2737 /* if we are using LR electrostatics, and they are tabulated,
2738 * the tables will contain modified coulomb interactions.
2739 * Since we want to use the non-shifted ones for 1-4
2740 * coulombic interactions, we must have an extra set of tables.
2743 /* Construct tables.
2744 * A little unnecessary to make both vdw and coul tables sometimes,
2745 * but what the heck... */
2747 bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
2749 bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2750 fr->bBHAM || fr->bEwald) &&
2751 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2752 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2753 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2755 negp_pp = ir->opts.ngener - ir->nwall;
2759 bNormalnblists = TRUE;
2764 bNormalnblists = (ir->eDispCorr != edispcNO);
2765 for (egi = 0; egi < negp_pp; egi++)
2767 for (egj = egi; egj < negp_pp; egj++)
2769 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2770 if (!(egp_flags & EGP_EXCL))
2772 if (egp_flags & EGP_TABLE)
2778 bNormalnblists = TRUE;
2785 fr->nnblists = negptable + 1;
2789 fr->nnblists = negptable;
2791 if (fr->nnblists > 1)
2793 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2802 snew(fr->nblists, fr->nnblists);
2804 /* This code automatically gives table length tabext without cut-off's,
2805 * in that case grompp should already have checked that we do not need
2806 * normal tables and we only generate tables for 1-4 interactions.
2808 rtab = ir->rlistlong + ir->tabext;
2812 /* make tables for ordinary interactions */
2815 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2818 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2822 fr->tab14 = fr->nblists[0].table_elec_vdw;
2832 /* Read the special tables for certain energy group pairs */
2833 nm_ind = mtop->groups.grps[egcENER].nm_ind;
2834 for (egi = 0; egi < negp_pp; egi++)
2836 for (egj = egi; egj < negp_pp; egj++)
2838 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2839 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2841 nbl = &(fr->nblists[m]);
2842 if (fr->nnblists > 1)
2844 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2846 /* Read the table file with the two energy groups names appended */
2847 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2848 *mtop->groups.grpname[nm_ind[egi]],
2849 *mtop->groups.grpname[nm_ind[egj]],
2853 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2854 *mtop->groups.grpname[nm_ind[egi]],
2855 *mtop->groups.grpname[nm_ind[egj]],
2856 &fr->nblists[fr->nnblists/2+m]);
2860 else if (fr->nnblists > 1)
2862 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2870 /* generate extra tables with plain Coulomb for 1-4 interactions only */
2871 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2872 GMX_MAKETABLES_14ONLY);
2875 /* Read AdResS Thermo Force table if needed */
2876 if (fr->adress_icor == eAdressICThermoForce)
2878 /* old todo replace */
2880 if (ir->adress->n_tf_grps > 0)
2882 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2887 /* load the default table */
2888 snew(fr->atf_tabs, 1);
2889 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2894 fr->nwall = ir->nwall;
2895 if (ir->nwall && ir->wall_type == ewtTABLE)
2897 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2902 fcd->bondtab = make_bonded_tables(fp,
2903 F_TABBONDS, F_TABBONDSNC,
2905 fcd->angletab = make_bonded_tables(fp,
2908 fcd->dihtab = make_bonded_tables(fp,
2916 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2920 /* QM/MM initialization if requested
2924 fprintf(stderr, "QM/MM calculation requested.\n");
2927 fr->bQMMM = ir->bQMMM;
2928 fr->qr = mk_QMMMrec();
2930 /* Set all the static charge group info */
2931 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
2932 &fr->bExcl_IntraCGAll_InterCGNone);
2933 if (DOMAINDECOMP(cr))
2939 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
2942 if (!DOMAINDECOMP(cr))
2944 /* When using particle decomposition, the effect of the second argument,
2945 * which sets fr->hcg, is corrected later in do_md and init_em.
2947 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
2948 mtop->natoms, mtop->natoms, mtop->natoms);
2951 fr->print_force = print_force;
2954 /* coarse load balancing vars */
2959 /* Initialize neighbor search */
2960 init_ns(fp, cr, &fr->ns, fr, mtop);
2962 if (cr->duty & DUTY_PP)
2964 gmx_nonbonded_setup(fr, bGenericKernelOnly);
2968 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
2973 /* Initialize the thread working data for bonded interactions */
2974 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
2976 snew(fr->excl_load, fr->nthreads+1);
2978 if (fr->cutoff_scheme == ecutsVERLET)
2980 if (ir->rcoulomb != ir->rvdw)
2982 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
2985 init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
2988 /* fr->ic is used both by verlet and group kernels (to some extent) now */
2989 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
2991 if (ir->eDispCorr != edispcNO)
2993 calc_enervirdiff(fp, ir->eDispCorr, fr);
/* Debug-print helpers: write one "label: value" line to fp, using the
 * stringized argument expression as the label.  Both the stream and the
 * value argument are fully parenthesized so that expression arguments
 * (e.g. a ternary) cannot change meaning through operator precedence;
 * previously pr_real did not parenthesize fp, unlike its siblings. */
#define pr_real(fp, r) fprintf((fp), "%s: %e\n", #r, (r))
#define pr_int(fp, i) fprintf((fp), "%s: %d\n", #i, (i))
/* bool_names maps a boolean to its printable name (declared elsewhere). */
#define pr_bool(fp, b) fprintf((fp), "%s: %s\n", #b, bool_names[(b)])
3001 void pr_forcerec(FILE *fp, t_forcerec *fr)
3005 pr_real(fp, fr->rlist);
3006 pr_real(fp, fr->rcoulomb);
3007 pr_real(fp, fr->fudgeQQ);
3008 pr_bool(fp, fr->bGrid);
3009 pr_bool(fp, fr->bTwinRange);
3010 /*pr_int(fp,fr->cg0);
3011 pr_int(fp,fr->hcg);*/
3012 for (i = 0; i < fr->nnblists; i++)
3014 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3016 pr_real(fp, fr->rcoulomb_switch);
3017 pr_real(fp, fr->rcoulomb);
3022 void forcerec_set_excl_load(t_forcerec *fr,
3023 const gmx_localtop_t *top, const t_commrec *cr)
3026 int t, i, j, ntot, n, ntarget;
3028 if (cr != NULL && PARTDECOMP(cr))
3030 /* No OpenMP with particle decomposition */
3038 ind = top->excls.index;
3042 for (i = 0; i < top->excls.nr; i++)
3044 for (j = ind[i]; j < ind[i+1]; j++)
3053 fr->excl_load[0] = 0;
3056 for (t = 1; t <= fr->nthreads; t++)
3058 ntarget = (ntot*t)/fr->nthreads;
3059 while (i < top->excls.nr && n < ntarget)
3061 for (j = ind[i]; j < ind[i+1]; j++)
3070 fr->excl_load[t] = i;