2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/math/utilities.h"
51 #include "gmx_fatal.h"
52 #include "gmx_fatal_collective.h"
56 #include "nonbonded.h"
65 #include "md_support.h"
66 #include "md_logging.h"
70 #include "mtop_util.h"
71 #include "nbnxn_search.h"
72 #include "nbnxn_atomdata.h"
73 #include "nbnxn_consts.h"
74 #include "gmx_omp_nthreads.h"
75 #include "gmx_detect_hardware.h"
79 /* MSVC definition for __cpuid() */
83 #include "types/nbnxn_cuda_types_ext.h"
84 #include "gpu_utils.h"
85 #include "nbnxn_cuda_data_mgmt.h"
86 #include "pmalloc_cuda.h"
88 t_forcerec *mk_forcerec(void)
98 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
102 for (i = 0; (i < atnr); i++)
104 for (j = 0; (j < atnr); j++)
106 fprintf(fp, "%2d - %2d", i, j);
109 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
110 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
114 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
115 C12(nbfp, atnr, i, j)/12.0);
122 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
130 snew(nbfp, 3*atnr*atnr);
131 for (i = k = 0; (i < atnr); i++)
133 for (j = 0; (j < atnr); j++, k++)
135 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
136 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
137 /* nbfp now includes the 6.0 derivative prefactor */
138 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
144 snew(nbfp, 2*atnr*atnr);
145 for (i = k = 0; (i < atnr); i++)
147 for (j = 0; (j < atnr); j++, k++)
149 /* nbfp now includes the 6.0/12.0 derivative prefactors */
150 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
151 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
159 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
163 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
167 snew(nbfp, 2*atnr*atnr);
168 for (i = 0; i < atnr; ++i)
170 for (j = 0; j < atnr; ++j)
172 c6i = idef->iparams[i*(atnr+1)].lj.c6;
173 c12i = idef->iparams[i*(atnr+1)].lj.c12;
174 c6j = idef->iparams[j*(atnr+1)].lj.c6;
175 c12j = idef->iparams[j*(atnr+1)].lj.c12;
176 c6 = sqrt(c6i * c6j);
177 c12 = sqrt(c12i * c12j);
178 if (comb_rule == eCOMB_ARITHMETIC
179 && !gmx_numzero(c6) && !gmx_numzero(c12))
181 sigmai = pow(c12i / c6i, 1.0/6.0);
182 sigmaj = pow(c12j / c6j, 1.0/6.0);
183 epsi = c6i * c6i / c12i;
184 epsj = c6j * c6j / c12j;
185 c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
186 c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
188 C6(nbfp, atnr, i, j) = c6*6.0;
189 C12(nbfp, atnr, i, j) = c12*12.0;
195 /* This routine sets fr->solvent_opt to the most common solvent in the
196 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
197 * the fr->solvent_type array with the correct type (or esolNO).
199 * Charge groups that fulfill the conditions but are not identical to the
200 * most common one will be marked as esolNO in the solvent_type array.
202 * TIP3p is identical to SPC for these purposes, so we call it
203 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
205 * NOTE: QM particle should not
206 * become an optimized solvent. Not even if there is only one charge
216 } solvent_parameters_t;
219 check_solvent_cg(const gmx_moltype_t *molt,
222 const unsigned char *qm_grpnr,
223 const t_grps *qm_grps,
225 int *n_solvent_parameters,
226 solvent_parameters_t **solvent_parameters_p,
230 const t_blocka * excl;
241 solvent_parameters_t *solvent_parameters;
243 /* We use a list with parameters for each solvent type.
244 * Every time we discover a new molecule that fulfills the basic
245 * conditions for a solvent we compare with the previous entries
246 * in these lists. If the parameters are the same we just increment
247 * the counter for that type, and otherwise we create a new type
248 * based on the current molecule.
250 * Once we've finished going through all molecules we check which
251 * solvent is most common, and mark all those molecules while we
252 * clear the flag on all others.
255 solvent_parameters = *solvent_parameters_p;
257 /* Mark the cg first as non optimized */
260 /* Check if this cg has no exclusions with atoms in other charge groups
261 * and all atoms inside the charge group excluded.
262 * We only have 3 or 4 atom solvent loops.
264 if (GET_CGINFO_EXCL_INTER(cginfo) ||
265 !GET_CGINFO_EXCL_INTRA(cginfo))
270 /* Get the indices of the first atom in this charge group */
271 j0 = molt->cgs.index[cg0];
272 j1 = molt->cgs.index[cg0+1];
274 /* Number of atoms in our molecule */
280 "Moltype '%s': there are %d atoms in this charge group\n",
284 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
287 if (nj < 3 || nj > 4)
292 /* Check if we are doing QM on this group */
294 if (qm_grpnr != NULL)
296 for (j = j0; j < j1 && !qm; j++)
298 qm = (qm_grpnr[j] < qm_grps->nr - 1);
301 /* Cannot use solvent optimization with QM */
307 atom = molt->atoms.atom;
309 /* Still looks like a solvent, time to check parameters */
311 /* If it is perturbed (free energy) we can't use the solvent loops,
312 * so then we just skip to the next molecule.
316 for (j = j0; j < j1 && !perturbed; j++)
318 perturbed = PERTURBED(atom[j]);
326 /* Now it's only a question if the VdW and charge parameters
327 * are OK. Before doing the check we compare and see if they are
328 * identical to a possible previous solvent type.
329 * First we assign the current types and charges.
331 for (j = 0; j < nj; j++)
333 tmp_vdwtype[j] = atom[j0+j].type;
334 tmp_charge[j] = atom[j0+j].q;
337 /* Does it match any previous solvent type? */
338 for (k = 0; k < *n_solvent_parameters; k++)
343 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
344 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
345 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
350 /* Check that types & charges match for all atoms in molecule */
351 for (j = 0; j < nj && match == TRUE; j++)
353 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
357 if (tmp_charge[j] != solvent_parameters[k].charge[j])
364 /* Congratulations! We have a matched solvent.
365 * Flag it with this type for later processing.
368 solvent_parameters[k].count += nmol;
370 /* We are done with this charge group */
375 /* If we get here, we have a tentative new solvent type.
376 * Before we add it we must check that it fulfills the requirements
377 * of the solvent optimized loops. First determine which atoms have
380 for (j = 0; j < nj; j++)
383 tjA = tmp_vdwtype[j];
385 /* Go through all other tpes and see if any have non-zero
386 * VdW parameters when combined with this one.
388 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
390 /* We already checked that the atoms weren't perturbed,
391 * so we only need to check state A now.
395 has_vdw[j] = (has_vdw[j] ||
396 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
397 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
398 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
403 has_vdw[j] = (has_vdw[j] ||
404 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
405 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
410 /* Now we know all we need to make the final check and assignment. */
414 * For this we require thatn all atoms have charge,
415 * the charges on atom 2 & 3 should be the same, and only
416 * atom 1 might have VdW.
418 if (has_vdw[1] == FALSE &&
419 has_vdw[2] == FALSE &&
420 tmp_charge[0] != 0 &&
421 tmp_charge[1] != 0 &&
422 tmp_charge[2] == tmp_charge[1])
424 srenew(solvent_parameters, *n_solvent_parameters+1);
425 solvent_parameters[*n_solvent_parameters].model = esolSPC;
426 solvent_parameters[*n_solvent_parameters].count = nmol;
427 for (k = 0; k < 3; k++)
429 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
430 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
433 *cg_sp = *n_solvent_parameters;
434 (*n_solvent_parameters)++;
439 /* Or could it be a TIP4P?
440 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
441 * Only atom 1 mght have VdW.
443 if (has_vdw[1] == FALSE &&
444 has_vdw[2] == FALSE &&
445 has_vdw[3] == FALSE &&
446 tmp_charge[0] == 0 &&
447 tmp_charge[1] != 0 &&
448 tmp_charge[2] == tmp_charge[1] &&
451 srenew(solvent_parameters, *n_solvent_parameters+1);
452 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
453 solvent_parameters[*n_solvent_parameters].count = nmol;
454 for (k = 0; k < 4; k++)
456 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
457 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
460 *cg_sp = *n_solvent_parameters;
461 (*n_solvent_parameters)++;
465 *solvent_parameters_p = solvent_parameters;
469 check_solvent(FILE * fp,
470 const gmx_mtop_t * mtop,
472 cginfo_mb_t *cginfo_mb)
475 const t_block * mols;
476 const gmx_moltype_t *molt;
477 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
478 int n_solvent_parameters;
479 solvent_parameters_t *solvent_parameters;
485 fprintf(debug, "Going to determine what solvent types we have.\n");
490 n_solvent_parameters = 0;
491 solvent_parameters = NULL;
492 /* Allocate temporary array for solvent type */
493 snew(cg_sp, mtop->nmolblock);
497 for (mb = 0; mb < mtop->nmolblock; mb++)
499 molt = &mtop->moltype[mtop->molblock[mb].type];
501 /* Here we have to loop over all individual molecules
502 * because we need to check for QMMM particles.
504 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
505 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
506 nmol = mtop->molblock[mb].nmol/nmol_ch;
507 for (mol = 0; mol < nmol_ch; mol++)
510 am = mol*cgs->index[cgs->nr];
511 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
513 check_solvent_cg(molt, cg_mol, nmol,
514 mtop->groups.grpnr[egcQMMM] ?
515 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
516 &mtop->groups.grps[egcQMMM],
518 &n_solvent_parameters, &solvent_parameters,
519 cginfo_mb[mb].cginfo[cgm+cg_mol],
520 &cg_sp[mb][cgm+cg_mol]);
523 cg_offset += cgs->nr;
524 at_offset += cgs->index[cgs->nr];
527 /* Puh! We finished going through all charge groups.
528 * Now find the most common solvent model.
531 /* Most common solvent this far */
533 for (i = 0; i < n_solvent_parameters; i++)
536 solvent_parameters[i].count > solvent_parameters[bestsp].count)
544 bestsol = solvent_parameters[bestsp].model;
551 #ifdef DISABLE_WATER_NLIST
556 for (mb = 0; mb < mtop->nmolblock; mb++)
558 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
559 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
560 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
562 if (cg_sp[mb][i] == bestsp)
564 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
569 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
576 if (bestsol != esolNO && fp != NULL)
578 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
580 solvent_parameters[bestsp].count);
583 sfree(solvent_parameters);
584 fr->solvent_opt = bestsol;
588 acNONE = 0, acCONSTRAINT, acSETTLE
591 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
592 t_forcerec *fr, gmx_bool bNoSolvOpt,
593 gmx_bool *bExcl_IntraCGAll_InterCGNone)
596 const t_blocka *excl;
597 const gmx_moltype_t *molt;
598 const gmx_molblock_t *molb;
599 cginfo_mb_t *cginfo_mb;
602 int cg_offset, a_offset, cgm, am;
603 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
607 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
609 ncg_tot = ncg_mtop(mtop);
610 snew(cginfo_mb, mtop->nmolblock);
612 snew(type_VDW, fr->ntype);
613 for (ai = 0; ai < fr->ntype; ai++)
615 type_VDW[ai] = FALSE;
616 for (j = 0; j < fr->ntype; j++)
618 type_VDW[ai] = type_VDW[ai] ||
620 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
621 C12(fr->nbfp, fr->ntype, ai, j) != 0;
625 *bExcl_IntraCGAll_InterCGNone = TRUE;
628 snew(bExcl, excl_nalloc);
631 for (mb = 0; mb < mtop->nmolblock; mb++)
633 molb = &mtop->molblock[mb];
634 molt = &mtop->moltype[molb->type];
638 /* Check if the cginfo is identical for all molecules in this block.
639 * If so, we only need an array of the size of one molecule.
640 * Otherwise we make an array of #mol times #cgs per molecule.
644 for (m = 0; m < molb->nmol; m++)
646 am = m*cgs->index[cgs->nr];
647 for (cg = 0; cg < cgs->nr; cg++)
650 a1 = cgs->index[cg+1];
651 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
652 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
656 if (mtop->groups.grpnr[egcQMMM] != NULL)
658 for (ai = a0; ai < a1; ai++)
660 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
661 mtop->groups.grpnr[egcQMMM][a_offset +ai])
670 cginfo_mb[mb].cg_start = cg_offset;
671 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
672 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
673 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
674 cginfo = cginfo_mb[mb].cginfo;
676 /* Set constraints flags for constrained atoms */
677 snew(a_con, molt->atoms.nr);
678 for (ftype = 0; ftype < F_NRE; ftype++)
680 if (interaction_function[ftype].flags & IF_CONSTRAINT)
685 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
689 for (a = 0; a < nral; a++)
691 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
692 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
698 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
701 am = m*cgs->index[cgs->nr];
702 for (cg = 0; cg < cgs->nr; cg++)
705 a1 = cgs->index[cg+1];
707 /* Store the energy group in cginfo */
708 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
709 SET_CGINFO_GID(cginfo[cgm+cg], gid);
711 /* Check the intra/inter charge group exclusions */
712 if (a1-a0 > excl_nalloc)
714 excl_nalloc = a1 - a0;
715 srenew(bExcl, excl_nalloc);
717 /* bExclIntraAll: all intra cg interactions excluded
718 * bExclInter: any inter cg interactions excluded
720 bExclIntraAll = TRUE;
724 for (ai = a0; ai < a1; ai++)
726 /* Check VDW and electrostatic interactions */
727 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
728 type_VDW[molt->atoms.atom[ai].typeB]);
729 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
730 molt->atoms.atom[ai].qB != 0);
732 /* Clear the exclusion list for atom ai */
733 for (aj = a0; aj < a1; aj++)
735 bExcl[aj-a0] = FALSE;
737 /* Loop over all the exclusions of atom ai */
738 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
741 if (aj < a0 || aj >= a1)
750 /* Check if ai excludes a0 to a1 */
751 for (aj = a0; aj < a1; aj++)
755 bExclIntraAll = FALSE;
762 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
765 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
773 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
777 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
779 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
781 /* The size in cginfo is currently only read with DD */
782 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
786 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
790 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
792 /* Store the charge group size */
793 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
795 if (!bExclIntraAll || bExclInter)
797 *bExcl_IntraCGAll_InterCGNone = FALSE;
804 cg_offset += molb->nmol*cgs->nr;
805 a_offset += molb->nmol*cgs->index[cgs->nr];
809 /* the solvent optimizer is called after the QM is initialized,
810 * because we don't want to have the QM subsystemto become an
814 check_solvent(fplog, mtop, fr, cginfo_mb);
816 if (getenv("GMX_NO_SOLV_OPT"))
820 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
821 "Disabling all solvent optimization\n");
823 fr->solvent_opt = esolNO;
827 fr->solvent_opt = esolNO;
829 if (!fr->solvent_opt)
831 for (mb = 0; mb < mtop->nmolblock; mb++)
833 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
835 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
843 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
848 ncg = cgi_mb[nmb-1].cg_end;
851 for (cg = 0; cg < ncg; cg++)
853 while (cg >= cgi_mb[mb].cg_end)
858 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
864 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
866 /*This now calculates sum for q and c6*/
867 double qsum, q2sum, q, c6sum, c6, sqrc6;
869 const t_atoms *atoms;
874 for (mb = 0; mb < mtop->nmolblock; mb++)
876 nmol = mtop->molblock[mb].nmol;
877 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
878 for (i = 0; i < atoms->nr; i++)
880 q = atoms->atom[i].q;
883 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
885 c6sum += nmol*sqrc6*sqrc6;
889 fr->q2sum[0] = q2sum;
890 fr->c6sum[0] = c6sum/12;
892 if (fr->efep != efepNO)
897 for (mb = 0; mb < mtop->nmolblock; mb++)
899 nmol = mtop->molblock[mb].nmol;
900 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
901 for (i = 0; i < atoms->nr; i++)
903 q = atoms->atom[i].qB;
906 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
908 c6sum += nmol*sqrc6*sqrc6;
911 fr->q2sum[1] = q2sum;
912 fr->c6sum[1] = c6sum/12;
917 fr->qsum[1] = fr->qsum[0];
918 fr->q2sum[1] = fr->q2sum[0];
919 fr->c6sum[1] = fr->c6sum[0];
923 if (fr->efep == efepNO)
925 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
929 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
930 fr->qsum[0], fr->qsum[1]);
935 void update_forcerec(t_forcerec *fr, matrix box)
937 if (fr->eeltype == eelGRF)
939 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
940 fr->rcoulomb, fr->temp, fr->zsquare, box,
941 &fr->kappa, &fr->k_rf, &fr->c_rf);
945 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
947 const t_atoms *atoms, *atoms_tpi;
948 const t_blocka *excl;
949 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
950 gmx_int64_t npair, npair_ij, tmpi, tmpj;
951 double csix, ctwelve;
955 real *nbfp_comb = NULL;
961 /* For LJ-PME, we want to correct for the difference between the
962 * actual C6 values and the C6 values used by the LJ-PME based on
963 * combination rules. */
965 if (EVDW_PME(fr->vdwtype))
967 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
968 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
969 for (tpi = 0; tpi < ntp; ++tpi)
971 for (tpj = 0; tpj < ntp; ++tpj)
973 C6(nbfp_comb, ntp, tpi, tpj) =
974 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
975 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
980 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
988 /* Count the types so we avoid natoms^2 operations */
989 snew(typecount, ntp);
990 gmx_mtop_count_atomtypes(mtop, q, typecount);
992 for (tpi = 0; tpi < ntp; tpi++)
994 for (tpj = tpi; tpj < ntp; tpj++)
996 tmpi = typecount[tpi];
997 tmpj = typecount[tpj];
1000 npair_ij = tmpi*tmpj;
1004 npair_ij = tmpi*(tmpi - 1)/2;
1008 /* nbfp now includes the 6.0 derivative prefactor */
1009 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1013 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1014 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1015 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1021 /* Subtract the excluded pairs.
1022 * The main reason for substracting exclusions is that in some cases
1023 * some combinations might never occur and the parameters could have
1024 * any value. These unused values should not influence the dispersion
1027 for (mb = 0; mb < mtop->nmolblock; mb++)
1029 nmol = mtop->molblock[mb].nmol;
1030 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1031 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1032 for (i = 0; (i < atoms->nr); i++)
1036 tpi = atoms->atom[i].type;
1040 tpi = atoms->atom[i].typeB;
1042 j1 = excl->index[i];
1043 j2 = excl->index[i+1];
1044 for (j = j1; j < j2; j++)
1051 tpj = atoms->atom[k].type;
1055 tpj = atoms->atom[k].typeB;
1059 /* nbfp now includes the 6.0 derivative prefactor */
1060 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1064 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1065 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1066 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1076 /* Only correct for the interaction of the test particle
1077 * with the rest of the system.
1080 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1083 for (mb = 0; mb < mtop->nmolblock; mb++)
1085 nmol = mtop->molblock[mb].nmol;
1086 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1087 for (j = 0; j < atoms->nr; j++)
1090 /* Remove the interaction of the test charge group
1093 if (mb == mtop->nmolblock-1)
1097 if (mb == 0 && nmol == 1)
1099 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1104 tpj = atoms->atom[j].type;
1108 tpj = atoms->atom[j].typeB;
1110 for (i = 0; i < fr->n_tpi; i++)
1114 tpi = atoms_tpi->atom[i].type;
1118 tpi = atoms_tpi->atom[i].typeB;
1122 /* nbfp now includes the 6.0 derivative prefactor */
1123 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1127 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1128 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1129 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1136 if (npair - nexcl <= 0 && fplog)
1138 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1144 csix /= npair - nexcl;
1145 ctwelve /= npair - nexcl;
1149 fprintf(debug, "Counted %d exclusions\n", nexcl);
1150 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1151 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1153 fr->avcsix[q] = csix;
1154 fr->avctwelve[q] = ctwelve;
1157 if (EVDW_PME(fr->vdwtype))
1164 if (fr->eDispCorr == edispcAllEner ||
1165 fr->eDispCorr == edispcAllEnerPres)
1167 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1168 fr->avcsix[0], fr->avctwelve[0]);
1172 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1178 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1179 const gmx_mtop_t *mtop)
1181 const t_atoms *at1, *at2;
1182 int mt1, mt2, i, j, tpi, tpj, ntypes;
1188 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1195 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1197 at1 = &mtop->moltype[mt1].atoms;
1198 for (i = 0; (i < at1->nr); i++)
1200 tpi = at1->atom[i].type;
1203 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1206 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1208 at2 = &mtop->moltype[mt2].atoms;
1209 for (j = 0; (j < at2->nr); j++)
1211 tpj = at2->atom[j].type;
1214 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1216 b = BHAMB(nbfp, ntypes, tpi, tpj);
1217 if (b > fr->bham_b_max)
1221 if ((b < bmin) || (bmin == -1))
1231 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1232 bmin, fr->bham_b_max);
1236 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1237 t_forcerec *fr, real rtab,
1238 const t_commrec *cr,
1239 const char *tabfn, char *eg1, char *eg2,
1249 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1254 sprintf(buf, "%s", tabfn);
1257 /* Append the two energy group names */
1258 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1259 eg1, eg2, ftp2ext(efXVG));
1261 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1262 /* Copy the contents of the table to separate coulomb and LJ tables too,
1263 * to improve cache performance.
1265 /* For performance reasons we want
1266 * the table data to be aligned to 16-byte. The pointers could be freed
1267 * but currently aren't.
1269 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1270 nbl->table_elec.format = nbl->table_elec_vdw.format;
1271 nbl->table_elec.r = nbl->table_elec_vdw.r;
1272 nbl->table_elec.n = nbl->table_elec_vdw.n;
1273 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1274 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1275 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1276 nbl->table_elec.ninteractions = 1;
1277 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1278 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1280 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1281 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1282 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1283 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1284 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1285 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1286 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1287 nbl->table_vdw.ninteractions = 2;
1288 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1289 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1291 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1293 for (j = 0; j < 4; j++)
1295 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1297 for (j = 0; j < 8; j++)
1299 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1304 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1305 int *ncount, int **count)
1307 const gmx_moltype_t *molt;
1309 int mt, ftype, stride, i, j, tabnr;
1311 for (mt = 0; mt < mtop->nmoltype; mt++)
1313 molt = &mtop->moltype[mt];
1314 for (ftype = 0; ftype < F_NRE; ftype++)
1316 if (ftype == ftype1 || ftype == ftype2)
1318 il = &molt->ilist[ftype];
1319 stride = 1 + NRAL(ftype);
1320 for (i = 0; i < il->nr; i += stride)
1322 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1325 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1327 if (tabnr >= *ncount)
1329 srenew(*count, tabnr+1);
1330 for (j = *ncount; j < tabnr+1; j++)
1343 static bondedtable_t *make_bonded_tables(FILE *fplog,
1344 int ftype1, int ftype2,
1345 const gmx_mtop_t *mtop,
1346 const char *basefn, const char *tabext)
1348 int i, ncount, *count;
1356 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1361 for (i = 0; i < ncount; i++)
1365 sprintf(tabfn, "%s", basefn);
1366 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1367 tabext, i, ftp2ext(efXVG));
1368 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1377 void forcerec_set_ranges(t_forcerec *fr,
1378 int ncg_home, int ncg_force,
1380 int natoms_force_constr, int natoms_f_novirsum)
1385 /* fr->ncg_force is unused in the standard code,
1386 * but it can be useful for modified code dealing with charge groups.
1388 fr->ncg_force = ncg_force;
1389 fr->natoms_force = natoms_force;
1390 fr->natoms_force_constr = natoms_force_constr;
1392 if (fr->natoms_force_constr > fr->nalloc_force)
1394 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1398 srenew(fr->f_twin, fr->nalloc_force);
1402 if (fr->bF_NoVirSum)
1404 fr->f_novirsum_n = natoms_f_novirsum;
1405 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1407 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1408 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1413 fr->f_novirsum_n = 0;
1417 static real cutoff_inf(real cutoff)
1421 cutoff = GMX_CUTOFF_INF;
1427 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1428 t_forcerec *fr, const t_inputrec *ir,
1429 const char *tabfn, const gmx_mtop_t *mtop,
1437 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1441 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1443 sprintf(buf, "%s", tabfn);
1444 for (i = 0; i < ir->adress->n_tf_grps; i++)
1446 j = ir->adress->tf_table_index[i]; /* get energy group index */
1447 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1448 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1451 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1453 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1458 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1465 ir->rcoulomb == 0 &&
1467 ir->ePBC == epbcNONE &&
1468 ir->vdwtype == evdwCUT &&
1469 ir->coulombtype == eelCUT &&
1470 ir->efep == efepNO &&
1471 (ir->implicit_solvent == eisNO ||
1472 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1473 ir->gb_algorithm == egbHCT ||
1474 ir->gb_algorithm == egbOBC))) &&
1475 getenv("GMX_NO_ALLVSALL") == NULL
1478 if (bAllvsAll && ir->opts.ngener > 1)
1480 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1486 fprintf(stderr, "\n%s\n", note);
1490 fprintf(fp, "\n%s\n", note);
1496 if (bAllvsAll && fp && MASTER(cr))
1498 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1505 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1509 /* These thread local data structures are used for bondeds only */
1510 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1512 if (fr->nthreads > 1)
1514 snew(fr->f_t, fr->nthreads);
1515 /* Thread 0 uses the global force and energy arrays */
1516 for (t = 1; t < fr->nthreads; t++)
1518 fr->f_t[t].f = NULL;
1519 fr->f_t[t].f_nalloc = 0;
1520 snew(fr->f_t[t].fshift, SHIFTS);
1521 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1522 for (i = 0; i < egNR; i++)
1524 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1531 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1535 *kernel_type = nbnxnk4x4_PlainC;
1536 *ewald_excl = ewaldexclTable;
1538 #ifdef GMX_NBNXN_SIMD
1540 #ifdef GMX_NBNXN_SIMD_4XN
1541 *kernel_type = nbnxnk4xN_SIMD_4xN;
1543 #ifdef GMX_NBNXN_SIMD_2XNN
1544 /* We expect the 2xNN kernels to be faster in most cases */
1545 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1548 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_SIMD_X86_AVX_256_OR_HIGHER
1549 if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1551 /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1552 * 10% with HT, 50% without HT, but extra zeros interactions
1553 * can compensate. As we currently don't detect the actual use
1554 * of HT, switch to 4x8 to avoid a potential performance hit.
1556 *kernel_type = nbnxnk4xN_SIMD_4xN;
1559 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1561 #ifdef GMX_NBNXN_SIMD_4XN
1562 *kernel_type = nbnxnk4xN_SIMD_4xN;
1564 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1567 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1569 #ifdef GMX_NBNXN_SIMD_2XNN
1570 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1572 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1576 /* Analytical Ewald exclusion correction is only an option in
1577 * the SIMD kernel. On BlueGene/Q, this is faster regardless
1578 * of precision. In single precision, this is faster on
1579 * Bulldozer, and slightly faster on Sandy Bridge.
1581 #if ((defined GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER || defined GMX_SIMD_X86_AVX_256_OR_HIGHER || defined __MIC__) && !defined GMX_DOUBLE) || (defined GMX_SIMD_IBM_QPX)
1582 *ewald_excl = ewaldexclAnalytical;
1584 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1586 *ewald_excl = ewaldexclTable;
1588 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1590 *ewald_excl = ewaldexclAnalytical;
1594 #endif /* GMX_NBNXN_SIMD */
1598 const char *lookup_nbnxn_kernel_name(int kernel_type)
1600 const char *returnvalue = NULL;
1601 switch (kernel_type)
1604 returnvalue = "not set";
1606 case nbnxnk4x4_PlainC:
1607 returnvalue = "plain C";
1609 case nbnxnk4xN_SIMD_4xN:
1610 case nbnxnk4xN_SIMD_2xNN:
1611 #ifdef GMX_NBNXN_SIMD
1612 #ifdef GMX_SIMD_X86_SSE2_OR_HIGHER
1613 /* We have x86 SSE2 compatible SIMD */
1614 #ifdef GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER
1615 returnvalue = "AVX-128-FMA";
1617 #if defined GMX_SIMD_X86_AVX_256_OR_HIGHER || defined __AVX__
1618 /* x86 SIMD intrinsics can be converted to SSE or AVX depending
1619 * on compiler flags. As we use nearly identical intrinsics,
1620 * compiling for AVX without an AVX macros effectively results
1622 * For gcc we check for __AVX__
1623 * At least a check for icc should be added (if there is a macro)
1625 #if defined GMX_SIMD_X86_AVX_256_OR_HIGHER && !defined GMX_NBNXN_HALF_WIDTH_SIMD
1626 returnvalue = "AVX-256";
1628 returnvalue = "AVX-128";
1631 #ifdef GMX_SIMD_X86_SSE4_1_OR_HIGHER
1632 returnvalue = "SSE4.1";
1634 returnvalue = "SSE2";
1638 #else /* GMX_SIMD_X86_SSE2_OR_HIGHER */
1639 /* not GMX_SIMD_X86_SSE2_OR_HIGHER, but other SIMD */
1640 returnvalue = "SIMD";
1641 #endif /* GMX_SIMD_X86_SSE2_OR_HIGHER */
1642 #else /* GMX_NBNXN_SIMD */
1643 returnvalue = "not available";
1644 #endif /* GMX_NBNXN_SIMD */
1646 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1647 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1651 gmx_fatal(FARGS, "Illegal kernel type selected");
1658 static void pick_nbnxn_kernel(FILE *fp,
1659 const t_commrec *cr,
1660 gmx_bool use_simd_kernels,
1662 gmx_bool bEmulateGPU,
1663 const t_inputrec *ir,
1666 gmx_bool bDoNonbonded)
1668 assert(kernel_type);
1670 *kernel_type = nbnxnkNotSet;
1671 *ewald_excl = ewaldexclTable;
1675 *kernel_type = nbnxnk8x8x8_PlainC;
1679 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1684 *kernel_type = nbnxnk8x8x8_CUDA;
1687 if (*kernel_type == nbnxnkNotSet)
1689 if (use_simd_kernels)
1691 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1695 *kernel_type = nbnxnk4x4_PlainC;
1699 if (bDoNonbonded && fp != NULL)
1701 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1702 lookup_nbnxn_kernel_name(*kernel_type),
1703 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1704 nbnxn_kernel_to_cj_size(*kernel_type));
1708 static void pick_nbnxn_resources(const t_commrec *cr,
1709 const gmx_hw_info_t *hwinfo,
1710 gmx_bool bDoNonbonded,
1712 gmx_bool *bEmulateGPU,
1713 const gmx_gpu_opt_t *gpu_opt)
1715 gmx_bool bEmulateGPUEnvVarSet;
1716 char gpu_err_str[STRLEN];
1720 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1722 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1723 * GPUs (currently) only handle non-bonded calculations, we will
1724 * automatically switch to emulation if non-bonded calculations are
1725 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1726 * way to turn off GPU initialization, data movement, and cleanup.
1728 * GPU emulation can be useful to assess the performance one can expect by
1729 * adding GPU(s) to the machine. The conditional below allows this even
1730 * if mdrun is compiled without GPU acceleration support.
1731 * Note that you should freezing the system as otherwise it will explode.
1733 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1735 gpu_opt->ncuda_dev_use > 0));
1737 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1739 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1741 /* Each PP node will use the intra-node id-th device from the
1742 * list of detected/selected GPUs. */
1743 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1744 &hwinfo->gpu_info, gpu_opt))
1746 /* At this point the init should never fail as we made sure that
1747 * we have all the GPUs we need. If it still does, we'll bail. */
1748 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1750 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1751 cr->rank_pp_intranode),
1755 /* Here we actually turn on hardware GPU acceleration */
1760 gmx_bool uses_simple_tables(int cutoff_scheme,
1761 nonbonded_verlet_t *nbv,
1764 gmx_bool bUsesSimpleTables = TRUE;
1767 switch (cutoff_scheme)
1770 bUsesSimpleTables = TRUE;
1773 assert(NULL != nbv && NULL != nbv->grp);
1774 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1775 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1778 gmx_incons("unimplemented");
1780 return bUsesSimpleTables;
1783 static void init_ewald_f_table(interaction_const_t *ic,
1784 gmx_bool bUsesSimpleTables,
1789 if (bUsesSimpleTables)
1791 /* With a spacing of 0.0005 we are at the force summation accuracy
1792 * for the SSE kernels for "normal" atomistic simulations.
1794 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1797 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1798 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1802 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1803 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1804 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1807 sfree_aligned(ic->tabq_coul_FDV0);
1808 sfree_aligned(ic->tabq_coul_F);
1809 sfree_aligned(ic->tabq_coul_V);
1811 /* Create the original table data in FDV0 */
1812 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1813 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1814 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1815 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1816 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1819 void init_interaction_const_tables(FILE *fp,
1820 interaction_const_t *ic,
1821 gmx_bool bUsesSimpleTables,
1826 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1828 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1832 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1833 1/ic->tabq_scale, ic->tabq_size);
1838 static void clear_force_switch_constants(shift_consts_t *sc)
1845 static void force_switch_constants(real p,
1849 /* Here we determine the coefficient for shifting the force to zero
1850 * between distance rsw and the cut-off rc.
1851 * For a potential of r^-p, we have force p*r^-(p+1).
1852 * But to save flops we absorb p in the coefficient.
1854 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1855 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1857 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1858 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1859 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1862 static void potential_switch_constants(real rsw, real rc,
1863 switch_consts_t *sc)
1865 /* The switch function is 1 at rsw and 0 at rc.
1866 * The derivative and second derivate are zero at both ends.
1867 * rsw = max(r - r_switch, 0)
1868 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1869 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1870 * force = force*dsw - potential*sw
1873 sc->c3 = -10*pow(rc - rsw, -3);
1874 sc->c4 = 15*pow(rc - rsw, -4);
1875 sc->c5 = -6*pow(rc - rsw, -5);
1879 init_interaction_const(FILE *fp,
1880 const t_commrec gmx_unused *cr,
1881 interaction_const_t **interaction_const,
1882 const t_forcerec *fr,
1885 interaction_const_t *ic;
1886 gmx_bool bUsesSimpleTables = TRUE;
1890 /* Just allocate something so we can free it */
1891 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1892 snew_aligned(ic->tabq_coul_F, 16, 32);
1893 snew_aligned(ic->tabq_coul_V, 16, 32);
1895 ic->rlist = fr->rlist;
1896 ic->rlistlong = fr->rlistlong;
1899 ic->vdw_modifier = fr->vdw_modifier;
1900 ic->rvdw = fr->rvdw;
1901 ic->rvdw_switch = fr->rvdw_switch;
1902 clear_force_switch_constants(&ic->dispersion_shift);
1903 clear_force_switch_constants(&ic->repulsion_shift);
1905 switch (ic->vdw_modifier)
1907 case eintmodPOTSHIFT:
1908 /* Only shift the potential, don't touch the force */
1909 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
1910 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
1912 case eintmodFORCESWITCH:
1913 /* Switch the force, switch and shift the potential */
1914 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
1915 &ic->dispersion_shift);
1916 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
1917 &ic->repulsion_shift);
1919 case eintmodPOTSWITCH:
1920 /* Switch the potential and force */
1921 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
1925 case eintmodEXACTCUTOFF:
1926 /* Nothing to do here */
1929 gmx_incons("unimplemented potential modifier");
1932 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
1934 /* Electrostatics */
1935 ic->eeltype = fr->eeltype;
1936 ic->rcoulomb = fr->rcoulomb;
1937 ic->epsilon_r = fr->epsilon_r;
1938 ic->epsfac = fr->epsfac;
1941 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
1942 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1944 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1946 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
1953 /* Reaction-field */
1954 if (EEL_RF(ic->eeltype))
1956 ic->epsilon_rf = fr->epsilon_rf;
1957 ic->k_rf = fr->k_rf;
1958 ic->c_rf = fr->c_rf;
1962 /* For plain cut-off we might use the reaction-field kernels */
1963 ic->epsilon_rf = ic->epsilon_r;
1965 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1967 ic->c_rf = 1/ic->rcoulomb;
1977 fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1978 ic->repulsion_shift.cpot, ic->dispersion_shift.cpot);
1979 if (ic->eeltype == eelCUT)
1981 fprintf(fp, ", Coulomb %.3f", -ic->c_rf);
1983 else if (EEL_PME(ic->eeltype))
1985 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
1990 *interaction_const = ic;
1992 if (fr->nbv != NULL && fr->nbv->bUseGPU)
1994 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
1996 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
1997 * also sharing texture references. To keep the code simple, we don't
1998 * treat texture references as shared resources, but this means that
1999 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2000 * Hence, to ensure that the non-bonded kernels don't start before all
2001 * texture binding operations are finished, we need to wait for all ranks
2002 * to arrive here before continuing.
2004 * Note that we could omit this barrier if GPUs are not shared (or
2005 * texture objects are used), but as this is initialization code, there
2006 * is not point in complicating things.
2008 #ifdef GMX_THREAD_MPI
2013 #endif /* GMX_THREAD_MPI */
2016 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2017 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2020 static void init_nb_verlet(FILE *fp,
2021 nonbonded_verlet_t **nb_verlet,
2022 const t_inputrec *ir,
2023 const t_forcerec *fr,
2024 const t_commrec *cr,
2025 const char *nbpu_opt)
2027 nonbonded_verlet_t *nbv;
2030 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2032 nbnxn_alloc_t *nb_alloc;
2033 nbnxn_free_t *nb_free;
2037 pick_nbnxn_resources(cr, fr->hwinfo,
2045 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2046 for (i = 0; i < nbv->ngrp; i++)
2048 nbv->grp[i].nbl_lists.nnbl = 0;
2049 nbv->grp[i].nbat = NULL;
2050 nbv->grp[i].kernel_type = nbnxnkNotSet;
2052 if (i == 0) /* local */
2054 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2055 nbv->bUseGPU, bEmulateGPU, ir,
2056 &nbv->grp[i].kernel_type,
2057 &nbv->grp[i].ewald_excl,
2060 else /* non-local */
2062 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2064 /* Use GPU for local, select a CPU kernel for non-local */
2065 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2067 &nbv->grp[i].kernel_type,
2068 &nbv->grp[i].ewald_excl,
2071 bHybridGPURun = TRUE;
2075 /* Use the same kernel for local and non-local interactions */
2076 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2077 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2084 /* init the NxN GPU data; the last argument tells whether we'll have
2085 * both local and non-local NB calculation on GPU */
2086 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2087 &fr->hwinfo->gpu_info, fr->gpu_opt,
2088 cr->rank_pp_intranode,
2089 (nbv->ngrp > 1) && !bHybridGPURun);
2091 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2095 nbv->min_ci_balanced = strtol(env, &end, 10);
2096 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2098 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2103 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2104 nbv->min_ci_balanced);
2109 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2112 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2113 nbv->min_ci_balanced);
2119 nbv->min_ci_balanced = 0;
2124 nbnxn_init_search(&nbv->nbs,
2125 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2126 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2127 gmx_omp_nthreads_get(emntNonbonded));
2129 for (i = 0; i < nbv->ngrp; i++)
2131 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2133 nb_alloc = &pmalloc;
2142 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2143 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2144 /* 8x8x8 "non-simple" lists are ATM always combined */
2145 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2149 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2151 gmx_bool bSimpleList;
2153 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2155 snew(nbv->grp[i].nbat, 1);
2156 nbnxn_atomdata_init(fp,
2158 nbv->grp[i].kernel_type,
2159 /* SIMD LJ switch kernels don't support LJ combination rules */
2160 bSimpleList && !(fr->vdw_modifier == eintmodFORCESWITCH || fr->vdw_modifier == eintmodPOTSWITCH),
2161 fr->ntype, fr->nbfp,
2163 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2168 nbv->grp[i].nbat = nbv->grp[0].nbat;
2173 void init_forcerec(FILE *fp,
2174 const output_env_t oenv,
2177 const t_inputrec *ir,
2178 const gmx_mtop_t *mtop,
2179 const t_commrec *cr,
2185 const char *nbpu_opt,
2186 gmx_bool bNoSolvOpt,
2189 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2194 gmx_bool bGenericKernelOnly;
2195 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2197 int *nm_ind, egp_flags;
2199 if (fr->hwinfo == NULL)
2201 /* Detect hardware, gather information.
2202 * In mdrun, hwinfo has already been set before calling init_forcerec.
2203 * Here we ignore GPUs, as tools will not use them anyhow.
2205 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2208 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2209 fr->use_simd_kernels = TRUE;
2211 fr->bDomDec = DOMAINDECOMP(cr);
2213 natoms = mtop->natoms;
2215 if (check_box(ir->ePBC, box))
2217 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2220 /* Test particle insertion ? */
2223 /* Set to the size of the molecule to be inserted (the last one) */
2224 /* Because of old style topologies, we have to use the last cg
2225 * instead of the last molecule type.
2227 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2228 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2229 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2231 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2239 /* Copy AdResS parameters */
2242 fr->adress_type = ir->adress->type;
2243 fr->adress_const_wf = ir->adress->const_wf;
2244 fr->adress_ex_width = ir->adress->ex_width;
2245 fr->adress_hy_width = ir->adress->hy_width;
2246 fr->adress_icor = ir->adress->icor;
2247 fr->adress_site = ir->adress->site;
2248 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2249 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2252 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2253 for (i = 0; i < ir->adress->n_energy_grps; i++)
2255 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2258 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2259 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2260 for (i = 0; i < fr->n_adress_tf_grps; i++)
2262 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2264 copy_rvec(ir->adress->refs, fr->adress_refs);
2268 fr->adress_type = eAdressOff;
2269 fr->adress_do_hybridpairs = FALSE;
2272 /* Copy the user determined parameters */
2273 fr->userint1 = ir->userint1;
2274 fr->userint2 = ir->userint2;
2275 fr->userint3 = ir->userint3;
2276 fr->userint4 = ir->userint4;
2277 fr->userreal1 = ir->userreal1;
2278 fr->userreal2 = ir->userreal2;
2279 fr->userreal3 = ir->userreal3;
2280 fr->userreal4 = ir->userreal4;
2283 fr->fc_stepsize = ir->fc_stepsize;
2286 fr->efep = ir->efep;
2287 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2288 if (ir->fepvals->bScCoul)
2290 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2291 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2295 fr->sc_alphacoul = 0;
2296 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2298 fr->sc_power = ir->fepvals->sc_power;
2299 fr->sc_r_power = ir->fepvals->sc_r_power;
2300 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2302 env = getenv("GMX_SCSIGMA_MIN");
2306 sscanf(env, "%lf", &dbl);
2307 fr->sc_sigma6_min = pow(dbl, 6);
2310 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2314 fr->bNonbonded = TRUE;
2315 if (getenv("GMX_NO_NONBONDED") != NULL)
2317 /* turn off non-bonded calculations */
2318 fr->bNonbonded = FALSE;
2319 md_print_warn(cr, fp,
2320 "Found environment variable GMX_NO_NONBONDED.\n"
2321 "Disabling nonbonded calculations.\n");
2324 bGenericKernelOnly = FALSE;
2326 /* We now check in the NS code whether a particular combination of interactions
2327 * can be used with water optimization, and disable it if that is not the case.
2330 if (getenv("GMX_NB_GENERIC") != NULL)
2335 "Found environment variable GMX_NB_GENERIC.\n"
2336 "Disabling all interaction-specific nonbonded kernels, will only\n"
2337 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2339 bGenericKernelOnly = TRUE;
2342 if (bGenericKernelOnly == TRUE)
2347 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2349 fr->use_simd_kernels = FALSE;
2353 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2354 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2358 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2360 /* Check if we can/should do all-vs-all kernels */
2361 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2362 fr->AllvsAll_work = NULL;
2363 fr->AllvsAll_workgb = NULL;
2365 /* All-vs-all kernels have not been implemented in 4.6, and
2366 * the SIMD group kernels are also buggy in this case. Non-SIMD
2367 * group kernels are OK. See Redmine #1249. */
2370 fr->bAllvsAll = FALSE;
2371 fr->use_simd_kernels = FALSE;
2375 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2376 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2377 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2378 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2379 "we are proceeding by disabling all CPU architecture-specific\n"
2380 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2381 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2385 /* Neighbour searching stuff */
2386 fr->cutoff_scheme = ir->cutoff_scheme;
2387 fr->bGrid = (ir->ns_type == ensGRID);
2388 fr->ePBC = ir->ePBC;
2390 /* Determine if we will do PBC for distances in bonded interactions */
2391 if (fr->ePBC == epbcNONE)
2393 fr->bMolPBC = FALSE;
2397 if (!DOMAINDECOMP(cr))
2399 /* The group cut-off scheme and SHAKE assume charge groups
2400 * are whole, but not using molpbc is faster in most cases.
2402 if (fr->cutoff_scheme == ecutsGROUP ||
2403 (ir->eConstrAlg == econtSHAKE &&
2404 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2405 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2407 fr->bMolPBC = ir->bPeriodicMols;
2412 if (getenv("GMX_USE_GRAPH") != NULL)
2414 fr->bMolPBC = FALSE;
2417 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2424 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2427 fr->bGB = (ir->implicit_solvent == eisGBSA);
2429 fr->rc_scaling = ir->refcoord_scaling;
2430 copy_rvec(ir->posres_com, fr->posres_com);
2431 copy_rvec(ir->posres_comB, fr->posres_comB);
2432 fr->rlist = cutoff_inf(ir->rlist);
2433 fr->rlistlong = cutoff_inf(ir->rlistlong);
2434 fr->eeltype = ir->coulombtype;
2435 fr->vdwtype = ir->vdwtype;
2436 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2438 fr->coulomb_modifier = ir->coulomb_modifier;
2439 fr->vdw_modifier = ir->vdw_modifier;
2441 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2442 switch (fr->eeltype)
2445 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2451 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2455 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2456 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2465 case eelPMEUSERSWITCH:
2466 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2471 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2475 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2479 /* Vdw: Translate from mdp settings to kernel format */
2480 switch (fr->vdwtype)
2486 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2490 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2497 case evdwENCADSHIFT:
2498 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2502 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2506 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2507 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2508 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2510 fr->bTwinRange = fr->rlistlong > fr->rlist;
2511 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2513 fr->reppow = mtop->ffparams.reppow;
2515 if (ir->cutoff_scheme == ecutsGROUP)
2517 fr->bvdwtab = (fr->vdwtype != evdwCUT ||
2518 !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2519 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2520 fr->bcoultab = !(fr->eeltype == eelCUT ||
2521 fr->eeltype == eelEWALD ||
2522 fr->eeltype == eelPME ||
2523 fr->eeltype == eelRF ||
2524 fr->eeltype == eelRF_ZERO);
2526 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2527 * going to be faster to tabulate the interaction than calling the generic kernel.
2529 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2531 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2533 fr->bcoultab = TRUE;
2536 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2537 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2538 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2539 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2541 if (fr->rcoulomb != fr->rvdw)
2543 fr->bcoultab = TRUE;
2547 if (getenv("GMX_REQUIRE_TABLES"))
2550 fr->bcoultab = TRUE;
2555 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2556 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2559 if (fr->bvdwtab == TRUE)
2561 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2562 fr->nbkernel_vdw_modifier = eintmodNONE;
2564 if (fr->bcoultab == TRUE)
2566 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2567 fr->nbkernel_elec_modifier = eintmodNONE;
2571 if (ir->cutoff_scheme == ecutsVERLET)
2573 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2575 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2577 fr->bvdwtab = FALSE;
2578 fr->bcoultab = FALSE;
2581 /* Tables are used for direct ewald sum */
2584 if (EEL_PME(ir->coulombtype))
2588 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2590 if (ir->coulombtype == eelP3M_AD)
2592 please_cite(fp, "Hockney1988");
2593 please_cite(fp, "Ballenegger2012");
2597 please_cite(fp, "Essmann95a");
2600 if (ir->ewald_geometry == eewg3DC)
2604 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2606 please_cite(fp, "In-Chul99a");
2609 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2610 init_ewald_tab(&(fr->ewald_table), ir, fp);
2613 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2614 1/fr->ewaldcoeff_q);
2618 if (EVDW_PME(ir->vdwtype))
2622 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2624 please_cite(fp, "Essman95a");
2625 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2628 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2629 1/fr->ewaldcoeff_lj);
2633 /* Electrostatics */
2634 fr->epsilon_r = ir->epsilon_r;
2635 fr->epsilon_rf = ir->epsilon_rf;
2636 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2637 fr->rcoulomb_switch = ir->rcoulomb_switch;
2638 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2640 /* Parameters for generalized RF */
2644 if (fr->eeltype == eelGRF)
2646 init_generalized_rf(fp, mtop, ir, fr);
2649 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2650 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2651 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2652 IR_ELEC_FIELD(*ir) ||
2653 (fr->adress_icor != eAdressICOff)
2656 if (fr->cutoff_scheme == ecutsGROUP &&
2657 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2659 /* Count the total number of charge groups */
2660 fr->cg_nalloc = ncg_mtop(mtop);
2661 srenew(fr->cg_cm, fr->cg_nalloc);
2663 if (fr->shift_vec == NULL)
2665 snew(fr->shift_vec, SHIFTS);
2668 if (fr->fshift == NULL)
2670 snew(fr->fshift, SHIFTS);
2673 if (fr->nbfp == NULL)
2675 fr->ntype = mtop->ffparams.atnr;
2676 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2679 /* Copy the energy group exclusions */
2680 fr->egp_flags = ir->opts.egp_flags;
2682 /* Van der Waals stuff */
2683 fr->rvdw = cutoff_inf(ir->rvdw);
2684 fr->rvdw_switch = ir->rvdw_switch;
2685 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2687 if (fr->rvdw_switch >= fr->rvdw)
2689 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2690 fr->rvdw_switch, fr->rvdw);
2694 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2695 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2696 fr->rvdw_switch, fr->rvdw);
2700 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2702 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2705 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2707 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2712 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2713 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2716 fr->eDispCorr = ir->eDispCorr;
2717 if (ir->eDispCorr != edispcNO)
2719 set_avcsixtwelve(fp, fr, mtop);
2724 set_bham_b_max(fp, fr, mtop);
2727 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2729 /* Copy the GBSA data (radius, volume and surftens for each
2730 * atomtype) from the topology atomtype section to forcerec.
2732 snew(fr->atype_radius, fr->ntype);
2733 snew(fr->atype_vol, fr->ntype);
2734 snew(fr->atype_surftens, fr->ntype);
2735 snew(fr->atype_gb_radius, fr->ntype);
2736 snew(fr->atype_S_hct, fr->ntype);
2738 if (mtop->atomtypes.nr > 0)
2740 for (i = 0; i < fr->ntype; i++)
2742 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2744 for (i = 0; i < fr->ntype; i++)
2746 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2748 for (i = 0; i < fr->ntype; i++)
2750 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2752 for (i = 0; i < fr->ntype; i++)
2754 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2756 for (i = 0; i < fr->ntype; i++)
2758 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2762 /* Generate the GB table if needed */
2766 fr->gbtabscale = 2000;
2768 fr->gbtabscale = 500;
2772 fr->gbtab = make_gb_table(oenv, fr);
2774 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2776 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2777 if (!DOMAINDECOMP(cr))
2779 make_local_gb(cr, fr->born, ir->gb_algorithm);
2783 /* Set the charge scaling */
2784 if (fr->epsilon_r != 0)
2786 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2790 /* eps = 0 is infinite dieletric: no coulomb interactions */
2794 /* Reaction field constants */
2795 if (EEL_RF(fr->eeltype))
2797 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2798 fr->rcoulomb, fr->temp, fr->zsquare, box,
2799 &fr->kappa, &fr->k_rf, &fr->c_rf);
2802 /*This now calculates sum for q and c6*/
2803 set_chargesum(fp, fr, mtop);
2805 /* if we are using LR electrostatics, and they are tabulated,
2806 * the tables will contain modified coulomb interactions.
2807 * Since we want to use the non-shifted ones for 1-4
2808 * coulombic interactions, we must have an extra set of tables.
2811 /* Construct tables.
2812 * A little unnecessary to make both vdw and coul tables sometimes,
2813 * but what the heck... */
2815 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2816 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2818 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2819 fr->bBHAM || fr->bEwald) &&
2820 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2821 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2822 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2824 negp_pp = ir->opts.ngener - ir->nwall;
2828 bSomeNormalNbListsAreInUse = TRUE;
2833 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2834 for (egi = 0; egi < negp_pp; egi++)
2836 for (egj = egi; egj < negp_pp; egj++)
2838 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2839 if (!(egp_flags & EGP_EXCL))
2841 if (egp_flags & EGP_TABLE)
2847 bSomeNormalNbListsAreInUse = TRUE;
2852 if (bSomeNormalNbListsAreInUse)
2854 fr->nnblists = negptable + 1;
2858 fr->nnblists = negptable;
2860 if (fr->nnblists > 1)
2862 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2871 snew(fr->nblists, fr->nnblists);
2873 /* This code automatically gives table length tabext without cut-off's,
2874 * in that case grompp should already have checked that we do not need
2875 * normal tables and we only generate tables for 1-4 interactions.
2877 rtab = ir->rlistlong + ir->tabext;
2881 /* make tables for ordinary interactions */
2882 if (bSomeNormalNbListsAreInUse)
2884 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2887 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2889 if (!bMakeSeparate14Table)
2891 fr->tab14 = fr->nblists[0].table_elec_vdw;
2901 /* Read the special tables for certain energy group pairs */
2902 nm_ind = mtop->groups.grps[egcENER].nm_ind;
2903 for (egi = 0; egi < negp_pp; egi++)
2905 for (egj = egi; egj < negp_pp; egj++)
2907 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2908 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2910 nbl = &(fr->nblists[m]);
2911 if (fr->nnblists > 1)
2913 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2915 /* Read the table file with the two energy groups names appended */
2916 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2917 *mtop->groups.grpname[nm_ind[egi]],
2918 *mtop->groups.grpname[nm_ind[egj]],
2922 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2923 *mtop->groups.grpname[nm_ind[egi]],
2924 *mtop->groups.grpname[nm_ind[egj]],
2925 &fr->nblists[fr->nnblists/2+m]);
2929 else if (fr->nnblists > 1)
2931 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2937 if (bMakeSeparate14Table)
2939 /* generate extra tables with plain Coulomb for 1-4 interactions only */
2940 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2941 GMX_MAKETABLES_14ONLY);
2944 /* Read AdResS Thermo Force table if needed */
2945 if (fr->adress_icor == eAdressICThermoForce)
2947 /* old todo replace */
2949 if (ir->adress->n_tf_grps > 0)
2951 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2956 /* load the default table */
2957 snew(fr->atf_tabs, 1);
2958 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2963 fr->nwall = ir->nwall;
2964 if (ir->nwall && ir->wall_type == ewtTABLE)
2966 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2971 fcd->bondtab = make_bonded_tables(fp,
2972 F_TABBONDS, F_TABBONDSNC,
2974 fcd->angletab = make_bonded_tables(fp,
2977 fcd->dihtab = make_bonded_tables(fp,
2985 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2989 /* QM/MM initialization if requested
2993 fprintf(stderr, "QM/MM calculation requested.\n");
2996 fr->bQMMM = ir->bQMMM;
2997 fr->qr = mk_QMMMrec();
2999 /* Set all the static charge group info */
3000 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3001 &fr->bExcl_IntraCGAll_InterCGNone);
3002 if (DOMAINDECOMP(cr))
3008 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3011 if (!DOMAINDECOMP(cr))
3013 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3014 mtop->natoms, mtop->natoms, mtop->natoms);
3017 fr->print_force = print_force;
3020 /* coarse load balancing vars */
3025 /* Initialize neighbor search */
3026 init_ns(fp, cr, &fr->ns, fr, mtop);
3028 if (cr->duty & DUTY_PP)
3030 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3034 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3039 /* Initialize the thread working data for bonded interactions */
3040 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3042 snew(fr->excl_load, fr->nthreads+1);
3044 if (fr->cutoff_scheme == ecutsVERLET)
3046 if (ir->rcoulomb != ir->rvdw)
3048 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3051 init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
3054 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3055 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3057 if (ir->eDispCorr != edispcNO)
3059 calc_enervirdiff(fp, ir->eDispCorr, fr);
3063 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3064 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3065 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3067 void pr_forcerec(FILE *fp, t_forcerec *fr)
3071 pr_real(fp, fr->rlist);
3072 pr_real(fp, fr->rcoulomb);
3073 pr_real(fp, fr->fudgeQQ);
3074 pr_bool(fp, fr->bGrid);
3075 pr_bool(fp, fr->bTwinRange);
3076 /*pr_int(fp,fr->cg0);
3077 pr_int(fp,fr->hcg);*/
3078 for (i = 0; i < fr->nnblists; i++)
3080 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3082 pr_real(fp, fr->rcoulomb_switch);
3083 pr_real(fp, fr->rcoulomb);
3088 void forcerec_set_excl_load(t_forcerec *fr,
3089 const gmx_localtop_t *top)
3092 int t, i, j, ntot, n, ntarget;
3094 ind = top->excls.index;
3098 for (i = 0; i < top->excls.nr; i++)
3100 for (j = ind[i]; j < ind[i+1]; j++)
3109 fr->excl_load[0] = 0;
3112 for (t = 1; t <= fr->nthreads; t++)
3114 ntarget = (ntot*t)/fr->nthreads;
3115 while (i < top->excls.nr && n < ntarget)
3117 for (j = ind[i]; j < ind[i+1]; j++)
3126 fr->excl_load[t] = i;