2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/domdec/domdec.h"
46 #include "gromacs/ewald/ewald.h"
47 #include "gromacs/gmxlib/cuda_tools/pmalloc_cuda.h"
48 #include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
49 #include "gromacs/legacyheaders/copyrite.h"
50 #include "gromacs/legacyheaders/force.h"
51 #include "gromacs/legacyheaders/gmx_detect_hardware.h"
52 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
53 #include "gromacs/legacyheaders/inputrec.h"
54 #include "gromacs/legacyheaders/macros.h"
55 #include "gromacs/legacyheaders/md_logging.h"
56 #include "gromacs/legacyheaders/md_support.h"
57 #include "gromacs/legacyheaders/names.h"
58 #include "gromacs/legacyheaders/network.h"
59 #include "gromacs/legacyheaders/nonbonded.h"
60 #include "gromacs/legacyheaders/ns.h"
61 #include "gromacs/legacyheaders/qmmm.h"
62 #include "gromacs/legacyheaders/tables.h"
63 #include "gromacs/legacyheaders/txtdump.h"
64 #include "gromacs/legacyheaders/typedefs.h"
65 #include "gromacs/legacyheaders/types/commrec.h"
66 #include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
67 #include "gromacs/listed-forces/manage-threading.h"
68 #include "gromacs/math/calculate-ewald-splitting-coefficient.h"
69 #include "gromacs/math/units.h"
70 #include "gromacs/math/utilities.h"
71 #include "gromacs/math/vec.h"
72 #include "gromacs/mdlib/forcerec-threading.h"
73 #include "gromacs/mdlib/nb_verlet.h"
74 #include "gromacs/mdlib/nbnxn_atomdata.h"
75 #include "gromacs/mdlib/nbnxn_consts.h"
76 #include "gromacs/mdlib/nbnxn_search.h"
77 #include "gromacs/mdlib/nbnxn_simd.h"
78 #include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
79 #include "gromacs/pbcutil/ishift.h"
80 #include "gromacs/pbcutil/pbc.h"
81 #include "gromacs/simd/simd.h"
82 #include "gromacs/topology/mtop_util.h"
83 #include "gromacs/utility/fatalerror.h"
84 #include "gromacs/utility/smalloc.h"
86 t_forcerec *mk_forcerec(void)
96 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
100 for (i = 0; (i < atnr); i++)
102 for (j = 0; (j < atnr); j++)
104 fprintf(fp, "%2d - %2d", i, j);
107 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
108 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
112 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
113 C12(nbfp, atnr, i, j)/12.0);
120 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
128 snew(nbfp, 3*atnr*atnr);
129 for (i = k = 0; (i < atnr); i++)
131 for (j = 0; (j < atnr); j++, k++)
133 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
134 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
135 /* nbfp now includes the 6.0 derivative prefactor */
136 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
142 snew(nbfp, 2*atnr*atnr);
143 for (i = k = 0; (i < atnr); i++)
145 for (j = 0; (j < atnr); j++, k++)
147 /* nbfp now includes the 6.0/12.0 derivative prefactors */
148 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
149 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
157 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
160 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
162 const real oneOverSix = 1.0 / 6.0;
164 /* For LJ-PME simulations, we correct the energies with the reciprocal space
165 * inside of the cut-off. To do this the non-bonded kernels needs to have
166 * access to the C6-values used on the reciprocal grid in pme.c
170 snew(grid, 2*atnr*atnr);
171 for (i = k = 0; (i < atnr); i++)
173 for (j = 0; (j < atnr); j++, k++)
175 c6i = idef->iparams[i*(atnr+1)].lj.c6;
176 c12i = idef->iparams[i*(atnr+1)].lj.c12;
177 c6j = idef->iparams[j*(atnr+1)].lj.c6;
178 c12j = idef->iparams[j*(atnr+1)].lj.c12;
179 c6 = sqrt(c6i * c6j);
180 if (fr->ljpme_combination_rule == eljpmeLB
181 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
183 sigmai = pow(c12i / c6i, oneOverSix);
184 sigmaj = pow(c12j / c6j, oneOverSix);
185 epsi = c6i * c6i / c12i;
186 epsj = c6j * c6j / c12j;
187 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
189 /* Store the elements at the same relative positions as C6 in nbfp in order
190 * to simplify access in the kernels
192 grid[2*(atnr*i+j)] = c6*6.0;
198 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
202 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
204 const real oneOverSix = 1.0 / 6.0;
207 snew(nbfp, 2*atnr*atnr);
208 for (i = 0; i < atnr; ++i)
210 for (j = 0; j < atnr; ++j)
212 c6i = idef->iparams[i*(atnr+1)].lj.c6;
213 c12i = idef->iparams[i*(atnr+1)].lj.c12;
214 c6j = idef->iparams[j*(atnr+1)].lj.c6;
215 c12j = idef->iparams[j*(atnr+1)].lj.c12;
216 c6 = sqrt(c6i * c6j);
217 c12 = sqrt(c12i * c12j);
218 if (comb_rule == eCOMB_ARITHMETIC
219 && !gmx_numzero(c6) && !gmx_numzero(c12))
221 sigmai = pow(c12i / c6i, oneOverSix);
222 sigmaj = pow(c12j / c6j, oneOverSix);
223 epsi = c6i * c6i / c12i;
224 epsj = c6j * c6j / c12j;
225 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
226 c12 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
228 C6(nbfp, atnr, i, j) = c6*6.0;
229 C12(nbfp, atnr, i, j) = c12*12.0;
235 /* This routine sets fr->solvent_opt to the most common solvent in the
236 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
237 * the fr->solvent_type array with the correct type (or esolNO).
239 * Charge groups that fulfill the conditions but are not identical to the
240 * most common one will be marked as esolNO in the solvent_type array.
242 * TIP3p is identical to SPC for these purposes, so we call it
243 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
245 * NOTE: QM particle should not
246 * become an optimized solvent. Not even if there is only one charge
256 } solvent_parameters_t;
259 check_solvent_cg(const gmx_moltype_t *molt,
262 const unsigned char *qm_grpnr,
263 const t_grps *qm_grps,
265 int *n_solvent_parameters,
266 solvent_parameters_t **solvent_parameters_p,
276 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
277 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
280 solvent_parameters_t *solvent_parameters;
282 /* We use a list with parameters for each solvent type.
283 * Every time we discover a new molecule that fulfills the basic
284 * conditions for a solvent we compare with the previous entries
285 * in these lists. If the parameters are the same we just increment
286 * the counter for that type, and otherwise we create a new type
287 * based on the current molecule.
289 * Once we've finished going through all molecules we check which
290 * solvent is most common, and mark all those molecules while we
291 * clear the flag on all others.
294 solvent_parameters = *solvent_parameters_p;
296 /* Mark the cg first as non optimized */
299 /* Check if this cg has no exclusions with atoms in other charge groups
300 * and all atoms inside the charge group excluded.
301 * We only have 3 or 4 atom solvent loops.
303 if (GET_CGINFO_EXCL_INTER(cginfo) ||
304 !GET_CGINFO_EXCL_INTRA(cginfo))
309 /* Get the indices of the first atom in this charge group */
310 j0 = molt->cgs.index[cg0];
311 j1 = molt->cgs.index[cg0+1];
313 /* Number of atoms in our molecule */
319 "Moltype '%s': there are %d atoms in this charge group\n",
323 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
326 if (nj < 3 || nj > 4)
331 /* Check if we are doing QM on this group */
333 if (qm_grpnr != NULL)
335 for (j = j0; j < j1 && !qm; j++)
337 qm = (qm_grpnr[j] < qm_grps->nr - 1);
340 /* Cannot use solvent optimization with QM */
346 atom = molt->atoms.atom;
348 /* Still looks like a solvent, time to check parameters */
350 /* If it is perturbed (free energy) we can't use the solvent loops,
351 * so then we just skip to the next molecule.
355 for (j = j0; j < j1 && !perturbed; j++)
357 perturbed = PERTURBED(atom[j]);
365 /* Now it's only a question if the VdW and charge parameters
366 * are OK. Before doing the check we compare and see if they are
367 * identical to a possible previous solvent type.
368 * First we assign the current types and charges.
370 for (j = 0; j < nj; j++)
372 tmp_vdwtype[j] = atom[j0+j].type;
373 tmp_charge[j] = atom[j0+j].q;
376 /* Does it match any previous solvent type? */
377 for (k = 0; k < *n_solvent_parameters; k++)
382 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
383 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
384 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
389 /* Check that types & charges match for all atoms in molecule */
390 for (j = 0; j < nj && match == TRUE; j++)
392 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
396 if (tmp_charge[j] != solvent_parameters[k].charge[j])
403 /* Congratulations! We have a matched solvent.
404 * Flag it with this type for later processing.
407 solvent_parameters[k].count += nmol;
409 /* We are done with this charge group */
414 /* If we get here, we have a tentative new solvent type.
415 * Before we add it we must check that it fulfills the requirements
416 * of the solvent optimized loops. First determine which atoms have
419 for (j = 0; j < nj; j++)
422 tjA = tmp_vdwtype[j];
424 /* Go through all other tpes and see if any have non-zero
425 * VdW parameters when combined with this one.
427 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
429 /* We already checked that the atoms weren't perturbed,
430 * so we only need to check state A now.
434 has_vdw[j] = (has_vdw[j] ||
435 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
436 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
437 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
442 has_vdw[j] = (has_vdw[j] ||
443 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
444 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
449 /* Now we know all we need to make the final check and assignment. */
453 * For this we require thatn all atoms have charge,
454 * the charges on atom 2 & 3 should be the same, and only
455 * atom 1 might have VdW.
457 if (has_vdw[1] == FALSE &&
458 has_vdw[2] == FALSE &&
459 tmp_charge[0] != 0 &&
460 tmp_charge[1] != 0 &&
461 tmp_charge[2] == tmp_charge[1])
463 srenew(solvent_parameters, *n_solvent_parameters+1);
464 solvent_parameters[*n_solvent_parameters].model = esolSPC;
465 solvent_parameters[*n_solvent_parameters].count = nmol;
466 for (k = 0; k < 3; k++)
468 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
469 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
472 *cg_sp = *n_solvent_parameters;
473 (*n_solvent_parameters)++;
478 /* Or could it be a TIP4P?
479 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
480 * Only atom 1 mght have VdW.
482 if (has_vdw[1] == FALSE &&
483 has_vdw[2] == FALSE &&
484 has_vdw[3] == FALSE &&
485 tmp_charge[0] == 0 &&
486 tmp_charge[1] != 0 &&
487 tmp_charge[2] == tmp_charge[1] &&
490 srenew(solvent_parameters, *n_solvent_parameters+1);
491 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
492 solvent_parameters[*n_solvent_parameters].count = nmol;
493 for (k = 0; k < 4; k++)
495 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
496 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
499 *cg_sp = *n_solvent_parameters;
500 (*n_solvent_parameters)++;
504 *solvent_parameters_p = solvent_parameters;
508 check_solvent(FILE * fp,
509 const gmx_mtop_t * mtop,
511 cginfo_mb_t *cginfo_mb)
514 const gmx_moltype_t *molt;
515 int mb, mol, cg_mol, at_offset, am, cgm, i, nmol_ch, nmol;
516 int n_solvent_parameters;
517 solvent_parameters_t *solvent_parameters;
523 fprintf(debug, "Going to determine what solvent types we have.\n");
526 n_solvent_parameters = 0;
527 solvent_parameters = NULL;
528 /* Allocate temporary array for solvent type */
529 snew(cg_sp, mtop->nmolblock);
532 for (mb = 0; mb < mtop->nmolblock; mb++)
534 molt = &mtop->moltype[mtop->molblock[mb].type];
536 /* Here we have to loop over all individual molecules
537 * because we need to check for QMMM particles.
539 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
540 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
541 nmol = mtop->molblock[mb].nmol/nmol_ch;
542 for (mol = 0; mol < nmol_ch; mol++)
545 am = mol*cgs->index[cgs->nr];
546 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
548 check_solvent_cg(molt, cg_mol, nmol,
549 mtop->groups.grpnr[egcQMMM] ?
550 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
551 &mtop->groups.grps[egcQMMM],
553 &n_solvent_parameters, &solvent_parameters,
554 cginfo_mb[mb].cginfo[cgm+cg_mol],
555 &cg_sp[mb][cgm+cg_mol]);
558 at_offset += cgs->index[cgs->nr];
561 /* Puh! We finished going through all charge groups.
562 * Now find the most common solvent model.
565 /* Most common solvent this far */
567 for (i = 0; i < n_solvent_parameters; i++)
570 solvent_parameters[i].count > solvent_parameters[bestsp].count)
578 bestsol = solvent_parameters[bestsp].model;
586 for (mb = 0; mb < mtop->nmolblock; mb++)
588 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
589 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
590 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
592 if (cg_sp[mb][i] == bestsp)
594 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
599 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
606 if (bestsol != esolNO && fp != NULL)
608 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
610 solvent_parameters[bestsp].count);
613 sfree(solvent_parameters);
614 fr->solvent_opt = bestsol;
618 acNONE = 0, acCONSTRAINT, acSETTLE
621 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
622 t_forcerec *fr, gmx_bool bNoSolvOpt,
623 gmx_bool *bFEP_NonBonded,
624 gmx_bool *bExcl_IntraCGAll_InterCGNone)
627 const t_blocka *excl;
628 const gmx_moltype_t *molt;
629 const gmx_molblock_t *molb;
630 cginfo_mb_t *cginfo_mb;
633 int cg_offset, a_offset;
634 int mb, m, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
638 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
640 snew(cginfo_mb, mtop->nmolblock);
642 snew(type_VDW, fr->ntype);
643 for (ai = 0; ai < fr->ntype; ai++)
645 type_VDW[ai] = FALSE;
646 for (j = 0; j < fr->ntype; j++)
648 type_VDW[ai] = type_VDW[ai] ||
650 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
651 C12(fr->nbfp, fr->ntype, ai, j) != 0;
655 *bFEP_NonBonded = FALSE;
656 *bExcl_IntraCGAll_InterCGNone = TRUE;
659 snew(bExcl, excl_nalloc);
662 for (mb = 0; mb < mtop->nmolblock; mb++)
664 molb = &mtop->molblock[mb];
665 molt = &mtop->moltype[molb->type];
669 /* Check if the cginfo is identical for all molecules in this block.
670 * If so, we only need an array of the size of one molecule.
671 * Otherwise we make an array of #mol times #cgs per molecule.
674 for (m = 0; m < molb->nmol; m++)
676 int am = m*cgs->index[cgs->nr];
677 for (cg = 0; cg < cgs->nr; cg++)
680 a1 = cgs->index[cg+1];
681 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
682 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
686 if (mtop->groups.grpnr[egcQMMM] != NULL)
688 for (ai = a0; ai < a1; ai++)
690 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
691 mtop->groups.grpnr[egcQMMM][a_offset +ai])
700 cginfo_mb[mb].cg_start = cg_offset;
701 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
702 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
703 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
704 cginfo = cginfo_mb[mb].cginfo;
706 /* Set constraints flags for constrained atoms */
707 snew(a_con, molt->atoms.nr);
708 for (ftype = 0; ftype < F_NRE; ftype++)
710 if (interaction_function[ftype].flags & IF_CONSTRAINT)
715 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
719 for (a = 0; a < nral; a++)
721 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
722 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
728 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
731 int am = m*cgs->index[cgs->nr];
732 for (cg = 0; cg < cgs->nr; cg++)
735 a1 = cgs->index[cg+1];
737 /* Store the energy group in cginfo */
738 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
739 SET_CGINFO_GID(cginfo[cgm+cg], gid);
741 /* Check the intra/inter charge group exclusions */
742 if (a1-a0 > excl_nalloc)
744 excl_nalloc = a1 - a0;
745 srenew(bExcl, excl_nalloc);
747 /* bExclIntraAll: all intra cg interactions excluded
748 * bExclInter: any inter cg interactions excluded
750 bExclIntraAll = TRUE;
754 bHavePerturbedAtoms = FALSE;
755 for (ai = a0; ai < a1; ai++)
757 /* Check VDW and electrostatic interactions */
758 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
759 type_VDW[molt->atoms.atom[ai].typeB]);
760 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
761 molt->atoms.atom[ai].qB != 0);
763 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
765 /* Clear the exclusion list for atom ai */
766 for (aj = a0; aj < a1; aj++)
768 bExcl[aj-a0] = FALSE;
770 /* Loop over all the exclusions of atom ai */
771 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
774 if (aj < a0 || aj >= a1)
783 /* Check if ai excludes a0 to a1 */
784 for (aj = a0; aj < a1; aj++)
788 bExclIntraAll = FALSE;
795 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
798 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
806 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
810 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
812 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
814 /* The size in cginfo is currently only read with DD */
815 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
819 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
823 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
825 if (bHavePerturbedAtoms && fr->efep != efepNO)
827 SET_CGINFO_FEP(cginfo[cgm+cg]);
828 *bFEP_NonBonded = TRUE;
830 /* Store the charge group size */
831 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
833 if (!bExclIntraAll || bExclInter)
835 *bExcl_IntraCGAll_InterCGNone = FALSE;
842 cg_offset += molb->nmol*cgs->nr;
843 a_offset += molb->nmol*cgs->index[cgs->nr];
847 /* the solvent optimizer is called after the QM is initialized,
848 * because we don't want to have the QM subsystemto become an
852 check_solvent(fplog, mtop, fr, cginfo_mb);
854 if (getenv("GMX_NO_SOLV_OPT"))
858 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
859 "Disabling all solvent optimization\n");
861 fr->solvent_opt = esolNO;
865 fr->solvent_opt = esolNO;
867 if (!fr->solvent_opt)
869 for (mb = 0; mb < mtop->nmolblock; mb++)
871 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
873 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
881 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
886 ncg = cgi_mb[nmb-1].cg_end;
889 for (cg = 0; cg < ncg; cg++)
891 while (cg >= cgi_mb[mb].cg_end)
896 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
902 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
904 /*This now calculates sum for q and c6*/
905 double qsum, q2sum, q, c6sum, c6;
907 const t_atoms *atoms;
912 for (mb = 0; mb < mtop->nmolblock; mb++)
914 nmol = mtop->molblock[mb].nmol;
915 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
916 for (i = 0; i < atoms->nr; i++)
918 q = atoms->atom[i].q;
921 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
926 fr->q2sum[0] = q2sum;
927 fr->c6sum[0] = c6sum;
929 if (fr->efep != efepNO)
934 for (mb = 0; mb < mtop->nmolblock; mb++)
936 nmol = mtop->molblock[mb].nmol;
937 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
938 for (i = 0; i < atoms->nr; i++)
940 q = atoms->atom[i].qB;
943 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
947 fr->q2sum[1] = q2sum;
948 fr->c6sum[1] = c6sum;
953 fr->qsum[1] = fr->qsum[0];
954 fr->q2sum[1] = fr->q2sum[0];
955 fr->c6sum[1] = fr->c6sum[0];
959 if (fr->efep == efepNO)
961 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
965 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
966 fr->qsum[0], fr->qsum[1]);
971 void update_forcerec(t_forcerec *fr, matrix box)
973 if (fr->eeltype == eelGRF)
975 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
976 fr->rcoulomb, fr->temp, fr->zsquare, box,
977 &fr->kappa, &fr->k_rf, &fr->c_rf);
981 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
983 const t_atoms *atoms, *atoms_tpi;
984 const t_blocka *excl;
985 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, nexcl, q;
986 gmx_int64_t npair, npair_ij, tmpi, tmpj;
987 double csix, ctwelve;
991 real *nbfp_comb = NULL;
997 /* For LJ-PME, we want to correct for the difference between the
998 * actual C6 values and the C6 values used by the LJ-PME based on
999 * combination rules. */
1001 if (EVDW_PME(fr->vdwtype))
1003 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1004 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1005 for (tpi = 0; tpi < ntp; ++tpi)
1007 for (tpj = 0; tpj < ntp; ++tpj)
1009 C6(nbfp_comb, ntp, tpi, tpj) =
1010 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1011 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1016 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1024 /* Count the types so we avoid natoms^2 operations */
1025 snew(typecount, ntp);
1026 gmx_mtop_count_atomtypes(mtop, q, typecount);
1028 for (tpi = 0; tpi < ntp; tpi++)
1030 for (tpj = tpi; tpj < ntp; tpj++)
1032 tmpi = typecount[tpi];
1033 tmpj = typecount[tpj];
1036 npair_ij = tmpi*tmpj;
1040 npair_ij = tmpi*(tmpi - 1)/2;
1044 /* nbfp now includes the 6.0 derivative prefactor */
1045 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1049 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1050 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1051 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1057 /* Subtract the excluded pairs.
1058 * The main reason for substracting exclusions is that in some cases
1059 * some combinations might never occur and the parameters could have
1060 * any value. These unused values should not influence the dispersion
1063 for (mb = 0; mb < mtop->nmolblock; mb++)
1065 nmol = mtop->molblock[mb].nmol;
1066 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1067 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1068 for (i = 0; (i < atoms->nr); i++)
1072 tpi = atoms->atom[i].type;
1076 tpi = atoms->atom[i].typeB;
1078 j1 = excl->index[i];
1079 j2 = excl->index[i+1];
1080 for (j = j1; j < j2; j++)
1087 tpj = atoms->atom[k].type;
1091 tpj = atoms->atom[k].typeB;
1095 /* nbfp now includes the 6.0 derivative prefactor */
1096 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1100 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1101 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1102 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1112 /* Only correct for the interaction of the test particle
1113 * with the rest of the system.
1116 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1119 for (mb = 0; mb < mtop->nmolblock; mb++)
1121 nmol = mtop->molblock[mb].nmol;
1122 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1123 for (j = 0; j < atoms->nr; j++)
1126 /* Remove the interaction of the test charge group
1129 if (mb == mtop->nmolblock-1)
1133 if (mb == 0 && nmol == 1)
1135 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1140 tpj = atoms->atom[j].type;
1144 tpj = atoms->atom[j].typeB;
1146 for (i = 0; i < fr->n_tpi; i++)
1150 tpi = atoms_tpi->atom[i].type;
1154 tpi = atoms_tpi->atom[i].typeB;
1158 /* nbfp now includes the 6.0 derivative prefactor */
1159 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1163 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1164 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1165 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1172 if (npair - nexcl <= 0 && fplog)
1174 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1180 csix /= npair - nexcl;
1181 ctwelve /= npair - nexcl;
1185 fprintf(debug, "Counted %d exclusions\n", nexcl);
1186 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1187 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1189 fr->avcsix[q] = csix;
1190 fr->avctwelve[q] = ctwelve;
1193 if (EVDW_PME(fr->vdwtype))
1200 if (fr->eDispCorr == edispcAllEner ||
1201 fr->eDispCorr == edispcAllEnerPres)
1203 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1204 fr->avcsix[0], fr->avctwelve[0]);
1208 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1214 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1215 const gmx_mtop_t *mtop)
1217 const t_atoms *at1, *at2;
1218 int mt1, mt2, i, j, tpi, tpj, ntypes;
1224 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1231 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1233 at1 = &mtop->moltype[mt1].atoms;
1234 for (i = 0; (i < at1->nr); i++)
1236 tpi = at1->atom[i].type;
1239 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1242 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1244 at2 = &mtop->moltype[mt2].atoms;
1245 for (j = 0; (j < at2->nr); j++)
1247 tpj = at2->atom[j].type;
1250 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1252 b = BHAMB(nbfp, ntypes, tpi, tpj);
1253 if (b > fr->bham_b_max)
1257 if ((b < bmin) || (bmin == -1))
1267 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1268 bmin, fr->bham_b_max);
1272 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1273 t_forcerec *fr, real rtab,
1274 const t_commrec *cr,
1275 const char *tabfn, char *eg1, char *eg2,
1285 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1290 sprintf(buf, "%s", tabfn);
1293 /* Append the two energy group names */
1294 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1295 eg1, eg2, ftp2ext(efXVG));
1297 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1298 /* Copy the contents of the table to separate coulomb and LJ tables too,
1299 * to improve cache performance.
1301 /* For performance reasons we want
1302 * the table data to be aligned to 16-byte. The pointers could be freed
1303 * but currently aren't.
1305 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1306 nbl->table_elec.format = nbl->table_elec_vdw.format;
1307 nbl->table_elec.r = nbl->table_elec_vdw.r;
1308 nbl->table_elec.n = nbl->table_elec_vdw.n;
1309 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1310 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1311 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1312 nbl->table_elec.ninteractions = 1;
1313 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1314 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1316 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1317 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1318 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1319 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1320 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1321 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1322 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1323 nbl->table_vdw.ninteractions = 2;
1324 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1325 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1327 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1329 for (j = 0; j < 4; j++)
1331 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1333 for (j = 0; j < 8; j++)
1335 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1340 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1341 int *ncount, int **count)
1343 const gmx_moltype_t *molt;
1345 int mt, ftype, stride, i, j, tabnr;
1347 for (mt = 0; mt < mtop->nmoltype; mt++)
1349 molt = &mtop->moltype[mt];
1350 for (ftype = 0; ftype < F_NRE; ftype++)
1352 if (ftype == ftype1 || ftype == ftype2)
1354 il = &molt->ilist[ftype];
1355 stride = 1 + NRAL(ftype);
1356 for (i = 0; i < il->nr; i += stride)
1358 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1361 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1363 if (tabnr >= *ncount)
1365 srenew(*count, tabnr+1);
1366 for (j = *ncount; j < tabnr+1; j++)
1379 static bondedtable_t *make_bonded_tables(FILE *fplog,
1380 int ftype1, int ftype2,
1381 const gmx_mtop_t *mtop,
1382 const char *basefn, const char *tabext)
1384 int i, ncount, *count;
1392 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1397 for (i = 0; i < ncount; i++)
1401 sprintf(tabfn, "%s", basefn);
1402 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1403 tabext, i, ftp2ext(efXVG));
1404 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1413 void forcerec_set_ranges(t_forcerec *fr,
1414 int ncg_home, int ncg_force,
1416 int natoms_force_constr, int natoms_f_novirsum)
1421 /* fr->ncg_force is unused in the standard code,
1422 * but it can be useful for modified code dealing with charge groups.
1424 fr->ncg_force = ncg_force;
1425 fr->natoms_force = natoms_force;
1426 fr->natoms_force_constr = natoms_force_constr;
1428 if (fr->natoms_force_constr > fr->nalloc_force)
1430 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1434 srenew(fr->f_twin, fr->nalloc_force);
1438 if (fr->bF_NoVirSum)
1440 fr->f_novirsum_n = natoms_f_novirsum;
1441 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1443 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1444 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1449 fr->f_novirsum_n = 0;
1453 static real cutoff_inf(real cutoff)
1457 cutoff = GMX_CUTOFF_INF;
1463 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1464 t_forcerec *fr, const t_inputrec *ir,
1465 const char *tabfn, const gmx_mtop_t *mtop,
1473 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1477 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1479 sprintf(buf, "%s", tabfn);
1480 for (i = 0; i < ir->adress->n_tf_grps; i++)
1482 j = ir->adress->tf_table_index[i]; /* get energy group index */
1483 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1484 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1487 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1489 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1494 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1501 ir->rcoulomb == 0 &&
1503 ir->ePBC == epbcNONE &&
1504 ir->vdwtype == evdwCUT &&
1505 ir->coulombtype == eelCUT &&
1506 ir->efep == efepNO &&
1507 (ir->implicit_solvent == eisNO ||
1508 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1509 ir->gb_algorithm == egbHCT ||
1510 ir->gb_algorithm == egbOBC))) &&
1511 getenv("GMX_NO_ALLVSALL") == NULL
1514 if (bAllvsAll && ir->opts.ngener > 1)
1516 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1522 fprintf(stderr, "\n%s\n", note);
1526 fprintf(fp, "\n%s\n", note);
1532 if (bAllvsAll && fp && MASTER(cr))
1534 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1541 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1542 const t_commrec *cr,
1543 const t_inputrec *ir,
1546 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1548 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1549 bGPU ? "GPUs" : "SIMD kernels",
1550 bGPU ? "CPU only" : "plain-C kernels");
1558 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1562 *kernel_type = nbnxnk4x4_PlainC;
1563 *ewald_excl = ewaldexclTable;
1565 #ifdef GMX_NBNXN_SIMD
1567 #ifdef GMX_NBNXN_SIMD_4XN
1568 *kernel_type = nbnxnk4xN_SIMD_4xN;
1570 #ifdef GMX_NBNXN_SIMD_2XNN
1571 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1574 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1575 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1576 * Currently this is based on the SIMD acceleration choice,
1577 * but it might be better to decide this at runtime based on CPU.
1579 * 4xN calculates more (zero) interactions, but has less pair-search
1580 * work and much better kernel instruction scheduling.
1582 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1583 * which doesn't have FMA, both the analytical and tabulated Ewald
1584 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1585 * 2x(4+4) because it results in significantly fewer pairs.
1586 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1587 * 10% with HT, 50% without HT. As we currently don't detect the actual
1588 * use of HT, use 4x8 to avoid a potential performance hit.
1589 * On Intel Haswell 4x8 is always faster.
1591 *kernel_type = nbnxnk4xN_SIMD_4xN;
1593 #ifndef GMX_SIMD_HAVE_FMA
1594 if (EEL_PME_EWALD(ir->coulombtype) ||
1595 EVDW_PME(ir->vdwtype))
1597 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1598 * There are enough instructions to make 2x(4+4) efficient.
1600 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1603 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1606 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1608 #ifdef GMX_NBNXN_SIMD_4XN
1609 *kernel_type = nbnxnk4xN_SIMD_4xN;
1611 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1614 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1616 #ifdef GMX_NBNXN_SIMD_2XNN
1617 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1619 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1623 /* Analytical Ewald exclusion correction is only an option in
1625 * Since table lookup's don't parallelize with SIMD, analytical
1626 * will probably always be faster for a SIMD width of 8 or more.
1627 * With FMA analytical is sometimes faster for a width if 4 as well.
1628 * On BlueGene/Q, this is faster regardless of precision.
1629 * In single precision, this is faster on Bulldozer.
1631 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1632 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1633 defined GMX_SIMD_IBM_QPX
1634 *ewald_excl = ewaldexclAnalytical;
1636 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1638 *ewald_excl = ewaldexclTable;
1640 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1642 *ewald_excl = ewaldexclAnalytical;
1646 #endif /* GMX_NBNXN_SIMD */
1650 const char *lookup_nbnxn_kernel_name(int kernel_type)
1652 const char *returnvalue = NULL;
1653 switch (kernel_type)
1656 returnvalue = "not set";
1658 case nbnxnk4x4_PlainC:
1659 returnvalue = "plain C";
1661 case nbnxnk4xN_SIMD_4xN:
1662 case nbnxnk4xN_SIMD_2xNN:
1663 #ifdef GMX_NBNXN_SIMD
1664 #if defined GMX_SIMD_X86_SSE2
1665 returnvalue = "SSE2";
1666 #elif defined GMX_SIMD_X86_SSE4_1
1667 returnvalue = "SSE4.1";
1668 #elif defined GMX_SIMD_X86_AVX_128_FMA
1669 returnvalue = "AVX_128_FMA";
1670 #elif defined GMX_SIMD_X86_AVX_256
1671 returnvalue = "AVX_256";
1672 #elif defined GMX_SIMD_X86_AVX2_256
1673 returnvalue = "AVX2_256";
1675 returnvalue = "SIMD";
1677 #else /* GMX_NBNXN_SIMD */
1678 returnvalue = "not available";
1679 #endif /* GMX_NBNXN_SIMD */
1681 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1682 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1686 gmx_fatal(FARGS, "Illegal kernel type selected");
1693 static void pick_nbnxn_kernel(FILE *fp,
1694 const t_commrec *cr,
1695 gmx_bool use_simd_kernels,
1697 gmx_bool bEmulateGPU,
1698 const t_inputrec *ir,
1701 gmx_bool bDoNonbonded)
1703 assert(kernel_type);
1705 *kernel_type = nbnxnkNotSet;
1706 *ewald_excl = ewaldexclTable;
1710 *kernel_type = nbnxnk8x8x8_PlainC;
1714 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1719 *kernel_type = nbnxnk8x8x8_CUDA;
1722 if (*kernel_type == nbnxnkNotSet)
1724 /* LJ PME with LB combination rule does 7 mesh operations.
1725 * This so slow that we don't compile SIMD non-bonded kernels for that.
1727 if (use_simd_kernels &&
1728 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1730 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1734 *kernel_type = nbnxnk4x4_PlainC;
1738 if (bDoNonbonded && fp != NULL)
1740 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1741 lookup_nbnxn_kernel_name(*kernel_type),
1742 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1743 nbnxn_kernel_to_cj_size(*kernel_type));
1745 if (nbnxnk4x4_PlainC == *kernel_type ||
1746 nbnxnk8x8x8_PlainC == *kernel_type)
1748 md_print_warn(cr, fp,
1749 "WARNING: Using the slow %s kernels. This should\n"
1750 "not happen during routine usage on supported platforms.\n\n",
1751 lookup_nbnxn_kernel_name(*kernel_type));
1756 static void pick_nbnxn_resources(FILE *fp,
1757 const t_commrec *cr,
1758 const gmx_hw_info_t *hwinfo,
1759 gmx_bool bDoNonbonded,
1761 gmx_bool *bEmulateGPU,
1762 const gmx_gpu_opt_t *gpu_opt)
1764 gmx_bool bEmulateGPUEnvVarSet;
1765 char gpu_err_str[STRLEN];
1769 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1771 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1772 * GPUs (currently) only handle non-bonded calculations, we will
1773 * automatically switch to emulation if non-bonded calculations are
1774 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1775 * way to turn off GPU initialization, data movement, and cleanup.
1777 * GPU emulation can be useful to assess the performance one can expect by
1778 * adding GPU(s) to the machine. The conditional below allows this even
1779 * if mdrun is compiled without GPU acceleration support.
1780 * Note that you should freezing the system as otherwise it will explode.
1782 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1784 gpu_opt->ncuda_dev_use > 0));
1786 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1788 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1790 /* Each PP node will use the intra-node id-th device from the
1791 * list of detected/selected GPUs. */
1792 if (!init_gpu(fp, cr->rank_pp_intranode, gpu_err_str,
1793 &hwinfo->gpu_info, gpu_opt))
1795 /* At this point the init should never fail as we made sure that
1796 * we have all the GPUs we need. If it still does, we'll bail. */
1797 gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1799 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1800 cr->rank_pp_intranode),
1804 /* Here we actually turn on hardware GPU acceleration */
1809 gmx_bool uses_simple_tables(int cutoff_scheme,
1810 nonbonded_verlet_t *nbv,
1813 gmx_bool bUsesSimpleTables = TRUE;
1816 switch (cutoff_scheme)
1819 bUsesSimpleTables = TRUE;
1822 assert(NULL != nbv && NULL != nbv->grp);
1823 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1824 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1827 gmx_incons("unimplemented");
1829 return bUsesSimpleTables;
1832 static void init_ewald_f_table(interaction_const_t *ic,
1833 gmx_bool bUsesSimpleTables,
1838 if (bUsesSimpleTables)
1840 /* Get the Ewald table spacing based on Coulomb and/or LJ
1841 * Ewald coefficients and rtol.
1843 ic->tabq_scale = ewald_spline3_table_scale(ic);
1845 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1846 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1850 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1851 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1852 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1855 sfree_aligned(ic->tabq_coul_FDV0);
1856 sfree_aligned(ic->tabq_coul_F);
1857 sfree_aligned(ic->tabq_coul_V);
1859 sfree_aligned(ic->tabq_vdw_FDV0);
1860 sfree_aligned(ic->tabq_vdw_F);
1861 sfree_aligned(ic->tabq_vdw_V);
1863 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1865 /* Create the original table data in FDV0 */
1866 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1867 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1868 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1869 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1870 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1873 if (EVDW_PME(ic->vdwtype))
1875 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1876 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1877 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1878 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1879 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1883 void init_interaction_const_tables(FILE *fp,
1884 interaction_const_t *ic,
1885 gmx_bool bUsesSimpleTables,
1888 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1890 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1894 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1895 1/ic->tabq_scale, ic->tabq_size);
1900 static void clear_force_switch_constants(shift_consts_t *sc)
1907 static void force_switch_constants(real p,
1911 /* Here we determine the coefficient for shifting the force to zero
1912 * between distance rsw and the cut-off rc.
1913 * For a potential of r^-p, we have force p*r^-(p+1).
1914 * But to save flops we absorb p in the coefficient.
1916 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1917 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1919 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1920 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1921 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1924 static void potential_switch_constants(real rsw, real rc,
1925 switch_consts_t *sc)
1927 /* The switch function is 1 at rsw and 0 at rc.
1928 * The derivative and second derivate are zero at both ends.
1929 * rsw = max(r - r_switch, 0)
1930 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1931 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1932 * force = force*dsw - potential*sw
1935 sc->c3 = -10*pow(rc - rsw, -3);
1936 sc->c4 = 15*pow(rc - rsw, -4);
1937 sc->c5 = -6*pow(rc - rsw, -5);
1941 init_interaction_const(FILE *fp,
1942 const t_commrec gmx_unused *cr,
1943 interaction_const_t **interaction_const,
1944 const t_forcerec *fr,
1947 interaction_const_t *ic;
1948 gmx_bool bUsesSimpleTables = TRUE;
1949 const real minusSix = -6.0;
1950 const real minusTwelve = -12.0;
1954 /* Just allocate something so we can free it */
1955 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1956 snew_aligned(ic->tabq_coul_F, 16, 32);
1957 snew_aligned(ic->tabq_coul_V, 16, 32);
1959 ic->rlist = fr->rlist;
1960 ic->rlistlong = fr->rlistlong;
1963 ic->vdwtype = fr->vdwtype;
1964 ic->vdw_modifier = fr->vdw_modifier;
1965 ic->rvdw = fr->rvdw;
1966 ic->rvdw_switch = fr->rvdw_switch;
1967 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1968 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1969 ic->sh_lj_ewald = 0;
1970 clear_force_switch_constants(&ic->dispersion_shift);
1971 clear_force_switch_constants(&ic->repulsion_shift);
1973 switch (ic->vdw_modifier)
1975 case eintmodPOTSHIFT:
1976 /* Only shift the potential, don't touch the force */
1977 ic->dispersion_shift.cpot = -pow(ic->rvdw, minusSix);
1978 ic->repulsion_shift.cpot = -pow(ic->rvdw, minusTwelve);
1979 if (EVDW_PME(ic->vdwtype))
1983 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
1984 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, minusSix);
1987 case eintmodFORCESWITCH:
1988 /* Switch the force, switch and shift the potential */
1989 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
1990 &ic->dispersion_shift);
1991 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
1992 &ic->repulsion_shift);
1994 case eintmodPOTSWITCH:
1995 /* Switch the potential and force */
1996 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2000 case eintmodEXACTCUTOFF:
2001 /* Nothing to do here */
2004 gmx_incons("unimplemented potential modifier");
2007 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2009 /* Electrostatics */
2010 ic->eeltype = fr->eeltype;
2011 ic->coulomb_modifier = fr->coulomb_modifier;
2012 ic->rcoulomb = fr->rcoulomb;
2013 ic->epsilon_r = fr->epsilon_r;
2014 ic->epsfac = fr->epsfac;
2015 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2017 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2019 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2026 /* Reaction-field */
2027 if (EEL_RF(ic->eeltype))
2029 ic->epsilon_rf = fr->epsilon_rf;
2030 ic->k_rf = fr->k_rf;
2031 ic->c_rf = fr->c_rf;
2035 /* For plain cut-off we might use the reaction-field kernels */
2036 ic->epsilon_rf = ic->epsilon_r;
2038 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2040 ic->c_rf = 1/ic->rcoulomb;
2050 real dispersion_shift;
2052 dispersion_shift = ic->dispersion_shift.cpot;
2053 if (EVDW_PME(ic->vdwtype))
2055 dispersion_shift -= ic->sh_lj_ewald;
2057 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2058 ic->repulsion_shift.cpot, dispersion_shift);
2060 if (ic->eeltype == eelCUT)
2062 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2064 else if (EEL_PME(ic->eeltype))
2066 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2071 *interaction_const = ic;
2073 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2075 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2077 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2078 * also sharing texture references. To keep the code simple, we don't
2079 * treat texture references as shared resources, but this means that
2080 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2081 * Hence, to ensure that the non-bonded kernels don't start before all
2082 * texture binding operations are finished, we need to wait for all ranks
2083 * to arrive here before continuing.
2085 * Note that we could omit this barrier if GPUs are not shared (or
2086 * texture objects are used), but as this is initialization code, there
2087 * is not point in complicating things.
2089 #ifdef GMX_THREAD_MPI
2094 #endif /* GMX_THREAD_MPI */
2097 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2098 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2101 static void init_nb_verlet(FILE *fp,
2102 nonbonded_verlet_t **nb_verlet,
2103 gmx_bool bFEP_NonBonded,
2104 const t_inputrec *ir,
2105 const t_forcerec *fr,
2106 const t_commrec *cr,
2107 const char *nbpu_opt)
2109 nonbonded_verlet_t *nbv;
2112 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2114 nbnxn_alloc_t *nb_alloc;
2115 nbnxn_free_t *nb_free;
2119 pick_nbnxn_resources(fp, cr, fr->hwinfo,
2127 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2128 for (i = 0; i < nbv->ngrp; i++)
2130 nbv->grp[i].nbl_lists.nnbl = 0;
2131 nbv->grp[i].nbat = NULL;
2132 nbv->grp[i].kernel_type = nbnxnkNotSet;
2134 if (i == 0) /* local */
2136 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2137 nbv->bUseGPU, bEmulateGPU, ir,
2138 &nbv->grp[i].kernel_type,
2139 &nbv->grp[i].ewald_excl,
2142 else /* non-local */
2144 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2146 /* Use GPU for local, select a CPU kernel for non-local */
2147 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2149 &nbv->grp[i].kernel_type,
2150 &nbv->grp[i].ewald_excl,
2153 bHybridGPURun = TRUE;
2157 /* Use the same kernel for local and non-local interactions */
2158 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2159 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2166 /* init the NxN GPU data; the last argument tells whether we'll have
2167 * both local and non-local NB calculation on GPU */
2168 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2169 &fr->hwinfo->gpu_info, fr->gpu_opt,
2170 cr->rank_pp_intranode,
2171 (nbv->ngrp > 1) && !bHybridGPURun);
2173 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2177 nbv->min_ci_balanced = strtol(env, &end, 10);
2178 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2180 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2185 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2186 nbv->min_ci_balanced);
2191 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2194 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2195 nbv->min_ci_balanced);
2201 nbv->min_ci_balanced = 0;
2206 nbnxn_init_search(&nbv->nbs,
2207 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2208 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2210 gmx_omp_nthreads_get(emntPairsearch));
2212 for (i = 0; i < nbv->ngrp; i++)
2214 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2216 nb_alloc = &pmalloc;
2225 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2226 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2227 /* 8x8x8 "non-simple" lists are ATM always combined */
2228 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2232 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2234 gmx_bool bSimpleList;
2235 int enbnxninitcombrule;
2237 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2239 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2241 /* Plain LJ cut-off: we can optimize with combination rules */
2242 enbnxninitcombrule = enbnxninitcombruleDETECT;
2244 else if (fr->vdwtype == evdwPME)
2246 /* LJ-PME: we need to use a combination rule for the grid */
2247 if (fr->ljpme_combination_rule == eljpmeGEOM)
2249 enbnxninitcombrule = enbnxninitcombruleGEOM;
2253 enbnxninitcombrule = enbnxninitcombruleLB;
2258 /* We use a full combination matrix: no rule required */
2259 enbnxninitcombrule = enbnxninitcombruleNONE;
2263 snew(nbv->grp[i].nbat, 1);
2264 nbnxn_atomdata_init(fp,
2266 nbv->grp[i].kernel_type,
2268 fr->ntype, fr->nbfp,
2270 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2275 nbv->grp[i].nbat = nbv->grp[0].nbat;
2280 gmx_bool usingGpu(nonbonded_verlet_t *nbv)
2282 return nbv != NULL && nbv->bUseGPU;
2285 void init_forcerec(FILE *fp,
2286 const output_env_t oenv,
2289 const t_inputrec *ir,
2290 const gmx_mtop_t *mtop,
2291 const t_commrec *cr,
2297 const char *nbpu_opt,
2298 gmx_bool bNoSolvOpt,
2301 int i, m, negp_pp, negptable, egi, egj;
2306 gmx_bool bGenericKernelOnly;
2307 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2308 gmx_bool bFEP_NonBonded;
2309 int *nm_ind, egp_flags;
2311 if (fr->hwinfo == NULL)
2313 /* Detect hardware, gather information.
2314 * In mdrun, hwinfo has already been set before calling init_forcerec.
2315 * Here we ignore GPUs, as tools will not use them anyhow.
2317 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2320 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2321 fr->use_simd_kernels = TRUE;
2323 fr->bDomDec = DOMAINDECOMP(cr);
2325 if (check_box(ir->ePBC, box))
2327 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2330 /* Test particle insertion ? */
2333 /* Set to the size of the molecule to be inserted (the last one) */
2334 /* Because of old style topologies, we have to use the last cg
2335 * instead of the last molecule type.
2337 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2338 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2339 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2341 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2349 /* Copy AdResS parameters */
2352 fr->adress_type = ir->adress->type;
2353 fr->adress_const_wf = ir->adress->const_wf;
2354 fr->adress_ex_width = ir->adress->ex_width;
2355 fr->adress_hy_width = ir->adress->hy_width;
2356 fr->adress_icor = ir->adress->icor;
2357 fr->adress_site = ir->adress->site;
2358 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2359 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2362 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2363 for (i = 0; i < ir->adress->n_energy_grps; i++)
2365 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2368 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2369 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2370 for (i = 0; i < fr->n_adress_tf_grps; i++)
2372 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2374 copy_rvec(ir->adress->refs, fr->adress_refs);
2378 fr->adress_type = eAdressOff;
2379 fr->adress_do_hybridpairs = FALSE;
2382 /* Copy the user determined parameters */
2383 fr->userint1 = ir->userint1;
2384 fr->userint2 = ir->userint2;
2385 fr->userint3 = ir->userint3;
2386 fr->userint4 = ir->userint4;
2387 fr->userreal1 = ir->userreal1;
2388 fr->userreal2 = ir->userreal2;
2389 fr->userreal3 = ir->userreal3;
2390 fr->userreal4 = ir->userreal4;
2393 fr->fc_stepsize = ir->fc_stepsize;
2396 fr->efep = ir->efep;
2397 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2398 if (ir->fepvals->bScCoul)
2400 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2401 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2405 fr->sc_alphacoul = 0;
2406 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2408 fr->sc_power = ir->fepvals->sc_power;
2409 fr->sc_r_power = ir->fepvals->sc_r_power;
2410 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2412 env = getenv("GMX_SCSIGMA_MIN");
2416 sscanf(env, "%20lf", &dbl);
2417 fr->sc_sigma6_min = pow(dbl, 6);
2420 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2424 fr->bNonbonded = TRUE;
2425 if (getenv("GMX_NO_NONBONDED") != NULL)
2427 /* turn off non-bonded calculations */
2428 fr->bNonbonded = FALSE;
2429 md_print_warn(cr, fp,
2430 "Found environment variable GMX_NO_NONBONDED.\n"
2431 "Disabling nonbonded calculations.\n");
2434 bGenericKernelOnly = FALSE;
2436 /* We now check in the NS code whether a particular combination of interactions
2437 * can be used with water optimization, and disable it if that is not the case.
2440 if (getenv("GMX_NB_GENERIC") != NULL)
2445 "Found environment variable GMX_NB_GENERIC.\n"
2446 "Disabling all interaction-specific nonbonded kernels, will only\n"
2447 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2449 bGenericKernelOnly = TRUE;
2452 if (bGenericKernelOnly == TRUE)
2457 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2459 fr->use_simd_kernels = FALSE;
2463 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2464 "Disabling the usage of any SIMD-specific non-bonded & bonded kernel routines\n"
2465 "(e.g. SSE2/SSE4.1/AVX).\n\n");
2469 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2471 /* Check if we can/should do all-vs-all kernels */
2472 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2473 fr->AllvsAll_work = NULL;
2474 fr->AllvsAll_workgb = NULL;
2476 /* All-vs-all kernels have not been implemented in 4.6, and
2477 * the SIMD group kernels are also buggy in this case. Non-SIMD
2478 * group kernels are OK. See Redmine #1249. */
2481 fr->bAllvsAll = FALSE;
2482 fr->use_simd_kernels = FALSE;
2486 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2487 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2488 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2489 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2490 "we are proceeding by disabling all CPU architecture-specific\n"
2491 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2492 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2496 /* Neighbour searching stuff */
2497 fr->cutoff_scheme = ir->cutoff_scheme;
2498 fr->bGrid = (ir->ns_type == ensGRID);
2499 fr->ePBC = ir->ePBC;
2501 if (fr->cutoff_scheme == ecutsGROUP)
2503 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2504 "removed in a future release when 'verlet' supports all interaction forms.\n";
2508 fprintf(stderr, "\n%s\n", note);
2512 fprintf(fp, "\n%s\n", note);
2516 /* Determine if we will do PBC for distances in bonded interactions */
2517 if (fr->ePBC == epbcNONE)
2519 fr->bMolPBC = FALSE;
2523 if (!DOMAINDECOMP(cr))
2525 /* The group cut-off scheme and SHAKE assume charge groups
2526 * are whole, but not using molpbc is faster in most cases.
2528 if (fr->cutoff_scheme == ecutsGROUP ||
2529 (ir->eConstrAlg == econtSHAKE &&
2530 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2531 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2533 fr->bMolPBC = ir->bPeriodicMols;
2538 if (getenv("GMX_USE_GRAPH") != NULL)
2540 fr->bMolPBC = FALSE;
2543 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2550 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2553 fr->bGB = (ir->implicit_solvent == eisGBSA);
2555 fr->rc_scaling = ir->refcoord_scaling;
2556 copy_rvec(ir->posres_com, fr->posres_com);
2557 copy_rvec(ir->posres_comB, fr->posres_comB);
2558 fr->rlist = cutoff_inf(ir->rlist);
2559 fr->rlistlong = cutoff_inf(ir->rlistlong);
2560 fr->eeltype = ir->coulombtype;
2561 fr->vdwtype = ir->vdwtype;
2562 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2564 fr->coulomb_modifier = ir->coulomb_modifier;
2565 fr->vdw_modifier = ir->vdw_modifier;
2567 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2568 switch (fr->eeltype)
2571 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2577 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2581 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2582 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2591 case eelPMEUSERSWITCH:
2592 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2597 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2601 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2605 /* Vdw: Translate from mdp settings to kernel format */
2606 switch (fr->vdwtype)
2611 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2615 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2619 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2625 case evdwENCADSHIFT:
2626 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2630 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2634 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2635 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2636 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2638 fr->rvdw = cutoff_inf(ir->rvdw);
2639 fr->rvdw_switch = ir->rvdw_switch;
2640 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2641 fr->rcoulomb_switch = ir->rcoulomb_switch;
2643 fr->bTwinRange = fr->rlistlong > fr->rlist;
2644 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2646 fr->reppow = mtop->ffparams.reppow;
2648 if (ir->cutoff_scheme == ecutsGROUP)
2650 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2651 && !EVDW_PME(fr->vdwtype));
2652 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2653 fr->bcoultab = !(fr->eeltype == eelCUT ||
2654 fr->eeltype == eelEWALD ||
2655 fr->eeltype == eelPME ||
2656 fr->eeltype == eelRF ||
2657 fr->eeltype == eelRF_ZERO);
2659 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2660 * going to be faster to tabulate the interaction than calling the generic kernel.
2661 * However, if generic kernels have been requested we keep things analytically.
2663 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2664 fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2665 bGenericKernelOnly == FALSE)
2667 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2669 fr->bcoultab = TRUE;
2670 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2671 * which would otherwise need two tables.
2675 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2676 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2677 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2678 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2680 if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2682 fr->bcoultab = TRUE;
2686 if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2688 fr->bcoultab = TRUE;
2690 if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2695 if (getenv("GMX_REQUIRE_TABLES"))
2698 fr->bcoultab = TRUE;
2703 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2704 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2707 if (fr->bvdwtab == TRUE)
2709 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2710 fr->nbkernel_vdw_modifier = eintmodNONE;
2712 if (fr->bcoultab == TRUE)
2714 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2715 fr->nbkernel_elec_modifier = eintmodNONE;
2719 if (ir->cutoff_scheme == ecutsVERLET)
2721 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2723 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2725 fr->bvdwtab = FALSE;
2726 fr->bcoultab = FALSE;
2729 /* Tables are used for direct ewald sum */
2732 if (EEL_PME(ir->coulombtype))
2736 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2738 if (ir->coulombtype == eelP3M_AD)
2740 please_cite(fp, "Hockney1988");
2741 please_cite(fp, "Ballenegger2012");
2745 please_cite(fp, "Essmann95a");
2748 if (ir->ewald_geometry == eewg3DC)
2752 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2754 please_cite(fp, "In-Chul99a");
2757 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2758 init_ewald_tab(&(fr->ewald_table), ir, fp);
2761 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2762 1/fr->ewaldcoeff_q);
2766 if (EVDW_PME(ir->vdwtype))
2770 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2772 please_cite(fp, "Essmann95a");
2773 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2776 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2777 1/fr->ewaldcoeff_lj);
2781 /* Electrostatics */
2782 fr->epsilon_r = ir->epsilon_r;
2783 fr->epsilon_rf = ir->epsilon_rf;
2784 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2786 /* Parameters for generalized RF */
2790 if (fr->eeltype == eelGRF)
2792 init_generalized_rf(fp, mtop, ir, fr);
2795 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2796 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2797 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2798 IR_ELEC_FIELD(*ir) ||
2799 (fr->adress_icor != eAdressICOff)
2802 if (fr->cutoff_scheme == ecutsGROUP &&
2803 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2805 /* Count the total number of charge groups */
2806 fr->cg_nalloc = ncg_mtop(mtop);
2807 srenew(fr->cg_cm, fr->cg_nalloc);
2809 if (fr->shift_vec == NULL)
2811 snew(fr->shift_vec, SHIFTS);
2814 if (fr->fshift == NULL)
2816 snew(fr->fshift, SHIFTS);
2819 if (fr->nbfp == NULL)
2821 fr->ntype = mtop->ffparams.atnr;
2822 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2823 if (EVDW_PME(fr->vdwtype))
2825 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2829 /* Copy the energy group exclusions */
2830 fr->egp_flags = ir->opts.egp_flags;
2832 /* Van der Waals stuff */
2833 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2835 if (fr->rvdw_switch >= fr->rvdw)
2837 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2838 fr->rvdw_switch, fr->rvdw);
2842 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2843 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2844 fr->rvdw_switch, fr->rvdw);
2848 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2850 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2853 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2855 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2858 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2860 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2865 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2866 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2869 fr->eDispCorr = ir->eDispCorr;
2870 if (ir->eDispCorr != edispcNO)
2872 set_avcsixtwelve(fp, fr, mtop);
2877 set_bham_b_max(fp, fr, mtop);
2880 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2882 /* Copy the GBSA data (radius, volume and surftens for each
2883 * atomtype) from the topology atomtype section to forcerec.
2885 snew(fr->atype_radius, fr->ntype);
2886 snew(fr->atype_vol, fr->ntype);
2887 snew(fr->atype_surftens, fr->ntype);
2888 snew(fr->atype_gb_radius, fr->ntype);
2889 snew(fr->atype_S_hct, fr->ntype);
2891 if (mtop->atomtypes.nr > 0)
2893 for (i = 0; i < fr->ntype; i++)
2895 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2897 for (i = 0; i < fr->ntype; i++)
2899 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2901 for (i = 0; i < fr->ntype; i++)
2903 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2905 for (i = 0; i < fr->ntype; i++)
2907 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2909 for (i = 0; i < fr->ntype; i++)
2911 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2915 /* Generate the GB table if needed */
2919 fr->gbtabscale = 2000;
2921 fr->gbtabscale = 500;
2925 fr->gbtab = make_gb_table(oenv, fr);
2927 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2929 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2930 if (!DOMAINDECOMP(cr))
2932 make_local_gb(cr, fr->born, ir->gb_algorithm);
2936 /* Set the charge scaling */
2937 if (fr->epsilon_r != 0)
2939 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2943 /* eps = 0 is infinite dieletric: no coulomb interactions */
2947 /* Reaction field constants */
2948 if (EEL_RF(fr->eeltype))
2950 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2951 fr->rcoulomb, fr->temp, fr->zsquare, box,
2952 &fr->kappa, &fr->k_rf, &fr->c_rf);
2955 /*This now calculates sum for q and c6*/
2956 set_chargesum(fp, fr, mtop);
2958 /* if we are using LR electrostatics, and they are tabulated,
2959 * the tables will contain modified coulomb interactions.
2960 * Since we want to use the non-shifted ones for 1-4
2961 * coulombic interactions, we must have an extra set of tables.
2964 /* Construct tables.
2965 * A little unnecessary to make both vdw and coul tables sometimes,
2966 * but what the heck... */
2968 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2969 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2971 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2972 fr->coulomb_modifier != eintmodNONE ||
2973 fr->vdw_modifier != eintmodNONE ||
2974 fr->bBHAM || fr->bEwald) &&
2975 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2976 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2977 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2979 negp_pp = ir->opts.ngener - ir->nwall;
2983 bSomeNormalNbListsAreInUse = TRUE;
2988 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2989 for (egi = 0; egi < negp_pp; egi++)
2991 for (egj = egi; egj < negp_pp; egj++)
2993 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2994 if (!(egp_flags & EGP_EXCL))
2996 if (egp_flags & EGP_TABLE)
3002 bSomeNormalNbListsAreInUse = TRUE;
3007 if (bSomeNormalNbListsAreInUse)
3009 fr->nnblists = negptable + 1;
3013 fr->nnblists = negptable;
3015 if (fr->nnblists > 1)
3017 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3026 snew(fr->nblists, fr->nnblists);
3028 /* This code automatically gives table length tabext without cut-off's,
3029 * in that case grompp should already have checked that we do not need
3030 * normal tables and we only generate tables for 1-4 interactions.
3032 rtab = ir->rlistlong + ir->tabext;
3036 /* make tables for ordinary interactions */
3037 if (bSomeNormalNbListsAreInUse)
3039 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3042 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3044 if (!bMakeSeparate14Table)
3046 fr->tab14 = fr->nblists[0].table_elec_vdw;
3056 /* Read the special tables for certain energy group pairs */
3057 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3058 for (egi = 0; egi < negp_pp; egi++)
3060 for (egj = egi; egj < negp_pp; egj++)
3062 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3063 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3065 if (fr->nnblists > 1)
3067 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3069 /* Read the table file with the two energy groups names appended */
3070 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3071 *mtop->groups.grpname[nm_ind[egi]],
3072 *mtop->groups.grpname[nm_ind[egj]],
3076 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3077 *mtop->groups.grpname[nm_ind[egi]],
3078 *mtop->groups.grpname[nm_ind[egj]],
3079 &fr->nblists[fr->nnblists/2+m]);
3083 else if (fr->nnblists > 1)
3085 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3091 else if ((fr->eDispCorr != edispcNO) &&
3092 ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3093 (fr->vdw_modifier == eintmodFORCESWITCH) ||
3094 (fr->vdw_modifier == eintmodPOTSHIFT)))
3096 /* Tables might not be used for the potential modifier interactions per se, but
3097 * we still need them to evaluate switch/shift dispersion corrections in this case.
3099 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3102 if (bMakeSeparate14Table)
3104 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3105 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3106 GMX_MAKETABLES_14ONLY);
3109 /* Read AdResS Thermo Force table if needed */
3110 if (fr->adress_icor == eAdressICThermoForce)
3112 /* old todo replace */
3114 if (ir->adress->n_tf_grps > 0)
3116 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3121 /* load the default table */
3122 snew(fr->atf_tabs, 1);
3123 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3128 fr->nwall = ir->nwall;
3129 if (ir->nwall && ir->wall_type == ewtTABLE)
3131 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3136 fcd->bondtab = make_bonded_tables(fp,
3137 F_TABBONDS, F_TABBONDSNC,
3139 fcd->angletab = make_bonded_tables(fp,
3142 fcd->dihtab = make_bonded_tables(fp,
3150 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3154 /* QM/MM initialization if requested
3158 fprintf(stderr, "QM/MM calculation requested.\n");
3161 fr->bQMMM = ir->bQMMM;
3162 fr->qr = mk_QMMMrec();
3164 /* Set all the static charge group info */
3165 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3167 &fr->bExcl_IntraCGAll_InterCGNone);
3168 if (DOMAINDECOMP(cr))
3174 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3177 if (!DOMAINDECOMP(cr))
3179 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3180 mtop->natoms, mtop->natoms, mtop->natoms);
3183 fr->print_force = print_force;
3186 /* coarse load balancing vars */
3191 /* Initialize neighbor search */
3192 init_ns(fp, cr, &fr->ns, fr, mtop);
3194 if (cr->duty & DUTY_PP)
3196 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3200 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3205 /* Initialize the thread working data for bonded interactions */
3206 init_bonded_threading(fp, fr, mtop->groups.grps[egcENER].nr);
3208 snew(fr->excl_load, fr->nthreads+1);
3210 if (fr->cutoff_scheme == ecutsVERLET)
3212 if (ir->rcoulomb != ir->rvdw)
3214 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3217 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3220 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3221 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3223 if (ir->eDispCorr != edispcNO)
3225 calc_enervirdiff(fp, ir->eDispCorr, fr);
3229 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3230 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3231 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3233 void pr_forcerec(FILE *fp, t_forcerec *fr)
3237 pr_real(fp, fr->rlist);
3238 pr_real(fp, fr->rcoulomb);
3239 pr_real(fp, fr->fudgeQQ);
3240 pr_bool(fp, fr->bGrid);
3241 pr_bool(fp, fr->bTwinRange);
3242 /*pr_int(fp,fr->cg0);
3243 pr_int(fp,fr->hcg);*/
3244 for (i = 0; i < fr->nnblists; i++)
3246 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3248 pr_real(fp, fr->rcoulomb_switch);
3249 pr_real(fp, fr->rcoulomb);
3254 void forcerec_set_excl_load(t_forcerec *fr,
3255 const gmx_localtop_t *top)
3258 int t, i, j, ntot, n, ntarget;
3260 ind = top->excls.index;
3264 for (i = 0; i < top->excls.nr; i++)
3266 for (j = ind[i]; j < ind[i+1]; j++)
3275 fr->excl_load[0] = 0;
3278 for (t = 1; t <= fr->nthreads; t++)
3280 ntarget = (ntot*t)/fr->nthreads;
3281 while (i < top->excls.nr && n < ntarget)
3283 for (j = ind[i]; j < ind[i+1]; j++)
3292 fr->excl_load[t] = i;
3296 /* Frees GPU memory and destroys the CUDA context.
3298 * Note that this function needs to be called even if GPUs are not used
3299 * in this run because the PME ranks have no knowledge of whether GPUs
3300 * are used or not, but all ranks need to enter the barrier below.
3302 void free_gpu_resources(const t_forcerec *fr,
3303 const t_commrec *cr,
3304 const gmx_gpu_info_t *gpu_info,
3305 const gmx_gpu_opt_t *gpu_opt)
3307 gmx_bool bIsPPrankUsingGPU;
3308 char gpu_err_str[STRLEN];
3310 bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
3312 if (bIsPPrankUsingGPU)
3314 /* free nbnxn data in GPU memory */
3315 nbnxn_cuda_free(fr->nbv->cu_nbv);
3317 /* With tMPI we need to wait for all ranks to finish deallocation before
3318 * destroying the context in free_gpu() as some ranks may be sharing
3320 * Note: as only PP ranks need to free GPU resources, so it is safe to
3321 * not call the barrier on PME ranks.
3323 #ifdef GMX_THREAD_MPI
3328 #endif /* GMX_THREAD_MPI */
3330 /* uninitialize GPU (by destroying the context) */
3331 if (!free_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
3333 gmx_warning("On rank %d failed to free GPU #%d: %s",
3334 cr->nodeid, get_current_gpu_device_id(), gpu_err_str);