2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gmx_fatal.h"
53 #include "gmx_fatal_collective.h"
57 #include "nonbonded.h"
66 #include "md_support.h"
67 #include "md_logging.h"
72 #include "mtop_util.h"
73 #include "nbnxn_search.h"
74 #include "nbnxn_atomdata.h"
75 #include "nbnxn_consts.h"
77 #include "gmx_omp_nthreads.h"
78 #include "gmx_detect_hardware.h"
81 /* MSVC definition for __cpuid() */
85 #include "types/nbnxn_cuda_types_ext.h"
86 #include "gpu_utils.h"
87 #include "nbnxn_cuda_data_mgmt.h"
88 #include "pmalloc_cuda.h"
90 t_forcerec *mk_forcerec(void)
100 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
104 for (i = 0; (i < atnr); i++)
106 for (j = 0; (j < atnr); j++)
108 fprintf(fp, "%2d - %2d", i, j);
111 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
112 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
116 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
117 C12(nbfp, atnr, i, j)/12.0);
124 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
132 snew(nbfp, 3*atnr*atnr);
133 for (i = k = 0; (i < atnr); i++)
135 for (j = 0; (j < atnr); j++, k++)
137 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
138 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
139 /* nbfp now includes the 6.0 derivative prefactor */
140 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
146 snew(nbfp, 2*atnr*atnr);
147 for (i = k = 0; (i < atnr); i++)
149 for (j = 0; (j < atnr); j++, k++)
151 /* nbfp now includes the 6.0/12.0 derivative prefactors */
152 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
153 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
161 /* This routine sets fr->solvent_opt to the most common solvent in the
162 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
163 * the fr->solvent_type array with the correct type (or esolNO).
165 * Charge groups that fulfill the conditions but are not identical to the
166 * most common one will be marked as esolNO in the solvent_type array.
168 * TIP3p is identical to SPC for these purposes, so we call it
169 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
171 * NOTE: QM particle should not
172 * become an optimized solvent. Not even if there is only one charge
182 } solvent_parameters_t;
185 check_solvent_cg(const gmx_moltype_t *molt,
188 const unsigned char *qm_grpnr,
189 const t_grps *qm_grps,
191 int *n_solvent_parameters,
192 solvent_parameters_t **solvent_parameters_p,
196 const t_blocka * excl;
207 solvent_parameters_t *solvent_parameters;
209 /* We use a list with parameters for each solvent type.
210 * Every time we discover a new molecule that fulfills the basic
211 * conditions for a solvent we compare with the previous entries
212 * in these lists. If the parameters are the same we just increment
213 * the counter for that type, and otherwise we create a new type
214 * based on the current molecule.
216 * Once we've finished going through all molecules we check which
217 * solvent is most common, and mark all those molecules while we
218 * clear the flag on all others.
221 solvent_parameters = *solvent_parameters_p;
223 /* Mark the cg first as non optimized */
226 /* Check if this cg has no exclusions with atoms in other charge groups
227 * and all atoms inside the charge group excluded.
228 * We only have 3 or 4 atom solvent loops.
230 if (GET_CGINFO_EXCL_INTER(cginfo) ||
231 !GET_CGINFO_EXCL_INTRA(cginfo))
236 /* Get the indices of the first atom in this charge group */
237 j0 = molt->cgs.index[cg0];
238 j1 = molt->cgs.index[cg0+1];
240 /* Number of atoms in our molecule */
246 "Moltype '%s': there are %d atoms in this charge group\n",
250 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
253 if (nj < 3 || nj > 4)
258 /* Check if we are doing QM on this group */
260 if (qm_grpnr != NULL)
262 for (j = j0; j < j1 && !qm; j++)
264 qm = (qm_grpnr[j] < qm_grps->nr - 1);
267 /* Cannot use solvent optimization with QM */
273 atom = molt->atoms.atom;
275 /* Still looks like a solvent, time to check parameters */
277 /* If it is perturbed (free energy) we can't use the solvent loops,
278 * so then we just skip to the next molecule.
282 for (j = j0; j < j1 && !perturbed; j++)
284 perturbed = PERTURBED(atom[j]);
292 /* Now it's only a question if the VdW and charge parameters
293 * are OK. Before doing the check we compare and see if they are
294 * identical to a possible previous solvent type.
295 * First we assign the current types and charges.
297 for (j = 0; j < nj; j++)
299 tmp_vdwtype[j] = atom[j0+j].type;
300 tmp_charge[j] = atom[j0+j].q;
303 /* Does it match any previous solvent type? */
304 for (k = 0; k < *n_solvent_parameters; k++)
309 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
310 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
311 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
316 /* Check that types & charges match for all atoms in molecule */
317 for (j = 0; j < nj && match == TRUE; j++)
319 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
323 if (tmp_charge[j] != solvent_parameters[k].charge[j])
330 /* Congratulations! We have a matched solvent.
331 * Flag it with this type for later processing.
334 solvent_parameters[k].count += nmol;
336 /* We are done with this charge group */
341 /* If we get here, we have a tentative new solvent type.
342 * Before we add it we must check that it fulfills the requirements
343 * of the solvent optimized loops. First determine which atoms have
346 for (j = 0; j < nj; j++)
349 tjA = tmp_vdwtype[j];
351 /* Go through all other tpes and see if any have non-zero
352 * VdW parameters when combined with this one.
354 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
356 /* We already checked that the atoms weren't perturbed,
357 * so we only need to check state A now.
361 has_vdw[j] = (has_vdw[j] ||
362 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
363 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
364 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
369 has_vdw[j] = (has_vdw[j] ||
370 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
371 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
376 /* Now we know all we need to make the final check and assignment. */
380 * For this we require thatn all atoms have charge,
381 * the charges on atom 2 & 3 should be the same, and only
382 * atom 1 might have VdW.
384 if (has_vdw[1] == FALSE &&
385 has_vdw[2] == FALSE &&
386 tmp_charge[0] != 0 &&
387 tmp_charge[1] != 0 &&
388 tmp_charge[2] == tmp_charge[1])
390 srenew(solvent_parameters, *n_solvent_parameters+1);
391 solvent_parameters[*n_solvent_parameters].model = esolSPC;
392 solvent_parameters[*n_solvent_parameters].count = nmol;
393 for (k = 0; k < 3; k++)
395 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
396 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
399 *cg_sp = *n_solvent_parameters;
400 (*n_solvent_parameters)++;
405 /* Or could it be a TIP4P?
406 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
407 * Only atom 1 mght have VdW.
409 if (has_vdw[1] == FALSE &&
410 has_vdw[2] == FALSE &&
411 has_vdw[3] == FALSE &&
412 tmp_charge[0] == 0 &&
413 tmp_charge[1] != 0 &&
414 tmp_charge[2] == tmp_charge[1] &&
417 srenew(solvent_parameters, *n_solvent_parameters+1);
418 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
419 solvent_parameters[*n_solvent_parameters].count = nmol;
420 for (k = 0; k < 4; k++)
422 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
423 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
426 *cg_sp = *n_solvent_parameters;
427 (*n_solvent_parameters)++;
431 *solvent_parameters_p = solvent_parameters;
435 check_solvent(FILE * fp,
436 const gmx_mtop_t * mtop,
438 cginfo_mb_t *cginfo_mb)
441 const t_block * mols;
442 const gmx_moltype_t *molt;
443 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
444 int n_solvent_parameters;
445 solvent_parameters_t *solvent_parameters;
451 fprintf(debug, "Going to determine what solvent types we have.\n");
456 n_solvent_parameters = 0;
457 solvent_parameters = NULL;
458 /* Allocate temporary array for solvent type */
459 snew(cg_sp, mtop->nmolblock);
463 for (mb = 0; mb < mtop->nmolblock; mb++)
465 molt = &mtop->moltype[mtop->molblock[mb].type];
467 /* Here we have to loop over all individual molecules
468 * because we need to check for QMMM particles.
470 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
471 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
472 nmol = mtop->molblock[mb].nmol/nmol_ch;
473 for (mol = 0; mol < nmol_ch; mol++)
476 am = mol*cgs->index[cgs->nr];
477 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
479 check_solvent_cg(molt, cg_mol, nmol,
480 mtop->groups.grpnr[egcQMMM] ?
481 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
482 &mtop->groups.grps[egcQMMM],
484 &n_solvent_parameters, &solvent_parameters,
485 cginfo_mb[mb].cginfo[cgm+cg_mol],
486 &cg_sp[mb][cgm+cg_mol]);
489 cg_offset += cgs->nr;
490 at_offset += cgs->index[cgs->nr];
493 /* Puh! We finished going through all charge groups.
494 * Now find the most common solvent model.
497 /* Most common solvent this far */
499 for (i = 0; i < n_solvent_parameters; i++)
502 solvent_parameters[i].count > solvent_parameters[bestsp].count)
510 bestsol = solvent_parameters[bestsp].model;
517 #ifdef DISABLE_WATER_NLIST
522 for (mb = 0; mb < mtop->nmolblock; mb++)
524 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
525 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
526 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
528 if (cg_sp[mb][i] == bestsp)
530 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
535 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
542 if (bestsol != esolNO && fp != NULL)
544 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
546 solvent_parameters[bestsp].count);
549 sfree(solvent_parameters);
550 fr->solvent_opt = bestsol;
554 acNONE = 0, acCONSTRAINT, acSETTLE
557 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
558 t_forcerec *fr, gmx_bool bNoSolvOpt,
559 gmx_bool *bExcl_IntraCGAll_InterCGNone)
562 const t_blocka *excl;
563 const gmx_moltype_t *molt;
564 const gmx_molblock_t *molb;
565 cginfo_mb_t *cginfo_mb;
568 int cg_offset, a_offset, cgm, am;
569 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
573 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
575 ncg_tot = ncg_mtop(mtop);
576 snew(cginfo_mb, mtop->nmolblock);
578 snew(type_VDW, fr->ntype);
579 for (ai = 0; ai < fr->ntype; ai++)
581 type_VDW[ai] = FALSE;
582 for (j = 0; j < fr->ntype; j++)
584 type_VDW[ai] = type_VDW[ai] ||
586 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
587 C12(fr->nbfp, fr->ntype, ai, j) != 0;
591 *bExcl_IntraCGAll_InterCGNone = TRUE;
594 snew(bExcl, excl_nalloc);
597 for (mb = 0; mb < mtop->nmolblock; mb++)
599 molb = &mtop->molblock[mb];
600 molt = &mtop->moltype[molb->type];
604 /* Check if the cginfo is identical for all molecules in this block.
605 * If so, we only need an array of the size of one molecule.
606 * Otherwise we make an array of #mol times #cgs per molecule.
610 for (m = 0; m < molb->nmol; m++)
612 am = m*cgs->index[cgs->nr];
613 for (cg = 0; cg < cgs->nr; cg++)
616 a1 = cgs->index[cg+1];
617 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
618 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
622 if (mtop->groups.grpnr[egcQMMM] != NULL)
624 for (ai = a0; ai < a1; ai++)
626 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
627 mtop->groups.grpnr[egcQMMM][a_offset +ai])
636 cginfo_mb[mb].cg_start = cg_offset;
637 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
638 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
639 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
640 cginfo = cginfo_mb[mb].cginfo;
642 /* Set constraints flags for constrained atoms */
643 snew(a_con, molt->atoms.nr);
644 for (ftype = 0; ftype < F_NRE; ftype++)
646 if (interaction_function[ftype].flags & IF_CONSTRAINT)
651 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
655 for (a = 0; a < nral; a++)
657 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
658 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
664 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
667 am = m*cgs->index[cgs->nr];
668 for (cg = 0; cg < cgs->nr; cg++)
671 a1 = cgs->index[cg+1];
673 /* Store the energy group in cginfo */
674 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
675 SET_CGINFO_GID(cginfo[cgm+cg], gid);
677 /* Check the intra/inter charge group exclusions */
678 if (a1-a0 > excl_nalloc)
680 excl_nalloc = a1 - a0;
681 srenew(bExcl, excl_nalloc);
683 /* bExclIntraAll: all intra cg interactions excluded
684 * bExclInter: any inter cg interactions excluded
686 bExclIntraAll = TRUE;
690 for (ai = a0; ai < a1; ai++)
692 /* Check VDW and electrostatic interactions */
693 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
694 type_VDW[molt->atoms.atom[ai].typeB]);
695 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
696 molt->atoms.atom[ai].qB != 0);
698 /* Clear the exclusion list for atom ai */
699 for (aj = a0; aj < a1; aj++)
701 bExcl[aj-a0] = FALSE;
703 /* Loop over all the exclusions of atom ai */
704 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
707 if (aj < a0 || aj >= a1)
716 /* Check if ai excludes a0 to a1 */
717 for (aj = a0; aj < a1; aj++)
721 bExclIntraAll = FALSE;
728 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
731 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
739 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
743 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
745 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
747 /* The size in cginfo is currently only read with DD */
748 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
752 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
756 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
758 /* Store the charge group size */
759 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
761 if (!bExclIntraAll || bExclInter)
763 *bExcl_IntraCGAll_InterCGNone = FALSE;
770 cg_offset += molb->nmol*cgs->nr;
771 a_offset += molb->nmol*cgs->index[cgs->nr];
775 /* the solvent optimizer is called after the QM is initialized,
776 * because we don't want to have the QM subsystemto become an
780 check_solvent(fplog, mtop, fr, cginfo_mb);
782 if (getenv("GMX_NO_SOLV_OPT"))
786 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
787 "Disabling all solvent optimization\n");
789 fr->solvent_opt = esolNO;
793 fr->solvent_opt = esolNO;
795 if (!fr->solvent_opt)
797 for (mb = 0; mb < mtop->nmolblock; mb++)
799 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
801 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
809 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
814 ncg = cgi_mb[nmb-1].cg_end;
817 for (cg = 0; cg < ncg; cg++)
819 while (cg >= cgi_mb[mb].cg_end)
824 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
830 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
832 double qsum, q2sum, q;
834 const t_atoms *atoms;
838 for (mb = 0; mb < mtop->nmolblock; mb++)
840 nmol = mtop->molblock[mb].nmol;
841 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
842 for (i = 0; i < atoms->nr; i++)
844 q = atoms->atom[i].q;
850 fr->q2sum[0] = q2sum;
851 if (fr->efep != efepNO)
855 for (mb = 0; mb < mtop->nmolblock; mb++)
857 nmol = mtop->molblock[mb].nmol;
858 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
859 for (i = 0; i < atoms->nr; i++)
861 q = atoms->atom[i].qB;
866 fr->q2sum[1] = q2sum;
871 fr->qsum[1] = fr->qsum[0];
872 fr->q2sum[1] = fr->q2sum[0];
876 if (fr->efep == efepNO)
878 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
882 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
883 fr->qsum[0], fr->qsum[1]);
888 void update_forcerec(FILE *log, t_forcerec *fr, matrix box)
890 if (fr->eeltype == eelGRF)
892 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
893 fr->rcoulomb, fr->temp, fr->zsquare, box,
894 &fr->kappa, &fr->k_rf, &fr->c_rf);
898 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
900 const t_atoms *atoms, *atoms_tpi;
901 const t_blocka *excl;
902 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
903 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
904 long long int npair, npair_ij, tmpi, tmpj;
906 double npair, npair_ij, tmpi, tmpj;
908 double csix, ctwelve;
917 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
925 /* Count the types so we avoid natoms^2 operations */
926 snew(typecount, ntp);
927 for (mb = 0; mb < mtop->nmolblock; mb++)
929 nmol = mtop->molblock[mb].nmol;
930 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
931 for (i = 0; i < atoms->nr; i++)
935 tpi = atoms->atom[i].type;
939 tpi = atoms->atom[i].typeB;
941 typecount[tpi] += nmol;
944 for (tpi = 0; tpi < ntp; tpi++)
946 for (tpj = tpi; tpj < ntp; tpj++)
948 tmpi = typecount[tpi];
949 tmpj = typecount[tpj];
952 npair_ij = tmpi*tmpj;
956 npair_ij = tmpi*(tmpi - 1)/2;
960 /* nbfp now includes the 6.0 derivative prefactor */
961 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
965 /* nbfp now includes the 6.0/12.0 derivative prefactors */
966 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
967 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
973 /* Subtract the excluded pairs.
974 * The main reason for substracting exclusions is that in some cases
975 * some combinations might never occur and the parameters could have
976 * any value. These unused values should not influence the dispersion
979 for (mb = 0; mb < mtop->nmolblock; mb++)
981 nmol = mtop->molblock[mb].nmol;
982 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
983 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
984 for (i = 0; (i < atoms->nr); i++)
988 tpi = atoms->atom[i].type;
992 tpi = atoms->atom[i].typeB;
995 j2 = excl->index[i+1];
996 for (j = j1; j < j2; j++)
1003 tpj = atoms->atom[k].type;
1007 tpj = atoms->atom[k].typeB;
1011 /* nbfp now includes the 6.0 derivative prefactor */
1012 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1016 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1017 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1018 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1028 /* Only correct for the interaction of the test particle
1029 * with the rest of the system.
1032 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1035 for (mb = 0; mb < mtop->nmolblock; mb++)
1037 nmol = mtop->molblock[mb].nmol;
1038 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1039 for (j = 0; j < atoms->nr; j++)
1042 /* Remove the interaction of the test charge group
1045 if (mb == mtop->nmolblock-1)
1049 if (mb == 0 && nmol == 1)
1051 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1056 tpj = atoms->atom[j].type;
1060 tpj = atoms->atom[j].typeB;
1062 for (i = 0; i < fr->n_tpi; i++)
1066 tpi = atoms_tpi->atom[i].type;
1070 tpi = atoms_tpi->atom[i].typeB;
1074 /* nbfp now includes the 6.0 derivative prefactor */
1075 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1079 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1080 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1081 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1088 if (npair - nexcl <= 0 && fplog)
1090 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1096 csix /= npair - nexcl;
1097 ctwelve /= npair - nexcl;
1101 fprintf(debug, "Counted %d exclusions\n", nexcl);
1102 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1103 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1105 fr->avcsix[q] = csix;
1106 fr->avctwelve[q] = ctwelve;
1110 if (fr->eDispCorr == edispcAllEner ||
1111 fr->eDispCorr == edispcAllEnerPres)
1113 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1114 fr->avcsix[0], fr->avctwelve[0]);
1118 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1124 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1125 const gmx_mtop_t *mtop)
1127 const t_atoms *at1, *at2;
1128 int mt1, mt2, i, j, tpi, tpj, ntypes;
1134 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1141 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1143 at1 = &mtop->moltype[mt1].atoms;
1144 for (i = 0; (i < at1->nr); i++)
1146 tpi = at1->atom[i].type;
1149 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1152 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1154 at2 = &mtop->moltype[mt2].atoms;
1155 for (j = 0; (j < at2->nr); j++)
1157 tpj = at2->atom[j].type;
1160 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1162 b = BHAMB(nbfp, ntypes, tpi, tpj);
1163 if (b > fr->bham_b_max)
1167 if ((b < bmin) || (bmin == -1))
1177 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1178 bmin, fr->bham_b_max);
1182 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1183 t_forcerec *fr, real rtab,
1184 const t_commrec *cr,
1185 const char *tabfn, char *eg1, char *eg2,
1195 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1200 sprintf(buf, "%s", tabfn);
1203 /* Append the two energy group names */
1204 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1205 eg1, eg2, ftp2ext(efXVG));
1207 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1208 /* Copy the contents of the table to separate coulomb and LJ tables too,
1209 * to improve cache performance.
1211 /* For performance reasons we want
1212 * the table data to be aligned to 16-byte. The pointers could be freed
1213 * but currently aren't.
1215 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1216 nbl->table_elec.format = nbl->table_elec_vdw.format;
1217 nbl->table_elec.r = nbl->table_elec_vdw.r;
1218 nbl->table_elec.n = nbl->table_elec_vdw.n;
1219 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1220 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1221 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1222 nbl->table_elec.ninteractions = 1;
1223 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1224 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1226 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1227 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1228 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1229 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1230 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1231 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1232 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1233 nbl->table_vdw.ninteractions = 2;
1234 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1235 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1237 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1239 for (j = 0; j < 4; j++)
1241 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1243 for (j = 0; j < 8; j++)
1245 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1250 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1251 int *ncount, int **count)
1253 const gmx_moltype_t *molt;
1255 int mt, ftype, stride, i, j, tabnr;
1257 for (mt = 0; mt < mtop->nmoltype; mt++)
1259 molt = &mtop->moltype[mt];
1260 for (ftype = 0; ftype < F_NRE; ftype++)
1262 if (ftype == ftype1 || ftype == ftype2)
1264 il = &molt->ilist[ftype];
1265 stride = 1 + NRAL(ftype);
1266 for (i = 0; i < il->nr; i += stride)
1268 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1271 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1273 if (tabnr >= *ncount)
1275 srenew(*count, tabnr+1);
1276 for (j = *ncount; j < tabnr+1; j++)
1289 static bondedtable_t *make_bonded_tables(FILE *fplog,
1290 int ftype1, int ftype2,
1291 const gmx_mtop_t *mtop,
1292 const char *basefn, const char *tabext)
1294 int i, ncount, *count;
1302 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1307 for (i = 0; i < ncount; i++)
1311 sprintf(tabfn, "%s", basefn);
1312 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1313 tabext, i, ftp2ext(efXVG));
1314 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1323 void forcerec_set_ranges(t_forcerec *fr,
1324 int ncg_home, int ncg_force,
1326 int natoms_force_constr, int natoms_f_novirsum)
1331 /* fr->ncg_force is unused in the standard code,
1332 * but it can be useful for modified code dealing with charge groups.
1334 fr->ncg_force = ncg_force;
1335 fr->natoms_force = natoms_force;
1336 fr->natoms_force_constr = natoms_force_constr;
1338 if (fr->natoms_force_constr > fr->nalloc_force)
1340 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1344 srenew(fr->f_twin, fr->nalloc_force);
1348 if (fr->bF_NoVirSum)
1350 fr->f_novirsum_n = natoms_f_novirsum;
1351 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1353 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1354 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1359 fr->f_novirsum_n = 0;
1363 static real cutoff_inf(real cutoff)
1367 cutoff = GMX_CUTOFF_INF;
1373 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1374 t_forcerec *fr, const t_inputrec *ir,
1375 const char *tabfn, const gmx_mtop_t *mtop,
1383 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1387 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1389 sprintf(buf, "%s", tabfn);
1390 for (i = 0; i < ir->adress->n_tf_grps; i++)
1392 j = ir->adress->tf_table_index[i]; /* get energy group index */
1393 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1394 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1397 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1399 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1404 gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
1405 gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1412 ir->rcoulomb == 0 &&
1414 ir->ePBC == epbcNONE &&
1415 ir->vdwtype == evdwCUT &&
1416 ir->coulombtype == eelCUT &&
1417 ir->efep == efepNO &&
1418 (ir->implicit_solvent == eisNO ||
1419 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1420 ir->gb_algorithm == egbHCT ||
1421 ir->gb_algorithm == egbOBC))) &&
1422 getenv("GMX_NO_ALLVSALL") == NULL
1425 if (bAllvsAll && ir->opts.ngener > 1)
1427 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1433 fprintf(stderr, "\n%s\n", note);
1437 fprintf(fp, "\n%s\n", note);
1443 if (bAllvsAll && fp && MASTER(cr))
1445 fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
1452 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1456 /* These thread local data structures are used for bondeds only */
1457 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1459 if (fr->nthreads > 1)
1461 snew(fr->f_t, fr->nthreads);
1462 /* Thread 0 uses the global force and energy arrays */
1463 for (t = 1; t < fr->nthreads; t++)
1465 fr->f_t[t].f = NULL;
1466 fr->f_t[t].f_nalloc = 0;
1467 snew(fr->f_t[t].fshift, SHIFTS);
1468 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1469 for (i = 0; i < egNR; i++)
1471 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1478 static void pick_nbnxn_kernel_cpu(FILE *fp,
1479 const t_commrec *cr,
1480 const gmx_cpuid_t cpuid_info,
1481 const t_inputrec *ir,
1485 *kernel_type = nbnxnk4x4_PlainC;
1486 *ewald_excl = ewaldexclTable;
1488 #ifdef GMX_NBNXN_SIMD
1490 #ifdef GMX_NBNXN_SIMD_4XN
1491 *kernel_type = nbnxnk4xN_SIMD_4xN;
1493 #ifdef GMX_NBNXN_SIMD_2XNN
1494 /* We expect the 2xNN kernels to be faster in most cases */
1495 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1498 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
1499 if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1501 /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1502 * 10% with HT, 50% without HT, but extra zeros interactions
1503 * can compensate. As we currently don't detect the actual use
1504 * of HT, switch to 4x8 to avoid a potential performance hit.
1506 *kernel_type = nbnxnk4xN_SIMD_4xN;
1509 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1511 #ifdef GMX_NBNXN_SIMD_4XN
1512 *kernel_type = nbnxnk4xN_SIMD_4xN;
1514 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1517 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1519 #ifdef GMX_NBNXN_SIMD_2XNN
1520 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1522 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1526 /* Analytical Ewald exclusion correction is only an option in
1527 * the SIMD kernel. On BlueGene/Q, this is faster regardless
1528 * of precision. In single precision, this is faster on
1529 * Bulldozer, and slightly faster on Sandy Bridge.
1531 #if ((defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE) || (defined GMX_CPU_ACCELERATION_IBM_QPX)
1532 *ewald_excl = ewaldexclAnalytical;
1534 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1536 *ewald_excl = ewaldexclTable;
1538 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1540 *ewald_excl = ewaldexclAnalytical;
1544 #endif /* GMX_NBNXN_SIMD */
1548 const char *lookup_nbnxn_kernel_name(int kernel_type)
1550 const char *returnvalue = NULL;
1551 switch (kernel_type)
1554 returnvalue = "not set";
1556 case nbnxnk4x4_PlainC:
1557 returnvalue = "plain C";
1559 case nbnxnk4xN_SIMD_4xN:
1560 case nbnxnk4xN_SIMD_2xNN:
1561 #ifdef GMX_NBNXN_SIMD
1563 /* We have x86 SSE2 compatible SIMD */
1564 #ifdef GMX_X86_AVX_128_FMA
1565 returnvalue = "AVX-128-FMA";
1567 #if defined GMX_X86_AVX_256 || defined __AVX__
1568 /* x86 SIMD intrinsics can be converted to SSE or AVX depending
1569 * on compiler flags. As we use nearly identical intrinsics,
1570 * compiling for AVX without an AVX macros effectively results
1572 * For gcc we check for __AVX__
1573 * At least a check for icc should be added (if there is a macro)
1575 #if defined GMX_X86_AVX_256 && !defined GMX_NBNXN_HALF_WIDTH_SIMD
1576 returnvalue = "AVX-256";
1578 returnvalue = "AVX-128";
1581 #ifdef GMX_X86_SSE4_1
1582 returnvalue = "SSE4.1";
1584 returnvalue = "SSE2";
1588 #else /* GMX_X86_SSE2 */
1589 /* not GMX_X86_SSE2, but other SIMD */
1590 returnvalue = "SIMD";
1591 #endif /* GMX_X86_SSE2 */
1592 #else /* GMX_NBNXN_SIMD */
1593 returnvalue = "not available";
1594 #endif /* GMX_NBNXN_SIMD */
1596 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1597 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1601 gmx_fatal(FARGS, "Illegal kernel type selected");
1608 static void pick_nbnxn_kernel(FILE *fp,
1609 const t_commrec *cr,
1610 const gmx_hw_info_t *hwinfo,
1611 gmx_bool use_cpu_acceleration,
1613 gmx_bool bEmulateGPU,
1614 const t_inputrec *ir,
1617 gmx_bool bDoNonbonded)
1619 assert(kernel_type);
1621 *kernel_type = nbnxnkNotSet;
1622 *ewald_excl = ewaldexclTable;
1626 *kernel_type = nbnxnk8x8x8_PlainC;
1630 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1635 *kernel_type = nbnxnk8x8x8_CUDA;
1638 if (*kernel_type == nbnxnkNotSet)
1640 if (use_cpu_acceleration)
1642 pick_nbnxn_kernel_cpu(fp, cr, hwinfo->cpuid_info, ir,
1643 kernel_type, ewald_excl);
1647 *kernel_type = nbnxnk4x4_PlainC;
1651 if (bDoNonbonded && fp != NULL)
1653 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1654 lookup_nbnxn_kernel_name(*kernel_type),
1655 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1656 nbnxn_kernel_to_cj_size(*kernel_type));
1660 static void pick_nbnxn_resources(FILE *fp,
1661 const t_commrec *cr,
1662 const gmx_hw_info_t *hwinfo,
1663 gmx_bool bDoNonbonded,
1665 gmx_bool *bEmulateGPU,
1666 const gmx_gpu_opt_t *gpu_opt)
1668 gmx_bool bEmulateGPUEnvVarSet;
1669 char gpu_err_str[STRLEN];
1673 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1675 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1676 * GPUs (currently) only handle non-bonded calculations, we will
1677 * automatically switch to emulation if non-bonded calculations are
1678 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1679 * way to turn off GPU initialization, data movement, and cleanup.
1681 * GPU emulation can be useful to assess the performance one can expect by
1682 * adding GPU(s) to the machine. The conditional below allows this even
1683 * if mdrun is compiled without GPU acceleration support.
1684 * Note that you should freezing the system as otherwise it will explode.
1686 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1688 gpu_opt->ncuda_dev_use > 0));
1690 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1692 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1694 /* Each PP node will use the intra-node id-th device from the
1695 * list of detected/selected GPUs. */
1696 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1697 &hwinfo->gpu_info, gpu_opt))
1699 /* At this point the init should never fail as we made sure that
1700 * we have all the GPUs we need. If it still does, we'll bail. */
1701 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1703 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1704 cr->rank_pp_intranode),
1708 /* Here we actually turn on hardware GPU acceleration */
1713 gmx_bool uses_simple_tables(int cutoff_scheme,
1714 nonbonded_verlet_t *nbv,
1717 gmx_bool bUsesSimpleTables = TRUE;
1720 switch (cutoff_scheme)
1723 bUsesSimpleTables = TRUE;
1726 assert(NULL != nbv && NULL != nbv->grp);
1727 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1728 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1731 gmx_incons("unimplemented");
1733 return bUsesSimpleTables;
1736 static void init_ewald_f_table(interaction_const_t *ic,
1737 gmx_bool bUsesSimpleTables,
1742 if (bUsesSimpleTables)
1744 /* With a spacing of 0.0005 we are at the force summation accuracy
1745 * for the SSE kernels for "normal" atomistic simulations.
1747 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
1750 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1751 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1755 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1756 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1757 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1760 sfree_aligned(ic->tabq_coul_FDV0);
1761 sfree_aligned(ic->tabq_coul_F);
1762 sfree_aligned(ic->tabq_coul_V);
1764 /* Create the original table data in FDV0 */
1765 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1766 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1767 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1768 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1769 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff);
1772 void init_interaction_const_tables(FILE *fp,
1773 interaction_const_t *ic,
1774 gmx_bool bUsesSimpleTables,
1779 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1781 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1785 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1786 1/ic->tabq_scale, ic->tabq_size);
1791 static void init_interaction_const(FILE *fp,
1792 const t_commrec *cr,
1793 interaction_const_t **interaction_const,
1794 const t_forcerec *fr,
1797 interaction_const_t *ic;
1798 gmx_bool bUsesSimpleTables = TRUE;
1802 /* Just allocate something so we can free it */
1803 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1804 snew_aligned(ic->tabq_coul_F, 16, 32);
1805 snew_aligned(ic->tabq_coul_V, 16, 32);
1807 ic->rlist = fr->rlist;
1808 ic->rlistlong = fr->rlistlong;
1811 ic->rvdw = fr->rvdw;
1812 if (fr->vdw_modifier == eintmodPOTSHIFT)
1814 ic->sh_invrc6 = pow(ic->rvdw, -6.0);
1821 /* Electrostatics */
1822 ic->eeltype = fr->eeltype;
1823 ic->rcoulomb = fr->rcoulomb;
1824 ic->epsilon_r = fr->epsilon_r;
1825 ic->epsfac = fr->epsfac;
1828 ic->ewaldcoeff = fr->ewaldcoeff;
1829 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1831 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
1838 /* Reaction-field */
1839 if (EEL_RF(ic->eeltype))
1841 ic->epsilon_rf = fr->epsilon_rf;
1842 ic->k_rf = fr->k_rf;
1843 ic->c_rf = fr->c_rf;
1847 /* For plain cut-off we might use the reaction-field kernels */
1848 ic->epsilon_rf = ic->epsilon_r;
1850 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1852 ic->c_rf = 1/ic->rcoulomb;
1862 fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1863 sqr(ic->sh_invrc6), ic->sh_invrc6);
1864 if (ic->eeltype == eelCUT)
1866 fprintf(fp, ", Coulomb %.3f", ic->c_rf);
1868 else if (EEL_PME(ic->eeltype))
1870 fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
1875 *interaction_const = ic;
1877 if (fr->nbv != NULL && fr->nbv->bUseGPU)
1879 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
1881 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
1882 * also sharing texture references. To keep the code simple, we don't
1883 * treat texture references as shared resources, but this means that
1884 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
1885 * Hence, to ensure that the non-bonded kernels don't start before all
1886 * texture binding operations are finished, we need to wait for all ranks
1887 * to arrive here before continuing.
1889 * Note that we could omit this barrier if GPUs are not shared (or
1890 * texture objects are used), but as this is initialization code, there
1891 * is not point in complicating things.
1893 #ifdef GMX_THREAD_MPI
1898 #endif /* GMX_THREAD_MPI */
1901 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
1902 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
1905 static void init_nb_verlet(FILE *fp,
1906 nonbonded_verlet_t **nb_verlet,
1907 const t_inputrec *ir,
1908 const t_forcerec *fr,
1909 const t_commrec *cr,
1910 const char *nbpu_opt)
1912 nonbonded_verlet_t *nbv;
1915 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
1917 nbnxn_alloc_t *nb_alloc;
1918 nbnxn_free_t *nb_free;
1922 pick_nbnxn_resources(fp, cr, fr->hwinfo,
1930 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
1931 for (i = 0; i < nbv->ngrp; i++)
1933 nbv->grp[i].nbl_lists.nnbl = 0;
1934 nbv->grp[i].nbat = NULL;
1935 nbv->grp[i].kernel_type = nbnxnkNotSet;
1937 if (i == 0) /* local */
1939 pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
1940 nbv->bUseGPU, bEmulateGPU,
1942 &nbv->grp[i].kernel_type,
1943 &nbv->grp[i].ewald_excl,
1946 else /* non-local */
1948 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
1950 /* Use GPU for local, select a CPU kernel for non-local */
1951 pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
1954 &nbv->grp[i].kernel_type,
1955 &nbv->grp[i].ewald_excl,
1958 bHybridGPURun = TRUE;
1962 /* Use the same kernel for local and non-local interactions */
1963 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
1964 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
1971 /* init the NxN GPU data; the last argument tells whether we'll have
1972 * both local and non-local NB calculation on GPU */
1973 nbnxn_cuda_init(fp, &nbv->cu_nbv,
1974 &fr->hwinfo->gpu_info, fr->gpu_opt,
1975 cr->rank_pp_intranode,
1976 (nbv->ngrp > 1) && !bHybridGPURun);
1978 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
1982 nbv->min_ci_balanced = strtol(env, &end, 10);
1983 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
1985 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
1990 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
1991 nbv->min_ci_balanced);
1996 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
1999 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2000 nbv->min_ci_balanced);
2006 nbv->min_ci_balanced = 0;
2011 nbnxn_init_search(&nbv->nbs,
2012 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2013 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2014 gmx_omp_nthreads_get(emntNonbonded));
2016 for (i = 0; i < nbv->ngrp; i++)
2018 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2020 nb_alloc = &pmalloc;
2029 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2030 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2031 /* 8x8x8 "non-simple" lists are ATM always combined */
2032 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2036 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2038 snew(nbv->grp[i].nbat, 1);
2039 nbnxn_atomdata_init(fp,
2041 nbv->grp[i].kernel_type,
2042 fr->ntype, fr->nbfp,
2044 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2049 nbv->grp[i].nbat = nbv->grp[0].nbat;
2054 void init_forcerec(FILE *fp,
2055 const output_env_t oenv,
2058 const t_inputrec *ir,
2059 const gmx_mtop_t *mtop,
2060 const t_commrec *cr,
2067 const char *nbpu_opt,
2068 gmx_bool bNoSolvOpt,
2071 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2077 gmx_bool bGenericKernelOnly;
2078 gmx_bool bTab, bSep14tab, bNormalnblists;
2080 int *nm_ind, egp_flags;
2082 if (fr->hwinfo == NULL)
2084 /* Detect hardware, gather information.
2085 * In mdrun, hwinfo has already been set before calling init_forcerec.
2086 * Here we ignore GPUs, as tools will not use them anyhow.
2088 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2091 /* By default we turn acceleration on, but it might be turned off further down... */
2092 fr->use_cpu_acceleration = TRUE;
2094 fr->bDomDec = DOMAINDECOMP(cr);
2096 natoms = mtop->natoms;
2098 if (check_box(ir->ePBC, box))
2100 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2103 /* Test particle insertion ? */
2106 /* Set to the size of the molecule to be inserted (the last one) */
2107 /* Because of old style topologies, we have to use the last cg
2108 * instead of the last molecule type.
2110 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2111 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2112 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2114 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2122 /* Copy AdResS parameters */
2125 fr->adress_type = ir->adress->type;
2126 fr->adress_const_wf = ir->adress->const_wf;
2127 fr->adress_ex_width = ir->adress->ex_width;
2128 fr->adress_hy_width = ir->adress->hy_width;
2129 fr->adress_icor = ir->adress->icor;
2130 fr->adress_site = ir->adress->site;
2131 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2132 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2135 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2136 for (i = 0; i < ir->adress->n_energy_grps; i++)
2138 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2141 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2142 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2143 for (i = 0; i < fr->n_adress_tf_grps; i++)
2145 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2147 copy_rvec(ir->adress->refs, fr->adress_refs);
2151 fr->adress_type = eAdressOff;
2152 fr->adress_do_hybridpairs = FALSE;
2155 /* Copy the user determined parameters */
2156 fr->userint1 = ir->userint1;
2157 fr->userint2 = ir->userint2;
2158 fr->userint3 = ir->userint3;
2159 fr->userint4 = ir->userint4;
2160 fr->userreal1 = ir->userreal1;
2161 fr->userreal2 = ir->userreal2;
2162 fr->userreal3 = ir->userreal3;
2163 fr->userreal4 = ir->userreal4;
2166 fr->fc_stepsize = ir->fc_stepsize;
2169 fr->efep = ir->efep;
2170 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2171 if (ir->fepvals->bScCoul)
2173 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2174 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2178 fr->sc_alphacoul = 0;
2179 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2181 fr->sc_power = ir->fepvals->sc_power;
2182 fr->sc_r_power = ir->fepvals->sc_r_power;
2183 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2185 env = getenv("GMX_SCSIGMA_MIN");
2189 sscanf(env, "%lf", &dbl);
2190 fr->sc_sigma6_min = pow(dbl, 6);
2193 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2197 fr->bNonbonded = TRUE;
2198 if (getenv("GMX_NO_NONBONDED") != NULL)
2200 /* turn off non-bonded calculations */
2201 fr->bNonbonded = FALSE;
2202 md_print_warn(cr, fp,
2203 "Found environment variable GMX_NO_NONBONDED.\n"
2204 "Disabling nonbonded calculations.\n");
2207 bGenericKernelOnly = FALSE;
2209 /* We now check in the NS code whether a particular combination of interactions
2210 * can be used with water optimization, and disable it if that is not the case.
2213 if (getenv("GMX_NB_GENERIC") != NULL)
2218 "Found environment variable GMX_NB_GENERIC.\n"
2219 "Disabling all interaction-specific nonbonded kernels, will only\n"
2220 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2222 bGenericKernelOnly = TRUE;
2225 if (bGenericKernelOnly == TRUE)
2230 if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2232 fr->use_cpu_acceleration = FALSE;
2236 "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
2237 "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
2241 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2243 /* Check if we can/should do all-vs-all kernels */
2244 fr->bAllvsAll = can_use_allvsall(ir, mtop, FALSE, NULL, NULL);
2245 fr->AllvsAll_work = NULL;
2246 fr->AllvsAll_workgb = NULL;
2248 /* All-vs-all kernels have not been implemented in 4.6, and
2249 * the SIMD group kernels are also buggy in this case. Non-accelerated
2250 * group kernels are OK. See Redmine #1249. */
2253 fr->bAllvsAll = FALSE;
2254 fr->use_cpu_acceleration = FALSE;
2258 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2259 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2260 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2261 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2262 "we are proceeding by disabling all CPU architecture-specific\n"
2263 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2264 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2268 /* Neighbour searching stuff */
2269 fr->cutoff_scheme = ir->cutoff_scheme;
2270 fr->bGrid = (ir->ns_type == ensGRID);
2271 fr->ePBC = ir->ePBC;
2273 /* Determine if we will do PBC for distances in bonded interactions */
2274 if (fr->ePBC == epbcNONE)
2276 fr->bMolPBC = FALSE;
2280 if (!DOMAINDECOMP(cr))
2282 /* The group cut-off scheme and SHAKE assume charge groups
2283 * are whole, but not using molpbc is faster in most cases.
2285 if (fr->cutoff_scheme == ecutsGROUP ||
2286 (ir->eConstrAlg == econtSHAKE &&
2287 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2288 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2290 fr->bMolPBC = ir->bPeriodicMols;
2295 if (getenv("GMX_USE_GRAPH") != NULL)
2297 fr->bMolPBC = FALSE;
2300 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2307 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2310 fr->bGB = (ir->implicit_solvent == eisGBSA);
2312 fr->rc_scaling = ir->refcoord_scaling;
2313 copy_rvec(ir->posres_com, fr->posres_com);
2314 copy_rvec(ir->posres_comB, fr->posres_comB);
2315 fr->rlist = cutoff_inf(ir->rlist);
2316 fr->rlistlong = cutoff_inf(ir->rlistlong);
2317 fr->eeltype = ir->coulombtype;
2318 fr->vdwtype = ir->vdwtype;
2320 fr->coulomb_modifier = ir->coulomb_modifier;
2321 fr->vdw_modifier = ir->vdw_modifier;
2323 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2324 switch (fr->eeltype)
2327 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2333 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2337 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2338 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2347 case eelPMEUSERSWITCH:
2348 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2353 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2357 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2361 /* Vdw: Translate from mdp settings to kernel format */
2362 switch (fr->vdwtype)
2367 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2371 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2378 case evdwENCADSHIFT:
2379 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2383 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2387 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2388 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2389 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2391 fr->bTwinRange = fr->rlistlong > fr->rlist;
2392 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2394 fr->reppow = mtop->ffparams.reppow;
2396 if (ir->cutoff_scheme == ecutsGROUP)
2398 fr->bvdwtab = (fr->vdwtype != evdwCUT ||
2399 !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2400 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2401 fr->bcoultab = !(fr->eeltype == eelCUT ||
2402 fr->eeltype == eelEWALD ||
2403 fr->eeltype == eelPME ||
2404 fr->eeltype == eelRF ||
2405 fr->eeltype == eelRF_ZERO);
2407 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2408 * going to be faster to tabulate the interaction than calling the generic kernel.
2410 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2412 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2414 fr->bcoultab = TRUE;
2417 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2418 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2419 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2420 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2422 if (fr->rcoulomb != fr->rvdw)
2424 fr->bcoultab = TRUE;
2428 if (getenv("GMX_REQUIRE_TABLES"))
2431 fr->bcoultab = TRUE;
2436 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2437 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2440 if (fr->bvdwtab == TRUE)
2442 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2443 fr->nbkernel_vdw_modifier = eintmodNONE;
2445 if (fr->bcoultab == TRUE)
2447 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2448 fr->nbkernel_elec_modifier = eintmodNONE;
2452 if (ir->cutoff_scheme == ecutsVERLET)
2454 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2456 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2458 fr->bvdwtab = FALSE;
2459 fr->bcoultab = FALSE;
2462 /* Tables are used for direct ewald sum */
2465 if (EEL_PME(ir->coulombtype))
2469 fprintf(fp, "Will do PME sum in reciprocal space.\n");
2471 if (ir->coulombtype == eelP3M_AD)
2473 please_cite(fp, "Hockney1988");
2474 please_cite(fp, "Ballenegger2012");
2478 please_cite(fp, "Essmann95a");
2481 if (ir->ewald_geometry == eewg3DC)
2485 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2487 please_cite(fp, "In-Chul99a");
2490 fr->ewaldcoeff = calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
2491 init_ewald_tab(&(fr->ewald_table), cr, ir, fp);
2494 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2499 /* Electrostatics */
2500 fr->epsilon_r = ir->epsilon_r;
2501 fr->epsilon_rf = ir->epsilon_rf;
2502 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2503 fr->rcoulomb_switch = ir->rcoulomb_switch;
2504 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2506 /* Parameters for generalized RF */
2510 if (fr->eeltype == eelGRF)
2512 init_generalized_rf(fp, mtop, ir, fr);
2514 else if (fr->eeltype == eelSHIFT)
2516 for (m = 0; (m < DIM); m++)
2518 box_size[m] = box[m][m];
2521 if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
2523 set_shift_consts(fp, fr->rcoulomb_switch, fr->rcoulomb, box_size, fr);
2527 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
2528 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2529 IR_ELEC_FIELD(*ir) ||
2530 (fr->adress_icor != eAdressICOff)
2533 if (fr->cutoff_scheme == ecutsGROUP &&
2534 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2536 /* Count the total number of charge groups */
2537 fr->cg_nalloc = ncg_mtop(mtop);
2538 srenew(fr->cg_cm, fr->cg_nalloc);
2540 if (fr->shift_vec == NULL)
2542 snew(fr->shift_vec, SHIFTS);
2545 if (fr->fshift == NULL)
2547 snew(fr->fshift, SHIFTS);
2550 if (fr->nbfp == NULL)
2552 fr->ntype = mtop->ffparams.atnr;
2553 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2556 /* Copy the energy group exclusions */
2557 fr->egp_flags = ir->opts.egp_flags;
2559 /* Van der Waals stuff */
2560 fr->rvdw = cutoff_inf(ir->rvdw);
2561 fr->rvdw_switch = ir->rvdw_switch;
2562 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2564 if (fr->rvdw_switch >= fr->rvdw)
2566 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2567 fr->rvdw_switch, fr->rvdw);
2571 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2572 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2573 fr->rvdw_switch, fr->rvdw);
2577 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2579 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2582 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2584 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2589 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2590 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2593 fr->eDispCorr = ir->eDispCorr;
2594 if (ir->eDispCorr != edispcNO)
2596 set_avcsixtwelve(fp, fr, mtop);
2601 set_bham_b_max(fp, fr, mtop);
2604 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2606 /* Copy the GBSA data (radius, volume and surftens for each
2607 * atomtype) from the topology atomtype section to forcerec.
2609 snew(fr->atype_radius, fr->ntype);
2610 snew(fr->atype_vol, fr->ntype);
2611 snew(fr->atype_surftens, fr->ntype);
2612 snew(fr->atype_gb_radius, fr->ntype);
2613 snew(fr->atype_S_hct, fr->ntype);
2615 if (mtop->atomtypes.nr > 0)
2617 for (i = 0; i < fr->ntype; i++)
2619 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2621 for (i = 0; i < fr->ntype; i++)
2623 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2625 for (i = 0; i < fr->ntype; i++)
2627 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2629 for (i = 0; i < fr->ntype; i++)
2631 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2633 for (i = 0; i < fr->ntype; i++)
2635 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2639 /* Generate the GB table if needed */
2643 fr->gbtabscale = 2000;
2645 fr->gbtabscale = 500;
2649 fr->gbtab = make_gb_table(fp, oenv, fr, tabpfn, fr->gbtabscale);
2651 init_gb(&fr->born, cr, fr, ir, mtop, ir->rgbradii, ir->gb_algorithm);
2653 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2654 if (!DOMAINDECOMP(cr))
2656 make_local_gb(cr, fr->born, ir->gb_algorithm);
2660 /* Set the charge scaling */
2661 if (fr->epsilon_r != 0)
2663 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2667 /* eps = 0 is infinite dieletric: no coulomb interactions */
2671 /* Reaction field constants */
2672 if (EEL_RF(fr->eeltype))
2674 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2675 fr->rcoulomb, fr->temp, fr->zsquare, box,
2676 &fr->kappa, &fr->k_rf, &fr->c_rf);
2679 set_chargesum(fp, fr, mtop);
2681 /* if we are using LR electrostatics, and they are tabulated,
2682 * the tables will contain modified coulomb interactions.
2683 * Since we want to use the non-shifted ones for 1-4
2684 * coulombic interactions, we must have an extra set of tables.
2687 /* Construct tables.
2688 * A little unnecessary to make both vdw and coul tables sometimes,
2689 * but what the heck... */
2691 bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
2693 bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2694 fr->bBHAM || fr->bEwald) &&
2695 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2696 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2697 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2699 negp_pp = ir->opts.ngener - ir->nwall;
2703 bNormalnblists = TRUE;
2708 bNormalnblists = (ir->eDispCorr != edispcNO);
2709 for (egi = 0; egi < negp_pp; egi++)
2711 for (egj = egi; egj < negp_pp; egj++)
2713 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2714 if (!(egp_flags & EGP_EXCL))
2716 if (egp_flags & EGP_TABLE)
2722 bNormalnblists = TRUE;
2729 fr->nnblists = negptable + 1;
2733 fr->nnblists = negptable;
2735 if (fr->nnblists > 1)
2737 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2746 snew(fr->nblists, fr->nnblists);
2748 /* This code automatically gives table length tabext without cut-off's,
2749 * in that case grompp should already have checked that we do not need
2750 * normal tables and we only generate tables for 1-4 interactions.
2752 rtab = ir->rlistlong + ir->tabext;
2756 /* make tables for ordinary interactions */
2759 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2762 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2766 fr->tab14 = fr->nblists[0].table_elec_vdw;
2776 /* Read the special tables for certain energy group pairs */
2777 nm_ind = mtop->groups.grps[egcENER].nm_ind;
2778 for (egi = 0; egi < negp_pp; egi++)
2780 for (egj = egi; egj < negp_pp; egj++)
2782 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2783 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2785 nbl = &(fr->nblists[m]);
2786 if (fr->nnblists > 1)
2788 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2790 /* Read the table file with the two energy groups names appended */
2791 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2792 *mtop->groups.grpname[nm_ind[egi]],
2793 *mtop->groups.grpname[nm_ind[egj]],
2797 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2798 *mtop->groups.grpname[nm_ind[egi]],
2799 *mtop->groups.grpname[nm_ind[egj]],
2800 &fr->nblists[fr->nnblists/2+m]);
2804 else if (fr->nnblists > 1)
2806 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2814 /* generate extra tables with plain Coulomb for 1-4 interactions only */
2815 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2816 GMX_MAKETABLES_14ONLY);
2819 /* Read AdResS Thermo Force table if needed */
2820 if (fr->adress_icor == eAdressICThermoForce)
2822 /* old todo replace */
2824 if (ir->adress->n_tf_grps > 0)
2826 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2831 /* load the default table */
2832 snew(fr->atf_tabs, 1);
2833 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2838 fr->nwall = ir->nwall;
2839 if (ir->nwall && ir->wall_type == ewtTABLE)
2841 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2846 fcd->bondtab = make_bonded_tables(fp,
2847 F_TABBONDS, F_TABBONDSNC,
2849 fcd->angletab = make_bonded_tables(fp,
2852 fcd->dihtab = make_bonded_tables(fp,
2860 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2864 /* QM/MM initialization if requested
2868 fprintf(stderr, "QM/MM calculation requested.\n");
2871 fr->bQMMM = ir->bQMMM;
2872 fr->qr = mk_QMMMrec();
2874 /* Set all the static charge group info */
2875 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
2876 &fr->bExcl_IntraCGAll_InterCGNone);
2877 if (DOMAINDECOMP(cr))
2883 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
2886 if (!DOMAINDECOMP(cr))
2888 /* When using particle decomposition, the effect of the second argument,
2889 * which sets fr->hcg, is corrected later in do_md and init_em.
2891 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
2892 mtop->natoms, mtop->natoms, mtop->natoms);
2895 fr->print_force = print_force;
2898 /* coarse load balancing vars */
2903 /* Initialize neighbor search */
2904 init_ns(fp, cr, &fr->ns, fr, mtop, box);
2906 if (cr->duty & DUTY_PP)
2908 gmx_nonbonded_setup(fp, fr, bGenericKernelOnly);
2912 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
2917 /* Initialize the thread working data for bonded interactions */
2918 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
2920 snew(fr->excl_load, fr->nthreads+1);
2922 if (fr->cutoff_scheme == ecutsVERLET)
2924 if (ir->rcoulomb != ir->rvdw)
2926 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
2929 init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
2932 /* fr->ic is used both by verlet and group kernels (to some extent) now */
2933 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
2935 if (ir->eDispCorr != edispcNO)
2937 calc_enervirdiff(fp, ir->eDispCorr, fr);
2941 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
2942 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
2943 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
2945 void pr_forcerec(FILE *fp, t_forcerec *fr, t_commrec *cr)
2949 pr_real(fp, fr->rlist);
2950 pr_real(fp, fr->rcoulomb);
2951 pr_real(fp, fr->fudgeQQ);
2952 pr_bool(fp, fr->bGrid);
2953 pr_bool(fp, fr->bTwinRange);
2954 /*pr_int(fp,fr->cg0);
2955 pr_int(fp,fr->hcg);*/
2956 for (i = 0; i < fr->nnblists; i++)
2958 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
2960 pr_real(fp, fr->rcoulomb_switch);
2961 pr_real(fp, fr->rcoulomb);
2966 void forcerec_set_excl_load(t_forcerec *fr,
2967 const gmx_localtop_t *top, const t_commrec *cr)
2970 int t, i, j, ntot, n, ntarget;
2972 if (cr != NULL && PARTDECOMP(cr))
2974 /* No OpenMP with particle decomposition */
2982 ind = top->excls.index;
2986 for (i = 0; i < top->excls.nr; i++)
2988 for (j = ind[i]; j < ind[i+1]; j++)
2997 fr->excl_load[0] = 0;
3000 for (t = 1; t <= fr->nthreads; t++)
3002 ntarget = (ntot*t)/fr->nthreads;
3003 while (i < top->excls.nr && n < ntarget)
3005 for (j = ind[i]; j < ind[i+1]; j++)
3014 fr->excl_load[t] = i;