2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
53 #include "gmx_fatal.h"
59 #include "nonbonded.h"
64 #include "gmx_simd_macros.h"
67 /* Find a better place for this? */
68 const int cmap_coeff_matrix[] = {
69 1, 0, -3, 2, 0, 0, 0, 0, -3, 0, 9, -6, 2, 0, -6, 4,
70 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, -9, 6, -2, 0, 6, -4,
71 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, -6, 0, 0, -6, 4,
72 0, 0, 3, -2, 0, 0, 0, 0, 0, 0, -9, 6, 0, 0, 6, -4,
73 0, 0, 0, 0, 1, 0, -3, 2, -2, 0, 6, -4, 1, 0, -3, 2,
74 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 3, -2, 1, 0, -3, 2,
75 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 2, 0, 0, 3, -2,
76 0, 0, 0, 0, 0, 0, 3, -2, 0, 0, -6, 4, 0, 0, 3, -2,
77 0, 1, -2, 1, 0, 0, 0, 0, 0, -3, 6, -3, 0, 2, -4, 2,
78 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -6, 3, 0, -2, 4, -2,
79 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, 3, 0, 0, 2, -2,
80 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 3, -3, 0, 0, -2, 2,
81 0, 0, 0, 0, 0, 1, -2, 1, 0, -2, 4, -2, 0, 1, -2, 1,
82 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 2, -1, 0, 1, -2, 1,
83 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 0, 0, -1, 1,
84 0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 2, -2, 0, 0, -1, 1
89 int glatnr(int *global_atom_index, int i)
93 if (global_atom_index == NULL)
99 atnr = global_atom_index[i] + 1;
105 static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
109 return pbc_dx_aiuc(pbc, xi, xj, dx);
113 rvec_sub(xi, xj, dx);
120 /* Below are 3 SIMD vector operations.
121 * Currently these are only used here, but they should be moved to
122 * a general SIMD include file when used elsewhere.
125 /* SIMD inner-product of multiple vectors */
126 static gmx_inline gmx_mm_pr
127 gmx_iprod_pr(gmx_mm_pr ax, gmx_mm_pr ay, gmx_mm_pr az,
128 gmx_mm_pr bx, gmx_mm_pr by, gmx_mm_pr bz)
132 ret = gmx_mul_pr(ax, bx);
133 ret = gmx_madd_pr(ay, by, ret);
134 ret = gmx_madd_pr(az, bz, ret);
139 /* SIMD norm squared of multiple vectors */
140 static gmx_inline gmx_mm_pr
141 gmx_norm2_pr(gmx_mm_pr ax, gmx_mm_pr ay, gmx_mm_pr az)
145 ret = gmx_mul_pr(ax, ax);
146 ret = gmx_madd_pr(ay, ay, ret);
147 ret = gmx_madd_pr(az, az, ret);
152 /* SIMD cross-product of multiple vectors */
153 static gmx_inline void
154 gmx_cprod_pr(gmx_mm_pr ax, gmx_mm_pr ay, gmx_mm_pr az,
155 gmx_mm_pr bx, gmx_mm_pr by, gmx_mm_pr bz,
156 gmx_mm_pr *cx, gmx_mm_pr *cy, gmx_mm_pr *cz)
158 *cx = gmx_mul_pr(ay, bz);
159 *cx = gmx_nmsub_pr(az, by, *cx);
161 *cy = gmx_mul_pr(az, bx);
162 *cy = gmx_nmsub_pr(ax, bz, *cy);
164 *cz = gmx_mul_pr(ax, by);
165 *cz = gmx_nmsub_pr(ay, bx, *cz);
168 /* SIMD PBC data structure, containing 1/boxdiag and the box vectors */
181 /* Set the SIMD pbc data from a normal t_pbc struct */
182 static void set_pbc_simd(const t_pbc *pbc, pbc_simd_t *pbc_simd)
187 /* Setting inv_bdiag to 0 effectively turns off PBC */
188 clear_rvec(inv_bdiag);
191 for (d = 0; d < pbc->ndim_ePBC; d++)
193 inv_bdiag[d] = 1.0/pbc->box[d][d];
197 pbc_simd->inv_bzz = gmx_set1_pr(inv_bdiag[ZZ]);
198 pbc_simd->inv_byy = gmx_set1_pr(inv_bdiag[YY]);
199 pbc_simd->inv_bxx = gmx_set1_pr(inv_bdiag[XX]);
203 pbc_simd->bzx = gmx_set1_pr(pbc->box[ZZ][XX]);
204 pbc_simd->bzy = gmx_set1_pr(pbc->box[ZZ][YY]);
205 pbc_simd->bzz = gmx_set1_pr(pbc->box[ZZ][ZZ]);
206 pbc_simd->byx = gmx_set1_pr(pbc->box[YY][XX]);
207 pbc_simd->byy = gmx_set1_pr(pbc->box[YY][YY]);
208 pbc_simd->bxx = gmx_set1_pr(pbc->box[XX][XX]);
212 pbc_simd->bzx = gmx_setzero_pr();
213 pbc_simd->bzy = gmx_setzero_pr();
214 pbc_simd->bzz = gmx_setzero_pr();
215 pbc_simd->byx = gmx_setzero_pr();
216 pbc_simd->byy = gmx_setzero_pr();
217 pbc_simd->bxx = gmx_setzero_pr();
221 /* Correct distance vector *dx,*dy,*dz for PBC using SIMD */
222 static gmx_inline void
223 pbc_dx_simd(gmx_mm_pr *dx, gmx_mm_pr *dy, gmx_mm_pr *dz,
224 const pbc_simd_t *pbc)
228 sh = gmx_round_pr(gmx_mul_pr(*dz, pbc->inv_bzz));
229 *dx = gmx_nmsub_pr(sh, pbc->bzx, *dx);
230 *dy = gmx_nmsub_pr(sh, pbc->bzy, *dy);
231 *dz = gmx_nmsub_pr(sh, pbc->bzz, *dz);
233 sh = gmx_round_pr(gmx_mul_pr(*dy, pbc->inv_byy));
234 *dx = gmx_nmsub_pr(sh, pbc->byx, *dx);
235 *dy = gmx_nmsub_pr(sh, pbc->byy, *dy);
237 sh = gmx_round_pr(gmx_mul_pr(*dx, pbc->inv_bxx));
238 *dx = gmx_nmsub_pr(sh, pbc->bxx, *dx);
241 #endif /* SIMD_BONDEDS */
244 * Morse potential bond by Frank Everdij
246 * Three parameters needed:
248 * b0 = equilibrium distance in nm
249 * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
250 * cb = well depth in kJ/mol
252 * Note: the potential is referenced to be +cb at infinite separation
253 * and zero at the equilibrium distance!
256 real morse_bonds(int nbonds,
257 const t_iatom forceatoms[], const t_iparams forceparams[],
258 const rvec x[], rvec f[], rvec fshift[],
259 const t_pbc *pbc, const t_graph *g,
260 real lambda, real *dvdlambda,
261 const t_mdatoms *md, t_fcdata *fcd,
262 int *global_atom_index)
264 const real one = 1.0;
265 const real two = 2.0;
266 real dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, fij, vtot;
267 real b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
269 int i, m, ki, type, ai, aj;
273 for (i = 0; (i < nbonds); )
275 type = forceatoms[i++];
276 ai = forceatoms[i++];
277 aj = forceatoms[i++];
279 b0A = forceparams[type].morse.b0A;
280 beA = forceparams[type].morse.betaA;
281 cbA = forceparams[type].morse.cbA;
283 b0B = forceparams[type].morse.b0B;
284 beB = forceparams[type].morse.betaB;
285 cbB = forceparams[type].morse.cbB;
287 L1 = one-lambda; /* 1 */
288 b0 = L1*b0A + lambda*b0B; /* 3 */
289 be = L1*beA + lambda*beB; /* 3 */
290 cb = L1*cbA + lambda*cbB; /* 3 */
292 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
293 dr2 = iprod(dx, dx); /* 5 */
294 dr = dr2*gmx_invsqrt(dr2); /* 10 */
295 temp = exp(-be*(dr-b0)); /* 12 */
299 /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
300 *dvdlambda += cbB-cbA;
304 omtemp = one-temp; /* 1 */
305 cbomtemp = cb*omtemp; /* 1 */
306 vbond = cbomtemp*omtemp; /* 1 */
307 fbond = -two*be*temp*cbomtemp*gmx_invsqrt(dr2); /* 9 */
308 vtot += vbond; /* 1 */
310 *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
314 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
318 for (m = 0; (m < DIM); m++) /* 15 */
323 fshift[ki][m] += fij;
324 fshift[CENTRAL][m] -= fij;
330 real cubic_bonds(int nbonds,
331 const t_iatom forceatoms[], const t_iparams forceparams[],
332 const rvec x[], rvec f[], rvec fshift[],
333 const t_pbc *pbc, const t_graph *g,
334 real lambda, real *dvdlambda,
335 const t_mdatoms *md, t_fcdata *fcd,
336 int *global_atom_index)
338 const real three = 3.0;
339 const real two = 2.0;
341 real dr, dr2, dist, kdist, kdist2, fbond, vbond, fij, vtot;
343 int i, m, ki, type, ai, aj;
347 for (i = 0; (i < nbonds); )
349 type = forceatoms[i++];
350 ai = forceatoms[i++];
351 aj = forceatoms[i++];
353 b0 = forceparams[type].cubic.b0;
354 kb = forceparams[type].cubic.kb;
355 kcub = forceparams[type].cubic.kcub;
357 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
358 dr2 = iprod(dx, dx); /* 5 */
365 dr = dr2*gmx_invsqrt(dr2); /* 10 */
370 vbond = kdist2 + kcub*kdist2*dist;
371 fbond = -(two*kdist + three*kdist2*kcub)/dr;
373 vtot += vbond; /* 21 */
377 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
380 for (m = 0; (m < DIM); m++) /* 15 */
385 fshift[ki][m] += fij;
386 fshift[CENTRAL][m] -= fij;
392 real FENE_bonds(int nbonds,
393 const t_iatom forceatoms[], const t_iparams forceparams[],
394 const rvec x[], rvec f[], rvec fshift[],
395 const t_pbc *pbc, const t_graph *g,
396 real lambda, real *dvdlambda,
397 const t_mdatoms *md, t_fcdata *fcd,
398 int *global_atom_index)
400 const real half = 0.5;
401 const real one = 1.0;
403 real dr, dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot;
405 int i, m, ki, type, ai, aj;
409 for (i = 0; (i < nbonds); )
411 type = forceatoms[i++];
412 ai = forceatoms[i++];
413 aj = forceatoms[i++];
415 bm = forceparams[type].fene.bm;
416 kb = forceparams[type].fene.kb;
418 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
419 dr2 = iprod(dx, dx); /* 5 */
431 "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
433 glatnr(global_atom_index, ai),
434 glatnr(global_atom_index, aj));
437 omdr2obm2 = one - dr2/bm2;
439 vbond = -half*kb*bm2*log(omdr2obm2);
440 fbond = -kb/omdr2obm2;
442 vtot += vbond; /* 35 */
446 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
449 for (m = 0; (m < DIM); m++) /* 15 */
454 fshift[ki][m] += fij;
455 fshift[CENTRAL][m] -= fij;
461 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
464 const real half = 0.5;
465 real L1, kk, x0, dx, dx2;
466 real v, f, dvdlambda;
469 kk = L1*kA+lambda*kB;
470 x0 = L1*xA+lambda*xB;
477 dvdlambda = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
484 /* That was 19 flops */
488 real bonds(int nbonds,
489 const t_iatom forceatoms[], const t_iparams forceparams[],
490 const rvec x[], rvec f[], rvec fshift[],
491 const t_pbc *pbc, const t_graph *g,
492 real lambda, real *dvdlambda,
493 const t_mdatoms *md, t_fcdata *fcd,
494 int *global_atom_index)
496 int i, m, ki, ai, aj, type;
497 real dr, dr2, fbond, vbond, fij, vtot;
502 for (i = 0; (i < nbonds); )
504 type = forceatoms[i++];
505 ai = forceatoms[i++];
506 aj = forceatoms[i++];
508 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
509 dr2 = iprod(dx, dx); /* 5 */
510 dr = dr2*gmx_invsqrt(dr2); /* 10 */
512 *dvdlambda += harmonic(forceparams[type].harmonic.krA,
513 forceparams[type].harmonic.krB,
514 forceparams[type].harmonic.rA,
515 forceparams[type].harmonic.rB,
516 dr, lambda, &vbond, &fbond); /* 19 */
524 vtot += vbond; /* 1*/
525 fbond *= gmx_invsqrt(dr2); /* 6 */
529 fprintf(debug, "BONDS: dr = %10g vbond = %10g fbond = %10g\n",
535 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
538 for (m = 0; (m < DIM); m++) /* 15 */
543 fshift[ki][m] += fij;
544 fshift[CENTRAL][m] -= fij;
550 real restraint_bonds(int nbonds,
551 const t_iatom forceatoms[], const t_iparams forceparams[],
552 const rvec x[], rvec f[], rvec fshift[],
553 const t_pbc *pbc, const t_graph *g,
554 real lambda, real *dvdlambda,
555 const t_mdatoms *md, t_fcdata *fcd,
556 int *global_atom_index)
558 int i, m, ki, ai, aj, type;
559 real dr, dr2, fbond, vbond, fij, vtot;
561 real low, dlow, up1, dup1, up2, dup2, k, dk;
569 for (i = 0; (i < nbonds); )
571 type = forceatoms[i++];
572 ai = forceatoms[i++];
573 aj = forceatoms[i++];
575 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
576 dr2 = iprod(dx, dx); /* 5 */
577 dr = dr2*gmx_invsqrt(dr2); /* 10 */
579 low = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
580 dlow = -forceparams[type].restraint.lowA + forceparams[type].restraint.lowB;
581 up1 = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
582 dup1 = -forceparams[type].restraint.up1A + forceparams[type].restraint.up1B;
583 up2 = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
584 dup2 = -forceparams[type].restraint.up2A + forceparams[type].restraint.up2B;
585 k = L1*forceparams[type].restraint.kA + lambda*forceparams[type].restraint.kB;
586 dk = -forceparams[type].restraint.kA + forceparams[type].restraint.kB;
595 *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
608 *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
613 vbond = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
614 fbond = -k*(up2 - up1);
615 *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
616 + k*(dup2 - dup1)*(up2 - up1 + drh)
617 - k*(up2 - up1)*dup2;
625 vtot += vbond; /* 1*/
626 fbond *= gmx_invsqrt(dr2); /* 6 */
630 fprintf(debug, "BONDS: dr = %10g vbond = %10g fbond = %10g\n",
636 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
639 for (m = 0; (m < DIM); m++) /* 15 */
644 fshift[ki][m] += fij;
645 fshift[CENTRAL][m] -= fij;
652 real polarize(int nbonds,
653 const t_iatom forceatoms[], const t_iparams forceparams[],
654 const rvec x[], rvec f[], rvec fshift[],
655 const t_pbc *pbc, const t_graph *g,
656 real lambda, real *dvdlambda,
657 const t_mdatoms *md, t_fcdata *fcd,
658 int *global_atom_index)
660 int i, m, ki, ai, aj, type;
661 real dr, dr2, fbond, vbond, fij, vtot, ksh;
666 for (i = 0; (i < nbonds); )
668 type = forceatoms[i++];
669 ai = forceatoms[i++];
670 aj = forceatoms[i++];
671 ksh = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
674 fprintf(debug, "POL: local ai = %d aj = %d ksh = %.3f\n", ai, aj, ksh);
677 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
678 dr2 = iprod(dx, dx); /* 5 */
679 dr = dr2*gmx_invsqrt(dr2); /* 10 */
681 *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /* 19 */
688 vtot += vbond; /* 1*/
689 fbond *= gmx_invsqrt(dr2); /* 6 */
693 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
696 for (m = 0; (m < DIM); m++) /* 15 */
701 fshift[ki][m] += fij;
702 fshift[CENTRAL][m] -= fij;
708 real anharm_polarize(int nbonds,
709 const t_iatom forceatoms[], const t_iparams forceparams[],
710 const rvec x[], rvec f[], rvec fshift[],
711 const t_pbc *pbc, const t_graph *g,
712 real lambda, real *dvdlambda,
713 const t_mdatoms *md, t_fcdata *fcd,
714 int *global_atom_index)
716 int i, m, ki, ai, aj, type;
717 real dr, dr2, fbond, vbond, fij, vtot, ksh, khyp, drcut, ddr, ddr3;
722 for (i = 0; (i < nbonds); )
724 type = forceatoms[i++];
725 ai = forceatoms[i++];
726 aj = forceatoms[i++];
727 ksh = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
728 khyp = forceparams[type].anharm_polarize.khyp;
729 drcut = forceparams[type].anharm_polarize.drcut;
732 fprintf(debug, "POL: local ai = %d aj = %d ksh = %.3f\n", ai, aj, ksh);
735 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
736 dr2 = iprod(dx, dx); /* 5 */
737 dr = dr2*gmx_invsqrt(dr2); /* 10 */
739 *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /* 19 */
750 vbond += khyp*ddr*ddr3;
751 fbond -= 4*khyp*ddr3;
753 fbond *= gmx_invsqrt(dr2); /* 6 */
754 vtot += vbond; /* 1*/
758 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
761 for (m = 0; (m < DIM); m++) /* 15 */
766 fshift[ki][m] += fij;
767 fshift[CENTRAL][m] -= fij;
773 real water_pol(int nbonds,
774 const t_iatom forceatoms[], const t_iparams forceparams[],
775 const rvec x[], rvec f[], rvec fshift[],
776 const t_pbc *pbc, const t_graph *g,
777 real lambda, real *dvdlambda,
778 const t_mdatoms *md, t_fcdata *fcd,
779 int *global_atom_index)
781 /* This routine implements anisotropic polarizibility for water, through
782 * a shell connected to a dummy with spring constant that differ in the
783 * three spatial dimensions in the molecular frame.
785 int i, m, aO, aH1, aH2, aD, aS, type, type0;
786 rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
790 real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
795 type0 = forceatoms[0];
797 qS = md->chargeA[aS];
798 kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
799 kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
800 kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
801 r_HH = 1.0/forceparams[type0].wpol.rHH;
802 r_OD = 1.0/forceparams[type0].wpol.rOD;
805 fprintf(debug, "WPOL: qS = %10.5f aS = %5d\n", qS, aS);
806 fprintf(debug, "WPOL: kk = %10.3f %10.3f %10.3f\n",
807 kk[XX], kk[YY], kk[ZZ]);
808 fprintf(debug, "WPOL: rOH = %10.3f rHH = %10.3f rOD = %10.3f\n",
809 forceparams[type0].wpol.rOH,
810 forceparams[type0].wpol.rHH,
811 forceparams[type0].wpol.rOD);
813 for (i = 0; (i < nbonds); i += 6)
815 type = forceatoms[i];
818 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d",
819 type, type0, __FILE__, __LINE__);
821 aO = forceatoms[i+1];
822 aH1 = forceatoms[i+2];
823 aH2 = forceatoms[i+3];
824 aD = forceatoms[i+4];
825 aS = forceatoms[i+5];
827 /* Compute vectors describing the water frame */
828 rvec_sub(x[aH1], x[aO], dOH1);
829 rvec_sub(x[aH2], x[aO], dOH2);
830 rvec_sub(x[aH2], x[aH1], dHH);
831 rvec_sub(x[aD], x[aO], dOD);
832 rvec_sub(x[aS], x[aD], dDS);
833 cprod(dOH1, dOH2, nW);
835 /* Compute inverse length of normal vector
836 * (this one could be precomputed, but I'm too lazy now)
838 r_nW = gmx_invsqrt(iprod(nW, nW));
839 /* This is for precision, but does not make a big difference,
842 r_OD = gmx_invsqrt(iprod(dOD, dOD));
844 /* Normalize the vectors in the water frame */
846 svmul(r_HH, dHH, dHH);
847 svmul(r_OD, dOD, dOD);
849 /* Compute displacement of shell along components of the vector */
850 dx[ZZ] = iprod(dDS, dOD);
851 /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
852 for (m = 0; (m < DIM); m++)
854 proj[m] = dDS[m]-dx[ZZ]*dOD[m];
857 /*dx[XX] = iprod(dDS,nW);
858 dx[YY] = iprod(dDS,dHH);*/
859 dx[XX] = iprod(proj, nW);
860 for (m = 0; (m < DIM); m++)
862 proj[m] -= dx[XX]*nW[m];
864 dx[YY] = iprod(proj, dHH);
869 fprintf(debug, "WPOL: dx2=%10g dy2=%10g dz2=%10g sum=%10g dDS^2=%10g\n",
870 sqr(dx[XX]), sqr(dx[YY]), sqr(dx[ZZ]), iprod(dx, dx), iprod(dDS, dDS));
871 fprintf(debug, "WPOL: dHH=(%10g,%10g,%10g)\n", dHH[XX], dHH[YY], dHH[ZZ]);
872 fprintf(debug, "WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
873 dOD[XX], dOD[YY], dOD[ZZ], 1/r_OD);
874 fprintf(debug, "WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
875 nW[XX], nW[YY], nW[ZZ], 1/r_nW);
876 fprintf(debug, "WPOL: dx =%10g, dy =%10g, dz =%10g\n",
877 dx[XX], dx[YY], dx[ZZ]);
878 fprintf(debug, "WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
879 dDS[XX], dDS[YY], dDS[ZZ]);
882 /* Now compute the forces and energy */
883 kdx[XX] = kk[XX]*dx[XX];
884 kdx[YY] = kk[YY]*dx[YY];
885 kdx[ZZ] = kk[ZZ]*dx[ZZ];
886 vtot += iprod(dx, kdx);
887 for (m = 0; (m < DIM); m++)
889 /* This is a tensor operation but written out for speed */
903 fprintf(debug, "WPOL: vwpol=%g\n", 0.5*iprod(dx, kdx));
904 fprintf(debug, "WPOL: df = (%10g, %10g, %10g)\n", df[XX], df[YY], df[ZZ]);
912 static real do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj,
913 const t_pbc *pbc, real qq,
914 rvec fshift[], real afac)
917 real r12sq, r12_1, r12n, r12bar, v0, v1, fscal, ebar, fff;
920 t = pbc_rvec_sub(pbc, xi, xj, r12); /* 3 */
922 r12sq = iprod(r12, r12); /* 5 */
923 r12_1 = gmx_invsqrt(r12sq); /* 5 */
924 r12bar = afac/r12_1; /* 5 */
925 v0 = qq*ONE_4PI_EPS0*r12_1; /* 2 */
926 ebar = exp(-r12bar); /* 5 */
927 v1 = (1-(1+0.5*r12bar)*ebar); /* 4 */
928 fscal = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
931 fprintf(debug, "THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f ebar = %.3f\n", v0, v1, 1/r12_1, r12bar, fscal, ebar);
934 for (m = 0; (m < DIM); m++)
940 fshift[CENTRAL][m] -= fff;
943 return v0*v1; /* 1 */
947 real thole_pol(int nbonds,
948 const t_iatom forceatoms[], const t_iparams forceparams[],
949 const rvec x[], rvec f[], rvec fshift[],
950 const t_pbc *pbc, const t_graph *g,
951 real lambda, real *dvdlambda,
952 const t_mdatoms *md, t_fcdata *fcd,
953 int *global_atom_index)
955 /* Interaction between two pairs of particles with opposite charge */
956 int i, type, a1, da1, a2, da2;
957 real q1, q2, qq, a, al1, al2, afac;
960 for (i = 0; (i < nbonds); )
962 type = forceatoms[i++];
963 a1 = forceatoms[i++];
964 da1 = forceatoms[i++];
965 a2 = forceatoms[i++];
966 da2 = forceatoms[i++];
967 q1 = md->chargeA[da1];
968 q2 = md->chargeA[da2];
969 a = forceparams[type].thole.a;
970 al1 = forceparams[type].thole.alpha1;
971 al2 = forceparams[type].thole.alpha2;
973 afac = a*pow(al1*al2, -1.0/6.0);
974 V += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
975 V += do_1_thole(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
976 V += do_1_thole(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
977 V += do_1_thole(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
983 real bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
984 rvec r_ij, rvec r_kj, real *costh,
986 /* Return value is the angle between the bonds i-j and j-k */
991 *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /* 3 */
992 *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /* 3 */
994 *costh = cos_angle(r_ij, r_kj); /* 25 */
995 th = acos(*costh); /* 10 */
1000 real angles(int nbonds,
1001 const t_iatom forceatoms[], const t_iparams forceparams[],
1002 const rvec x[], rvec f[], rvec fshift[],
1003 const t_pbc *pbc, const t_graph *g,
1004 real lambda, real *dvdlambda,
1005 const t_mdatoms *md, t_fcdata *fcd,
1006 int *global_atom_index)
1008 int i, ai, aj, ak, t1, t2, type;
1010 real cos_theta, cos_theta2, theta, dVdt, va, vtot;
1011 ivec jt, dt_ij, dt_kj;
1014 for (i = 0; i < nbonds; )
1016 type = forceatoms[i++];
1017 ai = forceatoms[i++];
1018 aj = forceatoms[i++];
1019 ak = forceatoms[i++];
1021 theta = bond_angle(x[ai], x[aj], x[ak], pbc,
1022 r_ij, r_kj, &cos_theta, &t1, &t2); /* 41 */
1024 *dvdlambda += harmonic(forceparams[type].harmonic.krA,
1025 forceparams[type].harmonic.krB,
1026 forceparams[type].harmonic.rA*DEG2RAD,
1027 forceparams[type].harmonic.rB*DEG2RAD,
1028 theta, lambda, &va, &dVdt); /* 21 */
1031 cos_theta2 = sqr(cos_theta);
1038 real nrkj_1, nrij_1;
1041 st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
1042 sth = st*cos_theta; /* 1 */
1046 fprintf(debug, "ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
1047 theta*RAD2DEG, va, dVdt);
1050 nrij2 = iprod(r_ij, r_ij); /* 5 */
1051 nrkj2 = iprod(r_kj, r_kj); /* 5 */
1053 nrij_1 = gmx_invsqrt(nrij2); /* 10 */
1054 nrkj_1 = gmx_invsqrt(nrkj2); /* 10 */
1056 cik = st*nrij_1*nrkj_1; /* 2 */
1057 cii = sth*nrij_1*nrij_1; /* 2 */
1058 ckk = sth*nrkj_1*nrkj_1; /* 2 */
1060 for (m = 0; m < DIM; m++)
1062 f_i[m] = -(cik*r_kj[m] - cii*r_ij[m]);
1063 f_k[m] = -(cik*r_ij[m] - ckk*r_kj[m]);
1064 f_j[m] = -f_i[m] - f_k[m];
1071 copy_ivec(SHIFT_IVEC(g, aj), jt);
1073 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1074 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1075 t1 = IVEC2IS(dt_ij);
1076 t2 = IVEC2IS(dt_kj);
1078 rvec_inc(fshift[t1], f_i);
1079 rvec_inc(fshift[CENTRAL], f_j);
1080 rvec_inc(fshift[t2], f_k);
1089 /* As angles, but using SIMD to calculate many dihedrals at once.
1090 * This routines does not calculate energies and shift forces.
1092 static gmx_inline void
1093 angles_noener_simd(int nbonds,
1094 const t_iatom forceatoms[], const t_iparams forceparams[],
1095 const rvec x[], rvec f[],
1096 const t_pbc *pbc, const t_graph *g,
1098 const t_mdatoms *md, t_fcdata *fcd,
1099 int *global_atom_index)
1101 #define UNROLL GMX_SIMD_WIDTH_HERE
1104 int type, ai[UNROLL], aj[UNROLL], ak[UNROLL];
1105 real coeff_array[2*UNROLL+UNROLL], *coeff;
1106 real dr_array[2*DIM*UNROLL+UNROLL], *dr;
1107 real f_buf_array[6*UNROLL+UNROLL], *f_buf;
1108 gmx_mm_pr k_S, theta0_S;
1109 gmx_mm_pr rijx_S, rijy_S, rijz_S;
1110 gmx_mm_pr rkjx_S, rkjy_S, rkjz_S;
1112 gmx_mm_pr rij_rkj_S;
1113 gmx_mm_pr nrij2_S, nrij_1_S;
1114 gmx_mm_pr nrkj2_S, nrkj_1_S;
1115 gmx_mm_pr cos_S, sin_S;
1117 gmx_mm_pr st_S, sth_S;
1118 gmx_mm_pr cik_S, cii_S, ckk_S;
1119 gmx_mm_pr f_ix_S, f_iy_S, f_iz_S;
1120 gmx_mm_pr f_kx_S, f_ky_S, f_kz_S;
1121 pbc_simd_t pbc_simd;
1123 /* Ensure register memory alignment */
1124 coeff = gmx_simd_align_real(coeff_array);
1125 dr = gmx_simd_align_real(dr_array);
1126 f_buf = gmx_simd_align_real(f_buf_array);
1128 set_pbc_simd(pbc,&pbc_simd);
1130 one_S = gmx_set1_pr(1.0);
1132 /* nbonds is the number of angles times nfa1, here we step UNROLL angles */
1133 for (i = 0; (i < nbonds); i += UNROLL*nfa1)
1135 /* Collect atoms for UNROLL angles.
1136 * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1139 for (s = 0; s < UNROLL; s++)
1141 type = forceatoms[iu];
1142 ai[s] = forceatoms[iu+1];
1143 aj[s] = forceatoms[iu+2];
1144 ak[s] = forceatoms[iu+3];
1146 coeff[s] = forceparams[type].harmonic.krA;
1147 coeff[UNROLL+s] = forceparams[type].harmonic.rA*DEG2RAD;
1149 /* If you can't use pbc_dx_simd below for PBC, e.g. because
1150 * you can't round in SIMD, use pbc_rvec_sub here.
1152 /* Store the non PBC corrected distances packed and aligned */
1153 for (m = 0; m < DIM; m++)
1155 dr[s + m *UNROLL] = x[ai[s]][m] - x[aj[s]][m];
1156 dr[s + (DIM+m)*UNROLL] = x[ak[s]][m] - x[aj[s]][m];
1159 /* At the end fill the arrays with identical entries */
1160 if (iu + nfa1 < nbonds)
1166 k_S = gmx_load_pr(coeff);
1167 theta0_S = gmx_load_pr(coeff+UNROLL);
1169 rijx_S = gmx_load_pr(dr + 0*UNROLL);
1170 rijy_S = gmx_load_pr(dr + 1*UNROLL);
1171 rijz_S = gmx_load_pr(dr + 2*UNROLL);
1172 rkjx_S = gmx_load_pr(dr + 3*UNROLL);
1173 rkjy_S = gmx_load_pr(dr + 4*UNROLL);
1174 rkjz_S = gmx_load_pr(dr + 5*UNROLL);
1176 pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, &pbc_simd);
1177 pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, &pbc_simd);
1179 rij_rkj_S = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
1180 rkjx_S, rkjy_S, rkjz_S);
1182 nrij2_S = gmx_norm2_pr(rijx_S, rijy_S, rijz_S);
1183 nrkj2_S = gmx_norm2_pr(rkjx_S, rkjy_S, rkjz_S);
1185 nrij_1_S = gmx_invsqrt_pr(nrij2_S);
1186 nrkj_1_S = gmx_invsqrt_pr(nrkj2_S);
1188 cos_S = gmx_mul_pr(rij_rkj_S, gmx_mul_pr(nrij_1_S, nrkj_1_S));
1190 theta_S = gmx_acos_pr(cos_S);
1192 sin_S = gmx_invsqrt_pr(gmx_max_pr(gmx_sub_pr(one_S, gmx_mul_pr(cos_S, cos_S)),
1194 st_S = gmx_mul_pr(gmx_mul_pr(k_S, gmx_sub_pr(theta0_S, theta_S)),
1196 sth_S = gmx_mul_pr(st_S, cos_S);
1198 cik_S = gmx_mul_pr(st_S, gmx_mul_pr(nrij_1_S, nrkj_1_S));
1199 cii_S = gmx_mul_pr(sth_S, gmx_mul_pr(nrij_1_S, nrij_1_S));
1200 ckk_S = gmx_mul_pr(sth_S, gmx_mul_pr(nrkj_1_S, nrkj_1_S));
1202 f_ix_S = gmx_mul_pr(cii_S, rijx_S);
1203 f_ix_S = gmx_nmsub_pr(cik_S, rkjx_S, f_ix_S);
1204 f_iy_S = gmx_mul_pr(cii_S, rijy_S);
1205 f_iy_S = gmx_nmsub_pr(cik_S, rkjy_S, f_iy_S);
1206 f_iz_S = gmx_mul_pr(cii_S, rijz_S);
1207 f_iz_S = gmx_nmsub_pr(cik_S, rkjz_S, f_iz_S);
1208 f_kx_S = gmx_mul_pr(ckk_S, rkjx_S);
1209 f_kx_S = gmx_nmsub_pr(cik_S, rijx_S, f_kx_S);
1210 f_ky_S = gmx_mul_pr(ckk_S, rkjy_S);
1211 f_ky_S = gmx_nmsub_pr(cik_S, rijy_S, f_ky_S);
1212 f_kz_S = gmx_mul_pr(ckk_S, rkjz_S);
1213 f_kz_S = gmx_nmsub_pr(cik_S, rijz_S, f_kz_S);
1215 gmx_store_pr(f_buf + 0*UNROLL, f_ix_S);
1216 gmx_store_pr(f_buf + 1*UNROLL, f_iy_S);
1217 gmx_store_pr(f_buf + 2*UNROLL, f_iz_S);
1218 gmx_store_pr(f_buf + 3*UNROLL, f_kx_S);
1219 gmx_store_pr(f_buf + 4*UNROLL, f_ky_S);
1220 gmx_store_pr(f_buf + 5*UNROLL, f_kz_S);
1226 for (m = 0; m < DIM; m++)
1228 f[ai[s]][m] += f_buf[s + m*UNROLL];
1229 f[aj[s]][m] -= f_buf[s + m*UNROLL] + f_buf[s + (DIM+m)*UNROLL];
1230 f[ak[s]][m] += f_buf[s + (DIM+m)*UNROLL];
1235 while (s < UNROLL && iu < nbonds);
1240 #endif /* SIMD_BONDEDS */
1242 real linear_angles(int nbonds,
1243 const t_iatom forceatoms[], const t_iparams forceparams[],
1244 const rvec x[], rvec f[], rvec fshift[],
1245 const t_pbc *pbc, const t_graph *g,
1246 real lambda, real *dvdlambda,
1247 const t_mdatoms *md, t_fcdata *fcd,
1248 int *global_atom_index)
1250 int i, m, ai, aj, ak, t1, t2, type;
1252 real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1253 ivec jt, dt_ij, dt_kj;
1254 rvec r_ij, r_kj, r_ik, dx;
1258 for (i = 0; (i < nbonds); )
1260 type = forceatoms[i++];
1261 ai = forceatoms[i++];
1262 aj = forceatoms[i++];
1263 ak = forceatoms[i++];
1265 kA = forceparams[type].linangle.klinA;
1266 kB = forceparams[type].linangle.klinB;
1267 klin = L1*kA + lambda*kB;
1269 aA = forceparams[type].linangle.aA;
1270 aB = forceparams[type].linangle.aB;
1271 a = L1*aA+lambda*aB;
1274 t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1275 t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1276 rvec_sub(r_ij, r_kj, r_ik);
1279 for (m = 0; (m < DIM); m++)
1281 dr = -a * r_ij[m] - b * r_kj[m];
1286 f_j[m] = -(f_i[m]+f_k[m]);
1292 *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx, r_ik);
1298 copy_ivec(SHIFT_IVEC(g, aj), jt);
1300 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1301 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1302 t1 = IVEC2IS(dt_ij);
1303 t2 = IVEC2IS(dt_kj);
1305 rvec_inc(fshift[t1], f_i);
1306 rvec_inc(fshift[CENTRAL], f_j);
1307 rvec_inc(fshift[t2], f_k);
1312 real urey_bradley(int nbonds,
1313 const t_iatom forceatoms[], const t_iparams forceparams[],
1314 const rvec x[], rvec f[], rvec fshift[],
1315 const t_pbc *pbc, const t_graph *g,
1316 real lambda, real *dvdlambda,
1317 const t_mdatoms *md, t_fcdata *fcd,
1318 int *global_atom_index)
1320 int i, m, ai, aj, ak, t1, t2, type, ki;
1321 rvec r_ij, r_kj, r_ik;
1322 real cos_theta, cos_theta2, theta;
1323 real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1324 real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1325 ivec jt, dt_ij, dt_kj, dt_ik;
1328 for (i = 0; (i < nbonds); )
1330 type = forceatoms[i++];
1331 ai = forceatoms[i++];
1332 aj = forceatoms[i++];
1333 ak = forceatoms[i++];
1334 th0A = forceparams[type].u_b.thetaA*DEG2RAD;
1335 kthA = forceparams[type].u_b.kthetaA;
1336 r13A = forceparams[type].u_b.r13A;
1337 kUBA = forceparams[type].u_b.kUBA;
1338 th0B = forceparams[type].u_b.thetaB*DEG2RAD;
1339 kthB = forceparams[type].u_b.kthetaB;
1340 r13B = forceparams[type].u_b.r13B;
1341 kUBB = forceparams[type].u_b.kUBB;
1343 theta = bond_angle(x[ai], x[aj], x[ak], pbc,
1344 r_ij, r_kj, &cos_theta, &t1, &t2); /* 41 */
1346 *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /* 21 */
1349 ki = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik); /* 3 */
1350 dr2 = iprod(r_ik, r_ik); /* 5 */
1351 dr = dr2*gmx_invsqrt(dr2); /* 10 */
1353 *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /* 19 */
1355 cos_theta2 = sqr(cos_theta); /* 1 */
1363 st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
1364 sth = st*cos_theta; /* 1 */
1368 fprintf(debug, "ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
1369 theta*RAD2DEG, va, dVdt);
1372 nrkj2 = iprod(r_kj, r_kj); /* 5 */
1373 nrij2 = iprod(r_ij, r_ij);
1375 cik = st*gmx_invsqrt(nrkj2*nrij2); /* 12 */
1376 cii = sth/nrij2; /* 10 */
1377 ckk = sth/nrkj2; /* 10 */
1379 for (m = 0; (m < DIM); m++) /* 39 */
1381 f_i[m] = -(cik*r_kj[m]-cii*r_ij[m]);
1382 f_k[m] = -(cik*r_ij[m]-ckk*r_kj[m]);
1383 f_j[m] = -f_i[m]-f_k[m];
1390 copy_ivec(SHIFT_IVEC(g, aj), jt);
1392 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1393 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1394 t1 = IVEC2IS(dt_ij);
1395 t2 = IVEC2IS(dt_kj);
1397 rvec_inc(fshift[t1], f_i);
1398 rvec_inc(fshift[CENTRAL], f_j);
1399 rvec_inc(fshift[t2], f_k);
1401 /* Time for the bond calculations */
1407 vtot += vbond; /* 1*/
1408 fbond *= gmx_invsqrt(dr2); /* 6 */
1412 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
1413 ki = IVEC2IS(dt_ik);
1415 for (m = 0; (m < DIM); m++) /* 15 */
1417 fik = fbond*r_ik[m];
1420 fshift[ki][m] += fik;
1421 fshift[CENTRAL][m] -= fik;
1427 real quartic_angles(int nbonds,
1428 const t_iatom forceatoms[], const t_iparams forceparams[],
1429 const rvec x[], rvec f[], rvec fshift[],
1430 const t_pbc *pbc, const t_graph *g,
1431 real lambda, real *dvdlambda,
1432 const t_mdatoms *md, t_fcdata *fcd,
1433 int *global_atom_index)
1435 int i, j, ai, aj, ak, t1, t2, type;
1437 real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1438 ivec jt, dt_ij, dt_kj;
1441 for (i = 0; (i < nbonds); )
1443 type = forceatoms[i++];
1444 ai = forceatoms[i++];
1445 aj = forceatoms[i++];
1446 ak = forceatoms[i++];
1448 theta = bond_angle(x[ai], x[aj], x[ak], pbc,
1449 r_ij, r_kj, &cos_theta, &t1, &t2); /* 41 */
1451 dt = theta - forceparams[type].qangle.theta*DEG2RAD; /* 2 */
1454 va = forceparams[type].qangle.c[0];
1456 for (j = 1; j <= 4; j++)
1458 c = forceparams[type].qangle.c[j];
1467 cos_theta2 = sqr(cos_theta); /* 1 */
1476 st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
1477 sth = st*cos_theta; /* 1 */
1481 fprintf(debug, "ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
1482 theta*RAD2DEG, va, dVdt);
1485 nrkj2 = iprod(r_kj, r_kj); /* 5 */
1486 nrij2 = iprod(r_ij, r_ij);
1488 cik = st*gmx_invsqrt(nrkj2*nrij2); /* 12 */
1489 cii = sth/nrij2; /* 10 */
1490 ckk = sth/nrkj2; /* 10 */
1492 for (m = 0; (m < DIM); m++) /* 39 */
1494 f_i[m] = -(cik*r_kj[m]-cii*r_ij[m]);
1495 f_k[m] = -(cik*r_ij[m]-ckk*r_kj[m]);
1496 f_j[m] = -f_i[m]-f_k[m];
1503 copy_ivec(SHIFT_IVEC(g, aj), jt);
1505 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1506 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1507 t1 = IVEC2IS(dt_ij);
1508 t2 = IVEC2IS(dt_kj);
1510 rvec_inc(fshift[t1], f_i);
1511 rvec_inc(fshift[CENTRAL], f_j);
1512 rvec_inc(fshift[t2], f_k);
1518 real dih_angle(const rvec xi, const rvec xj, const rvec xk, const rvec xl,
1520 rvec r_ij, rvec r_kj, rvec r_kl, rvec m, rvec n,
1521 real *sign, int *t1, int *t2, int *t3)
1525 *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /* 3 */
1526 *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /* 3 */
1527 *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /* 3 */
1529 cprod(r_ij, r_kj, m); /* 9 */
1530 cprod(r_kj, r_kl, n); /* 9 */
1531 phi = gmx_angle(m, n); /* 49 (assuming 25 for atan2) */
1532 ipr = iprod(r_ij, n); /* 5 */
1533 (*sign) = (ipr < 0.0) ? -1.0 : 1.0;
1534 phi = (*sign)*phi; /* 1 */
1542 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1543 * also calculates the pre-factor required for the dihedral force update.
1544 * Note that bv and buf should be register aligned.
1546 static gmx_inline void
1547 dih_angle_simd(const rvec *x,
1548 const int *ai, const int *aj, const int *ak, const int *al,
1549 const pbc_simd_t *pbc,
1552 gmx_mm_pr *mx_S, gmx_mm_pr *my_S, gmx_mm_pr *mz_S,
1553 gmx_mm_pr *nx_S, gmx_mm_pr *ny_S, gmx_mm_pr *nz_S,
1554 gmx_mm_pr *nrkj_m2_S,
1555 gmx_mm_pr *nrkj_n2_S,
1559 #define UNROLL GMX_SIMD_WIDTH_HERE
1561 gmx_mm_pr rijx_S, rijy_S, rijz_S;
1562 gmx_mm_pr rkjx_S, rkjy_S, rkjz_S;
1563 gmx_mm_pr rklx_S, rkly_S, rklz_S;
1564 gmx_mm_pr cx_S, cy_S, cz_S;
1568 gmx_mm_pr iprm_S, iprn_S;
1569 gmx_mm_pr nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1571 gmx_mm_pr fmin_S = gmx_set1_pr(GMX_FLOAT_MIN);
1572 /* Using -0.0 should lead to only the sign bit being set */
1573 gmx_mm_pr sign_mask_S = gmx_set1_pr(-0.0);
1575 for (s = 0; s < UNROLL; s++)
1577 /* If you can't use pbc_dx_simd below for PBC, e.g. because
1578 * you can't round in SIMD, use pbc_rvec_sub here.
1580 for (m = 0; m < DIM; m++)
1582 dr[s + (0*DIM + m)*UNROLL] = x[ai[s]][m] - x[aj[s]][m];
1583 dr[s + (1*DIM + m)*UNROLL] = x[ak[s]][m] - x[aj[s]][m];
1584 dr[s + (2*DIM + m)*UNROLL] = x[ak[s]][m] - x[al[s]][m];
1588 rijx_S = gmx_load_pr(dr + 0*UNROLL);
1589 rijy_S = gmx_load_pr(dr + 1*UNROLL);
1590 rijz_S = gmx_load_pr(dr + 2*UNROLL);
1591 rkjx_S = gmx_load_pr(dr + 3*UNROLL);
1592 rkjy_S = gmx_load_pr(dr + 4*UNROLL);
1593 rkjz_S = gmx_load_pr(dr + 5*UNROLL);
1594 rklx_S = gmx_load_pr(dr + 6*UNROLL);
1595 rkly_S = gmx_load_pr(dr + 7*UNROLL);
1596 rklz_S = gmx_load_pr(dr + 8*UNROLL);
1598 pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc);
1599 pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc);
1600 pbc_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc);
1602 gmx_cprod_pr(rijx_S, rijy_S, rijz_S,
1603 rkjx_S, rkjy_S, rkjz_S,
1606 gmx_cprod_pr(rkjx_S, rkjy_S, rkjz_S,
1607 rklx_S, rkly_S, rklz_S,
1610 gmx_cprod_pr(*mx_S, *my_S, *mz_S,
1611 *nx_S, *ny_S, *nz_S,
1612 &cx_S, &cy_S, &cz_S);
1614 cn_S = gmx_sqrt_pr(gmx_norm2_pr(cx_S, cy_S, cz_S));
1616 s_S = gmx_iprod_pr(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1618 /* Determine the dihedral angle, the sign might need correction */
1619 *phi_S = gmx_atan2_pr(cn_S, s_S);
1621 ipr_S = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
1622 *nx_S, *ny_S, *nz_S);
1624 iprm_S = gmx_norm2_pr(*mx_S, *my_S, *mz_S);
1625 iprn_S = gmx_norm2_pr(*nx_S, *ny_S, *nz_S);
1627 nrkj2_S = gmx_norm2_pr(rkjx_S, rkjy_S, rkjz_S);
1629 /* Avoid division by zero. When zero, the result is multiplied by 0
1630 * anyhow, so the 3 max below do not affect the final result.
1632 nrkj2_S = gmx_max_pr(nrkj2_S, fmin_S);
1633 nrkj_1_S = gmx_invsqrt_pr(nrkj2_S);
1634 nrkj_2_S = gmx_mul_pr(nrkj_1_S, nrkj_1_S);
1635 nrkj_S = gmx_mul_pr(nrkj2_S, nrkj_1_S);
1637 iprm_S = gmx_max_pr(iprm_S, fmin_S);
1638 iprn_S = gmx_max_pr(iprn_S, fmin_S);
1639 *nrkj_m2_S = gmx_mul_pr(nrkj_S, gmx_inv_pr(iprm_S));
1640 *nrkj_n2_S = gmx_mul_pr(nrkj_S, gmx_inv_pr(iprn_S));
1642 /* Set sign of the angle with the sign of ipr_S.
1643 * Since phi is currently positive, we can use OR instead of XOR.
1645 *phi_S = gmx_or_pr(*phi_S, gmx_and_pr(ipr_S, sign_mask_S));
1647 p_S = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
1648 rkjx_S, rkjy_S, rkjz_S);
1649 p_S = gmx_mul_pr(p_S, nrkj_2_S);
1651 q_S = gmx_iprod_pr(rklx_S, rkly_S, rklz_S,
1652 rkjx_S, rkjy_S, rkjz_S);
1653 q_S = gmx_mul_pr(q_S, nrkj_2_S);
1655 gmx_store_pr(p, p_S);
1656 gmx_store_pr(q, q_S);
1660 #endif /* SIMD_BONDEDS */
1663 void do_dih_fup(int i, int j, int k, int l, real ddphi,
1664 rvec r_ij, rvec r_kj, rvec r_kl,
1665 rvec m, rvec n, rvec f[], rvec fshift[],
1666 const t_pbc *pbc, const t_graph *g,
1667 const rvec x[], int t1, int t2, int t3)
1670 rvec f_i, f_j, f_k, f_l;
1671 rvec uvec, vvec, svec, dx_jl;
1672 real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1673 real a, b, p, q, toler;
1674 ivec jt, dt_ij, dt_kj, dt_lj;
1676 iprm = iprod(m, m); /* 5 */
1677 iprn = iprod(n, n); /* 5 */
1678 nrkj2 = iprod(r_kj, r_kj); /* 5 */
1679 toler = nrkj2*GMX_REAL_EPS;
1680 if ((iprm > toler) && (iprn > toler))
1682 nrkj_1 = gmx_invsqrt(nrkj2); /* 10 */
1683 nrkj_2 = nrkj_1*nrkj_1; /* 1 */
1684 nrkj = nrkj2*nrkj_1; /* 1 */
1685 a = -ddphi*nrkj/iprm; /* 11 */
1686 svmul(a, m, f_i); /* 3 */
1687 b = ddphi*nrkj/iprn; /* 11 */
1688 svmul(b, n, f_l); /* 3 */
1689 p = iprod(r_ij, r_kj); /* 5 */
1690 p *= nrkj_2; /* 1 */
1691 q = iprod(r_kl, r_kj); /* 5 */
1692 q *= nrkj_2; /* 1 */
1693 svmul(p, f_i, uvec); /* 3 */
1694 svmul(q, f_l, vvec); /* 3 */
1695 rvec_sub(uvec, vvec, svec); /* 3 */
1696 rvec_sub(f_i, svec, f_j); /* 3 */
1697 rvec_add(f_l, svec, f_k); /* 3 */
1698 rvec_inc(f[i], f_i); /* 3 */
1699 rvec_dec(f[j], f_j); /* 3 */
1700 rvec_dec(f[k], f_k); /* 3 */
1701 rvec_inc(f[l], f_l); /* 3 */
1705 copy_ivec(SHIFT_IVEC(g, j), jt);
1706 ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
1707 ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
1708 ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
1709 t1 = IVEC2IS(dt_ij);
1710 t2 = IVEC2IS(dt_kj);
1711 t3 = IVEC2IS(dt_lj);
1715 t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1722 rvec_inc(fshift[t1], f_i);
1723 rvec_dec(fshift[CENTRAL], f_j);
1724 rvec_dec(fshift[t2], f_k);
1725 rvec_inc(fshift[t3], f_l);
1730 /* As do_dih_fup above, but without shift forces */
1732 do_dih_fup_noshiftf(int i, int j, int k, int l, real ddphi,
1733 rvec r_ij, rvec r_kj, rvec r_kl,
1734 rvec m, rvec n, rvec f[])
1736 rvec f_i, f_j, f_k, f_l;
1737 rvec uvec, vvec, svec, dx_jl;
1738 real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1739 real a, b, p, q, toler;
1740 ivec jt, dt_ij, dt_kj, dt_lj;
1742 iprm = iprod(m, m); /* 5 */
1743 iprn = iprod(n, n); /* 5 */
1744 nrkj2 = iprod(r_kj, r_kj); /* 5 */
1745 toler = nrkj2*GMX_REAL_EPS;
1746 if ((iprm > toler) && (iprn > toler))
1748 nrkj_1 = gmx_invsqrt(nrkj2); /* 10 */
1749 nrkj_2 = nrkj_1*nrkj_1; /* 1 */
1750 nrkj = nrkj2*nrkj_1; /* 1 */
1751 a = -ddphi*nrkj/iprm; /* 11 */
1752 svmul(a, m, f_i); /* 3 */
1753 b = ddphi*nrkj/iprn; /* 11 */
1754 svmul(b, n, f_l); /* 3 */
1755 p = iprod(r_ij, r_kj); /* 5 */
1756 p *= nrkj_2; /* 1 */
1757 q = iprod(r_kl, r_kj); /* 5 */
1758 q *= nrkj_2; /* 1 */
1759 svmul(p, f_i, uvec); /* 3 */
1760 svmul(q, f_l, vvec); /* 3 */
1761 rvec_sub(uvec, vvec, svec); /* 3 */
1762 rvec_sub(f_i, svec, f_j); /* 3 */
1763 rvec_add(f_l, svec, f_k); /* 3 */
1764 rvec_inc(f[i], f_i); /* 3 */
1765 rvec_dec(f[j], f_j); /* 3 */
1766 rvec_dec(f[k], f_k); /* 3 */
1767 rvec_inc(f[l], f_l); /* 3 */
1771 /* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
1772 static gmx_inline void
1773 do_dih_fup_noshiftf_precalc(int i, int j, int k, int l,
1775 real f_i_x, real f_i_y, real f_i_z,
1776 real mf_l_x, real mf_l_y, real mf_l_z,
1779 rvec f_i, f_j, f_k, f_l;
1780 rvec uvec, vvec, svec;
1788 svmul(p, f_i, uvec);
1789 svmul(q, f_l, vvec);
1790 rvec_sub(uvec, vvec, svec);
1791 rvec_sub(f_i, svec, f_j);
1792 rvec_add(f_l, svec, f_k);
1793 rvec_inc(f[i], f_i);
1794 rvec_dec(f[j], f_j);
1795 rvec_dec(f[k], f_k);
1796 rvec_inc(f[l], f_l);
1800 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult,
1801 real phi, real lambda, real *V, real *F)
1803 real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1804 real L1 = 1.0 - lambda;
1805 real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
1806 real dph0 = (phiB - phiA)*DEG2RAD;
1807 real cp = L1*cpA + lambda*cpB;
1809 mdphi = mult*phi - ph0;
1811 ddphi = -cp*mult*sdphi;
1812 v1 = 1.0 + cos(mdphi);
1815 dvdlambda = (cpB - cpA)*v1 + cp*dph0*sdphi;
1822 /* That was 40 flops */
1826 dopdihs_noener(real cpA, real cpB, real phiA, real phiB, int mult,
1827 real phi, real lambda, real *F)
1829 real mdphi, sdphi, ddphi;
1830 real L1 = 1.0 - lambda;
1831 real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
1832 real cp = L1*cpA + lambda*cpB;
1834 mdphi = mult*phi - ph0;
1836 ddphi = -cp*mult*sdphi;
1840 /* That was 20 flops */
1844 dopdihs_mdphi(real cpA, real cpB, real phiA, real phiB, int mult,
1845 real phi, real lambda, real *cp, real *mdphi)
1847 real L1 = 1.0 - lambda;
1848 real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
1850 *cp = L1*cpA + lambda*cpB;
1852 *mdphi = mult*phi - ph0;
1855 static real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult,
1856 real phi, real lambda, real *V, real *F)
1857 /* similar to dopdihs, except for a minus sign *
1858 * and a different treatment of mult/phi0 */
1860 real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1861 real L1 = 1.0 - lambda;
1862 real ph0 = (L1*phiA + lambda*phiB)*DEG2RAD;
1863 real dph0 = (phiB - phiA)*DEG2RAD;
1864 real cp = L1*cpA + lambda*cpB;
1866 mdphi = mult*(phi-ph0);
1868 ddphi = cp*mult*sdphi;
1869 v1 = 1.0-cos(mdphi);
1872 dvdlambda = (cpB-cpA)*v1 + cp*dph0*sdphi;
1879 /* That was 40 flops */
1882 real pdihs(int nbonds,
1883 const t_iatom forceatoms[], const t_iparams forceparams[],
1884 const rvec x[], rvec f[], rvec fshift[],
1885 const t_pbc *pbc, const t_graph *g,
1886 real lambda, real *dvdlambda,
1887 const t_mdatoms *md, t_fcdata *fcd,
1888 int *global_atom_index)
1890 int i, type, ai, aj, ak, al;
1892 rvec r_ij, r_kj, r_kl, m, n;
1893 real phi, sign, ddphi, vpd, vtot;
1897 for (i = 0; (i < nbonds); )
1899 type = forceatoms[i++];
1900 ai = forceatoms[i++];
1901 aj = forceatoms[i++];
1902 ak = forceatoms[i++];
1903 al = forceatoms[i++];
1905 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1906 &sign, &t1, &t2, &t3); /* 84 */
1907 *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
1908 forceparams[type].pdihs.cpB,
1909 forceparams[type].pdihs.phiA,
1910 forceparams[type].pdihs.phiB,
1911 forceparams[type].pdihs.mult,
1912 phi, lambda, &vpd, &ddphi);
1915 do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
1916 f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
1919 fprintf(debug, "pdih: (%d,%d,%d,%d) phi=%g\n",
1920 ai, aj, ak, al, phi);
1927 void make_dp_periodic(real *dp) /* 1 flop? */
1929 /* dp cannot be outside (-pi,pi) */
1934 else if (*dp < -M_PI)
1941 /* As pdihs above, but without calculating energies and shift forces */
1943 pdihs_noener(int nbonds,
1944 const t_iatom forceatoms[], const t_iparams forceparams[],
1945 const rvec x[], rvec f[],
1946 const t_pbc *pbc, const t_graph *g,
1948 const t_mdatoms *md, t_fcdata *fcd,
1949 int *global_atom_index)
1951 int i, type, ai, aj, ak, al;
1953 rvec r_ij, r_kj, r_kl, m, n;
1954 real phi, sign, ddphi_tot, ddphi;
1956 for (i = 0; (i < nbonds); )
1958 ai = forceatoms[i+1];
1959 aj = forceatoms[i+2];
1960 ak = forceatoms[i+3];
1961 al = forceatoms[i+4];
1963 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1964 &sign, &t1, &t2, &t3);
1968 /* Loop over dihedrals working on the same atoms,
1969 * so we avoid recalculating angles and force distributions.
1973 type = forceatoms[i];
1974 dopdihs_noener(forceparams[type].pdihs.cpA,
1975 forceparams[type].pdihs.cpB,
1976 forceparams[type].pdihs.phiA,
1977 forceparams[type].pdihs.phiB,
1978 forceparams[type].pdihs.mult,
1979 phi, lambda, &ddphi);
1984 while (i < nbonds &&
1985 forceatoms[i+1] == ai &&
1986 forceatoms[i+2] == aj &&
1987 forceatoms[i+3] == ak &&
1988 forceatoms[i+4] == al);
1990 do_dih_fup_noshiftf(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f);
1997 /* As pdihs_noner above, but using SIMD to calculate many dihedrals at once */
1999 pdihs_noener_simd(int nbonds,
2000 const t_iatom forceatoms[], const t_iparams forceparams[],
2001 const rvec x[], rvec f[],
2002 const t_pbc *pbc, const t_graph *g,
2004 const t_mdatoms *md, t_fcdata *fcd,
2005 int *global_atom_index)
2007 #define UNROLL GMX_SIMD_WIDTH_HERE
2010 int type, ai[UNROLL], aj[UNROLL], ak[UNROLL], al[UNROLL];
2011 int t1[UNROLL], t2[UNROLL], t3[UNROLL];
2013 real dr_array[3*DIM*UNROLL+UNROLL], *dr;
2014 real buf_array[7*UNROLL+UNROLL], *buf;
2015 real *cp, *phi0, *mult, *phi, *p, *q, *sf_i, *msf_l;
2016 gmx_mm_pr phi0_S, phi_S;
2017 gmx_mm_pr mx_S, my_S, mz_S;
2018 gmx_mm_pr nx_S, ny_S, nz_S;
2019 gmx_mm_pr nrkj_m2_S, nrkj_n2_S;
2020 gmx_mm_pr cp_S, mdphi_S, mult_S;
2021 gmx_mm_pr sin_S, cos_S;
2023 gmx_mm_pr sf_i_S, msf_l_S;
2024 pbc_simd_t pbc_simd;
2026 /* Ensure SIMD register alignment */
2027 dr = gmx_simd_align_real(dr_array);
2028 buf = gmx_simd_align_real(buf_array);
2030 /* Extract aligned pointer for parameters and variables */
2031 cp = buf + 0*UNROLL;
2032 phi0 = buf + 1*UNROLL;
2033 mult = buf + 2*UNROLL;
2036 sf_i = buf + 5*UNROLL;
2037 msf_l = buf + 6*UNROLL;
2039 set_pbc_simd(pbc, &pbc_simd);
2041 /* nbonds is the number of dihedrals times nfa1, here we step UNROLL dihs */
2042 for (i = 0; (i < nbonds); i += UNROLL*nfa1)
2044 /* Collect atoms quadruplets for UNROLL dihedrals.
2045 * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2048 for (s = 0; s < UNROLL; s++)
2050 type = forceatoms[iu];
2051 ai[s] = forceatoms[iu+1];
2052 aj[s] = forceatoms[iu+2];
2053 ak[s] = forceatoms[iu+3];
2054 al[s] = forceatoms[iu+4];
2056 cp[s] = forceparams[type].pdihs.cpA;
2057 phi0[s] = forceparams[type].pdihs.phiA*DEG2RAD;
2058 mult[s] = forceparams[type].pdihs.mult;
2060 /* At the end fill the arrays with identical entries */
2061 if (iu + nfa1 < nbonds)
2067 /* Caclulate UNROLL dihedral angles at once */
2068 dih_angle_simd(x, ai, aj, ak, al, &pbc_simd,
2071 &mx_S, &my_S, &mz_S,
2072 &nx_S, &ny_S, &nz_S,
2077 cp_S = gmx_load_pr(cp);
2078 phi0_S = gmx_load_pr(phi0);
2079 mult_S = gmx_load_pr(mult);
2081 mdphi_S = gmx_sub_pr(gmx_mul_pr(mult_S, phi_S), phi0_S);
2083 /* Calculate UNROLL sines at once */
2084 gmx_sincos_pr(mdphi_S, &sin_S, &cos_S);
2085 mddphi_S = gmx_mul_pr(gmx_mul_pr(cp_S, mult_S), sin_S);
2086 sf_i_S = gmx_mul_pr(mddphi_S, nrkj_m2_S);
2087 msf_l_S = gmx_mul_pr(mddphi_S, nrkj_n2_S);
2089 /* After this m?_S will contain f[i] */
2090 mx_S = gmx_mul_pr(sf_i_S, mx_S);
2091 my_S = gmx_mul_pr(sf_i_S, my_S);
2092 mz_S = gmx_mul_pr(sf_i_S, mz_S);
2094 /* After this m?_S will contain -f[l] */
2095 nx_S = gmx_mul_pr(msf_l_S, nx_S);
2096 ny_S = gmx_mul_pr(msf_l_S, ny_S);
2097 nz_S = gmx_mul_pr(msf_l_S, nz_S);
2099 gmx_store_pr(dr + 0*UNROLL, mx_S);
2100 gmx_store_pr(dr + 1*UNROLL, my_S);
2101 gmx_store_pr(dr + 2*UNROLL, mz_S);
2102 gmx_store_pr(dr + 3*UNROLL, nx_S);
2103 gmx_store_pr(dr + 4*UNROLL, ny_S);
2104 gmx_store_pr(dr + 5*UNROLL, nz_S);
2110 do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s],
2115 dr[(DIM+XX)*UNROLL+s],
2116 dr[(DIM+YY)*UNROLL+s],
2117 dr[(DIM+ZZ)*UNROLL+s],
2122 while (s < UNROLL && iu < nbonds);
2127 #endif /* SIMD_BONDEDS */
2130 real idihs(int nbonds,
2131 const t_iatom forceatoms[], const t_iparams forceparams[],
2132 const rvec x[], rvec f[], rvec fshift[],
2133 const t_pbc *pbc, const t_graph *g,
2134 real lambda, real *dvdlambda,
2135 const t_mdatoms *md, t_fcdata *fcd,
2136 int *global_atom_index)
2138 int i, type, ai, aj, ak, al;
2140 real phi, phi0, dphi0, ddphi, sign, vtot;
2141 rvec r_ij, r_kj, r_kl, m, n;
2142 real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2147 for (i = 0; (i < nbonds); )
2149 type = forceatoms[i++];
2150 ai = forceatoms[i++];
2151 aj = forceatoms[i++];
2152 ak = forceatoms[i++];
2153 al = forceatoms[i++];
2155 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2156 &sign, &t1, &t2, &t3); /* 84 */
2158 /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2159 * force changes if we just apply a normal harmonic.
2160 * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2161 * This means we will never have the periodicity problem, unless
2162 * the dihedral is Pi away from phiO, which is very unlikely due to
2165 kA = forceparams[type].harmonic.krA;
2166 kB = forceparams[type].harmonic.krB;
2167 pA = forceparams[type].harmonic.rA;
2168 pB = forceparams[type].harmonic.rB;
2170 kk = L1*kA + lambda*kB;
2171 phi0 = (L1*pA + lambda*pB)*DEG2RAD;
2172 dphi0 = (pB - pA)*DEG2RAD;
2176 make_dp_periodic(&dp);
2183 dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
2185 do_dih_fup(ai, aj, ak, al, (real)(-ddphi), r_ij, r_kj, r_kl, m, n,
2186 f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
2191 fprintf(debug, "idih: (%d,%d,%d,%d) phi=%g\n",
2192 ai, aj, ak, al, phi);
2197 *dvdlambda += dvdl_term;
2202 real posres(int nbonds,
2203 const t_iatom forceatoms[], const t_iparams forceparams[],
2204 const rvec x[], rvec f[], rvec vir_diag,
2206 real lambda, real *dvdlambda,
2207 int refcoord_scaling, int ePBC, rvec comA, rvec comB)
2209 int i, ai, m, d, type, ki, npbcdim = 0;
2210 const t_iparams *pr;
2213 real posA, posB, ref = 0;
2214 rvec comA_sc, comB_sc, rdist, dpdl, pos, dx;
2215 gmx_bool bForceValid = TRUE;
2217 if ((f == NULL) || (vir_diag == NULL)) /* should both be null together! */
2219 bForceValid = FALSE;
2222 npbcdim = ePBC2npbcdim(ePBC);
2224 if (refcoord_scaling == erscCOM)
2226 clear_rvec(comA_sc);
2227 clear_rvec(comB_sc);
2228 for (m = 0; m < npbcdim; m++)
2230 for (d = m; d < npbcdim; d++)
2232 comA_sc[m] += comA[d]*pbc->box[d][m];
2233 comB_sc[m] += comB[d]*pbc->box[d][m];
2241 for (i = 0; (i < nbonds); )
2243 type = forceatoms[i++];
2244 ai = forceatoms[i++];
2245 pr = &forceparams[type];
2247 for (m = 0; m < DIM; m++)
2249 posA = forceparams[type].posres.pos0A[m];
2250 posB = forceparams[type].posres.pos0B[m];
2253 switch (refcoord_scaling)
2257 rdist[m] = L1*posA + lambda*posB;
2258 dpdl[m] = posB - posA;
2261 /* Box relative coordinates are stored for dimensions with pbc */
2262 posA *= pbc->box[m][m];
2263 posB *= pbc->box[m][m];
2264 for (d = m+1; d < npbcdim; d++)
2266 posA += forceparams[type].posres.pos0A[d]*pbc->box[d][m];
2267 posB += forceparams[type].posres.pos0B[d]*pbc->box[d][m];
2269 ref = L1*posA + lambda*posB;
2271 dpdl[m] = posB - posA;
2274 ref = L1*comA_sc[m] + lambda*comB_sc[m];
2275 rdist[m] = L1*posA + lambda*posB;
2276 dpdl[m] = comB_sc[m] - comA_sc[m] + posB - posA;
2279 gmx_fatal(FARGS, "No such scaling method implemented");
2284 ref = L1*posA + lambda*posB;
2286 dpdl[m] = posB - posA;
2289 /* We do pbc_dx with ref+rdist,
2290 * since with only ref we can be up to half a box vector wrong.
2292 pos[m] = ref + rdist[m];
2297 pbc_dx(pbc, x[ai], pos, dx);
2301 rvec_sub(x[ai], pos, dx);
2304 for (m = 0; (m < DIM); m++)
2306 kk = L1*pr->posres.fcA[m] + lambda*pr->posres.fcB[m];
2308 vtot += 0.5*kk*dx[m]*dx[m];
2310 0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
2313 /* Here we correct for the pbc_dx which included rdist */
2317 vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm;
2325 static real low_angres(int nbonds,
2326 const t_iatom forceatoms[], const t_iparams forceparams[],
2327 const rvec x[], rvec f[], rvec fshift[],
2328 const t_pbc *pbc, const t_graph *g,
2329 real lambda, real *dvdlambda,
2332 int i, m, type, ai, aj, ak, al;
2334 real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2335 rvec r_ij, r_kl, f_i, f_k = {0, 0, 0};
2336 real st, sth, nrij2, nrkl2, c, cij, ckl;
2339 t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2342 ak = al = 0; /* to avoid warnings */
2343 for (i = 0; i < nbonds; )
2345 type = forceatoms[i++];
2346 ai = forceatoms[i++];
2347 aj = forceatoms[i++];
2348 t1 = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij); /* 3 */
2351 ak = forceatoms[i++];
2352 al = forceatoms[i++];
2353 t2 = pbc_rvec_sub(pbc, x[al], x[ak], r_kl); /* 3 */
2362 cos_phi = cos_angle(r_ij, r_kl); /* 25 */
2363 phi = acos(cos_phi); /* 10 */
2365 *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2366 forceparams[type].pdihs.cpB,
2367 forceparams[type].pdihs.phiA,
2368 forceparams[type].pdihs.phiB,
2369 forceparams[type].pdihs.mult,
2370 phi, lambda, &vid, &dVdphi); /* 40 */
2374 cos_phi2 = sqr(cos_phi); /* 1 */
2377 st = -dVdphi*gmx_invsqrt(1 - cos_phi2); /* 12 */
2378 sth = st*cos_phi; /* 1 */
2379 nrij2 = iprod(r_ij, r_ij); /* 5 */
2380 nrkl2 = iprod(r_kl, r_kl); /* 5 */
2382 c = st*gmx_invsqrt(nrij2*nrkl2); /* 11 */
2383 cij = sth/nrij2; /* 10 */
2384 ckl = sth/nrkl2; /* 10 */
2386 for (m = 0; m < DIM; m++) /* 18+18 */
2388 f_i[m] = (c*r_kl[m]-cij*r_ij[m]);
2393 f_k[m] = (c*r_ij[m]-ckl*r_kl[m]);
2401 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
2404 rvec_inc(fshift[t1], f_i);
2405 rvec_dec(fshift[CENTRAL], f_i);
2410 ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
2413 rvec_inc(fshift[t2], f_k);
2414 rvec_dec(fshift[CENTRAL], f_k);
2419 return vtot; /* 184 / 157 (bZAxis) total */
2422 real angres(int nbonds,
2423 const t_iatom forceatoms[], const t_iparams forceparams[],
2424 const rvec x[], rvec f[], rvec fshift[],
2425 const t_pbc *pbc, const t_graph *g,
2426 real lambda, real *dvdlambda,
2427 const t_mdatoms *md, t_fcdata *fcd,
2428 int *global_atom_index)
2430 return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2431 lambda, dvdlambda, FALSE);
2434 real angresz(int nbonds,
2435 const t_iatom forceatoms[], const t_iparams forceparams[],
2436 const rvec x[], rvec f[], rvec fshift[],
2437 const t_pbc *pbc, const t_graph *g,
2438 real lambda, real *dvdlambda,
2439 const t_mdatoms *md, t_fcdata *fcd,
2440 int *global_atom_index)
2442 return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2443 lambda, dvdlambda, TRUE);
2446 real dihres(int nbonds,
2447 const t_iatom forceatoms[], const t_iparams forceparams[],
2448 const rvec x[], rvec f[], rvec fshift[],
2449 const t_pbc *pbc, const t_graph *g,
2450 real lambda, real *dvdlambda,
2451 const t_mdatoms *md, t_fcdata *fcd,
2452 int *global_atom_index)
2455 int ai, aj, ak, al, i, k, type, t1, t2, t3;
2456 real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2457 real phi, ddphi, ddp, ddp2, dp, sign, d2r, fc, L1;
2458 rvec r_ij, r_kj, r_kl, m, n;
2465 for (i = 0; (i < nbonds); )
2467 type = forceatoms[i++];
2468 ai = forceatoms[i++];
2469 aj = forceatoms[i++];
2470 ak = forceatoms[i++];
2471 al = forceatoms[i++];
2473 phi0A = forceparams[type].dihres.phiA*d2r;
2474 dphiA = forceparams[type].dihres.dphiA*d2r;
2475 kfacA = forceparams[type].dihres.kfacA;
2477 phi0B = forceparams[type].dihres.phiB*d2r;
2478 dphiB = forceparams[type].dihres.dphiB*d2r;
2479 kfacB = forceparams[type].dihres.kfacB;
2481 phi0 = L1*phi0A + lambda*phi0B;
2482 dphi = L1*dphiA + lambda*dphiB;
2483 kfac = L1*kfacA + lambda*kfacB;
2485 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2486 &sign, &t1, &t2, &t3);
2491 fprintf(debug, "dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
2492 k++, ai, aj, ak, al, phi0, dphi, kfac);
2494 /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2495 * force changes if we just apply a normal harmonic.
2496 * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2497 * This means we will never have the periodicity problem, unless
2498 * the dihedral is Pi away from phiO, which is very unlikely due to
2502 make_dp_periodic(&dp);
2508 else if (dp < -dphi)
2520 vtot += 0.5*kfac*ddp2;
2523 *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2524 /* lambda dependence from changing restraint distances */
2527 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2531 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2533 do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2534 f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
2541 real unimplemented(int nbonds,
2542 const t_iatom forceatoms[], const t_iparams forceparams[],
2543 const rvec x[], rvec f[], rvec fshift[],
2544 const t_pbc *pbc, const t_graph *g,
2545 real lambda, real *dvdlambda,
2546 const t_mdatoms *md, t_fcdata *fcd,
2547 int *global_atom_index)
2549 gmx_impl("*** you are using a not implemented function");
2551 return 0.0; /* To make the compiler happy */
2554 real rbdihs(int nbonds,
2555 const t_iatom forceatoms[], const t_iparams forceparams[],
2556 const rvec x[], rvec f[], rvec fshift[],
2557 const t_pbc *pbc, const t_graph *g,
2558 real lambda, real *dvdlambda,
2559 const t_mdatoms *md, t_fcdata *fcd,
2560 int *global_atom_index)
2562 const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2563 int type, ai, aj, ak, al, i, j;
2565 rvec r_ij, r_kj, r_kl, m, n;
2566 real parmA[NR_RBDIHS];
2567 real parmB[NR_RBDIHS];
2568 real parm[NR_RBDIHS];
2569 real cos_phi, phi, rbp, rbpBA;
2570 real v, sign, ddphi, sin_phi;
2572 real L1 = 1.0-lambda;
2576 for (i = 0; (i < nbonds); )
2578 type = forceatoms[i++];
2579 ai = forceatoms[i++];
2580 aj = forceatoms[i++];
2581 ak = forceatoms[i++];
2582 al = forceatoms[i++];
2584 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2585 &sign, &t1, &t2, &t3); /* 84 */
2587 /* Change to polymer convention */
2594 phi -= M_PI; /* 1 */
2598 /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2601 for (j = 0; (j < NR_RBDIHS); j++)
2603 parmA[j] = forceparams[type].rbdihs.rbcA[j];
2604 parmB[j] = forceparams[type].rbdihs.rbcB[j];
2605 parm[j] = L1*parmA[j]+lambda*parmB[j];
2607 /* Calculate cosine powers */
2608 /* Calculate the energy */
2609 /* Calculate the derivative */
2612 dvdl_term += (parmB[0]-parmA[0]);
2617 rbpBA = parmB[1]-parmA[1];
2618 ddphi += rbp*cosfac;
2621 dvdl_term += cosfac*rbpBA;
2623 rbpBA = parmB[2]-parmA[2];
2624 ddphi += c2*rbp*cosfac;
2627 dvdl_term += cosfac*rbpBA;
2629 rbpBA = parmB[3]-parmA[3];
2630 ddphi += c3*rbp*cosfac;
2633 dvdl_term += cosfac*rbpBA;
2635 rbpBA = parmB[4]-parmA[4];
2636 ddphi += c4*rbp*cosfac;
2639 dvdl_term += cosfac*rbpBA;
2641 rbpBA = parmB[5]-parmA[5];
2642 ddphi += c5*rbp*cosfac;
2645 dvdl_term += cosfac*rbpBA;
2647 ddphi = -ddphi*sin_phi; /* 11 */
2649 do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2650 f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
2653 *dvdlambda += dvdl_term;
2658 int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
2664 ip = ip + grid_spacing - 1;
2666 else if (ip > grid_spacing)
2668 ip = ip - grid_spacing - 1;
2677 im1 = grid_spacing - 1;
2679 else if (ip == grid_spacing-2)
2683 else if (ip == grid_spacing-1)
2697 real cmap_dihs(int nbonds,
2698 const t_iatom forceatoms[], const t_iparams forceparams[],
2699 const gmx_cmap_t *cmap_grid,
2700 const rvec x[], rvec f[], rvec fshift[],
2701 const t_pbc *pbc, const t_graph *g,
2702 real lambda, real *dvdlambda,
2703 const t_mdatoms *md, t_fcdata *fcd,
2704 int *global_atom_index)
2706 int i, j, k, n, idx;
2707 int ai, aj, ak, al, am;
2708 int a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
2710 int t11, t21, t31, t12, t22, t32;
2711 int iphi1, ip1m1, ip1p1, ip1p2;
2712 int iphi2, ip2m1, ip2p1, ip2p2;
2714 int pos1, pos2, pos3, pos4, tmp;
2716 real ty[4], ty1[4], ty2[4], ty12[4], tc[16], tx[16];
2717 real phi1, psi1, cos_phi1, sin_phi1, sign1, xphi1;
2718 real phi2, psi2, cos_phi2, sin_phi2, sign2, xphi2;
2719 real dx, xx, tt, tu, e, df1, df2, ddf1, ddf2, ddf12, vtot;
2720 real ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
2721 real ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
2722 real fg1, hg1, fga1, hgb1, gaa1, gbb1;
2723 real fg2, hg2, fga2, hgb2, gaa2, gbb2;
2726 rvec r1_ij, r1_kj, r1_kl, m1, n1;
2727 rvec r2_ij, r2_kj, r2_kl, m2, n2;
2728 rvec f1_i, f1_j, f1_k, f1_l;
2729 rvec f2_i, f2_j, f2_k, f2_l;
2730 rvec a1, b1, a2, b2;
2731 rvec f1, g1, h1, f2, g2, h2;
2732 rvec dtf1, dtg1, dth1, dtf2, dtg2, dth2;
2733 ivec jt1, dt1_ij, dt1_kj, dt1_lj;
2734 ivec jt2, dt2_ij, dt2_kj, dt2_lj;
2738 int loop_index[4][4] = {
2745 /* Total CMAP energy */
2748 for (n = 0; n < nbonds; )
2750 /* Five atoms are involved in the two torsions */
2751 type = forceatoms[n++];
2752 ai = forceatoms[n++];
2753 aj = forceatoms[n++];
2754 ak = forceatoms[n++];
2755 al = forceatoms[n++];
2756 am = forceatoms[n++];
2758 /* Which CMAP type is this */
2759 cmapA = forceparams[type].cmap.cmapA;
2760 cmapd = cmap_grid->cmapdata[cmapA].cmap;
2768 phi1 = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
2769 &sign1, &t11, &t21, &t31); /* 84 */
2771 cos_phi1 = cos(phi1);
2773 a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
2774 a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
2775 a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
2777 b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
2778 b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
2779 b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
2781 tmp = pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
2783 ra21 = iprod(a1, a1); /* 5 */
2784 rb21 = iprod(b1, b1); /* 5 */
2785 rg21 = iprod(r1_kj, r1_kj); /* 5 */
2791 rabr1 = sqrt(ra2r1*rb2r1);
2793 sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
2795 if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
2797 phi1 = asin(sin_phi1);
2807 phi1 = -M_PI - phi1;
2813 phi1 = acos(cos_phi1);
2821 xphi1 = phi1 + M_PI; /* 1 */
2823 /* Second torsion */
2829 phi2 = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
2830 &sign2, &t12, &t22, &t32); /* 84 */
2832 cos_phi2 = cos(phi2);
2834 a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
2835 a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
2836 a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
2838 b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
2839 b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
2840 b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
2842 tmp = pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
2844 ra22 = iprod(a2, a2); /* 5 */
2845 rb22 = iprod(b2, b2); /* 5 */
2846 rg22 = iprod(r2_kj, r2_kj); /* 5 */
2852 rabr2 = sqrt(ra2r2*rb2r2);
2854 sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
2856 if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
2858 phi2 = asin(sin_phi2);
2868 phi2 = -M_PI - phi2;
2874 phi2 = acos(cos_phi2);
2882 xphi2 = phi2 + M_PI; /* 1 */
2884 /* Range mangling */
2887 xphi1 = xphi1 + 2*M_PI;
2889 else if (xphi1 >= 2*M_PI)
2891 xphi1 = xphi1 - 2*M_PI;
2896 xphi2 = xphi2 + 2*M_PI;
2898 else if (xphi2 >= 2*M_PI)
2900 xphi2 = xphi2 - 2*M_PI;
2903 /* Number of grid points */
2904 dx = 2*M_PI / cmap_grid->grid_spacing;
2906 /* Where on the grid are we */
2907 iphi1 = (int)(xphi1/dx);
2908 iphi2 = (int)(xphi2/dx);
2910 iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
2911 iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
2913 pos1 = iphi1*cmap_grid->grid_spacing+iphi2;
2914 pos2 = ip1p1*cmap_grid->grid_spacing+iphi2;
2915 pos3 = ip1p1*cmap_grid->grid_spacing+ip2p1;
2916 pos4 = iphi1*cmap_grid->grid_spacing+ip2p1;
2918 ty[0] = cmapd[pos1*4];
2919 ty[1] = cmapd[pos2*4];
2920 ty[2] = cmapd[pos3*4];
2921 ty[3] = cmapd[pos4*4];
2923 ty1[0] = cmapd[pos1*4+1];
2924 ty1[1] = cmapd[pos2*4+1];
2925 ty1[2] = cmapd[pos3*4+1];
2926 ty1[3] = cmapd[pos4*4+1];
2928 ty2[0] = cmapd[pos1*4+2];
2929 ty2[1] = cmapd[pos2*4+2];
2930 ty2[2] = cmapd[pos3*4+2];
2931 ty2[3] = cmapd[pos4*4+2];
2933 ty12[0] = cmapd[pos1*4+3];
2934 ty12[1] = cmapd[pos2*4+3];
2935 ty12[2] = cmapd[pos3*4+3];
2936 ty12[3] = cmapd[pos4*4+3];
2938 /* Switch to degrees */
2939 dx = 360.0 / cmap_grid->grid_spacing;
2940 xphi1 = xphi1 * RAD2DEG;
2941 xphi2 = xphi2 * RAD2DEG;
2943 for (i = 0; i < 4; i++) /* 16 */
2946 tx[i+4] = ty1[i]*dx;
2947 tx[i+8] = ty2[i]*dx;
2948 tx[i+12] = ty12[i]*dx*dx;
2952 for (i = 0; i < 4; i++) /* 1056 */
2954 for (j = 0; j < 4; j++)
2957 for (k = 0; k < 16; k++)
2959 xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
2967 tt = (xphi1-iphi1*dx)/dx;
2968 tu = (xphi2-iphi2*dx)/dx;
2977 for (i = 3; i >= 0; i--)
2979 l1 = loop_index[i][3];
2980 l2 = loop_index[i][2];
2981 l3 = loop_index[i][1];
2983 e = tt * e + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
2984 df1 = tu * df1 + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
2985 df2 = tt * df2 + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
2986 ddf1 = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
2987 ddf2 = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
2990 ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
2991 3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
2996 ddf1 = ddf1 * fac * fac;
2997 ddf2 = ddf2 * fac * fac;
2998 ddf12 = ddf12 * fac * fac;
3003 /* Do forces - first torsion */
3004 fg1 = iprod(r1_ij, r1_kj);
3005 hg1 = iprod(r1_kl, r1_kj);
3006 fga1 = fg1*ra2r1*rgr1;
3007 hgb1 = hg1*rb2r1*rgr1;
3011 for (i = 0; i < DIM; i++)
3013 dtf1[i] = gaa1 * a1[i];
3014 dtg1[i] = fga1 * a1[i] - hgb1 * b1[i];
3015 dth1[i] = gbb1 * b1[i];
3017 f1[i] = df1 * dtf1[i];
3018 g1[i] = df1 * dtg1[i];
3019 h1[i] = df1 * dth1[i];
3022 f1_j[i] = -f1[i] - g1[i];
3023 f1_k[i] = h1[i] + g1[i];
3026 f[a1i][i] = f[a1i][i] + f1_i[i];
3027 f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3028 f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3029 f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3032 /* Do forces - second torsion */
3033 fg2 = iprod(r2_ij, r2_kj);
3034 hg2 = iprod(r2_kl, r2_kj);
3035 fga2 = fg2*ra2r2*rgr2;
3036 hgb2 = hg2*rb2r2*rgr2;
3040 for (i = 0; i < DIM; i++)
3042 dtf2[i] = gaa2 * a2[i];
3043 dtg2[i] = fga2 * a2[i] - hgb2 * b2[i];
3044 dth2[i] = gbb2 * b2[i];
3046 f2[i] = df2 * dtf2[i];
3047 g2[i] = df2 * dtg2[i];
3048 h2[i] = df2 * dth2[i];
3051 f2_j[i] = -f2[i] - g2[i];
3052 f2_k[i] = h2[i] + g2[i];
3055 f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3056 f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3057 f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3058 f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3064 copy_ivec(SHIFT_IVEC(g, a1j), jt1);
3065 ivec_sub(SHIFT_IVEC(g, a1i), jt1, dt1_ij);
3066 ivec_sub(SHIFT_IVEC(g, a1k), jt1, dt1_kj);
3067 ivec_sub(SHIFT_IVEC(g, a1l), jt1, dt1_lj);
3068 t11 = IVEC2IS(dt1_ij);
3069 t21 = IVEC2IS(dt1_kj);
3070 t31 = IVEC2IS(dt1_lj);
3072 copy_ivec(SHIFT_IVEC(g, a2j), jt2);
3073 ivec_sub(SHIFT_IVEC(g, a2i), jt2, dt2_ij);
3074 ivec_sub(SHIFT_IVEC(g, a2k), jt2, dt2_kj);
3075 ivec_sub(SHIFT_IVEC(g, a2l), jt2, dt2_lj);
3076 t12 = IVEC2IS(dt2_ij);
3077 t22 = IVEC2IS(dt2_kj);
3078 t32 = IVEC2IS(dt2_lj);
3082 t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3083 t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3091 rvec_inc(fshift[t11], f1_i);
3092 rvec_inc(fshift[CENTRAL], f1_j);
3093 rvec_inc(fshift[t21], f1_k);
3094 rvec_inc(fshift[t31], f1_l);
3096 rvec_inc(fshift[t21], f2_i);
3097 rvec_inc(fshift[CENTRAL], f2_j);
3098 rvec_inc(fshift[t22], f2_k);
3099 rvec_inc(fshift[t32], f2_l);
3106 /***********************************************************
3108 * G R O M O S 9 6 F U N C T I O N S
3110 ***********************************************************/
3111 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
3114 const real half = 0.5;
3115 real L1, kk, x0, dx, dx2;
3116 real v, f, dvdlambda;
3119 kk = L1*kA+lambda*kB;
3120 x0 = L1*xA+lambda*xB;
3127 dvdlambda = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
3134 /* That was 21 flops */
3137 real g96bonds(int nbonds,
3138 const t_iatom forceatoms[], const t_iparams forceparams[],
3139 const rvec x[], rvec f[], rvec fshift[],
3140 const t_pbc *pbc, const t_graph *g,
3141 real lambda, real *dvdlambda,
3142 const t_mdatoms *md, t_fcdata *fcd,
3143 int *global_atom_index)
3145 int i, m, ki, ai, aj, type;
3146 real dr2, fbond, vbond, fij, vtot;
3151 for (i = 0; (i < nbonds); )
3153 type = forceatoms[i++];
3154 ai = forceatoms[i++];
3155 aj = forceatoms[i++];
3157 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
3158 dr2 = iprod(dx, dx); /* 5 */
3160 *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3161 forceparams[type].harmonic.krB,
3162 forceparams[type].harmonic.rA,
3163 forceparams[type].harmonic.rB,
3164 dr2, lambda, &vbond, &fbond);
3166 vtot += 0.5*vbond; /* 1*/
3170 fprintf(debug, "G96-BONDS: dr = %10g vbond = %10g fbond = %10g\n",
3171 sqrt(dr2), vbond, fbond);
3177 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3180 for (m = 0; (m < DIM); m++) /* 15 */
3185 fshift[ki][m] += fij;
3186 fshift[CENTRAL][m] -= fij;
3192 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
3193 rvec r_ij, rvec r_kj,
3195 /* Return value is the angle between the bonds i-j and j-k */
3199 *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /* 3 */
3200 *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /* 3 */
3202 costh = cos_angle(r_ij, r_kj); /* 25 */
3207 real g96angles(int nbonds,
3208 const t_iatom forceatoms[], const t_iparams forceparams[],
3209 const rvec x[], rvec f[], rvec fshift[],
3210 const t_pbc *pbc, const t_graph *g,
3211 real lambda, real *dvdlambda,
3212 const t_mdatoms *md, t_fcdata *fcd,
3213 int *global_atom_index)
3215 int i, ai, aj, ak, type, m, t1, t2;
3217 real cos_theta, dVdt, va, vtot;
3218 real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3220 ivec jt, dt_ij, dt_kj;
3223 for (i = 0; (i < nbonds); )
3225 type = forceatoms[i++];
3226 ai = forceatoms[i++];
3227 aj = forceatoms[i++];
3228 ak = forceatoms[i++];
3230 cos_theta = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3232 *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3233 forceparams[type].harmonic.krB,
3234 forceparams[type].harmonic.rA,
3235 forceparams[type].harmonic.rB,
3236 cos_theta, lambda, &va, &dVdt);
3239 rij_1 = gmx_invsqrt(iprod(r_ij, r_ij));
3240 rkj_1 = gmx_invsqrt(iprod(r_kj, r_kj));
3241 rij_2 = rij_1*rij_1;
3242 rkj_2 = rkj_1*rkj_1;
3243 rijrkj_1 = rij_1*rkj_1; /* 23 */
3248 fprintf(debug, "G96ANGLES: costheta = %10g vth = %10g dV/dct = %10g\n",
3249 cos_theta, va, dVdt);
3252 for (m = 0; (m < DIM); m++) /* 42 */
3254 f_i[m] = dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
3255 f_k[m] = dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
3256 f_j[m] = -f_i[m]-f_k[m];
3264 copy_ivec(SHIFT_IVEC(g, aj), jt);
3266 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3267 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3268 t1 = IVEC2IS(dt_ij);
3269 t2 = IVEC2IS(dt_kj);
3271 rvec_inc(fshift[t1], f_i);
3272 rvec_inc(fshift[CENTRAL], f_j);
3273 rvec_inc(fshift[t2], f_k); /* 9 */
3279 real cross_bond_bond(int nbonds,
3280 const t_iatom forceatoms[], const t_iparams forceparams[],
3281 const rvec x[], rvec f[], rvec fshift[],
3282 const t_pbc *pbc, const t_graph *g,
3283 real lambda, real *dvdlambda,
3284 const t_mdatoms *md, t_fcdata *fcd,
3285 int *global_atom_index)
3287 /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3290 int i, ai, aj, ak, type, m, t1, t2;
3292 real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3294 ivec jt, dt_ij, dt_kj;
3297 for (i = 0; (i < nbonds); )
3299 type = forceatoms[i++];
3300 ai = forceatoms[i++];
3301 aj = forceatoms[i++];
3302 ak = forceatoms[i++];
3303 r1e = forceparams[type].cross_bb.r1e;
3304 r2e = forceparams[type].cross_bb.r2e;
3305 krr = forceparams[type].cross_bb.krr;
3307 /* Compute distance vectors ... */
3308 t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3309 t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3311 /* ... and their lengths */
3315 /* Deviations from ideality */
3319 /* Energy (can be negative!) */
3324 svmul(-krr*s2/r1, r_ij, f_i);
3325 svmul(-krr*s1/r2, r_kj, f_k);
3327 for (m = 0; (m < DIM); m++) /* 12 */
3329 f_j[m] = -f_i[m] - f_k[m];
3338 copy_ivec(SHIFT_IVEC(g, aj), jt);
3340 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3341 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3342 t1 = IVEC2IS(dt_ij);
3343 t2 = IVEC2IS(dt_kj);
3345 rvec_inc(fshift[t1], f_i);
3346 rvec_inc(fshift[CENTRAL], f_j);
3347 rvec_inc(fshift[t2], f_k); /* 9 */
3353 real cross_bond_angle(int nbonds,
3354 const t_iatom forceatoms[], const t_iparams forceparams[],
3355 const rvec x[], rvec f[], rvec fshift[],
3356 const t_pbc *pbc, const t_graph *g,
3357 real lambda, real *dvdlambda,
3358 const t_mdatoms *md, t_fcdata *fcd,
3359 int *global_atom_index)
3361 /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3364 int i, ai, aj, ak, type, m, t1, t2, t3;
3365 rvec r_ij, r_kj, r_ik;
3366 real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3368 ivec jt, dt_ij, dt_kj;
3371 for (i = 0; (i < nbonds); )
3373 type = forceatoms[i++];
3374 ai = forceatoms[i++];
3375 aj = forceatoms[i++];
3376 ak = forceatoms[i++];
3377 r1e = forceparams[type].cross_ba.r1e;
3378 r2e = forceparams[type].cross_ba.r2e;
3379 r3e = forceparams[type].cross_ba.r3e;
3380 krt = forceparams[type].cross_ba.krt;
3382 /* Compute distance vectors ... */
3383 t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3384 t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3385 t3 = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3387 /* ... and their lengths */
3392 /* Deviations from ideality */
3397 /* Energy (can be negative!) */
3398 vrt = krt*s3*(s1+s2);
3404 k3 = -krt*(s1+s2)/r3;
3405 for (m = 0; (m < DIM); m++)
3407 f_i[m] = k1*r_ij[m] + k3*r_ik[m];
3408 f_k[m] = k2*r_kj[m] - k3*r_ik[m];
3409 f_j[m] = -f_i[m] - f_k[m];
3412 for (m = 0; (m < DIM); m++) /* 12 */
3422 copy_ivec(SHIFT_IVEC(g, aj), jt);
3424 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3425 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3426 t1 = IVEC2IS(dt_ij);
3427 t2 = IVEC2IS(dt_kj);
3429 rvec_inc(fshift[t1], f_i);
3430 rvec_inc(fshift[CENTRAL], f_j);
3431 rvec_inc(fshift[t2], f_k); /* 9 */
3437 static real bonded_tab(const char *type, int table_nr,
3438 const bondedtable_t *table, real kA, real kB, real r,
3439 real lambda, real *V, real *F)
3441 real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3443 real v, f, dvdlambda;
3445 k = (1.0 - lambda)*kA + lambda*kB;
3447 tabscale = table->scale;
3448 VFtab = table->data;
3454 gmx_fatal(FARGS, "A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3455 type, table_nr, r, n0, n0+1, table->n);
3462 Geps = VFtab[nnn+2]*eps;
3463 Heps2 = VFtab[nnn+3]*eps2;
3464 Fp = Ft + Geps + Heps2;
3466 FF = Fp + Geps + 2.0*Heps2;
3468 *F = -k*FF*tabscale;
3470 dvdlambda = (kB - kA)*VV;
3474 /* That was 22 flops */
3477 real tab_bonds(int nbonds,
3478 const t_iatom forceatoms[], const t_iparams forceparams[],
3479 const rvec x[], rvec f[], rvec fshift[],
3480 const t_pbc *pbc, const t_graph *g,
3481 real lambda, real *dvdlambda,
3482 const t_mdatoms *md, t_fcdata *fcd,
3483 int *global_atom_index)
3485 int i, m, ki, ai, aj, type, table;
3486 real dr, dr2, fbond, vbond, fij, vtot;
3491 for (i = 0; (i < nbonds); )
3493 type = forceatoms[i++];
3494 ai = forceatoms[i++];
3495 aj = forceatoms[i++];
3497 ki = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /* 3 */
3498 dr2 = iprod(dx, dx); /* 5 */
3499 dr = dr2*gmx_invsqrt(dr2); /* 10 */
3501 table = forceparams[type].tab.table;
3503 *dvdlambda += bonded_tab("bond", table,
3504 &fcd->bondtab[table],
3505 forceparams[type].tab.kA,
3506 forceparams[type].tab.kB,
3507 dr, lambda, &vbond, &fbond); /* 22 */
3515 vtot += vbond; /* 1*/
3516 fbond *= gmx_invsqrt(dr2); /* 6 */
3520 fprintf(debug, "TABBONDS: dr = %10g vbond = %10g fbond = %10g\n",
3526 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3529 for (m = 0; (m < DIM); m++) /* 15 */
3534 fshift[ki][m] += fij;
3535 fshift[CENTRAL][m] -= fij;
3541 real tab_angles(int nbonds,
3542 const t_iatom forceatoms[], const t_iparams forceparams[],
3543 const rvec x[], rvec f[], rvec fshift[],
3544 const t_pbc *pbc, const t_graph *g,
3545 real lambda, real *dvdlambda,
3546 const t_mdatoms *md, t_fcdata *fcd,
3547 int *global_atom_index)
3549 int i, ai, aj, ak, t1, t2, type, table;
3551 real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3552 ivec jt, dt_ij, dt_kj;
3555 for (i = 0; (i < nbonds); )
3557 type = forceatoms[i++];
3558 ai = forceatoms[i++];
3559 aj = forceatoms[i++];
3560 ak = forceatoms[i++];
3562 theta = bond_angle(x[ai], x[aj], x[ak], pbc,
3563 r_ij, r_kj, &cos_theta, &t1, &t2); /* 41 */
3565 table = forceparams[type].tab.table;
3567 *dvdlambda += bonded_tab("angle", table,
3568 &fcd->angletab[table],
3569 forceparams[type].tab.kA,
3570 forceparams[type].tab.kB,
3571 theta, lambda, &va, &dVdt); /* 22 */
3574 cos_theta2 = sqr(cos_theta); /* 1 */
3583 st = dVdt*gmx_invsqrt(1 - cos_theta2); /* 12 */
3584 sth = st*cos_theta; /* 1 */
3588 fprintf(debug, "ANGLES: theta = %10g vth = %10g dV/dtheta = %10g\n",
3589 theta*RAD2DEG, va, dVdt);
3592 nrkj2 = iprod(r_kj, r_kj); /* 5 */
3593 nrij2 = iprod(r_ij, r_ij);
3595 cik = st*gmx_invsqrt(nrkj2*nrij2); /* 12 */
3596 cii = sth/nrij2; /* 10 */
3597 ckk = sth/nrkj2; /* 10 */
3599 for (m = 0; (m < DIM); m++) /* 39 */
3601 f_i[m] = -(cik*r_kj[m]-cii*r_ij[m]);
3602 f_k[m] = -(cik*r_ij[m]-ckk*r_kj[m]);
3603 f_j[m] = -f_i[m]-f_k[m];
3610 copy_ivec(SHIFT_IVEC(g, aj), jt);
3612 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3613 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3614 t1 = IVEC2IS(dt_ij);
3615 t2 = IVEC2IS(dt_kj);
3617 rvec_inc(fshift[t1], f_i);
3618 rvec_inc(fshift[CENTRAL], f_j);
3619 rvec_inc(fshift[t2], f_k);
3625 real tab_dihs(int nbonds,
3626 const t_iatom forceatoms[], const t_iparams forceparams[],
3627 const rvec x[], rvec f[], rvec fshift[],
3628 const t_pbc *pbc, const t_graph *g,
3629 real lambda, real *dvdlambda,
3630 const t_mdatoms *md, t_fcdata *fcd,
3631 int *global_atom_index)
3633 int i, type, ai, aj, ak, al, table;
3635 rvec r_ij, r_kj, r_kl, m, n;
3636 real phi, sign, ddphi, vpd, vtot;
3639 for (i = 0; (i < nbonds); )
3641 type = forceatoms[i++];
3642 ai = forceatoms[i++];
3643 aj = forceatoms[i++];
3644 ak = forceatoms[i++];
3645 al = forceatoms[i++];
3647 phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
3648 &sign, &t1, &t2, &t3); /* 84 */
3650 table = forceparams[type].tab.table;
3652 /* Hopefully phi+M_PI never results in values < 0 */
3653 *dvdlambda += bonded_tab("dihedral", table,
3654 &fcd->dihtab[table],
3655 forceparams[type].tab.kA,
3656 forceparams[type].tab.kB,
3657 phi+M_PI, lambda, &vpd, &ddphi);
3660 do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
3661 f, fshift, pbc, g, x, t1, t2, t3); /* 112 */
3664 fprintf(debug, "pdih: (%d,%d,%d,%d) phi=%g\n",
3665 ai, aj, ak, al, phi);
3673 calc_bonded_reduction_mask(const t_idef *idef,
3678 int ftype, nb, nat1, nb0, nb1, i, a;
3682 for (ftype = 0; ftype < F_NRE; ftype++)
3684 if (interaction_function[ftype].flags & IF_BOND &&
3685 !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
3686 (ftype<F_GB12 || ftype>F_GB14))
3688 nb = idef->il[ftype].nr;
3691 nat1 = interaction_function[ftype].nratoms + 1;
3693 /* Divide this interaction equally over the threads.
3694 * This is not stored: should match division in calc_bonds.
3696 nb0 = (((nb/nat1)* t )/nt)*nat1;
3697 nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
3699 for (i = nb0; i < nb1; i += nat1)
3701 for (a = 1; a < nat1; a++)
3703 mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
3713 void init_bonded_thread_force_reduction(t_forcerec *fr,
3716 #define MAX_BLOCK_BITS 32
3720 if (fr->nthreads <= 1)
3727 /* We divide the force array in a maximum of 32 blocks.
3728 * Minimum force block reduction size is 2^6=64.
3731 while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
3737 fprintf(debug, "bonded force buffer block atom shift %d bits\n",
3741 /* Determine to which blocks each thread's bonded force calculation
3742 * contributes. Store this is a mask for each thread.
3744 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
3745 for (t = 1; t < fr->nthreads; t++)
3747 fr->f_t[t].red_mask =
3748 calc_bonded_reduction_mask(idef, fr->red_ashift, t, fr->nthreads);
3751 /* Determine the maximum number of blocks we need to reduce over */
3754 for (t = 0; t < fr->nthreads; t++)
3757 for (b = 0; b < MAX_BLOCK_BITS; b++)
3759 if (fr->f_t[t].red_mask & (1U<<b))
3761 fr->red_nblock = max(fr->red_nblock, b+1);
3767 fprintf(debug, "thread %d flags %x count %d\n",
3768 t, fr->f_t[t].red_mask, c);
3774 fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
3775 fr->red_nblock, 1<<fr->red_ashift);
3776 fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
3777 ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
3778 ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
3782 static void zero_thread_forces(f_thread_t *f_t, int n,
3783 int nblock, int blocksize)
3785 int b, a0, a1, a, i, j;
3787 if (n > f_t->f_nalloc)
3789 f_t->f_nalloc = over_alloc_large(n);
3790 srenew(f_t->f, f_t->f_nalloc);
3793 if (f_t->red_mask != 0)
3795 for (b = 0; b < nblock; b++)
3797 if (f_t->red_mask && (1U<<b))
3800 a1 = min((b+1)*blocksize, n);
3801 for (a = a0; a < a1; a++)
3803 clear_rvec(f_t->f[a]);
3808 for (i = 0; i < SHIFTS; i++)
3810 clear_rvec(f_t->fshift[i]);
3812 for (i = 0; i < F_NRE; i++)
3816 for (i = 0; i < egNR; i++)
3818 for (j = 0; j < f_t->grpp.nener; j++)
3820 f_t->grpp.ener[i][j] = 0;
3823 for (i = 0; i < efptNR; i++)
3829 static void reduce_thread_force_buffer(int n, rvec *f,
3830 int nthreads, f_thread_t *f_t,
3831 int nblock, int block_size)
3833 /* The max thread number is arbitrary,
3834 * we used a fixed number to avoid memory management.
3835 * Using more than 16 threads is probably never useful performance wise.
3837 #define MAX_BONDED_THREADS 256
3840 if (nthreads > MAX_BONDED_THREADS)
3842 gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads",
3843 MAX_BONDED_THREADS);
3846 /* This reduction can run on any number of threads,
3847 * independently of nthreads.
3849 #pragma omp parallel for num_threads(nthreads) schedule(static)
3850 for (b = 0; b < nblock; b++)
3852 rvec *fp[MAX_BONDED_THREADS];
3856 /* Determine which threads contribute to this block */
3858 for (ft = 1; ft < nthreads; ft++)
3860 if (f_t[ft].red_mask & (1U<<b))
3862 fp[nfb++] = f_t[ft].f;
3867 /* Reduce force buffers for threads that contribute */
3869 a1 = (b+1)*block_size;
3871 for (a = a0; a < a1; a++)
3873 for (fb = 0; fb < nfb; fb++)
3875 rvec_inc(f[a], fp[fb][a]);
3882 static void reduce_thread_forces(int n, rvec *f, rvec *fshift,
3883 real *ener, gmx_grppairener_t *grpp, real *dvdl,
3884 int nthreads, f_thread_t *f_t,
3885 int nblock, int block_size,
3886 gmx_bool bCalcEnerVir,
3891 /* Reduce the bonded force buffer */
3892 reduce_thread_force_buffer(n, f, nthreads, f_t, nblock, block_size);
3895 /* When necessary, reduce energy and virial using one thread only */
3900 for (i = 0; i < SHIFTS; i++)
3902 for (t = 1; t < nthreads; t++)
3904 rvec_inc(fshift[i], f_t[t].fshift[i]);
3907 for (i = 0; i < F_NRE; i++)
3909 for (t = 1; t < nthreads; t++)
3911 ener[i] += f_t[t].ener[i];
3914 for (i = 0; i < egNR; i++)
3916 for (j = 0; j < f_t[1].grpp.nener; j++)
3918 for (t = 1; t < nthreads; t++)
3921 grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
3927 for (i = 0; i < efptNR; i++)
3930 for (t = 1; t < nthreads; t++)
3932 dvdl[i] += f_t[t].dvdl[i];
3939 static real calc_one_bond(FILE *fplog, int thread,
3940 int ftype, const t_idef *idef,
3941 rvec x[], rvec f[], rvec fshift[],
3943 const t_pbc *pbc, const t_graph *g,
3944 gmx_enerdata_t *enerd, gmx_grppairener_t *grpp,
3946 real *lambda, real *dvdl,
3947 const t_mdatoms *md, t_fcdata *fcd,
3948 gmx_bool bCalcEnerVir,
3949 int *global_atom_index, gmx_bool bPrintSepPot)
3951 int ind, nat1, nbonds, efptFTYPE;
3956 if (IS_RESTRAINT_TYPE(ftype))
3958 efptFTYPE = efptRESTRAINT;
3962 efptFTYPE = efptBONDED;
3965 if (interaction_function[ftype].flags & IF_BOND &&
3966 !(ftype == F_CONNBONDS || ftype == F_POSRES))
3968 ind = interaction_function[ftype].nrnb_ind;
3969 nat1 = interaction_function[ftype].nratoms + 1;
3970 nbonds = idef->il[ftype].nr/nat1;
3971 iatoms = idef->il[ftype].iatoms;
3973 nb0 = ((nbonds* thread )/(fr->nthreads))*nat1;
3974 nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
3976 if (!IS_LISTED_LJ_C(ftype))
3978 if (ftype == F_CMAP)
3980 v = cmap_dihs(nbn, iatoms+nb0,
3981 idef->iparams, &idef->cmap_grid,
3982 (const rvec*)x, f, fshift,
3983 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
3984 md, fcd, global_atom_index);
3987 else if (ftype == F_ANGLES &&
3988 !bCalcEnerVir && fr->efep == efepNO)
3990 /* No energies, shift forces, dvdl */
3991 angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
3994 pbc, g, lambda[efptFTYPE], md, fcd,
3999 else if (ftype == F_PDIHS &&
4000 !bCalcEnerVir && fr->efep == efepNO)
4002 /* No energies, shift forces, dvdl */
4003 #ifndef SIMD_BONDEDS
4008 (nbn, idef->il[ftype].iatoms+nb0,
4011 pbc, g, lambda[efptFTYPE], md, fcd,
4017 v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
4019 (const rvec*)x, f, fshift,
4020 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
4021 md, fcd, global_atom_index);
4025 fprintf(fplog, " %-23s #%4d V %12.5e dVdl %12.5e\n",
4026 interaction_function[ftype].longname,
4027 nbonds/nat1, v, lambda[efptFTYPE]);
4032 v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
4033 pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
4037 fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
4038 interaction_function[ftype].longname,
4039 interaction_function[F_LJ14].longname, nbonds/nat1, dvdl[efptVDW]);
4040 fprintf(fplog, " %-5s + %-15s #%4d dVdl %12.5e\n",
4041 interaction_function[ftype].longname,
4042 interaction_function[F_COUL14].longname, nbonds/nat1, dvdl[efptCOUL]);
4045 if (ind != -1 && thread == 0)
4047 inc_nrnb(nrnb, ind, nbonds);
4054 /* WARNING! THIS FUNCTION MUST EXACTLY TRACK THE calc
4055 function, or horrible things will happen when doing free energy
4056 calculations! In a good coding world, this would not be a
4057 different function, but for speed reasons, it needs to be made a
4058 separate function. TODO for 5.0 - figure out a way to reorganize
4059 to reduce duplication.
4062 static real calc_one_bond_foreign(FILE *fplog, int ftype, const t_idef *idef,
4063 rvec x[], rvec f[], t_forcerec *fr,
4064 const t_pbc *pbc, const t_graph *g,
4065 gmx_grppairener_t *grpp, t_nrnb *nrnb,
4066 real *lambda, real *dvdl,
4067 const t_mdatoms *md, t_fcdata *fcd,
4068 int *global_atom_index, gmx_bool bPrintSepPot)
4070 int ind, nat1, nbonds, efptFTYPE, nbonds_np;
4074 if (IS_RESTRAINT_TYPE(ftype))
4076 efptFTYPE = efptRESTRAINT;
4080 efptFTYPE = efptBONDED;
4083 if (ftype < F_GB12 || ftype > F_GB14)
4085 if (interaction_function[ftype].flags & IF_BOND &&
4086 !(ftype == F_CONNBONDS || ftype == F_POSRES))
4088 ind = interaction_function[ftype].nrnb_ind;
4089 nat1 = interaction_function[ftype].nratoms+1;
4090 nbonds_np = idef->il[ftype].nr_nonperturbed;
4091 nbonds = idef->il[ftype].nr - nbonds_np;
4092 iatoms = idef->il[ftype].iatoms + nbonds_np;
4095 if (!IS_LISTED_LJ_C(ftype))
4097 if (ftype == F_CMAP)
4099 v = cmap_dihs(nbonds, iatoms,
4100 idef->iparams, &idef->cmap_grid,
4101 (const rvec*)x, f, fr->fshift,
4102 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
4107 v = interaction_function[ftype].ifunc(nbonds, iatoms,
4109 (const rvec*)x, f, fr->fshift,
4110 pbc, g, lambda[efptFTYPE], &dvdl[efptFTYPE],
4111 md, fcd, global_atom_index);
4116 v = do_nonbonded_listed(ftype, nbonds, iatoms,
4118 (const rvec*)x, f, fr->fshift,
4119 pbc, g, lambda, dvdl,
4120 md, fr, grpp, global_atom_index);
4124 inc_nrnb(nrnb, ind, nbonds/nat1);
4132 void calc_bonds(FILE *fplog, const gmx_multisim_t *ms,
4134 rvec x[], history_t *hist,
4135 rvec f[], t_forcerec *fr,
4136 const t_pbc *pbc, const t_graph *g,
4137 gmx_enerdata_t *enerd, t_nrnb *nrnb,
4139 const t_mdatoms *md,
4140 t_fcdata *fcd, int *global_atom_index,
4141 t_atomtypes *atype, gmx_genborn_t *born,
4143 gmx_bool bPrintSepPot, gmx_large_int_t step)
4145 gmx_bool bCalcEnerVir;
4147 real v, dvdl[efptNR], dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
4148 of lambda, which will be thrown away in the end*/
4149 const t_pbc *pbc_null;
4153 bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
4155 for (i = 0; i < efptNR; i++)
4169 fprintf(fplog, "Step %s: bonded V and dVdl for this node\n",
4170 gmx_step_str(step, buf));
4176 p_graph(debug, "Bondage is fun", g);
4180 /* Do pre force calculation stuff which might require communication */
4181 if (idef->il[F_ORIRES].nr)
4183 enerd->term[F_ORIRESDEV] =
4184 calc_orires_dev(ms, idef->il[F_ORIRES].nr,
4185 idef->il[F_ORIRES].iatoms,
4186 idef->iparams, md, (const rvec*)x,
4187 pbc_null, fcd, hist);
4189 if (idef->il[F_DISRES].nr)
4191 calc_disres_R_6(ms, idef->il[F_DISRES].nr,
4192 idef->il[F_DISRES].iatoms,
4193 idef->iparams, (const rvec*)x, pbc_null,
4197 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
4198 for (thread = 0; thread < fr->nthreads; thread++)
4200 int ftype, nbonds, ind, nat1;
4205 gmx_grppairener_t *grpp;
4211 fshift = fr->fshift;
4213 grpp = &enerd->grpp;
4218 zero_thread_forces(&fr->f_t[thread], fr->natoms_force,
4219 fr->red_nblock, 1<<fr->red_ashift);
4221 ft = fr->f_t[thread].f;
4222 fshift = fr->f_t[thread].fshift;
4223 epot = fr->f_t[thread].ener;
4224 grpp = &fr->f_t[thread].grpp;
4225 dvdlt = fr->f_t[thread].dvdl;
4227 /* Loop over all bonded force types to calculate the bonded forces */
4228 for (ftype = 0; (ftype < F_NRE); ftype++)
4230 if (idef->il[ftype].nr > 0 &&
4231 (interaction_function[ftype].flags & IF_BOND) &&
4232 (ftype < F_GB12 || ftype > F_GB14) &&
4233 !(ftype == F_CONNBONDS || ftype == F_POSRES))
4235 v = calc_one_bond(fplog, thread, ftype, idef, x,
4236 ft, fshift, fr, pbc_null, g, enerd, grpp,
4237 nrnb, lambda, dvdlt,
4238 md, fcd, bCalcEnerVir,
4239 global_atom_index, bPrintSepPot);
4244 if (fr->nthreads > 1)
4246 reduce_thread_forces(fr->natoms_force, f, fr->fshift,
4247 enerd->term, &enerd->grpp, dvdl,
4248 fr->nthreads, fr->f_t,
4249 fr->red_nblock, 1<<fr->red_ashift,
4251 force_flags & GMX_FORCE_DHDL);
4253 if (force_flags & GMX_FORCE_DHDL)
4255 for (i = 0; i < efptNR; i++)
4257 enerd->dvdl_nonlin[i] += dvdl[i];
4261 /* Copy the sum of violations for the distance restraints from fcd */
4264 enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
4269 void calc_bonds_lambda(FILE *fplog,
4273 const t_pbc *pbc, const t_graph *g,
4274 gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
4276 const t_mdatoms *md,
4278 int *global_atom_index)
4280 int i, ftype, nbonds_np, nbonds, ind, nat;
4282 real dvdl_dum[efptNR];
4283 rvec *f, *fshift_orig;
4284 const t_pbc *pbc_null;
4296 snew(f, fr->natoms_force);
4297 /* We want to preserve the fshift array in forcerec */
4298 fshift_orig = fr->fshift;
4299 snew(fr->fshift, SHIFTS);
4301 /* Loop over all bonded force types to calculate the bonded forces */
4302 for (ftype = 0; (ftype < F_NRE); ftype++)
4304 v = calc_one_bond_foreign(fplog, ftype, idef, x,
4305 f, fr, pbc_null, g, grpp, nrnb, lambda, dvdl_dum,
4306 md, fcd, global_atom_index, FALSE);
4311 fr->fshift = fshift_orig;