2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2009, The GROMACS Development Team.
6 * Copyright (c) 2010,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "genborn_allvsall.h"
45 #include "gromacs/legacyheaders/genborn.h"
46 #include "gromacs/legacyheaders/network.h"
47 #include "gromacs/legacyheaders/types/simple.h"
48 #include "gromacs/math/units.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/utility/smalloc.h"
56 int ** exclusion_mask_gb;
58 gmx_allvsallgb2_data_t;
61 calc_maxoffset(int i, int natoms)
65 if ((natoms % 2) == 1)
67 /* Odd number of atoms, easy */
70 else if ((natoms % 4) == 0)
72 /* Multiple of four is hard */
81 maxoffset = natoms/2-1;
92 maxoffset = natoms/2-1;
101 maxoffset = natoms/2;
105 maxoffset = natoms/2-1;
113 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata,
125 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
126 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
127 * whether they should interact or not.
129 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
130 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
131 * we create a jindex array with three elements per i atom: the starting point, the point to
132 * which we need to check exclusions, and the end point.
133 * This way we only have to allocate a short exclusion mask per i atom.
136 /* Allocate memory for jindex arrays */
137 snew(aadata->jindex_gb, 3*natoms);
139 /* Pointer to lists with exclusion masks */
140 snew(aadata->exclusion_mask_gb, natoms);
142 for (i = 0; i < natoms; i++)
145 aadata->jindex_gb[3*i] = i+1;
146 max_offset = calc_maxoffset(i, natoms);
148 /* first check the max range of atoms to EXCLUDE */
152 for (j = 0; j < ilist[F_GB12].nr; j += 3)
154 a1 = ilist[F_GB12].iatoms[j+1];
155 a2 = ilist[F_GB12].iatoms[j+2];
169 if (k > 0 && k <= max_offset)
171 max_excl_offset = std::max(k, max_excl_offset);
177 for (j = 0; j < ilist[F_GB13].nr; j += 3)
179 a1 = ilist[F_GB13].iatoms[j+1];
180 a2 = ilist[F_GB13].iatoms[j+2];
195 if (k > 0 && k <= max_offset)
197 max_excl_offset = std::max(k, max_excl_offset);
203 for (j = 0; j < ilist[F_GB14].nr; j += 3)
205 a1 = ilist[F_GB14].iatoms[j+1];
206 a2 = ilist[F_GB14].iatoms[j+2];
221 if (k > 0 && k <= max_offset)
223 max_excl_offset = std::max(k, max_excl_offset);
227 max_excl_offset = std::min(max_offset, max_excl_offset);
229 aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
231 snew(aadata->exclusion_mask_gb[i], max_excl_offset);
233 /* Include everything by default */
234 for (j = 0; j < max_excl_offset; j++)
236 /* Use all-ones to mark interactions that should be present, compatible with SSE */
237 aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
239 /* Go through exclusions again */
242 for (j = 0; j < ilist[F_GB12].nr; j += 3)
244 a1 = ilist[F_GB12].iatoms[j+1];
245 a2 = ilist[F_GB12].iatoms[j+2];
259 if (k > 0 && k <= max_offset)
261 aadata->exclusion_mask_gb[i][k-1] = 0;
267 for (j = 0; j < ilist[F_GB13].nr; j += 3)
269 a1 = ilist[F_GB13].iatoms[j+1];
270 a2 = ilist[F_GB13].iatoms[j+2];
284 if (k > 0 && k <= max_offset)
286 aadata->exclusion_mask_gb[i][k-1] = 0;
292 for (j = 0; j < ilist[F_GB14].nr; j += 3)
294 a1 = ilist[F_GB14].iatoms[j+1];
295 a2 = ilist[F_GB14].iatoms[j+2];
309 if (k > 0 && k <= max_offset)
311 aadata->exclusion_mask_gb[i][k-1] = 0;
319 aadata->jindex_gb[3*i+2] = i+1+max_offset;
325 genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata,
332 gmx_allvsallgb2_data_t *aadata;
337 setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
343 genborn_allvsall_calc_still_radii(t_forcerec * fr,
345 gmx_genborn_t * born,
346 gmx_localtop_t * top,
350 gmx_allvsallgb2_data_t *aadata;
363 real irsq, idr4, idr6;
364 real raj, rvdw, ratio;
365 real vaj, ccf, dccf, theta, cosq;
366 real term, prod, icf4, icf6, gpi2, factor, sinq;
368 natoms = mdatoms->nr;
370 ni1 = mdatoms->homenr;
371 factor = 0.5*ONE_4PI_EPS0;
374 aadata = *((gmx_allvsallgb2_data_t **)work);
378 genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
380 *((gmx_allvsallgb2_data_t **)work) = aadata;
384 for (i = 0; i < born->nr; i++)
386 born->gpol_still_work[i] = 0;
390 for (i = ni0; i < ni1; i++)
392 /* We assume shifts are NOT used for all-vs-all interactions */
394 /* Load i atom data */
401 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
402 vai = born->vsolv[i];
403 prod_ai = STILL_P4*vai;
405 /* Load limits for loop over neighbors */
406 nj0 = aadata->jindex_gb[3*i];
407 nj1 = aadata->jindex_gb[3*i+1];
408 nj2 = aadata->jindex_gb[3*i+2];
410 mask = aadata->exclusion_mask_gb[i];
412 /* Prologue part, including exclusion mask */
413 for (j = nj0; j < nj1; j++, mask++)
419 /* load j atom coordinates */
424 /* Calculate distance */
428 rsq = dx*dx+dy*dy+dz*dz;
430 /* Calculate 1/r and 1/r2 */
431 rinv = gmx_invsqrt(rsq);
436 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
440 ratio = rsq / (rvdw * rvdw);
441 vaj = born->vsolv[k];
444 if (ratio > STILL_P5INV)
451 theta = ratio*STILL_PIP5;
453 term = 0.5*(1.0-cosq);
455 sinq = 1.0 - cosq*cosq;
456 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
461 icf6 = (4*ccf-dccf)*idr6;
463 born->gpol_still_work[k] += prod_ai*icf4;
466 /* Save ai->aj and aj->ai chain rule terms */
467 fr->dadx[n++] = prod*icf6;
468 fr->dadx[n++] = prod_ai*icf6;
470 /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */
475 /* Main part, no exclusions */
476 for (j = nj1; j < nj2; j++)
480 /* load j atom coordinates */
485 /* Calculate distance */
489 rsq = dx*dx+dy*dy+dz*dz;
491 /* Calculate 1/r and 1/r2 */
492 rinv = gmx_invsqrt(rsq);
497 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
501 ratio = rsq / (rvdw * rvdw);
502 vaj = born->vsolv[k];
504 if (ratio > STILL_P5INV)
511 theta = ratio*STILL_PIP5;
513 term = 0.5*(1.0-cosq);
515 sinq = 1.0 - cosq*cosq;
516 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
521 icf6 = (4*ccf-dccf)*idr6;
523 born->gpol_still_work[k] += prod_ai*icf4;
526 /* Save ai->aj and aj->ai chain rule terms */
527 fr->dadx[n++] = prod*icf6;
528 fr->dadx[n++] = prod_ai*icf6;
530 born->gpol_still_work[i] += gpi;
533 /* Parallel summations would go here if ever implemented with DD */
535 /* Calculate the radii */
536 for (i = 0; i < natoms; i++)
538 if (born->use[i] != 0)
540 gpi = born->gpol[i]+born->gpol_still_work[i];
542 born->bRad[i] = factor*gmx_invsqrt(gpi2);
543 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
553 genborn_allvsall_calc_hct_obc_radii(t_forcerec * fr,
555 gmx_genborn_t * born,
557 gmx_localtop_t * top,
561 gmx_allvsallgb2_data_t *aadata;
573 real rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
574 real dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
575 real lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
580 natoms = mdatoms->nr;
582 ni1 = mdatoms->homenr;
585 doffset = born->gb_doffset;
587 aadata = *((gmx_allvsallgb2_data_t **)work);
591 genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
593 *((gmx_allvsallgb2_data_t **)work) = aadata;
596 for (i = 0; i < born->nr; i++)
598 born->gpol_hct_work[i] = 0;
601 for (i = ni0; i < ni1; i++)
603 /* We assume shifts are NOT used for all-vs-all interactions */
605 /* Load i atom data */
610 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
613 sk_ai = born->param[i];
614 sk2_ai = sk_ai*sk_ai;
618 /* Load limits for loop over neighbors */
619 nj0 = aadata->jindex_gb[3*i];
620 nj1 = aadata->jindex_gb[3*i+1];
621 nj2 = aadata->jindex_gb[3*i+2];
623 mask = aadata->exclusion_mask_gb[i];
625 /* Prologue part, including exclusion mask */
626 for (j = nj0; j < nj1; j++, mask++)
632 /* load j atom coordinates */
637 /* Calculate distance */
641 rsq = dx*dx+dy*dy+dz*dz;
643 /* Calculate 1/r and 1/r2 */
644 rinv = gmx_invsqrt(rsq);
647 /* sk is precalculated in init_gb() */
649 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
651 /* aj -> ai interaction */
673 lij_inv = gmx_invsqrt(lij2);
676 prod = 0.25*sk2_rinv;
678 log_term = std::log(uij*lij_inv);
679 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
680 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
684 tmp = tmp + 2.0 * (rai_inv-lij);
687 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
688 t2 = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
690 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
692 dadxi = (dlij*t1+t2+t3)*rinv;
701 /* ai -> aj interaction */
702 if (raj < dr + sk_ai)
704 lij = 1.0/(dr-sk_ai);
717 uij = 1.0/(dr+sk_ai);
723 lij_inv = gmx_invsqrt(lij2);
724 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
726 prod = 0.25 * sk2_rinv;
728 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
729 log_term = std::log(uij*lij_inv);
731 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
735 tmp = tmp + 2.0 * (raj_inv-lij);
738 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
739 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
740 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
742 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
744 born->gpol_hct_work[k] += 0.5*tmp;
750 fr->dadx[n++] = dadxi;
751 fr->dadx[n++] = dadxj;
756 /* Main part, no exclusions */
757 for (j = nj1; j < nj2; j++)
761 /* load j atom coordinates */
766 /* Calculate distance */
770 rsq = dx*dx+dy*dy+dz*dz;
772 /* Calculate 1/r and 1/r2 */
773 rinv = gmx_invsqrt(rsq);
776 /* sk is precalculated in init_gb() */
778 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
780 /* aj -> ai interaction */
800 lij_inv = gmx_invsqrt(lij2);
803 prod = 0.25*sk2_rinv;
805 log_term = std::log(uij*lij_inv);
806 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
807 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
811 tmp = tmp + 2.0 * (rai_inv-lij);
815 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
816 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
817 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
819 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
828 /* ai -> aj interaction */
829 if (raj < dr + sk_ai)
831 lij = 1.0/(dr-sk_ai);
844 uij = 1.0/(dr+sk_ai);
850 lij_inv = gmx_invsqrt(lij2);
851 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
853 prod = 0.25 * sk2_rinv;
855 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
856 log_term = std::log(uij*lij_inv);
858 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
862 tmp = tmp + 2.0 * (raj_inv-lij);
865 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
866 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
867 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
869 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
871 born->gpol_hct_work[k] += 0.5*tmp;
877 fr->dadx[n++] = dadxi;
878 fr->dadx[n++] = dadxj;
880 born->gpol_hct_work[i] += sum_ai;
883 /* Parallel summations would go here if ever implemented with DD */
885 if (gb_algorithm == egbHCT)
888 for (i = 0; i < natoms; i++)
890 if (born->use[i] != 0)
892 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
893 sum_ai = 1.0/rai - born->gpol_hct_work[i];
894 min_rad = rai + born->gb_doffset;
897 born->bRad[i] = std::max(rad, min_rad);
898 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
906 /* Calculate the radii */
907 for (i = 0; i < natoms; i++)
909 if (born->use[i] != 0)
911 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
915 sum_ai = rai * born->gpol_hct_work[i];
916 sum_ai2 = sum_ai * sum_ai;
917 sum_ai3 = sum_ai2 * sum_ai;
919 tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
920 born->bRad[i] = rai_inv - tsum*rai_inv2;
921 born->bRad[i] = 1.0 / born->bRad[i];
923 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
925 tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
926 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
938 genborn_allvsall_calc_chainrule(t_forcerec * fr,
940 gmx_genborn_t * born,
946 gmx_allvsallgb2_data_t *aadata;
959 real rbai, rbaj, fgb, fgb_ai, rbi;
963 natoms = mdatoms->nr;
965 ni1 = mdatoms->homenr;
968 aadata = (gmx_allvsallgb2_data_t *)work;
973 /* Loop to get the proper form for the Born radius term */
974 if (gb_algorithm == egbSTILL)
976 for (i = 0; i < natoms; i++)
979 rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
982 else if (gb_algorithm == egbHCT)
984 for (i = 0; i < natoms; i++)
987 rb[i] = rbi * rbi * fr->dvda[i];
990 else if (gb_algorithm == egbOBC)
992 for (idx = 0; idx < natoms; idx++)
994 rbi = born->bRad[idx];
995 rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
999 for (i = ni0; i < ni1; i++)
1001 /* We assume shifts are NOT used for all-vs-all interactions */
1003 /* Load i atom data */
1014 /* Load limits for loop over neighbors */
1015 nj0 = aadata->jindex_gb[3*i];
1016 nj1 = aadata->jindex_gb[3*i+1];
1017 nj2 = aadata->jindex_gb[3*i+2];
1019 mask = aadata->exclusion_mask_gb[i];
1021 /* Prologue part, including exclusion mask */
1022 for (j = nj0; j < nj1; j++, mask++)
1028 /* load j atom coordinates */
1033 /* Calculate distance */
1040 fgb = rbai*dadx[n++];
1041 fgb_ai = rbaj*dadx[n++];
1043 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1054 /* Update force on atom aj */
1055 f[3*k] = f[3*k] - tx;
1056 f[3*k+1] = f[3*k+1] - ty;
1057 f[3*k+2] = f[3*k+2] - tz;
1061 /* Main part, no exclusions */
1062 for (j = nj1; j < nj2; j++)
1066 /* load j atom coordinates */
1071 /* Calculate distance */
1078 fgb = rbai*dadx[n++];
1079 fgb_ai = rbaj*dadx[n++];
1081 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1092 /* Update force on atom aj */
1093 f[3*k] = f[3*k] - tx;
1094 f[3*k+1] = f[3*k+1] - ty;
1095 f[3*k+2] = f[3*k+2] - tz;
1097 /* Update force and shift forces on atom ai */
1098 f[3*i] = f[3*i] + fix;
1099 f[3*i+1] = f[3*i+1] + fiy;
1100 f[3*i+2] = f[3*i+2] + fiz;