2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2009, The GROMACS Development Team.
6 * Copyright (c) 2010,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "genborn_allvsall.h"
43 #include "gromacs/legacyheaders/genborn.h"
44 #include "gromacs/legacyheaders/network.h"
45 #include "gromacs/legacyheaders/types/simple.h"
46 #include "gromacs/math/units.h"
47 #include "gromacs/math/vec.h"
48 #include "gromacs/utility/smalloc.h"
54 int ** exclusion_mask_gb;
56 gmx_allvsallgb2_data_t;
59 calc_maxoffset(int i, int natoms)
63 if ((natoms % 2) == 1)
65 /* Odd number of atoms, easy */
68 else if ((natoms % 4) == 0)
70 /* Multiple of four is hard */
79 maxoffset = natoms/2-1;
90 maxoffset = natoms/2-1;
103 maxoffset = natoms/2-1;
111 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata,
125 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
126 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
127 * whether they should interact or not.
129 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
130 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
131 * we create a jindex array with three elements per i atom: the starting point, the point to
132 * which we need to check exclusions, and the end point.
133 * This way we only have to allocate a short exclusion mask per i atom.
136 /* Allocate memory for jindex arrays */
137 snew(aadata->jindex_gb, 3*natoms);
139 /* Pointer to lists with exclusion masks */
140 snew(aadata->exclusion_mask_gb, natoms);
142 for (i = 0; i < natoms; i++)
145 aadata->jindex_gb[3*i] = i+1;
146 max_offset = calc_maxoffset(i, natoms);
148 /* first check the max range of atoms to EXCLUDE */
152 for (j = 0; j < ilist[F_GB12].nr; j += 3)
154 a1 = ilist[F_GB12].iatoms[j+1];
155 a2 = ilist[F_GB12].iatoms[j+2];
169 if (k > 0 && k <= max_offset)
171 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
177 for (j = 0; j < ilist[F_GB13].nr; j += 3)
179 a1 = ilist[F_GB13].iatoms[j+1];
180 a2 = ilist[F_GB13].iatoms[j+2];
195 if (k > 0 && k <= max_offset)
197 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
203 for (j = 0; j < ilist[F_GB14].nr; j += 3)
205 a1 = ilist[F_GB14].iatoms[j+1];
206 a2 = ilist[F_GB14].iatoms[j+2];
221 if (k > 0 && k <= max_offset)
223 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
227 max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
229 aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
231 snew(aadata->exclusion_mask_gb[i], max_excl_offset);
233 /* Include everything by default */
234 for (j = 0; j < max_excl_offset; j++)
236 /* Use all-ones to mark interactions that should be present, compatible with SSE */
237 aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
239 /* Go through exclusions again */
242 for (j = 0; j < ilist[F_GB12].nr; j += 3)
244 a1 = ilist[F_GB12].iatoms[j+1];
245 a2 = ilist[F_GB12].iatoms[j+2];
259 if (k > 0 && k <= max_offset)
261 aadata->exclusion_mask_gb[i][k-1] = 0;
267 for (j = 0; j < ilist[F_GB13].nr; j += 3)
269 a1 = ilist[F_GB13].iatoms[j+1];
270 a2 = ilist[F_GB13].iatoms[j+2];
284 if (k > 0 && k <= max_offset)
286 aadata->exclusion_mask_gb[i][k-1] = 0;
292 for (j = 0; j < ilist[F_GB14].nr; j += 3)
294 a1 = ilist[F_GB14].iatoms[j+1];
295 a2 = ilist[F_GB14].iatoms[j+2];
309 if (k > 0 && k <= max_offset)
311 aadata->exclusion_mask_gb[i][k-1] = 0;
319 aadata->jindex_gb[3*i+2] = i+1+max_offset;
325 genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata,
333 gmx_allvsallgb2_data_t *aadata;
339 setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
345 genborn_allvsall_calc_still_radii(t_forcerec * fr,
347 gmx_genborn_t * born,
348 gmx_localtop_t * top,
352 gmx_allvsallgb2_data_t *aadata;
365 real irsq, idr4, idr6;
366 real raj, rvdw, ratio;
367 real vaj, ccf, dccf, theta, cosq;
368 real term, prod, icf4, icf6, gpi2, factor, sinq;
370 natoms = mdatoms->nr;
372 ni1 = mdatoms->homenr;
373 factor = 0.5*ONE_4PI_EPS0;
376 aadata = *((gmx_allvsallgb2_data_t **)work);
380 genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
382 *((gmx_allvsallgb2_data_t **)work) = aadata;
386 for (i = 0; i < born->nr; i++)
388 born->gpol_still_work[i] = 0;
392 for (i = ni0; i < ni1; i++)
394 /* We assume shifts are NOT used for all-vs-all interactions */
396 /* Load i atom data */
403 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
404 vai = born->vsolv[i];
405 prod_ai = STILL_P4*vai;
407 /* Load limits for loop over neighbors */
408 nj0 = aadata->jindex_gb[3*i];
409 nj1 = aadata->jindex_gb[3*i+1];
410 nj2 = aadata->jindex_gb[3*i+2];
412 mask = aadata->exclusion_mask_gb[i];
414 /* Prologue part, including exclusion mask */
415 for (j = nj0; j < nj1; j++, mask++)
421 /* load j atom coordinates */
426 /* Calculate distance */
430 rsq = dx*dx+dy*dy+dz*dz;
432 /* Calculate 1/r and 1/r2 */
433 rinv = gmx_invsqrt(rsq);
438 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
442 ratio = rsq / (rvdw * rvdw);
443 vaj = born->vsolv[k];
446 if (ratio > STILL_P5INV)
453 theta = ratio*STILL_PIP5;
455 term = 0.5*(1.0-cosq);
457 sinq = 1.0 - cosq*cosq;
458 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
463 icf6 = (4*ccf-dccf)*idr6;
465 born->gpol_still_work[k] += prod_ai*icf4;
468 /* Save ai->aj and aj->ai chain rule terms */
469 fr->dadx[n++] = prod*icf6;
470 fr->dadx[n++] = prod_ai*icf6;
472 /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */
477 /* Main part, no exclusions */
478 for (j = nj1; j < nj2; j++)
482 /* load j atom coordinates */
487 /* Calculate distance */
491 rsq = dx*dx+dy*dy+dz*dz;
493 /* Calculate 1/r and 1/r2 */
494 rinv = gmx_invsqrt(rsq);
499 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
503 ratio = rsq / (rvdw * rvdw);
504 vaj = born->vsolv[k];
506 if (ratio > STILL_P5INV)
513 theta = ratio*STILL_PIP5;
515 term = 0.5*(1.0-cosq);
517 sinq = 1.0 - cosq*cosq;
518 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
523 icf6 = (4*ccf-dccf)*idr6;
525 born->gpol_still_work[k] += prod_ai*icf4;
528 /* Save ai->aj and aj->ai chain rule terms */
529 fr->dadx[n++] = prod*icf6;
530 fr->dadx[n++] = prod_ai*icf6;
532 born->gpol_still_work[i] += gpi;
535 /* Parallel summations would go here if ever implemented with DD */
537 /* Calculate the radii */
538 for (i = 0; i < natoms; i++)
540 if (born->use[i] != 0)
542 gpi = born->gpol[i]+born->gpol_still_work[i];
544 born->bRad[i] = factor*gmx_invsqrt(gpi2);
545 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
555 genborn_allvsall_calc_hct_obc_radii(t_forcerec * fr,
557 gmx_genborn_t * born,
559 gmx_localtop_t * top,
563 gmx_allvsallgb2_data_t *aadata;
575 real rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
576 real dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
577 real lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
582 natoms = mdatoms->nr;
584 ni1 = mdatoms->homenr;
589 doffset = born->gb_doffset;
591 aadata = *((gmx_allvsallgb2_data_t **)work);
595 genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
597 *((gmx_allvsallgb2_data_t **)work) = aadata;
600 for (i = 0; i < born->nr; i++)
602 born->gpol_hct_work[i] = 0;
605 for (i = ni0; i < ni1; i++)
607 /* We assume shifts are NOT used for all-vs-all interactions */
609 /* Load i atom data */
614 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
617 sk_ai = born->param[i];
618 sk2_ai = sk_ai*sk_ai;
622 /* Load limits for loop over neighbors */
623 nj0 = aadata->jindex_gb[3*i];
624 nj1 = aadata->jindex_gb[3*i+1];
625 nj2 = aadata->jindex_gb[3*i+2];
627 mask = aadata->exclusion_mask_gb[i];
629 /* Prologue part, including exclusion mask */
630 for (j = nj0; j < nj1; j++, mask++)
636 /* load j atom coordinates */
641 /* Calculate distance */
645 rsq = dx*dx+dy*dy+dz*dz;
647 /* Calculate 1/r and 1/r2 */
648 rinv = gmx_invsqrt(rsq);
651 /* sk is precalculated in init_gb() */
653 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
655 /* aj -> ai interaction */
677 lij_inv = gmx_invsqrt(lij2);
680 prod = 0.25*sk2_rinv;
682 log_term = log(uij*lij_inv);
683 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
684 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
688 tmp = tmp + 2.0 * (rai_inv-lij);
691 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
692 t2 = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
694 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
696 dadxi = (dlij*t1+t2+t3)*rinv;
705 /* ai -> aj interaction */
706 if (raj < dr + sk_ai)
708 lij = 1.0/(dr-sk_ai);
721 uij = 1.0/(dr+sk_ai);
727 lij_inv = gmx_invsqrt(lij2);
728 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
730 prod = 0.25 * sk2_rinv;
732 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
733 log_term = log(uij*lij_inv);
735 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
739 tmp = tmp + 2.0 * (raj_inv-lij);
742 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
743 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
744 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
746 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
748 born->gpol_hct_work[k] += 0.5*tmp;
754 fr->dadx[n++] = dadxi;
755 fr->dadx[n++] = dadxj;
760 /* Main part, no exclusions */
761 for (j = nj1; j < nj2; j++)
765 /* load j atom coordinates */
770 /* Calculate distance */
774 rsq = dx*dx+dy*dy+dz*dz;
776 /* Calculate 1/r and 1/r2 */
777 rinv = gmx_invsqrt(rsq);
780 /* sk is precalculated in init_gb() */
782 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
784 /* aj -> ai interaction */
804 lij_inv = gmx_invsqrt(lij2);
807 prod = 0.25*sk2_rinv;
809 log_term = log(uij*lij_inv);
810 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
811 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
815 tmp = tmp + 2.0 * (rai_inv-lij);
819 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
820 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
821 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
823 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
832 /* ai -> aj interaction */
833 if (raj < dr + sk_ai)
835 lij = 1.0/(dr-sk_ai);
848 uij = 1.0/(dr+sk_ai);
854 lij_inv = gmx_invsqrt(lij2);
855 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
857 prod = 0.25 * sk2_rinv;
859 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
860 log_term = log(uij*lij_inv);
862 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
866 tmp = tmp + 2.0 * (raj_inv-lij);
869 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
870 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
871 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
873 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
875 born->gpol_hct_work[k] += 0.5*tmp;
881 fr->dadx[n++] = dadxi;
882 fr->dadx[n++] = dadxj;
884 born->gpol_hct_work[i] += sum_ai;
887 /* Parallel summations would go here if ever implemented with DD */
889 if (gb_algorithm == egbHCT)
892 for (i = 0; i < natoms; i++)
894 if (born->use[i] != 0)
896 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
897 sum_ai = 1.0/rai - born->gpol_hct_work[i];
898 min_rad = rai + born->gb_doffset;
901 born->bRad[i] = rad > min_rad ? rad : min_rad;
902 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
910 /* Calculate the radii */
911 for (i = 0; i < natoms; i++)
913 if (born->use[i] != 0)
915 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
919 sum_ai = rai * born->gpol_hct_work[i];
920 sum_ai2 = sum_ai * sum_ai;
921 sum_ai3 = sum_ai2 * sum_ai;
923 tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
924 born->bRad[i] = rai_inv - tsum*rai_inv2;
925 born->bRad[i] = 1.0 / born->bRad[i];
927 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
929 tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
930 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
942 genborn_allvsall_calc_chainrule(t_forcerec * fr,
944 gmx_genborn_t * born,
950 gmx_allvsallgb2_data_t *aadata;
963 real rbai, rbaj, fgb, fgb_ai, rbi;
967 natoms = mdatoms->nr;
969 ni1 = mdatoms->homenr;
972 aadata = (gmx_allvsallgb2_data_t *)work;
977 /* Loop to get the proper form for the Born radius term */
978 if (gb_algorithm == egbSTILL)
980 for (i = 0; i < natoms; i++)
983 rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
986 else if (gb_algorithm == egbHCT)
988 for (i = 0; i < natoms; i++)
991 rb[i] = rbi * rbi * fr->dvda[i];
994 else if (gb_algorithm == egbOBC)
996 for (idx = 0; idx < natoms; idx++)
998 rbi = born->bRad[idx];
999 rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1003 for (i = ni0; i < ni1; i++)
1005 /* We assume shifts are NOT used for all-vs-all interactions */
1007 /* Load i atom data */
1018 /* Load limits for loop over neighbors */
1019 nj0 = aadata->jindex_gb[3*i];
1020 nj1 = aadata->jindex_gb[3*i+1];
1021 nj2 = aadata->jindex_gb[3*i+2];
1023 mask = aadata->exclusion_mask_gb[i];
1025 /* Prologue part, including exclusion mask */
1026 for (j = nj0; j < nj1; j++, mask++)
1032 /* load j atom coordinates */
1037 /* Calculate distance */
1044 fgb = rbai*dadx[n++];
1045 fgb_ai = rbaj*dadx[n++];
1047 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1058 /* Update force on atom aj */
1059 f[3*k] = f[3*k] - tx;
1060 f[3*k+1] = f[3*k+1] - ty;
1061 f[3*k+2] = f[3*k+2] - tz;
1065 /* Main part, no exclusions */
1066 for (j = nj1; j < nj2; j++)
1070 /* load j atom coordinates */
1075 /* Calculate distance */
1082 fgb = rbai*dadx[n++];
1083 fgb_ai = rbaj*dadx[n++];
1085 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1096 /* Update force on atom aj */
1097 f[3*k] = f[3*k] - tx;
1098 f[3*k+1] = f[3*k+1] - ty;
1099 f[3*k+2] = f[3*k+2] - tz;
1101 /* Update force and shift forces on atom ai */
1102 f[3*i] = f[3*i] + fix;
1103 f[3*i+1] = f[3*i+1] + fiy;
1104 f[3*i+2] = f[3*i+2] + fiz;