2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2012, by the GROMACS development team, led by
6 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
7 * others, as listed in the AUTHORS file in the top-level source
8 * directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
41 #include "types/simple.h"
50 #include "genborn_allvsall.h"
56 int ** exclusion_mask_gb;
58 gmx_allvsallgb2_data_t;
61 calc_maxoffset(int i,int natoms)
65 if ((natoms % 2) == 1)
67 /* Odd number of atoms, easy */
70 else if ((natoms % 4) == 0)
72 /* Multiple of four is hard */
105 maxoffset=natoms/2-1;
113 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t * aadata,
127 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
128 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
129 * whether they should interact or not.
131 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
132 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
133 * we create a jindex array with three elements per i atom: the starting point, the point to
134 * which we need to check exclusions, and the end point.
135 * This way we only have to allocate a short exclusion mask per i atom.
138 /* Allocate memory for jindex arrays */
139 snew(aadata->jindex_gb,3*natoms);
141 /* Pointer to lists with exclusion masks */
142 snew(aadata->exclusion_mask_gb,natoms);
144 for(i=0;i<natoms;i++)
147 aadata->jindex_gb[3*i] = i+1;
148 max_offset = calc_maxoffset(i,natoms);
150 /* first check the max range of atoms to EXCLUDE */
154 for(j=0;j<ilist[F_GB12].nr;j+=3)
156 a1 = ilist[F_GB12].iatoms[j+1];
157 a2 = ilist[F_GB12].iatoms[j+2];
171 if(k>0 && k<=max_offset)
173 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
179 for(j=0;j<ilist[F_GB13].nr;j+=3)
181 a1 = ilist[F_GB13].iatoms[j+1];
182 a2 = ilist[F_GB13].iatoms[j+2];
197 if(k>0 && k<=max_offset)
199 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
205 for(j=0;j<ilist[F_GB14].nr;j+=3)
207 a1 = ilist[F_GB14].iatoms[j+1];
208 a2 = ilist[F_GB14].iatoms[j+2];
223 if(k>0 && k<=max_offset)
225 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
229 max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
231 aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
233 snew(aadata->exclusion_mask_gb[i],max_excl_offset);
235 /* Include everything by default */
236 for(j=0;j<max_excl_offset;j++)
238 /* Use all-ones to mark interactions that should be present, compatible with SSE */
239 aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
241 /* Go through exclusions again */
244 for(j=0;j<ilist[F_GB12].nr;j+=3)
246 a1 = ilist[F_GB12].iatoms[j+1];
247 a2 = ilist[F_GB12].iatoms[j+2];
261 if(k>0 && k<=max_offset)
263 aadata->exclusion_mask_gb[i][k-1] = 0;
269 for(j=0;j<ilist[F_GB13].nr;j+=3)
271 a1 = ilist[F_GB13].iatoms[j+1];
272 a2 = ilist[F_GB13].iatoms[j+2];
286 if(k>0 && k<=max_offset)
288 aadata->exclusion_mask_gb[i][k-1] = 0;
294 for(j=0;j<ilist[F_GB14].nr;j+=3)
296 a1 = ilist[F_GB14].iatoms[j+1];
297 a2 = ilist[F_GB14].iatoms[j+2];
311 if(k>0 && k<=max_offset)
313 aadata->exclusion_mask_gb[i][k-1] = 0;
321 aadata->jindex_gb[3*i+2] = i+1+max_offset;
327 genborn_allvsall_setup(gmx_allvsallgb2_data_t ** p_aadata,
335 gmx_allvsallgb2_data_t *aadata;
341 setup_gb_exclusions_and_indices(aadata,ilist,natoms,bInclude12,bInclude13,bInclude14);
347 genborn_allvsall_calc_still_radii(t_forcerec * fr,
349 gmx_genborn_t * born,
350 gmx_localtop_t * top,
355 gmx_allvsallgb2_data_t *aadata;
370 real vaj,ccf,dccf,theta,cosq;
371 real term,prod,icf4,icf6,gpi2,factor,sinq;
373 natoms = mdatoms->nr;
374 ni0 = mdatoms->start;
375 ni1 = mdatoms->start+mdatoms->homenr;
376 factor = 0.5*ONE_4PI_EPS0;
379 aadata = *((gmx_allvsallgb2_data_t **)work);
383 genborn_allvsall_setup(&aadata,top->idef.il,mdatoms->nr,
385 *((gmx_allvsallgb2_data_t **)work) = aadata;
389 for(i=0;i<born->nr;i++)
391 born->gpol_still_work[i] = 0;
395 for(i=ni0; i<ni1; i++)
397 /* We assume shifts are NOT used for all-vs-all interactions */
399 /* Load i atom data */
406 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
407 vai = born->vsolv[i];
408 prod_ai = STILL_P4*vai;
410 /* Load limits for loop over neighbors */
411 nj0 = aadata->jindex_gb[3*i];
412 nj1 = aadata->jindex_gb[3*i+1];
413 nj2 = aadata->jindex_gb[3*i+2];
415 mask = aadata->exclusion_mask_gb[i];
417 /* Prologue part, including exclusion mask */
418 for(j=nj0; j<nj1; j++,mask++)
424 /* load j atom coordinates */
429 /* Calculate distance */
433 rsq = dx*dx+dy*dy+dz*dz;
435 /* Calculate 1/r and 1/r2 */
436 rinv = gmx_invsqrt(rsq);
441 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
445 ratio = rsq / (rvdw * rvdw);
446 vaj = born->vsolv[k];
449 if(ratio>STILL_P5INV)
456 theta = ratio*STILL_PIP5;
458 term = 0.5*(1.0-cosq);
460 sinq = 1.0 - cosq*cosq;
461 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
466 icf6 = (4*ccf-dccf)*idr6;
468 born->gpol_still_work[k] += prod_ai*icf4;
471 /* Save ai->aj and aj->ai chain rule terms */
472 fr->dadx[n++] = prod*icf6;
473 fr->dadx[n++] = prod_ai*icf6;
475 /* 27 flops, plus one cos(x) - estimate at 20 flops => 47 */
480 /* Main part, no exclusions */
481 for(j=nj1; j<nj2; j++)
485 /* load j atom coordinates */
490 /* Calculate distance */
494 rsq = dx*dx+dy*dy+dz*dz;
496 /* Calculate 1/r and 1/r2 */
497 rinv = gmx_invsqrt(rsq);
502 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
506 ratio = rsq / (rvdw * rvdw);
507 vaj = born->vsolv[k];
509 if(ratio>STILL_P5INV)
516 theta = ratio*STILL_PIP5;
518 term = 0.5*(1.0-cosq);
520 sinq = 1.0 - cosq*cosq;
521 dccf = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
526 icf6 = (4*ccf-dccf)*idr6;
528 born->gpol_still_work[k] += prod_ai*icf4;
531 /* Save ai->aj and aj->ai chain rule terms */
532 fr->dadx[n++] = prod*icf6;
533 fr->dadx[n++] = prod_ai*icf6;
535 born->gpol_still_work[i]+=gpi;
538 /* Parallel summations */
541 gmx_sum(natoms, born->gpol_still_work, cr);
544 /* Calculate the radii */
545 for(i=0;i<natoms;i++)
547 if(born->use[i] != 0)
549 gpi = born->gpol[i]+born->gpol_still_work[i];
551 born->bRad[i] = factor*gmx_invsqrt(gpi2);
552 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
562 genborn_allvsall_calc_hct_obc_radii(t_forcerec * fr,
564 gmx_genborn_t * born,
566 gmx_localtop_t * top,
571 gmx_allvsallgb2_data_t *aadata;
583 real rai,doffset,rai_inv,rai_inv2,sk_ai,sk2_ai,sum_ai;
584 real dr,sk,lij,dlij,lij2,lij3,uij2,uij3,diff2,uij,log_term;
585 real lij_inv,sk2,sk2_rinv,tmp,t1,t2,t3,raj_inv,sum_ai2,sum_ai3,tsum;
590 natoms = mdatoms->nr;
591 ni0 = mdatoms->start;
592 ni1 = mdatoms->start+mdatoms->homenr;
597 doffset = born->gb_doffset;
599 aadata = *((gmx_allvsallgb2_data_t **)work);
603 genborn_allvsall_setup(&aadata,top->idef.il,mdatoms->nr,
605 *((gmx_allvsallgb2_data_t **)work) = aadata;
608 for(i=0;i<born->nr;i++)
610 born->gpol_hct_work[i] = 0;
613 for(i=ni0; i<ni1; i++)
615 /* We assume shifts are NOT used for all-vs-all interactions */
617 /* Load i atom data */
622 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
625 sk_ai = born->param[i];
626 sk2_ai = sk_ai*sk_ai;
630 /* Load limits for loop over neighbors */
631 nj0 = aadata->jindex_gb[3*i];
632 nj1 = aadata->jindex_gb[3*i+1];
633 nj2 = aadata->jindex_gb[3*i+2];
635 mask = aadata->exclusion_mask_gb[i];
637 /* Prologue part, including exclusion mask */
638 for(j=nj0; j<nj1; j++,mask++)
644 /* load j atom coordinates */
649 /* Calculate distance */
653 rsq = dx*dx+dy*dy+dz*dz;
655 /* Calculate 1/r and 1/r2 */
656 rinv = gmx_invsqrt(rsq);
659 /* sk is precalculated in init_gb() */
661 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
663 /* aj -> ai interaction */
685 lij_inv = gmx_invsqrt(lij2);
688 prod = 0.25*sk2_rinv;
690 log_term = log(uij*lij_inv);
691 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
692 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
696 tmp = tmp + 2.0 * (rai_inv-lij);
699 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
700 t2 = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
702 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
704 dadxi = (dlij*t1+t2+t3)*rinv;
713 /* ai -> aj interaction */
716 lij = 1.0/(dr-sk_ai);
729 uij = 1.0/(dr+sk_ai);
735 lij_inv = gmx_invsqrt(lij2);
736 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
738 prod = 0.25 * sk2_rinv;
740 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
741 log_term = log(uij*lij_inv);
743 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
747 tmp = tmp + 2.0 * (raj_inv-lij);
750 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
751 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
752 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
754 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
756 born->gpol_hct_work[k] += 0.5*tmp;
762 fr->dadx[n++] = dadxi;
763 fr->dadx[n++] = dadxj;
768 /* Main part, no exclusions */
769 for(j=nj1; j<nj2; j++)
773 /* load j atom coordinates */
778 /* Calculate distance */
782 rsq = dx*dx+dy*dy+dz*dz;
784 /* Calculate 1/r and 1/r2 */
785 rinv = gmx_invsqrt(rsq);
788 /* sk is precalculated in init_gb() */
790 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
792 /* aj -> ai interaction */
812 lij_inv = gmx_invsqrt(lij2);
815 prod = 0.25*sk2_rinv;
817 log_term = log(uij*lij_inv);
818 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
819 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
823 tmp = tmp + 2.0 * (rai_inv-lij);
827 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
828 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
829 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
831 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
840 /* ai -> aj interaction */
843 lij = 1.0/(dr-sk_ai);
856 uij = 1.0/(dr+sk_ai);
862 lij_inv = gmx_invsqrt(lij2);
863 sk2 = sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
865 prod = 0.25 * sk2_rinv;
867 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
868 log_term = log(uij*lij_inv);
870 tmp = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
874 tmp = tmp + 2.0 * (raj_inv-lij);
877 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
878 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
879 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
881 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
883 born->gpol_hct_work[k] += 0.5*tmp;
889 fr->dadx[n++] = dadxi;
890 fr->dadx[n++] = dadxj;
892 born->gpol_hct_work[i]+=sum_ai;
895 /* Parallel summations */
898 gmx_sum(natoms, born->gpol_hct_work, cr);
901 if(gb_algorithm==egbHCT)
904 for(i=0;i<natoms;i++)
906 if(born->use[i] != 0)
908 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
909 sum_ai = 1.0/rai - born->gpol_hct_work[i];
910 min_rad = rai + born->gb_doffset;
913 born->bRad[i] = rad > min_rad ? rad : min_rad;
914 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
922 /* Calculate the radii */
923 for(i=0;i<natoms;i++)
925 if(born->use[i] != 0)
927 rai = top->atomtypes.gb_radius[mdatoms->typeA[i]];
931 sum_ai = rai * born->gpol_hct_work[i];
932 sum_ai2 = sum_ai * sum_ai;
933 sum_ai3 = sum_ai2 * sum_ai;
935 tsum = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
936 born->bRad[i] = rai_inv - tsum*rai_inv2;
937 born->bRad[i] = 1.0 / born->bRad[i];
939 fr->invsqrta[i]=gmx_invsqrt(born->bRad[i]);
941 tchain = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
942 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
954 genborn_allvsall_calc_chainrule(t_forcerec * fr,
956 gmx_genborn_t * born,
962 gmx_allvsallgb2_data_t *aadata;
975 real rbai,rbaj,fgb,fgb_ai,rbi;
979 natoms = mdatoms->nr;
980 ni0 = mdatoms->start;
981 ni1 = mdatoms->start+mdatoms->homenr;
984 aadata = (gmx_allvsallgb2_data_t *)work;
989 /* Loop to get the proper form for the Born radius term */
990 if(gb_algorithm==egbSTILL)
992 for(i=0;i<natoms;i++)
995 rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
998 else if(gb_algorithm==egbHCT)
1000 for(i=0;i<natoms;i++)
1002 rbi = born->bRad[i];
1003 rb[i] = rbi * rbi * fr->dvda[i];
1006 else if(gb_algorithm==egbOBC)
1008 for(idx=0;idx<natoms;idx++)
1010 rbi = born->bRad[idx];
1011 rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1015 for(i=ni0; i<ni1; i++)
1017 /* We assume shifts are NOT used for all-vs-all interactions */
1019 /* Load i atom data */
1030 /* Load limits for loop over neighbors */
1031 nj0 = aadata->jindex_gb[3*i];
1032 nj1 = aadata->jindex_gb[3*i+1];
1033 nj2 = aadata->jindex_gb[3*i+2];
1035 mask = aadata->exclusion_mask_gb[i];
1037 /* Prologue part, including exclusion mask */
1038 for(j=nj0; j<nj1; j++,mask++)
1044 /* load j atom coordinates */
1049 /* Calculate distance */
1056 fgb = rbai*dadx[n++];
1057 fgb_ai = rbaj*dadx[n++];
1059 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1070 /* Update force on atom aj */
1071 f[3*k] = f[3*k] - tx;
1072 f[3*k+1] = f[3*k+1] - ty;
1073 f[3*k+2] = f[3*k+2] - tz;
1077 /* Main part, no exclusions */
1078 for(j=nj1; j<nj2; j++)
1082 /* load j atom coordinates */
1087 /* Calculate distance */
1094 fgb = rbai*dadx[n++];
1095 fgb_ai = rbaj*dadx[n++];
1097 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1108 /* Update force on atom aj */
1109 f[3*k] = f[3*k] - tx;
1110 f[3*k+1] = f[3*k+1] - ty;
1111 f[3*k+2] = f[3*k+2] - tz;
1113 /* Update force and shift forces on atom ai */
1114 f[3*i] = f[3*i] + fix;
1115 f[3*i+1] = f[3*i+1] + fiy;
1116 f[3*i+2] = f[3*i+2] + fiz;