src/gromacs/mdlib/genborn_allvsall.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2009, The GROMACS Development Team.
   6  * Copyright (c) 2010, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include "types/simple.h"
  43
  44 #include "vec.h"
  45 #include "smalloc.h"
  46
  47 #include "partdec.h"
  48 #include "network.h"
  49 #include "physics.h"
  50 #include "genborn.h"
  51 #include "genborn_allvsall.h"
  52
  53
  54 typedef struct
  55 {
  56     int *      jindex_gb;
  57     int **     exclusion_mask_gb;
  58 }
  59 gmx_allvsallgb2_data_t;
  60
  61 static int
  62 calc_maxoffset(int i, int natoms)
  63 {
  64     int maxoffset;
  65
  66     if ((natoms % 2) == 1)
  67     {
  68         /* Odd number of atoms, easy */
  69         maxoffset = natoms/2;
  70     }
  71     else if ((natoms % 4) == 0)
  72     {
  73         /* Multiple of four is hard */
  74         if (i < natoms/2)
  75         {
  76             if ((i % 2) == 0)
  77             {
  78                 maxoffset = natoms/2;
  79             }
  80             else
  81             {
  82                 maxoffset = natoms/2-1;
  83             }
  84         }
  85         else
  86         {
  87             if ((i % 2) == 1)
  88             {
  89                 maxoffset = natoms/2;
  90             }
  91             else
  92             {
  93                 maxoffset = natoms/2-1;
  94             }
  95         }
  96     }
  97     else
  98     {
  99         /* natoms/2 = odd */
 100         if ((i % 2) == 0)
 101         {
 102             maxoffset = natoms/2;
 103         }
 104         else
 105         {
 106             maxoffset = natoms/2-1;
 107         }
 108     }
 109
 110     return maxoffset;
 111 }
 112
 113 static void
 114 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
 115                                 t_ilist     *                  ilist,
 116                                 int                            natoms,
 117                                 gmx_bool                       bInclude12,
 118                                 gmx_bool                       bInclude13,
 119                                 gmx_bool                       bInclude14)
 120 {
 121     int i, j, k, tp;
 122     int a1, a2;
 123     int nj0, nj1;
 124     int max_offset;
 125     int max_excl_offset;
 126     int nj;
 127
 128     /* This routine can appear to be a bit complex, but it is mostly book-keeping.
 129      * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
 130      * whether they should interact or not.
 131      *
 132      * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
 133      * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
 134      * we create a jindex array with three elements per i atom: the starting point, the point to
 135      * which we need to check exclusions, and the end point.
 136      * This way we only have to allocate a short exclusion mask per i atom.
 137      */
 138
 139     /* Allocate memory for jindex arrays */
 140     snew(aadata->jindex_gb, 3*natoms);
 141
 142     /* Pointer to lists with exclusion masks */
 143     snew(aadata->exclusion_mask_gb, natoms);
 144
 145     for (i = 0; i < natoms; i++)
 146     {
 147         /* Start */
 148         aadata->jindex_gb[3*i]       = i+1;
 149         max_offset                   = calc_maxoffset(i, natoms);
 150
 151         /* first check the max range of atoms to EXCLUDE */
 152         max_excl_offset = 0;
 153         if (!bInclude12)
 154         {
 155             for (j = 0; j < ilist[F_GB12].nr; j += 3)
 156             {
 157                 a1 = ilist[F_GB12].iatoms[j+1];
 158                 a2 = ilist[F_GB12].iatoms[j+2];
 159
 160                 if (a1 == i)
 161                 {
 162                     k = a2-a1;
 163                 }
 164                 else if (a2 == i)
 165                 {
 166                     k = a1+natoms-a2;
 167                 }
 168                 else
 169                 {
 170                     continue;
 171                 }
 172                 if (k > 0 && k <= max_offset)
 173                 {
 174                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
 175                 }
 176             }
 177         }
 178         if (!bInclude13)
 179         {
 180             for (j = 0; j < ilist[F_GB13].nr; j += 3)
 181             {
 182                 a1 = ilist[F_GB13].iatoms[j+1];
 183                 a2 = ilist[F_GB13].iatoms[j+2];
 184
 185
 186                 if (a1 == i)
 187                 {
 188                     k = a2-a1;
 189                 }
 190                 else if (a2 == i)
 191                 {
 192                     k = a1+natoms-a2;
 193                 }
 194                 else
 195                 {
 196                     continue;
 197                 }
 198                 if (k > 0 && k <= max_offset)
 199                 {
 200                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
 201                 }
 202             }
 203         }
 204         if (!bInclude14)
 205         {
 206             for (j = 0; j < ilist[F_GB14].nr; j += 3)
 207             {
 208                 a1 = ilist[F_GB14].iatoms[j+1];
 209                 a2 = ilist[F_GB14].iatoms[j+2];
 210
 211
 212                 if (a1 == i)
 213                 {
 214                     k = a2-a1;
 215                 }
 216                 else if (a2 == i)
 217                 {
 218                     k = a1+natoms-a2;
 219                 }
 220                 else
 221                 {
 222                     continue;
 223                 }
 224                 if (k > 0 && k <= max_offset)
 225                 {
 226                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
 227                 }
 228             }
 229         }
 230         max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
 231
 232         aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
 233
 234         snew(aadata->exclusion_mask_gb[i], max_excl_offset);
 235
 236         /* Include everything by default */
 237         for (j = 0; j < max_excl_offset; j++)
 238         {
 239             /* Use all-ones to mark interactions that should be present, compatible with SSE */
 240             aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
 241         }
 242         /* Go through exclusions again */
 243         if (!bInclude12)
 244         {
 245             for (j = 0; j < ilist[F_GB12].nr; j += 3)
 246             {
 247                 a1 = ilist[F_GB12].iatoms[j+1];
 248                 a2 = ilist[F_GB12].iatoms[j+2];
 249
 250                 if (a1 == i)
 251                 {
 252                     k = a2-a1;
 253                 }
 254                 else if (a2 == i)
 255                 {
 256                     k = a1+natoms-a2;
 257                 }
 258                 else
 259                 {
 260                     continue;
 261                 }
 262                 if (k > 0 && k <= max_offset)
 263                 {
 264                     aadata->exclusion_mask_gb[i][k-1] = 0;
 265                 }
 266             }
 267         }
 268         if (!bInclude13)
 269         {
 270             for (j = 0; j < ilist[F_GB13].nr; j += 3)
 271             {
 272                 a1 = ilist[F_GB13].iatoms[j+1];
 273                 a2 = ilist[F_GB13].iatoms[j+2];
 274
 275                 if (a1 == i)
 276                 {
 277                     k = a2-a1;
 278                 }
 279                 else if (a2 == i)
 280                 {
 281                     k = a1+natoms-a2;
 282                 }
 283                 else
 284                 {
 285                     continue;
 286                 }
 287                 if (k > 0 && k <= max_offset)
 288                 {
 289                     aadata->exclusion_mask_gb[i][k-1] = 0;
 290                 }
 291             }
 292         }
 293         if (!bInclude14)
 294         {
 295             for (j = 0; j < ilist[F_GB14].nr; j += 3)
 296             {
 297                 a1 = ilist[F_GB14].iatoms[j+1];
 298                 a2 = ilist[F_GB14].iatoms[j+2];
 299
 300                 if (a1 == i)
 301                 {
 302                     k = a2-a1;
 303                 }
 304                 else if (a2 == i)
 305                 {
 306                     k = a1+natoms-a2;
 307                 }
 308                 else
 309                 {
 310                     continue;
 311                 }
 312                 if (k > 0 && k <= max_offset)
 313                 {
 314                     aadata->exclusion_mask_gb[i][k-1] = 0;
 315                 }
 316             }
 317         }
 318
 319         /* End */
 320
 321         /* End */
 322         aadata->jindex_gb[3*i+2] = i+1+max_offset;
 323     }
 324 }
 325
 326
 327 static void
 328 genborn_allvsall_setup(gmx_allvsallgb2_data_t     **  p_aadata,
 329                        t_ilist     *                  ilist,
 330                        int                            natoms,
 331                        gmx_bool                       bInclude12,
 332                        gmx_bool                       bInclude13,
 333                        gmx_bool                       bInclude14)
 334 {
 335     int                     i, j, idx;
 336     gmx_allvsallgb2_data_t *aadata;
 337     real                   *p;
 338
 339     snew(aadata, 1);
 340     *p_aadata = aadata;
 341
 342     setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
 343 }
 344
 345
 346
 347 int
 348 genborn_allvsall_calc_still_radii(t_forcerec *           fr,
 349                                   t_mdatoms *            mdatoms,
 350                                   gmx_genborn_t *        born,
 351                                   gmx_localtop_t *       top,
 352                                   real *                 x,
 353                                   t_commrec *            cr,
 354                                   void *                 work)
 355 {
 356     gmx_allvsallgb2_data_t *aadata;
 357     int                     natoms;
 358     int                     ni0, ni1;
 359     int                     nj0, nj1, nj2;
 360     int                     i, j, k, n;
 361     int              *      mask;
 362
 363     real                    ix, iy, iz;
 364     real                    jx, jy, jz;
 365     real                    dx, dy, dz;
 366     real                    rsq, rinv;
 367     real                    gpi, rai, vai;
 368     real                    prod_ai;
 369     real                    irsq, idr4, idr6;
 370     real                    raj, rvdw, ratio;
 371     real                    vaj, ccf, dccf, theta, cosq;
 372     real                    term, prod, icf4, icf6, gpi2, factor, sinq;
 373
 374     natoms              = mdatoms->nr;
 375     ni0                 = mdatoms->start;
 376     ni1                 = mdatoms->start+mdatoms->homenr;
 377     factor              = 0.5*ONE_4PI_EPS0;
 378     n                   = 0;
 379
 380     aadata = *((gmx_allvsallgb2_data_t **)work);
 381
 382     if (aadata == NULL)
 383     {
 384         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
 385                                FALSE, FALSE, TRUE);
 386         *((gmx_allvsallgb2_data_t **)work) = aadata;
 387     }
 388
 389
 390     for (i = 0; i < born->nr; i++)
 391     {
 392         born->gpol_still_work[i] = 0;
 393     }
 394
 395
 396     for (i = ni0; i < ni1; i++)
 397     {
 398         /* We assume shifts are NOT used for all-vs-all interactions */
 399
 400         /* Load i atom data */
 401         ix                = x[3*i];
 402         iy                = x[3*i+1];
 403         iz                = x[3*i+2];
 404
 405         gpi               = 0.0;
 406
 407         rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]];
 408         vai     = born->vsolv[i];
 409         prod_ai = STILL_P4*vai;
 410
 411         /* Load limits for loop over neighbors */
 412         nj0              = aadata->jindex_gb[3*i];
 413         nj1              = aadata->jindex_gb[3*i+1];
 414         nj2              = aadata->jindex_gb[3*i+2];
 415
 416         mask             = aadata->exclusion_mask_gb[i];
 417
 418         /* Prologue part, including exclusion mask */
 419         for (j = nj0; j < nj1; j++, mask++)
 420         {
 421             if (*mask != 0)
 422             {
 423                 k = j%natoms;
 424
 425                 /* load j atom coordinates */
 426                 jx                = x[3*k];
 427                 jy                = x[3*k+1];
 428                 jz                = x[3*k+2];
 429
 430                 /* Calculate distance */
 431                 dx                = ix - jx;
 432                 dy                = iy - jy;
 433                 dz                = iz - jz;
 434                 rsq               = dx*dx+dy*dy+dz*dz;
 435
 436                 /* Calculate 1/r and 1/r2 */
 437                 rinv              = gmx_invsqrt(rsq);
 438                 irsq              = rinv*rinv;
 439                 idr4              = irsq*irsq;
 440                 idr6              = idr4*irsq;
 441
 442                 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
 443
 444                 rvdw  = rai + raj;
 445
 446                 ratio = rsq / (rvdw * rvdw);
 447                 vaj   = born->vsolv[k];
 448
 449
 450                 if (ratio > STILL_P5INV)
 451                 {
 452                     ccf  = 1.0;
 453                     dccf = 0.0;
 454                 }
 455                 else
 456                 {
 457                     theta = ratio*STILL_PIP5;
 458                     cosq  = cos(theta);
 459                     term  = 0.5*(1.0-cosq);
 460                     ccf   = term*term;
 461                     sinq  = 1.0 - cosq*cosq;
 462                     dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 463                 }
 464
 465                 prod          = STILL_P4*vaj;
 466                 icf4          = ccf*idr4;
 467                 icf6          = (4*ccf-dccf)*idr6;
 468
 469                 born->gpol_still_work[k] += prod_ai*icf4;
 470                 gpi                       = gpi+prod*icf4;
 471
 472                 /* Save ai->aj and aj->ai chain rule terms */
 473                 fr->dadx[n++]   = prod*icf6;
 474                 fr->dadx[n++]   = prod_ai*icf6;
 475
 476                 /* 27 flops, plus one cos(x) - estimate at 20 flops  => 47 */
 477
 478             }
 479         }
 480
 481         /* Main part, no exclusions */
 482         for (j = nj1; j < nj2; j++)
 483         {
 484             k = j%natoms;
 485
 486             /* load j atom coordinates */
 487             jx                = x[3*k];
 488             jy                = x[3*k+1];
 489             jz                = x[3*k+2];
 490
 491             /* Calculate distance */
 492             dx                = ix - jx;
 493             dy                = iy - jy;
 494             dz                = iz - jz;
 495             rsq               = dx*dx+dy*dy+dz*dz;
 496
 497             /* Calculate 1/r and 1/r2 */
 498             rinv              = gmx_invsqrt(rsq);
 499             irsq              = rinv*rinv;
 500             idr4              = irsq*irsq;
 501             idr6              = idr4*irsq;
 502
 503             raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
 504
 505             rvdw  = rai + raj;
 506
 507             ratio = rsq / (rvdw * rvdw);
 508             vaj   = born->vsolv[k];
 509
 510             if (ratio > STILL_P5INV)
 511             {
 512                 ccf  = 1.0;
 513                 dccf = 0.0;
 514             }
 515             else
 516             {
 517                 theta = ratio*STILL_PIP5;
 518                 cosq  = cos(theta);
 519                 term  = 0.5*(1.0-cosq);
 520                 ccf   = term*term;
 521                 sinq  = 1.0 - cosq*cosq;
 522                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 523             }
 524
 525             prod          = STILL_P4*vaj;
 526             icf4          = ccf*idr4;
 527             icf6          = (4*ccf-dccf)*idr6;
 528
 529             born->gpol_still_work[k] += prod_ai*icf4;
 530             gpi                       = gpi+prod*icf4;
 531
 532             /* Save ai->aj and aj->ai chain rule terms */
 533             fr->dadx[n++]   = prod*icf6;
 534             fr->dadx[n++]   = prod_ai*icf6;
 535         }
 536         born->gpol_still_work[i] += gpi;
 537     }
 538
 539     /* Parallel summations */
 540     if (PARTDECOMP(cr))
 541     {
 542         gmx_sum(natoms, born->gpol_still_work, cr);
 543     }
 544
 545     /* Calculate the radii */
 546     for (i = 0; i < natoms; i++)
 547     {
 548         if (born->use[i] != 0)
 549         {
 550             gpi             = born->gpol[i]+born->gpol_still_work[i];
 551             gpi2            = gpi * gpi;
 552             born->bRad[i]   = factor*gmx_invsqrt(gpi2);
 553             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 554         }
 555     }
 556
 557     return 0;
 558 }
 559
 560
 561
 562 int
 563 genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
 564                                     t_mdatoms *            mdatoms,
 565                                     gmx_genborn_t *        born,
 566                                     int                    gb_algorithm,
 567                                     gmx_localtop_t *       top,
 568                                     real *                 x,
 569                                     t_commrec *            cr,
 570                                     void *                 work)
 571 {
 572     gmx_allvsallgb2_data_t *aadata;
 573     int                     natoms;
 574     int                     ni0, ni1;
 575     int                     nj0, nj1, nj2;
 576     int                     i, j, k, n;
 577     int              *      mask;
 578
 579     real                    ix, iy, iz;
 580     real                    jx, jy, jz;
 581     real                    dx, dy, dz;
 582     real                    rsq, rinv;
 583     real                    prod, raj;
 584     real                    rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
 585     real                    dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
 586     real                    lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
 587     real                    tchain;
 588     real                    dadxi, dadxj;
 589     real                    rad, min_rad;
 590
 591     natoms              = mdatoms->nr;
 592     ni0                 = mdatoms->start;
 593     ni1                 = mdatoms->start+mdatoms->homenr;
 594
 595     n       = 0;
 596     prod    = 0;
 597     raj     = 0;
 598     doffset = born->gb_doffset;
 599
 600     aadata = *((gmx_allvsallgb2_data_t **)work);
 601
 602     if (aadata == NULL)
 603     {
 604         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
 605                                TRUE, TRUE, TRUE);
 606         *((gmx_allvsallgb2_data_t **)work) = aadata;
 607     }
 608
 609     for (i = 0; i < born->nr; i++)
 610     {
 611         born->gpol_hct_work[i] = 0;
 612     }
 613
 614     for (i = ni0; i < ni1; i++)
 615     {
 616         /* We assume shifts are NOT used for all-vs-all interactions */
 617
 618         /* Load i atom data */
 619         ix                = x[3*i];
 620         iy                = x[3*i+1];
 621         iz                = x[3*i+2];
 622
 623         rai      = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
 624         rai_inv  = 1.0/rai;
 625
 626         sk_ai    = born->param[i];
 627         sk2_ai   = sk_ai*sk_ai;
 628
 629         sum_ai   = 0;
 630
 631         /* Load limits for loop over neighbors */
 632         nj0              = aadata->jindex_gb[3*i];
 633         nj1              = aadata->jindex_gb[3*i+1];
 634         nj2              = aadata->jindex_gb[3*i+2];
 635
 636         mask             = aadata->exclusion_mask_gb[i];
 637
 638         /* Prologue part, including exclusion mask */
 639         for (j = nj0; j < nj1; j++, mask++)
 640         {
 641             if (*mask != 0)
 642             {
 643                 k = j%natoms;
 644
 645                 /* load j atom coordinates */
 646                 jx                = x[3*k];
 647                 jy                = x[3*k+1];
 648                 jz                = x[3*k+2];
 649
 650                 /* Calculate distance */
 651                 dx                = ix - jx;
 652                 dy                = iy - jy;
 653                 dz                = iz - jz;
 654                 rsq               = dx*dx+dy*dy+dz*dz;
 655
 656                 /* Calculate 1/r and 1/r2 */
 657                 rinv              = gmx_invsqrt(rsq);
 658                 dr                = rsq*rinv;
 659
 660                 /* sk is precalculated in init_gb() */
 661                 sk    = born->param[k];
 662                 raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
 663
 664                 /* aj -> ai interaction */
 665
 666
 667                 if (rai < dr+sk)
 668                 {
 669                     lij       = 1.0/(dr-sk);
 670                     dlij      = 1.0;
 671
 672                     if (rai > dr-sk)
 673                     {
 674                         lij  = rai_inv;
 675                         dlij = 0.0;
 676                     }
 677
 678                     uij      = 1.0/(dr+sk);
 679                     lij2     = lij  * lij;
 680                     lij3     = lij2 * lij;
 681                     uij2     = uij  * uij;
 682                     uij3     = uij2 * uij;
 683
 684                     diff2    = uij2-lij2;
 685
 686                     lij_inv  = gmx_invsqrt(lij2);
 687                     sk2      = sk*sk;
 688                     sk2_rinv = sk2*rinv;
 689                     prod     = 0.25*sk2_rinv;
 690
 691                     log_term = log(uij*lij_inv);
 692                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 693                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 694
 695                     if (rai < sk-dr)
 696                     {
 697                         tmp = tmp + 2.0 * (rai_inv-lij);
 698                     }
 699
 700                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 701                     t2      = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
 702
 703                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 704
 705                     dadxi = (dlij*t1+t2+t3)*rinv;
 706
 707                     sum_ai += 0.5*tmp;
 708                 }
 709                 else
 710                 {
 711                     dadxi = 0.0;
 712                 }
 713
 714                 /* ai -> aj interaction */
 715                 if (raj < dr + sk_ai)
 716                 {
 717                     lij     = 1.0/(dr-sk_ai);
 718                     dlij    = 1.0;
 719                     raj_inv = 1.0/raj;
 720
 721                     if (raj > dr-sk_ai)
 722                     {
 723                         lij  = raj_inv;
 724                         dlij = 0.0;
 725                     }
 726
 727                     lij2     = lij  * lij;
 728                     lij3     = lij2 * lij;
 729
 730                     uij      = 1.0/(dr+sk_ai);
 731                     uij2     = uij  * uij;
 732                     uij3     = uij2 * uij;
 733
 734                     diff2    = uij2-lij2;
 735
 736                     lij_inv  = gmx_invsqrt(lij2);
 737                     sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 738                     sk2_rinv = sk2*rinv;
 739                     prod     = 0.25 * sk2_rinv;
 740
 741                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 742                     log_term = log(uij*lij_inv);
 743
 744                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 745
 746                     if (raj < sk_ai-dr)
 747                     {
 748                         tmp     = tmp + 2.0 * (raj_inv-lij);
 749                     }
 750
 751                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 752                     t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 753                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 754
 755                     dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule  */
 756
 757                     born->gpol_hct_work[k] += 0.5*tmp;
 758                 }
 759                 else
 760                 {
 761                     dadxj = 0.0;
 762                 }
 763                 fr->dadx[n++] = dadxi;
 764                 fr->dadx[n++] = dadxj;
 765
 766             }
 767         }
 768
 769         /* Main part, no exclusions */
 770         for (j = nj1; j < nj2; j++)
 771         {
 772             k = j%natoms;
 773
 774             /* load j atom coordinates */
 775             jx                = x[3*k];
 776             jy                = x[3*k+1];
 777             jz                = x[3*k+2];
 778
 779             /* Calculate distance */
 780             dx                = ix - jx;
 781             dy                = iy - jy;
 782             dz                = iz - jz;
 783             rsq               = dx*dx+dy*dy+dz*dz;
 784
 785             /* Calculate 1/r and 1/r2 */
 786             rinv              = gmx_invsqrt(rsq);
 787             dr                = rsq*rinv;
 788
 789             /* sk is precalculated in init_gb() */
 790             sk    = born->param[k];
 791             raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
 792
 793             /* aj -> ai interaction */
 794             if (rai < dr+sk)
 795             {
 796                 lij       = 1.0/(dr-sk);
 797                 dlij      = 1.0;
 798
 799                 if (rai > dr-sk)
 800                 {
 801                     lij  = rai_inv;
 802                     dlij = 0.0;
 803                 }
 804
 805                 uij      = 1.0/(dr+sk);
 806                 lij2     = lij  * lij;
 807                 lij3     = lij2 * lij;
 808                 uij2     = uij  * uij;
 809                 uij3     = uij2 * uij;
 810
 811                 diff2    = uij2-lij2;
 812
 813                 lij_inv  = gmx_invsqrt(lij2);
 814                 sk2      = sk*sk;
 815                 sk2_rinv = sk2*rinv;
 816                 prod     = 0.25*sk2_rinv;
 817
 818                 log_term = log(uij*lij_inv);
 819                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 820                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 821
 822                 if (rai < sk-dr)
 823                 {
 824                     tmp = tmp + 2.0 * (rai_inv-lij);
 825                 }
 826
 827                 /* duij    = 1.0; */
 828                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 829                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 830                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 831
 832                 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
 833
 834                 sum_ai += 0.5*tmp;
 835             }
 836             else
 837             {
 838                 dadxi = 0.0;
 839             }
 840
 841             /* ai -> aj interaction */
 842             if (raj < dr + sk_ai)
 843             {
 844                 lij     = 1.0/(dr-sk_ai);
 845                 dlij    = 1.0;
 846                 raj_inv = 1.0/raj;
 847
 848                 if (raj > dr-sk_ai)
 849                 {
 850                     lij  = raj_inv;
 851                     dlij = 0.0;
 852                 }
 853
 854                 lij2     = lij  * lij;
 855                 lij3     = lij2 * lij;
 856
 857                 uij      = 1.0/(dr+sk_ai);
 858                 uij2     = uij  * uij;
 859                 uij3     = uij2 * uij;
 860
 861                 diff2    = uij2-lij2;
 862
 863                 lij_inv  = gmx_invsqrt(lij2);
 864                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 865                 sk2_rinv = sk2*rinv;
 866                 prod     = 0.25 * sk2_rinv;
 867
 868                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 869                 log_term = log(uij*lij_inv);
 870
 871                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 872
 873                 if (raj < sk_ai-dr)
 874                 {
 875                     tmp     = tmp + 2.0 * (raj_inv-lij);
 876                 }
 877
 878                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 879                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 880                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 881
 882                 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
 883
 884                 born->gpol_hct_work[k] += 0.5*tmp;
 885             }
 886             else
 887             {
 888                 dadxj = 0.0;
 889             }
 890             fr->dadx[n++] = dadxi;
 891             fr->dadx[n++] = dadxj;
 892         }
 893         born->gpol_hct_work[i] += sum_ai;
 894     }
 895
 896     /* Parallel summations */
 897     if (PARTDECOMP(cr))
 898     {
 899         gmx_sum(natoms, born->gpol_hct_work, cr);
 900     }
 901
 902     if (gb_algorithm == egbHCT)
 903     {
 904         /* HCT */
 905         for (i = 0; i < natoms; i++)
 906         {
 907             if (born->use[i] != 0)
 908             {
 909                 rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
 910                 sum_ai  = 1.0/rai - born->gpol_hct_work[i];
 911                 min_rad = rai + born->gb_doffset;
 912                 rad     = 1.0/sum_ai;
 913
 914                 born->bRad[i]   = rad > min_rad ? rad : min_rad;
 915                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 916             }
 917         }
 918
 919     }
 920     else
 921     {
 922         /* OBC */
 923         /* Calculate the radii */
 924         for (i = 0; i < natoms; i++)
 925         {
 926             if (born->use[i] != 0)
 927             {
 928                 rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
 929                 rai_inv2   = 1.0/rai;
 930                 rai        = rai-doffset;
 931                 rai_inv    = 1.0/rai;
 932                 sum_ai     = rai * born->gpol_hct_work[i];
 933                 sum_ai2    = sum_ai  * sum_ai;
 934                 sum_ai3    = sum_ai2 * sum_ai;
 935
 936                 tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
 937                 born->bRad[i] = rai_inv - tsum*rai_inv2;
 938                 born->bRad[i] = 1.0 / born->bRad[i];
 939
 940                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 941
 942                 tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
 943                 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
 944             }
 945         }
 946     }
 947     return 0;
 948 }
 949
 950
 951
 952
 953
 954 int
 955 genborn_allvsall_calc_chainrule(t_forcerec *           fr,
 956                                 t_mdatoms *            mdatoms,
 957                                 gmx_genborn_t *        born,
 958                                 real *                 x,
 959                                 real *                 f,
 960                                 int                    gb_algorithm,
 961                                 void *                 work)
 962 {
 963     gmx_allvsallgb2_data_t *aadata;
 964     int                     natoms;
 965     int                     ni0, ni1;
 966     int                     nj0, nj1, nj2;
 967     int                     i, j, k, n;
 968     int                     idx;
 969     int              *      mask;
 970
 971     real                    ix, iy, iz;
 972     real                    fix, fiy, fiz;
 973     real                    jx, jy, jz;
 974     real                    dx, dy, dz;
 975     real                    tx, ty, tz;
 976     real                    rbai, rbaj, fgb, fgb_ai, rbi;
 977     real              *     rb;
 978     real              *     dadx;
 979
 980     natoms              = mdatoms->nr;
 981     ni0                 = mdatoms->start;
 982     ni1                 = mdatoms->start+mdatoms->homenr;
 983     dadx                = fr->dadx;
 984
 985     aadata = (gmx_allvsallgb2_data_t *)work;
 986
 987     n  = 0;
 988     rb = born->work;
 989
 990     /* Loop to get the proper form for the Born radius term */
 991     if (gb_algorithm == egbSTILL)
 992     {
 993         for (i = 0; i < natoms; i++)
 994         {
 995             rbi   = born->bRad[i];
 996             rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
 997         }
 998     }
 999     else if (gb_algorithm == egbHCT)
1000     {
1001         for (i = 0; i < natoms; i++)
1002         {
1003             rbi   = born->bRad[i];
1004             rb[i] = rbi * rbi * fr->dvda[i];
1005         }
1006     }
1007     else if (gb_algorithm == egbOBC)
1008     {
1009         for (idx = 0; idx < natoms; idx++)
1010         {
1011             rbi     = born->bRad[idx];
1012             rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1013         }
1014     }
1015
1016     for (i = ni0; i < ni1; i++)
1017     {
1018         /* We assume shifts are NOT used for all-vs-all interactions */
1019
1020         /* Load i atom data */
1021         ix                = x[3*i];
1022         iy                = x[3*i+1];
1023         iz                = x[3*i+2];
1024
1025         fix               = 0;
1026         fiy               = 0;
1027         fiz               = 0;
1028
1029         rbai              = rb[i];
1030
1031         /* Load limits for loop over neighbors */
1032         nj0              = aadata->jindex_gb[3*i];
1033         nj1              = aadata->jindex_gb[3*i+1];
1034         nj2              = aadata->jindex_gb[3*i+2];
1035
1036         mask             = aadata->exclusion_mask_gb[i];
1037
1038         /* Prologue part, including exclusion mask */
1039         for (j = nj0; j < nj1; j++, mask++)
1040         {
1041             if (*mask != 0)
1042             {
1043                 k = j%natoms;
1044
1045                 /* load j atom coordinates */
1046                 jx                = x[3*k];
1047                 jy                = x[3*k+1];
1048                 jz                = x[3*k+2];
1049
1050                 /* Calculate distance */
1051                 dx                = ix - jx;
1052                 dy                = iy - jy;
1053                 dz                = iz - jz;
1054
1055                 rbaj              = rb[k];
1056
1057                 fgb     = rbai*dadx[n++];
1058                 fgb_ai  = rbaj*dadx[n++];
1059
1060                 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1061                 fgb     = fgb + fgb_ai;
1062
1063                 tx      = fgb * dx;
1064                 ty      = fgb * dy;
1065                 tz      = fgb * dz;
1066
1067                 fix     = fix + tx;
1068                 fiy     = fiy + ty;
1069                 fiz     = fiz + tz;
1070
1071                 /* Update force on atom aj */
1072                 f[3*k]   = f[3*k] - tx;
1073                 f[3*k+1] = f[3*k+1] - ty;
1074                 f[3*k+2] = f[3*k+2] - tz;
1075             }
1076         }
1077
1078         /* Main part, no exclusions */
1079         for (j = nj1; j < nj2; j++)
1080         {
1081             k = j%natoms;
1082
1083             /* load j atom coordinates */
1084             jx                = x[3*k];
1085             jy                = x[3*k+1];
1086             jz                = x[3*k+2];
1087
1088             /* Calculate distance */
1089             dx                = ix - jx;
1090             dy                = iy - jy;
1091             dz                = iz - jz;
1092
1093             rbaj              = rb[k];
1094
1095             fgb     = rbai*dadx[n++];
1096             fgb_ai  = rbaj*dadx[n++];
1097
1098             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1099             fgb     = fgb + fgb_ai;
1100
1101             tx      = fgb * dx;
1102             ty      = fgb * dy;
1103             tz      = fgb * dz;
1104
1105             fix     = fix + tx;
1106             fiy     = fiy + ty;
1107             fiz     = fiz + tz;
1108
1109             /* Update force on atom aj */
1110             f[3*k]   = f[3*k] - tx;
1111             f[3*k+1] = f[3*k+1] - ty;
1112             f[3*k+2] = f[3*k+2] - tz;
1113         }
1114         /* Update force and shift forces on atom ai */
1115         f[3*i]   = f[3*i] + fix;
1116         f[3*i+1] = f[3*i+1] + fiy;
1117         f[3*i+2] = f[3*i+2] + fiz;
1118     }
1119
1120     return 0;
1121 }