src/gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.c

   1 /*
   2  *                This source code is part of
   3  *
   4  *                 G   R   O   M   A   C   S
   5  *
   6  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   7  * Copyright (c) 2001-2009, The GROMACS Development Team
   8  *
   9  * Gromacs is a library for molecular simulation and trajectory analysis,
  10  * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
  11  * a full list of developers and information, check out http://www.gromacs.org
  12  *
  13  * This program is free software; you can redistribute it and/or modify it under
  14  * the terms of the GNU Lesser General Public License as published by the Free
  15  * Software Foundation; either version 2 of the License, or (at your option) any
  16  * later version.
  17  * As a special exception, you may use this file as part of a free software
  18  * library without restriction.  Specifically, if other files instantiate
  19  * templates or use macros or inline functions from this file, or you compile
  20  * this file and link it with other files to produce an executable, this
  21  * file does not by itself cause the resulting executable to be covered by
  22  * the GNU Lesser General Public License.
  23  *
  24  * In plain-speak: do not worry about classes/macros/templates either - only
  25  * changes to the library have to be LGPL, not an application linking with it.
  26  *
  27  * To help fund GROMACS development, we humbly ask that you cite
  28  * the papers people have written on it - you can find them on the website!
  29  */
  30 #ifdef HAVE_CONFIG_H
  31 #include <config.h>
  32 #endif
  33
  34 #include <math.h>
  35
  36 #include "types/simple.h"
  37
  38 #include "vec.h"
  39 #include "smalloc.h"
  40
  41 #include "nb_kernel_allvsall.h"
  42 #include "nrnb.h"
  43
  44 typedef struct
  45 {
  46     real **    pvdwparam;
  47     int *      jindex;
  48     int **     exclusion_mask;
  49 }
  50 gmx_allvsall_data_t;
  51
  52 static int
  53 calc_maxoffset(int i,int natoms)
  54 {
  55     int maxoffset;
  56
  57     if ((natoms % 2) == 1)
  58     {
  59         /* Odd number of atoms, easy */
  60         maxoffset = natoms/2;
  61     }
  62     else if ((natoms % 4) == 0)
  63     {
  64         /* Multiple of four is hard */
  65         if (i < natoms/2)
  66         {
  67             if ((i % 2) == 0)
  68             {
  69                 maxoffset=natoms/2;
  70             }
  71             else
  72             {
  73                 maxoffset=natoms/2-1;
  74             }
  75         }
  76         else
  77         {
  78             if ((i % 2) == 1)
  79             {
  80                 maxoffset=natoms/2;
  81             }
  82             else
  83             {
  84                 maxoffset=natoms/2-1;
  85             }
  86         }
  87     }
  88     else
  89     {
  90         /* natoms/2 = odd */
  91         if ((i % 2) == 0)
  92         {
  93             maxoffset=natoms/2;
  94         }
  95         else
  96         {
  97             maxoffset=natoms/2-1;
  98         }
  99     }
 100
 101     return maxoffset;
 102 }
 103
 104
 105 static void
 106 setup_exclusions_and_indices(gmx_allvsall_data_t *   aadata,
 107                              t_blocka *              excl,
 108                              int                     natoms)
 109 {
 110     int i,j,k,iexcl;
 111     int nj0,nj1;
 112     int max_offset;
 113     int max_excl_offset;
 114     int nj;
 115
 116     /* This routine can appear to be a bit complex, but it is mostly book-keeping.
 117      * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
 118      * whether they should interact or not.
 119      *
 120      * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
 121      * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
 122      * we create a jindex array with three elements per i atom: the starting point, the point to
 123      * which we need to check exclusions, and the end point.
 124      * This way we only have to allocate a short exclusion mask per i atom.
 125      */
 126
 127     /* Allocate memory for our modified jindex array */
 128     snew(aadata->jindex,3*natoms);
 129
 130     /* Pointer to lists with exclusion masks */
 131     snew(aadata->exclusion_mask,natoms);
 132
 133     for(i=0;i<natoms;i++)
 134     {
 135         /* Start */
 136         aadata->jindex[3*i]   = i+1;
 137         max_offset = calc_maxoffset(i,natoms);
 138
 139         /* Exclusions */
 140         nj0   = excl->index[i];
 141         nj1   = excl->index[i+1];
 142
 143         /* first check the max range */
 144         max_excl_offset = -1;
 145
 146         for(j=nj0; j<nj1; j++)
 147         {
 148             iexcl = excl->a[j];
 149
 150             k = iexcl - i;
 151
 152             if( k+natoms <= max_offset )
 153             {
 154                 k+=natoms;
 155             }
 156
 157             max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
 158         }
 159
 160         max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
 161
 162         aadata->jindex[3*i+1] = i+1+max_excl_offset;
 163
 164
 165         snew(aadata->exclusion_mask[i],max_excl_offset);
 166         /* Include everything by default */
 167         for(j=0;j<max_excl_offset;j++)
 168         {
 169             /* Use all-ones to mark interactions that should be present, compatible with SSE */
 170             aadata->exclusion_mask[i][j] = 0xFFFFFFFF;
 171         }
 172
 173         /* Go through exclusions again */
 174         for(j=nj0; j<nj1; j++)
 175         {
 176             iexcl = excl->a[j];
 177
 178             k = iexcl - i;
 179
 180             if( k+natoms <= max_offset )
 181             {
 182                 k+=natoms;
 183             }
 184
 185             if(k>0 && k<=max_excl_offset)
 186             {
 187                 /* Excluded, kill it! */
 188                 aadata->exclusion_mask[i][k-1] = 0;
 189             }
 190         }
 191
 192         /* End */
 193         aadata->jindex[3*i+2] = i+1+max_offset;
 194     }
 195 }
 196
 197 static void
 198 setup_aadata(gmx_allvsall_data_t **  p_aadata,
 199                          t_blocka *              excl,
 200              int                     natoms,
 201              int *                   type,
 202              int                     ntype,
 203              real *                  pvdwparam)
 204 {
 205         int i,j,idx;
 206         gmx_allvsall_data_t *aadata;
 207     real *p;
 208
 209         snew(aadata,1);
 210         *p_aadata = aadata;
 211
 212     /* Generate vdw params */
 213     snew(aadata->pvdwparam,ntype);
 214
 215     for(i=0;i<ntype;i++)
 216     {
 217         snew(aadata->pvdwparam[i],2*natoms);
 218         p = aadata->pvdwparam[i];
 219
 220         /* Lets keep it simple and use multiple steps - first create temp. c6/c12 arrays */
 221         for(j=0;j<natoms;j++)
 222         {
 223             idx             = i*ntype+type[j];
 224             p[2*j]          = pvdwparam[2*idx];
 225             p[2*j+1]        = pvdwparam[2*idx+1];
 226         }
 227     }
 228
 229     setup_exclusions_and_indices(aadata,excl,natoms);
 230 }
 231
 232
 233
 234 void
 235 nb_kernel_allvsall(t_nblist *                nlist,
 236                    rvec *                    xx,
 237                    rvec *                    ff,
 238                    t_forcerec *              fr,
 239                    t_mdatoms *               mdatoms,
 240                    nb_kernel_data_t *        kernel_data,
 241                    t_nrnb *                  nrnb)
 242 {
 243         gmx_allvsall_data_t *aadata;
 244         int        natoms;
 245         int        ni0,ni1;
 246         int        nj0,nj1,nj2;
 247         int        i,j,k;
 248         real *     charge;
 249         int *      type;
 250     real       facel;
 251         real *     pvdw;
 252         int        ggid;
 253     int *      mask;
 254
 255     real       ix,iy,iz,iq;
 256     real       fix,fiy,fiz;
 257     real       jx,jy,jz,qq;
 258     real       dx,dy,dz;
 259     real       tx,ty,tz;
 260     real       rsq,rinv,rinvsq,rinvsix;
 261     real       vcoul,vctot;
 262     real       c6,c12,Vvdw6,Vvdw12,Vvdwtot;
 263     real       fscal;
 264     t_blocka * excl;
 265     real *     f;
 266     real *     x;
 267     real *     Vvdw;
 268     real *     Vc;
 269
 270     x                   = xx[0];
 271     f                   = ff[0];
 272         charge              = mdatoms->chargeA;
 273         type                = mdatoms->typeA;
 274         facel               = fr->epsfac;
 275     natoms              = mdatoms->nr;
 276         ni0                 = mdatoms->start;
 277         ni1                 = mdatoms->start+mdatoms->homenr;
 278     aadata              = fr->AllvsAll_work;
 279     excl                = kernel_data->exclusions;
 280
 281     Vc                  = kernel_data->energygrp_elec;
 282     Vvdw                = kernel_data->energygrp_vdw;
 283
 284         if(aadata==NULL)
 285         {
 286                 setup_aadata(&aadata,excl,natoms,type,fr->ntype,fr->nbfp);
 287         fr->AllvsAll_work  = aadata;
 288         }
 289
 290         for(i=ni0; i<ni1; i++)
 291         {
 292                 /* We assume shifts are NOT used for all-vs-all interactions */
 293
 294                 /* Load i atom data */
 295         ix                = x[3*i];
 296         iy                = x[3*i+1];
 297         iz                = x[3*i+2];
 298         iq                = facel*charge[i];
 299
 300         pvdw              = aadata->pvdwparam[type[i]];
 301
 302                 /* Zero the potential energy for this list */
 303                 Vvdwtot           = 0.0;
 304         vctot             = 0.0;
 305
 306                 /* Clear i atom forces */
 307         fix               = 0.0;
 308         fiy               = 0.0;
 309         fiz               = 0.0;
 310
 311                 /* Load limits for loop over neighbors */
 312                 nj0              = aadata->jindex[3*i];
 313                 nj1              = aadata->jindex[3*i+1];
 314                 nj2              = aadata->jindex[3*i+2];
 315
 316         mask             = aadata->exclusion_mask[i];
 317
 318         /* Prologue part, including exclusion mask */
 319         for(j=nj0; j<nj1; j++,mask++)
 320         {
 321             if(*mask!=0)
 322             {
 323                 k = j%natoms;
 324
 325                 /* load j atom coordinates */
 326                 jx                = x[3*k];
 327                 jy                = x[3*k+1];
 328                 jz                = x[3*k+2];
 329
 330                 /* Calculate distance */
 331                 dx                = ix - jx;
 332                 dy                = iy - jy;
 333                 dz                = iz - jz;
 334                 rsq               = dx*dx+dy*dy+dz*dz;
 335
 336                 /* Calculate 1/r and 1/r2 */
 337                 rinv              = gmx_invsqrt(rsq);
 338                 rinvsq            = rinv*rinv;
 339
 340                 /* Load parameters for j atom */
 341                 qq                = iq*charge[k];
 342                 c6                = pvdw[2*k];
 343                 c12               = pvdw[2*k+1];
 344
 345                 /* Coulomb interaction */
 346                 vcoul             = qq*rinv;
 347                 vctot             = vctot+vcoul;
 348
 349                 /* Lennard-Jones interaction */
 350                 rinvsix           = rinvsq*rinvsq*rinvsq;
 351                 Vvdw6             = c6*rinvsix;
 352                 Vvdw12            = c12*rinvsix*rinvsix;
 353                 Vvdwtot           = Vvdwtot+Vvdw12-Vvdw6;
 354                 fscal             = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
 355
 356                 /* Calculate temporary vectorial force */
 357                 tx                = fscal*dx;
 358                 ty                = fscal*dy;
 359                 tz                = fscal*dz;
 360
 361                 /* Increment i atom force */
 362                 fix               = fix + tx;
 363                 fiy               = fiy + ty;
 364                 fiz               = fiz + tz;
 365
 366                 /* Decrement j atom force */
 367                 f[3*k]            = f[3*k]   - tx;
 368                 f[3*k+1]          = f[3*k+1] - ty;
 369                 f[3*k+2]          = f[3*k+2] - tz;
 370             }
 371             /* Inner loop uses 38 flops/iteration */
 372         }
 373
 374         /* Main part, no exclusions */
 375         for(j=nj1; j<nj2; j++)
 376         {
 377             k = j%natoms;
 378
 379             /* load j atom coordinates */
 380             jx                = x[3*k];
 381             jy                = x[3*k+1];
 382             jz                = x[3*k+2];
 383
 384             /* Calculate distance */
 385             dx                = ix - jx;
 386             dy                = iy - jy;
 387             dz                = iz - jz;
 388             rsq               = dx*dx+dy*dy+dz*dz;
 389
 390             /* Calculate 1/r and 1/r2 */
 391             rinv              = gmx_invsqrt(rsq);
 392             rinvsq            = rinv*rinv;
 393
 394             /* Load parameters for j atom */
 395             qq                = iq*charge[k];
 396             c6                = pvdw[2*k];
 397             c12               = pvdw[2*k+1];
 398
 399             /* Coulomb interaction */
 400             vcoul             = qq*rinv;
 401             vctot             = vctot+vcoul;
 402
 403             /* Lennard-Jones interaction */
 404             rinvsix           = rinvsq*rinvsq*rinvsq;
 405             Vvdw6             = c6*rinvsix;
 406             Vvdw12            = c12*rinvsix*rinvsix;
 407             Vvdwtot           = Vvdwtot+Vvdw12-Vvdw6;
 408             fscal             = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
 409
 410             /* Calculate temporary vectorial force */
 411             tx                = fscal*dx;
 412             ty                = fscal*dy;
 413             tz                = fscal*dz;
 414
 415             /* Increment i atom force */
 416             fix               = fix + tx;
 417             fiy               = fiy + ty;
 418             fiz               = fiz + tz;
 419
 420             /* Decrement j atom force */
 421             f[3*k]            = f[3*k]   - tx;
 422             f[3*k+1]          = f[3*k+1] - ty;
 423             f[3*k+2]          = f[3*k+2] - tz;
 424
 425             /* Inner loop uses 38 flops/iteration */
 426         }
 427
 428         f[3*i]   += fix;
 429         f[3*i+1] += fiy;
 430         f[3*i+2] += fiz;
 431
 432                 /* Add potential energies to the group for this list */
 433                 ggid             = 0;
 434
 435                 Vc[ggid]         = Vc[ggid] + vctot;
 436         Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
 437
 438                 /* Outer loop uses 6 flops/iteration */
 439         }
 440
 441     /* 12 flops per outer iteration
 442      * 19 flops per inner iteration
 443      */
 444     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,(ni1-ni0)*12 + ((ni1-ni0)*natoms/2)*19);
 445 }
 446
 447