2 * This source code is part of
6 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 * Copyright (c) 2001-2009, The GROMACS Development Team
9 * Gromacs is a library for molecular simulation and trajectory analysis,
10 * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 * a full list of developers and information, check out http://www.gromacs.org
13 * This program is free software; you can redistribute it and/or modify it under
14 * the terms of the GNU Lesser General Public License as published by the Free
15 * Software Foundation; either version 2 of the License, or (at your option) any
17 * As a special exception, you may use this file as part of a free software
18 * library without restriction. Specifically, if other files instantiate
19 * templates or use macros or inline functions from this file, or you compile
20 * this file and link it with other files to produce an executable, this
21 * file does not by itself cause the resulting executable to be covered by
22 * the GNU Lesser General Public License.
24 * In plain-speak: do not worry about classes/macros/templates either - only
25 * changes to the library have to be LGPL, not an application linking with it.
27 * To help fund GROMACS development, we humbly ask that you cite
28 * the papers people have written on it - you can find them on the website!
36 #include "types/simple.h"
41 #include "nb_kernel_allvsallgb.h"
47 int ** exclusion_mask;
52 calc_maxoffset(int i,int natoms)
56 if ((natoms % 2) == 1)
58 /* Odd number of atoms, easy */
61 else if ((natoms % 4) == 0)
63 /* Multiple of four is hard */
105 setup_exclusions_and_indices(gmx_allvsall_data_t * aadata,
116 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
117 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
118 * whether they should interact or not.
120 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
121 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
122 * we create a jindex array with three elements per i atom: the starting point, the point to
123 * which we need to check exclusions, and the end point.
124 * This way we only have to allocate a short exclusion mask per i atom.
127 /* Allocate memory for our modified jindex array */
128 snew(aadata->jindex,3*natoms);
130 /* Pointer to lists with exclusion masks */
131 snew(aadata->exclusion_mask,natoms);
133 for(i=0;i<natoms;i++)
136 aadata->jindex[3*i] = i+1;
137 max_offset = calc_maxoffset(i,natoms);
140 nj0 = excl->index[i];
141 nj1 = excl->index[i+1];
143 /* first check the max range */
144 max_excl_offset = -1;
146 for(j=nj0; j<nj1; j++)
152 if( k+natoms <= max_offset )
157 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
160 max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
162 aadata->jindex[3*i+1] = i+1+max_excl_offset;
164 snew(aadata->exclusion_mask[i],max_excl_offset);
165 /* Include everything by default */
166 for(j=0;j<max_excl_offset;j++)
168 /* Use all-ones to mark interactions that should be present, compatible with SSE */
169 aadata->exclusion_mask[i][j] = 0xFFFFFFFF;
172 /* Go through exclusions again */
173 for(j=nj0; j<nj1; j++)
179 if( k+natoms <= max_offset )
184 if(k>0 && k<=max_excl_offset)
186 /* Excluded, kill it! */
187 aadata->exclusion_mask[i][k-1] = 0;
192 aadata->jindex[3*i+2] = i+1+max_offset;
198 setup_aadata(gmx_allvsall_data_t ** p_aadata,
206 gmx_allvsall_data_t *aadata;
212 /* Generate vdw params */
213 snew(aadata->pvdwparam,ntype);
217 snew(aadata->pvdwparam[i],2*natoms);
218 p = aadata->pvdwparam[i];
220 /* Lets keep it simple and use multiple steps - first create temp. c6/c12 arrays */
221 for(j=0;j<natoms;j++)
223 idx = i*ntype+type[j];
224 p[2*j] = pvdwparam[2*idx];
225 p[2*j+1] = pvdwparam[2*idx+1];
229 setup_exclusions_and_indices(aadata,excl,natoms);
235 nb_kernel_allvsallgb(t_forcerec * fr,
247 gmx_allvsall_data_t *aadata;
270 real rsq,rinv,rinvsq,rinvsix;
272 real c6,c12,Vvdw6,Vvdw12,Vvdwtot;
273 real fscal,dvdatmp,fijC,vgb;
274 real Y,F,Fp,Geps,Heps2,VV,FF,eps,eps2,r,rt;
275 real dvdaj,gbscale,isaprod,isai,isaj,gbtabscale;
277 charge = mdatoms->chargeA;
278 type = mdatoms->typeA;
279 gbfactor = ((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
281 GBtab = fr->gbtab.tab;
282 gbtabscale = fr->gbtab.scale;
283 invsqrta = fr->invsqrta;
286 natoms = mdatoms->nr;
287 ni0 = mdatoms->start;
288 ni1 = mdatoms->start+mdatoms->homenr;
290 aadata = *((gmx_allvsall_data_t **)work);
294 setup_aadata(&aadata,excl,natoms,type,fr->ntype,fr->nbfp);
295 *((gmx_allvsall_data_t **)work) = aadata;
298 for(i=ni0; i<ni1; i++)
300 /* We assume shifts are NOT used for all-vs-all interactions */
302 /* Load i atom data */
306 iq = facel*charge[i];
310 pvdw = aadata->pvdwparam[type[i]];
312 /* Zero the potential energy for this list */
318 /* Clear i atom forces */
323 /* Load limits for loop over neighbors */
324 nj0 = aadata->jindex[3*i];
325 nj1 = aadata->jindex[3*i+1];
326 nj2 = aadata->jindex[3*i+2];
328 mask = aadata->exclusion_mask[i];
330 /* Prologue part, including exclusion mask */
331 for(j=nj0; j<nj1; j++,mask++)
337 /* load j atom coordinates */
342 /* Calculate distance */
346 rsq = dx*dx+dy*dy+dz*dz;
348 /* Calculate 1/r and 1/r2 */
349 rinv = gmx_invsqrt(rsq);
351 /* Load parameters for j atom */
357 qq = isaprod*(-qq)*gbfactor;
358 gbscale = isaprod*gbtabscale;
363 /* Tabulated Generalized-Born interaction */
367 /* Calculate table index */
375 Geps = eps*GBtab[nnn+2];
376 Heps2 = eps2*GBtab[nnn+3];
379 FF = Fp+Geps+2.0*Heps2;
381 fijC = qq*FF*gbscale;
382 dvdatmp = -0.5*(vgb+fijC*r);
383 dvdasum = dvdasum + dvdatmp;
384 dvda[k] = dvdaj+dvdatmp*isaj*isaj;
385 vctot = vctot + vcoul;
386 vgbtot = vgbtot + vgb;
388 /* Lennard-Jones interaction */
389 rinvsix = rinvsq*rinvsq*rinvsq;
391 Vvdw12 = c12*rinvsix*rinvsix;
392 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
393 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv;
395 /* Calculate temporary vectorial force */
400 /* Increment i atom force */
405 /* Decrement j atom force */
406 f[3*k] = f[3*k] - tx;
407 f[3*k+1] = f[3*k+1] - ty;
408 f[3*k+2] = f[3*k+2] - tz;
410 /* Inner loop uses 38 flops/iteration */
413 /* Main part, no exclusions */
414 for(j=nj1; j<nj2; j++)
418 /* load j atom coordinates */
423 /* Calculate distance */
427 rsq = dx*dx+dy*dy+dz*dz;
429 /* Calculate 1/r and 1/r2 */
430 rinv = gmx_invsqrt(rsq);
432 /* Load parameters for j atom */
438 qq = isaprod*(-qq)*gbfactor;
439 gbscale = isaprod*gbtabscale;
444 /* Tabulated Generalized-Born interaction */
448 /* Calculate table index */
456 Geps = eps*GBtab[nnn+2];
457 Heps2 = eps2*GBtab[nnn+3];
460 FF = Fp+Geps+2.0*Heps2;
462 fijC = qq*FF*gbscale;
463 dvdatmp = -0.5*(vgb+fijC*r);
464 dvdasum = dvdasum + dvdatmp;
465 dvda[k] = dvdaj+dvdatmp*isaj*isaj;
466 vctot = vctot + vcoul;
467 vgbtot = vgbtot + vgb;
469 /* Lennard-Jones interaction */
470 rinvsix = rinvsq*rinvsq*rinvsq;
472 Vvdw12 = c12*rinvsix*rinvsix;
473 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
474 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv;
476 /* Calculate temporary vectorial force */
481 /* Increment i atom force */
486 /* Decrement j atom force */
487 f[3*k] = f[3*k] - tx;
488 f[3*k+1] = f[3*k+1] - ty;
489 f[3*k+2] = f[3*k+2] - tz;
491 /* Inner loop uses 38 flops/iteration */
498 /* Add potential energies to the group for this list */
501 Vc[ggid] = Vc[ggid] + vctot;
502 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
503 vpol[ggid] = vpol[ggid] + vgbtot;
504 dvda[i] = dvda[i] + dvdasum*isai*isai;
506 /* Outer loop uses 6 flops/iteration */
509 /* Write outer/inner iteration count to pointers */
510 *outeriter = ni1-ni0;
511 *inneriter = (ni1-ni0)*natoms/2;