2 * This source code is part of
6 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 * Copyright (c) 2001-2009, The GROMACS Development Team
9 * Gromacs is a library for molecular simulation and trajectory analysis,
10 * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 * a full list of developers and information, check out http://www.gromacs.org
13 * This program is free software; you can redistribute it and/or modify it under
14 * the terms of the GNU Lesser General Public License as published by the Free
15 * Software Foundation; either version 2 of the License, or (at your option) any
17 * As a special exception, you may use this file as part of a free software
18 * library without restriction. Specifically, if other files instantiate
19 * templates or use macros or inline functions from this file, or you compile
20 * this file and link it with other files to produce an executable, this
21 * file does not by itself cause the resulting executable to be covered by
22 * the GNU Lesser General Public License.
24 * In plain-speak: do not worry about classes/macros/templates either - only
25 * changes to the library have to be LGPL, not an application linking with it.
27 * To help fund GROMACS development, we humbly ask that you cite
28 * the papers people have written on it - you can find them on the website!
36 #include "types/simple.h"
41 #include "nb_kernel_allvsallgb.h"
48 int ** exclusion_mask;
53 calc_maxoffset(int i,int natoms)
57 if ((natoms % 2) == 1)
59 /* Odd number of atoms, easy */
62 else if ((natoms % 4) == 0)
64 /* Multiple of four is hard */
106 setup_exclusions_and_indices(gmx_allvsall_data_t * aadata,
117 /* This routine can appear to be a bit complex, but it is mostly book-keeping.
118 * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
119 * whether they should interact or not.
121 * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
122 * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
123 * we create a jindex array with three elements per i atom: the starting point, the point to
124 * which we need to check exclusions, and the end point.
125 * This way we only have to allocate a short exclusion mask per i atom.
128 /* Allocate memory for our modified jindex array */
129 snew(aadata->jindex,3*natoms);
131 /* Pointer to lists with exclusion masks */
132 snew(aadata->exclusion_mask,natoms);
134 for(i=0;i<natoms;i++)
137 aadata->jindex[3*i] = i+1;
138 max_offset = calc_maxoffset(i,natoms);
141 nj0 = excl->index[i];
142 nj1 = excl->index[i+1];
144 /* first check the max range */
145 max_excl_offset = -1;
147 for(j=nj0; j<nj1; j++)
153 if( k+natoms <= max_offset )
158 max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
161 max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
163 aadata->jindex[3*i+1] = i+1+max_excl_offset;
165 snew(aadata->exclusion_mask[i],max_excl_offset);
166 /* Include everything by default */
167 for(j=0;j<max_excl_offset;j++)
169 /* Use all-ones to mark interactions that should be present, compatible with SSE */
170 aadata->exclusion_mask[i][j] = 0xFFFFFFFF;
173 /* Go through exclusions again */
174 for(j=nj0; j<nj1; j++)
180 if( k+natoms <= max_offset )
185 if(k>0 && k<=max_excl_offset)
187 /* Excluded, kill it! */
188 aadata->exclusion_mask[i][k-1] = 0;
193 aadata->jindex[3*i+2] = i+1+max_offset;
199 setup_aadata(gmx_allvsall_data_t ** p_aadata,
207 gmx_allvsall_data_t *aadata;
213 /* Generate vdw params */
214 snew(aadata->pvdwparam,ntype);
218 snew(aadata->pvdwparam[i],2*natoms);
219 p = aadata->pvdwparam[i];
221 /* Lets keep it simple and use multiple steps - first create temp. c6/c12 arrays */
222 for(j=0;j<natoms;j++)
224 idx = i*ntype+type[j];
225 p[2*j] = pvdwparam[2*idx];
226 p[2*j+1] = pvdwparam[2*idx+1];
230 setup_exclusions_and_indices(aadata,excl,natoms);
236 nb_kernel_allvsallgb(t_nblist * nlist,
241 nb_kernel_data_t * kernel_data,
244 gmx_allvsall_data_t *aadata;
267 real rsq,rinv,rinvsq,rinvsix;
269 real c6,c12,Vvdw6,Vvdw12,Vvdwtot;
270 real fscal,dvdatmp,fijC,vgb;
271 real Y,F,Fp,Geps,Heps2,VV,FF,eps,eps2,r,rt;
272 real dvdaj,gbscale,isaprod,isai,isaj,gbtabscale;
282 charge = mdatoms->chargeA;
283 type = mdatoms->typeA;
284 gbfactor = ((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
286 GBtab = fr->gbtab.data;
287 gbtabscale = fr->gbtab.scale;
288 invsqrta = fr->invsqrta;
290 vpol = kernel_data->energygrp_polarization;
292 natoms = mdatoms->nr;
293 ni0 = mdatoms->start;
294 ni1 = mdatoms->start+mdatoms->homenr;
296 aadata = fr->AllvsAll_work;
297 excl = kernel_data->exclusions;
299 Vc = kernel_data->energygrp_elec;
300 Vvdw = kernel_data->energygrp_vdw;
304 setup_aadata(&aadata,excl,natoms,type,fr->ntype,fr->nbfp);
305 fr->AllvsAll_work = aadata;
308 for(i=ni0; i<ni1; i++)
310 /* We assume shifts are NOT used for all-vs-all interactions */
312 /* Load i atom data */
316 iq = facel*charge[i];
320 pvdw = aadata->pvdwparam[type[i]];
322 /* Zero the potential energy for this list */
328 /* Clear i atom forces */
333 /* Load limits for loop over neighbors */
334 nj0 = aadata->jindex[3*i];
335 nj1 = aadata->jindex[3*i+1];
336 nj2 = aadata->jindex[3*i+2];
338 mask = aadata->exclusion_mask[i];
340 /* Prologue part, including exclusion mask */
341 for(j=nj0; j<nj1; j++,mask++)
347 /* load j atom coordinates */
352 /* Calculate distance */
356 rsq = dx*dx+dy*dy+dz*dz;
358 /* Calculate 1/r and 1/r2 */
359 rinv = gmx_invsqrt(rsq);
361 /* Load parameters for j atom */
367 qq = isaprod*(-qq)*gbfactor;
368 gbscale = isaprod*gbtabscale;
373 /* Tabulated Generalized-Born interaction */
377 /* Calculate table index */
385 Geps = eps*GBtab[nnn+2];
386 Heps2 = eps2*GBtab[nnn+3];
389 FF = Fp+Geps+2.0*Heps2;
391 fijC = qq*FF*gbscale;
392 dvdatmp = -0.5*(vgb+fijC*r);
393 dvdasum = dvdasum + dvdatmp;
394 dvda[k] = dvdaj+dvdatmp*isaj*isaj;
395 vctot = vctot + vcoul;
396 vgbtot = vgbtot + vgb;
398 /* Lennard-Jones interaction */
399 rinvsix = rinvsq*rinvsq*rinvsq;
401 Vvdw12 = c12*rinvsix*rinvsix;
402 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
403 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv;
405 /* Calculate temporary vectorial force */
410 /* Increment i atom force */
415 /* Decrement j atom force */
416 f[3*k] = f[3*k] - tx;
417 f[3*k+1] = f[3*k+1] - ty;
418 f[3*k+2] = f[3*k+2] - tz;
420 /* Inner loop uses 38 flops/iteration */
423 /* Main part, no exclusions */
424 for(j=nj1; j<nj2; j++)
428 /* load j atom coordinates */
433 /* Calculate distance */
437 rsq = dx*dx+dy*dy+dz*dz;
439 /* Calculate 1/r and 1/r2 */
440 rinv = gmx_invsqrt(rsq);
442 /* Load parameters for j atom */
448 qq = isaprod*(-qq)*gbfactor;
449 gbscale = isaprod*gbtabscale;
454 /* Tabulated Generalized-Born interaction */
458 /* Calculate table index */
466 Geps = eps*GBtab[nnn+2];
467 Heps2 = eps2*GBtab[nnn+3];
470 FF = Fp+Geps+2.0*Heps2;
472 fijC = qq*FF*gbscale;
473 dvdatmp = -0.5*(vgb+fijC*r);
474 dvdasum = dvdasum + dvdatmp;
475 dvda[k] = dvdaj+dvdatmp*isaj*isaj;
476 vctot = vctot + vcoul;
477 vgbtot = vgbtot + vgb;
479 /* Lennard-Jones interaction */
480 rinvsix = rinvsq*rinvsq*rinvsq;
482 Vvdw12 = c12*rinvsix*rinvsix;
483 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6;
484 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv;
486 /* Calculate temporary vectorial force */
491 /* Increment i atom force */
496 /* Decrement j atom force */
497 f[3*k] = f[3*k] - tx;
498 f[3*k+1] = f[3*k+1] - ty;
499 f[3*k+2] = f[3*k+2] - tz;
501 /* Inner loop uses 38 flops/iteration */
508 /* Add potential energies to the group for this list */
511 Vc[ggid] = Vc[ggid] + vctot;
512 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
513 vpol[ggid] = vpol[ggid] + vgbtot;
514 dvda[i] = dvda[i] + dvdasum*isai*isai;
516 /* Outer loop uses 6 flops/iteration */
519 /* 12 flops per outer iteration
520 * 19 flops per inner iteration
522 inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,(ni1-ni0)*12 + ((ni1-ni0)*natoms/2)*19);