Sort all includes in src/gromacs
[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / nbnxn_kernel_gpu_ref.c
index ef2c42f9a0484e69eab4a3067e0a79ef193da3b8..5e8ac7ca2abd538f80614883b54427f65d2c016d 100644 (file)
@@ -1,52 +1,54 @@
-/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+/*
+ * This file is part of the GROMACS molecular simulation package.
  *
- * 
- *                This source code is part of
- * 
- *                 G   R   O   M   A   C   S
- * 
- *          GROningen MAchine for Chemical Simulations
- * 
- *                        VERSION 3.2.0
- * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2004, The GROMACS development team,
- * check out http://www.gromacs.org for more information.
-
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
  * of the License, or (at your option) any later version.
- * 
- * If you want to redistribute modifications, please consider that
- * scientific software is very special. Version control is crucial -
- * bugs must be traceable. We will be happy to consider code for
- * inclusion in the official distribution, but derived work must not
- * be called official GROMACS. Details are found in the README & COPYING
- * files - if they are missing, get the official version at www.gromacs.org.
- * 
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
  * To help us fund GROMACS development, we humbly ask that you cite
- * the papers on the package - you can find them in the top README file.
- * 
- * For more info, check our website at http://www.gromacs.org
- * 
- * And Hey:
- * GROningen Mixture of Alchemy and Childrens' Stories
+ * the research papers on the package. Check out http://www.gromacs.org.
  */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "nbnxn_kernel_gpu_ref.h"
+
+#include "config.h"
 
 #include <math.h>
 
-#include "types/simple.h"
-#include "maths.h"
-#include "vec.h"
-#include "typedefs.h"
-#include "force.h"
-#include "nbnxn_kernel_gpu_ref.h"
-#include "../nbnxn_consts.h"
-#include "nbnxn_kernel_common.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/simple.h"
+#include "gromacs/math/utilities.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
+#include "gromacs/pbcutil/ishift.h"
 
 #define NCL_PER_SUPERCL         (NBNXN_GPU_NCLUSTER_PER_SUPERCLUSTER)
 #define CL_SIZE                 (NBNXN_GPU_CLUSTER_SIZE)
@@ -56,63 +58,63 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
                      const nbnxn_atomdata_t     *nbat,
                      const interaction_const_t  *iconst,
                      rvec                       *shift_vec,
-                     int                        force_flags,
-                     int                        clearF,
-                     real *                     f,
-                     real *                     fshift,
-                     real *                     Vc,
-                     real *                     Vvdw)
+                     int                         force_flags,
+                     int                         clearF,
+                     real  *                     f,
+                     real  *                     fshift,
+                     real  *                     Vc,
+                     real  *                     Vvdw)
 {
-    const nbnxn_sci_t *nbln;
-    const real    *x;
-    gmx_bool      bEner;
-    gmx_bool      bEwald;
-    const real    *Ftab=NULL;
-    real          rcut2,rvdw2,rlist2;
-    int           ntype;
-    real          facel;
-    int           n;
-    int           ish3;
-    int           sci;
-    int           cj4_ind0,cj4_ind1,cj4_ind;
-    int           ci,cj;
-    int           ic,jc,ia,ja,is,ifs,js,jfs,im,jm;
-    int           n0;
-    int           ggid;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul=0,krsq,vctot;
-    int           nti;
-    int           tj;
-    real          rt,r,eps;
-    real          rinvsix;
-    real          Vvdwtot;
-    real          Vvdw_rep,Vvdw_disp;
-    real          ix,iy,iz,fix,fiy,fiz;
-    real          jx,jy,jz;
-    real          dx,dy,dz,rsq,rinv;
-    int           int_bit;
-    real          fexcl;
-    real          c6,c12,cexp1,cexp2,br;
-    const real *  shiftvec;
-    real *        vdwparam;
-    int *         shift;
-    int *         type;
+    const nbnxn_sci_t  *nbln;
+    const real         *x;
+    gmx_bool            bEner;
+    gmx_bool            bEwald;
+    const real         *Ftab = NULL;
+    real                rcut2, rvdw2, rlist2;
+    int                 ntype;
+    real                facel;
+    int                 n;
+    int                 ish3;
+    int                 sci;
+    int                 cj4_ind0, cj4_ind1, cj4_ind;
+    int                 ci, cj;
+    int                 ic, jc, ia, ja, is, ifs, js, jfs, im, jm;
+    int                 n0;
+    int                 ggid;
+    real                shX, shY, shZ;
+    real                fscal, tx, ty, tz;
+    real                rinvsq;
+    real                iq;
+    real                qq, vcoul = 0, krsq, vctot;
+    int                 nti;
+    int                 tj;
+    real                rt, r, eps;
+    real                rinvsix;
+    real                Vvdwtot;
+    real                Vvdw_rep, Vvdw_disp;
+    real                ix, iy, iz, fix, fiy, fiz;
+    real                jx, jy, jz;
+    real                dx, dy, dz, rsq, rinv;
+    int                 int_bit;
+    real                fexcl;
+    real                c6, c12, cexp1, cexp2, br;
+    const real       *  shiftvec;
+    real       *        vdwparam;
+    int       *         shift;
+    int       *         type;
     const nbnxn_excl_t *excl[2];
 
-    int           npair_tot,npair;
-    int           nhwu,nhwu_pruned;
+    int                 npair_tot, npair;
+    int                 nhwu, nhwu_pruned;
 
     if (nbl->na_ci != CL_SIZE)
     {
-        gmx_fatal(FARGS,"The neighborlist cluster size in the GPU reference kernel is %d, expected it to be %d",nbl->na_ci,CL_SIZE);
+        gmx_fatal(FARGS, "The neighborlist cluster size in the GPU reference kernel is %d, expected it to be %d", nbl->na_ci, CL_SIZE);
     }
 
     if (clearF == enbvClearFYes)
     {
-        clear_f(nbat, f);
+        clear_f(nbat, 0, f);
     }
 
     bEner = (force_flags & GMX_FORCE_ENERGY);
@@ -140,19 +142,19 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
     nhwu        = 0;
     nhwu_pruned = 0;
 
-    for(n=0; n<nbl->nsci; n++)
+    for (n = 0; n < nbl->nsci; n++)
     {
         nbln = &nbl->sci[n];
 
-        ish3             = 3*nbln->shift;     
-        shX              = shiftvec[ish3];  
+        ish3             = 3*nbln->shift;
+        shX              = shiftvec[ish3];
         shY              = shiftvec[ish3+1];
         shZ              = shiftvec[ish3+2];
-        cj4_ind0         = nbln->cj4_ind_start;      
-        cj4_ind1         = nbln->cj4_ind_end;    
+        cj4_ind0         = nbln->cj4_ind_start;
+        cj4_ind1         = nbln->cj4_ind_end;
         sci              = nbln->sci;
-        vctot            = 0;              
-        Vvdwtot          = 0;              
+        vctot            = 0;
+        Vvdwtot          = 0;
 
         if (nbln->shift == CENTRAL &&
             nbl->cj4[cj4_ind0].cj[0] == sci*NCL_PER_SUPERCL)
@@ -160,10 +162,10 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
             /* we have the diagonal:
              * add the charge self interaction energy term
              */
-            for(im=0; im<NCL_PER_SUPERCL; im++)
+            for (im = 0; im < NCL_PER_SUPERCL; im++)
             {
                 ci = sci*NCL_PER_SUPERCL + im;
-                for (ic=0; ic<CL_SIZE; ic++)
+                for (ic = 0; ic < CL_SIZE; ic++)
                 {
                     ia     = ci*CL_SIZE + ic;
                     iq     = x[ia*nbat->xstride+3];
@@ -177,20 +179,20 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
             else
             {
                 /* last factor 1/sqrt(pi) */
-                vctot *= -facel*iconst->ewaldcoeff*0.564189583548;
+                vctot *= -facel*iconst->ewaldcoeff_q*M_1_SQRTPI;
             }
         }
-        
-        for(cj4_ind=cj4_ind0; (cj4_ind<cj4_ind1); cj4_ind++)
+
+        for (cj4_ind = cj4_ind0; (cj4_ind < cj4_ind1); cj4_ind++)
         {
             excl[0]           = &nbl->excl[nbl->cj4[cj4_ind].imei[0].excl_ind];
             excl[1]           = &nbl->excl[nbl->cj4[cj4_ind].imei[1].excl_ind];
 
-            for(jm=0; jm<4; jm++)
+            for (jm = 0; jm < NBNXN_GPU_JGROUP_SIZE; jm++)
             {
                 cj               = nbl->cj4[cj4_ind].cj[jm];
 
-                for(im=0; im<NCL_PER_SUPERCL; im++)
+                for (im = 0; im < NCL_PER_SUPERCL; im++)
                 {
                     /* We're only using the first imask,
                      * but here imei[1].imask is identical.
@@ -203,10 +205,10 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
 
                         within_rlist     = FALSE;
                         npair            = 0;
-                        for(ic=0; ic<CL_SIZE; ic++)
+                        for (ic = 0; ic < CL_SIZE; ic++)
                         {
                             ia               = ci*CL_SIZE + ic;
-                    
+
                             is               = ia*nbat->xstride;
                             ifs              = ia*nbat->fstride;
                             ix               = shX + x[is+0];
@@ -214,12 +216,12 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
                             iz               = shZ + x[is+2];
                             iq               = facel*x[is+3];
                             nti              = ntype*2*type[ia];
-                    
+
                             fix              = 0;
                             fiy              = 0;
                             fiz              = 0;
 
-                            for(jc=0; jc<CL_SIZE; jc++)
+                            for (jc = 0; jc < CL_SIZE; jc++)
                             {
                                 ja               = cj*CL_SIZE + jc;
 
@@ -228,17 +230,17 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
                                 {
                                     continue;
                                 }
-                        
-                                int_bit = ((excl[jc>>2]->pair[(jc & 3)*CL_SIZE+ic] >> (jm*NCL_PER_SUPERCL+im)) & 1); 
+
+                                int_bit = ((excl[jc>>2]->pair[(jc & 3)*CL_SIZE+ic] >> (jm*NCL_PER_SUPERCL+im)) & 1);
 
                                 js               = ja*nbat->xstride;
                                 jfs              = ja*nbat->fstride;
-                                jx               = x[js+0];      
-                                jy               = x[js+1];      
-                                jz               = x[js+2];      
-                                dx               = ix - jx;      
-                                dy               = iy - jy;      
-                                dz               = iz - jz;      
+                                jx               = x[js+0];
+                                jy               = x[js+1];
+                                jz               = x[js+2];
+                                dx               = ix - jx;
+                                dy               = iy - jy;
+                                dz               = iz - jz;
                                 rsq              = dx*dx + dy*dy + dz*dz;
                                 if (rsq < rlist2)
                                 {
@@ -258,9 +260,9 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
                                 rsq             += (1.0 - int_bit)*NBNXN_AVOID_SING_R2_INC;
 
                                 rinv             = gmx_invsqrt(rsq);
-                                rinvsq           = rinv*rinv;  
+                                rinvsq           = rinv*rinv;
                                 fscal            = 0;
-                        
+
                                 qq               = iq*x[js+3];
                                 if (!bEwald)
                                 {
@@ -285,7 +287,7 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
 
                                     if (bEner)
                                     {
-                                        vcoul = qq*((int_bit - gmx_erf(iconst->ewaldcoeff*r))*rinv - int_bit*iconst->sh_ewald);
+                                        vcoul = qq*((int_bit - gmx_erf(iconst->ewaldcoeff_q*r))*rinv - int_bit*iconst->sh_ewald);
                                     }
                                 }
 
@@ -296,9 +298,9 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
                                     /* Vanilla Lennard-Jones cutoff */
                                     c6        = vdwparam[tj];
                                     c12       = vdwparam[tj+1];
-                                
+
                                     rinvsix   = int_bit*rinvsq*rinvsq*rinvsq;
-                                    Vvdw_disp = c6*rinvsix;     
+                                    Vvdw_disp = c6*rinvsix;
                                     Vvdw_rep  = c12*rinvsix*rinvsix;
                                     fscal    += (Vvdw_rep - Vvdw_disp)*rinvsq;
 
@@ -311,7 +313,7 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
                                             (Vvdw_disp - int_bit*c6*iconst->sh_invrc6)/6;
                                     }
                                 }
-                                
+
                                 tx        = fscal*dx;
                                 ty        = fscal*dy;
                                 tz        = fscal*dz;
@@ -322,7 +324,7 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
                                 f[jfs+1] -= ty;
                                 f[jfs+2] -= tz;
                             }
-                            
+
                             f[ifs+0]        += fix;
                             f[ifs+1]        += fiy;
                             f[ifs+2]        += fiz;
@@ -351,10 +353,10 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
                 }
             }
         }
-        
+
         if (bEner)
         {
-            ggid = 0;
+            ggid             = 0;
             Vc[ggid]         = Vc[ggid]   + vctot;
             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
         }
@@ -362,16 +364,16 @@ nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t     *nbl,
 
     if (debug)
     {
-        fprintf(debug,"number of half %dx%d atom pairs: %d after pruning: %d fraction %4.2f\n",
-                nbl->na_ci,nbl->na_ci,
-                nhwu,nhwu_pruned,nhwu_pruned/(double)nhwu);
-        fprintf(debug,"generic kernel pair interactions:            %d\n",
+        fprintf(debug, "number of half %dx%d atom pairs: %d after pruning: %d fraction %4.2f\n",
+                nbl->na_ci, nbl->na_ci,
+                nhwu, nhwu_pruned, nhwu_pruned/(double)nhwu);
+        fprintf(debug, "generic kernel pair interactions:            %d\n",
                 nhwu*nbl->na_ci/2*nbl->na_ci);
-        fprintf(debug,"generic kernel post-prune pair interactions: %d\n",
+        fprintf(debug, "generic kernel post-prune pair interactions: %d\n",
                 nhwu_pruned*nbl->na_ci/2*nbl->na_ci);
-        fprintf(debug,"generic kernel non-zero pair interactions:   %d\n",
+        fprintf(debug, "generic kernel non-zero pair interactions:   %d\n",
                 npair_tot);
-        fprintf(debug,"ratio non-zero/post-prune pair interactions: %4.2f\n",
+        fprintf(debug, "ratio non-zero/post-prune pair interactions: %4.2f\n",
                 npair_tot/(double)(nhwu_pruned*nbl->na_ci/2*nbl->na_ci));
     }
 }