FF = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r00,vgb));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_macc_pd(_mm_macc_pd(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_macc_pd(fgb,r{I}{J},vgb));
+ /* #if ROUND == 'Epilogue' */
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
+ /* #endif */
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
/* #if ROUND == 'Loop' */
gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J})));
FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r00,vgb));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_macc_ps(_mm_macc_ps(twogbeps,H,G),gbeps,Fp);
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_macc_ps(fgb,r{I}{J},vgb));
+ /* #if ROUND == 'Epilogue' */
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
+ /* #endif */
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* #if ROUND == 'Loop' */
fjptrA = dvda+jnrA;
FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
+ /* Clear the lanes selected by dummy_mask (~dummy_mask & dvdatmp) so they add nothing to dvdasum; dvdatmp is double precision, hence the _pd form. */
+ dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp);
dvdasum = _mm256_add_pd(dvdasum,dvdatmp);
/* Entries with a negative jnrlist index are pointed at scratch instead of dvda, so that compilers that take gmx_restrict seriously (e.g. icc 13) cannot miscompile the updates. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
+ /* Clear the lanes selected by dummy_mask (~dummy_mask & dvdatmp) so they add nothing to dvdasum; dvdatmp is double precision, hence the _pd form. */
+ dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp);
dvdasum = _mm256_add_pd(dvdasum,dvdatmp);
/* Entries with a negative jnrlist index are pointed at scratch instead of dvda, so that compilers that take gmx_restrict seriously (e.g. icc 13) cannot miscompile the updates. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
+ /* Clear the lanes selected by dummy_mask (~dummy_mask & dvdatmp) so they add nothing to dvdasum; dvdatmp is double precision, hence the _pd form. */
+ dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp);
dvdasum = _mm256_add_pd(dvdasum,dvdatmp);
/* Entries with a negative jnrlist index are pointed at scratch instead of dvda, so that compilers that take gmx_restrict seriously (e.g. icc 13) cannot miscompile the updates. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
+ /* Clear the lanes selected by dummy_mask (~dummy_mask & dvdatmp) so they add nothing to dvdasum; dvdatmp is double precision, hence the _pd form. */
+ dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp);
dvdasum = _mm256_add_pd(dvdasum,dvdatmp);
/* Entries with a negative jnrlist index are pointed at scratch instead of dvda, so that compilers that take gmx_restrict seriously (e.g. icc 13) cannot miscompile the updates. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
+ /* Clear the lanes selected by dummy_mask (~dummy_mask & dvdatmp) so they add nothing to dvdasum; dvdatmp is double precision, hence the _pd form. */
+ dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp);
dvdasum = _mm256_add_pd(dvdasum,dvdatmp);
/* Entries with a negative jnrlist index are pointed at scratch instead of dvda, so that compilers that take gmx_restrict seriously (e.g. icc 13) cannot miscompile the updates. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r00)));
+ /* Clear the lanes selected by dummy_mask (~dummy_mask & dvdatmp) so they add nothing to dvdasum; dvdatmp is double precision, hence the _pd form. */
+ dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp);
dvdasum = _mm256_add_pd(dvdasum,dvdatmp);
/* Entries with a negative jnrlist index are pointed at scratch instead of dvda, so that compilers that take gmx_restrict seriously (e.g. icc 13) cannot miscompile the updates. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_pd(Fp,_mm256_mul_pd(gbeps,_mm256_add_pd(G,_mm256_add_pd(Heps,Heps))));
fgb = _mm256_mul_pd(gbqqfactor,_mm256_mul_pd(FF,gbscale));
dvdatmp = _mm256_mul_pd(minushalf,_mm256_add_pd(vgb,_mm256_mul_pd(fgb,r{I}{J})));
+ /* #if ROUND == 'Epilogue' */
+ /* Clear the lanes selected by dummy_mask (~dummy_mask & dvdatmp) so they add nothing to dvdasum; dvdatmp is double precision, hence the _pd form. */
+ dvdatmp = _mm256_andnot_pd(dummy_mask,dvdatmp);
+ /* #endif */
dvdasum = _mm256_add_pd(dvdasum,dvdatmp);
/* #if ROUND == 'Loop' */
fjptrA = dvda+jnrA;
FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
+ dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm256_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
+ dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm256_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
+ dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm256_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
+ dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm256_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
+ dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm256_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r00)));
+ dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm256_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm256_add_ps(Fp,_mm256_mul_ps(gbeps,_mm256_add_ps(G,_mm256_add_ps(Heps,Heps))));
fgb = _mm256_mul_ps(gbqqfactor,_mm256_mul_ps(FF,gbscale));
dvdatmp = _mm256_mul_ps(minushalf,_mm256_add_ps(vgb,_mm256_mul_ps(fgb,r{I}{J})));
+ /* #if ROUND == 'Epilogue' */
+ dvdatmp = _mm256_andnot_ps(dummy_mask,dvdatmp);
+ /* #endif */
dvdasum = _mm256_add_ps(dvdasum,dvdatmp);
/* #if ROUND == 'Loop' */
fjptrA = dvda+jnrA;
vgbsum = 0.0;
vvdwsum = 0.0;
dvdasum = 0.0;
-
/* Start inner kernel loop */
for(jidx=j_index_start; jidx<j_index_end; jidx++)
{
FF = Fp+Geps+2.0*Heps2;
fgb = gbqqfactor*FF*gbscale;
/* Per-pair dvda term: accumulated into dvdasum below, and stored into dvda[jnr] scaled by isaj0^2.
 * No per-pair tracing here: printing from the kernel loops writes to stdout for every pair.
 */
dvdatmp = -0.5*(vgb+fgb*r00);
dvdasum = dvdasum + dvdatmp;
dvda[jnr] = dvdaj+dvdatmp*isaj0*isaj0;
velec = qq00*rinv00;
felec = (velec*rinv00-fgb)*rinv00;
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r{I}{J})));
+ /* #if ROUND == 'Epilogue' */
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
+ /* #endif */
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
/* #if ROUND == 'Loop' */
gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J})));
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r{I}{J})));
+ /* #if ROUND == 'Epilogue' */
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
+ /* #endif */
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* #if ROUND == 'Loop' */
fjptrA = dvda+jnrA;
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r00)));
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
gmx_mm_increment_1real_pd(dvda+jnrA,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj0,isaj0)));
velec = _mm_mul_pd(qq00,rinv00);
FF = _mm_add_pd(Fp,_mm_mul_pd(gbeps,_mm_add_pd(G,_mm_add_pd(Heps,Heps))));
fgb = _mm_mul_pd(gbqqfactor,_mm_mul_pd(FF,gbscale));
dvdatmp = _mm_mul_pd(minushalf,_mm_add_pd(vgb,_mm_mul_pd(fgb,r{I}{J})));
+ /* #if ROUND == 'Epilogue' */
+ dvdatmp = _mm_unpacklo_pd(dvdatmp,_mm_setzero_pd());
+ /* #endif */
dvdasum = _mm_add_pd(dvdasum,dvdatmp);
/* #if ROUND == 'Loop' */
gmx_mm_increment_2real_swizzle_pd(dvda+jnrA,dvda+jnrB,_mm_mul_pd(dvdatmp,_mm_mul_pd(isaj{J},isaj{J})));
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r00)));
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* The pointers to scratch make sure that this code with compilers that take gmx_restrict seriously (e.g. icc 13) really can't screw things up. */
fjptrA = (jnrlistA>=0) ? dvda+jnrA : scratch;
FF = _mm_add_ps(Fp,_mm_mul_ps(gbeps,_mm_add_ps(G,_mm_add_ps(Heps,Heps))));
fgb = _mm_mul_ps(gbqqfactor,_mm_mul_ps(FF,gbscale));
dvdatmp = _mm_mul_ps(minushalf,_mm_add_ps(vgb,_mm_mul_ps(fgb,r{I}{J})));
+ /* #if ROUND == 'Epilogue' */
+ dvdatmp = _mm_andnot_ps(dummy_mask,dvdatmp);
+ /* #endif */
dvdasum = _mm_add_ps(dvdasum,dvdatmp);
/* #if ROUND == 'Loop' */
fjptrA = dvda+jnrA;
fr->bMolPBC = dd_bonded_molpbc(cr->dd,fr->ePBC);
}
}
+ fr->bGB = (ir->implicit_solvent == eisGBSA);
fr->rc_scaling = ir->refcoord_scaling;
copy_rvec(ir->posres_com,fr->posres_com);
switch(fr->eeltype)
{
case eelCUT:
- fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_COULOMB;
+ fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
break;
case eelRF:
set_bham_b_max(fp,fr,mtop);
}
- fr->bGB = (ir->implicit_solvent == eisGBSA);
fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
/* Copy the GBSA data (radius, volume and surftens for each
real rinv,idr2,idr6,vaj,dccf,cosq,sinq,prod,gpi2;
real factor;
real vai, prod_ai, icf4,icf6;
-
+
factor = 0.5*ONE_4PI_EPS0;
n = 0;
-
+
for(i=0;i<born->nr;i++)
{
born->gpol_still_work[i]=0;
}
-
- for(i=0;i<nl->nri;i++ )
+
+ for(i=0;i<nl->nri;i++ )
{
ai = nl->iinr[i];
-
+
nj0 = nl->jindex[i];
nj1 = nl->jindex[i+1];
-
+
/* Load shifts for this list */
shift = nl->shift[i];
shX = fr->shift_vec[shift][0];
ix1 = shX + x[ai][0];
iy1 = shY + x[ai][1];
iz1 = shZ + x[ai][2];
-
- for(k=nj0;k<nj1;k++)
+
+ for(k=nj0;k<nj1 && nl->jjnr[k]>=0;k++)
{
aj = nl->jjnr[k];
jx1 = x[aj][0];
ratio = dr2 / (rvdw * rvdw);
vaj = born->vsolv[aj];
- if(ratio>STILL_P5INV)
+ if(ratio>STILL_P5INV)
{
ccf=1.0;
dccf=0.0;
prod = STILL_P4*vaj;
icf4 = ccf*idr4;
icf6 = (4*ccf-dccf)*idr6;
-
born->gpol_still_work[aj] += prod_ai*icf4;
gpi = gpi+prod*icf4;
{
if(born->use[i] != 0)
{
-
gpi = born->gpol[i]+born->gpol_still_work[i];
gpi2 = gpi * gpi;
born->bRad[i] = factor*gmx_invsqrt(gpi2);
sum_ai = 0;
- for(k=nj0;k<nj1;k++)
+ for(k=nj0;k<nj1 && nl->jjnr[k]>=0;k++)
{
aj = nl->jjnr[k];
sum_ai = 0;
- for(k=nj0;k<nj1;k++)
+ for(k=nj0;k<nj1 && nl->jjnr[k]>=0;k++)
{
aj = nl->jjnr[k];
real *p;
int cnt;
int ndadx;
-
+
if(fr->bAllvsAll && fr->dadx==NULL)
{
/* We might need up to 8 atoms of padding before and after,
switch(ir->gb_algorithm)
{
case egbSTILL:
- calc_gb_rad_still(cr,fr,born->nr,top,atype,x,nl,born,md);
+ calc_gb_rad_still(cr,fr,born->nr,top,atype,x,nl,born,md);
break;
case egbHCT:
calc_gb_rad_hct(cr,fr,born->nr,top,atype,x,nl,born,md);
rbai = rb[ai];
- for(k=nj0;k<nj1;k++)
+ for(k=nj0;k<nj1 && nl->jjnr[k]>=0;k++)
{
aj = nl->jjnr[k];