Merge release-5-0 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sparc64_hpc_ace_double / nb_kernel_ElecEw_VdwLJEw_GeomW4P1_sparc64_hpc_ace_double.c
index 7eae21170b373d7a86fa65099418a3623f75193a..d15a704666dbc0ca481693d09cf40c50ed83c7e4 100644 (file)
@@ -262,11 +262,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
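             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2); matches the code below only if ewcljrsq == -(beta*r)^2, i.e. ewclj2 is pre-negated (set outside this hunk - confirm) */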
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -472,19 +472,21 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* Analytical LJ-PME */
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
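             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2); matches the code below only if ewcljrsq == -(beta*r)^2, i.e. ewclj2 is pre-negated (set outside this hunk - confirm) */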
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* vvdw6 = [C6 - C6grid * (1-poly)]/r6 */
-            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(-c6grid_00,_fjsp_sub_v2r8(one,poly),c6_00),rinvsix);
+            vvdw6            = _fjsp_mul_v2r8(_fjsp_madd_v2r8(c6grid_00,_fjsp_sub_v2r8(poly,one),c6_00),rinvsix);
             vvdw12           = _fjsp_mul_v2r8(c12_00,_fjsp_mul_v2r8(rinvsix,rinvsix));
            vvdw             = _fjsp_msub_v2r8(vvdw12,one_twelfth,_fjsp_mul_v2r8(vvdw6,one_sixth));         
             /* fvdw = vvdw12/r - (vvdw6/r + (C6grid * exponent * beta^6)/r) */
@@ -880,11 +882,11 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
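             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2); matches the code below only if ewcljrsq == -(beta*r)^2, i.e. ewclj2 is pre-negated (set outside this hunk - confirm) */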
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */
@@ -1060,19 +1062,21 @@ nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double
             r00              = _fjsp_mul_v2r8(rsq00,rinv00);
 
             /* Compute parameters for interactions between i and j atoms */
-            gmx_fjsp_load_1pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,&c6_00,&c12_00);
+            gmx_fjsp_load_2pair_swizzle_v2r8(vdwparam+vdwioffset0+vdwjidx0A,
+                                         vdwparam+vdwioffset0+vdwjidx0B,&c6_00,&c12_00);
 
-            c6grid_00       = gmx_fjsp_load_1real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A);
+            c6grid_00       = gmx_fjsp_load_2real_swizzle_v2r8(vdwgridparam+vdwioffset0+vdwjidx0A,
+                                                                   vdwgridparam+vdwioffset0+vdwjidx0B);
 
             /* Analytical LJ-PME */
             rinvsix          = _fjsp_mul_v2r8(_fjsp_mul_v2r8(rinvsq00,rinvsq00),rinvsq00);
             ewcljrsq         = _fjsp_mul_v2r8(ewclj2,rsq00);
             ewclj6           = _fjsp_mul_v2r8(ewclj2,_fjsp_mul_v2r8(ewclj2,ewclj2));
-            exponent         = gmx_simd_exp_d(-ewcljrsq);
+            exponent         = gmx_simd_exp_d(ewcljrsq);
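             /* poly = exp(-(beta*r)^2) * (1 + (beta*r)^2 + (beta*r)^4 /2); matches the code below only if ewcljrsq == -(beta*r)^2, i.e. ewclj2 is pre-negated (set outside this hunk - confirm) */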
            poly             = _fjsp_mul_v2r8(exponent,_fjsp_madd_v2r8(_fjsp_mul_v2r8(ewcljrsq,ewcljrsq),one_half,_fjsp_sub_v2r8(one,ewcljrsq)));
             /* f6A = 6 * C6grid * (1 - poly) */
-            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_msub_v2r8(one,poly));
+            f6A              = _fjsp_mul_v2r8(c6grid_00,_fjsp_sub_v2r8(one,poly));
             /* f6B = C6grid * exponent * beta^6 */
             f6B              = _fjsp_mul_v2r8(_fjsp_mul_v2r8(c6grid_00,one_sixth),_fjsp_mul_v2r8(exponent,ewclj6));
             /* fvdw = 12*C12/r13 - ((6*C6 - f6A)/r6 + f6B)/r */