Fix AMD OpenCL float3 array optimization bug
[alexxy/gromacs.git] / src / gromacs / nbnxm / opencl / nbnxm_ocl_kernel.clh
index 60242b0ba87fb3fb6e494d67d37eb6f62ba9f119..11516fd4f1caf4214b93f3b5a288d20332e5e1a2 100644 (file)
@@ -254,10 +254,12 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl)
 #endif
     barrier(CLK_LOCAL_MEM_FENCE);
 
-    float3 fci_buf[c_nbnxnGpuNumClusterPerSupercluster]; /* i force buffer */
+    fvec fci_buf[c_nbnxnGpuNumClusterPerSupercluster]; /* i force buffer */
     for (int ci_offset = 0; ci_offset < c_nbnxnGpuNumClusterPerSupercluster; ci_offset++)
     {
-        fci_buf[ci_offset] = (float3)(0.0F);
+        fci_buf[ci_offset][0] = 0.0F;
+        fci_buf[ci_offset][1] = 0.0F;
+        fci_buf[ci_offset][2] = 0.0F;
     }
 
 #ifdef LJ_EWALD
@@ -554,7 +556,9 @@ __kernel void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_opencl)
                                 fcj_buf -= f_ij;
 
                                 /* accumulate i forces in registers */
-                                fci_buf[i] += f_ij;
+                                fci_buf[i][0] += f_ij.x;
+                                fci_buf[i][1] += f_ij.y;
+                                fci_buf[i][2] += f_ij.z;
                             }
                         }