Remove use of interaction_mask_indices on BG/Q
author Mark Abraham <mark.j.abraham@gmail.com>
Wed, 12 Nov 2014 02:41:15 +0000 (02:41 +0000)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Thu, 27 Nov 2014 18:17:45 +0000 (19:17 +0100)
This field was degrading cache performance ~1% on x86. It probably
made little difference on BG/Q, because the extra integer operations
can use the second instruction-issue port, assuming the use of OpenMP
to use more than one hardware thread per core. Overall, this code is
about 1% faster on BG/Q.

Minor fix to the gmx_load_simd_4xn_interactions() function that looks
up the exclusion masks, so that new non-x86 platforms won't silently
fail for want of an implementation of this function.

Minor simplification to always pass simd_interaction_array to
gmx_load_simd_4xn_interactions(), since it is only used on BG/Q and
then it is non-null.

Change-Id: I140a11607810e9cf08b702cae0b48426c3592fec

src/gromacs/legacyheaders/types/nbnxn_pairlist.h
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h
src/gromacs/mdlib/nbnxn_search.c
src/gromacs/mdlib/nbnxn_search_simd_4xn.h

index dec56d38f17f0c5dfe6f5bbe416f2dfc4d61a493..f89a418ce38db08c05f548d3337aca3b3bf6f052 100644 (file)
@@ -75,8 +75,6 @@ typedef void nbnxn_free_t (void *ptr);
 typedef struct {
     int          cj;    /* The j-cluster                    */
     unsigned int excl;  /* The exclusion (interaction) bits */
-    /* Indices into the arrays of SIMD interaction masks. */
-    char         interaction_mask_indices[4];
 } nbnxn_cj_t;
 
 /* In nbnxn_ci_t the integer shift contains the shift in the lower 7 bits.
index 1940d1a3da59269551818bb711c35331bb4ceb3e..7b4bc4f524eac0e8bfa8e03b1b35208291004842 100644 (file)
 #include "../nbnxn_kernel_simd_utils.h"
 
 static gmx_inline void gmx_simdcall
-gmx_load_simd_4xn_interactions(int gmx_unused             excl,
+gmx_load_simd_4xn_interactions(int                        excl,
                                gmx_exclfilter gmx_unused  filter_S0,
                                gmx_exclfilter gmx_unused  filter_S1,
                                gmx_exclfilter gmx_unused  filter_S2,
                                gmx_exclfilter gmx_unused  filter_S3,
-                               const char gmx_unused     *interaction_mask_indices,
                                real gmx_unused           *simd_interaction_array,
                                gmx_simd_bool_t           *interact_S0,
                                gmx_simd_bool_t           *interact_S1,
@@ -76,13 +75,14 @@ gmx_load_simd_4xn_interactions(int gmx_unused             excl,
     *interact_S1  = gmx_checkbitmask_pb(mask_pr_S, filter_S1);
     *interact_S2  = gmx_checkbitmask_pb(mask_pr_S, filter_S2);
     *interact_S3  = gmx_checkbitmask_pb(mask_pr_S, filter_S3);
-#endif
-#ifdef GMX_SIMD_IBM_QPX
+#elif defined GMX_SIMD_IBM_QPX
     const int size = GMX_SIMD_REAL_WIDTH * sizeof(real);
-    *interact_S0  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[0], simd_interaction_array);
-    *interact_S1  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[1], simd_interaction_array);
-    *interact_S2  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[2], simd_interaction_array);
-    *interact_S3  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[3], simd_interaction_array);
+    *interact_S0  = gmx_load_interaction_mask_pb(size*((excl >> (0 * UNROLLJ)) & 0xF), simd_interaction_array);
+    *interact_S1  = gmx_load_interaction_mask_pb(size*((excl >> (1 * UNROLLJ)) & 0xF), simd_interaction_array);
+    *interact_S2  = gmx_load_interaction_mask_pb(size*((excl >> (2 * UNROLLJ)) & 0xF), simd_interaction_array);
+    *interact_S3  = gmx_load_interaction_mask_pb(size*((excl >> (3 * UNROLLJ)) & 0xF), simd_interaction_array);
+#else
+#error "Need implementation of gmx_load_simd_4xn_interactions"
 #endif
 }
 
index 64467a121c95bca9f9a22629816ac9b27e03435a..b465954dc4246cd1b66277a554d8c56e01510905 100644 (file)
     gmx_load_simd_4xn_interactions(l_cj[cjind].excl,
                                    filter_S0, filter_S1,
                                    filter_S2, filter_S3,
-#ifdef GMX_SIMD_IBM_QPX
-                                   l_cj[cjind].interaction_mask_indices,
                                    nbat->simd_interaction_array,
-#else
-                                   /* The struct fields do not exist
-                                      except on BlueGene/Q */
-                                   NULL,
-                                   NULL,
-#endif
                                    &interact_S0, &interact_S1,
                                    &interact_S2, &interact_S3);
 #endif /* CHECK_EXCLS */
index 5d182ee11c15a1202243e9bc3f3e4428cffbf2df..d201407bfda9af23daef7e4e4dffea23951f2f0c 100644 (file)
@@ -3258,12 +3258,6 @@ static void set_ci_top_excls(const nbnxn_search_t nbs,
                         inner_e = ge - (se << na_cj_2log);
 
                         nbl->cj[found].excl &= ~(1U<<((inner_i<<na_cj_2log) + inner_e));
-/* The next code line is usually not needed. We do not want to version
- * away the above line, because there is logic that relies on being
- * able to detect easily whether any exclusions exist. */
-#if (defined GMX_SIMD_IBM_QPX)
-                        nbl->cj[found].interaction_mask_indices[inner_i] &= ~(1U << inner_e);
-#endif
                     }
                 }
             }
index 4931a1a4eb02087cc78991e7dfe421fa8e384827..8a328404badb64065b5791bef66e029bb0de6868 100644 (file)
@@ -263,12 +263,6 @@ make_cluster_list_simd_4xn(const nbnxn_grid_t *gridj,
             /* Store cj and the interaction mask */
             nbl->cj[nbl->ncj].cj   = CI_TO_CJ_SIMD_4XN(gridj->cell0) + cj;
             nbl->cj[nbl->ncj].excl = get_imask_simd_4xn(remove_sub_diag, ci, cj);
-#ifdef GMX_SIMD_IBM_QPX
-            nbl->cj[nbl->ncj].interaction_mask_indices[0] = (nbl->cj[nbl->ncj].excl & 0x000F) >> (0 * 4);
-            nbl->cj[nbl->ncj].interaction_mask_indices[1] = (nbl->cj[nbl->ncj].excl & 0x00F0) >> (1 * 4);
-            nbl->cj[nbl->ncj].interaction_mask_indices[2] = (nbl->cj[nbl->ncj].excl & 0x0F00) >> (2 * 4);
-            nbl->cj[nbl->ncj].interaction_mask_indices[3] = (nbl->cj[nbl->ncj].excl & 0xF000) >> (3 * 4);
-#endif
             nbl->ncj++;
         }
         /* Increase the closing index in i super-cell list */