Merge "fix a segfault in sse_mask_init" into release-4-6

author Roland Schulz <roland@rschulz.eu>

Wed, 25 Jul 2012 23:24:54 +0000 (01:24 +0200)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Wed, 25 Jul 2012 23:24:54 +0000 (01:24 +0200)
author Roland Schulz <roland@rschulz.eu>
Wed, 25 Jul 2012 23:24:54 +0000 (01:24 +0200)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Wed, 25 Jul 2012 23:24:54 +0000 (01:24 +0200)
diff --git a/src/mdlib/pme.c b/src/mdlib/pme.c

index 97d1828ff8a1cd8bddefe96e2bec22cbd2ca9ed5..c39c978f605de808143b77c114da2bfe481a333c 100644 (file)
--- a/src/mdlib/pme.c
+++ b/src/mdlib/pme.c
@@ -283,7 +283,7 @@ typedef struct gmx_pme {
      int     pmegrid_start_ix,pmegrid_start_iy,pmegrid_start_iz;
  
      /* Work data for spreading and gathering */
-    pme_spline_work_t spline_work;
+    pme_spline_work_t *spline_work;
  
      real *fftgridA;             /* Grids for FFT. With 1D FFT decomposition this can be a pointer */
      real *fftgridB;             /* inside the interpolation grid, but separate for 2D PME decomp. */
@@ -2166,7 +2166,7 @@ static void gather_f_bsplines(gmx_pme_t pme,real *grid,
  
      pme_spline_work_t *work;
  
-    work = &pme->spline_work;
+    work = pme->spline_work;
  
      order = pme->pme_order;
      thx   = spline->theta[XX];
@@ -2863,15 +2863,23 @@ make_gridindex5_to_localindex(int n,int local_start,int local_range,
      *fraction_shift  = fsh;
  }
  
-static void sse_mask_init(pme_spline_work_t *work,int order)
+static pme_spline_work_t *make_pme_spline_work(int order)
  {
+    pme_spline_work_t *work;
+
  #ifdef PME_SSE
      float  tmp[8];
      __m128 zero_SSE;
      int    of,i;
  
+    snew_aligned(work,1,16);
+
      zero_SSE = _mm_setzero_ps();
  
+    /* Generate bit masks to mask out the unused grid entries,
+     * as we only operate on order of the 8 grid entries that are
+     * loaded into 2 SSE float registers.
+     */
      for(of=0; of<8-(order-1); of++)
      {
          for(i=0; i<8; i++)
@@ -2883,7 +2891,11 @@ static void sse_mask_init(pme_spline_work_t *work,int order)
          work->mask_SSE0[of] = _mm_cmpgt_ps(work->mask_SSE0[of],zero_SSE);
          work->mask_SSE1[of] = _mm_cmpgt_ps(work->mask_SSE1[of],zero_SSE);
      }
+#else
+    work = NULL;
  #endif
+
+    return work;
  }
  
  static void
@@ -3161,7 +3173,7 @@ int gmx_pme_init(gmx_pme_t *         pmedata,
                    pme->overlap[0].s2g1[pme->nodeid_major]-pme->overlap[0].s2g0[pme->nodeid_major+1],
                    pme->overlap[1].s2g1[pme->nodeid_minor]-pme->overlap[1].s2g0[pme->nodeid_minor+1]);
  
-    sse_mask_init(&pme->spline_work,pme->pme_order);
+    pme->spline_work = make_pme_spline_work(pme->pme_order);
  
      ndata[0] = pme->nkx;
      ndata[1] = pme->nky;
@@ -3785,7 +3797,7 @@ static void spread_on_grid(gmx_pme_t pme,
  #ifdef PME_TIME_SPREAD
              ct1a = omp_cyc_start();
  #endif
-            spread_q_bsplines_thread(grid,atc,spline,&pme->spline_work);
+            spread_q_bsplines_thread(grid,atc,spline,pme->spline_work);
  
              if (grids->nthread > 1)
              {
author	Roland Schulz <roland@rschulz.eu>
	Wed, 25 Jul 2012 23:24:54 +0000 (01:24 +0200)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Wed, 25 Jul 2012 23:24:54 +0000 (01:24 +0200)