src/mdlib/gmx_fft.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Gromacs 4.0                         Copyright (c) 1991-2003
   5  * Erik Lindahl, David van der Spoel, University of Groningen.
   6  * Copyright (c) 2012, by the GROMACS development team, led by
   7  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   8  * others, as listed in the AUTHORS file in the top-level source
   9  * directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44 #include <string.h>
  45 #include <errno.h>
  46
  47 #include "types/simple.h"
  48 #include "gmxcomplex.h"
  49 #include "gmx_fft.h"
  50
  51 #include "gmx_fatal.h"
  52
  53
  54 /* This file contains common fft utility functions, but not
  55  * the actual transform implementations. Check the
  56  * files like gmx_fft_fftw3.c or gmx_fft_intel_mkl.c for that.
  57  */
  58
  59 #ifndef GMX_FFT_FFTW3
  60
  61  struct gmx_many_fft {
  62      int howmany;
  63      int dist;
  64      gmx_fft_t fft;
  65  };
  66
  67 typedef struct gmx_many_fft* gmx_many_fft_t ;
  68
  69 int
  70 gmx_fft_init_many_1d(gmx_fft_t *        pfft,
  71                      int                nx,
  72                      int                howmany,
  73                      gmx_fft_flag       flags)
  74 {
  75     gmx_many_fft_t fft;
  76     if(pfft==NULL)
  77     {
  78         gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
  79         return EINVAL;
  80     }
  81     *pfft = NULL;
  82
  83     if( (fft = (gmx_many_fft_t)malloc(sizeof(struct gmx_many_fft))) == NULL)
  84     {
  85         return ENOMEM;
  86     }
  87
  88     gmx_fft_init_1d(&fft->fft,nx,flags);
  89     fft->howmany = howmany;
  90     fft->dist = 2*nx;
  91
  92     *pfft = (gmx_fft_t)fft;
  93     return 0;
  94 }
  95
  96 int
  97 gmx_fft_init_many_1d_real(gmx_fft_t *        pfft,
  98                      int                nx,
  99                      int                howmany,
 100                      gmx_fft_flag       flags)
 101 {
 102     gmx_many_fft_t fft;
 103     if(pfft==NULL)
 104     {
 105         gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
 106         return EINVAL;
 107     }
 108     *pfft = NULL;
 109
 110     if( (fft = (gmx_many_fft_t)malloc(sizeof(struct gmx_many_fft))) == NULL)
 111     {
 112         return ENOMEM;
 113     }
 114
 115     gmx_fft_init_1d_real(&fft->fft,nx,flags);
 116     fft->howmany = howmany;
 117     fft->dist = 2*(nx/2+1);
 118
 119     *pfft = (gmx_fft_t)fft;
 120     return 0;
 121 }
 122
 123 int
 124 gmx_fft_many_1d     (gmx_fft_t                  fft,
 125                      enum gmx_fft_direction     dir,
 126                      void *                     in_data,
 127                      void *                     out_data)
 128 {
 129     gmx_many_fft_t mfft = (gmx_many_fft_t)fft;
 130     int i,ret;
 131     for (i=0;i<mfft->howmany;i++)
 132     {
 133         ret=gmx_fft_1d(mfft->fft,dir,in_data,out_data);
 134         if (ret!=0) return ret;
 135         in_data=(real*)in_data+mfft->dist;
 136         out_data=(real*)out_data+mfft->dist;
 137     }
 138     return 0;
 139 }
 140
 141 int
 142 gmx_fft_many_1d_real     (gmx_fft_t                  fft,
 143                           enum gmx_fft_direction     dir,
 144                           void *                     in_data,
 145                           void *                     out_data)
 146 {
 147     gmx_many_fft_t mfft = (gmx_many_fft_t)fft;
 148     int i,ret;
 149     for (i=0;i<mfft->howmany;i++)
 150     {
 151         ret=gmx_fft_1d_real(mfft->fft,dir,in_data,out_data);
 152         if (ret!=0) return ret;
 153         in_data=(real*)in_data+mfft->dist;
 154         out_data=(real*)out_data+mfft->dist;
 155     }
 156     return 0;
 157 }
 158
 159
 160 void
 161 gmx_many_fft_destroy(gmx_fft_t    fft)
 162 {
 163     gmx_many_fft_t mfft = (gmx_many_fft_t)fft;
 164     if (mfft!=NULL)
 165     {
 166         if (mfft->fft!=NULL)
 167         {
 168             gmx_fft_destroy(mfft->fft);
 169         }
 170         free(mfft);
 171     }
 172 }
 173
 174 #endif
 175
 176 int gmx_fft_transpose_2d(t_complex *          in_data,
 177                          t_complex *          out_data,
 178                          int                  nx,
 179                          int                  ny)
 180 {
 181     int i,j,k,im,n,ncount,done1,done2;
 182     int i1,i1c,i2,i2c,kmi,max;
 183
 184     t_complex tmp1,tmp2,tmp3;
 185     t_complex *data;
 186
 187     /* Use 500 bytes on stack to indicate moves.
 188      * This is just for optimization, it does not limit any dimensions.
 189      */
 190     char          move[500];
 191     int           nmove = 500;
 192
 193     if(nx<2 || ny<2)
 194     {
 195         if(in_data != out_data)
 196         {
 197             memcpy(out_data,in_data,sizeof(t_complex)*nx*ny);
 198         }
 199         return 0;
 200     }
 201
 202     /* Out-of-place transposes are easy */
 203     if(in_data != out_data)
 204     {
 205         for(i=0;i<nx;i++)
 206         {
 207             for(j=0;j<ny;j++)
 208             {
 209                 out_data[j*nx+i].re = in_data[i*ny+j].re;
 210                 out_data[j*nx+i].im = in_data[i*ny+j].im;
 211             }
 212         }
 213         return 0;
 214     }
 215
 216     /* In-place transform. in_data=out_data=data */
 217     data = in_data;
 218
 219     if(nx==ny)
 220     {
 221         /* trivial case, just swap elements */
 222         for(i=0;i<nx;i++)
 223         {
 224             for(j=i+1;j<nx;j++)
 225             {
 226                 tmp1.re         = data[i*nx+j].re;
 227                 tmp1.im         = data[i*nx+j].im;
 228                 data[i*nx+j].re = data[j*nx+i].re;
 229                 data[i*nx+j].im = data[j*nx+i].im;
 230                 data[j*nx+i].re = tmp1.re;
 231                 data[j*nx+i].im = tmp1.im;
 232             }
 233         }
 234         return 0;
 235     }
 236
 237     for(i=0;i<nmove;i++)
 238     {
 239         move[i] = 0;
 240     }
 241
 242     ncount = 2;
 243
 244     if(nx>2 && ny>2)
 245     {
 246         i = nx-1;
 247         j = ny-1;
 248         do
 249         {
 250             k = i % j;
 251             i = j;
 252             j = k;
 253         }
 254         while(k);
 255         ncount += i-1;
 256     }
 257
 258     n = nx*ny;
 259     k = n - 1;
 260     i = 1;
 261     im = ny;
 262
 263     done1=0;
 264     do
 265     {
 266         i1 = i;
 267         kmi = k-i;
 268         tmp1.re = data[i1].re;
 269         tmp1.im = data[i1].im;
 270         i1c = kmi;
 271         tmp2.re = data[i1c].re;
 272         tmp2.im = data[i1c].im;
 273
 274         done2=0;
 275         do
 276         {
 277             i2 = ny*i1-k*(i1/nx);
 278             i2c = k-i2;
 279             if(i1<nmove)
 280             {
 281                 move[i1]= 1;
 282             }
 283             if(i1c<nmove)
 284             {
 285                 move[i1c] = 1;
 286             }
 287             ncount += 2;
 288             if(i2 == i)
 289             {
 290                 done2 = 1;
 291             }
 292             else if(i2 == kmi)
 293             {
 294                 tmp3.re = tmp1.re;
 295                 tmp3.im = tmp1.im;
 296                 tmp1.re = tmp2.re;
 297                 tmp1.im = tmp2.im;
 298                 tmp2.re = tmp3.re;
 299                 tmp2.im = tmp3.im;
 300                 done2 = 1;
 301             }
 302             else
 303             {
 304                 data[i1].re  = data[i2].re;
 305                 data[i1].im  = data[i2].im;
 306                 data[i1c].re = data[i2c].re;
 307                 data[i1c].im = data[i2c].im;
 308                 i1 = i2;
 309                 i1c = i2c;
 310             }
 311         }
 312         while(!done2);
 313
 314         data[i1].re  = tmp1.re;
 315         data[i1].im  = tmp1.im;
 316         data[i1c].re = tmp2.re;
 317         data[i1c].im = tmp2.im;
 318
 319         if(ncount >= n)
 320         {
 321             done1 = 1;
 322         }
 323         else
 324         {
 325             done2 = 0;
 326             do
 327             {
 328                 max = k-i;
 329                 i++;
 330                 im += ny;
 331                 if(im > k)
 332                 {
 333                     im -= k;
 334                 }
 335                 i2 = im;
 336                 if(i != i2)
 337                 {
 338                     if(i >= nmove)
 339                     {
 340                         while(i2>i && i2<max)
 341                         {
 342                             i1 = i2;
 343                             i2 = ny*i1-k*(i1/nx);
 344                         }
 345                         if(i2 == i)
 346                         {
 347                             done2 = 1;
 348                         }
 349                     }
 350                     else if(!move[i])
 351                     {
 352                         done2 = 1;
 353                     }
 354                 }
 355             }
 356             while(!done2);
 357         }
 358     }
 359     while(!done1);
 360
 361     return 0;
 362 }
 363
 364
 365
 366 /* Same as the above, but assume each (x,y) position holds
 367  * nelem complex numbers.
 368  * This is used when transposing the x/y dimensions of a
 369  * 3D matrix; set nelem to nz in this case.
 370  */
 371 int
 372 gmx_fft_transpose_2d_nelem(t_complex *             in_data,
 373                            t_complex *             out_data,
 374                            int                     nx,
 375                            int                     ny,
 376                            int                     nelem,
 377                            t_complex *             work)
 378 {
 379     int i,j,k,im,n,ncount,done1,done2;
 380     int i1,i1c,i2,i2c,kmi,max,ncpy;
 381
 382     t_complex *tmp1,*tmp2,*tmp3;
 383     t_complex *data;
 384
 385     /* Use 500 bytes on stack to indicate moves.
 386      * This is just for optimization, it does not limit any dimensions.
 387      */
 388     char          move[500];
 389     int           nmove = 500;
 390
 391     ncpy = nelem*sizeof(t_complex);
 392
 393     if(nx<2 || ny<2)
 394     {
 395         if(in_data != out_data)
 396         {
 397             memcpy(out_data,in_data,ncpy*nx*ny);
 398         }
 399         return 0;
 400     }
 401
 402     /* Out-of-place transposes are easy */
 403     if(in_data != out_data)
 404     {
 405         for(i=0;i<nx;i++)
 406         {
 407             for(j=0;j<ny;j++)
 408             {
 409                 memcpy(out_data + (j*nx+i)*nelem,
 410                        in_data  + (i*ny+j)*nelem,
 411                        ncpy);
 412             }
 413         }
 414         return 0;
 415     }
 416
 417
 418     /* In-place transform. in_data=out_data=data */
 419     data = in_data;
 420
 421
 422     /* Check the work array */
 423     if(work == NULL)
 424     {
 425         gmx_fatal(FARGS,
 426                   "No work array provided to gmx_fft_transpose_2d_nelem().");
 427         return EINVAL;
 428     }
 429
 430     tmp1 = work;
 431     tmp2 = work + nelem;
 432
 433     if(nx==ny)
 434     {
 435         /* trivial case, just swap elements */
 436         for(i=0;i<nx;i++)
 437         {
 438             for(j=i+1;j<nx;j++)
 439             {
 440                 memcpy(tmp1,data+(i*nx+j)*nelem,ncpy);
 441                 memcpy(data+(i*nx+j)*nelem,data+(j*nx+i)*nelem,ncpy);
 442                 memcpy(data+(j*nx+i)*nelem,tmp1,ncpy);
 443             }
 444         }
 445         return 0;
 446     }
 447
 448     for(i=0;i<nmove;i++)
 449     {
 450         move[i]=0;
 451     }
 452
 453     ncount = 2;
 454
 455     if(nx>2 && ny>2)
 456     {
 457         i = nx-1;
 458         j = ny-1;
 459         do
 460         {
 461             k = i % j;
 462             i = j;
 463             j = k;
 464         }
 465         while(k);
 466         ncount += i-1;
 467     }
 468
 469     n = nx*ny;
 470     k = n - 1;
 471     i = 1;
 472     im = ny;
 473
 474     done1=0;
 475     do
 476     {
 477         i1 = i;
 478         kmi = k-i;
 479         i1c=kmi;
 480         memcpy(tmp1,data+i1*nelem,ncpy);
 481         memcpy(tmp2,data+i1c*nelem,ncpy);
 482
 483         done2=0;
 484         do
 485         {
 486             i2 = ny*i1-k*(i1/nx);
 487             i2c = k-i2;
 488             if(i1<nmove)
 489             {
 490                 move[i1]=1;
 491             }
 492             if(i1c<nmove)
 493             {
 494                 move[i1c]=1;
 495             }
 496             ncount += 2;
 497             if(i2==i)
 498             {
 499                 done2 = 1;
 500             }
 501             else if(i2 == kmi)
 502             {
 503                 /* Swapping pointers instead of copying data */
 504                 tmp3=tmp1;
 505                 tmp1=tmp2;
 506                 tmp2=tmp3;
 507                 done2=1;
 508             }
 509             else
 510             {
 511                 memcpy(data+i1*nelem,data+i2*nelem,ncpy);
 512                 memcpy(data+i1c*nelem,data+i2c*nelem,ncpy);
 513                 i1=i2;
 514                 i1c = i2c;
 515             }
 516         }
 517         while(!done2);
 518
 519         memcpy(data+i1*nelem,tmp1,ncpy);
 520         memcpy(data+i1c*nelem,tmp2,ncpy);
 521
 522         if(ncount>=n)
 523         {
 524             done1=1;
 525         }
 526         else
 527         {
 528             done2=0;
 529             do
 530             {
 531                 max=k-i;
 532                 i++;
 533                 im+=ny;
 534                 if(im>k)
 535                 {
 536                     im-=k;
 537                 }
 538                 i2=im;
 539                 if(i!=i2)
 540                 {
 541                     if(i>=nmove)
 542                     {
 543                         while(i2>i && i2<max)
 544                         {
 545                             i1=i2;
 546                             i2=ny*i1-k*(i1/nx);
 547                         }
 548                         if(i2==i)
 549                         {
 550                             done2=1;
 551                         }
 552                     }
 553                     else if(!move[i])
 554                     {
 555                         done2=1;
 556                     }
 557                 }
 558             }
 559             while(!done2);
 560         }
 561     }
 562     while(!done1);
 563
 564     return 0;
 565 }
 566
 567
 568
 569