src/gmxlib/gmx_lapack/sgetri.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   5  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   6  * others, as listed in the AUTHORS file in the top-level source
   7  * directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmx_blas.h"
  36 #include "gmx_lapack.h"
  37 #include "lapack_limits.h"
  38
  39 void
  40 F77_FUNC(sgetri,SGETRI)(int *n,
  41         float *a,
  42         int *lda,
  43         int *ipiv,
  44         float *work,
  45         int *lwork,
  46         int *info)
  47 {
  48     int a_dim1, a_offset, i__1, i__2, i__3;
  49
  50     int i__, j, jb, nb, jj, jp, nn, iws;
  51     int nbmin;
  52     int ldwork;
  53     int lwkopt;
  54     int c__1 = 1;
  55     float c_b20 = -1.;
  56     float c_b22 = 1.;
  57
  58     a_dim1 = *lda;
  59     a_offset = 1 + a_dim1;
  60     a -= a_offset;
  61     --ipiv;
  62     --work;
  63
  64     *info = 0;
  65     nb = DGETRI_BLOCKSIZE;
  66     lwkopt = *n * nb;
  67     work[1] = (float) lwkopt;
  68
  69     if (*n < 0) {
  70         *info = -1;
  71     } else if (*lda < (*n)) {
  72         *info = -3;
  73     } else if (*lwork < (*n) && *lwork!=-1) {
  74         *info = -6;
  75     }
  76     if (*info != 0) {
  77         i__1 = -(*info);
  78         return;
  79     } else if (*lwork == -1) {
  80         return;
  81     }
  82
  83     if (*n == 0) {
  84         return;
  85     }
  86
  87     F77_FUNC(strtri,STRTRI)("Upper", "Non-unit", n, &a[a_offset], lda, info);
  88     if (*info > 0) {
  89         return;
  90     }
  91
  92     nbmin = 2;
  93     ldwork = *n;
  94     if (nb > 1 && nb < *n) {
  95         i__1 = ldwork * nb;
  96         iws = (i__1>1) ? i__1 : 1;
  97         if (*lwork < iws) {
  98             nb = *lwork / ldwork;
  99             nbmin = DGETRI_MINBLOCKSIZE;
 100         }
 101     } else {
 102         iws = *n;
 103     }
 104
 105     if (nb < nbmin || nb >= *n) {
 106
 107         for (j = *n; j >= 1; --j) {
 108
 109             i__1 = *n;
 110             for (i__ = j + 1; i__ <= i__1; ++i__) {
 111                 work[i__] = a[i__ + j * a_dim1];
 112                 a[i__ + j * a_dim1] = 0.;
 113             }
 114
 115             if (j < *n) {
 116                 i__1 = *n - j;
 117                 F77_FUNC(sgemv,SGEMV)("No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1
 118                         + 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1
 119                         + 1], &c__1);
 120             }
 121         }
 122     } else {
 123
 124         nn = (*n - 1) / nb * nb + 1;
 125         i__1 = -nb;
 126         for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) {
 127             i__2 = nb, i__3 = *n - j + 1;
 128             jb = (i__2<i__3) ? i__2 : i__3;
 129
 130             i__2 = j + jb - 1;
 131             for (jj = j; jj <= i__2; ++jj) {
 132                 i__3 = *n;
 133                 for (i__ = jj + 1; i__ <= i__3; ++i__) {
 134                     work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1];
 135                     a[i__ + jj * a_dim1] = 0.;
 136                 }
 137             }
 138
 139             if (j + jb <= *n) {
 140                 i__2 = *n - j - jb + 1;
 141                 F77_FUNC(sgemm,SGEMM)("No transpose", "No transpose", n, &jb, &i__2, &c_b20,
 142                         &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], &
 143                         ldwork, &c_b22, &a[j * a_dim1 + 1], lda);
 144             }
 145             F77_FUNC(strsm,STRSM)("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b22, &
 146                     work[j], &ldwork, &a[j * a_dim1 + 1], lda);
 147         }
 148     }
 149
 150     for (j = *n - 1; j >= 1; --j) {
 151         jp = ipiv[j];
 152         if (jp != j) {
 153             F77_FUNC(sswap,SSWAP)(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1);
 154         }
 155     }
 156
 157     work[1] = (float) iws;
 158     return;
 159
 160 }
 161
 162