首页/文章/ 详情

稀疏矩阵高性能求解器PARDISO的使用

1年前浏览2889
PARDISO是一个商用的高速的稀疏矩阵线性方程组直接法求解器,其可以用于大型稀疏对称或非对称线性方程组的求解,商用版支持的并行方式包括共享内存,分布式内存和NVIDA的GPU并行。多次实践已经表明,PARDISO求解器具有优秀的性能,在各种稀疏矩阵直接求解器的比较中通常处于第一梯队,其他常见的直接法求解器包括SuiteSparse,MUMPS,SPOOLES等,商用有限元软件COMSOL的官方文档甚至直接指出在COMSOL中PARDISO求解速度快于MUMPS。。
对于线性方程组求解来说,一般常见的求解方法可分为直接法和迭代法。直接法常见的方法是高斯消去,LU分解法等,在求解过程中不需要迭代,但是在求解中可能需要较大内存,商业软件中经常使用的一种直接法叫做多波前法。迭代法又可分为两个大类,分裂迭代和Krylov迭代。前者通过将系数矩阵进行拆分形成迭代列式,常见的是雅克比迭代,高斯赛德尔迭代等,分裂迭代这一类型的迭代法在商业软件中很少直接使用,但是经常作为Krylov迭代的预处理基础;后者以Krylov子空间为基础上,常见的包括共轭梯度法,广义最小残差法等,Krylov迭代法在商业软件中较为常见。
在稀疏矩阵直接法求解中,一个常见的技术是矩阵重排,通过矩阵重排,可以使得矩阵的带宽减小(即非0元素位置相对更趋近于对角线),从而使得直接求解效率更高,一种常见的矩阵重排方法是Cuthill_McKee,常见的可以实现矩阵重排的库是METIS。以下是采用Cuthill_McKee重排前和重排后的非0元素分布。
重排前非0元素分布

非0元素分布
商用版本的PARDISO个人一般较少直接使用(主要是要买)。实际中较为广泛使用的其中一个版本是配置在MKL库的PARDISO版本。PARDISO求解需要将矩阵采用CSR格式存储,并且在求解中需要正确的设置较多的参数才能正确运行。
以下提供MKL_PARDISO的一个官方例子:
Fortran版本:































































































































































INCLUDE 'mkl_pardiso.f90'PROGRAM pardiso_sym_f90USE mkl_pardisoIMPLICIT NONEINTEGER, PARAMETER :: dp = KIND(1.0D0)!.. Internal solver memory pointer TYPE(MKL_PARDISO_HANDLE), ALLOCATABLE  :: pt(:)!.. All other variablesINTEGER maxfct, mnum, mtype, phase, n, nrhs, error, msglvl, nnzINTEGER error1INTEGER, ALLOCATABLE :: iparm( : )INTEGER, ALLOCATABLE :: ia( : )INTEGER, ALLOCATABLE :: ja( : )REAL(KIND=DP), ALLOCATABLE :: a( : )REAL(KIND=DP), ALLOCATABLE :: b( : )REAL(KIND=DP), ALLOCATABLE :: x( : )INTEGER i, idum(1)REAL(KIND=DP) ddum(1)!.. Fill all arrays containing matrix data.n = 8 nnz = 18nrhs = 1 maxfct = 1 mnum = 1ALLOCATE(ia(n + 1))ia = (/ 1, 5, 8, 10, 12, 15, 17, 18, 19 /)ALLOCATE(ja(nnz))ja = (/ 1,    3,       6, 7,    &           2, 3,    5,          &              3,             8, &                 4,       7,    &                    5, 6, 7,    &                       6,    8, &                          7,    &                             8 /)ALLOCATE(a(nnz))a = (/ 7.d0,        1.d0,             2.d0, 7.d0,        &             -4.d0, 8.d0,       2.d0,                    &                    1.d0,                         5.d0,  &                          7.d0,             9.d0,        &                                5.d0, 1.d0, 5.d0,        &                                     -1.d0,       5.d0,  &                                           11.d0,        &                                                  5.d0 /)ALLOCATE(b(n))ALLOCATE(x(n))!..!.. Set up PARDISO control parameter!..ALLOCATE(iparm(64))

DO i = 1, 64   iparm(i) = 0END DO

iparm(1) = 1 ! no solver defaultiparm(2) = 2 ! fill-in reordering from METISiparm(4) = 0 ! no iterative-direct algorithmiparm(5) = 0 ! no user fill-in reducing permutationiparm(6) = 0 ! =0 solution on the first n components of xiparm(8) = 2 ! numbers of iterative refinement stepsiparm(10) = 13 ! perturb the pivot elements with 1E-13iparm(11) = 1 ! use nonsymmetric permutation and scaling MPSiparm(13) = 0 iparm(14) = 0 ! Output: number of perturbed pivotsiparm(18) = -1 ! Output: number of nonzeros in the factor LUiparm(19) = -1 ! Output: Mflops for LU factorizationiparm(20) = 0 ! Output: Numbers of CG Iterations

error  = 0 ! initialize error flagmsglvl = 1 ! print statistical informationmtype  = -2 ! symmetric, indefinite

!.. Initialize the internal solver memory pointer. This is only! necessary for the FIRST call of the PARDISO solver.

ALLOCATE (pt(64))DO i = 1, 64   pt(i)%DUMMY =  0 END DO

!.. Reordering and Symbolic Factorization,! This step also allocates! all memory that is necessary for the factorization

phase = 11 ! only reordering and symbolic factorization

CALL pardiso (pt, maxfct, mnum, mtype, phase, n, a, ia, ja, &              idum, nrhs, iparm, msglvl, ddum, ddum, error)    WRITE(*,*) 'Reordering completed ... 'IF (error /= 0) THEN   WRITE(*,*) 'The following ERROR was detected: ', error   GOTO 1000END IFWRITE(*,*) 'Number of nonzeros in factors = ',iparm(18)WRITE(*,*) 'Number of factorization MFLOPS = ',iparm(19)

!.. Factorization.phase = 22 ! only factorizationCALL pardiso (pt, maxfct, mnum, mtype, phase, n, a, ia, ja, &              idum, nrhs, iparm, msglvl, ddum, ddum, error)WRITE(*,*) 'Factorization completed ... 'IF (error /= 0) THEN   WRITE(*,*) 'The following ERROR was detected: ', error   GOTO 1000ENDIF

!.. Back substitution and iterative refinementiparm(8) = 2 ! max numbers of iterative refinement stepsphase = 33 ! only solvingDO i = 1, n   b(i) = 1.d0END DOCALL pardiso (pt, maxfct, mnum, mtype, phase, n, a, ia, ja, &              idum, nrhs, iparm, msglvl, b, x, error)WRITE(*,*) 'Solve completed ... 'IF (error /= 0) THEN   WRITE(*,*) 'The following ERROR was detected: ', error   GOTO 1000ENDIFWRITE(*,*) 'The solution of the system is 'DO i = 1, n   WRITE(*,*) ' x(',i,') = ', x(i)END DO      1000 CONTINUE!.. Termination and release of memoryphase = -1 ! release internal memoryCALL pardiso (pt, maxfct, mnum, mtype, phase, n, ddum, idum, idum, &              idum, nrhs, iparm, msglvl, ddum, ddum, error1)

IF (ALLOCATED(ia))      DEALLOCATE(ia)IF (ALLOCATED(ja))      DEALLOCATE(ja)IF (ALLOCATED(a))       DEALLOCATE(a)IF (ALLOCATED(b))       DEALLOCATE(b)IF (ALLOCATED(x))       DEALLOCATE(x)IF (ALLOCATED(iparm))   DEALLOCATE(iparm)

IF (error1 /= 0) THEN   WRITE(*,*) 'The following ERROR on release stage was detected: ', error1   STOP 1ENDIF

IF (error /= 0) STOP 1END PROGRAM pardiso_sym_f90

具体使用:安装VS和INTEL oneAPI后,在VS中新建Fortran工程,点项目-属性--配置属性-Fortran-Libraries,将Use Intel Math Kernal Library选项改为Parrallel即可运行。
c语言版本:




















































































































































#include <stdio.h>#include <stdlib.h>#include <math.h>
#include "mkl_pardiso.h"#include "mkl_types.h"
// Define the format to printf MKL_INT values#if !defined(MKL_ILP64)#define IFORMAT "%i"#else#define IFORMAT "%lli"#endif
int main(void){    /* Matrix data. */    MKL_INT n = 8;    MKL_INT ia[9] = { 0, 4, 7, 9, 11, 14, 16, 17, 18 };    MKL_INT ja[18] =    { 0,   2,       5, 6,        1, 2,    4,           2,             7,              3,       6,                 4, 5, 6,                    5,    7,                       6,                          7    };    double a[18] =    { 7.0,      1.0,           2.0, 7.0,          -4.0, 8.0,      2.0,                1.0,                     5.0,                     7.0,           9.0,                          5.0, 1.0, 5.0,                              -1.0,      5.0,                                   11.0,                                         5.0    };    MKL_INT mtype = -2;       /* Real symmetric matrix */    /* RHS and solution vectors. */    double b[8], x[8];    MKL_INT nrhs = 1;     /* Number of right hand sides. */    /* Internal solver memory pointer pt, */    /* 32-bit: int pt[64]; 64-bit: long int pt[64] */    /* or void *pt[64] should be OK on both architectures */    void* pt[64];    /* Pardiso control parameters. */    MKL_INT iparm[64];    MKL_INT maxfct, mnum, phase, error, msglvl;    /* Auxiliary variables. */    MKL_INT i;    double ddum;          /* Double dummy */    MKL_INT idum;         /* Integer dummy. *//* -------------------------------------*//* .. Setup Pardiso control parameters. *//* -------------------------------------*/    for (i = 0; i < 64; i++)    {        iparm[i] = 0;    }    iparm[0] = 1;         /* No solver default */    iparm[1] = 2;         /* Fill-in reordering from METIS */    iparm[3] = 0;         /* No iterative-direct algorithm */    iparm[4] = 0;         /* No user fill-in reducing permutation */    iparm[5] = 0;         /* Write solution into x */    iparm[7] = 2;         /* Max numbers of iterative refinement steps */    iparm[9] = 13;        /* Perturb the pivot elements with 1E-13 */    iparm[10] = 1;        /* Use nonsymmetric permutation and scaling MPS */    iparm[12] = 0;          iparm[13] = 0;        /* Output: Number of perturbed pivots */    iparm[17] = -1;       /* Output: Number of nonzeros in the factor LU */    iparm[18] = -1;       /* Output: Mflops for LU factorization */    iparm[19] = 0;        /* Output: Numbers of CG Iterations */    iparm[34] = 1;        /* PARDISO use C-style indexing for ia and ja arrays */    maxfct = 1;           /* Maximum number of numerical factorizations. */    mnum = 1;         /* Which factorization to use. */    msglvl = 1;           /* Print statistical information in file */    error = 0;            /* Initialize error flag *//* ----------------------------------------------------------------*//* .. Initialize the internal solver memory pointer. This is only  *//*   necessary for the FIRST call of the PARDISO solver.           *//* ----------------------------------------------------------------*/    for (i = 0; i < 64; i++)    {        pt[i] = 0;    }    /* --------------------------------------------------------------------*/    /* .. Reordering and Symbolic Factorization. This step also allocates  */    /*    all memory that is necessary for the factorization.              */    /* --------------------------------------------------------------------*/    phase = 11;    PARDISO(pt, &maxfct, &mnum, &mtype, &phase,        &n, a, ia, ja, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, &error);    if (error != 0)    {        printf("\nERROR during symbolic factorization: " IFORMAT, error);        exit(1);    }    printf("\nReordering completed ... ");    printf("\nNumber of nonzeros in factors = " IFORMAT, iparm[17]);    printf("\nNumber of factorization MFLOPS = " IFORMAT, iparm[18]);    /* ----------------------------*/    /* .. Numerical factorization. */    /* ----------------------------*/    phase = 22;    PARDISO(pt, &maxfct, &mnum, &mtype, &phase,        &n, a, ia, ja, &idum, &nrhs, iparm, &msglvl, &ddum, &ddum, &error);    if (error != 0)    {        printf("\nERROR during numerical factorization: " IFORMAT, error);        exit(2);    }    printf("\nFactorization completed ... ");    /* -----------------------------------------------*/    /* .. Back substitution and iterative refinement. */    /* -----------------------------------------------*/    phase = 33;    iparm[7] = 2;         /* Max numbers of iterative refinement steps. */    /* Set right hand side to one. */    for (i = 0; i < n; i++)    {        b[i] = 1;    }    PARDISO(pt, &maxfct, &mnum, &mtype, &phase,        &n, a, ia, ja, &idum, &nrhs, iparm, &msglvl, b, x, &error);    if (error != 0)    {        printf("\nERROR during solution: " IFORMAT, error);        exit(3);    }    printf("\nSolve completed ... ");    printf("\nThe solution of the system is: ");    for (i = 0; i < n; i++)    {        printf("\n x [" IFORMAT "] = % f", i, x[i]);    }    printf("\n");    /* --------------------------------------*/    /* .. Termination and release of memory. */    /* --------------------------------------*/    phase = -1;           /* Release internal memory. */    PARDISO(pt, &maxfct, &mnum, &mtype, &phase,        &n, &ddum, ia, ja, &idum, &nrhs,        iparm, &msglvl, &ddum, &ddum, &error);    return 0;}

以下是计算结果:
以上,即是MKL_PARDISO的具体使用例子。在实际中,MKL版本的PARDISO性能是非常高的,在某些稀疏矩阵的条件下,求解速度可以远超matlab。
以下,是某个条件数比较大但是方程组规模不大(61349个未知数)的matlab和PARDISO求解速度对比:
在该问题中,采用AMD3900X的处理器,MKL_PARDISO求解时间为43.6s,求解速度是matlab的两倍多。该稀疏矩阵的下载链接如下:
链接:https://pan.baidu.com/s/1lw-SgNlnSUAB4o9dwJa14Q
提取码:xmim
以上,即是高性能稀疏矩阵线性方程组求解器的简单介绍,感谢您的阅读。


来源:有限元术

附件

免费链接.txt
UM
著作权归作者所有,欢迎分享,未经许可,不得转载
首次发布时间:2023-07-17
最近编辑:1年前
寒江雪_123
硕士 | cae工程师 签名征集中
获赞 49粉丝 106文章 54课程 9
点赞
收藏
未登录
还没有评论
课程
培训
服务
行家
VIP会员 学习 福利任务 兑换礼品
下载APP
联系我们
帮助与反馈