/**
 * @file pastix_dcores.h
 *
 * PaStiX kernel header.
 *
 * @copyright 2011-2024 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 * @version 6.4.0
 * @author Mathieu Faverge
 * @author Pierre Ramet
 * @author Xavier Lacoste
 * @author Esragul Korkmaz
 * @author Gregoire Pichon
 * @author Tony Delarue
 * @author Alycia Lisito
 * @author Nolan Bredel
 * @date 2024-07-05
 * @generated from /build/pastix/src/pastix-6.4.0/kernels/pastix_zcores.h, normal z -> d, Tue Dec 16 21:22:40 2025
 *
 */
#ifndef _pastix_dcores_h_
#define _pastix_dcores_h_

#ifndef DOXYGEN_SHOULD_SKIP_THIS
/*
 * Lock helpers: each one forwards the address of the `lock` member of the
 * object pointed to by cblk_ to pastix_atomic_lock / pastix_atomic_unlock.
 * cblk_ is parenthesized so that any pointer-valued expression can be passed;
 * `->` binds tighter than unary `&`, so the address taken is that of the member.
 */
#define pastix_cblk_lock( cblk_ )   pastix_atomic_lock( &( cblk_ )->lock )
#define pastix_cblk_unlock( cblk_ ) pastix_atomic_unlock( &( cblk_ )->lock )
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

/**
 * @addtogroup kernel_blas_lapack
 * @{
 *    This module contains all the BLAS- and LAPACK-like kernels that operate
 *    on LAPACK-layout matrices.
 *
 *    @name PastixDouble BLAS kernels
 *    @{
 */
/*
 * Two void routines whose argument lists use builtin C types only.
 * core_dplrnt receives a writable m-by-n style buffer A with its lda, three
 * further ints (gM, m0, n0) and a 64-bit-or-wider unsigned seed.
 * core_dgetmo receives a read-only A (lda) and a writable B (ldb).
 * NOTE(review): the names suggest a pseudo-random matrix generator and an
 * out-of-place transposition respectively - confirm against the definitions.
 */
void core_dplrnt( int m, int n, double *A, int lda,
                  int gM, int m0, int n0,
                  unsigned long long int seed );
void core_dgetmo( int m, int n, const double *A, int lda,
                  double *B, int ldb );
/*
 * Parameters are grouped so that every matrix pointer sits next to its
 * leading dimension and every scalar next to the matrix it precedes.
 * In both routines A (and B, D for core_dgemdm) are const; the last matrix
 * argument (B, resp. C) and WORK are writable. Both return an int.
 * NOTE(review): the exact operation performed and the meaning of the return
 * value are defined by the implementation - confirm there.
 */
int core_dgeadd( pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
                 double alpha, const double *A, pastix_int_t LDA,
                 double beta, double *B, pastix_int_t LDB );
int core_dgemdm( pastix_trans_t transA, pastix_trans_t transB,
                 int M, int N, int K,
                 double alpha, const double *A, int LDA,
                 const double *B, int LDB,
                 double beta, double *C, int LDC,
                 const double *D, int incD,
                 double *WORK, int LWORK );
/*
 * core_dpqrcp, core_drqrcp and core_dtqrcp share one parameter layout and
 * differ only in the name of their third (int) argument: full_update, refine
 * and unused respectively. core_drqrrt has its own layout: no jpvt / rwork,
 * but an extra B / ldb / tau_b triple and a trailing normA scalar.
 * All four return an int.
 * NOTE(review): the semantics of tol, maxrank and of the return value are not
 * visible from this header - confirm against the definitions.
 */
int core_dpqrcp( double tol, pastix_int_t maxrank, int full_update, pastix_int_t nb,
                 pastix_int_t m, pastix_int_t n,
                 double *A, pastix_int_t lda,
                 pastix_int_t *jpvt, double *tau,
                 double *work, pastix_int_t lwork, double *rwork );
int core_drqrcp( double tol, pastix_int_t maxrank, int refine, pastix_int_t nb,
                 pastix_int_t m, pastix_int_t n,
                 double *A, pastix_int_t lda,
                 pastix_int_t *jpvt, double *tau,
                 double *work, pastix_int_t lwork, double *rwork );
int core_drqrrt( double tol, pastix_int_t maxrank, pastix_int_t nb,
                 pastix_int_t m, pastix_int_t n,
                 double *A, pastix_int_t lda, double *tau,
                 double *B, pastix_int_t ldb, double *tau_b,
                 double *work, pastix_int_t lwork,
                 double normA );
int core_dtqrcp( double tol, pastix_int_t maxrank, int unused, pastix_int_t nb,
                 pastix_int_t m, pastix_int_t n,
                 double *A, pastix_int_t lda,
                 pastix_int_t *jpvt, double *tau,
                 double *work, pastix_int_t lwork, double *rwork );
/*
 * core_dtradd has the same argument list as core_dgeadd with a leading
 * pastix_uplo_t selector. core_dscalo takes two read-only inputs (A, D) and
 * one writable output (B), each followed by its own leading dimension.
 * Both return an int.
 */
int core_dtradd( pastix_uplo_t uplo, pastix_trans_t trans,
                 pastix_int_t M, pastix_int_t N,
                 double alpha, const double *A, pastix_int_t LDA,
                 double beta, double *B, pastix_int_t LDB );
int core_dscalo( pastix_trans_t trans, pastix_int_t M, pastix_int_t N,
                 const double *A, pastix_int_t lda,
                 const double *D, pastix_int_t ldd,
                 double *B, pastix_int_t ldb );

/**
 *    @}
 *    @name PastixDouble Orthogonalization kernels for low-rank updates
 *    @{
 */
/*
 * Three routines that all take a writable U (ldu) / V (ldv) pair and return a
 * pastix_fixdbl_t. The partialqr and cgs variants additionally take r1, a
 * writable r2ptr and two offsets (offx, offy); cgs takes two dimension pairs
 * (M1, N1) and (M2, N2) instead of a single (M, N).
 * NOTE(review): what the returned pastix_fixdbl_t measures is not visible
 * here - confirm against the definitions.
 */
pastix_fixdbl_t core_dlrorthu_fullqr( pastix_int_t M, pastix_int_t N, pastix_int_t rank,
                                      double *U, pastix_int_t ldu,
                                      double *V, pastix_int_t ldv );
pastix_fixdbl_t core_dlrorthu_partialqr( pastix_int_t M, pastix_int_t N,
                                         pastix_int_t r1, pastix_int_t *r2ptr,
                                         pastix_int_t offx, pastix_int_t offy,
                                         double *U, pastix_int_t ldu,
                                         double *V, pastix_int_t ldv );
pastix_fixdbl_t core_dlrorthu_cgs( pastix_int_t M1, pastix_int_t N1,
                                   pastix_int_t M2, pastix_int_t N2,
                                   pastix_int_t r1, pastix_int_t *r2ptr,
                                   pastix_int_t offx, pastix_int_t offy,
                                   double *U, pastix_int_t ldu,
                                   double *V, pastix_int_t ldv );

/**
 *    @}
 *    @name PastixDouble LAPACK kernels
 *    @{
 */
/*
 * The three kernels share one signature: an order n, a writable matrix A with
 * its leading dimension lda, a writable pivot counter nbpivots and a double
 * criterion. Each prototype is declared exactly once and unconditionally;
 * repeating an identical prototype (with or without a PRECISION_z /
 * PRECISION_c guard) adds nothing for the compiler and only trips
 * redundant-declaration diagnostics.
 * NOTE(review): presumably nbpivots is updated whenever a pivot is judged
 * against criterion - confirm against the definitions.
 */
void core_dpotrfsp( pastix_int_t n, double *A, pastix_int_t lda,
                    pastix_int_t *nbpivots, double criterion );
void core_dgetrfsp( pastix_int_t n, double *A, pastix_int_t lda,
                    pastix_int_t *nbpivots, double criterion );
void core_dsytrfsp( pastix_int_t n, double *A, pastix_int_t lda,
                    pastix_int_t *nbpivots, double criterion );

/**
 *    @}
 * @}
 *
 * @addtogroup kernel_fact
 * @{
 *    This module contains all the kernels working at the solver matrix
 *    structure level for the numerical factorization step.
 *
 *    @name PastixDouble cblk-BLAS CPU kernels
 *    @{
 */

/*
 * Column-block level prototypes. Read-only inputs are const-qualified
 * (cblk1, L1, U1, cblk, blok, A, B, lowrank); the remaining pointers are
 * writable. cpucblk_dgemmsp returns a pastix_fixdbl_t, cpucblk_dgeaddsp1d an
 * int, and the other two return nothing.
 * The coefficient arrays of cpucblk_dgemmsp / cpucblk_dtrsmsp /
 * cpucblk_dscalo are passed as untyped void pointers.
 */
int cpucblk_dgeaddsp1d( const SolverCblk *cblk1, SolverCblk *cblk2,
                        const double *L1, double *L2,
                        const double *U1, double *U2 );

pastix_fixdbl_t cpucblk_dgemmsp( pastix_coefside_t sideA, pastix_trans_t trans,
                                 const SolverCblk *cblk, const SolverBlok *blok,
                                 SolverCblk *fcblk,
                                 const void *A, const void *B, void *C,
                                 double *work, pastix_int_t lwork,
                                 const pastix_lr_t *lowrank );
void cpucblk_dtrsmsp( pastix_side_t side, pastix_uplo_t uplo,
                      pastix_trans_t trans, pastix_diag_t diag,
                      const SolverCblk *cblk,
                      const void *A, void *C,
                      const pastix_lr_t *lowrank );
void cpucblk_dscalo( pastix_trans_t trans, const SolverCblk *cblk,
                     void *dataL, void *dataLD );

/*
 * Block-level counterparts of the cpucblk_* prototypes: instead of a
 * SolverBlok pointer they take pastix_int_t block arguments (blok_mk /
 * blok_nk / blok_mn, or blok_m).
 * cpublok_dgemmsp and cpublok_dtrsmsp return a pastix_fixdbl_t;
 * cpublok_dscalo returns nothing and, unlike cpucblk_dscalo, takes its two
 * inputs (A, dataD) as const.
 */
pastix_fixdbl_t cpublok_dgemmsp( pastix_trans_t trans,
                                 const SolverCblk *cblk, SolverCblk *fcblk,
                                 pastix_int_t blok_mk, pastix_int_t blok_nk,
                                 pastix_int_t blok_mn,
                                 const void *A, const void *B, void *C,
                                 const pastix_lr_t *lowrank );
pastix_fixdbl_t cpublok_dtrsmsp( pastix_side_t side, pastix_uplo_t uplo,
                                 pastix_trans_t trans, pastix_diag_t diag,
                                 const SolverCblk *cblk, pastix_int_t blok_m,
                                 const void *A, void *C,
                                 const pastix_lr_t *lowrank );
void cpublok_dscalo( pastix_trans_t trans,
                     const SolverCblk *cblk, pastix_int_t blok_m,
                     const void *A, const void *dataD, void *dataB );

/**
 *    @}
 *    @name PastixDouble cblk LU kernels
 *    @{
 */
/*
 * The _getrf and _panel variants receive the two coefficient arrays (L, U)
 * explicitly as void pointers; the plain cpucblk_dgetrfsp1d entry point
 * receives a typed workspace and its size instead. All three return an int.
 * NOTE(review): presumably the int is a pivot count or status - confirm.
 */
int cpucblk_dgetrfsp1d_getrf( SolverMatrix *solvmtx, SolverCblk *cblk,
                              void *L, void *U );
int cpucblk_dgetrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
                              void *L, void *U );
int cpucblk_dgetrfsp1d( SolverMatrix *solvmtx, SolverCblk *cblk,
                        double *work, pastix_int_t lwork );

/**
 *    @}
 *    @name PastixDouble cblk Cholesky kernels
 *    @{
 */
/*
 * Same three-level split as the LU prototypes, but with a single coefficient
 * array: _potrf names it dataL, _panel names it L. The plain entry point
 * takes a typed workspace and its size. All three return an int.
 */
int cpucblk_dpotrfsp1d_potrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
int cpucblk_dpotrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk, void *L );
int cpucblk_dpotrfsp1d( SolverMatrix *solvmtx, SolverCblk *cblk,
                        double *work, pastix_int_t lwork );

/**
 *    @}
 */

#if defined(PRECISION_z) || defined(PRECISION_c)
/*
 * Everything down to the matching #endif is compiled only when PRECISION_z
 * or PRECISION_c is defined. Apart from cpucblk_dpotrfsp1d_pxtrf, every
 * function named in this region is also declared unconditionally in this
 * header with the same parameter types (only some parameter names differ).
 * NOTE(review): presumably a leftover of the z -> d precision generation -
 * confirm before relying on these copies.
 */
/**
 *    @name PastixDouble cblk LDL^h kernels
 *    @{
 */
int cpucblk_dsytrfsp1d_sytrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *L );
int cpucblk_dsytrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
                              void *L, void *DLh );
int cpucblk_dsytrfsp1d( SolverMatrix *solvmtx, SolverCblk *cblk,
                        double *work1, double *work2, pastix_int_t lwork );

/**
 *    @}
 *    @name PastixDouble cblk LL^t kernels
 *    @{
 */
int cpucblk_dpotrfsp1d_pxtrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
int cpucblk_dpotrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
int cpucblk_dpotrfsp1d( SolverMatrix *solvmtx, SolverCblk *cblk,
                        double *work, pastix_int_t lwork );

/**
 *    @}
 */
#endif

 /**
 *    @name PastixDouble cblk LDL^t kernels
 *    @{
 */
/*
 * _sytrf takes one coefficient array (dataL); _panel takes two (L, DLt).
 * The plain entry point takes two typed double buffers (Dlt, work) and a
 * single size lwork. All three return an int.
 * NOTE(review): the header does not say which of the two buffers lwork
 * sizes - confirm against the definition.
 */
int cpucblk_dsytrfsp1d_sytrf( SolverMatrix *solvmtx, SolverCblk *cblk, void *dataL );
int cpucblk_dsytrfsp1d_panel( SolverMatrix *solvmtx, SolverCblk *cblk,
                              void *L, void *DLt );
int cpucblk_dsytrfsp1d( SolverMatrix *solvmtx, SolverCblk *cblk,
                        double *Dlt, double *work, pastix_int_t lwork );

/**
 *    @}
 *    @name PastixDouble initialization and additional routines
 *    @{
 */
/*
 * Storage management and inspection prototypes for a SolverCblk.
 * Routines that take a const SolverCblk / SolverMatrix / pastix_bcsc_t only
 * promise not to modify that object through the pointer; the alloc / free
 * family takes a writable cblk. cpucblk_ddump needs FILE from <stdio.h> to be
 * visible before this header is included. Only cpucblk_ddiff returns a value
 * (int).
 */
void cpucblk_dalloc_lrws( const SolverCblk *cblk, pastix_lrblock_t *lrblok, double *ws );
void cpucblk_dalloc_lr( pastix_coefside_t side, SolverCblk *cblk, int rkmax );
void cpucblk_dalloc_fr( pastix_coefside_t side, SolverCblk *cblk );
void cpucblk_dalloc( pastix_coefside_t side, SolverCblk *cblk );
void cpucblk_dfree( pastix_coefside_t side, SolverCblk *cblk );
void cpucblk_dfillin( pastix_coefside_t side, const SolverMatrix *solvmtx,
                      const pastix_bcsc_t *bcsc, pastix_int_t itercblk );
void cpucblk_dinit( pastix_coefside_t side, const SolverMatrix *solvmtx,
                    const pastix_bcsc_t *bcsc, pastix_int_t itercblk,
                    const char *directory );
void cpucblk_dgetschur( const SolverCblk *cblk, int upper_part,
                        double *S, pastix_int_t lds );
void cpucblk_ddump( pastix_coefside_t side, const SolverCblk *cblk, FILE *stream );
int cpucblk_ddiff( pastix_coefside_t side, const SolverCblk *cblkA, SolverCblk *cblkB );
/*
 * Column-block and block flavours of the same prototype: the block flavour
 * inserts two pastix_int_t arguments (blokA_m, blokB_m) after the cblk
 * pointers. The source side (cblkA, A) and lowrank are const, the
 * destination side (cblkB, B) and work are writable. Both return a
 * pastix_fixdbl_t.
 */
pastix_fixdbl_t cpucblk_dadd( double alpha,
                              const SolverCblk *cblkA, SolverCblk *cblkB,
                              const void *A, void *B,
                              double *work, pastix_int_t lwork,
                              const pastix_lr_t *lowrank );
pastix_fixdbl_t cpublok_dadd( double alpha,
                              const SolverCblk *cblkA, SolverCblk *cblkB,
                              pastix_int_t blokA_m, pastix_int_t blokB_m,
                              const void *A, void *B,
                              double *work, pastix_int_t lwork,
                              const pastix_lr_t *lowrank );

/**
 *    @}
 *    @name PastixDouble MPI routines
 *    @{
 */
/*
 * Dependency bookkeeping prototypes. The first four are always declared;
 * cpucblk_dmpi_progress and cpucblk_disend_rhs_bwd are declared only when
 * PASTIX_WITH_MPI is defined. Only cpucblk_dincoming_deps returns a value
 * (int).
 */
int cpucblk_dincoming_deps( int mt_flag, pastix_coefside_t side,
                            SolverMatrix *solvmtx, SolverCblk *cblk );
void cpucblk_drelease_deps( pastix_coefside_t side, SolverMatrix *solvmtx,
                            const SolverCblk *cblk, SolverCblk *fcbk );
void cpucblk_drequest_cleanup( pastix_coefside_t side, pastix_int_t sched,
                               SolverMatrix *solvmtx );
void cpucblk_dupdate_reqtab( SolverMatrix *solvmtx );
#if defined( PASTIX_WITH_MPI )
void cpucblk_dmpi_progress( pastix_coefside_t side, SolverMatrix *solvmtx,
                            int threadid );
void cpucblk_disend_rhs_bwd( SolverMatrix *solvmtx, pastix_rhs_t rhsb,
                             SolverCblk *cblk );
#endif
/*
 * Right-hand-side "fwd" prototypes. They are declared unconditionally (not
 * under PASTIX_WITH_MPI). Each takes a read-only args_solve_t, a writable
 * SolverMatrix and a pastix_rhs_t handle; only cpucblk_dincoming_rhs_fwd_deps
 * returns a value (int).
 */
void cpucblk_dmpi_rhs_fwd_progress( const args_solve_t *enums, SolverMatrix *solvmtx,
                                    pastix_rhs_t rhsb, int threadid );
void cpucblk_drelease_rhs_fwd_deps( const args_solve_t *enums, SolverMatrix *solvmtx,
                                    pastix_rhs_t rhsb,
                                    const SolverCblk *cblk, SolverCblk *fcbk );
int cpucblk_dincoming_rhs_fwd_deps( int rank, const args_solve_t *enums,
                                    SolverMatrix *solvmtx, SolverCblk *cblk,
                                    pastix_rhs_t rhsb );
void cpucblk_drequest_rhs_fwd_cleanup( const args_solve_t *enums, pastix_int_t sched,
                                       SolverMatrix *solvmtx, pastix_rhs_t rhsb );

/*
 * Right-hand-side "bwd" prototypes; their argument lists mirror the "fwd"
 * ones one for one. Declared unconditionally. Only
 * cpucblk_dincoming_rhs_bwd_deps returns a value (int).
 */
void cpucblk_dmpi_rhs_bwd_progress( const args_solve_t *enums, SolverMatrix *solvmtx,
                                    pastix_rhs_t rhsb, int threadid );
void cpucblk_drelease_rhs_bwd_deps( const args_solve_t *enums, SolverMatrix *solvmtx,
                                    pastix_rhs_t rhsb,
                                    const SolverCblk *cblk, SolverCblk *fcbk );
int cpucblk_dincoming_rhs_bwd_deps( int rank, const args_solve_t *enums,
                                    SolverMatrix *solvmtx, SolverCblk *cblk,
                                    pastix_rhs_t rhsb );
void cpucblk_drequest_rhs_bwd_cleanup( const args_solve_t *enums, pastix_int_t sched,
                                       SolverMatrix *solvmtx, pastix_rhs_t rhsb );
/*
 * Send / receive prototypes for the right-hand side. All four take a
 * read-only SolverMatrix, a writable SolverCblk and a pastix_rhs_t handle.
 * cpucblk_drecv_rhs_forward is the only one that also takes a double
 * workspace.
 */
void cpucblk_dsend_rhs_forward( const SolverMatrix *solvmtx, SolverCblk *cblk,
                                pastix_rhs_t b );
void cpucblk_drecv_rhs_forward( const SolverMatrix *solvmtx, SolverCblk *cblk,
                                double *work, pastix_rhs_t b );
void cpucblk_dsend_rhs_backward( const SolverMatrix *solvmtx, SolverCblk *cblk,
                                 pastix_rhs_t b );
void cpucblk_drecv_rhs_backward( const SolverMatrix *solvmtx, SolverCblk *cblk,
                                 pastix_rhs_t b );

/**
 *    @}
 *    @name PastixDouble compression/uncompression routines
 *    @{
 */
/*
 * cpublok_dcompress works on a single pastix_lrblock_t and returns a
 * pastix_fixdbl_t; cpucblk_dcompress works on a SolverCblk and returns a
 * pastix_int_t. cpucblk_dmemory reports through two writable pastix_int_t
 * pointers (orig, gain).
 * NOTE(review): the meaning of the two return values and of orig / gain is
 * not visible from this header - confirm against the definitions.
 */
pastix_fixdbl_t cpublok_dcompress( const pastix_lr_t *lowrank,
                                   pastix_int_t M, pastix_int_t N,
                                   pastix_lrblock_t *blok );
pastix_int_t cpucblk_dcompress( const SolverMatrix *solvmtx, pastix_coefside_t side,
                                int max_ilulvl, SolverCblk *cblk );
void cpucblk_duncompress( pastix_coefside_t side, SolverCblk *cblk );
void cpucblk_dmemory( pastix_coefside_t side, const SolverMatrix *solvmtx,
                      SolverCblk *cblk,
                      pastix_int_t *orig, pastix_int_t *gain );

/**
 *    @}
 * @}
 *
 * @addtogroup kernel_solve
 * @{
 *    This module contains all the kernels working on the solver matrix
 *    structure for the solve step.
 *
 */

/*
 * Solve-step prototypes; none of them returns a value.
 * solve_blok_dtrsm and solve_cblk_ddiag use plain int for nrhs / ldb, whereas
 * solve_blok_dgemm uses pastix_int_t for nrhs / ldb / ldc.
 * solve_cblk_dtrsmsp_forward takes a const SolverCblk while the backward
 * variant takes a writable one.
 */
void solve_blok_dtrsm( pastix_side_t side, pastix_uplo_t uplo,
                       pastix_trans_t trans, pastix_diag_t diag,
                       const SolverCblk *cblk, int nrhs,
                       const void *dataA,
                       double *b, int ldb );
void solve_blok_dgemm( pastix_side_t side, pastix_trans_t trans, pastix_int_t nrhs,
                       const SolverCblk *cblk, const SolverBlok *blok,
                       SolverCblk *fcbk,
                       const void *dataA,
                       const double *B, pastix_int_t ldb,
                       double *C, pastix_int_t ldc );

void solve_cblk_dtrsmsp_forward( const args_solve_t *enums, SolverMatrix *datacode,
                                 const SolverCblk *cblk, pastix_rhs_t b );
void solve_cblk_dtrsmsp_backward( const args_solve_t *enums, SolverMatrix *datacode,
                                  SolverCblk *cblk, pastix_rhs_t b );

void solve_cblk_ddiag( const SolverCblk *cblk, const void *dataA, int nrhs,
                       double *b, int ldb, double *work );
/**
 * @}
 *
 * @addtogroup kernel_fact_null
 * @{
 *    This module contains the three-term update functions for the LDL^t and
 *    LDL^h factorizations.
 *
 */
/*
 * Declared once, unconditionally. A second, identical prototype guarded by
 * PRECISION_z / PRECISION_c would be redundant: the guard can only re-declare
 * what is already visible here.
 * cblk, blok and L are read-only; fcblk, C and work are writable.
 */
void core_dsytrfsp1d_gemm( const SolverCblk *cblk, const SolverBlok *blok,
                           SolverCblk *fcblk,
                           const double *L, double *C, double *work );

/*
 * "1dplus" entry points, one pair per factorization (potrf, sytrf, getrf):
 * the int-returning routine takes the SolverCblk, the matching _update
 * routine takes a SolverBlok plus a workspace. The sytrf update is the only
 * one without an lwork argument.
 * Each prototype is declared exactly once and unconditionally; an extra copy
 * of the potrf / sytrf pairs under a PRECISION_z / PRECISION_c guard would
 * only repeat identical declarations.
 */
int  cpucblk_dpotrfsp1dplus( SolverMatrix *solvmtx, SolverCblk *cblk );
void cpucblk_dpotrfsp1dplus_update( SolverMatrix *solvmtx, SolverBlok *blok,
                                    double *work, pastix_int_t lwork );
int  cpucblk_dsytrfsp1dplus( SolverMatrix *solvmtx, SolverCblk *cblk );
void cpucblk_dsytrfsp1dplus_update( SolverMatrix *solvmtx, SolverBlok *blok,
                                    double *work );
int  cpucblk_dgetrfsp1dplus( SolverMatrix *solvmtx, SolverCblk *cblk );
void cpucblk_dgetrfsp1dplus_update( SolverMatrix *solvmtx, SolverBlok *blok,
                                    double *work, pastix_int_t lwork );

/**
 * @}
 */

#endif /* _pastix_dcores_h_ */
