!===============================================================================
! Copyright 2021-2022 Intel Corporation.
!
! This software and the related documents are Intel copyrighted  materials,  and
! your use of  them is  governed by the  express license  under which  they were
! provided to you (License).  Unless the License provides otherwise, you may not
! use, modify, copy, publish, distribute,  disclose or transmit this software or
! the related documents without Intel's prior written permission.
!
! This software and the related documents  are provided as  is,  with no express
! or implied  warranties,  other  than those  that are  expressly stated  in the
! License.
!===============================================================================

!  Content:
!      Intel(R) oneAPI Math Kernel Library (oneMKL)
!      FORTRAN OpenMP offload examples for DGETRI_OOP_BATCH_STRIDED
!*******************************************************************************

include "mkl_omp_offload.f90"

program dgetri_oop_batch_strided_example

#if defined(MKL_ILP64)
    use onemkl_lapack_omp_offload_ilp64
#else
    use onemkl_lapack_omp_offload_lp64
#endif

    integer, parameter :: n = 10, batch_size = 4
    integer :: lda, ldainv, stride_a, stride_ainv, stride_ipiv
    double precision, allocatable :: a(:,:), ainv(:,:)
    integer, allocatable :: ipiv(:,:), info(:)

    integer i, j, imat, exit_status
    double precision temp

    lda = n
    ldainv = n
    stride_a = lda*n
    stride_ainv = ldainv*n
    stride_ipiv = n

    print '("========================================================================================")'
    print '("Compute the matrix inverse of the matrices in A. Store the results in Ainv.")'
    print '("The paramters for the batch are as follows:")'
    print '("========================================================================================")'
    print '("   Batch size:                          " (I6) )', batch_size
    print '("   Matrix order:                        " (I6) )', n
    print '("   Leading dimension for A matrices:    " (I6) )', lda
    print '("   Leading dimension for Ainv matrices: " (I6) )', ldainv
    print '("   Stride for A matrices:               " (I6) )', stride_a
    print '("   Stride for Ainv matrices:            " (I6) )', stride_ainv
    print '("   Stride for pivot arrays:             " (I6) )', stride_ipiv
    print '("========================================================================================"/)'

    ! Allocate required memory
    allocate ( a(stride_a,batch_size),        &
               ainv(stride_ainv,batch_size),  &
               ipiv(stride_ipiv,batch_size),  &
               info(batch_size) )

    if ( .not.allocated(a)    .OR. &
         .not.allocated(ainv) .OR. &
         .not.allocated(ipiv) .OR. &
         .not.allocated(info) ) then
         print '( "[FAILED] Failed allocation" )'
         stop 1
    endif

    ! Random initialization of matrices
    do imat=1,batch_size
        do j=1,n
            do i=1,n
                ! Random number in the internval (-0.5, 0.5)
                call random_number(temp)
                a(i + (j-1)*lda, imat) = temp * 0.5
            enddo
        enddo

        ! Make diagonal entries larger to ensure matrix is well-conditioned
        do i=1,n
            a(i + (i-1)*lda ,imat) = a(i + (i-1)*lda ,imat) + 5.0
        enddo
    enddo

    ! Compute LU factorization via OpenMP offload.
    ! On entry, A contains the inpyt matrix, on exit it contains the LU factorization
    !$omp target data map(tofrom:a) map(from:ipiv) map(from:info)
    !$omp target variant dispatch use_device_ptr(a,ipiv,info)
        call dgetrf_batch_strided(n, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, info)
    !$omp end target variant dispatch
    !$omp end target data
    print '("Finished call to dgetrf_batch_strided")'

    exit_status = 0
    do j=1,batch_size
        if(info(j) .NE. 0) then
            print '("ERROR: getrf_batch_strided returned with errors. Matrix" (1I4) " returned with info =" (1I4) )', j, info(j)
            exit_status = exit_status + 1
        endif
    enddo

    ! Compute the matrix inverse via OpenMP offload. On exit, the inverse is stored in Ainv.
    if(exit_status .EQ. 0) then
        !$omp target data map(tofrom:a) map(from:ainv) map(tofrom:ipiv) map(from:info)
        !$omp target variant dispatch use_device_ptr(a,ainv,ipiv,info)
          call dgetri_oop_batch_strided(n, a, lda, stride_a, ipiv, stride_ipiv, ainv, ldainv, stride_ainv, batch_size, info)
        !$omp end target variant dispatch
        !$omp end target data
        print '("Finished call to dgetri_oop_batch_strided")'

        do j=1,batch_size
            if(info(j) .NE. 0) then
                print '("ERROR: getri_oop_batch_strided returned with errors. Matrix" (I4) " returned with info =" (I4) )', j, info(j)
                exit_status = exit_status + 1
            endif
        enddo
    endif


    ! Clean up
    deallocate (a, ainv, ipiv, info)

    print '(//"==============================================================" )'
    if(exit_status .EQ. 0) then
        print '("Example executed succesfully" )'
    else
        print '("Example executed with errors" )'
    endif
    print '("==============================================================" )'

    stop exit_status

end program dgetri_oop_batch_strided_example
