!===============================================================================
! Copyright 2020-2022 Intel Corporation.
!
! This software and the related documents are Intel copyrighted  materials,  and
! your use of  them is  governed by the  express license  under which  they were
! provided to you (License).  Unless the License provides otherwise, you may not
! use, modify, copy, publish, distribute,  disclose or transmit this software or
! the related documents without Intel's prior written permission.
!
! This software and the related documents  are provided as  is,  with no express
! or implied  warranties,  other  than those  that are  expressly stated  in the
! License.
!===============================================================================

!*
!
!*  Content:
!*            ErfInv example program text (OpenMP offload interface)
!*
!*******************************************************************************/

include "mkl_omp_offload.f90"
include "_vml_common_functions.f90"

! @brief Real single precision function test begin
integer (kind=4) function test_float(funcname)

    use onemkl_vml_omp_offload
    implicit none
    include "_vml_common_data.f90"
    character (len = *) :: funcname
    real      (kind=4)  :: as_float
    integer   (kind=4)  :: check_result_float
    real      (kind=4),allocatable :: varg1(:), vres1(:), vmres1(:), vref1(:)
    real      (kind=4),allocatable :: vresi1(:), vmresi1(:), vrefi1(:)
    integer   (kind=4) i, a, errs
    integer   (kind=4) VLEN
    parameter (VLEN = 4)
    integer   (kind=4) test_arg1(VLEN)
    integer   (kind=4) test_ref1(VLEN)
    integer   (kind=4) nan_value
    integer   (kind=8) vml_accuracy_mode(3)
    data vml_accuracy_mode / VML_HA, VML_LA, VML_EP /
    integer tmode

    ! NaN value to fill result vector
    data  nan_value /Z'FFFFFFFF'/
    
    ! Arguments and reference results begin
    data test_arg1 / Z'3F2C10F1', & ! 0.672133505    
                     Z'BE5BB8DC', & ! -0.21457237    
                     Z'3F0EFF63', & ! 0.558584392    
                     Z'3F16CF15'  / ! 0.589097321    
    data test_ref1 / Z'3F311CED', & ! 0.691847622    
                     Z'BE4521D1', & ! -0.192511812   
                     Z'3F0B597E', & ! 0.544334292    
                     Z'3F14DA73'  / ! 0.581458271    
    ! Arguments and reference results end
    
    errs = 0

    ! Allocate vectors
    allocate(varg1(VLEN))
    allocate(vres1(VLEN))
    allocate(vmres1(VLEN))
    allocate(vref1(VLEN))
    allocate(vresi1(VLEN))
    allocate(vmresi1(VLEN))
    allocate(vrefi1(VLEN))

    ! Fill vectors
    do i = 1, VLEN
        varg1(i) = as_float(test_arg1(i))
        vref1(i) = as_float(test_ref1(i))
        vres1(i) = as_float(nan_value)
        vmres1(i) = as_float(nan_value)

        ! Fill even result values with 777 pads for strided indexing
        if (and(i,1) .eq. 1) then
            vrefi1(i)  = as_float(test_ref1(i))
            vresi1(i)  = 999
            vmresi1(i) = 999
        else
            vrefi1(i)  = 777
            vresi1(i)  = 777
            vmresi1(i) = 777
        end if

    enddo

    ! Loop by three accuracy flavors
    do a = 1, 3
        ! Call VML function with specific accuracy flavor

        !$omp target variant dispatch
        tmode = vmlsetmode(vml_accuracy_mode(a))
        !$omp end target variant dispatch

        !$omp target data map(varg1,vres1)
        !$omp target variant dispatch use_device_ptr(varg1,vres1)
        call vserfinv(VLEN, varg1, vres1)
        !$omp end target variant dispatch
        !$omp end target data
		
        !$omp target data map(varg1,vmres1)
        !$omp target variant dispatch use_device_ptr(varg1,vmres1)
        call vmserfinv(VLEN, varg1, vmres1, vml_accuracy_mode(a))
        !$omp end target variant dispatch
        !$omp end target data

        !$omp target data map(varg1,vresi1)
        !$omp target variant dispatch use_device_ptr(varg1,vresi1)
        call vserfinvi(VLEN/2, varg1, 2, vresi1, 2)
        !$omp end target variant dispatch
        !$omp end target data
		
        !$omp target data map(varg1,vmresi1)
        !$omp target variant dispatch use_device_ptr(varg1,vmresi1)
        call vmserfinvi(VLEN/2, varg1, 2, vmresi1, 2, vml_accuracy_mode(a))
        !$omp end target variant dispatch
        !$omp end target data

        ! Check results
        do i = 1, VLEN
          errs = errs + check_result_float(i, VML_ARG1_RES1, varg1(i), varg1(i), & 
                           vres1(i), vres1(i), vref1(i), vref1(i), "v"//funcname, a, ",  simple")
          errs = errs + check_result_float(i, VML_ARG1_RES1, varg1(i), varg1(i), & 
                           vmres1(i), vmres1(i), vref1(i), vref1(i), "vm"//funcname, a, ",  simple")
          errs = errs + check_result_float(i, VML_ARG1_RES1, varg1(i), varg1(i), & 
                           vresi1(i), vresi1(i), vrefi1(i), vrefi1(i), "v"//funcname//"i", a, ", strided")
          errs = errs + check_result_float(i, VML_ARG1_RES1, varg1(i), varg1(i), & 
                           vmresi1(i), vmresi1(i), vrefi1(i), vrefi1(i), "vm"//funcname//"i", a, ", strided")
        enddo
    enddo

    test_float = errs

end function
! @brief Real single precision function test end

! @brief Real double precision function test begin
integer (kind=4) function test_double(funcname)

    use onemkl_vml_omp_offload
    implicit none
    include "_vml_common_data.f90"
    character (len = *) :: funcname
    real      (kind=8) :: as_double
    integer   (kind=4) :: check_result_double
    real      (kind=8),allocatable :: varg1(:), vres1(:), vmres1(:), vref1(:)
    real      (kind=8),allocatable :: vresi1(:), vmresi1(:), vrefi1(:)
    integer   (kind=4) i, a, errs
    integer   (kind=8) VLEN
    parameter (VLEN = 4)
    integer   (kind=8) test_arg1(VLEN)
    integer   (kind=8) test_ref1(VLEN)
    integer   (kind=8) nan_value
    integer   (kind=8) vml_accuracy_mode(3)
    data vml_accuracy_mode / VML_HA, VML_LA, VML_EP /
    integer tmode

    ! NaN value to fill result vector
    data  nan_value /Z'FFFFFFFFFFFFFFFF'/
    
    ! Arguments and reference results begin
    data test_arg1 / Z'3FE5821DD706CAE3', & ! 0.672133369420717108     
                     Z'BFCB771B67504648', & ! -0.214572358556823994    
                     Z'3FE1DFEC4CAA263D', & ! 0.558584356055866871     
                     Z'3FE2D9E26D4AF341'  / ! 0.589097226583909728     
    data test_ref1 / Z'3FE6239D393954E6', & ! 0.69184743095925394      
                     Z'BFC8A43A11CD5897', & ! -0.192511805241058126    
                     Z'3FE16B2FAA0C0A6A', & ! 0.544334251521218393     
                     Z'3FE29B4E1787A761'  / ! 0.581458135563689749     
    ! Arguments and reference results end
    
    errs = 0

    ! Allocate vectors
    allocate(varg1(VLEN))
    allocate(vres1(VLEN))
    allocate(vmres1(VLEN))
    allocate(vref1(VLEN))
    allocate(vresi1(VLEN))
    allocate(vmresi1(VLEN))
    allocate(vrefi1(VLEN))

    ! Fill vectors
    do i = 1, VLEN
        varg1(i) = as_double(test_arg1(i))
        vref1(i) = as_double(test_ref1(i))
        vres1(i) = as_double(nan_value)
        vmres1(i) = as_double(nan_value)

        ! Fill even result values with 777 pads for strided indexing
        if (and(i,1) .eq. 1) then
            vrefi1(i)  = as_double(test_ref1(i))
            vresi1(i)  = 999
            vmresi1(i) = 999
        else
            vrefi1(i)  = 777
            vresi1(i)  = 777
            vmresi1(i) = 777
        end if

    enddo

    ! Loop by three accuracy flavors
    do a = 1, 3
        ! Call VML function with specific accuracy flavor

        !$omp target variant dispatch
        tmode = vmlsetmode(vml_accuracy_mode(a))
        !$omp end target variant dispatch

        !$omp target data map(varg1,vres1)
        !$omp target variant dispatch use_device_ptr(varg1,vres1)
        call vderfinv(VLEN, varg1, vres1)
        !$omp end target variant dispatch
        !$omp end target data
		
        !$omp target data map(varg1,vmres1)
        !$omp target variant dispatch use_device_ptr(varg1,vmres1)
        call vmderfinv(VLEN, varg1, vmres1, vml_accuracy_mode(a))
        !$omp end target variant dispatch
        !$omp end target data

        !$omp target data map(varg1,vresi1)
        !$omp target variant dispatch use_device_ptr(varg1,vresi1)
        call vderfinvi(VLEN/2, varg1, 2, vresi1, 2)
        !$omp end target variant dispatch
        !$omp end target data
		
        !$omp target data map(varg1,vmresi1)
        !$omp target variant dispatch use_device_ptr(varg1,vmresi1)
        call vmderfinvi(VLEN/2, varg1, 2, vmresi1, 2, vml_accuracy_mode(a))
        !$omp end target variant dispatch
        !$omp end target data

        ! Check results
        do i = 1, VLEN
          errs = errs + check_result_double(i, VML_ARG1_RES1, varg1(i), varg1(i), & 
                            vres1(i), vres1(i), vref1(i), vref1(i), "v"//funcname, a, ",  simple")
          errs = errs + check_result_double(i, VML_ARG1_RES1, varg1(i), varg1(i), & 
                            vmres1(i), vmres1(i), vref1(i), vref1(i), "vm"//funcname, a, ",  simple")
          errs = errs + check_result_double(i, VML_ARG1_RES1, varg1(i), varg1(i), & 
                            vresi1(i), vresi1(i), vrefi1(i), vrefi1(i), "v"//funcname//"i", a, ", strided")
          errs = errs + check_result_double(i, VML_ARG1_RES1, varg1(i), varg1(i), & 
                            vmresi1(i), vmresi1(i), vrefi1(i), vrefi1(i), "vm"//funcname//"i", a, ", strided")
        enddo
    enddo
    
    test_double = errs

end function
! @brief Real double precision function test end

! @brief Main test program begin
program erfinv_example

    use onemkl_vml_omp_offload
    implicit none
    include "_vml_common_data.f90"
    integer   (kind=4) :: blend_int32
    integer   (kind=4) :: test_float
    integer   (kind=4) :: test_double
    integer   (kind=4) errs, total_errs
    character (len = *), parameter :: funcname = "erfinv"
    
    total_errs = 0

    data FLOAT_MAXULP /5000.0,5000.0,5000.0/
    data COMPLEX_FLOAT_MAXULP /FLOAT_COMPLEX_MAXULP_HA,FLOAT_COMPLEX_MAXULP_LA,FLOAT_COMPLEX_MAXULP_EP/
    data DOUBLE_MAXULP /7D7,7D7,7D7/
    data COMPLEX_DOUBLE_MAXULP /DOUBLE_COMPLEX_MAXULP_HA,DOUBLE_COMPLEX_MAXULP_LA,DOUBLE_COMPLEX_MAXULP_EP/

    write (*, 111) funcname
    111 format ('Running ', A, ' functions:')

    ! Single precision test run begin
    write (*, 112) TAB, funcname
    112 format(A, 'Running ',  A, ' with single precision real data type:')
    errs = test_float(funcname)    
    total_errs = total_errs + errs
    write (*, 113) TAB, funcname, TEST_RESULT(blend_int32((errs>0),2,1))
    113 format(A, A, ' single precision real result: ', A)
    ! Single precision test run end

    ! Real double precision test run begin
    write (*, 117) TAB, funcname
    117 format(A, 'Running ',  A, ' with double precision real data type:')
    errs = test_double(funcname)    
    total_errs = total_errs + errs
    write (*, 118) TAB, funcname, TEST_RESULT(blend_int32((errs>0),2,1))
    118 format(A, A, ' double precision real result: ', A)
    ! Real double precision test run end

    write (*, 121) funcname, TEST_RESULT(blend_int32((total_errs>0),2,1))
    121 format(A, ' function result: ', A)

end program
! @brief Main test program end
