types_gpumat.F90 10.4 KB
Newer Older
1 2 3
MODULE m_types_gpumat
  USE m_judft
  USE m_types_mat
Daniel Wortmann's avatar
Daniel Wortmann committed
4
#ifdef CPP_GPU    
5
  USE cudafor
Daniel Wortmann's avatar
Daniel Wortmann committed
6
#endif  
7 8 9 10 11 12
  IMPLICIT NONE

  !<Some routines are overwritten for GPU handling
  !!
  !! Several select type constructs are only needed to make the PGI compiler happy...
  TYPE,EXTENDS(t_mat) :: t_gpumat
Daniel Wortmann's avatar
Daniel Wortmann committed
13
#ifdef CPP_GPU     
14 15 16 17 18 19 20 21 22 23 24 25
   CONTAINS
     PROCEDURE        :: alloc =>t_gpumat_alloc
     PROCEDURE        :: multiply=>t_gpumat_multiply            !> do a matrix-matrix multiply
     PROCEDURE        :: transpose=>t_gpumat_transpose          !> transpose the matrix
     PROCEDURE        :: from_packed=>t_gpumat_from_packed      !> initialized from a packed-storage matrix
     PROCEDURE        :: inverse =>t_gpumat_inverse             !> invert the matrix
     PROCEDURE        :: to_packed=>t_gpumat_to_packed          !> convert to packed-storage matrix
     PROCEDURE        :: clear => t_gpumat_clear                !> set data arrays to zero
     PROCEDURE        :: copy => t_gpumat_copy                  !> copy into another t_mat 
     PROCEDURE        :: move => t_gpumat_move                  !> move data into another t_mat 
     PROCEDURE        :: add_transpose => t_gpumat_add_transpose!> add the tranpose/Hermitian conjg. without the diagonal 
     PROCEDURE        :: init_from                              !> Initialize from an existing matrix
Daniel Wortmann's avatar
Daniel Wortmann committed
26
#endif
27 28 29
  END TYPE t_gpumat

CONTAINS
Daniel Wortmann's avatar
Daniel Wortmann committed
30
#ifdef CPP_GPU
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
  SUBROUTINE t_gpumat_alloc(mat,l_real,matsize1,matsize2,init)
    CLASS(t_gpumat) :: mat
    LOGICAL,INTENT(IN),OPTIONAL:: l_real
    INTEGER,INTENT(IN),OPTIONAL:: matsize1,matsize2
    REAL,INTENT(IN),OPTIONAL   :: init

    INTEGER:: err
    IF (present(l_real)) mat%l_real=l_real
    IF (present(matsize1)) mat%matsize1=matsize1
    IF (present(matsize2)) mat%matsize2=matsize2

    IF (mat%matsize1<0) CALL judft_error("Cannot allocate memory for mat datatype that is not initialized",hint="This is a BUG in FLEUR, please report")
    IF (mat%matsize2<0)  mat%matsize2=mat%matsize1 !Square by default

    IF (allocated(mat%data_r)) deallocate(mat%data_r)
    IF (allocated(mat%data_c)) deallocate(mat%data_c)

    IF (mat%l_real) THEN
       ALLOCATE(mat%data_r(mat%matsize1,mat%matsize2),STAT=err)
       ALLOCATE(mat%data_c(0,0))
    ELSE
       ALLOCATE(mat%data_r(0,0))
       ALLOCATE(mat%data_c(mat%matsize1,mat%matsize2),STAT=err)
    ENDIF

    IF (err>0) CALL judft_error("Allocation of memmory failed for mat datatype",hint="You probably run out of memory")
  END SUBROUTINE t_gpumat_alloc

  SUBROUTINE init_from(this,mat)
    CLASS(t_gpumat),INTENT(INOUT):: this
    CLASS(t_mat),INTENT(IN)      :: mat

    SELECT TYPE(mat)
    TYPE IS (t_mat)
       call this%alloc(mat%l_real,mat%matsize1,mat%matsize2)
       IF (mat%l_real) THEN
          this%data_r=mat%data_r
       ELSE
          this%data_c=mat%data_c
       END IF
       CLASS default
       CALL judft_error("Initializiation not implemented in t_gpumat")
    END SELECT
  END SUBROUTINE init_from


  SUBROUTINE t_gpumat_add_transpose(mat,mat1)
    USE m_types
    IMPLICIT NONE
    CLASS(t_gpumat),INTENT(INOUT):: mat
    CLASS(t_mat),INTENT(IN)    :: mat1
    INTEGER::i,j
    SELECT TYPE(mat)
    TYPE is (t_gpumat)
       SELECT TYPE(mat1)
       TYPE is (t_gpumat)
          IF ((mat%matsize1.NE.mat1%matsize2).OR. &
               (mat%matsize2.NE.mat1%matsize1)) &
               CALL judft_error("Matrix sizes missmatch in add_transpose")
          IF (mat%l_real.AND.mat1%l_real) THEN
             !$cuf kernel do <<<*,256>>>
             DO i=1,mat%matsize2
                DO j=i+1,mat%matsize1
                   mat%data_r(j,i)=mat1%data_r(i,j)
                ENDDO
             ENDDO
          ELSEIF((.NOT.mat%l_real).AND.(.NOT.mat1%l_real)) THEN
             !$cuf kernel do <<<*,256>>>
             DO i=1,mat%matsize2
                DO j=i+1,mat%matsize1
                   mat%data_c(j,i)=CONJG(mat1%data_c(i,j))
                ENDDO
             ENDDO
          ELSE
             CALL judft_error("Inconsistency between data types in m_mat")
          END IF
       END SELECT
108
       i=cudaDeviceSynchronize()
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
    END SELECT
  END SUBROUTINE t_gpumat_add_transpose



  SUBROUTINE t_gpumat_multiply(mat1,mat2,res)
    CLASS(t_gpumat),INTENT(INOUT)       ::mat1
    CLASS(t_mat),INTENT(IN)           ::mat2
    CLASS(t_mat),INTENT(OUT),OPTIONAL ::res

    if (mat1%matsize2.ne.mat2%matsize1) CALL judft_error("Cannot multiply matrices because of non-matching dimensions",hint="This is a BUG in FLEUR, please report")
    CALL judft_error(" multiply method not implemented for t_gpumat")

  end SUBROUTINE t_gpumat_multiply


  SUBROUTINE t_gpumat_transpose(mat1,res)
    CLASS(t_gpumat),INTENT(INOUT)       ::mat1
    TYPE(t_gpumat),INTENT(OUT),OPTIONAL ::res

    INTEGER :: i,j

    IF (PRESENT(res)) THEN
       CALL res%alloc(mat1%l_real,mat1%matsize2,mat1%matsize1)
       IF (mat1%l_real) THEN
          DO i=1,mat1%matsize1
             DO j=1,mat1%matsize2
                res%data_r(i,j)=mat1%data_r(j,i)
             ENDDO
          ENDDO
       ELSE
          DO i=1,mat1%matsize1
             DO j=1,mat1%matsize2
                res%data_c(i,j)=mat1%data_c(j,i)
             ENDDO
          ENDDO
       ENDIF
    ELSE
       SELECT TYPE(mat1)
       TYPE is (t_gpumat)
          IF (mat1%matsize1.NE.mat1%matsize2) CALL judft_error("Cannot transpose matrices inplace because of non-matching dimensions",hint="This is a BUG in FLEUR, please report")
          IF (mat1%l_real) THEN
             !$cuf kernel do <<<*,256>>>
             DO i=1,mat1%matsize1
                DO j=1,mat1%matsize2
                   mat1%data_r(i,j)=mat1%data_r(j,i)
                ENDDO
             ENDDO
          ELSE
             !$cuf kernel do <<<*,256>>>
             DO i=1,mat1%matsize1
                DO j=1,mat1%matsize2
                   mat1%data_c(i,j)=mat1%data_c(j,i)
                ENDDO
             ENDDO
          ENDIF
       END SELECT
166 167

       i=cudaDeviceSynchronize()
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
    END IF
  end SUBROUTINE t_gpumat_transpose

  SUBROUTINE t_gpumat_from_packed(mat1,l_real,matsize,packed_r,packed_c)
    CLASS(t_gpumat),INTENT(INOUT)       :: mat1
    INTEGER,INTENT(IN)               :: matsize
    LOGICAL,INTENT(IN)               :: l_real
    REAL,INTENT(IN)                  :: packed_r(:)
    COMPLEX,INTENT(IN)               :: packed_c(:)
    CALL judft_error(" mat_from_packed method not implemented for t_gpumat")
  end SUBROUTINE t_gpumat_from_packed

  function t_gpumat_to_packed(mat)result(packed)
    CLASS(t_gpumat),INTENT(IN)       :: mat
    COMPLEX                       :: packed(mat%matsize1*(mat%matsize1+1)/2)
    integer :: n,nn,i
    real,parameter :: tol=1e-5
    if (mat%matsize1.ne.mat%matsize2) call judft_error("Could not pack no-square matrix",hint='This is a BUG, please report')
    CALL judft_error(" mat_to_packed method not implemented for t_gpumat")
  end function t_gpumat_to_packed

  SUBROUTINE t_gpumat_inverse(mat)
    USE cublas
    implicit none
    CLASS(t_gpumat),INTENT(INOUT)       :: mat
    integer                :: info
    REAL, ALLOCATABLE,device      :: work_r(:)
    INTEGER, ALLOCATABLE,device   :: ipiv(:)
    COMPLEX,ALLOCATABLE,device    :: work_c(:)


    if (mat%matsize1.ne.mat%matsize2) call judft_error("Can only invert square matrices",hint="This is a BUG in FLEUR, please report")
    ALLOCATE(ipiv(mat%matsize1))

    if (mat%l_real) THEN
       ALLOCATE(work_r(mat%matsize1))
       call dgetrf(mat%matsize1,mat%matsize1,mat%data_r,mat%matsize1,ipiv,info)
       if(info.ne.0) call judft_error("Failed to invert matrix: dpotrf failed.")
       call dgetri(mat%matsize1,mat%data_r,mat%matsize1,ipiv,work_r,mat%matsize1,info)
       if(info.ne.0) call judft_error("Failed to invert matrix: dpotrf failed.")
    else
       ALLOCATE(work_c(mat%matsize1))
       call zgetrf(mat%matsize1,mat%matsize1,mat%data_c,mat%matsize1,ipiv,info)
       if(info.ne.0) call judft_error("Failed to invert matrix: dpotrf failed.")
       call zgetri(mat%matsize1,mat%data_c,size(mat%data_c,1),ipiv,work_c,mat%matsize1,info)
       if(info.ne.0) call judft_error("Failed to invert matrix: dpotrf failed.")
    end if
  end subroutine t_gpumat_inverse

  SUBROUTINE t_gpumat_move(mat,mat1)
    IMPLICIT NONE
    CLASS(t_gpumat),INTENT(INOUT):: mat
    CLASS(t_gpumat),INTENT(INOUT):: mat1
    !Special case, the full matrix is copied. Then use move alloc
    CALL mat%copy(mat1,1,1)
  END SUBROUTINE t_gpumat_move

  SUBROUTINE t_gpumat_copy(mat,mat1,n1,n2)
    IMPLICIT NONE
    CLASS(t_gpumat),INTENT(INOUT):: mat
    CLASS(t_mat),INTENT(IN)   :: mat1
    INTEGER,INTENT(IN)        :: n1,n2

    INTEGER:: i1,i2,i,j
    LOGICAL no_gpu

    no_gpu=.FALSE.
    i1=mat%matsize1-n1+1  !space available for first dimension
    i2=mat%matsize2-n1+1
    i1=MIN(i1,mat1%matsize1)
    i2=MIN(i2,mat1%matsize2)

    SELECT TYPE(mat)
    TYPE IS (t_gpumat)
       SELECT TYPE(mat1)
       TYPE IS (t_gpumat)
          IF (mat%l_real) THEN
             !$cuf kernel do <<<*,256>>>
             DO i=1,i1
                DO j=1,i2
                   mat%data_r(n1+i-1,n2+j-1)=mat1%data_r(i,j)
                ENDDO
             ENDDO
          ELSE
             !$cuf kernel do <<<*,256>>>
             DO i=1,i1
                DO j=1,i2
                   mat%data_c(n1+i-1,n2+j-1)=mat1%data_c(i,j)
                ENDDO
             ENDDO
          END IF
259
          i=cudaDeviceSynchronize()
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
          CLASS default
          no_gpu=.TRUE.
       END SELECT
       CLASS default
       no_gpu=.TRUE.
    END SELECT
    IF (no_gpu)THEN
       IF (mat%l_real) THEN
          DO i=1,i1
             DO j=1,i2
                mat%data_r(n1+i-1,n2+j-1)=mat1%data_r(i,j)
             ENDDO
          ENDDO
       ELSE
          DO i=1,i1
             DO j=1,i2
                mat%data_c(n1+i-1,n2+j-1)=mat1%data_c(i,j)
             ENDDO
          ENDDO
       END IF
    END IF
  END SUBROUTINE t_gpumat_copy

  SUBROUTINE t_gpumat_clear(mat)
    IMPLICIT NONE
    CLASS(t_gpumat),INTENT(INOUT):: mat
    INTEGER:: n,nn
    SELECT TYPE(mat)
    TYPE IS (t_gpumaT)
       IF (mat%l_real) THEN
          !$cuf kernel do <<<*,256>>>
          DO n=1,mat%matsize1
             DO nn=1,mat%matsize2
                mat%data_r(n,nn)=0.0
             ENDDO
          ENDDO
       ELSE
          !$cuf kernel do <<<*,256>>>
          DO n=1,mat%matsize1
             DO nn=1,mat%matsize2
                mat%data_c(n,nn)=0.0
             ENDDO
          ENDDO
       ENDIF
304
       n=cudaDeviceSynchronize()
305 306
    END SELECT
  END SUBROUTINE t_gpumat_clear
Daniel Wortmann's avatar
Daniel Wortmann committed
307
#endif  
308
END MODULE m_types_gpumat