Commit 3bb1e334 authored by Daniel Wortmann's avatar Daniel Wortmann

Make GPU version of hsmt_nonsph to compile with current PGI compiler on jureca

parent d7e1fea3
...@@ -66,6 +66,9 @@ CONTAINS ...@@ -66,6 +66,9 @@ CONTAINS
#include"cpp_double.h" #include"cpp_double.h"
USE m_hsmt_socinit USE m_hsmt_socinit
USE m_hsmt_nonsph USE m_hsmt_nonsph
#ifdef CPP_GPU
USE m_hsmt_nonsph_gpu
#endif
USE m_hsmt_sph USE m_hsmt_sph
USE m_hsmt_extra USE m_hsmt_extra
USE m_types USE m_types
...@@ -233,10 +236,18 @@ CONTAINS ...@@ -233,10 +236,18 @@ CONTAINS
kveclo,l_real,hamOvlp%a_r,hamOvlp%b_r,hamOvlp%a_c,hamOvlp%b_c) !out/in kveclo,l_real,hamOvlp%a_r,hamOvlp%b_r,hamOvlp%a_c,hamOvlp%b_c) !out/in
CALL timestop("hsmt extra") CALL timestop("hsmt extra")
CALL timestart("hsmt non-spherical") CALL timestart("hsmt non-spherical")
#ifndef CPP_GPU
CALL hsmt_nonsph(DIMENSION,atoms,sym,SUB_COMM,n_size,n_rank,input,isp,nintsp,& CALL hsmt_nonsph(DIMENSION,atoms,sym,SUB_COMM,n_size,n_rank,input,isp,nintsp,&
hlpmsize,noco,l_socfirst,lapw,cell,tlmplm,fj,gj,gk,vk,oneD,l_real,hamOvlp%a_r,hamOvlp%a_c) hlpmsize,noco,l_socfirst,lapw,cell,tlmplm,fj,gj,gk,vk,oneD,l_real,hamOvlp%a_r,hamOvlp%a_c)
CALL timestop("hsmt non-spherical") CALL timestop("hsmt non-spherical")
#else
CALL timestart("hsmt non-spherical-GPU")
CALL hsmt_nonsph_gpu(DIMENSION,atoms,sym,SUB_COMM,n_size,n_rank,input,isp,nintsp,&
hlpmsize,noco,l_socfirst,lapw,cell,tlmplm,fj,gj,gk,vk,oneD,l_real,hamOvlp%a_r,hamOvlp%a_c)
CALL timestop("hsmt non-spherical-GPU")
#endif
ENDIF ENDIF
ENDDO ENDDO
#if 1==2 #if 1==2
......
MODULE m_hsmt_nonsph MODULE m_hsmt_nonsph_GPU
#define CPP_BLOCKSIZE 64 #define CPP_BLOCKSIZE 64
! USE m_juDFT ! USE m_juDFT
!$ USE omp_lib !$ USE omp_lib
...@@ -9,7 +9,7 @@ MODULE m_hsmt_nonsph ...@@ -9,7 +9,7 @@ MODULE m_hsmt_nonsph
IMPLICIT NONE IMPLICIT NONE
CONTAINS CONTAINS
SUBROUTINE hsmt_nonsph(DIMENSION,atoms,sym,SUB_COMM, n_size,n_rank,input,isp,nintsp,& SUBROUTINE hsmt_nonsph_GPU(DIMENSION,atoms,sym,SUB_COMM, n_size,n_rank,input,isp,nintsp,&
hlpmsize,noco,l_socfirst, lapw, cell,tlmplm, fj,gj,gk,vk,oneD,l_real,aa_r,aa_c) hlpmsize,noco,l_socfirst, lapw, cell,tlmplm, fj,gj,gk,vk,oneD,l_real,aa_r,aa_c)
#include"cpp_double.h" #include"cpp_double.h"
...@@ -247,11 +247,12 @@ CONTAINS ...@@ -247,11 +247,12 @@ CONTAINS
call zgemm("N","C",bsize,ki+bsize2-1,lmp+1,one,b(ki,0,iintsp),SIZE(a,1),bx(1,0),SIZE(ax,1),one ,aa_block,SIZE(aa_block,1)) call zgemm("N","C",bsize,ki+bsize2-1,lmp+1,one,b(ki,0,iintsp),SIZE(a,1),bx(1,0),SIZE(ax,1),one ,aa_block,SIZE(aa_block,1))
!$acc end host_data !$acc end host_data
ELSE ELSE
stop "Not implemented"
!$acc host_data use_device( a,b,ax,bx,aa_block ) !$acc host_data use_device( a,b,ax,bx,aa_block )
CALL zgemm("N","C",bsize,ki+bsize2-1,lmp+1,one,a(ki:ki+bsize2-1:n_size,0:lmp,iintsp), & !CALL zgemm("N","C",bsize,ki+bsize2-1,lmp+1,one,a(ki:ki+bsize2-1:n_size,0:lmp,iintsp), &
SIZE(a(ki:ki+bsize2-1:n_size,0:lmp,iintsp),1),ax(1,0),SIZE(ax,1),zero,aa_block,SIZE(aa_block,1)) ! SIZE(a(ki:ki+bsize2-1:n_size,0:lmp,iintsp),1),ax(1,0),SIZE(ax,1),zero,aa_block,SIZE(aa_block,1))
CALL zgemm("N","C",bsize,ki+bsize2-1,lmp+1,one,b(ki:ki+bsize2-1:n_size,0:lmp,iintsp), & !CALL zgemm("N","C",bsize,ki+bsize2-1,lmp+1,one,b(ki:ki+bsize2-1:n_size,0:lmp,iintsp), &
SIZE(a(ki:ki+bsize2-1:n_size,0:lmp,iintsp),1),bx(1,0),SIZE(ax,1),one,aa_block,SIZE(aa_block,1)) ! SIZE(a(ki:ki+bsize2-1:n_size,0:lmp,iintsp),1),bx(1,0),SIZE(ax,1),one,aa_block,SIZE(aa_block,1))
!$acc end host_data !$acc end host_data
ENDIF ENDIF
!$acc kernels !$acc kernels
...@@ -261,9 +262,11 @@ CONTAINS ...@@ -261,9 +262,11 @@ CONTAINS
nc = 1+kii/n_size nc = 1+kii/n_size
ii = nc*(nc-1)/2*n_size-(nc-1)*(n_size-n_rank-1) ii = nc*(nc-1)/2*n_size-(nc-1)*(n_size-n_rank-1)
IF ( (n_size==1).OR.(kii+1<=lapw%nv(1)) ) THEN ! IF ( (n_size==1).OR.(kii+1<=lapw%nv(1)) ) THEN !
aahlp(ii+1:ii+ki) = aahlp(ii+1:ii+ki)+MATMUL(CONJG(ax(:ki,:lmp)),a(ki,:,iintsp))+MATMUL(CONJG(bx(:ki,:lmp)),b(ki,:lmp,iintsp)) stop "not implemented"
!comments below must be reactivated
!aahlp(ii+1:ii+ki) = aahlp(ii+1:ii+ki)+MATMUL(CONJG(ax(:ki,:lmp)),a(ki,:,iintsp))+MATMUL(CONJG(bx(:ki,:lmp)),b(ki,:lmp,iintsp))
ELSE ! components for <2||2> block unused ELSE ! components for <2||2> block unused
aa_tmphlp(:ki) = MATMUL(CONJG(ax(:ki,:lmp)),a(ki,:lmp,iintsp))+MATMUL(CONJG(bx(:ki,:DIMENSION%lmd)),b(ki,:lmp,iintsp)) !aa_tmphlp(:ki) = MATMUL(CONJG(ax(:ki,:lmp)),a(ki,:lmp,iintsp))+MATMUL(CONJG(bx(:ki,:DIMENSION%lmd)),b(ki,:lmp,iintsp))
!---> spin-down spin-down part !---> spin-down spin-down part
ij = ii + lapw%nv(1) ij = ii + lapw%nv(1)
aa_c(ij+1:ij+ki)=aa_c(ij+1:ij+ki)+chi22*aa_tmphlp(:ki) aa_c(ij+1:ij+ki)=aa_c(ij+1:ij+ki)+chi22*aa_tmphlp(:ki)
...@@ -296,8 +299,10 @@ CONTAINS ...@@ -296,8 +299,10 @@ CONTAINS
chihlp = chi21 chihlp = chi21
ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2 ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2
ENDIF ENDIF
aa_c(ii+1:ii+kjmax) = aa_c(ii+1:ii+kjmax) + chihlp*& STOP "NOT implemented"
(MATMUL(CONJG(ax(:kjmax,:lmp)),a(ki,:,iintsp))+MATMUL(CONJG(bx(:kjmax,:lmp)),b(ki,:lmp,iintsp))) !next line must be reactivated
!aa_c(ii+1:ii+kjmax) = aa_c(ii+1:ii+kjmax) + chihlp*&
! (MATMUL(CONJG(ax(:kjmax,:lmp)),a(ki,:,iintsp))+MATMUL(CONJG(bx(:kjmax,:lmp)),b(ki,:lmp,iintsp)))
ki=ki+n_size ki=ki+n_size
kii=kii+n_size kii=kii+n_size
ENDDO ENDDO
...@@ -347,7 +352,7 @@ CONTAINS ...@@ -347,7 +352,7 @@ CONTAINS
RETURN RETURN
END SUBROUTINE hsmt_nonsph END SUBROUTINE hsmt_nonsph_GPU
END MODULE m_hsmt_nonsph END MODULE m_hsmt_nonsph_GPU
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment