Commit 29c56395 authored Oct 12, 2018 by Uliana Alekseeva

hsmt_sph_GPU added

parent a5c5606e
Showing 3 changed files with 329 additions and 16 deletions:

    eigen/hsmt.F90       +3    -3
    eigen/hsmt_ab.F90    +5    -7
    eigen/hsmt_sph.F90   +321  -6
eigen/hsmt.F90
@@ -71,7 +71,7 @@ CONTAINS
       IF (.NOT.noco%l_noco) THEN
          !This is for collinear calculations: the (1,1) element of the matrices is all
          !that is needed and allocated
          CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,cell,1,1,chi_one,lapw,enpara%el0,&
          CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,1,1,chi_one,lapw,enpara%el0,&
               td%e_shift(n,ispin),usdus,fj(:,0:,ispin,:),gj(:,0:,ispin,:),smat(1,1),hmat(1,1))
          CALL hsmt_nonsph(n,mpi,sym,atoms,ispin,1,1,chi_one,noco,cell,lapw,td,&
               fj(:,0:,ispin,:),gj(:,0:,ispin,:),hmat(1,1))
@@ -83,7 +83,7 @@ CONTAINS
          !stored in tmp-variables. Then these are distributed (rotated) into the 2x2
          !global spin-matrices.
          CALL hmat_tmp%clear();CALL smat_tmp%clear()
          CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,cell,1,1,chi_one,lapw,enpara%el0,td%e_shift(n,ispin),&
          CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,1,1,chi_one,lapw,enpara%el0,td%e_shift(n,ispin),&
               usdus,fj(:,0:,ispin,:),gj(:,0:,ispin,:),smat_tmp,hmat_tmp)
          CALL hsmt_nonsph(n,mpi,sym,atoms,ispin,1,1,chi_one,noco,cell,lapw,td,&
               fj(:,0:,ispin,:),gj(:,0:,ispin,:),hmat_tmp)
@@ -103,7 +103,7 @@ CONTAINS
          CALL hsmt_spinor(ispin,n,noco,chi)
          DO iintsp=1,2
             DO jintsp=1,2
                CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,cell,iintsp,jintsp,chi(iintsp,jintsp),&
                CALL hsmt_sph(n,atoms,mpi,ispin,input,noco,iintsp,jintsp,chi(iintsp,jintsp),&
                     lapw,enpara%el0,td%e_shift(n,ispin),usdus,fj(:,0:,ispin,:),gj(:,0:,ispin,:),&
                     smat(iintsp,jintsp),hmat(iintsp,jintsp))
                CALL hsmt_nonsph(n,mpi,sym,atoms,ispin,iintsp,jintsp,chi(iintsp,jintsp),noco,cell,&
eigen/hsmt_ab.F90
@@ -62,7 +62,7 @@ CONTAINS
    USE m_ylm
    USE m_apws
    USE cudafor
    !USE nvtx
    USE nvtx
    IMPLICIT NONE
    TYPE(t_sym),INTENT(IN)      :: sym
    TYPE(t_cell),INTENT(IN)     :: cell
@@ -92,8 +92,9 @@ CONTAINS
    COMPLEX,ALLOCATABLE,DEVICE :: c_ph_dev(:,:)
    REAL,   ALLOCATABLE,DEVICE :: gkrot_dev(:,:)
    INTEGER :: grid, block
    !INTEGER :: istat
    INTEGER :: istat
    call nvtxStartRange("hsmt_ab",3)
    lmax = MERGE(atoms%lnonsph(n),atoms%lmax(n),l_nonsph)
    ALLOCATE(c_ph_dev(lapw%nv(1),MERGE(2,1,noco%l_ss)))
@@ -129,17 +130,12 @@ CONTAINS
    !--> synthesize the complex conjugates of a and b
    !call nvtxStartRange("hsmt_synthAB",5)
    !istat = cudaDeviceSynchronize()
    ! pretty ugly solution
    block = 256
    grid = lapw%nv(1)/(block*4) + 1
    CALL synth_ab<<<grid,block>>>(grid,block,lapw%nv(1),lmax,ab_size,gkrot_dev,&
         fj(:,:,iintsp),gj(:,:,iintsp),c_ph_dev(:,iintsp),ab)
    !istat = cudaDeviceSynchronize()
    !call nvtxEndRange
    IF (PRESENT(abclo)) THEN
       print *,"Ooooops, TODO in hsmt_ab"
@@ -169,6 +165,8 @@ CONTAINS
    DEALLOCATE(c_ph_dev)
    DEALLOCATE(gkrot_dev)
    istat = cudaDeviceSynchronize()
    call nvtxEndRange
  END SUBROUTINE hsmt_ab_gpu
#endif
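The hunk above launches the synth_ab kernel with CUDA Fortran's chevron syntax, sizing the launch as block = 256 threads and grid = lapw%nv(1)/(block*4)+1 blocks, and the final hunk forces completion with cudaDeviceSynchronize() before the NVTX range is closed. For readers new to that syntax, here is a minimal, self-contained sketch of the same ingredients: an ATTRIBUTES(GLOBAL) kernel, a chevron launch, a MANAGED array (as used for fj/gj in eigen/hsmt_sph.F90 below), and a synchronization before the host reads the result. It is illustrative only: scale_kernel, x and alpha are made-up names, and it uses the plain one-element-per-thread covering grid rather than the commit's nv/(block*4)+1 sizing.

    MODULE m_scale_demo
    CONTAINS
       ATTRIBUTES(GLOBAL) SUBROUTINE scale_kernel(x, alpha)
          IMPLICIT NONE
          REAL        :: x(:)       ! device-resident dummy array of a global routine
          REAL, VALUE :: alpha      ! scalars go to the device by value
          INTEGER :: i
          i = (blockIdx%x - 1)*blockDim%x + threadIdx%x   ! 1-based global thread index
          IF (i <= SIZE(x)) x(i) = alpha*x(i)
       END SUBROUTINE scale_kernel
    END MODULE m_scale_demo

    PROGRAM launch_demo
       USE cudafor
       USE m_scale_demo
       IMPLICIT NONE
       INTEGER, PARAMETER :: n = 100000
       REAL, ALLOCATABLE, MANAGED :: x(:)   ! unified memory, visible to host and device
       INTEGER :: grid, block, istat
       ALLOCATE(x(n)); x = 1.0
       block = 256
       grid  = (n + block - 1)/block        ! one element per thread; enough blocks to cover n
       CALL scale_kernel<<<grid, block>>>(x, 2.0)
       istat = cudaDeviceSynchronize()      ! wait for the kernel before reading x on the host
       PRINT *, 'max deviation from 2.0:', MAXVAL(ABS(x - 2.0))
       DEALLOCATE(x)
    END PROGRAM launch_demo

The nvtxStartRange("hsmt_ab",3)/nvtxEndRange pair added in the same routine brackets the whole region, so it appears as one named block on the nvprof/Nsight timeline.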
eigen/hsmt_sph.F90
@@ -7,15 +7,331 @@
MODULE m_hsmt_sph
  USE m_juDFT
  IMPLICIT NONE

  INTERFACE hsmt_sph
     module procedure hsmt_sph_cpu
#ifdef CPP_GPU
     module procedure hsmt_sph_gpu
  END INTERFACE
  INTERFACE HsmtSphGpuKernel_noApw
     module procedure HsmtSphGpuKernel_noApw_cmplx,HsmtSphGpuKernel_noApw_real
#endif
  END INTERFACE

CONTAINS

  SUBROUTINE hsmt_sph(n,atoms,mpi,isp,input,noco,cell,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)

#ifdef CPP_GPU
  SUBROUTINE HsmtSphGpuKernel_noApw_cmplx(iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
       lnonsph,chi,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,smat_data,hmat_data)
    INTEGER, INTENT(IN) :: iintsp,jintsp,nv(2),lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
    COMPLEX, INTENT(IN) :: chi
    REAL,    INTENT(IN) :: qssbti(3),qssbtj(3)
    INTEGER, INTENT(IN) :: gvec(:,:,:)
    REAL,    INTENT(IN) :: gk(:,:,:)
    REAL,    INTENT(IN) :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)
    REAL,    INTENT(IN) :: fl2p1bt(0:lmaxd)
    REAL, MANAGED, INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
    REAL,    INTENT(IN) :: taual(:,:)
    REAL,    INTENT(IN) :: ddn(0:lmaxd)
    REAL,    INTENT(IN) :: el(0:lmaxd)
    REAL,    INTENT(IN) :: e_shift
    COMPLEX, INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)

    REAL, PARAMETER :: tpi_const = 2.*3.1415926535897932
    REAL,    ALLOCATABLE :: plegend(:,:)
    COMPLEX, ALLOCATABLE :: cph(:)
    REAL    tnn(3),elall,fct,fjkiln,gjkiln,ddnln,ski(3)
    INTEGER kii,ki,kj,l,nn

    ALLOCATE(cph(MAXVAL(nv)))
    ALLOCATE(plegend(MAXVAL(nv),0:lmaxd))
    plegend = 0.0
    plegend(:,0) = 1.0
    DO ki = ki_start, ki_end, ki_step
       kii = (ki-1)/ki_step + 1
       ski = gvec(:,ki,jintsp) + qssbti
       !---> legendre polynomials
       DO kj = 1, ki
          plegend(kj,1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
       END DO
       DO l = 1, lmax - 1
          plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
       END DO
       !---> set up phase factors
       cph = 0.0
       DO nn = nn_start, nn_end
          tnn = tpi_const*taual(:,nn)
          DO kj = 1, ki
             cph(kj) = cph(kj) + &
                  CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
                        SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
             ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
          END DO
       END DO
       !---> update overlap and l-diagonal hamiltonian matrix
       DO l = 0, lmax
          fjkiln = fj(ki,l,jintsp)
          gjkiln = gj(ki,l,jintsp)
          ddnln  = ddn(l)
          elall  = el(l)
          IF (l<=lnonsph) elall = elall - e_shift !(isp)
          DO kj = 1, MIN(ki,nv(iintsp))
             fct = chi*plegend(kj,l)*fl2p1(l)*&
                  (fjkiln*fj(kj,l,iintsp)+gjkiln*gj(kj,l,iintsp)*ddnln)
             smat_data(kj,kii) = smat_data(kj,kii) + cph(kj)*fct
             hmat_data(kj,kii) = hmat_data(kj,kii) + cph(kj)*(fct*elall &
                  + chi*plegend(kj,l)*fl2p1bt(l)*(fjkiln*gj(kj,l,iintsp)+gjkiln*fj(kj,l,iintsp)))
          END DO
          !---> end loop over l
       ENDDO
       !---> end loop over ki
    ENDDO
    DEALLOCATE(plegend)
    DEALLOCATE(cph)
  END SUBROUTINE HsmtSphGpuKernel_noApw_cmplx

  SUBROUTINE HsmtSphGpuKernel_noApw_real(iintsp,jintsp,nv,lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,&
       lnonsph,chi,qssbti,qssbtj,gvec,gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,taual,ddn,el,e_shift,smat_data,hmat_data)
    INTEGER, INTENT(IN) :: iintsp,jintsp,nv(2),lmaxd,lmax,ki_start,ki_end,ki_step,nn_start,nn_end,lnonsph
    COMPLEX, INTENT(IN) :: chi
    REAL,    INTENT(IN) :: qssbti(3),qssbtj(3)
    INTEGER, INTENT(IN) :: gvec(:,:,:)
    REAL,    INTENT(IN) :: gk(:,:,:)
    REAL,    INTENT(IN) :: fleg1(0:lmaxd),fleg2(0:lmaxd),fl2p1(0:lmaxd)
    REAL,    INTENT(IN) :: fl2p1bt(0:lmaxd)
    REAL, MANAGED, INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
    REAL,    INTENT(IN) :: taual(:,:)
    REAL,    INTENT(IN) :: ddn(0:lmaxd)
    REAL,    INTENT(IN) :: el(0:lmaxd)
    REAL,    INTENT(IN) :: e_shift
    REAL,    INTENT(INOUT) :: smat_data(:,:),hmat_data(:,:)

    REAL, PARAMETER :: tpi_const = 2.*3.1415926535897932
    REAL,    ALLOCATABLE :: plegend(:,:)
    COMPLEX, ALLOCATABLE :: cph(:)
    REAL    tnn(3),elall,fct,fjkiln,gjkiln,ddnln,ski(3)
    INTEGER kii,ki,kj,l,nn

    ALLOCATE(cph(MAXVAL(nv)))
    ALLOCATE(plegend(MAXVAL(nv),0:lmaxd))
    plegend = 0.0
    plegend(:,0) = 1.0
    DO ki = ki_start, ki_end, ki_step
       kii = (ki-1)/ki_step + 1
       ski = gvec(:,ki,jintsp) + qssbti
       !---> legendre polynomials
       DO kj = 1, ki
          plegend(kj,1) = DOT_PRODUCT(gk(:,kj,iintsp),gk(:,ki,jintsp))
       END DO
       DO l = 1, lmax - 1
          plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
       END DO
       !---> set up phase factors
       cph = 0.0
       DO nn = nn_start, nn_end
          tnn = tpi_const*taual(:,nn)
          DO kj = 1, ki
             cph(kj) = cph(kj) + &
                  CMPLX(COS(DOT_PRODUCT(ski-gvec(:,kj,iintsp)-qssbtj,tnn)),&
                        SIN(DOT_PRODUCT(gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
             ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
          END DO
       END DO
       !---> update overlap and l-diagonal hamiltonian matrix
       DO l = 0, lmax
          fjkiln = fj(ki,l,jintsp)
          gjkiln = gj(ki,l,jintsp)
          ddnln  = ddn(l)
          elall  = el(l)
          IF (l<=lnonsph) elall = elall - e_shift !(isp)
          DO kj = 1, ki
             fct = plegend(kj,l)*fl2p1(l)*&
                  (fjkiln*fj(kj,l,iintsp)+gjkiln*gj(kj,l,iintsp)*ddnln)
             smat_data(kj,kii) = smat_data(kj,kii) + REAL(cph(kj))*fct
             hmat_data(kj,kii) = hmat_data(kj,kii) + REAL(cph(kj))*&
                  (fct*elall + plegend(kj,l)*fl2p1bt(l)*&
                  (fjkiln*gj(kj,l,iintsp)+gjkiln*fj(kj,l,iintsp)))
          ENDDO
          !---> end loop over l
       ENDDO
       !---> end loop over ki
    ENDDO
    DEALLOCATE(plegend)
    DEALLOCATE(cph)
  END SUBROUTINE HsmtSphGpuKernel_noApw_real

  SUBROUTINE hsmt_sph_gpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    USE nvtx
    IMPLICIT NONE
    TYPE(t_input),INTENT(IN)   :: input
    TYPE(t_mpi),INTENT(IN)     :: mpi
    TYPE(t_noco),INTENT(IN)    :: noco
    TYPE(t_atoms),INTENT(IN)   :: atoms
    TYPE(t_lapw),INTENT(IN)    :: lapw
    TYPE(t_usdus),INTENT(IN)   :: usdus
    CLASS(t_mat),INTENT(INOUT) :: smat,hmat
    !     ..
    !     .. Scalar Arguments ..
    INTEGER, INTENT(IN) :: n,isp,iintsp,jintsp
    COMPLEX, INTENT(IN) :: chi
    !     ..
    !     .. Array Arguments ..
    REAL,    INTENT(IN) :: el(0:atoms%lmaxd,atoms%ntype,input%jspins)
    REAL,    INTENT(IN) :: e_shift !(atoms%ntype,input%jspins)
#if defined CPP_GPU
    REAL, MANAGED, INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
#else
    REAL,    INTENT(IN) :: fj(:,0:,:),gj(:,0:,:)
#endif
    !     ..
    !     .. Local Scalars ..
    REAL tnn(3),elall,fct,fjkiln,gjkiln,ddnln,ski(3)
    REAL apw_lo1,apw_lo2,apw1,w1
    COMPLEX capw1
    INTEGER kii,ki,kj,l,nn
    !     ..
    !     .. Local Arrays ..
    REAL fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)
    REAL fl2p1bt(0:atoms%lmaxd)
    REAL qssbti(3),qssbtj(3)
    REAL, ALLOCATABLE :: plegend(:,:)
    COMPLEX, ALLOCATABLE :: cph(:)
    LOGICAL apw(0:atoms%lmaxd)

    call nvtxStartRange("hsmt_sph",2)
    CALL timestart("spherical setup")
    DO l = 0,atoms%lmaxd
       fleg1(l) = REAL(l+l+1)/REAL(l+1)
       fleg2(l) = REAL(l)/REAL(l+1)
       fl2p1(l) = REAL(l+l+1)/fpi_const
       fl2p1bt(l) = fl2p1(l)*0.5
    END DO
    !!$OMP PARALLEL DEFAULT(SHARED)&
    !!$OMP PRIVATE(kii,ki,ski,kj,plegend,l)&
    !!$OMP PRIVATE(cph,nn,tnn,fjkiln,gjkiln)&
    !!$OMP PRIVATE(w1,apw_lo1,apw_lo2,ddnln,elall,fct,apw1)&
    !!$OMP PRIVATE(capw1)
    qssbti = MERGE(- noco%qss/2, + noco%qss/2, jintsp.EQ.1)
    qssbtj = MERGE(- noco%qss/2, + noco%qss/2, iintsp.EQ.1)
    !!$OMP DO SCHEDULE(DYNAMIC,1)
    IF (input%l_useapw) THEN

       ALLOCATE(cph(MAXVAL(lapw%nv)))
       ALLOCATE(plegend(MAXVAL(lapw%nv),0:atoms%lmaxd))
       plegend = 0.0
       plegend(:,0) = 1.0
       DO ki = mpi%n_rank+1, lapw%nv(jintsp), mpi%n_size
          kii = (ki-1)/mpi%n_size + 1
          ski = lapw%gvec(:,ki,jintsp) + qssbti
          !---> legendre polynomials
          DO kj = 1, ki
             plegend(kj,1) = DOT_PRODUCT(lapw%gk(:,kj,iintsp),lapw%gk(:,ki,jintsp))
          END DO
          DO l = 1, atoms%lmax(n) - 1
             plegend(:ki,l+1) = fleg1(l)*plegend(:ki,1)*plegend(:ki,l) - fleg2(l)*plegend(:ki,l-1)
          END DO
          !---> set up phase factors
          cph = 0.0
          DO nn = SUM(atoms%neq(:n-1))+1, SUM(atoms%neq(:n))
             tnn = tpi_const*atoms%taual(:,nn)
             DO kj = 1, ki
                cph(kj) = cph(kj) + &
                     CMPLX(COS(DOT_PRODUCT(ski-lapw%gvec(:,kj,iintsp)-qssbtj,tnn)),&
                           SIN(DOT_PRODUCT(lapw%gvec(:,kj,iintsp)+qssbtj-ski,tnn)))
                ! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
             END DO
          END DO
          !---> update overlap and l-diagonal hamiltonian matrix
          DO l = 0, atoms%lmax(n)
             fjkiln = fj(ki,l,jintsp)
             gjkiln = gj(ki,l,jintsp)
             !+APW
             w1 = 0.5*(usdus%uds(l,n,isp)*usdus%dus(l,n,isp) + &
                       usdus%us(l,n,isp)*usdus%duds(l,n,isp))
             apw_lo1 = fl2p1(l)*0.5*atoms%rmt(n)**2*(gjkiln*w1 + &
                       fjkiln*usdus%us(l,n,isp)*usdus%dus(l,n,isp))
             apw_lo2 = fl2p1(l)*0.5*atoms%rmt(n)**2*(fjkiln*w1 + &
                       gjkiln*usdus%uds(l,n,isp)*usdus%duds(l,n,isp))
             !-APW
             ddnln = usdus%ddn(l,n,isp)
             elall = el(l,n,isp)
             IF (l<=atoms%lnonsph(n)) elall = elall - e_shift !(isp)
             IF (smat%l_real) THEN
                DO kj = 1, ki
                   fct = plegend(kj,l)*fl2p1(l)*&
                        (fjkiln*fj(kj,l,iintsp)+gjkiln*gj(kj,l,iintsp)*ddnln)
                   smat%data_r(kj,kii) = smat%data_r(kj,kii) + REAL(cph(kj))*fct
                   hmat%data_r(kj,kii) = hmat%data_r(kj,kii) + REAL(cph(kj))*&
                        (fct*elall + plegend(kj,l)*fl2p1bt(l)*&
                        (fjkiln*gj(kj,l,iintsp)+gjkiln*fj(kj,l,iintsp)))
                   !+APW
                   apw1 = REAL(cph(kj))*plegend(kj,l)*&
                        (apw_lo1*fj(kj,l,iintsp)+apw_lo2*gj(kj,l,iintsp))
                   hmat%data_r(kj,kii) = hmat%data_r(kj,kii) + apw1
                   !-APW
                ENDDO
             ELSE
                DO kj = 1, MIN(ki,lapw%nv(iintsp))
                   fct = chi*plegend(kj,l)*fl2p1(l)*&
                        (fjkiln*fj(kj,l,iintsp)+gjkiln*gj(kj,l,iintsp)*ddnln)
                   smat%data_c(kj,kii) = smat%data_c(kj,kii) + cph(kj)*fct
                   hmat%data_c(kj,kii) = hmat%data_c(kj,kii) + cph(kj)*(fct*elall &
                        + chi*plegend(kj,l)*fl2p1bt(l)*(fjkiln*gj(kj,l,iintsp)+gjkiln*fj(kj,l,iintsp)))
                   !+APW
                   capw1 = cph(kj)*plegend(kj,l)&
                        *(apw_lo1*fj(kj,l,iintsp)+apw_lo2*gj(kj,l,iintsp))
                   hmat%data_c(kj,kii) = hmat%data_c(kj,kii) + capw1
                   !-APW
                END DO
             ENDIF
             !---> end loop over l
          ENDDO
          !---> end loop over ki
       ENDDO
       !---> end loop over atom types (ntype)
       DEALLOCATE(plegend)
       DEALLOCATE(cph)
    ELSE
       IF (smat%l_real) THEN
          CALL HsmtSphGpuKernel_noApw(iintsp,jintsp,lapw%nv,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,lapw%nv(jintsp),mpi%n_size,&
               SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),chi,qssbti,qssbtj,lapw%gvec,&
               lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_r,hmat%data_r)
       ELSE
          CALL HsmtSphGpuKernel_noApw(iintsp,jintsp,lapw%nv,atoms%lmaxd,atoms%lmax(n),mpi%n_rank+1,lapw%nv(jintsp),mpi%n_size,&
               SUM(atoms%neq(:n-1))+1,SUM(atoms%neq(:n)),atoms%lnonsph(n),chi,qssbti,qssbtj,lapw%gvec,&
               lapw%gk,fleg1,fleg2,fl2p1,fl2p1bt,fj,gj,atoms%taual,usdus%ddn(:,n,isp),el(:,n,isp),e_shift,smat%data_c,hmat%data_c)
       ENDIF
    ENDIF
    !!$OMP END DO
    !!$OMP END PARALLEL
    CALL timestop("spherical setup")
    call nvtxEndRange
    RETURN
  END SUBROUTINE hsmt_sph_gpu
#endif

  SUBROUTINE hsmt_sph_cpu(n,atoms,mpi,isp,input,noco,iintsp,jintsp,chi,lapw,el,e_shift,usdus,fj,gj,smat,hmat)
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_types
    IMPLICIT NONE
    TYPE(t_input),INTENT(IN)   :: input
    TYPE(t_mpi),INTENT(IN)     :: mpi
    TYPE(t_noco),INTENT(IN)    :: noco
    TYPE(t_cell),INTENT(IN)    :: cell
    TYPE(t_atoms),INTENT(IN)   :: atoms
    TYPE(t_lapw),INTENT(IN)    :: lapw
    TYPE(t_usdus),INTENT(IN)   :: usdus
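For reference, the plegend recurrence used in both GPU kernels and in hsmt_sph_gpu above is the standard Legendre (Bonnet) recurrence, evaluated at x = gk(:,kj,iintsp)·gk(:,ki,jintsp), the cosine of the angle between the two LAPW basis vectors (this restatement is a standard identity, not part of the commit):

\[ P_0(x)=1,\qquad P_1(x)=x,\qquad P_{l+1}(x)=\frac{2l+1}{l+1}\,x\,P_l(x)-\frac{l}{l+1}\,P_{l-1}(x) \]

so fleg1(l) = (2l+1)/(l+1) and fleg2(l) = l/(l+1) are its coefficients, fl2p1(l) = (2l+1)/(4π) is the prefactor familiar from the spherical-harmonic addition theorem, and fl2p1bt(l) is half of it.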
@@ -138,17 +454,16 @@ CONTAINS
                END DO
             ENDIF
             !---> end loop over l
             !---> end loop over l
          ENDDO
          !---> end loop over ki
          !---> end loop over ki
       ENDDO
    !$OMP END DO
       !---> end loop over atom types (ntype)
       DEALLOCATE(plegend)
       DEALLOCATE(cph)
    !$OMP END PARALLEL
    CALL timestop("spherical setup")
    RETURN
  END SUBROUTINE hsmt_sph
  END SUBROUTINE hsmt_sph_cpu
END MODULE m_hsmt_sph
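The module header added in this file introduces two generic interfaces: hsmt_sph, which resolves to hsmt_sph_cpu and, when CPP_GPU is defined, also to hsmt_sph_gpu (this is what lets the call sites in eigen/hsmt.F90 above keep calling plain hsmt_sph), and HsmtSphGpuKernel_noApw, whose _real and _cmplx specifics are selected from the type of the matrix arguments (smat%data_r/hmat%data_r versus smat%data_c/hmat%data_c) in the IF (smat%l_real) branches of hsmt_sph_gpu. A minimal, self-contained sketch of that type-based dispatch, with made-up names (m_accum_demo, accum) rather than FLEUR code:

    MODULE m_accum_demo
       IMPLICIT NONE
       INTERFACE accum                       ! generic name, like HsmtSphGpuKernel_noApw
          MODULE PROCEDURE accum_real, accum_cmplx
       END INTERFACE
    CONTAINS
       SUBROUTINE accum_real(mat, val)
          REAL, INTENT(INOUT) :: mat(:,:)
          REAL, INTENT(IN)    :: val
          mat = mat + val                    ! real-matrix specific
       END SUBROUTINE accum_real
       SUBROUTINE accum_cmplx(mat, val)
          COMPLEX, INTENT(INOUT) :: mat(:,:)
          COMPLEX, INTENT(IN)    :: val
          mat = mat + val                    ! complex-matrix specific
       END SUBROUTINE accum_cmplx
    END MODULE m_accum_demo

CALL accum(mat_r, 0.5) resolves to accum_real and CALL accum(mat_c, (0.5,0.0)) to accum_cmplx at compile time; the only run-time branching needed is the IF (smat%l_real) that chooses which array to pass.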