Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
fleur
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
54
Issues
54
List
Boards
Labels
Service Desk
Milestones
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
fleur
fleur
Commits
289cc8b7
Commit
289cc8b7
authored
Oct 30, 2018
by
Uliana Alekseeva
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Hsmt_sph ported onto GPU
parent
9abd67f2
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
109 additions
and
197 deletions
+109
-197
eigen/hsmt_sph.F90
eigen/hsmt_sph.F90
+83
-196
types/types_lapw.F90
types/types_lapw.F90
+10
-1
types/types_setup.F90
types/types_setup.F90
+8
-0
types/types_usdus.F90
types/types_usdus.F90
+8
-0
No files found.
eigen/hsmt_sph.F90
View file @
289cc8b7
...
...
@@ -12,35 +12,28 @@ MODULE m_hsmt_sph
module
procedure
hsmt_sph_cpu
#ifdef CPP_GPU
module
procedure
hsmt_sph_gpu
END
INTERFACE
INTERFACE
HsmtSphGpuKernel_noApw
module
procedure
HsmtSphGpuKernel_noApw_cmplx
,
HsmtSphGpuKernel_noApw_real
END
INTERFACE
INTERFACE
HsmtSphGpuKernel_Apw
module
procedure
HsmtSphGpuKernel_Apw_cmplx
,
HsmtSphGpuKernel_Apw_real
#endif
END
INTERFACE
CONTAINS
#ifdef CPP_GPU
SUBROUTINE
HsmtSphGpuKernel_Apw_real
(
iintsp
,
jintsp
,
nv
,
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,&
lnonsph
,
chi
,
qssbti
,
qssbtj
,
gvec
,
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
taual
,
ddn
,
el
,
e_shift
,&
ATTRIBUTES
(
global
)
SUBROUTINE
HsmtSphGpuKernel_real
(
grid
,
block
,
iintsp
,
jintsp
,
nv
,
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,&
lnonsph
,
qssbti
,
qssbtj
,
gvec
,
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
taual
,
ddn
,
el
,
e_shift
,&
smat_data
,
hmat_data
,&
uds
,
dus
,
us
,
duds
,
rmt
)
INTEGER
,
INTENT
(
IN
)
::
iintsp
,
jintsp
,
nv
(
2
),
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,
lnonsph
COMPLEX
,
INTENT
(
IN
)
::
chi
INTEGER
,
VALUE
,
INTENT
(
IN
)
::
grid
,
block
INTEGER
,
VALUE
,
INTENT
(
IN
)
::
iintsp
,
jintsp
,
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,
lnonsph
REAL
,
INTENT
(
IN
)
::
qssbti
(
3
),
qssbtj
(
3
)
INTEGER
,
INTENT
(
IN
)
::
gvec
(:,:,:)
INTEGER
,
INTENT
(
IN
)
::
gvec
(:,:,:)
,
nv
(
2
)
REAL
,
INTENT
(
IN
)
::
gk
(:,:,:)
REAL
,
INTENT
(
IN
)
::
fleg1
(
0
:
lmaxd
),
fleg2
(
0
:
lmaxd
),
fl2p1
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
fl2p1bt
(
0
:
lmaxd
)
REAL
,
MANAGED
,
INTENT
(
IN
)
::
fj
(:,
0
:,:),
gj
(:,
0
:,:)
REAL
,
INTENT
(
IN
)
::
fj
(:,
0
:,:),
gj
(:,
0
:,:)
REAL
,
INTENT
(
IN
)
::
taual
(:,:)
REAL
,
INTENT
(
IN
)
::
ddn
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
el
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
e_shift
REAL
,
VALUE
,
INTENT
(
IN
)
::
e_shift
REAL
,
INTENT
(
INOUT
)
::
smat_data
(:,:),
hmat_data
(:,:)
!+APW
REAL
,
INTENT
(
IN
),
OPTIONAL
::
uds
(
0
:
lmaxd
),
dus
(
0
:
lmaxd
),
us
(
0
:
lmaxd
),
duds
(
0
:
lmaxd
)
...
...
@@ -50,16 +43,28 @@ CONTAINS
REAL
,
PARAMETER
::
tpi_const
=
2.
*
3.1415926535897932
REAL
,
ALLOCATABLE
::
plegend
(:,:)
COMPLEX
,
ALLOCATABLE
::
cph
(:)
REAL
tnn
(
3
),
elall
,
fct
,
fjkiln
,
gjkiln
,
ddnln
,
ski
(
3
)
REAL
tnn
(
3
),
elall
,
fct
,
f
ct2
,
f
jkiln
,
gjkiln
,
ddnln
,
ski
(
3
)
REAL
apw_lo1
,
apw_lo2
,
apw1
,
w1
INTEGER
kii
,
ki
,
kj
,
l
,
nn
INTEGER
kii
,
ki
,
kj
,
l
,
nn
,
k
INTEGER
::
loop_start
,
loop_end
,
i
,
loop_size
ALLOCATE
(
cph
(
MAXVAL
(
nv
)))
ALLOCATE
(
plegend
(
MAXVAL
(
nv
),
0
:
lmaxd
))
plegend
=
0.0
plegend
(:,
0
)
=
1.0
DO
ki
=
ki_start
,
ki_end
,
ki_step
k
=
(
blockidx
%
x
-1
)
*
blockdim
%
x
+
threadidx
%
x
!TODO!!!
!for seq, i.e. ki_start = 1, ki_step = 1
loop_size
=
max
(
ki_end
/(
grid
*
block
),
1
)
if
(
loop_size
*
grid
*
block
<
ki_end
)
loop_size
=
loop_size
+
1
loop_start
=
(
k
-1
)
*
loop_size
+
1
loop_end
=
loop_start
+
loop_size
-
1
if
(
loop_end
>
ki_end
)
loop_end
=
ki_end
DO
ki
=
loop_start
,
loop_end
,
ki_step
!DO ki = ki_start,ki_end,ki_step
kii
=
(
ki
-1
)/
ki_step
+1
ski
=
gvec
(:,
ki
,
jintsp
)
+
qssbti
!---> legendre polynomials
...
...
@@ -100,10 +105,10 @@ CONTAINS
DO
kj
=
1
,
ki
fct
=
plegend
(
kj
,
l
)
*
fl2p1
(
l
)
*
&
(
fjkiln
*
fj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
gj
(
kj
,
l
,
iintsp
)
*
ddnln
)
fct2
=
plegend
(
kj
,
l
)
*
fl2p1bt
(
l
)
*
(
fjkiln
*
gj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
fj
(
kj
,
l
,
iintsp
)
)
smat_data
(
kj
,
kii
)
=
smat_data
(
kj
,
kii
)
+
REAL
(
cph
(
kj
))
*
fct
hmat_data
(
kj
,
kii
)
=
hmat_data
(
kj
,
kii
)
+
REAL
(
cph
(
kj
))
*
&
(
fct
*
elall
+
plegend
(
kj
,
l
)
*
fl2p1bt
(
l
)
*
&
(
fjkiln
*
gj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
fj
(
kj
,
l
,
iintsp
)
)
)
hmat_data
(
kj
,
kii
)
=
hmat_data
(
kj
,
kii
)
+
REAL
(
cph
(
kj
))
*
(
fct
*
elall
+
fct2
)
!+APW
IF
(
PRESENT
(
uds
))
THEN
apw1
=
REAL
(
cph
(
kj
))
*
plegend
(
kj
,
l
)
*
&
...
...
@@ -118,24 +123,25 @@ CONTAINS
ENDDO
DEALLOCATE
(
plegend
)
DEALLOCATE
(
cph
)
END
SUBROUTINE
HsmtSphGpuKernel_
Apw_
real
END
SUBROUTINE
HsmtSphGpuKernel_real
SUBROUTINE
HsmtSphGpuKernel_Apw_cmplx
(
iintsp
,
jintsp
,
nv
,
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,&
ATTRIBUTES
(
global
)
SUBROUTINE
HsmtSphGpuKernel_cmplx
(
grid
,
block
,
iintsp
,
jintsp
,
nv
,
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,&
lnonsph
,
chi
,
qssbti
,
qssbtj
,
gvec
,
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
taual
,
ddn
,
el
,
e_shift
,&
smat_data
,
hmat_data
,&
uds
,
dus
,
us
,
duds
,
rmt
)
INTEGER
,
INTENT
(
IN
)
::
iintsp
,
jintsp
,
nv
(
2
),
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,
lnonsph
COMPLEX
,
INTENT
(
IN
)
::
chi
INTEGER
,
VALUE
,
INTENT
(
IN
)
::
grid
,
block
INTEGER
,
VALUE
,
INTENT
(
IN
)
::
iintsp
,
jintsp
,
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,
lnonsph
COMPLEX
,
VALUE
,
INTENT
(
IN
)
::
chi
REAL
,
INTENT
(
IN
)
::
qssbti
(
3
),
qssbtj
(
3
)
INTEGER
,
INTENT
(
IN
)
::
gvec
(:,:,:)
INTEGER
,
INTENT
(
IN
)
::
gvec
(:,:,:)
,
nv
(
2
)
REAL
,
INTENT
(
IN
)
::
gk
(:,:,:)
REAL
,
INTENT
(
IN
)
::
fleg1
(
0
:
lmaxd
),
fleg2
(
0
:
lmaxd
),
fl2p1
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
fl2p1bt
(
0
:
lmaxd
)
REAL
,
MANAGED
,
INTENT
(
IN
)
::
fj
(:,
0
:,:),
gj
(:,
0
:,:)
REAL
,
INTENT
(
IN
)
::
fj
(:,
0
:,:),
gj
(:,
0
:,:)
REAL
,
INTENT
(
IN
)
::
taual
(:,:)
REAL
,
INTENT
(
IN
)
::
ddn
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
el
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
e_shift
REAL
,
VALUE
,
INTENT
(
IN
)
::
e_shift
COMPLEX
,
INTENT
(
INOUT
)
::
smat_data
(:,:),
hmat_data
(:,:)
!+APW
REAL
,
INTENT
(
IN
),
OPTIONAL
::
uds
(
0
:
lmaxd
),
dus
(
0
:
lmaxd
),
us
(
0
:
lmaxd
),
duds
(
0
:
lmaxd
)
...
...
@@ -144,18 +150,32 @@ CONTAINS
REAL
,
PARAMETER
::
tpi_const
=
2.
*
3.1415926535897932
REAL
,
ALLOCATABLE
::
plegend
(:,:)
REAL
,
ALLOCATABLE
::
VecHelpS
(:),
VecHelpH
(:)
COMPLEX
,
ALLOCATABLE
::
cph
(:)
REAL
apw_lo1
,
apw_lo2
,
w1
COMPLEX
capw1
REAL
tnn
(
3
),
elall
,
fct
,
fjkiln
,
gjkiln
,
ddnln
,
ski
(
3
)
INTEGER
kii
,
ki
,
kj
,
l
,
nn
REAL
tnn
(
3
),
elall
,
fct
,
fct2
,
fjkiln
,
gjkiln
,
ddnln
,
ski
(
3
)
INTEGER
kii
,
ki
,
kj
,
l
,
nn
,
kj_end
,
k
INTEGER
::
loop_start
,
loop_end
,
i
,
loop_size
ALLOCATE
(
cph
(
MAXVAL
(
nv
)))
ALLOCATE
(
plegend
(
MAXVAL
(
nv
),
0
:
lmaxd
))
plegend
=
0.0
plegend
(:,
0
)
=
1.0
ALLOCATE
(
VecHelpS
(
MAXVAL
(
nv
)),
VecHelpH
(
MAXVAL
(
nv
)))
DO
ki
=
ki_start
,
ki_end
,
ki_step
k
=
(
blockidx
%
x
-1
)
*
blockdim
%
x
+
threadidx
%
x
!TODO!!!
!for seq, i.e. ki_start = 1, ki_step = 1
loop_size
=
max
(
ki_end
/(
grid
*
block
),
1
)
if
(
loop_size
*
grid
*
block
<
ki_end
)
loop_size
=
loop_size
+
1
loop_start
=
(
k
-1
)
*
loop_size
+
1
loop_end
=
loop_start
+
loop_size
-
1
if
(
loop_end
>
ki_end
)
loop_end
=
ki_end
DO
ki
=
loop_start
,
loop_end
,
ki_step
!DO ki = ki_start,ki_end,ki_step
kii
=
(
ki
-1
)/
ki_step
+1
ski
=
gvec
(:,
ki
,
jintsp
)
+
qssbti
!---> legendre polynomials
...
...
@@ -178,6 +198,9 @@ CONTAINS
END
DO
!---> update overlap and l-diagonal hamiltonian matrix
kj_end
=
MIN
(
ki
,
nv
(
iintsp
))
VecHelpS
=
0.d0
VecHelpH
=
0.d0
DO
l
=
0
,
lmax
!+APW
IF
(
PRESENT
(
uds
))
THEN
...
...
@@ -193,13 +216,13 @@ CONTAINS
ddnln
=
ddn
(
l
)
elall
=
el
(
l
)
IF
(
l
<=
lnonsph
)
elall
=
elall
-
e_shift
!(isp)
DO
kj
=
1
,
MIN
(
ki
,
nv
(
iintsp
))
fct
=
chi
*
plegend
(
kj
,
l
)
*
fl2p1
(
l
)
*
&
(
fjkiln
*
fj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
gj
(
kj
,
l
,
iintsp
)
*
ddnln
)
smat_data
(
kj
,
kii
)
=
smat_data
(
kj
,
kii
)
+
cph
(
kj
)
*
fct
hmat_data
(
kj
,
kii
)
=
hmat_data
(
kj
,
kii
)
+
cph
(
kj
)
*
(
fct
*
elall
&
+
chi
*
plegend
(
kj
,
l
)
*
fl2p1bt
(
l
)
*
(
fjkiln
*
gj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
fj
(
kj
,
l
,
iintsp
)
)
)
DO
kj
=
1
,
kj_end
fct
=
plegend
(
kj
,
l
)
*
fl2p1
(
l
)
*
&
(
fjkiln
*
fj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
gj
(
kj
,
l
,
iintsp
)
*
ddnln
)
fct2
=
plegend
(
kj
,
l
)
*
fl2p1bt
(
l
)
*
(
fjkiln
*
gj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
fj
(
kj
,
l
,
iintsp
)
)
VecHelpS
(
kj
)
=
VecHelpS
(
kj
)
+
fct
VecHelpH
(
kj
)
=
VecHelpH
(
kj
)
+
fct
*
elall
+
fct2
!+APW
IF
(
PRESENT
(
uds
))
THEN
capw1
=
cph
(
kj
)
*
plegend
(
kj
,
l
)&
...
...
@@ -210,156 +233,15 @@ CONTAINS
END
DO
!---> end loop over l
ENDDO
smat_data
(:
kj_end
,
kii
)
=
smat_data
(:
kj_end
,
kii
)
+
chi
*
cph
(:
kj_end
)
*
VecHelpS
(:
kj_end
)
hmat_data
(:
kj_end
,
kii
)
=
hmat_data
(:
kj_end
,
kii
)
+
chi
*
cph
(:
kj_end
)
*
VecHelpH
(:
kj_end
)
!---> end loop over ki
ENDDO
DEALLOCATE
(
plegend
)
DEALLOCATE
(
cph
)
END
SUBROUTINE
HsmtSphGpuKernel_Apw_cmplx
SUBROUTINE
HsmtSphGpuKernel_noApw_cmplx
(
iintsp
,
jintsp
,
nv
,
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,&
lnonsph
,
chi
,
qssbti
,
qssbtj
,
gvec
,
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
taual
,
ddn
,
el
,
e_shift
,&
smat_data
,
hmat_data
)
INTEGER
,
INTENT
(
IN
)
::
iintsp
,
jintsp
,
nv
(
2
),
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,
lnonsph
COMPLEX
,
INTENT
(
IN
)
::
chi
REAL
,
INTENT
(
IN
)
::
qssbti
(
3
),
qssbtj
(
3
)
INTEGER
,
INTENT
(
IN
)
::
gvec
(:,:,:)
REAL
,
INTENT
(
IN
)
::
gk
(:,:,:)
REAL
,
INTENT
(
IN
)
::
fleg1
(
0
:
lmaxd
),
fleg2
(
0
:
lmaxd
),
fl2p1
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
fl2p1bt
(
0
:
lmaxd
)
REAL
,
MANAGED
,
INTENT
(
IN
)
::
fj
(:,
0
:,:),
gj
(:,
0
:,:)
REAL
,
INTENT
(
IN
)
::
taual
(:,:)
REAL
,
INTENT
(
IN
)
::
ddn
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
el
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
e_shift
COMPLEX
,
INTENT
(
INOUT
)
::
smat_data
(:,:),
hmat_data
(:,:)
REAL
,
PARAMETER
::
tpi_const
=
2.
*
3.1415926535897932
REAL
,
ALLOCATABLE
::
plegend
(:,:)
COMPLEX
,
ALLOCATABLE
::
cph
(:)
REAL
tnn
(
3
),
elall
,
fct
,
fjkiln
,
gjkiln
,
ddnln
,
ski
(
3
)
INTEGER
kii
,
ki
,
kj
,
l
,
nn
ALLOCATE
(
cph
(
MAXVAL
(
nv
)))
ALLOCATE
(
plegend
(
MAXVAL
(
nv
),
0
:
lmaxd
))
plegend
=
0.0
plegend
(:,
0
)
=
1.0
DO
ki
=
ki_start
,
ki_end
,
ki_step
kii
=
(
ki
-1
)/
ki_step
+1
ski
=
gvec
(:,
ki
,
jintsp
)
+
qssbti
!---> legendre polynomials
DO
kj
=
1
,
ki
plegend
(
kj
,
1
)
=
DOT_PRODUCT
(
gk
(:,
kj
,
iintsp
),
gk
(:,
ki
,
jintsp
))
END
DO
DO
l
=
1
,
lmax
-
1
plegend
(:
ki
,
l
+1
)
=
fleg1
(
l
)
*
plegend
(:
ki
,
1
)
*
plegend
(:
ki
,
l
)
-
fleg2
(
l
)
*
plegend
(:
ki
,
l
-1
)
END
DO
!---> set up phase factors
cph
=
0.0
DO
nn
=
nn_start
,
nn_end
tnn
=
tpi_const
*
taual
(:,
nn
)
DO
kj
=
1
,
ki
cph
(
kj
)
=
cph
(
kj
)
+
&
CMPLX
(
COS
(
DOT_PRODUCT
(
ski
-
gvec
(:,
kj
,
iintsp
)
-
qssbtj
,
tnn
)),&
SIN
(
DOT_PRODUCT
(
gvec
(:,
kj
,
iintsp
)
+
qssbtj
-
ski
,
tnn
)))
! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
END
DO
END
DO
!---> update overlap and l-diagonal hamiltonian matrix
DO
l
=
0
,
lmax
fjkiln
=
fj
(
ki
,
l
,
jintsp
)
gjkiln
=
gj
(
ki
,
l
,
jintsp
)
ddnln
=
ddn
(
l
)
elall
=
el
(
l
)
IF
(
l
<=
lnonsph
)
elall
=
elall
-
e_shift
!(isp)
DO
kj
=
1
,
MIN
(
ki
,
nv
(
iintsp
))
fct
=
chi
*
plegend
(
kj
,
l
)
*
fl2p1
(
l
)
*
&
(
fjkiln
*
fj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
gj
(
kj
,
l
,
iintsp
)
*
ddnln
)
smat_data
(
kj
,
kii
)
=
smat_data
(
kj
,
kii
)
+
cph
(
kj
)
*
fct
hmat_data
(
kj
,
kii
)
=
hmat_data
(
kj
,
kii
)
+
cph
(
kj
)
*
(
fct
*
elall
&
+
chi
*
plegend
(
kj
,
l
)
*
fl2p1bt
(
l
)
*
(
fjkiln
*
gj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
fj
(
kj
,
l
,
iintsp
)
)
)
END
DO
!---> end loop over l
ENDDO
!---> end loop over ki
ENDDO
DEALLOCATE
(
plegend
)
DEALLOCATE
(
cph
)
END
SUBROUTINE
HsmtSphGpuKernel_noApw_cmplx
DEALLOCATE
(
VecHelpS
,
VecHelpH
)
END
SUBROUTINE
HsmtSphGpuKernel_cmplx
SUBROUTINE
HsmtSphGpuKernel_noApw_real
(
iintsp
,
jintsp
,
nv
,
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,&
lnonsph
,
chi
,
qssbti
,
qssbtj
,
gvec
,
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
taual
,
ddn
,
el
,
e_shift
,
smat_data
,
hmat_data
)
INTEGER
,
INTENT
(
IN
)
::
iintsp
,
jintsp
,
nv
(
2
),
lmaxd
,
lmax
,
ki_start
,
ki_end
,
ki_step
,
nn_start
,
nn_end
,
lnonsph
COMPLEX
,
INTENT
(
IN
)
::
chi
REAL
,
INTENT
(
IN
)
::
qssbti
(
3
),
qssbtj
(
3
)
INTEGER
,
INTENT
(
IN
)
::
gvec
(:,:,:)
REAL
,
INTENT
(
IN
)
::
gk
(:,:,:)
REAL
,
INTENT
(
IN
)
::
fleg1
(
0
:
lmaxd
),
fleg2
(
0
:
lmaxd
),
fl2p1
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
fl2p1bt
(
0
:
lmaxd
)
REAL
,
MANAGED
,
INTENT
(
IN
)
::
fj
(:,
0
:,:),
gj
(:,
0
:,:)
REAL
,
INTENT
(
IN
)
::
taual
(:,:)
REAL
,
INTENT
(
IN
)
::
ddn
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
el
(
0
:
lmaxd
)
REAL
,
INTENT
(
IN
)
::
e_shift
REAL
,
INTENT
(
INOUT
)
::
smat_data
(:,:),
hmat_data
(:,:)
REAL
,
PARAMETER
::
tpi_const
=
2.
*
3.1415926535897932
REAL
,
ALLOCATABLE
::
plegend
(:,:)
COMPLEX
,
ALLOCATABLE
::
cph
(:)
REAL
tnn
(
3
),
elall
,
fct
,
fjkiln
,
gjkiln
,
ddnln
,
ski
(
3
)
INTEGER
kii
,
ki
,
kj
,
l
,
nn
ALLOCATE
(
cph
(
MAXVAL
(
nv
)))
ALLOCATE
(
plegend
(
MAXVAL
(
nv
),
0
:
lmaxd
))
plegend
=
0.0
plegend
(:,
0
)
=
1.0
DO
ki
=
ki_start
,
ki_end
,
ki_step
kii
=
(
ki
-1
)/
ki_step
+1
ski
=
gvec
(:,
ki
,
jintsp
)
+
qssbti
!---> legendre polynomials
DO
kj
=
1
,
ki
plegend
(
kj
,
1
)
=
DOT_PRODUCT
(
gk
(:,
kj
,
iintsp
),
gk
(:,
ki
,
jintsp
))
END
DO
DO
l
=
1
,
lmax
-
1
plegend
(:
ki
,
l
+1
)
=
fleg1
(
l
)
*
plegend
(:
ki
,
1
)
*
plegend
(:
ki
,
l
)
-
fleg2
(
l
)
*
plegend
(:
ki
,
l
-1
)
END
DO
!---> set up phase factors
cph
=
0.0
DO
nn
=
nn_start
,
nn_end
tnn
=
tpi_const
*
taual
(:,
nn
)
DO
kj
=
1
,
ki
cph
(
kj
)
=
cph
(
kj
)
+
&
CMPLX
(
COS
(
DOT_PRODUCT
(
ski
-
gvec
(:,
kj
,
iintsp
)
-
qssbtj
,
tnn
)),&
SIN
(
DOT_PRODUCT
(
gvec
(:,
kj
,
iintsp
)
+
qssbtj
-
ski
,
tnn
)))
! IF (iintsp.NE.jintsp) cph(kj)=CONJG(cph(kj))
END
DO
END
DO
!---> update overlap and l-diagonal hamiltonian matrix
DO
l
=
0
,
lmax
fjkiln
=
fj
(
ki
,
l
,
jintsp
)
gjkiln
=
gj
(
ki
,
l
,
jintsp
)
ddnln
=
ddn
(
l
)
elall
=
el
(
l
)
IF
(
l
<=
lnonsph
)
elall
=
elall
-
e_shift
!(isp)
DO
kj
=
1
,
ki
fct
=
plegend
(
kj
,
l
)
*
fl2p1
(
l
)
*
&
(
fjkiln
*
fj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
gj
(
kj
,
l
,
iintsp
)
*
ddnln
)
smat_data
(
kj
,
kii
)
=
smat_data
(
kj
,
kii
)
+
REAL
(
cph
(
kj
))
*
fct
hmat_data
(
kj
,
kii
)
=
hmat_data
(
kj
,
kii
)
+
REAL
(
cph
(
kj
))
*
&
(
fct
*
elall
+
plegend
(
kj
,
l
)
*
fl2p1bt
(
l
)
*
&
(
fjkiln
*
gj
(
kj
,
l
,
iintsp
)
+
gjkiln
*
fj
(
kj
,
l
,
iintsp
)
)
)
ENDDO
!---> end loop over l
ENDDO
!---> end loop over ki
ENDDO
DEALLOCATE
(
plegend
)
DEALLOCATE
(
cph
)
END
SUBROUTINE
HsmtSphGpuKernel_noApw_real
SUBROUTINE
hsmt_sph_gpu
(
n
,
atoms
,
mpi
,
isp
,
input
,
noco
,
iintsp
,
jintsp
,
chi
,
lapw
,
el
,
e_shift
,
usdus
,
fj
,
gj
,
smat
,
hmat
)
USE
m_constants
,
ONLY
:
fpi_const
,
tpi_const
...
...
@@ -379,22 +261,20 @@ CONTAINS
COMPLEX
,
INTENT
(
IN
)
::
chi
! ..
! .. Array Arguments ..
REAL
,
INTENT
(
IN
)
::
el
(
0
:
atoms
%
lmaxd
,
atoms
%
ntype
,
input
%
jspins
)
REAL
,
MANAGED
,
INTENT
(
IN
)
::
el
(
0
:
atoms
%
lmaxd
,
atoms
%
ntype
,
input
%
jspins
)
REAL
,
INTENT
(
IN
)
::
e_shift
!(atoms%ntype,input%jspins)
#if defined CPP_GPU
REAL
,
MANAGED
,
INTENT
(
IN
)
::
fj
(:,
0
:,:),
gj
(:,
0
:,:)
#else
REAL
,
INTENT
(
IN
)
::
fj
(:,
0
:,:),
gj
(:,
0
:,:)
#endif
! ..
! .. Local Scalars ..
INTEGER
l
INTEGER
::
grid
,
block
! ..
! .. Local Arrays ..
REAL
fleg1
(
0
:
atoms
%
lmaxd
),
fleg2
(
0
:
atoms
%
lmaxd
),
fl2p1
(
0
:
atoms
%
lmaxd
)
REAL
fl2p1bt
(
0
:
atoms
%
lmaxd
)
REAL
qssbti
(
3
),
qssbtj
(
3
)
REAL
,
MANAGED
::
fleg1
(
0
:
atoms
%
lmaxd
),
fleg2
(
0
:
atoms
%
lmaxd
),
fl2p1
(
0
:
atoms
%
lmaxd
)
REAL
,
MANAGED
::
fl2p1bt
(
0
:
atoms
%
lmaxd
)
REAL
,
MANAGED
::
qssbti
(
3
),
qssbtj
(
3
)
INTEGER
,
DEVICE
::
nv_dev
(
2
)
call
nvtxStartRange
(
"hsmt_sph"
,
2
)
CALL
timestart
(
"spherical setup"
)
...
...
@@ -408,16 +288,22 @@ CONTAINS
qssbti
=
MERGE
(
-
noco
%
qss
/
2
,
+
noco
%
qss
/
2
,
jintsp
.EQ.
1
)
qssbtj
=
MERGE
(
-
noco
%
qss
/
2
,
+
noco
%
qss
/
2
,
iintsp
.EQ.
1
)
! pretty ugly solution
nv_dev
=
lapw
%
nv
block
=
256
grid
=
lapw
%
nv
(
jintsp
)/(
block
*
4
)
+
1
IF
(
input
%
l_useapw
)
THEN
!TODO!!!!
! APW case is not testet
IF
(
smat
%
l_real
)
THEN
CALL
HsmtSphGpuKernel_
Apw
(
iintsp
,
jintsp
,
lapw
%
n
v
,
atoms
%
lmaxd
,
atoms
%
lmax
(
n
),
mpi
%
n_rank
+1
,&
CALL
HsmtSphGpuKernel_
real
<<<
grid
,
block
>>>
(
grid
,
block
,
iintsp
,
jintsp
,
nv_de
v
,
atoms
%
lmaxd
,
atoms
%
lmax
(
n
),
mpi
%
n_rank
+1
,&
lapw
%
nv
(
jintsp
),
mpi
%
n_size
,
SUM
(
atoms
%
neq
(:
n
-1
))
+1
,
SUM
(
atoms
%
neq
(:
n
)),
atoms
%
lnonsph
(
n
),&
chi
,
qssbti
,
qssbtj
,
lapw
%
gvec
,
lapw
%
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
atoms
%
taual
,&
qssbti
,
qssbtj
,
lapw
%
gvec
,
lapw
%
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
atoms
%
taual
,&
usdus
%
ddn
(:,
n
,
isp
),
el
(:,
n
,
isp
),
e_shift
,&
smat
%
data_r
,
hmat
%
data_r
,&
usdus
%
uds
(:,
n
,
isp
),
usdus
%
dus
(:,
n
,
isp
),
usdus
%
us
(:,
n
,
isp
),
usdus
%
duds
(:,
n
,
isp
),
atoms
%
rmt
(
n
))
ELSE
CALL
HsmtSphGpuKernel_
Apw
(
iintsp
,
jintsp
,
lapw
%
n
v
,
atoms
%
lmaxd
,
atoms
%
lmax
(
n
),
mpi
%
n_rank
+1
,&
CALL
HsmtSphGpuKernel_
cmplx
<<<
grid
,
block
>>>
(
grid
,
block
,
iintsp
,
jintsp
,
nv_de
v
,
atoms
%
lmaxd
,
atoms
%
lmax
(
n
),
mpi
%
n_rank
+1
,&
lapw
%
nv
(
jintsp
),
mpi
%
n_size
,
SUM
(
atoms
%
neq
(:
n
-1
))
+1
,
SUM
(
atoms
%
neq
(:
n
)),
atoms
%
lnonsph
(
n
),&
chi
,
qssbti
,
qssbtj
,
lapw
%
gvec
,
lapw
%
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
atoms
%
taual
,&
usdus
%
ddn
(:,
n
,
isp
),
el
(:,
n
,
isp
),
e_shift
,&
...
...
@@ -426,12 +312,12 @@ CONTAINS
ENDIF
ELSE
IF
(
smat
%
l_real
)
THEN
CALL
HsmtSphGpuKernel_
Apw
(
iintsp
,
jintsp
,
lapw
%
n
v
,
atoms
%
lmaxd
,
atoms
%
lmax
(
n
),
mpi
%
n_rank
+1
,&
CALL
HsmtSphGpuKernel_
real
<<<
grid
,
block
>>>
(
grid
,
block
,
iintsp
,
jintsp
,
nv_de
v
,
atoms
%
lmaxd
,
atoms
%
lmax
(
n
),
mpi
%
n_rank
+1
,&
lapw
%
nv
(
jintsp
),
mpi
%
n_size
,
SUM
(
atoms
%
neq
(:
n
-1
))
+1
,
SUM
(
atoms
%
neq
(:
n
)),
atoms
%
lnonsph
(
n
),&
chi
,
qssbti
,
qssbtj
,
lapw
%
gvec
,
lapw
%
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
atoms
%
taual
,&
qssbti
,
qssbtj
,
lapw
%
gvec
,
lapw
%
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
atoms
%
taual
,&
usdus
%
ddn
(:,
n
,
isp
),
el
(:,
n
,
isp
),
e_shift
,
smat
%
data_r
,
hmat
%
data_r
)
ELSE
CALL
HsmtSphGpuKernel_
Apw
(
iintsp
,
jintsp
,
lapw
%
n
v
,
atoms
%
lmaxd
,
atoms
%
lmax
(
n
),
mpi
%
n_rank
+1
,&
CALL
HsmtSphGpuKernel_
cmplx
<<<
grid
,
block
>>>
(
grid
,
block
,
iintsp
,
jintsp
,
nv_de
v
,
atoms
%
lmaxd
,
atoms
%
lmax
(
n
),
mpi
%
n_rank
+1
,&
lapw
%
nv
(
jintsp
),
mpi
%
n_size
,
SUM
(
atoms
%
neq
(:
n
-1
))
+1
,
SUM
(
atoms
%
neq
(:
n
)),
atoms
%
lnonsph
(
n
),&
chi
,
qssbti
,
qssbtj
,
lapw
%
gvec
,
lapw
%
gk
,
fleg1
,
fleg2
,
fl2p1
,
fl2p1bt
,
fj
,
gj
,
atoms
%
taual
,&
usdus
%
ddn
(:,
n
,
isp
),
el
(:,
n
,
isp
),
e_shift
,
smat
%
data_c
,
hmat
%
data_c
)
...
...
@@ -589,6 +475,7 @@ CONTAINS
!$OMP END DO
DEALLOCATE
(
plegend
)
DEALLOCATE
(
cph
)
DEALLOCATE
(
VecHelpS
,
VecHelpH
)
!$OMP END PARALLEL
CALL
timestop
(
"spherical setup"
)
...
...
types/types_lapw.F90
View file @
289cc8b7
...
...
@@ -8,17 +8,26 @@ MODULE m_types_lapw
USE
m_judft
PRIVATE
TYPE
t_lapw
INTEGER
::
nv
(
2
),
num_local_cols
(
2
)
INTEGER
::
nv
(
2
)
INTEGER
::
num_local_cols
(
2
)
INTEGER
::
nv_tot
INTEGER
::
nmat
INTEGER
::
nlotot
INTEGER
,
ALLOCATABLE
::
k1
(:,:)
INTEGER
,
ALLOCATABLE
::
k2
(:,:)
INTEGER
,
ALLOCATABLE
::
k3
(:,:)
#ifdef CPP_GPU
INTEGER
,
ALLOCATABLE
,
MANAGED
::
gvec
(:,:,:)
!replaces k1,k2,k3
#else
INTEGER
,
ALLOCATABLE
::
gvec
(:,:,:)
!replaces k1,k2,k3
#endif
INTEGER
,
ALLOCATABLE
::
kp
(:,:)
REAL
,
ALLOCATABLE
::
rk
(:,:)
#ifdef CPP_GPU
REAL
,
ALLOCATABLE
,
MANAGED
::
gk
(:,:,:)
#else
REAL
,
ALLOCATABLE
::
gk
(:,:,:)
#endif
REAL
,
ALLOCATABLE
::
vk
(:,:,:)
INTEGER
,
ALLOCATABLE
::
matind
(:,:)
INTEGER
,
ALLOCATABLE
::
index_lo
(:,:)
...
...
types/types_setup.F90
View file @
289cc8b7
...
...
@@ -138,7 +138,11 @@ MODULE m_types_setup
!Calaculate forces for this atom?
LOGICAL
,
ALLOCATABLE
::
l_geo
(:)
!MT-Radius (ntype)
#ifdef CPP_GPU
REAL
,
ALLOCATABLE
,
MANAGED
::
rmt
(:)
#else
REAL
,
ALLOCATABLE
::
rmt
(:)
#endif
!log increment(ntype)
REAL
,
ALLOCATABLE
::
dx
(:)
!vol of MT(ntype)
...
...
@@ -152,7 +156,11 @@ MODULE m_types_setup
!pos of atom (absol) (3,nat)
REAL
,
ALLOCATABLE
::
pos
(:,:)
!pos of atom (relat)(3,nat)
#ifdef CPP_GPU
REAL
,
ALLOCATABLE
,
MANAGED
::
taual
(:,:)
#else
REAL
,
ALLOCATABLE
::
taual
(:,:)
#endif
!labels
CHARACTER
(
LEN
=
20
),
ALLOCATABLE
::
label
(:)
CHARACTER
(
len
=
20
),
ALLOCATABLE
::
speciesName
(:)
...
...
types/types_usdus.F90
View file @
289cc8b7
...
...
@@ -6,11 +6,19 @@
MODULE
m_types_usdus
TYPE
t_usdus
#ifdef CPP_GPU
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:),
MANAGED
::
us
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:),
MANAGED
::
dus
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:),
MANAGED
::
uds
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:),
MANAGED
::
duds
!(0:lmaxd,ntype,jspd)
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:),
MANAGED
::
ddn
!(0:lmaxd,ntype,jspd)
#else
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:)
::
us
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:)
::
dus
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:)
::
uds
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:)
::
duds
!(0:lmaxd,ntype,jspd)
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:)
::
ddn
!(0:lmaxd,ntype,jspd)
#endif
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:)
::
ulos
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:)
::
dulos
REAL
,
ALLOCATABLE
,
DIMENSION
(:,:,:)
::
uulon
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment