hsmt_sph.F90 28.4 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7
MODULE m_hsmt_sph
8 9
  USE m_juDFT
  IMPLICIT NONE
10
CONTAINS
11
  SUBROUTINE hsmt_sph(sym,DIMENSION,atoms,SUB_COMM,n_size,n_rank,sphhar,isp,ab_dim,&
12
       input,hlpmsize,noco,l_socfirst,cell,nintsp, lapw,el,usdus,vr,gk,rsoc,isigma,fj,gj,l_real,hamOvlp)
13 14 15 16 17 18 19 20 21 22 23 24 25 26

#include"cpp_double.h"
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_sphbes
    USE m_dsphbs
    USE m_ylm
    USE m_hsmt_socinit,ONLY:t_rsoc
    USE m_hsmt_spinor
    USE m_radovlp
#ifdef CPP_MPI
    USE m_mingeselle
#endif
    USE m_types
    IMPLICIT NONE
27 28 29 30 31 32 33 34 35 36
    TYPE(t_sym),INTENT(IN)        :: sym
    TYPE(t_dimension),INTENT(IN)  :: DIMENSION
    TYPE(t_input),INTENT(IN)      :: input
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_cell),INTENT(IN)       :: cell
    TYPE(t_sphhar),INTENT(IN)     :: sphhar
    TYPE(t_atoms),INTENT(IN)      :: atoms
    TYPE(t_lapw),INTENT(INOUT)    :: lapw!lpaw%nv_tot is updated
    TYPE(t_usdus),INTENT(INOUT)   :: usdus
    TYPE(t_hamOvlp),INTENT(INOUT) :: hamOvlp
37 38 39 40 41 42 43 44
    !     ..
    !     .. Scalar Arguments ..
    INTEGER, INTENT (IN) :: isp,ab_dim
    INTEGER, INTENT (IN) :: SUB_COMM,n_size,n_rank 
    INTEGER, INTENT (IN) :: hlpmsize,nintsp
    LOGICAL, INTENT (IN) :: l_socfirst
    !     ..
    !     .. Array Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
45 46
    REAL,    INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,DIMENSION%jspd)
    REAL,    INTENT (IN) :: vr(atoms%jmtd,0:sphhar%nlhd,atoms%ntype,DIMENSION%jspd)
47 48 49 50 51
    REAL,INTENT(IN)      :: gk(:,:,:)
    COMPLEX,INTENT(IN)   :: isigma(2,2,3)
    TYPE(t_rsoc),INTENT(IN) :: rsoc


52 53
    LOGICAL, INTENT(IN) :: l_real

54 55 56 57
    REAL,INTENT(OUT) :: fj(:,0:,:,:),gj(:,0:,:,:)
    !     ..
    !     .. Local Scalars ..
    REAL con1,ff,gg,gs,ws,tnn(3), elall,fct,fjkiln,gjkiln,ddnln,ski(3)
58
    REAL apw_lo1,apw_lo2,apw1,w1,aa_rDummy(1)
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75

    COMPLEX chi11,chi21,chi22,capw1
    INTEGER ii,iii,ij,k,ki,kj,l,lo,n,n0,n1,nn,kjmax, nsp,iintsp,jintsp
    INTEGER nc ,kii

    !     ..
    !     .. Local Arrays ..
    REAL fb(0:atoms%lmaxd),fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)     
    REAL fl2p1bt(0:atoms%lmaxd),gb(0:atoms%lmaxd)
    REAL qssbti(3),qssbtj(3)


    REAL, ALLOCATABLE :: plegend(:,:)
    REAL, ALLOCATABLE :: rph(:,:),cph(:,:)
    REAL, ALLOCATABLE :: uun21(:,:),udn21(:,:),dun21(:,:),ddn21(:,:)


Daniel Wortmann's avatar
Daniel Wortmann committed
76
    COMPLEX chi(2,2),chj(2,2,2,atoms%ntype),aawa(DIMENSION%nvd),bbwa(DIMENSION%nvd)
77 78 79 80 81 82 83 84 85
    COMPLEX, ALLOCATABLE :: aahlp(:),bbhlp(:)
    LOGICAL apw(0:atoms%lmaxd)


    ! for Spin-orbit...
    REAL, ALLOCATABLE :: dplegend(:,:)
    REAL, ALLOCATABLE :: cross_k(:,:)
    INTEGER :: j1,j2
    COMPLEX :: isigma_x(2,2),isigma_y(2,2),isigma_z(2,2)
86
    COMPLEX :: chi11so(2,2),chi21so(2,2),chi22so(2,2),angso(DIMENSION%nvd,2,2)
87

88 89
    REAL :: tmp,tmp1,tmp2,tmp3

90 91 92

    IF ( noco%l_noco .AND. (.NOT. noco%l_ss) ) ALLOCATE ( aahlp(hlpmsize),bbhlp(hlpmsize) )
    !     ..
93
    con1 = fpi_const/SQRT(cell%omtil)
94
    DO l = 0,atoms%lmaxd
95 96 97
       fleg1(l) = REAL(l+l+1)/REAL(l+1)
       fleg2(l) = REAL(l)/REAL(l+1)
       fl2p1(l) = REAL(l+l+1)/fpi_const
98 99 100 101 102
       fl2p1bt(l) = fl2p1(l)*0.5
    END DO
    !---> calculate the overlap of the radial basis functions with different
    !---> spin directions.
    IF (noco%l_constr) THEN
Daniel Wortmann's avatar
Daniel Wortmann committed
103 104
       ALLOCATE(uun21(0:atoms%lmaxd,atoms%ntype),udn21(0:atoms%lmaxd,atoms%ntype),&
            dun21(0:atoms%lmaxd,atoms%ntype),ddn21(0:atoms%lmaxd,atoms%ntype) )
105 106 107 108 109 110 111 112 113 114
       CALL rad_ovlp(atoms,usdus,input,vr,el(0:,:,:), uun21,udn21,dun21,ddn21)
    ENDIF
    !---> loop over each atom type

    DO iintsp = 1,nintsp
       !$OMP parallel do DEFAULT(SHARED)&
       !$OMP PRIVATE(n,l,apw,lo,k,gs,fb,gb,ws,ff,gg)
       DO n = 1,atoms%ntype

          DO l = 0,atoms%lmax(n)
115
             apw(l) = .FALSE.
116
             DO lo = 1,atoms%nlo(n)
117
                IF (atoms%l_dulo(lo,n)) apw(l) = .TRUE.
118
             ENDDO
119
             IF ((input%l_useapw).AND.(atoms%lapw_l(n).GE.l)) apw(l) = .FALSE.
120

121 122
          ENDDO
          DO lo = 1,atoms%nlo(n)
123
             IF (atoms%l_dulo(lo,n)) apw(atoms%llo(lo,n)) = .TRUE.
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
          ENDDO

          DO k = 1,lapw%nv(iintsp)
             gs = lapw%rk(k,iintsp)*atoms%rmt(n)
             CALL sphbes(atoms%lmax(n),gs, fb)
             CALL dsphbs(atoms%lmax(n),gs,fb, gb)
             DO l = 0,atoms%lmax(n)
                !---> set up wronskians for the matching conditions for each ntype
                ws = con1/(usdus%uds(l,n,isp)*usdus%dus(l,n,isp)&
                     - usdus%us(l,n,isp)*usdus%duds(l,n,isp))
                ff = fb(l)
                gg = lapw%rk(k,iintsp)*gb(l)
                IF ( apw(l) ) THEN
                   fj(k,l,n,iintsp) = 1.0*con1 * ff / usdus%us(l,n,isp)
                   gj(k,l,n,iintsp) = 0.0d0
                ELSE
140
                   IF (noco%l_constr.OR.l_socfirst) THEN
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
                      !--->                 in a constrained calculation fj and gj are needed
                      !--->                 both local spin directions (isp) at the same time
                      fj(k,l,n,isp) = ws * ( usdus%uds(l,n,isp)*gg - usdus%duds(l,n,isp)*ff )
                      gj(k,l,n,isp) = ws * ( usdus%dus(l,n,isp)*ff - usdus%us(l,n,isp)*gg )
                   ELSE
                      !--->                 in a spin-spiral calculation fj and gj are needed
                      !--->                 both interstitial spin directions at the same time
                      !--->                 In all other cases iintsp runs from 1 to 1.
                      fj(k,l,n,iintsp) = ws * ( usdus%uds(l,n,isp)*gg - usdus%duds(l,n,isp)*ff )
                      gj(k,l,n,iintsp) = ws * ( usdus%dus(l,n,isp)*ff - usdus%us(l,n,isp)*gg )
                   ENDIF
                ENDIF
             ENDDO
          ENDDO ! k = 1, lapw%nv
       ENDDO    ! n = 1,atoms%ntype
       !$OMP end parallel do

    ENDDO       ! iintsp = 1,nintsp
    !
    IF ( noco%l_noco .AND. (.NOT. noco%l_ss) ) THEN
       !---> pk non-collinear
       !--->    initialize hamiltonian help array
163 164
       aahlp=CMPLX(0.0,0.0)
       bbhlp=CMPLX(0.0,0.0)
165 166 167 168 169 170 171 172
    ENDIF

    !$OMP PARALLEL  DEFAULT(shared)&
    !$OMP PRIVATE(kii,ki,nc,iii,kjmax,ski,kj,plegend,l,n1,n)&
    !$OMP PRIVATE(n0,rph,cph,nn,tnn,fjkiln,gjkiln)&
    !$OMP PRIVATE(w1,apw_lo1,apw_lo2,ddnln,elall,fct,ij,apw1)&
    !$OMP PRIVATE(cross_k,dplegend,chi,chi11,chi21,chi22,nsp,chj)&
    !$OMP PRIVATE(isigma_x,isigma_y,isigma_z,j1,j2,chi11so,chi21so,chi22so)&
Gregor Michalicek's avatar
Gregor Michalicek committed
173
    !$OMP PRIVATE(tmp1,tmp2,tmp3,tmp)&
174 175 176
    !$OMP PRIVATE(aawa,bbwa,capw1,ii) IF (.not.l_socfirst)
    !$     IF (l_socfirst) WRITE(*,*) "WARNING: first variation SOC does not work with OPENMP in hsmt_sph"
    !$     IF (l_socfirst) WRITE(*,*) "         switching off openmp parallelization"
177 178 179
    ALLOCATE(rph(DIMENSION%nvd,ab_dim))
    ALLOCATE(cph(DIMENSION%nvd,ab_dim))
    ALLOCATE(plegend(DIMENSION%nvd,0:atoms%lmaxd))
180
    IF (l_socfirst)THEN
181
       ALLOCATE ( dplegend(DIMENSION%nvd,0:atoms%lmaxd),cross_k(DIMENSION%nvd,3))
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
       dplegend(:,0)=0.e0
       dplegend(:,1)=1.e0
    ENDIF

    plegend=0.0
    plegend(:,0)=1.0
    DO iintsp = 1,nintsp
       IF (iintsp.EQ.1) THEN
          qssbti = - noco%qss/2
       ELSE
          qssbti = + noco%qss/2
       ENDIF
       DO jintsp = 1,iintsp
          IF (jintsp.EQ.1) THEN
             qssbtj = - noco%qss/2
          ELSE
             qssbtj = + noco%qss/2
          ENDIF

          nc = 0                                    ! maybe IF (iintsp.EQ
          IF ( noco%l_noco .AND. (n_size.GT.1) ) THEN
             !--->       for EV-parallelization & noco ( see comments at top )
             lapw%nv_tot = lapw%nv(1) + lapw%nv(2)
205
             IF (noco%l_ss)  CALL juDFT_error("ev-parallelization & spin-spiral !",calledby ="hsmt_sph")
206 207 208 209 210 211 212
          ELSE
             lapw%nv_tot = lapw%nv(iintsp)
          ENDIF

          !
          !$OMP  DO SCHEDULE(DYNAMIC,1)
          DO  kii =  n_rank, lapw%nv_tot-1, n_size
213
             ki = MOD(kii,lapw%nv(iintsp)) + 1
214 215 216 217 218 219 220 221 222 223 224 225 226
             nc = nc + 1
             !$          nc= 1+ (kii-n_rank)/n_size
             iii = nc*(nc-1)/2 * n_size - (nc-1)*(n_size - n_rank - 1)
             !            ii = nc*(nc+1)/2 * n_size -  nc   *(n_size - n_rank - 1)

             IF (noco%l_ss.OR.noco%l_constr.OR.l_socfirst) THEN
                kjmax = lapw%nv(jintsp)
             ELSE
                kjmax = ki
             ENDIF
             ski = (/lapw%k1(ki,iintsp),lapw%k2(ki,iintsp),lapw%k3(ki,iintsp)/) + qssbti
             !--->       legendre polynomials
             DO kj = 1,kjmax
227
                plegend(kj,1) = DOT_PRODUCT(gk(kj,:,jintsp),gk(ki,:,iintsp))
228
                IF (l_socfirst) THEN
229
                    cross_k(kj,1)=gk(ki,2,jintsp)*gk(kj,3,iintsp)- gk(ki,3,jintsp)*gk(kj,2,iintsp)
230 231 232 233
                   cross_k(kj,2)=gk(ki,3,jintsp)*gk(kj,1,iintsp)- gk(ki,1,jintsp)*gk(kj,3,iintsp)
                   cross_k(kj,3)=gk(ki,1,jintsp)*gk(kj,2,iintsp)- gk(ki,2,jintsp)*gk(kj,1,iintsp)
                ENDIF
             END DO
234
             DO l = 1,MAXVAL(atoms%lmax) - 1
235 236 237 238 239 240 241 242 243 244 245 246 247
                plegend(:,l+1) = fleg1(l)*plegend(:,1)*plegend(:,l) - fleg2(l)*plegend(:,l-1)
                IF (l_socfirst) THEN
                   dplegend(:,l+1)=REAL(l+1)*plegend(:,l)+ plegend(:,1)*dplegend(:,l)
                ENDIF
             END DO
             !--->       loop over equivalent atoms
             n1 = 0
             DO n = 1,atoms%ntype

                IF (noco%l_noco) THEN
                   !--->          pk non-collinear
                   !--->             set up the spinors of this atom within global
                   !--->             spin-coordinateframe
248
                   CALL hsmt_spinor(isp,n, noco, input,chi, chi11, chi21, chi22,l_socfirst,&
249 250 251 252 253 254 255 256 257 258
                        isigma,isigma_x,isigma_y,isigma_z,chi11so,chi21so,chi22so,angso,chj,cross_k)
                ENDIF
                !---> pk non-collinear
                n0 = n1 + 1
                n1 = n1 + atoms%neq(n)
                rph(:,1) = 0.0
                cph(:,1) = 0.0
                DO nn = n0,n1
                   tnn = tpi_const*atoms%taual(:,nn)
                   !--->             set up phase factors
259
                   !!$OMP SIMD PRIVATE(tmp1,tmp2,tmp3,tmp) 
260
                   DO kj = 1,kjmax
261 262 263 264 265 266 267
                      !rph(kj,1) = rph(kj,1) +&
                      !     COS(DOT_PRODUCT(ski-(/lapw%k1(kj,jintsp),lapw%k2(kj,jintsp),lapw%k3(kj,jintsp)/)+qssbtj,tnn))
                      tmp1 = (ski(1)-lapw%k1(kj,jintsp)+qssbtj(1)) * tnn(1)
                      tmp2 = (ski(2)-lapw%k2(kj,jintsp)+qssbtj(2)) * tnn(2)
                      tmp3 = (ski(3)-lapw%k3(kj,jintsp)+qssbtj(3)) * tnn(3)
                      tmp = cos(tmp1 + tmp2 + tmp3)
                      rph(kj,1) = rph(kj,1) + tmp
268
                   END DO
269 270 271
                   IF (.NOT.sym%invs) THEN
                      !--->                if the system does not posses inversion symmetry
                      !--->                the complex part of the exponential is needed.
272
                      !!$OMP SIMD PRIVATE(tmp1,tmp2,tmp3,tmp) 
273 274 275 276 277 278 279 280 281 282
                      DO kj = 1,kjmax
                      !   cph(kj,1) = cph(kj,1) +&
                      !        SIN(DOT_PRODUCT((/lapw%k1(kj,jintsp),lapw%k2(kj,jintsp),lapw%k3(kj,jintsp)/)+qssbtj-ski,tnn))
                      tmp1 = (lapw%k1(kj,jintsp)+qssbtj(1)-ski(1)) * tnn(1)
                      tmp2 = (lapw%k2(kj,jintsp)+qssbtj(2)-ski(2)) * tnn(2)
                      tmp3 = (lapw%k3(kj,jintsp)+qssbtj(3)-ski(3)) * tnn(3)
                      tmp = sin(tmp1 + tmp2 + tmp3)
                      cph(kj,1) = cph(kj,1) + tmp
                      END DO
                   ENDIF
283 284 285 286
                END DO

                !--->          update overlap and l-diagonal hamiltonian matrix
                DO  l = 0,atoms%lmax(n)
287
                   IF (noco%l_constr.OR.l_socfirst) THEN
288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
                      fjkiln = fj(ki,l,n,isp)
                      gjkiln = gj(ki,l,n,isp)
                   ELSE
                      fjkiln = fj(ki,l,n,iintsp)
                      gjkiln = gj(ki,l,n,iintsp)
                   ENDIF
                   !
                   w1 = 0.5 * ( usdus%uds(l,n,isp)*usdus%dus(l,n,isp) + &
                        usdus%us(l,n,isp)*usdus%duds(l,n,isp) )
                   apw_lo1 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( gjkiln * w1 +&
                        fjkiln * usdus%us(l,n,isp) * usdus%dus(l,n,isp) )
                   apw_lo2 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( fjkiln * w1 +&
                        gjkiln * usdus%uds(l,n,isp) * usdus%duds(l,n,isp) )
                   !
                   ddnln =  usdus%ddn(l,n,isp)
                   elall = el(l,n,isp)

                   IF ( noco%l_noco .AND. (.NOT. noco%l_ss) ) THEN
                      !--->             pk non-collinear
307
                      IF (noco%l_constr.OR.l_socfirst) THEN
308 309 310 311
                         DO kj = 1,ki
                            fct  = plegend(kj,l)*fl2p1(l)*&
                                 ( fjkiln*fj(kj,l,n,isp) + gjkiln*gj(kj,l,n,isp)*ddnln )

312 313
                            bbwa(kj) = CMPLX(rph(kj,1),cph(kj,1))*fct
                            aawa(kj) = CMPLX(rph(kj,1),cph(kj,1)) * ( &
314 315
                                 fct*elall + plegend(kj,l)*fl2p1bt(l)*&
                                 ( fjkiln*gj(kj,l,n,isp) + gjkiln*fj(kj,l,n,isp) ) )
316 317 318 319 320
                            IF (input%l_useapw) THEN
                               capw1 = CMPLX(rph(kj,1),cph(kj,1))*plegend(kj,l)&
                                    * ( apw_lo1 * fj(kj,l,n,isp) + apw_lo2 * gj(kj,l,n,isp) )
                               aawa(kj) = aawa(kj) + capw1
                            ENDIF
321 322 323 324 325 326
                         ENDDO
                      ELSE
                         DO kj = 1,ki
                            fct  = plegend(kj,l)*fl2p1(l)*&
                                 ( fjkiln*fj(kj,l,n,jintsp) + gjkiln*gj(kj,l,n,jintsp)*ddnln )

327 328
                            bbwa(kj) = CMPLX(rph(kj,1),cph(kj,1))*fct
                            aawa(kj) = CMPLX(rph(kj,1),cph(kj,1)) * (&
329 330
                                 fct*elall + plegend(kj,l)*fl2p1bt(l)*&
                                 ( fjkiln*gj(kj,l,n,jintsp) + gjkiln*fj(kj,l,n,jintsp) ) )
331 332 333 334 335
                            IF (input%l_useapw) THEN
                               capw1 = CMPLX(rph(kj,1),cph(kj,1))*plegend(kj,l)&
                                    * ( apw_lo1 * fj(kj,l,n,jintsp) + apw_lo2 * gj(kj,l,n,jintsp) )
                               aawa(kj) = aawa(kj) + capw1
                            ENDIF
336 337 338 339 340
                         ENDDO
                      ENDIF
                      !+||
                      IF ( kii+1.LE.lapw%nv(1) ) THEN
                         !--->                spin-up spin-up part
341 342
                         CALL CPP_BLAS_caxpy(ki,chi11,bbwa,1,hamOvlp%b_c(iii+1),1)
                         CALL CPP_BLAS_caxpy(ki,chi11,aawa,1,hamOvlp%a_c(iii+1),1)
343 344 345 346 347 348
                         !--->                spin-down spin-up part, upper triangle.
                         !--->                the help array is used to allow vectorization on
                         !--->                Cray PVP systems. it is mapped onto the hamiltonian
                         !--->                matrix after the setup is complete.
                         DO kj = 1,ki - 1
                            ii = iii + kj
349 350
                            aahlp(ii)=aahlp(ii)+CONJG(aawa(kj))*chi21
                            bbhlp(ii)=bbhlp(ii)+CONJG(bbwa(kj))*chi21
351 352 353 354 355 356 357 358 359
                         END DO
                      ENDIF
                      IF ( (kii+1.GT.lapw%nv(1)).OR.(n_size.EQ.1) ) THEN
                         IF (n_size.EQ.1) THEN
                            ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2
                         ELSE
                            ii = iii
                         ENDIF
                         !--->                spin-down spin-up part, lower triangle
360 361
                         CALL CPP_BLAS_caxpy(ki,chi21,bbwa,1,hamOvlp%b_c(ii+1),1)
                         CALL CPP_BLAS_caxpy(ki,chi21,aawa,1,hamOvlp%a_c(ii+1),1)
362 363
                         !--->                spin-down spin-down part
                         ii = ii + lapw%nv(1)+atoms%nlotot
364 365
                         CALL CPP_BLAS_caxpy(ki,chi22,bbwa,1,hamOvlp%b_c(ii+1),1)
                         CALL CPP_BLAS_caxpy(ki,chi22,aawa,1,hamOvlp%a_c(ii+1),1)
366 367 368 369 370 371 372 373 374
                      ENDIF
                      !-||
                      !--->                when fj and gj are available for both local spins
                      !--->                (isp), add the contribution from the constraint
                      !--->                B-field.
                      IF (noco%l_constr .AND. (isp .EQ. 2)) THEN
                         DO nsp = 1,input%jspins
                            IF (nsp.EQ.1) THEN
                               DO kj = 1,lapw%nv(1)
375 376
                                  aawa(kj) = (-0.5)*CMPLX(noco%b_con(1,n),noco%b_con(2,n))*&
                                       CMPLX(rph(kj,1),cph(kj,1))*&
377 378 379 380 381 382 383 384
                                       plegend(kj,l)*fl2p1(l)*(&
                                       + fj(ki,l,n,2)*fj(kj,l,n,1)*uun21(l,n)&
                                       + fj(ki,l,n,2)*gj(kj,l,n,1)*udn21(l,n)&
                                       + gj(ki,l,n,2)*fj(kj,l,n,1)*dun21(l,n)&
                                       + gj(ki,l,n,2)*gj(kj,l,n,1)*ddn21(l,n))
                               ENDDO
                            ELSE
                               DO kj = 1,lapw%nv(1)
385 386
                                  aawa(kj) = (-0.5)*CMPLX(noco%b_con(1,n),-noco%b_con(2,n))*&
                                       CMPLX(rph(kj,1),cph(kj,1))*&
387 388 389 390 391 392 393 394 395 396
                                       plegend(kj,l)*fl2p1(l)*(&
                                       + fj(ki,l,n,1)*fj(kj,l,n,2)*uun21(l,n)&
                                       + fj(ki,l,n,1)*gj(kj,l,n,2)*dun21(l,n)&
                                       + gj(ki,l,n,1)*fj(kj,l,n,2)*udn21(l,n)&
                                       + gj(ki,l,n,1)*gj(kj,l,n,2)*ddn21(l,n))
                               ENDDO
                            ENDIF
                            !--->                  spin-up spin-up part
                            ii = (ki-1)*(ki)/2
                            DO kj = 1,ki
397
                               hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chj(nsp,1,1,n)
398 399 400 401 402
                            ENDDO
                            !--->                  spin-down spin-down part
                            ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2 + &
                                 lapw%nv(1)+atoms%nlotot
                            DO kj = 1,ki
403
                               hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chj(nsp,2,2,n)
404 405 406 407
                            ENDDO
                            !--->                  spin-down spin-up part
                            ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2
                            DO kj = 1,lapw%nv(1)
408
                               hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chj(nsp,2,1,n)
409 410 411 412 413 414 415 416 417 418 419
                            ENDDO
                         ENDDO
                      ENDIF

                      !                 Add spin-orbit coupling
                      IF (isp.EQ.2) THEN
                         IF ((l.GT.0).AND.l_socfirst) THEN

                            DO j1 = 1,input%jspins
                               DO j2 = 1,input%jspins
                                  DO kj = 1,lapw%nv(1)
420
                                     aawa(kj)=CMPLX(rph(kj,1),cph(kj,1))*(&
421 422 423 424 425 426 427 428 429 430 431
                                          + fj(ki,l,n,j1)*fj(kj,l,n,j2)*rsoc%rsopp(n,l,j1,j2)&
                                          + fj(ki,l,n,j1)*gj(kj,l,n,j2)*rsoc%rsopdp(n,l,j1,j2)&
                                          + gj(ki,l,n,j1)*fj(kj,l,n,j2)*rsoc%rsoppd(n,l,j1,j2)&
                                          + gj(ki,l,n,j1)*gj(kj,l,n,j2)*rsoc%rsopdpd(n,l,j1,j2))&
                                          *dplegend(kj,l)*fl2p1(l)*angso(kj,j1,j2)
                                  ENDDO
                                  IF (n_size.EQ.1) THEN

                                     !--->                  spin-up spin-up part
                                     ii = (ki-1)*(ki)/2
                                     DO kj = 1,ki
432
                                        hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi11so(j1,j2)
433 434 435 436 437
                                     ENDDO
                                     !--->                  spin-down spin-down part
                                     ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2 +&
                                          lapw%nv(1)+atoms%nlotot
                                     DO kj = 1,ki
438
                                        hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi22so(j1,j2)
439 440 441 442
                                     ENDDO
                                     !--->                  spin-down spin-up part
                                     ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2
                                     DO kj = 1,lapw%nv(1)
443
                                        hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi21so(j1,j2)
444 445 446 447 448 449
                                     ENDDO

                                  ELSE  ! eigenvalue parallelization

                                     IF ( kii+1.LE.lapw%nv(1) ) THEN
                                        !--->                    spin-up spin-up part
450
                                        CALL CPP_BLAS_caxpy(ki,chi11so(j1,j2),aawa,1, hamOvlp%a_c(iii+1),1)
451 452 453 454

                                        !--->                    spin-down spin-up part, upper triangle.
                                        DO kj = 1,ki - 1
                                           ii = iii + kj
455
                                           aahlp(ii) = aahlp(ii) + CONJG(aawa(kj))*chi21so(j2,j1)
456 457 458 459 460
                                        END DO
                                     ENDIF
                                     IF  (kii+1.GT.lapw%nv(1)) THEN
                                        ii = iii
                                        !--->                    spin-down spin-up part, lower triangle
461
                                        CALL CPP_BLAS_caxpy(ki,chi21so(j1,j2),aawa,1, hamOvlp%a_c(ii+1),1)
462 463
                                        !--->                    spin-down spin-down part
                                        ii = ii + lapw%nv(1)+atoms%nlotot
464
                                        CALL CPP_BLAS_caxpy(ki,chi22so(j1,j2),aawa,1, hamOvlp%a_c(ii+1),1)
465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483
                                     ENDIF

                                  ENDIF ! eigenvalue par.

                               ENDDO ! j2
                            ENDDO    ! j1
                         ENDIF       ! ( l > 0 ) & socfirst
                      ENDIF          ! (isp = 2)
                      !               End spin-orbit
                   ELSEIF ( noco%l_noco .AND. noco%l_ss ) THEN
                      IF ( iintsp.EQ.2 .AND. jintsp.EQ.1 ) THEN
                         kjmax = lapw%nv(1)
                      ELSE
                         kjmax = ki
                      ENDIF
                      DO kj = 1,kjmax
                         fct  = plegend(kj,l)*fl2p1(l)* ( fjkiln*fj(kj,l,n,jintsp) +&
                              gjkiln*gj(kj,l,n,jintsp)*ddnln )

484 485
                         bbwa(kj) = CMPLX(rph(kj,1),cph(kj,1))*fct
                         aawa(kj) = CMPLX(rph(kj,1),cph(kj,1)) * ( &
486 487 488 489 490 491 492
                              fct*elall + plegend(kj,l)*fl2p1bt(l)*&
                              ( fjkiln*gj(kj,l,n,jintsp) + gjkiln*fj(kj,l,n,jintsp) ) )
                      ENDDO
                      IF ( iintsp.EQ.1 .AND. jintsp.EQ.1 ) THEN
                         !--->                   spin-up spin-up part
                         ii = (ki-1)*(ki)/2
                         DO kj = 1,ki
493 494
                            hamOvlp%b_c(ii+kj) = hamOvlp%b_c(ii+kj) + bbwa(kj)*chi11
                            hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi11
495 496 497 498 499 500
                         ENDDO
                      ELSEIF ( iintsp.EQ.2 .AND. jintsp.EQ.2 ) THEN
                         !--->                   spin-down spin-down part
                         ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2 +&
                              lapw%nv(1)+atoms%nlotot
                         DO kj = 1,ki
501 502
                            hamOvlp%b_c(ii+kj) = hamOvlp%b_c(ii+kj) + bbwa(kj)*chi22
                            hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi22
503 504 505 506 507
                         ENDDO
                      ELSE
                         !--->                   spin-down spin-up part
                         ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2
                         DO kj = 1,lapw%nv(1)
508 509
                            hamOvlp%b_c(ii+kj) = hamOvlp%b_c(ii+kj) + bbwa(kj)*chi21
                            hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi21
510 511 512 513
                         ENDDO
                      ENDIF
                      !--->             pk non-collinear
                   ELSE
514 515 516 517
                      IF (l_real) THEN
                         DO kj = 1,ki
                            fct  = plegend(kj,l)*fl2p1(l)*&
                                 ( fjkiln*fj(kj,l,n,jintsp) + gjkiln*gj(kj,l,n,jintsp)*ddnln )
518

519
                            ij = iii + kj
520 521
                            hamOvlp%b_r(ij) = hamOvlp%b_r(ij) + rph(kj,1) * fct
                            hamOvlp%a_r(ij) = hamOvlp%a_r(ij) + rph(kj,1) * ( fct * elall + plegend(kj,l) * fl2p1bt(l) *&
522 523 524 525 526
                                 ( fjkiln*gj(kj,l,n,jintsp) + gjkiln*fj(kj,l,n,jintsp) ) )
                            !+APW
                            IF (input%l_useapw) THEN
                               apw1 = rph(kj,1) * plegend(kj,l)  * &
                                    ( apw_lo1 * fj(kj,l,n,iintsp) + apw_lo2 * gj(kj,l,n,iintsp) )
527
                               hamOvlp%a_r(ij) = hamOvlp%a_r(ij) + apw1
528 529 530 531 532 533 534 535 536
                            ENDIF
                            !-APW
                         ENDDO
                      ELSE
                         DO kj = 1,ki
                            fct  = plegend(kj,l)*fl2p1(l)*&
                                 ( fjkiln*fj(kj,l,n,jintsp) + gjkiln*gj(kj,l,n,jintsp)*ddnln )

                            ij = iii + kj
537 538
                            hamOvlp%b_c(ij) = hamOvlp%b_c(ij) + CMPLX(rph(kj,1),cph(kj,1))*fct
                            hamOvlp%a_c(ij) = hamOvlp%a_c(ij) + CMPLX(rph(kj,1),cph(kj,1)) * (fct*elall + plegend(kj,l)*fl2p1bt(l) *&
539 540 541 542 543
                                 ( fjkiln*gj(kj,l,n,jintsp) + gjkiln*fj(kj,l,n,jintsp) ) )
                            IF (input%l_useapw) THEN

                               capw1 = CMPLX(rph(kj,1),cph(kj,1))*plegend(kj,l)&
                                    * ( apw_lo1 * fj(kj,l,n,iintsp) + apw_lo2 * gj(kj,l,n,iintsp) )
544
                               hamOvlp%a_c(ij) = hamOvlp%a_c(ij) + capw1
545 546 547
                            ENDIF
                         END DO
                      ENDIF
548 549 550
                   ENDIF

                   !--->          end loop over l
551
                ENDDO
552 553

                !--->       end loop over atom types (ntype)
554
             ENDDO
555 556 557

             !--->    end loop over ki

558
          ENDDO
559 560 561 562
          !$OMP END DO
          !---> end loops over interstitial spins
       ENDDO ! jintsp = 1,iintsp
    ENDDO   ! iintsp = 1,nintsp
563
    DEALLOCATE(plegend)
564
    IF (l_socfirst) DEALLOCATE(dplegend,cross_k)
565
    DEALLOCATE(rph,cph)
566 567 568 569 570 571 572 573 574 575 576
    !$OMP END PARALLEL


    !---> pk non-collinear
    IF ( noco%l_noco .AND. (.NOT. noco%l_ss) ) THEN
       !--->    map the hamiltonian help array onto the hamitonian matrix
       IF (n_size.EQ.1) THEN
          DO ki = 1,lapw%nv(1)
             ii = (ki-1)*(ki)/2
             DO kj = 1,ki-1
                ij = (lapw%nv(1)+atoms%nlotot+kj-1)*(lapw%nv(1)+atoms%nlotot+kj)/2 + ki
577 578
                hamOvlp%a_c(ij) = hamOvlp%a_c(ij) + aahlp(ii+kj)
                hamOvlp%b_c(ij) = hamOvlp%b_c(ij) + bbhlp(ii+kj)
579 580 581 582
             ENDDO
          ENDDO

       ELSE
583
#ifdef CPP_MPI
584 585
          CALL mingeselle(SUB_COMM,n_size,n_rank,lapw%nv, aahlp, .FALSE.,aa_rDummy,hamOvlp%a_c)
          CALL mingeselle(SUB_COMM,n_size,n_rank,lapw%nv, bbhlp, .FALSE.,aa_rDummy,hamOvlp%b_c)
586
#endif
587 588 589 590 591 592 593 594
       ENDIF
    ENDIF



    RETURN
  END SUBROUTINE hsmt_sph
END MODULE m_hsmt_sph