hsmt_sph.F90 28.3 KB
Newer Older
1 2 3 4 5 6
!--------------------------------------------------------------------------------
! Copyright (c) 2016 Peter Grünberg Institut, Forschungszentrum Jülich, Germany
! This file is part of FLEUR and available as free software under the conditions
! of the MIT license as expressed in the LICENSE file in more detail.
!--------------------------------------------------------------------------------

7
MODULE m_hsmt_sph
8 9
  USE m_juDFT
  IMPLICIT NONE
10
CONTAINS
11
  SUBROUTINE hsmt_sph(sym,DIMENSION,atoms,SUB_COMM,n_size,n_rank,sphhar,isp,ab_dim,&
12
       input,hlpmsize,noco,l_socfirst,cell,nintsp, lapw,el,usdus,vr,gk,rsoc,isigma,fj,gj,l_real,hamOvlp)
13 14 15 16 17 18 19 20 21 22 23 24 25 26

#include"cpp_double.h"
    USE m_constants, ONLY : fpi_const,tpi_const
    USE m_sphbes
    USE m_dsphbs
    USE m_ylm
    USE m_hsmt_socinit,ONLY:t_rsoc
    USE m_hsmt_spinor
    USE m_radovlp
#ifdef CPP_MPI
    USE m_mingeselle
#endif
    USE m_types
    IMPLICIT NONE
27 28 29 30 31 32 33 34 35 36
    TYPE(t_sym),INTENT(IN)        :: sym
    TYPE(t_dimension),INTENT(IN)  :: DIMENSION
    TYPE(t_input),INTENT(IN)      :: input
    TYPE(t_noco),INTENT(IN)       :: noco
    TYPE(t_cell),INTENT(IN)       :: cell
    TYPE(t_sphhar),INTENT(IN)     :: sphhar
    TYPE(t_atoms),INTENT(IN)      :: atoms
    TYPE(t_lapw),INTENT(INOUT)    :: lapw!lpaw%nv_tot is updated
    TYPE(t_usdus),INTENT(INOUT)   :: usdus
    TYPE(t_hamOvlp),INTENT(INOUT) :: hamOvlp
37 38 39 40 41 42 43 44
    !     ..
    !     .. Scalar Arguments ..
    INTEGER, INTENT (IN) :: isp,ab_dim
    INTEGER, INTENT (IN) :: SUB_COMM,n_size,n_rank 
    INTEGER, INTENT (IN) :: hlpmsize,nintsp
    LOGICAL, INTENT (IN) :: l_socfirst
    !     ..
    !     .. Array Arguments ..
Daniel Wortmann's avatar
Daniel Wortmann committed
45 46
    REAL,    INTENT (IN) :: el(0:atoms%lmaxd,atoms%ntype,DIMENSION%jspd)
    REAL,    INTENT (IN) :: vr(atoms%jmtd,0:sphhar%nlhd,atoms%ntype,DIMENSION%jspd)
47 48 49 50 51
    REAL,INTENT(IN)      :: gk(:,:,:)
    COMPLEX,INTENT(IN)   :: isigma(2,2,3)
    TYPE(t_rsoc),INTENT(IN) :: rsoc


52 53
    LOGICAL, INTENT(IN) :: l_real

54 55 56 57
    REAL,INTENT(OUT) :: fj(:,0:,:,:),gj(:,0:,:,:)
    !     ..
    !     .. Local Scalars ..
    REAL con1,ff,gg,gs,ws,tnn(3), elall,fct,fjkiln,gjkiln,ddnln,ski(3)
58
    REAL apw_lo1,apw_lo2,apw1,w1,aa_rDummy(1)
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75

    COMPLEX chi11,chi21,chi22,capw1
    INTEGER ii,iii,ij,k,ki,kj,l,lo,n,n0,n1,nn,kjmax, nsp,iintsp,jintsp
    INTEGER nc ,kii

    !     ..
    !     .. Local Arrays ..
    REAL fb(0:atoms%lmaxd),fleg1(0:atoms%lmaxd),fleg2(0:atoms%lmaxd),fl2p1(0:atoms%lmaxd)     
    REAL fl2p1bt(0:atoms%lmaxd),gb(0:atoms%lmaxd)
    REAL qssbti(3),qssbtj(3)


    REAL, ALLOCATABLE :: plegend(:,:)
    REAL, ALLOCATABLE :: rph(:,:),cph(:,:)
    REAL, ALLOCATABLE :: uun21(:,:),udn21(:,:),dun21(:,:),ddn21(:,:)


Daniel Wortmann's avatar
Daniel Wortmann committed
76
    COMPLEX chi(2,2),chj(2,2,2,atoms%ntype),aawa(DIMENSION%nvd),bbwa(DIMENSION%nvd)
77 78 79 80 81 82 83 84 85
    COMPLEX, ALLOCATABLE :: aahlp(:),bbhlp(:)
    LOGICAL apw(0:atoms%lmaxd)


    ! for Spin-orbit...
    REAL, ALLOCATABLE :: dplegend(:,:)
    REAL, ALLOCATABLE :: cross_k(:,:)
    INTEGER :: j1,j2
    COMPLEX :: isigma_x(2,2),isigma_y(2,2),isigma_z(2,2)
86
    COMPLEX :: chi11so(2,2),chi21so(2,2),chi22so(2,2),angso(DIMENSION%nvd,2,2)
87

88 89
    REAL :: tmp,tmp1,tmp2,tmp3

90 91 92

    IF ( noco%l_noco .AND. (.NOT. noco%l_ss) ) ALLOCATE ( aahlp(hlpmsize),bbhlp(hlpmsize) )
    !     ..
93
    con1 = fpi_const/SQRT(cell%omtil)
94
    DO l = 0,atoms%lmaxd
95 96 97
       fleg1(l) = REAL(l+l+1)/REAL(l+1)
       fleg2(l) = REAL(l)/REAL(l+1)
       fl2p1(l) = REAL(l+l+1)/fpi_const
98 99 100 101 102
       fl2p1bt(l) = fl2p1(l)*0.5
    END DO
    !---> calculate the overlap of the radial basis functions with different
    !---> spin directions.
    IF (noco%l_constr) THEN
Daniel Wortmann's avatar
Daniel Wortmann committed
103 104
       ALLOCATE(uun21(0:atoms%lmaxd,atoms%ntype),udn21(0:atoms%lmaxd,atoms%ntype),&
            dun21(0:atoms%lmaxd,atoms%ntype),ddn21(0:atoms%lmaxd,atoms%ntype) )
105 106 107 108 109 110 111 112 113 114
       CALL rad_ovlp(atoms,usdus,input,vr,el(0:,:,:), uun21,udn21,dun21,ddn21)
    ENDIF
    !---> loop over each atom type

    DO iintsp = 1,nintsp
       !$OMP parallel do DEFAULT(SHARED)&
       !$OMP PRIVATE(n,l,apw,lo,k,gs,fb,gb,ws,ff,gg)
       DO n = 1,atoms%ntype

          DO l = 0,atoms%lmax(n)
115
             apw(l) = .FALSE.
116
             DO lo = 1,atoms%nlo(n)
117
                IF (atoms%l_dulo(lo,n)) apw(l) = .TRUE.
118
             ENDDO
119
             IF ((input%l_useapw).AND.(atoms%lapw_l(n).GE.l)) apw(l) = .FALSE.
120

121 122
          ENDDO
          DO lo = 1,atoms%nlo(n)
123
             IF (atoms%l_dulo(lo,n)) apw(atoms%llo(lo,n)) = .TRUE.
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
          ENDDO

          DO k = 1,lapw%nv(iintsp)
             gs = lapw%rk(k,iintsp)*atoms%rmt(n)
             CALL sphbes(atoms%lmax(n),gs, fb)
             CALL dsphbs(atoms%lmax(n),gs,fb, gb)
             DO l = 0,atoms%lmax(n)
                !---> set up wronskians for the matching conditions for each ntype
                ws = con1/(usdus%uds(l,n,isp)*usdus%dus(l,n,isp)&
                     - usdus%us(l,n,isp)*usdus%duds(l,n,isp))
                ff = fb(l)
                gg = lapw%rk(k,iintsp)*gb(l)
                IF ( apw(l) ) THEN
                   fj(k,l,n,iintsp) = 1.0*con1 * ff / usdus%us(l,n,isp)
                   gj(k,l,n,iintsp) = 0.0d0
                ELSE
140
                   IF (noco%l_constr.OR.l_socfirst) THEN
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
                      !--->                 in a constrained calculation fj and gj are needed
                      !--->                 both local spin directions (isp) at the same time
                      fj(k,l,n,isp) = ws * ( usdus%uds(l,n,isp)*gg - usdus%duds(l,n,isp)*ff )
                      gj(k,l,n,isp) = ws * ( usdus%dus(l,n,isp)*ff - usdus%us(l,n,isp)*gg )
                   ELSE
                      !--->                 in a spin-spiral calculation fj and gj are needed
                      !--->                 both interstitial spin directions at the same time
                      !--->                 In all other cases iintsp runs from 1 to 1.
                      fj(k,l,n,iintsp) = ws * ( usdus%uds(l,n,isp)*gg - usdus%duds(l,n,isp)*ff )
                      gj(k,l,n,iintsp) = ws * ( usdus%dus(l,n,isp)*ff - usdus%us(l,n,isp)*gg )
                   ENDIF
                ENDIF
             ENDDO
          ENDDO ! k = 1, lapw%nv
       ENDDO    ! n = 1,atoms%ntype
       !$OMP end parallel do

    ENDDO       ! iintsp = 1,nintsp
    !
    IF ( noco%l_noco .AND. (.NOT. noco%l_ss) ) THEN
       !---> pk non-collinear
       !--->    initialize hamiltonian help array
163 164
       aahlp=CMPLX(0.0,0.0)
       bbhlp=CMPLX(0.0,0.0)
165 166 167 168 169 170 171 172 173 174 175
    ENDIF

    !$OMP PARALLEL  DEFAULT(shared)&
    !$OMP PRIVATE(kii,ki,nc,iii,kjmax,ski,kj,plegend,l,n1,n)&
    !$OMP PRIVATE(n0,rph,cph,nn,tnn,fjkiln,gjkiln)&
    !$OMP PRIVATE(w1,apw_lo1,apw_lo2,ddnln,elall,fct,ij,apw1)&
    !$OMP PRIVATE(cross_k,dplegend,chi,chi11,chi21,chi22,nsp,chj)&
    !$OMP PRIVATE(isigma_x,isigma_y,isigma_z,j1,j2,chi11so,chi21so,chi22so)&
    !$OMP PRIVATE(aawa,bbwa,capw1,ii) IF (.not.l_socfirst)
    !$     IF (l_socfirst) WRITE(*,*) "WARNING: first variation SOC does not work with OPENMP in hsmt_sph"
    !$     IF (l_socfirst) WRITE(*,*) "         switching off openmp parallelization"
176 177 178
    ALLOCATE(rph(DIMENSION%nvd,ab_dim))
    ALLOCATE(cph(DIMENSION%nvd,ab_dim))
    ALLOCATE(plegend(DIMENSION%nvd,0:atoms%lmaxd))
179
    IF (l_socfirst)THEN
180
       ALLOCATE ( dplegend(DIMENSION%nvd,0:atoms%lmaxd),cross_k(DIMENSION%nvd,3))
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
       dplegend(:,0)=0.e0
       dplegend(:,1)=1.e0
    ENDIF

    plegend=0.0
    plegend(:,0)=1.0
    DO iintsp = 1,nintsp
       IF (iintsp.EQ.1) THEN
          qssbti = - noco%qss/2
       ELSE
          qssbti = + noco%qss/2
       ENDIF
       DO jintsp = 1,iintsp
          IF (jintsp.EQ.1) THEN
             qssbtj = - noco%qss/2
          ELSE
             qssbtj = + noco%qss/2
          ENDIF

          nc = 0                                    ! maybe IF (iintsp.EQ
          IF ( noco%l_noco .AND. (n_size.GT.1) ) THEN
             !--->       for EV-parallelization & noco ( see comments at top )
             lapw%nv_tot = lapw%nv(1) + lapw%nv(2)
204
             IF (noco%l_ss)  CALL juDFT_error("ev-parallelization & spin-spiral !",calledby ="hsmt_sph")
205 206 207 208 209 210 211
          ELSE
             lapw%nv_tot = lapw%nv(iintsp)
          ENDIF

          !
          !$OMP  DO SCHEDULE(DYNAMIC,1)
          DO  kii =  n_rank, lapw%nv_tot-1, n_size
212
             ki = MOD(kii,lapw%nv(iintsp)) + 1
213 214 215 216 217 218 219 220 221 222 223 224 225
             nc = nc + 1
             !$          nc= 1+ (kii-n_rank)/n_size
             iii = nc*(nc-1)/2 * n_size - (nc-1)*(n_size - n_rank - 1)
             !            ii = nc*(nc+1)/2 * n_size -  nc   *(n_size - n_rank - 1)

             IF (noco%l_ss.OR.noco%l_constr.OR.l_socfirst) THEN
                kjmax = lapw%nv(jintsp)
             ELSE
                kjmax = ki
             ENDIF
             ski = (/lapw%k1(ki,iintsp),lapw%k2(ki,iintsp),lapw%k3(ki,iintsp)/) + qssbti
             !--->       legendre polynomials
             DO kj = 1,kjmax
226
                plegend(kj,1) = DOT_PRODUCT(gk(kj,:,jintsp),gk(ki,:,iintsp))
227
                IF (l_socfirst) THEN
228
                    cross_k(kj,1)=gk(ki,2,jintsp)*gk(kj,3,iintsp)- gk(ki,3,jintsp)*gk(kj,2,iintsp)
229 230 231 232
                   cross_k(kj,2)=gk(ki,3,jintsp)*gk(kj,1,iintsp)- gk(ki,1,jintsp)*gk(kj,3,iintsp)
                   cross_k(kj,3)=gk(ki,1,jintsp)*gk(kj,2,iintsp)- gk(ki,2,jintsp)*gk(kj,1,iintsp)
                ENDIF
             END DO
233
             DO l = 1,MAXVAL(atoms%lmax) - 1
234 235 236 237 238 239 240 241 242 243 244 245 246
                plegend(:,l+1) = fleg1(l)*plegend(:,1)*plegend(:,l) - fleg2(l)*plegend(:,l-1)
                IF (l_socfirst) THEN
                   dplegend(:,l+1)=REAL(l+1)*plegend(:,l)+ plegend(:,1)*dplegend(:,l)
                ENDIF
             END DO
             !--->       loop over equivalent atoms
             n1 = 0
             DO n = 1,atoms%ntype

                IF (noco%l_noco) THEN
                   !--->          pk non-collinear
                   !--->             set up the spinors of this atom within global
                   !--->             spin-coordinateframe
247
                   CALL hsmt_spinor(isp,n, noco, input,chi, chi11, chi21, chi22,l_socfirst,&
248 249 250 251 252 253 254 255 256 257
                        isigma,isigma_x,isigma_y,isigma_z,chi11so,chi21so,chi22so,angso,chj,cross_k)
                ENDIF
                !---> pk non-collinear
                n0 = n1 + 1
                n1 = n1 + atoms%neq(n)
                rph(:,1) = 0.0
                cph(:,1) = 0.0
                DO nn = n0,n1
                   tnn = tpi_const*atoms%taual(:,nn)
                   !--->             set up phase factors
258
                   !$OMP SIMD PRIVATE(tmp1,tmp2,tmp3,tmp) 
259
                   DO kj = 1,kjmax
260 261 262 263 264 265 266
                      !rph(kj,1) = rph(kj,1) +&
                      !     COS(DOT_PRODUCT(ski-(/lapw%k1(kj,jintsp),lapw%k2(kj,jintsp),lapw%k3(kj,jintsp)/)+qssbtj,tnn))
                      tmp1 = (ski(1)-lapw%k1(kj,jintsp)+qssbtj(1)) * tnn(1)
                      tmp2 = (ski(2)-lapw%k2(kj,jintsp)+qssbtj(2)) * tnn(2)
                      tmp3 = (ski(3)-lapw%k3(kj,jintsp)+qssbtj(3)) * tnn(3)
                      tmp = cos(tmp1 + tmp2 + tmp3)
                      rph(kj,1) = rph(kj,1) + tmp
267
                   END DO
268 269 270 271 272 273 274 275 276 277 278 279 280 281
                   IF (.NOT.sym%invs) THEN
                      !--->                if the system does not posses inversion symmetry
                      !--->                the complex part of the exponential is needed.
                      !$OMP SIMD PRIVATE(tmp1,tmp2,tmp3,tmp) 
                      DO kj = 1,kjmax
                      !   cph(kj,1) = cph(kj,1) +&
                      !        SIN(DOT_PRODUCT((/lapw%k1(kj,jintsp),lapw%k2(kj,jintsp),lapw%k3(kj,jintsp)/)+qssbtj-ski,tnn))
                      tmp1 = (lapw%k1(kj,jintsp)+qssbtj(1)-ski(1)) * tnn(1)
                      tmp2 = (lapw%k2(kj,jintsp)+qssbtj(2)-ski(2)) * tnn(2)
                      tmp3 = (lapw%k3(kj,jintsp)+qssbtj(3)-ski(3)) * tnn(3)
                      tmp = sin(tmp1 + tmp2 + tmp3)
                      cph(kj,1) = cph(kj,1) + tmp
                      END DO
                   ENDIF
282 283 284 285
                END DO

                !--->          update overlap and l-diagonal hamiltonian matrix
                DO  l = 0,atoms%lmax(n)
286
                   IF (noco%l_constr.OR.l_socfirst) THEN
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
                      fjkiln = fj(ki,l,n,isp)
                      gjkiln = gj(ki,l,n,isp)
                   ELSE
                      fjkiln = fj(ki,l,n,iintsp)
                      gjkiln = gj(ki,l,n,iintsp)
                   ENDIF
                   !
                   w1 = 0.5 * ( usdus%uds(l,n,isp)*usdus%dus(l,n,isp) + &
                        usdus%us(l,n,isp)*usdus%duds(l,n,isp) )
                   apw_lo1 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( gjkiln * w1 +&
                        fjkiln * usdus%us(l,n,isp) * usdus%dus(l,n,isp) )
                   apw_lo2 = fl2p1(l) * 0.5 * atoms%rmt(n)**2 * ( fjkiln * w1 +&
                        gjkiln * usdus%uds(l,n,isp) * usdus%duds(l,n,isp) )
                   !
                   ddnln =  usdus%ddn(l,n,isp)
                   elall = el(l,n,isp)

                   IF ( noco%l_noco .AND. (.NOT. noco%l_ss) ) THEN
                      !--->             pk non-collinear
306
                      IF (noco%l_constr.OR.l_socfirst) THEN
307 308 309 310
                         DO kj = 1,ki
                            fct  = plegend(kj,l)*fl2p1(l)*&
                                 ( fjkiln*fj(kj,l,n,isp) + gjkiln*gj(kj,l,n,isp)*ddnln )

311 312
                            bbwa(kj) = CMPLX(rph(kj,1),cph(kj,1))*fct
                            aawa(kj) = CMPLX(rph(kj,1),cph(kj,1)) * ( &
313 314
                                 fct*elall + plegend(kj,l)*fl2p1bt(l)*&
                                 ( fjkiln*gj(kj,l,n,isp) + gjkiln*fj(kj,l,n,isp) ) )
315 316 317 318 319
                            IF (input%l_useapw) THEN
                               capw1 = CMPLX(rph(kj,1),cph(kj,1))*plegend(kj,l)&
                                    * ( apw_lo1 * fj(kj,l,n,isp) + apw_lo2 * gj(kj,l,n,isp) )
                               aawa(kj) = aawa(kj) + capw1
                            ENDIF
320 321 322 323 324 325
                         ENDDO
                      ELSE
                         DO kj = 1,ki
                            fct  = plegend(kj,l)*fl2p1(l)*&
                                 ( fjkiln*fj(kj,l,n,jintsp) + gjkiln*gj(kj,l,n,jintsp)*ddnln )

326 327
                            bbwa(kj) = CMPLX(rph(kj,1),cph(kj,1))*fct
                            aawa(kj) = CMPLX(rph(kj,1),cph(kj,1)) * (&
328 329
                                 fct*elall + plegend(kj,l)*fl2p1bt(l)*&
                                 ( fjkiln*gj(kj,l,n,jintsp) + gjkiln*fj(kj,l,n,jintsp) ) )
330 331 332 333 334
                            IF (input%l_useapw) THEN
                               capw1 = CMPLX(rph(kj,1),cph(kj,1))*plegend(kj,l)&
                                    * ( apw_lo1 * fj(kj,l,n,jintsp) + apw_lo2 * gj(kj,l,n,jintsp) )
                               aawa(kj) = aawa(kj) + capw1
                            ENDIF
335 336 337 338 339
                         ENDDO
                      ENDIF
                      !+||
                      IF ( kii+1.LE.lapw%nv(1) ) THEN
                         !--->                spin-up spin-up part
340 341
                         CALL CPP_BLAS_caxpy(ki,chi11,bbwa,1,hamOvlp%b_c(iii+1),1)
                         CALL CPP_BLAS_caxpy(ki,chi11,aawa,1,hamOvlp%a_c(iii+1),1)
342 343 344 345 346 347
                         !--->                spin-down spin-up part, upper triangle.
                         !--->                the help array is used to allow vectorization on
                         !--->                Cray PVP systems. it is mapped onto the hamiltonian
                         !--->                matrix after the setup is complete.
                         DO kj = 1,ki - 1
                            ii = iii + kj
348 349
                            aahlp(ii)=aahlp(ii)+CONJG(aawa(kj))*chi21
                            bbhlp(ii)=bbhlp(ii)+CONJG(bbwa(kj))*chi21
350 351 352 353 354 355 356 357 358
                         END DO
                      ENDIF
                      IF ( (kii+1.GT.lapw%nv(1)).OR.(n_size.EQ.1) ) THEN
                         IF (n_size.EQ.1) THEN
                            ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2
                         ELSE
                            ii = iii
                         ENDIF
                         !--->                spin-down spin-up part, lower triangle
359 360
                         CALL CPP_BLAS_caxpy(ki,chi21,bbwa,1,hamOvlp%b_c(ii+1),1)
                         CALL CPP_BLAS_caxpy(ki,chi21,aawa,1,hamOvlp%a_c(ii+1),1)
361 362
                         !--->                spin-down spin-down part
                         ii = ii + lapw%nv(1)+atoms%nlotot
363 364
                         CALL CPP_BLAS_caxpy(ki,chi22,bbwa,1,hamOvlp%b_c(ii+1),1)
                         CALL CPP_BLAS_caxpy(ki,chi22,aawa,1,hamOvlp%a_c(ii+1),1)
365 366 367 368 369 370 371 372 373
                      ENDIF
                      !-||
                      !--->                when fj and gj are available for both local spins
                      !--->                (isp), add the contribution from the constraint
                      !--->                B-field.
                      IF (noco%l_constr .AND. (isp .EQ. 2)) THEN
                         DO nsp = 1,input%jspins
                            IF (nsp.EQ.1) THEN
                               DO kj = 1,lapw%nv(1)
374 375
                                  aawa(kj) = (-0.5)*CMPLX(noco%b_con(1,n),noco%b_con(2,n))*&
                                       CMPLX(rph(kj,1),cph(kj,1))*&
376 377 378 379 380 381 382 383
                                       plegend(kj,l)*fl2p1(l)*(&
                                       + fj(ki,l,n,2)*fj(kj,l,n,1)*uun21(l,n)&
                                       + fj(ki,l,n,2)*gj(kj,l,n,1)*udn21(l,n)&
                                       + gj(ki,l,n,2)*fj(kj,l,n,1)*dun21(l,n)&
                                       + gj(ki,l,n,2)*gj(kj,l,n,1)*ddn21(l,n))
                               ENDDO
                            ELSE
                               DO kj = 1,lapw%nv(1)
384 385
                                  aawa(kj) = (-0.5)*CMPLX(noco%b_con(1,n),-noco%b_con(2,n))*&
                                       CMPLX(rph(kj,1),cph(kj,1))*&
386 387 388 389 390 391 392 393 394 395
                                       plegend(kj,l)*fl2p1(l)*(&
                                       + fj(ki,l,n,1)*fj(kj,l,n,2)*uun21(l,n)&
                                       + fj(ki,l,n,1)*gj(kj,l,n,2)*dun21(l,n)&
                                       + gj(ki,l,n,1)*fj(kj,l,n,2)*udn21(l,n)&
                                       + gj(ki,l,n,1)*gj(kj,l,n,2)*ddn21(l,n))
                               ENDDO
                            ENDIF
                            !--->                  spin-up spin-up part
                            ii = (ki-1)*(ki)/2
                            DO kj = 1,ki
396
                               hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chj(nsp,1,1,n)
397 398 399 400 401
                            ENDDO
                            !--->                  spin-down spin-down part
                            ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2 + &
                                 lapw%nv(1)+atoms%nlotot
                            DO kj = 1,ki
402
                               hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chj(nsp,2,2,n)
403 404 405 406
                            ENDDO
                            !--->                  spin-down spin-up part
                            ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2
                            DO kj = 1,lapw%nv(1)
407
                               hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chj(nsp,2,1,n)
408 409 410 411 412 413 414 415 416 417 418
                            ENDDO
                         ENDDO
                      ENDIF

                      !                 Add spin-orbit coupling
                      IF (isp.EQ.2) THEN
                         IF ((l.GT.0).AND.l_socfirst) THEN

                            DO j1 = 1,input%jspins
                               DO j2 = 1,input%jspins
                                  DO kj = 1,lapw%nv(1)
419
                                     aawa(kj)=CMPLX(rph(kj,1),cph(kj,1))*(&
420 421 422 423 424 425 426 427 428 429 430
                                          + fj(ki,l,n,j1)*fj(kj,l,n,j2)*rsoc%rsopp(n,l,j1,j2)&
                                          + fj(ki,l,n,j1)*gj(kj,l,n,j2)*rsoc%rsopdp(n,l,j1,j2)&
                                          + gj(ki,l,n,j1)*fj(kj,l,n,j2)*rsoc%rsoppd(n,l,j1,j2)&
                                          + gj(ki,l,n,j1)*gj(kj,l,n,j2)*rsoc%rsopdpd(n,l,j1,j2))&
                                          *dplegend(kj,l)*fl2p1(l)*angso(kj,j1,j2)
                                  ENDDO
                                  IF (n_size.EQ.1) THEN

                                     !--->                  spin-up spin-up part
                                     ii = (ki-1)*(ki)/2
                                     DO kj = 1,ki
431
                                        hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi11so(j1,j2)
432 433 434 435 436
                                     ENDDO
                                     !--->                  spin-down spin-down part
                                     ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2 +&
                                          lapw%nv(1)+atoms%nlotot
                                     DO kj = 1,ki
437
                                        hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi22so(j1,j2)
438 439 440 441
                                     ENDDO
                                     !--->                  spin-down spin-up part
                                     ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2
                                     DO kj = 1,lapw%nv(1)
442
                                        hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi21so(j1,j2)
443 444 445 446 447 448
                                     ENDDO

                                  ELSE  ! eigenvalue parallelization

                                     IF ( kii+1.LE.lapw%nv(1) ) THEN
                                        !--->                    spin-up spin-up part
449
                                        CALL CPP_BLAS_caxpy(ki,chi11so(j1,j2),aawa,1, hamOvlp%a_c(iii+1),1)
450 451 452 453

                                        !--->                    spin-down spin-up part, upper triangle.
                                        DO kj = 1,ki - 1
                                           ii = iii + kj
454
                                           aahlp(ii) = aahlp(ii) + CONJG(aawa(kj))*chi21so(j2,j1)
455 456 457 458 459
                                        END DO
                                     ENDIF
                                     IF  (kii+1.GT.lapw%nv(1)) THEN
                                        ii = iii
                                        !--->                    spin-down spin-up part, lower triangle
460
                                        CALL CPP_BLAS_caxpy(ki,chi21so(j1,j2),aawa,1, hamOvlp%a_c(ii+1),1)
461 462
                                        !--->                    spin-down spin-down part
                                        ii = ii + lapw%nv(1)+atoms%nlotot
463
                                        CALL CPP_BLAS_caxpy(ki,chi22so(j1,j2),aawa,1, hamOvlp%a_c(ii+1),1)
464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
                                     ENDIF

                                  ENDIF ! eigenvalue par.

                               ENDDO ! j2
                            ENDDO    ! j1
                         ENDIF       ! ( l > 0 ) & socfirst
                      ENDIF          ! (isp = 2)
                      !               End spin-orbit
                   ELSEIF ( noco%l_noco .AND. noco%l_ss ) THEN
                      IF ( iintsp.EQ.2 .AND. jintsp.EQ.1 ) THEN
                         kjmax = lapw%nv(1)
                      ELSE
                         kjmax = ki
                      ENDIF
                      DO kj = 1,kjmax
                         fct  = plegend(kj,l)*fl2p1(l)* ( fjkiln*fj(kj,l,n,jintsp) +&
                              gjkiln*gj(kj,l,n,jintsp)*ddnln )

483 484
                         bbwa(kj) = CMPLX(rph(kj,1),cph(kj,1))*fct
                         aawa(kj) = CMPLX(rph(kj,1),cph(kj,1)) * ( &
485 486 487 488 489 490 491
                              fct*elall + plegend(kj,l)*fl2p1bt(l)*&
                              ( fjkiln*gj(kj,l,n,jintsp) + gjkiln*fj(kj,l,n,jintsp) ) )
                      ENDDO
                      IF ( iintsp.EQ.1 .AND. jintsp.EQ.1 ) THEN
                         !--->                   spin-up spin-up part
                         ii = (ki-1)*(ki)/2
                         DO kj = 1,ki
492 493
                            hamOvlp%b_c(ii+kj) = hamOvlp%b_c(ii+kj) + bbwa(kj)*chi11
                            hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi11
494 495 496 497 498 499
                         ENDDO
                      ELSEIF ( iintsp.EQ.2 .AND. jintsp.EQ.2 ) THEN
                         !--->                   spin-down spin-down part
                         ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2 +&
                              lapw%nv(1)+atoms%nlotot
                         DO kj = 1,ki
500 501
                            hamOvlp%b_c(ii+kj) = hamOvlp%b_c(ii+kj) + bbwa(kj)*chi22
                            hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi22
502 503 504 505 506
                         ENDDO
                      ELSE
                         !--->                   spin-down spin-up part
                         ii = (lapw%nv(1)+atoms%nlotot+ki-1)*(lapw%nv(1)+atoms%nlotot+ki)/2
                         DO kj = 1,lapw%nv(1)
507 508
                            hamOvlp%b_c(ii+kj) = hamOvlp%b_c(ii+kj) + bbwa(kj)*chi21
                            hamOvlp%a_c(ii+kj) = hamOvlp%a_c(ii+kj) + aawa(kj)*chi21
509 510 511 512
                         ENDDO
                      ENDIF
                      !--->             pk non-collinear
                   ELSE
513 514 515 516
                      IF (l_real) THEN
                         DO kj = 1,ki
                            fct  = plegend(kj,l)*fl2p1(l)*&
                                 ( fjkiln*fj(kj,l,n,jintsp) + gjkiln*gj(kj,l,n,jintsp)*ddnln )
517

518
                            ij = iii + kj
519 520
                            hamOvlp%b_r(ij) = hamOvlp%b_r(ij) + rph(kj,1) * fct
                            hamOvlp%a_r(ij) = hamOvlp%a_r(ij) + rph(kj,1) * ( fct * elall + plegend(kj,l) * fl2p1bt(l) *&
521 522 523 524 525
                                 ( fjkiln*gj(kj,l,n,jintsp) + gjkiln*fj(kj,l,n,jintsp) ) )
                            !+APW
                            IF (input%l_useapw) THEN
                               apw1 = rph(kj,1) * plegend(kj,l)  * &
                                    ( apw_lo1 * fj(kj,l,n,iintsp) + apw_lo2 * gj(kj,l,n,iintsp) )
526
                               hamOvlp%a_r(ij) = hamOvlp%a_r(ij) + apw1
527 528 529 530 531 532 533 534 535
                            ENDIF
                            !-APW
                         ENDDO
                      ELSE
                         DO kj = 1,ki
                            fct  = plegend(kj,l)*fl2p1(l)*&
                                 ( fjkiln*fj(kj,l,n,jintsp) + gjkiln*gj(kj,l,n,jintsp)*ddnln )

                            ij = iii + kj
536 537
                            hamOvlp%b_c(ij) = hamOvlp%b_c(ij) + CMPLX(rph(kj,1),cph(kj,1))*fct
                            hamOvlp%a_c(ij) = hamOvlp%a_c(ij) + CMPLX(rph(kj,1),cph(kj,1)) * (fct*elall + plegend(kj,l)*fl2p1bt(l) *&
538 539 540 541 542
                                 ( fjkiln*gj(kj,l,n,jintsp) + gjkiln*fj(kj,l,n,jintsp) ) )
                            IF (input%l_useapw) THEN

                               capw1 = CMPLX(rph(kj,1),cph(kj,1))*plegend(kj,l)&
                                    * ( apw_lo1 * fj(kj,l,n,iintsp) + apw_lo2 * gj(kj,l,n,iintsp) )
543
                               hamOvlp%a_c(ij) = hamOvlp%a_c(ij) + capw1
544 545 546
                            ENDIF
                         END DO
                      ENDIF
547 548 549
                   ENDIF

                   !--->          end loop over l
550
                ENDDO
551 552

                !--->       end loop over atom types (ntype)
553
             ENDDO
554 555 556

             !--->    end loop over ki

557
          ENDDO
558 559 560 561
          !$OMP END DO
          !---> end loops over interstitial spins
       ENDDO ! jintsp = 1,iintsp
    ENDDO   ! iintsp = 1,nintsp
562
    DEALLOCATE(plegend)
563
    IF (l_socfirst) DEALLOCATE(dplegend,cross_k)
564
    DEALLOCATE(rph,cph)
565 566 567 568 569 570 571 572 573 574 575
    !$OMP END PARALLEL


    !---> pk non-collinear
    IF ( noco%l_noco .AND. (.NOT. noco%l_ss) ) THEN
       !--->    map the hamiltonian help array onto the hamitonian matrix
       IF (n_size.EQ.1) THEN
          DO ki = 1,lapw%nv(1)
             ii = (ki-1)*(ki)/2
             DO kj = 1,ki-1
                ij = (lapw%nv(1)+atoms%nlotot+kj-1)*(lapw%nv(1)+atoms%nlotot+kj)/2 + ki
576 577
                hamOvlp%a_c(ij) = hamOvlp%a_c(ij) + aahlp(ii+kj)
                hamOvlp%b_c(ij) = hamOvlp%b_c(ij) + bbhlp(ii+kj)
578 579 580 581
             ENDDO
          ENDDO

       ELSE
582
#ifdef CPP_MPI
583 584
          CALL mingeselle(SUB_COMM,n_size,n_rank,lapw%nv, aahlp, .FALSE.,aa_rDummy,hamOvlp%a_c)
          CALL mingeselle(SUB_COMM,n_size,n_rank,lapw%nv, bbhlp, .FALSE.,aa_rDummy,hamOvlp%b_c)
585
#endif
586 587 588 589 590 591 592 593
       ENDIF
    ENDIF



    RETURN
  END SUBROUTINE hsmt_sph
END MODULE m_hsmt_sph