From 3dd019e2cc44cc09d7ddb05fd4bff3286a76a02b Mon Sep 17 00:00:00 2001 From: Philipp Ruessmann <p.ruessmann@fz-juelich.de> Date: Mon, 4 Jul 2022 15:08:00 +0200 Subject: [PATCH] Fix merge of new version of radial solver --- source/KKRhost/main1c.F90 | 15 +- source/KKRhost/main2.F90 | 2 +- source/KKRhost/rhooutnew.F90 | 39 ++- source/KKRhost/rhovalnew.F90 | 281 ++++++++++-------- source/KKRhost/tmat_newsolver.F90 | 142 +++++---- source/common/bfield.f90 | 36 +-- .../radial_solver_Chebychev/calcsph.f90 | 11 - .../create_newmesh.f90 | 19 +- .../rll_global_solutions.F90 | 53 +++- .../common/radial_solver_Chebychev/rllsll.F90 | 2 +- .../rllsllsourceterms.f90 | 3 +- .../sll_global_solutions.F90 | 75 +++-- 12 files changed, 381 insertions(+), 297 deletions(-) diff --git a/source/KKRhost/main1c.F90 b/source/KKRhost/main1c.F90 index 2fc07e0fb..c80aabcf3 100644 --- a/source/KKRhost/main1c.F90 +++ b/source/KKRhost/main1c.F90 @@ -34,8 +34,8 @@ contains use mod_timing #endif use mod_datatypes, only: dp - use mod_runoptions, only: calc_gmat_lm_full, fix_nonco_angles, relax_SpinAngle_Dirac, use_Chebychev_solver, & - use_decimation, use_qdos, write_DOS, write_complex_qdos, write_density_ascii, write_rho2ns, write_DOS_lm, & + use mod_runoptions, only: calc_gmat_lm_full, relax_SpinAngle_Dirac, use_Chebychev_solver, & + use_decimation, use_qdos, write_DOS, write_density_ascii, write_rho2ns, write_DOS_lm, & decouple_spin_cheby, disable_print_serialnumber, check_custom_testopt, write_BdG_tests, use_ldau, write_npy, & write_all_denlm use mod_constants, only: czero, pi @@ -183,8 +183,7 @@ contains integer :: ierr, i_all integer :: ntot1 integer :: ihelp - integer :: idim, nranks_local, myrank_ie_tmp - complex (kind=dp) :: dentot ! qdos + integer :: nranks_local, myrank_ie_tmp integer, dimension (0:nranks-1) :: ntot_pt integer, dimension (0:nranks-1) :: ioff_pt complex (kind=dp), dimension (:, :), allocatable :: work @@ -530,7 +529,6 @@ contains ! copy temporary arrays to proper place in big den and espv arrays den(0:lmaxd1, 1:ielast, 1:nqdos, ipot2+1:ipot2+nspin/(nspin-korbit)*(1+kBdG)**2) = dentemp(0:lmaxd1, 1:ielast, 1:nqdos, 1:nspin/(nspin-korbit)*(1+kBdG)**2) espv(0:lmaxd1, ipot2+1:ipot2+2/(nspin-korbit)*(1+kBdG)**2) = espv_work(0:lmaxd1, 1:2/(nspin-korbit)*(1+kBdG)**2) - #ifdef CPP_TIMING call timing_pause('main1c - rhovalnew') #endif @@ -685,7 +683,7 @@ contains ! move writeout of qdos file here if (use_qdos) then ! first communicate den array to write out qdos files - if (nranks > 1) call mympi_comm_qdos_den(lmaxd1, ielast, nqdos, npotd, kBdG, lmmaxd, den, denlm) + if (nranks > 1) call mympi_comm_qdos_den(lmaxd1, lmmaxd, ielast, nqdos, npotd, kBdG, den, denlm) ! now write out only on main rank if (myrank==master) then @@ -747,7 +745,6 @@ contains call mympi_main1c_comm_newsosol2(lmaxd1, lmmaxd, ielast, nqdos, npotd, natyp, lmpotd, irmd, mmaxd, den, denlm, muorb, espv, r2nef, rho2ns, denefat, denef, denmatn, & angles_new, totmoment, bconstr, t_mpi_c_grid%mympi_comm_ie) end if - #endif #ifdef CPP_TIMING @@ -803,8 +800,6 @@ contains call timing_start('main1c - serial part') #endif - - ! In case of Lloyds formula renormalize valence charge if (lly>0) then lmaxp1 = lmax @@ -1088,7 +1083,7 @@ contains complex (kind=dp), dimension (:), allocatable, intent(inout) :: cdosat1 complex (kind=dp), dimension (:, :), allocatable, intent(inout) :: cdos_lly integer, intent(out) :: nqdos - integer :: i_stat, i1, iq + integer :: i_stat if (allocmode==1) then diff --git a/source/KKRhost/main2.F90 b/source/KKRhost/main2.F90 index 5657e8f15..a49c79158 100644 --- a/source/KKRhost/main2.F90 +++ b/source/KKRhost/main2.F90 @@ -35,7 +35,7 @@ contains use mod_runoptions, only: disable_charge_neutrality, print_program_flow, relax_SpinAngle_Dirac, & search_Efermi, simulate_asa, slow_mixing_Efermi, symmetrize_potential_cubic, symmetrize_potential_madelung, & use_decimation, use_rigid_Efermi, use_semicore, use_spherical_potential_only, write_deci_tmat, write_kkrimp_input, & - write_madelung_file, write_potential_tests, write_rho2ns, use_Chebychev_solver, use_semi_circle_contour, & + write_madelung_file, write_potential_tests, write_rho2ns, use_Chebychev_solver, & write_denvis_input, no_core_density use global_variables, only: krel, ipand, npotd, natomimpd, lmxspd, iemxd, nspotd, irid, ngshd, linterface, & nfund, ncelld, irmd, nembd1, nembd, irmind, lmmaxd, wlength, natypd, naezd, lmpotd, lpotd, lmaxd, nspind, nspotd, & diff --git a/source/KKRhost/rhooutnew.F90 b/source/KKRhost/rhooutnew.F90 index e07b498ec..a1251f473 100644 --- a/source/KKRhost/rhooutnew.F90 +++ b/source/KKRhost/rhooutnew.F90 @@ -25,7 +25,7 @@ contains !------------------------------------------------------------------------------------ subroutine rhooutnew(nsra, lmax, gmatll, ek, lmpot, df, npan_tot, ncheb, cleb, icleb, iend, irmdnew, thetasnew, ifunm, imt1, lmsp, & rll, ull, rllleft, sllleft, cden, cdenlm, cdenns, rho2nsc, corbital, gflle_part, rpan_intervall, ipan_intervall, nspin, & - ek2, eryd, ie, ielast, iatom) + ek2, eryd, ie, ielast) use mod_mympi, only: myrank use mod_constants, only: cone,czero,pi @@ -55,7 +55,6 @@ contains integer, intent (in) :: corbital integer, intent (in) :: npan_tot integer, intent (in) :: ie, ielast !! index of current energy point and maximum number of energy points - integer, intent(in) :: iatom !! atom index complex (kind=dp), intent (in) :: ek, ek2 !! kappa prefactor (sqrt(E) without relativistic) complex (kind=dp), intent (in) :: eryd !! complex energy point in Ry complex (kind=dp), intent (in) :: df @@ -85,7 +84,6 @@ contains ! .. Local variables integer :: lmmax0d !! lm matrix size without spin doubling integer :: ir, jspin, lm1, lm2, lm3, m1, l1, j, ifun - integer :: i_stat integer :: i_BdG, j_BdG, soffset, lmoffset, lmoffset2 ! for BdG mode real (kind=dp) :: c0ll complex (kind=dp) :: cltdf, alpha, dftmp @@ -93,7 +91,6 @@ contains integer, dimension (4) :: lmshift2 complex (kind=dp), dimension (lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), 3) :: loperator complex (kind=dp), dimension (lmmaxd, lmmaxd) :: mat_in, mat_out - character (len=100) :: filename ! test writeout in BdG mode ! .. Local allocatable arrays complex (kind=dp), dimension (:), allocatable :: cwr ! lmlm-dos @@ -179,16 +176,19 @@ contains if (kBdG>0) qnsi(1+lmmaxd:2*lmmaxd, 1+lmmaxd:2*lmmaxd) = ek2*qnsi(1+lmmaxd:2*lmmaxd, 1+lmmaxd:2*lmmaxd) ! wr = kappa * ull * sllleft + !!! call zgemm(..., ek, ull, ..., slleft, ..., czero, wr(1,1,ir), ...) call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), cone, pnsi, & lmmaxd*(1+kBdG), qnsi, lmmaxd*(1+kBdG), czero, wr(1,1,ir), lmmaxd*(1+kBdG)) - + ! qnsi = rllleft * Gmat + !!! call zgemm(..., cone, rllleft, ..., gmatll, ..., czero, QNSI, lmmaxd) pnsi(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG)) = rllleft(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG), ir) call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), alpha, pnsi, & lmmaxd*(1+kBdG), gmatll, lmmaxd*(1+kBdG), czero, qnsi, lmmaxd*(1+kBdG)) ! ------ wr ----------- pnsi --- qnsi ----- ! wr = (kappa * ull * sllleft) + rll * (Gmat * rllleft) + !!! call zgemm(..., cone, rll, ..., QNSI, ..., cone, wr(1,1,ir), lmmaxd) pnsi(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG)) = rll(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG), ir) call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), cone, pnsi, & lmmaxd*(1+kBdG), qnsi, lmmaxd*(1+kBdG), cone, wr(1,1,ir), lmmaxd*(1+kBdG)) @@ -204,16 +204,19 @@ contains if (kBdG>0) qnsi(1+lmmaxd:2*lmmaxd, 1+lmmaxd:2*lmmaxd) = ek2*qnsi(1+lmmaxd:2*lmmaxd, 1+lmmaxd:2*lmmaxd) ! wr = kappa * ull * -sllleft + !!! call zgemm(..., ek, ull, ..., -sllleft, ..., cone, wr(1,1,ir), lmmaxd) call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), cone, pnsi, & lmmaxd*(1+kBdG), qnsi, lmmaxd*(1+kBdG), cone, wr(1,1,ir), lmmaxd*(1+kBdG)) ! qnsi = -rllleft * Gmat + !!! call zgemm(..., cone, -rllleft, ..., gmatll, ..., czero, QNSI, lmmaxd) pnsi(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG)) = -rllleft(1+lmmaxd*(1+kBdG):2*lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG), ir) call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), cone, pnsi, & - lmmaxd*(1+kBdG), gmatll, lmmaxd*(1+kBdG), czero, qnsi, lmmaxd*(1+kBdG)) + lmmaxd*(1+kBdG), gmatll, lmmaxd*(1+kBdG), czero, qnsi, lmmaxd*(1+kBdG)) ! ------- wr ----------- pnsi ---- qnsi ----- ! wr = (kappa * ull * -sllleft) + rll * (Gmat * -rllleft) + !!! call zgemm(..., cone, rll, ..., QNSI, ..., cone, wr(1,1,ir), lmmaxd) pnsi(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG)) = rll(1+lmmaxd*(1+kBdG):2*lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG), ir) call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), cone, pnsi, & lmmaxd*(1+kBdG), qnsi, lmmaxd*(1+kBdG), cone, wr(1,1,ir), lmmaxd*(1+kBdG)) @@ -280,9 +283,6 @@ contains end do ! IR !$omp end do - ! test uncomment this here since we now set it in rhovalnew from cdenlm - !if (kBdG>0) t_BdG%den_lm_ir(iatom, :, :, :) = den_lm_ir(:, :, :) - ! deallocate working arrays call allocate_work_thread(-1, irmdnew, cwr, pnsi, qnsi) @@ -493,7 +493,6 @@ contains use mod_datatypes, only: dp use mod_constants, only: czero, cone, kb use global_variables, only: efermi_in, kBdG, Temp_BdG, wfac_renorm - use mod_wunfiles, only: t_params use mod_runoptions, only: check_custom_testopt use mod_BdG, only: t_BdG implicit none @@ -630,22 +629,22 @@ contains if (allocmode==1) then ! allocate and initialize to zero: wr, cwr, wr1, pnsi and qnsi arrays - allocate (wr(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew), stat=i_stat) + allocate(wr(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew), stat=i_stat) call memocc(i_stat, product(shape(wr))*kind(wr), 'WR', 'RHOOUTNEW') wr = czero - allocate (wr1(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew), stat=i_stat) + allocate(wr1(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew), stat=i_stat) call memocc(i_stat, product(shape(wr1))*kind(wr1), 'WR1', 'RHOOUTNEW') wr1 = czero elseif (allocmode==-1) then ! deallocate: wr, cwr, wr1, pnsi and qnsi arrays itmp = -product(shape(wr))*kind(wr) if (allocated(wr)) then - deallocate (wr, stat=i_stat) + deallocate(wr, stat=i_stat) call memocc(i_stat, itmp, 'WR', 'RHOOUTNEW') end if itmp = -product(shape(wr1))*kind(wr1) if (allocated(wr1)) then - deallocate (wr1, stat=i_stat) + deallocate(wr1, stat=i_stat) call memocc(i_stat, itmp, 'WR1', 'RHOOUTNEW') end if else @@ -680,30 +679,30 @@ contains if (allocmode==1) then ! allocate and initialize to zero: wr, cwr, wr1, pnsi and qnsi arrays - allocate (cwr(irmdnew), stat=i_stat) + allocate(cwr(irmdnew), stat=i_stat) call memocc(i_stat, product(shape(cwr))*kind(cwr), 'CWR', 'RHOOUTNEW') cwr = czero - allocate (qnsi(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat) + allocate(qnsi(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat) call memocc(i_stat, product(shape(qnsi))*kind(qnsi), 'QNSI', 'RHOOUTNEW') qnsi = czero - allocate (pnsi(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat) + allocate(pnsi(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat) call memocc(i_stat, product(shape(pnsi))*kind(pnsi), 'PNSI', 'RHOOUTNEW') pnsi = czero elseif (allocmode==-1) then ! deallocate: wr, cwr, wr1, pnsi and qnsi arrays itmp = -product(shape(cwr))*kind(cwr) if (allocated(cwr)) then - deallocate (cwr, stat=i_stat) + deallocate(cwr, stat=i_stat) call memocc(i_stat, itmp, 'CWR', 'RHOOUTNEW') end if itmp = -product(shape(qnsi))*kind(qnsi) if (allocated(qnsi)) then - deallocate (qnsi, stat=i_stat) + deallocate(qnsi, stat=i_stat) call memocc(i_stat, itmp, 'QNSI', 'RHOOUTNEW') end if itmp = -product(shape(pnsi))*kind(pnsi) if (allocated(pnsi)) then - deallocate (pnsi, stat=i_stat) + deallocate(pnsi, stat=i_stat) call memocc(i_stat, itmp, 'PNSI', 'RHOOUTNEW') end if else diff --git a/source/KKRhost/rhovalnew.F90 b/source/KKRhost/rhovalnew.F90 index 8aa3499ed..e50b3fa45 100644 --- a/source/KKRhost/rhovalnew.F90 +++ b/source/KKRhost/rhovalnew.F90 @@ -44,9 +44,9 @@ contains use mod_types, only: t_tgmat, t_inc use mod_mympi, only: myrank, master, nranks use mod_save_wavefun, only: t_wavefunctions, read_wavefunc - use mod_runoptions, only: calc_exchange_couplings, calc_gmat_lm_full, disable_tmat_sratrick, fix_nonco_angles, & + use mod_runoptions, only: calc_exchange_couplings, calc_gmat_lm_full, disable_tmat_sratrick, & use_qdos, write_pkkr_operators, write_DOS_lm, set_cheby_nospeedup, & - decouple_spin_cheby, disable_print_serialnumber, write_BdG_tests, check_custom_testopt, set_gmat_to_zero, write_npy, calc_chi_ns + decouple_spin_cheby, write_BdG_tests, check_custom_testopt, set_gmat_to_zero, write_npy, calc_chi_ns use global_variables, only: lmmaxd, iemxd, ncleb, lmxspd, irmd, ntotd, nrmaxd, lmpotd, nspotd, nfund, korbit, mmaxd, nspind, kBdG, efermi_in, angles_cutoff use mod_constants, only: czero, cvlight, cone, pi, ci use mod_profiling, only: memocc @@ -62,7 +62,7 @@ contains use mod_vllmat, only: vllmat use mod_tmatnewsolver, only: prepare_socpotential, get_sourceterms use mod_BdG, only: t_BdG, reshape_BdG - use mod_qdos, only: read_qvec, write_qdos + use mod_qdos, only: write_qdos use m_npy, only: save_npy use mod_wunfiles, only: t_params use :: mod_torque, only: calc_torque @@ -120,12 +120,11 @@ contains integer :: i_BdG, j_BdG, soffset, soffset2, soffset3 ! for BdG mode integer :: lmmax0d !! (lmax+1)**2 integer :: lmaxd1 - integer :: ir, irec, use_sratrick, nvec, lm1, lm2, ie, irmdnew, imt1, jspin, idim, iorb, l1 - integer :: i_stat, i_all - integer :: iq, nqdos ! qdos ruess: number of qdos points + integer :: ir, irec, use_sratrick, nvec, lm1, lm2, ie, irmdnew, imt1, jspin, idim, iorb + integer :: iq, nqdos ! qdos integer :: lrecgflle, ierr ! lmlm-dos integer :: lmlo, lmhi, is, js, mmax ! LDAU - integer :: ix, m1 ! qdos ruess + integer :: m1 logical :: calcrll, calcsll, calctmat @@ -163,7 +162,7 @@ contains ! .. Local allocatable arrays - real (kind=dp), dimension (:, :), allocatable :: qvec ! qdos ruess: q-vectors for qdos + real (kind=dp), dimension (:, :), allocatable :: qvec ! qdos: q-vectors real (kind=dp), dimension (:, :, :), allocatable :: vins ! vins in the new mesh!! corresponds to the vinsnew in main1c but only for the two spin channels complex (kind=dp), dimension (:, :), allocatable :: tmatsph complex (kind=dp), dimension (:, :), allocatable :: cdentemp @@ -198,11 +197,6 @@ contains complex (kind=dp), dimension (:, :, :, :), allocatable :: r2nefc_loop complex (kind=dp), dimension (:, :, :, :), allocatable :: rho2nsc_loop -#ifdef CPP_MPI - complex (kind=dp), dimension (2) :: dentot ! qdos ruess - ! communication - complex (kind=dp), dimension (:, :, :, :), allocatable :: workc -#endif ! OMP - number of threads, thread id integer :: nth, ith integer :: ie_start, ie_end, ie_num @@ -305,29 +299,12 @@ contains call timing_pause('rhovalnew - prepare pot') #endif - nqdos = 1 ! qdos ruess - if (use_qdos) then ! qdos ruess - ! Read BZ path for qdos calculation: ! qdos ruess - call read_qvec(qvec) ! qdos ruess - nqdos = size(qvec, dim=2) ! find number of qvec ! qdos ruess - ! Change allocation for GFLLE to be suitabel for qdos run ! qdos ruess - i_all = -product(shape(gflle))*kind(gflle) ! qdos ruess - deallocate (gflle, stat=i_stat) ! qdos ruess - call memocc(i_stat, i_all, 'GFLLE', 'RHOVALNEW') ! qdos ruess - i_all = -product(shape(den))*kind(den) ! qdos ruess - deallocate (den, stat=i_stat) ! qdos ruess - call memocc(i_stat, i_all, 'DEN', 'RHOVALNEW') ! qdos ruess - i_all = -product(shape(denlm))*kind(denlm) ! qdos ruess - deallocate (denlm, stat=i_stat) ! qdos ruess - call memocc(i_stat, i_all, 'DENLM', 'RHOVALNEW') ! qdos ruess - ! now reallocate with larger nqdos dimension ! qdos ruess - allocate (gflle(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),ielast,nqdos), stat=i_stat) ! qdos ruess - call memocc(i_stat, product(shape(gflle))*kind(gflle), 'GFLLE', 'RHOVALNEW') ! qdos ruess - allocate (den(0:lmaxd1,ielast,nqdos,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat) ! qdos ruess - call memocc(i_stat, product(shape(den))*kind(den), 'DEN', 'RHOVALNEW') ! qdos ruess - allocate (denlm(lmmax0d,ielast,nqdos,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat) ! qdos ruess - call memocc(i_stat, product(shape(denlm))*kind(denlm), 'DENLM', 'RHOVALNEW') ! qdos ruess - end if ! use_qdos ! qdos ruess + nqdos = 1 + if (use_qdos) then + ! read qvec.dat and change size of arrays + call change_allocations_qdos(lmmaxd, kBdG, ielast, lmaxd1, nspin, korbit, & + lmmax0d, nqdos, qvec, gflle, den, denlm) + end if ! use_qdos #ifdef CPP_MPI iatom_myrank = iatom - t_mpi_c_grid%ioff_pt1(t_mpi_c_grid%myrank_ie) ! lmlm-dos ruess @@ -472,20 +449,30 @@ contains ! faster calculation of RLL. ! no irregular solutions SLL are needed in self-consistent iterations ! because the density depends only on RLL, RLLLEFT and SLLLEFT + calctmat = .true. + calcrll = .true. + ! compute rll only (not sll since usually not needed) + if (calc_exchange_couplings .or. write_pkkr_operators) then + calcsll = .true. + else + calcsll = .false. + end if + if (.not.set_cheby_nospeedup .and. .not. (calc_exchange_couplings .or. write_pkkr_operators)) then call rll_global_solutions(rpan_intervall, rnew, vnspll(:,:,:,ith), ull(:,:,:,ith), rll(:,:,:,ith), tmatll, ncheb, npan_tot, & lmmaxd*(1+kBdG), nvec*lmmaxd*(1+kBdG), nsra*(1+korbit)*(lmax+1)*(1+kBdG), irmdnew, nsra, jlk_index, & hlk(:,:,ith), jlk(:,:,ith), hlk2(:,:,ith), jlk2(:,:,ith), gmatprefactor, use_sratrick, alphall) else - calcsll = .false. + call rllsll(rpan_intervall, rnew, vnspll(:,:,:,ith), rll(:,:,:,ith), sll(:,:,:,ith), tmatll, ncheb, npan_tot, & + lmmaxd*(1+kBdG), nvec*lmmaxd*(1+kBdG), nsra*(lmax+1)*(1+korbit)*(1+kBdG), irmdnew, nsra, jlk_index, hlk(:,:,ith), jlk(:,:,ith), & + hlk2(:,:,ith), jlk2(:,:,ith), gmatprefactor, use_sratrick, alphall, calcrll, calcsll, calctmat) + ! with old rllsll interface we need this for rhooutnew + ull(:, :, :, ith) = rll(:, :, :, ith) end if - call rllsll(rpan_intervall, rnew, vnspll(:,:,:,ith), rll(:,:,:,ith), sll(:,:,:,ith), tmatll, ncheb, npan_tot, & - lmmaxd*(1+kBdG), nvec*lmmaxd*(1+kBdG), nsra*(lmax+1)*(1+korbit)*(1+kBdG), irmdnew, nsra, jlk_index, hlk(:,:,ith), jlk(:,:,ith), & - hlk2(:,:,ith), jlk2(:,:,ith), gmatprefactor, use_sratrick, alphall, calcrll, calcsll, calctmat) if (nsra==2) then ull(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = ull(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight rll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = rll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight - sll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = sll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight + if (calcsll) sll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = sll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight end if if (write_BdG_tests .and. check_custom_testopt('rhovalnew_rll')) then write(22211, '(A,30i5)') '#', shape(rll(:,:,:,ith)) @@ -536,6 +523,11 @@ contains tmattemp = czero alphall = czero ! faster calculation of RLLLEFT and SLLLEFT. + ! compute rllleft and + ! calculate sllleft + calcrll = .true. + calcsll = .true. + calctmat = .false. if (.not.set_cheby_nospeedup .and. .not. (calc_exchange_couplings .or. write_pkkr_operators)) then call rll_global_solutions(rpan_intervall, rnew, vnspll(:,:,:,ith), ullleft(:,:,:,ith), rllleft(:,:,:,ith), tmattemp, & ncheb, npan_tot, lmmaxd*(1+kBdG), nvec*lmmaxd*(1+kBdG), nsra*(1+korbit)*(lmax+1)*(1+kBdG), irmdnew, nsra, jlk_index, & @@ -547,6 +539,9 @@ contains call rllsll(rpan_intervall, rnew, vnspll(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), tmattemp, ncheb, npan_tot, & lmmaxd*(1+kBdG), nvec*lmmaxd*(1+kBdG), nsra*(1+korbit)*(lmax+1)*(1+kBdG), irmdnew, nsra, & jlk_index, hlk2(:,:,ith), jlk2(:,:,ith), hlk(:,:,ith), jlk(:,:,ith), gmatprefactor, use_sratrick, alphall, calcrll, calcsll, calctmat) + ! ! testing + ! ull(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = rllleft(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) + ! ull(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = ull(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight end if if (nsra==2) then ullleft(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = ullleft(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight @@ -609,7 +604,7 @@ contains call timing_start('rhovalnew - rhooutnew') #endif call rhooutnew(nsra, lmax, gmatll, ek, lmpotd, df, npan_tot, ncheb, cleb, icleb, iend, irmdnew, thetasnew, ifunm, imt1, lmsp, rll(:,:,:,ith), & ! SLL(:,:,:,ith), commented out since sll is not used in rhooutnew - ull(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), cden(:,:,:,ith), cdenlm(:,:,:,ith), cdenns(:,:,ith), rho2nsc_loop(:,:,:,ie), 0, gflle(:,:,ie,iq), rpan_intervall, ipan_intervall, nspin/(nspin-korbit), ek2, eryd, ie, ielast, iatom) + ull(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), cden(:,:,:,ith), cdenlm(:,:,:,ith), cdenns(:,:,ith), rho2nsc_loop(:,:,:,ie), 0, gflle(:,:,ie,iq), rpan_intervall, ipan_intervall, nspin/(nspin-korbit), ek2, eryd, ie, ielast) #ifdef CPP_TIMING call timing_pause('rhovalnew - rhooutnew') @@ -783,7 +778,7 @@ contains #endif call rhooutnew(nsra, lmax, gmatll, ek, lmpotd, cone, npan_tot, ncheb, cleb, icleb, iend, irmdnew, thetasnew, ifunm, imt1, lmsp, rll(:,:,:,ith), & ! SLL(:,:,:,ith), ! commented out since sll is not used in rhooutnew ull(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), cden(:,:,:,ith), cdenlm(:,:,:,ith), cdenns(:,:,ith), r2nefc_loop(:,:,:,ith), 0, gflle_part(:,:,ith), rpan_intervall, & - ipan_intervall, nspin/(nspin-korbit), ek2, eryd, -1, ielast, iatom) + ipan_intervall, nspin/(nspin-korbit), ek2, eryd, -1, ielast) #ifdef CPP_TIMING call timing_pause('rhovalnew - rhooutnew') #endif @@ -803,7 +798,7 @@ contains call timing_start('rhovalnew - rhooutnew') #endif call rhooutnew(nsra, lmax, gmatll, ek, lmpotd, cone, npan_tot, ncheb, cleb, icleb, iend, irmdnew, thetasnew, ifunm, imt1, lmsp, rll(:,:,:,ith), & ! SLL(:,:,:,ith), ! commented out since sll is not used in rhooutnew - ull(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), cden(:,:,:,ith), cdenlm(:,:,:,ith), cdenns(:,:,ith), r2orbc(:,:,:,ith), iorb, gflle_part(:,:,ith), rpan_intervall, ipan_intervall, nspin, ek2, eryd, ie, ielast, iatom) + ull(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), cden(:,:,:,ith), cdenlm(:,:,:,ith), cdenns(:,:,ith), r2orbc(:,:,:,ith), iorb, gflle_part(:,:,ith), rpan_intervall, ipan_intervall, nspin, ek2, eryd, ie, ielast) #ifdef CPP_TIMING call timing_pause('rhovalnew - rhooutnew') #endif @@ -960,7 +955,7 @@ contains t_params%thetasnew(1:irmdnew,:,t_params%ntcell(iatom)), bconstr, & t_params%fixdir(iatom), t_params%ntcell(iatom)) if (t_inc%i_write>1) write (1337,'("calc_torque: myrank=",i8," iatom=",i8," bfield_constr=",3es16.8," mspin=",es16.8)') myrank, iatom, bconstr(1:3), bconstr(4) - end if + end if ! ltorque ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! LDAU ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -995,8 +990,7 @@ contains ! omp: moved write-out of dos files out of parallel energy loop ! Write out lm-dos: if (write_DOS_lm) then - ! qdos ruess - call write_lmdos(ielast, ez, wez, denlm, iatom, nspin, lmmax0d, irmdnew, ispin) + call write_lmdos(ielast, ez, wez, denlm, iatom, nspin, lmmax0d, ispin) end if ! write_dos_lm end if ! .not. use_qdos @@ -1160,7 +1154,7 @@ contains use mod_datatypes, only: dp use mod_constants, only: czero use global_variables, only: irmd - use mod_profiling, only: memocc + use mod_profiling, only: memocc implicit none ! interface @@ -1206,80 +1200,80 @@ contains if (imode>0) then ! potential arrays - allocate (vnspll(nsra*lmmaxd*(1+kBdG),nsra*lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) + allocate(vnspll(nsra*lmmaxd*(1+kBdG),nsra*lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(vnspll))*kind(vnspll), 'VNSPLL', 'RHOVALNEW') - allocate (vnspll0(lmmaxd,lmmaxd,irmdnew), stat=i_stat) + allocate(vnspll0(lmmaxd,lmmaxd,irmdnew), stat=i_stat) call memocc(i_stat, product(shape(vnspll0))*kind(vnspll0), 'VNSPLL0', 'RHOVALNEW') - allocate (vnspll1(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) + allocate(vnspll1(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(vnspll1))*kind(vnspll1), 'VNSPLL1', 'RHOVALNEW') - allocate (vins(irmdnew,lmpotd,nspin/(nspin-korbit)), stat=i_stat) + allocate(vins(irmdnew,lmpotd,nspin/(nspin-korbit)), stat=i_stat) call memocc(i_stat, product(shape(vins))*kind(vins), 'VINS', 'RHOVALNEW') ! wave function source terms - allocate (hlk(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) + allocate(hlk(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(hlk))*kind(hlk), 'HLK', 'RHOVALNEW') - allocate (jlk(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) + allocate(jlk(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(jlk))*kind(jlk), 'JLK', 'RHOVALNEW') - allocate (hlk2(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) + allocate(hlk2(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(hlk2))*kind(hlk2), 'HLK2', 'RHOVALNEW') - allocate (jlk2(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) + allocate(jlk2(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(jlk2))*kind(jlk2), 'JLK2', 'RHOVALNEW') ! sperical part of tmat - allocate (tmatsph(nspin/(nspin-korbit)*(lmax+1)*(1+kBdG),0:nth-1), stat=i_stat) + allocate(tmatsph(nspin/(nspin-korbit)*(lmax+1)*(1+kBdG),0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(tmatsph))*kind(tmatsph), 'TMATSPH', 'RHOVALNEW') ! regula and iregular wavefunctions - allocate (rll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) + allocate(rll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(rll))*kind(rll), 'RLL', 'RHOVALNEW') - allocate (sll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) + allocate(sll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(sll))*kind(sll), 'SLL', 'RHOVALNEW') - allocate (rllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) + allocate(rllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(rllleft))*kind(rllleft), 'RLLLEFT', 'RHOVALNEW') - allocate (sllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) + allocate(sllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(sllleft))*kind(sllleft), 'SLLLEFT', 'RHOVALNEW') - allocate (ull(nsra*lmmaxd,lmmaxd,irmdnew,0:nth-1), stat=i_stat) + allocate(ull(nsra*lmmaxd,lmmaxd,irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(ull))*kind(ull), 'ULL', 'RHOVALNEW') - allocate (ullleft(nsra*lmmaxd,lmmaxd,irmdnew,0:nth-1), stat=i_stat) + allocate(ullleft(nsra*lmmaxd,lmmaxd,irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(ullleft))*kind(ullleft), 'ULLLEFT', 'RHOVALNEW') ! complex density - allocate (cden(irmdnew,0:lmax,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat) + allocate(cden(irmdnew,0:lmax,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(cden))*kind(cden), 'CDEN', 'RHOVALNEW') - allocate (cdenlm(irmdnew,lmmax0d,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat) + allocate(cdenlm(irmdnew,lmmax0d,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(cdenlm))*kind(cdenlm), 'CDENLM', 'RHOVALNEW') - allocate (cdenns(irmdnew,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat) + allocate(cdenns(irmdnew,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(cdenns))*kind(cdenns), 'CDENNS', 'RHOVALNEW') - allocate (cdentemp(irmdnew,0:nth-1), stat=i_stat) + allocate(cdentemp(irmdnew,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(cdentemp))*kind(cdentemp), 'CDENTEMP', 'RHOVALNEW') ! real density - allocate (den(0:lmaxd1,ielast,1,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat) + allocate(den(0:lmaxd1,ielast,1,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat) call memocc(i_stat, product(shape(den))*kind(den), 'DEN', 'RHOVALNEW') - allocate (denlm(lmmax0d,ielast,1,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat) + allocate(denlm(lmmax0d,ielast,1,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat) call memocc(i_stat, product(shape(denlm))*kind(denlm), 'DENLM', 'RHOVALNEW') - allocate (rho2nsc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat) + allocate(rho2nsc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat) call memocc(i_stat, product(shape(rho2nsc))*kind(rho2nsc), 'RHO2NSC', 'RHOVALNEW') - allocate (rho2nsc_loop(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,ielast), stat=i_stat) + allocate(rho2nsc_loop(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,ielast), stat=i_stat) call memocc(i_stat, product(shape(rho2nsc_loop))*kind(rho2nsc_loop), 'RHO2NSC_loop', 'RHOVALNEW') - allocate (rho2nsnew(irmd,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat) + allocate(rho2nsnew(irmd,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat) call memocc(i_stat, product(shape(rho2nsnew))*kind(rho2nsnew), 'RHO2NSNEW', 'RHOVALNEW') - allocate (r2nefc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat) + allocate(r2nefc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat) call memocc(i_stat, product(shape(r2nefc))*kind(r2nefc), 'R2NEFC', 'RHOVALNEW') - allocate (r2nefc_loop(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat) + allocate(r2nefc_loop(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(r2nefc_loop))*kind(r2nefc_loop), 'R2NEFC_loop', 'RHOVALNEW') - allocate (r2nefnew(irmd,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat) + allocate(r2nefnew(irmd,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat) call memocc(i_stat, product(shape(r2nefnew))*kind(r2nefnew), 'R2NEFNEW', 'RHOVALNEW') - allocate (r2orbc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat) + allocate(r2orbc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(r2orbc))*kind(r2orbc), 'R2ORBC', 'RHOVALNEW') - allocate (rhotemp(irmdnew,lmpotd), stat=i_stat) + allocate(rhotemp(irmdnew,lmpotd), stat=i_stat) call memocc(i_stat, product(shape(rhotemp))*kind(rhotemp), 'RHOTEMP', 'RHOVALNEW') - allocate (rhonewtemp(irws,lmpotd), stat=i_stat) + allocate(rhonewtemp(irws,lmpotd), stat=i_stat) call memocc(i_stat, product(shape(rhonewtemp))*kind(rhonewtemp), 'RHONEWTEMP', 'RHOVALNEW') ! full green function (structural constants after Dyson equation) - allocate (gflle_part(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),0:nth-1), stat=i_stat) + allocate(gflle_part(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),0:nth-1), stat=i_stat) call memocc(i_stat, product(shape(gflle_part))*kind(gflle_part), 'GFLLE_PART', 'RHOVALNEW') - allocate (gflle(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),ielast,1), stat=i_stat) + allocate(gflle(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),ielast,1), stat=i_stat) call memocc(i_stat, product(shape(gflle))*kind(gflle), 'GFLLE', 'RHOVALNEW') ! initialize values @@ -1317,100 +1311,100 @@ contains else ! imode<0 i_all = -product(shape(vins))*kind(vins) - deallocate (vins, stat=i_stat) + deallocate(vins, stat=i_stat) call memocc(i_stat, i_all, 'VINS', 'RHOVALNEW') i_all = -product(shape(vnspll0))*kind(vnspll0) - deallocate (vnspll0, stat=i_stat) + deallocate(vnspll0, stat=i_stat) call memocc(i_stat, i_all, 'VNSPLL0', 'RHOVALNEW') i_all = -product(shape(vnspll1))*kind(vnspll1) - deallocate (vnspll1, stat=i_stat) + deallocate(vnspll1, stat=i_stat) call memocc(i_stat, i_all, 'VNSPLL1', 'RHOVALNEW') i_all = -product(shape(vnspll))*kind(vnspll) - deallocate (vnspll, stat=i_stat) + deallocate(vnspll, stat=i_stat) call memocc(i_stat, i_all, 'VNSPLL', 'RHOVALNEW') i_all = -product(shape(hlk))*kind(hlk) - deallocate (hlk, stat=i_stat) + deallocate(hlk, stat=i_stat) call memocc(i_stat, i_all, 'HLK', 'RHOVALNEW') i_all = -product(shape(jlk))*kind(jlk) - deallocate (jlk, stat=i_stat) + deallocate(jlk, stat=i_stat) call memocc(i_stat, i_all, 'JLK', 'RHOVALNEW') i_all = -product(shape(hlk2))*kind(hlk2) - deallocate (hlk2, stat=i_stat) + deallocate(hlk2, stat=i_stat) call memocc(i_stat, i_all, 'HLK2', 'RHOVALNEW') i_all = -product(shape(jlk2))*kind(jlk2) - deallocate (jlk2, stat=i_stat) + deallocate(jlk2, stat=i_stat) call memocc(i_stat, i_all, 'JLK2', 'RHOVALNEW') i_all = -product(shape(tmatsph))*kind(tmatsph) - deallocate (tmatsph, stat=i_stat) + deallocate(tmatsph, stat=i_stat) call memocc(i_stat, i_all, 'TMATSPH', 'RHOVALNEW') i_all = -product(shape(rll))*kind(rll) - deallocate (rll, stat=i_stat) + deallocate(rll, stat=i_stat) call memocc(i_stat, i_all, 'RLL', 'RHOVALNEW') i_all = -product(shape(sll))*kind(sll) - deallocate (sll, stat=i_stat) + deallocate(sll, stat=i_stat) call memocc(i_stat, i_all, 'SLL', 'RHOVALNEW') i_all = -product(shape(rllleft))*kind(rllleft) - deallocate (rllleft, stat=i_stat) + deallocate(rllleft, stat=i_stat) call memocc(i_stat, i_all, 'RLLLEFT', 'RHOVALNEW') i_all = -product(shape(sllleft))*kind(sllleft) - deallocate (sllleft, stat=i_stat) + deallocate(sllleft, stat=i_stat) call memocc(i_stat, i_all, 'SLLLEFT', 'RHOVALNEW') i_all = -product(shape(cden))*kind(cden) - deallocate (cden, stat=i_stat) + deallocate(cden, stat=i_stat) call memocc(i_stat, i_all, 'CDEN', 'RHOVALNEW') i_all = -product(shape(cdenlm))*kind(cdenlm) - deallocate (cdenlm, stat=i_stat) + deallocate(cdenlm, stat=i_stat) call memocc(i_stat, i_all, 'CDENLM', 'RHOVALNEW') i_all = -product(shape(cdenns))*kind(cdenns) - deallocate (cdenns, stat=i_stat) + deallocate(cdenns, stat=i_stat) call memocc(i_stat, i_all, 'CDENNS', 'RHOVALNEW') i_all = -product(shape(rho2nsc))*kind(rho2nsc) - deallocate (rho2nsc, stat=i_stat) + deallocate(rho2nsc, stat=i_stat) call memocc(i_stat, i_all, 'RHO2NSC', 'RHOVALNEW') i_all = -product(shape(rho2nsc_loop))*kind(rho2nsc_loop) - deallocate (rho2nsc_loop, stat=i_stat) + deallocate(rho2nsc_loop, stat=i_stat) call memocc(i_stat, i_all, 'RHO2NSC_loop', 'RHOVALNEW') i_all = -product(shape(rho2nsnew))*kind(rho2nsnew) - deallocate (rho2nsnew, stat=i_stat) + deallocate(rho2nsnew, stat=i_stat) call memocc(i_stat, i_all, 'RHO2NSNEW', 'RHOVALNEW') i_all = -product(shape(r2nefc))*kind(r2nefc) - deallocate (r2nefc, stat=i_stat) + deallocate(r2nefc, stat=i_stat) call memocc(i_stat, i_all, 'R2NEFC', 'RHOVALNEW') i_all = -product(shape(r2nefc_loop))*kind(r2nefc_loop) - deallocate (r2nefc_loop, stat=i_stat) + deallocate(r2nefc_loop, stat=i_stat) call memocc(i_stat, i_all, 'R2NEFC_loop', 'RHOVALNEW') i_all = -product(shape(r2nefnew))*kind(r2nefnew) - deallocate (r2nefnew, stat=i_stat) + deallocate(r2nefnew, stat=i_stat) call memocc(i_stat, i_all, 'R2NEFNEW', 'RHOVALNEW') i_all = -product(shape(r2orbc))*kind(r2orbc) - deallocate (r2orbc, stat=i_stat) + deallocate(r2orbc, stat=i_stat) call memocc(i_stat, i_all, 'R2ORBC', 'RHOVALNEW') i_all = -product(shape(cdentemp))*kind(cdentemp) - deallocate (cdentemp, stat=i_stat) + deallocate(cdentemp, stat=i_stat) call memocc(i_stat, i_all, 'CDENTEMP', 'RHOVALNEW') i_all = -product(shape(gflle_part))*kind(gflle_part) - deallocate (gflle_part, stat=i_stat) + deallocate(gflle_part, stat=i_stat) call memocc(i_stat, i_all, 'GFLLE_PART', 'RHOVALNEW') i_all = -product(shape(gflle))*kind(gflle) - deallocate (gflle, stat=i_stat) + deallocate(gflle, stat=i_stat) call memocc(i_stat, i_all, 'GFLLE', 'RHOVALNEW') i_all = -product(shape(den))*kind(den) - deallocate (den, stat=i_stat) + deallocate(den, stat=i_stat) call memocc(i_stat, i_all, 'DEN', 'RHOVALNEW') i_all = -product(shape(denlm))*kind(denlm) - deallocate (denlm, stat=i_stat) + deallocate(denlm, stat=i_stat) call memocc(i_stat, i_all, 'DENLM', 'RHOVALNEW') i_all = -product(shape(rhotemp))*kind(rhotemp) - deallocate (rhotemp, stat=i_stat) + deallocate(rhotemp, stat=i_stat) call memocc(i_stat, i_all, 'RHOTEMP', 'RHOVALNEW') i_all = -product(shape(rhonewtemp))*kind(rhonewtemp) - deallocate (rhonewtemp, stat=i_stat) + deallocate(rhonewtemp, stat=i_stat) call memocc(i_stat, i_all, 'RHONEWTEMP', 'RHOVALNEW') i_all = -product(shape(ull))*kind(ull) - deallocate (ull, stat=i_stat) + deallocate(ull, stat=i_stat) call memocc(i_stat, i_all, 'ULL', 'RHOVALNEW') i_all = -product(shape(ullleft))*kind(ullleft) - deallocate (ullleft, stat=i_stat) + deallocate(ullleft, stat=i_stat) call memocc(i_stat, i_all, 'ULLLEFT', 'RHOVALNEW') @@ -1426,19 +1420,19 @@ contains !> Deprecated: False !> writes lmdos.AAA.SS.dat files where `AAA` and `S` are the atom and spin indices !------------------------------------------------------------------------------- - subroutine write_lmdos(ielast, ez, wez, denlm, iatom, nspin, lmmax0d, irmdnew, ispin) + subroutine write_lmdos(ielast, ez, wez, denlm, iatom, nspin, lmmax0d, ispin) use mod_datatypes, only: dp use mod_constants, only: pi, czero - use global_variables, only: kBdG, korbit, iemxd, lmmaxd, lambda_BdG - use mod_runoptions, only: write_DOS_lm, disable_print_serialnumber, write_complex_lmdos + use global_variables, only: kBdG, korbit, iemxd + use mod_runoptions, only: disable_print_serialnumber, write_complex_lmdos use mod_version_info, only: version_print_header use mod_rhooutnew, only: fix_integration_weight_BdG - use mod_BdG, only: t_BdG, get_new_cmat_at + use mod_BdG, only: get_new_cmat_at use m_npy, only: save_npy use mod_runoptions, only: check_custom_testopt implicit none ! interface - integer, intent(in) :: ielast, nspin, iatom, lmmax0d, irmdnew, ispin + integer, intent(in) :: ielast, nspin, iatom, lmmax0d, ispin complex (kind=dp), intent(in) :: ez(ielast) complex (kind=dp), dimension (iemxd), intent (in) :: wez complex (kind=dp), intent(in), allocatable :: denlm(:, :, :, :) @@ -1448,9 +1442,6 @@ contains real (kind=dp), dimension(lmmax0d) :: den_integrated character (len=100) :: filename - complex (kind=dp) :: cmat_new(lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), irmdnew) - integer :: ir - integer :: iie integer, allocatable :: iemap(:) @@ -1600,6 +1591,56 @@ contains end subroutine write_chi_ns + !------------------------------------------------------------------------------- + !> Summary: read qvec.dat and change allocations accordingly + !> Author: Philipp Ruessmann + !> Category: single-site, KKRhost, density + !> Deprecated: False + !> read q-points for qos calculation from qvec.dat file and reallocate arrays + !> that have the number of q-points + !------------------------------------------------------------------------------- + subroutine change_allocations_qdos(lmmaxd, kBdG, ielast, lmaxd1, nspin, korbit, lmmax0d, & + nqdos, qvec, gflle, den, denlm) + use mod_datatypes, only: dp + use mod_profiling, only: memocc + use mod_qdos, only: read_qvec + implicit none + integer, intent(in) :: lmmaxd, kBdG, ielast, lmaxd1, nspin, korbit, lmmax0d + integer, intent(out) :: nqdos + real (kind=dp), allocatable, intent(inout) :: qvec(:, :) + complex (kind=dp), allocatable, intent(inout) :: gflle(:, :, :, :) + complex (kind=dp), allocatable, intent(inout) :: den(:, :, :, :) + complex (kind=dp), allocatable, intent(inout) :: denlm(:, :, :, :) + ! local + integer :: i_all, i_stat + + ! Read BZ path for qdos calculation: + call read_qvec(qvec) + nqdos = size(qvec, dim=2) ! find number of qvec + + ! Change allocation for GFLLE to be suitabel for qdos run + i_all = -product(shape(gflle))*kind(gflle) + deallocate(gflle, stat=i_stat) + call memocc(i_stat, i_all, 'GFLLE', 'RHOVALNEW') + i_all = -product(shape(den))*kind(den) + deallocate(den, stat=i_stat) + call memocc(i_stat, i_all, 'DEN', 'RHOVALNEW') + i_all = -product(shape(denlm))*kind(denlm) + deallocate(denlm, stat=i_stat) + call memocc(i_stat, i_all, 'DENLM', 'RHOVALNEW') + + ! now reallocate with larger nqdos dimension + allocate(gflle(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),ielast,nqdos), stat=i_stat) + call memocc(i_stat, product(shape(gflle))*kind(gflle), 'GFLLE', 'RHOVALNEW') + allocate(den(0:lmaxd1,ielast,nqdos,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat) + call memocc(i_stat, product(shape(den))*kind(den), 'DEN', 'RHOVALNEW') + allocate(denlm(lmmax0d,ielast,nqdos,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat) + call memocc(i_stat, product(shape(denlm))*kind(denlm), 'DENLM', 'RHOVALNEW') + + end subroutine change_allocations_qdos + + + end module mod_rhovalnew diff --git a/source/KKRhost/tmat_newsolver.F90 b/source/KKRhost/tmat_newsolver.F90 index 30654c9dc..52ca490a3 100644 --- a/source/KKRhost/tmat_newsolver.F90 +++ b/source/KKRhost/tmat_newsolver.F90 @@ -65,7 +65,6 @@ contains use m_npy, only: save_npy use mod_BdG, only: t_BdG, reshape_BdG, reshape_pot_mat_BdG use mod_rll_global_solutions, only: rll_global_solutions - use mod_bfield, only: add_bfield implicit none @@ -837,164 +836,165 @@ contains ! local integer :: i_stat + character (len=*), parameter :: routine_name = 'allocate_locals_tmat_newsolver' if (allocmode==1) then ! allocate and initialize ! potential arrays allocate (vnspll(nsra*lmmaxd*(1+kBdG),nsra*lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(vnspll))*kind(vnspll), 'VNSPLL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(vnspll))*kind(vnspll), 'VNSPLL', routine_name) vnspll = czero allocate (vnspll0(lmmaxd,lmmaxd,irmdnew), stat=i_stat) - call memocc(i_stat, product(shape(vnspll0))*kind(vnspll0), 'VNSPLL0', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(vnspll0))*kind(vnspll0), 'VNSPLL0', routine_name) vnspll0 = czero allocate (vnspll1(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(vnspll1))*kind(vnspll1), 'VNSPLL1', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(vnspll1))*kind(vnspll1), 'VNSPLL1', routine_name) vnspll1 = czero allocate (vnspll2(lmmaxd,lmmaxd,irmdnew), stat=i_stat) - call memocc(i_stat, product(shape(vnspll2))*kind(vnspll2), 'VNSPLL2', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(vnspll2))*kind(vnspll2), 'VNSPLL2', routine_name) vnspll2 = czero ! source terms (bessel and hankel functions) allocate (hlk(1:nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(hlk))*kind(hlk), 'HLK', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(hlk))*kind(hlk), 'HLK', routine_name) hlk = czero allocate (jlk(1:nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(jlk))*kind(jlk), 'JLK', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(jlk))*kind(jlk), 'JLK', routine_name) jlk = czero allocate (hlk2(1:nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(hlk2))*kind(hlk2), 'HLK2', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(hlk2))*kind(hlk2), 'HLK2', routine_name) hlk2 = czero allocate (jlk2(1:nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(jlk2))*kind(jlk2), 'JLK2', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(jlk2))*kind(jlk2), 'JLK2', routine_name) jlk2 = czero ! Spherical part of tmatrix (used with SRATRICK) allocate (tmatsph(nspin/(nspin-korbit)*(lmax+1)*(1+kBdG),0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(tmatsph))*kind(tmatsph), 'TMATSPH', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(tmatsph))*kind(tmatsph), 'TMATSPH', routine_name) tmatsph = czero ! Regular and irregular wavefunctions allocate (rll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(rll))*kind(rll), 'RLL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(rll))*kind(rll), 'RLL', routine_name) rll = czero allocate (sll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(sll))*kind(sll), 'SLL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(sll))*kind(sll), 'SLL', routine_name) sll = czero allocate (ull(nsra*lmmaxd,lmmaxd,irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(ull))*kind(ull), 'ULL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(ull))*kind(ull), 'ULL', routine_name) ull = czero ! Left regular and irregular wavefunctions (used here only in case of XCPL or saving of left wavefunctions) if (calc_exchange_couplings .or. (t_wavefunctions%save_rllleft .or. t_wavefunctions%save_sllleft .or. write_rhoq_input) .or. calc_wronskian) then allocate (rllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(rllleft))*kind(rllleft), 'RLLLEFT', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(rllleft))*kind(rllleft), 'RLLLEFT', routine_name) rllleft = czero allocate (sllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(sllleft))*kind(sllleft), 'SLLLEFT', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(sllleft))*kind(sllleft), 'SLLLEFT', routine_name) sllleft = czero else allocate (rllleft(1,1,1,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(rllleft))*kind(rllleft), 'RLLLEFT', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(rllleft))*kind(rllleft), 'RLLLEFT', routine_name) rllleft = czero allocate (sllleft(1,1,1,0:nth-1), stat=i_stat) - call memocc(i_stat, product(shape(sllleft))*kind(sllleft), 'SLLLEFT', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(sllleft))*kind(sllleft), 'SLLLEFT', routine_name) sllleft = czero end if ! ( calc_exchange_couplings .or. ... ) allocate (vins(irmdnew,lmpot,nspin), stat=i_stat) - call memocc(i_stat, product(shape(vins))*kind(vins), 'VINS', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(vins))*kind(vins), 'VINS', routine_name) vins = 0.0_dp allocate (aux(lmmaxd,lmmaxd), stat=i_stat) - call memocc(i_stat, product(shape(aux))*kind(aux), 'AUX', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(aux))*kind(aux), 'AUX', routine_name) aux = czero allocate (ipiv(lmmaxd), stat=i_stat) - call memocc(i_stat, product(shape(ipiv))*kind(ipiv), 'IPIV', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(ipiv))*kind(ipiv), 'IPIV', routine_name) ipiv = 0 allocate (tmat0(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat) - call memocc(i_stat, product(shape(tmat0))*kind(tmat0), 'TMAT0', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(tmat0))*kind(tmat0), 'TMAT0', routine_name) tmat0 = czero allocate (tmatll(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat) - call memocc(i_stat, product(shape(tmatll))*kind(tmatll), 'TMATLL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(tmatll))*kind(tmatll), 'TMATLL', routine_name) tmatll = czero allocate (alpha0(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat) - call memocc(i_stat, product(shape(alpha0))*kind(alpha0), 'ALPHA0', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(alpha0))*kind(alpha0), 'ALPHA0', routine_name) alpha0 = czero allocate (dtmatll(lmmaxd,lmmaxd), stat=i_stat) - call memocc(i_stat, product(shape(dtmatll))*kind(dtmatll), 'DTMATLL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(dtmatll))*kind(dtmatll), 'DTMATLL', routine_name) dtmatll = czero allocate (alphall(lmmaxd,lmmaxd), stat=i_stat) - call memocc(i_stat, product(shape(alphall))*kind(alphall), 'ALPHALL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(alphall))*kind(alphall), 'ALPHALL', routine_name) alphall = czero allocate (dalphall(lmmaxd,lmmaxd), stat=i_stat) - call memocc(i_stat, product(shape(dalphall))*kind(dalphall), 'DALPHALL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(dalphall))*kind(dalphall), 'DALPHALL', routine_name) dalphall = czero allocate (jlk_index(nsra*lmmaxd*(1+kBdG)), stat=i_stat) - call memocc(i_stat, product(shape(jlk_index))*kind(jlk_index), 'JLK_INDEX', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, product(shape(jlk_index))*kind(jlk_index), 'JLK_INDEX', routine_name) jlk_index = 0 else ! allocmode/=1: deallocate arrays deallocate (vnspll, stat=i_stat) - call memocc(i_stat, -product(shape(vnspll))*kind(vnspll), 'VNSPLL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(vnspll))*kind(vnspll), 'VNSPLL', routine_name) deallocate (vnspll0, stat=i_stat) - call memocc(i_stat, -product(shape(vnspll0))*kind(vnspll0), 'VNSPLL0', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(vnspll0))*kind(vnspll0), 'VNSPLL0', routine_name) deallocate (vnspll1, stat=i_stat) - call memocc(i_stat, -product(shape(vnspll1))*kind(vnspll1), 'VNSPLL1', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(vnspll1))*kind(vnspll1), 'VNSPLL1', routine_name) deallocate (vnspll2, stat=i_stat) - call memocc(i_stat, -product(shape(vnspll2))*kind(vnspll2), 'VNSPLL1', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(vnspll2))*kind(vnspll2), 'VNSPLL1', routine_name) deallocate (hlk, stat=i_stat) - call memocc(i_stat, -product(shape(hlk))*kind(hlk), 'HLK', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(hlk))*kind(hlk), 'HLK', routine_name) deallocate (jlk, stat=i_stat) - call memocc(i_stat, -product(shape(jlk))*kind(jlk), 'JLK', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(jlk))*kind(jlk), 'JLK', routine_name) deallocate (hlk2, stat=i_stat) - call memocc(i_stat, -product(shape(hlk2))*kind(hlk2), 'HLK2', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(hlk2))*kind(hlk2), 'HLK2', routine_name) deallocate (jlk2, stat=i_stat) - call memocc(i_stat, -product(shape(jlk2))*kind(jlk2), 'JLK2', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(jlk2))*kind(jlk2), 'JLK2', routine_name) deallocate (tmatsph, stat=i_stat) - call memocc(i_stat, -product(shape(tmatsph))*kind(tmatsph), 'TMATSPH', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(tmatsph))*kind(tmatsph), 'TMATSPH', routine_name) deallocate (ull, stat=i_stat) - call memocc(i_stat, -product(shape(ull))*kind(ull), 'ULL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(ull))*kind(ull), 'ULL', routine_name) deallocate (rll, stat=i_stat) - call memocc(i_stat, -product(shape(rll))*kind(rll), 'RLL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(rll))*kind(rll), 'RLL', routine_name) deallocate (sll, stat=i_stat) - call memocc(i_stat, -product(shape(sll))*kind(sll), 'SLL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(sll))*kind(sll), 'SLL', routine_name) if (calc_exchange_couplings .or. (t_wavefunctions%save_rllleft .or. t_wavefunctions%save_sllleft .or. write_rhoq_input)) then deallocate (rllleft, stat=i_stat) - call memocc(i_stat, -product(shape(rllleft))*kind(rllleft), 'RLLLEFT', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(rllleft))*kind(rllleft), 'RLLLEFT', routine_name) deallocate (sllleft, stat=i_stat) - call memocc(i_stat, -product(shape(sllleft))*kind(sllleft), 'SLLLEFT', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(sllleft))*kind(sllleft), 'SLLLEFT', routine_name) else deallocate (rllleft, stat=i_stat) - call memocc(i_stat, -product(shape(rllleft))*kind(rllleft), 'RLLLEFT', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(rllleft))*kind(rllleft), 'RLLLEFT', routine_name) deallocate (sllleft, stat=i_stat) - call memocc(i_stat, -product(shape(sllleft))*kind(sllleft), 'SLLLEFT', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(sllleft))*kind(sllleft), 'SLLLEFT', routine_name) end if ! ( calc_exchange_couplings .or. ... ) deallocate (vins, stat=i_stat) - call memocc(i_stat, -product(shape(vins))*kind(vins), 'VINS', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(vins))*kind(vins), 'VINS', routine_name) deallocate (aux, stat=i_stat) - call memocc(i_stat, -product(shape(aux))*kind(aux), 'AUX', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(aux))*kind(aux), 'AUX', routine_name) deallocate (ipiv, stat=i_stat) - call memocc(i_stat, -product(shape(ipiv))*kind(ipiv), 'IPIV', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(ipiv))*kind(ipiv), 'IPIV', routine_name) deallocate (tmat0, stat=i_stat) - call memocc(i_stat, -product(shape(tmat0))*kind(tmat0), 'TMAT0', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(tmat0))*kind(tmat0), 'TMAT0', routine_name) deallocate (tmatll, stat=i_stat) - call memocc(i_stat, -product(shape(tmatll))*kind(tmatll), 'TMATLL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(tmatll))*kind(tmatll), 'TMATLL', routine_name) deallocate (alpha0, stat=i_stat) - call memocc(i_stat, -product(shape(alpha0))*kind(alpha0), 'ALPHA0', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(alpha0))*kind(alpha0), 'ALPHA0', routine_name) deallocate (dtmatll, stat=i_stat) - call memocc(i_stat, -product(shape(dtmatll))*kind(dtmatll), 'DTMATLL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(dtmatll))*kind(dtmatll), 'DTMATLL', routine_name) deallocate (alphall, stat=i_stat) - call memocc(i_stat, -product(shape(alphall))*kind(alphall), 'ALPHALL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(alphall))*kind(alphall), 'ALPHALL', routine_name) deallocate (dalphall, stat=i_stat) - call memocc(i_stat, -product(shape(dalphall))*kind(dalphall), 'DALPHALL', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(dalphall))*kind(dalphall), 'DALPHALL', routine_name) deallocate (jlk_index, stat=i_stat) - call memocc(i_stat, -product(shape(jlk_index))*kind(jlk_index), 'JLK_INDEX', 'allocate_locals_tmat_newsolver') + call memocc(i_stat, -product(shape(jlk_index))*kind(jlk_index), 'JLK_INDEX', routine_name) end if ! allocmode ==1 or /=1 @@ -1058,12 +1058,14 @@ contains ! Add magnetic field if ( t_params%bfield%lbfield .or. t_params%bfield%lbfield_constr ) then + write(*,*) 'apply magnetic field to potential' imt1 = ipan_intervall(t_params%npan_log+t_params%npan_eq) + 1 call add_bfield(t_params%bfield, i1, lmax, nspin, irmdnew, imt1, iend, ncheb, theta, phi, t_params%ifunm1(:, t_params%ntcell(i1)), & - t_params%icleb, t_params%cleb(:,1), t_params%thetasnew(1:irmdnew, :, t_params%ntcell(i1)), '1', vnspll2(:, :, :), & - vnspll1(1:lmmaxd,1:lmmaxd, :, ith), t_params%bfield%thetallmat(:, :, 1:irmdnew, t_params%ntcell(i1))) - else - vnspll1(1:lmmaxd,1:lmmaxd, 1:irmdnew, ith) = vnspll2(1:lmmaxd,1:lmmaxd, 1:irmdnew) + t_params%icleb, t_params%cleb(:,1), t_params%thetasnew(1:irmdnew, :, t_params%ntcell(i1)), '1', & + vnspll1(1:lmmaxd,1:lmmaxd, :, ith), vnspll2(:, :, :), & ! input and output potentials (not replace in-place) + t_params%bfield%thetallmat(:, :, 1:irmdnew, t_params%ntcell(i1))) + ! copy changed array back because add_bfield does not apply the change in-place + vnspll1(1:lmmaxd,1:lmmaxd, 1:irmdnew, ith) = vnspll2(1:lmmaxd,1:lmmaxd, 1:irmdnew) end if ! bfield ! add SOC for hole part at eryd=2*ef-eryd @@ -1071,20 +1073,21 @@ contains call spinorbit_ham(lmax, lmmax0d, vins, rnew, 2*efermi_in-conjg(eryd), zat, cvlight, socscale, nspin, lmpot, theta, phi, ipan_intervall, rpan_intervall, npan_tot, ncheb, irmdnew, nrmaxd, & vnspll0(:,:,:), vnspll1(lmmaxd+1:2*lmmaxd,lmmaxd+1:2*lmmaxd,:,ith), mode) - ! Add magnetic field + ! Add magnetic field in hole block of potential (not the different part of the matrix vnspll1) if ( t_params%bfield%lbfield .or. t_params%bfield%lbfield_constr ) then imt1 = ipan_intervall(t_params%npan_log+t_params%npan_eq) + 1 call add_bfield(t_params%bfield, i1, lmax, nspin, irmdnew, imt1, iend, ncheb, theta, phi, t_params%ifunm1(:, t_params%ntcell(i1)), & - t_params%icleb, t_params%cleb(:,1), t_params%thetasnew(1:irmdnew, :, t_params%ntcell(i1)), '1', vnspll2(:, :, :), & - vnspll1(lmmaxd+1:2*lmmaxd,lmmaxd+1:2*lmmaxd, :, ith), t_params%bfield%thetallmat(:, :, 1:irmdnew, t_params%ntcell(i1))) - else + t_params%icleb, t_params%cleb(:,1), t_params%thetasnew(1:irmdnew, :, t_params%ntcell(i1)), '1', & + vnspll1(lmmaxd+1:2*lmmaxd,lmmaxd+1:2*lmmaxd, :, ith), vnspll2(:, :, :), & ! input and output potentials (not replace in-place) + t_params%bfield%thetallmat(:, :, 1:irmdnew, t_params%ntcell(i1))) + ! copy changed array back because add_bfield does not apply the change in-place vnspll1(lmmaxd+1:2*lmmaxd,lmmaxd+1:2*lmmaxd, 1:irmdnew, ith) = vnspll2(1:lmmaxd,1:lmmaxd, 1:irmdnew) end if ! bfield end if ! kBdG>0 ! clean up working array for bfield - deallocate(vnspll2) + deallocate(vnspll2) else ! decouple_spin_cheby @@ -1093,6 +1096,8 @@ contains end if! decouple_spin_cheby + ! now vnspll1 contains the proper potential with BdG doubling + ! test writeout of VNSPLL1 if (write_BdG_tests .and. check_custom_testopt('write_potential')) then @@ -1199,6 +1204,18 @@ contains else use_fullgmat = 1 end if + + write (filename, '(A)') 'rll_source_input.dat' + open (888888, file=trim(filename), form='formatted') + write (888888, *) 'dims', nsra, nvec, eryd, rnew, irmdnew, nrmaxd, lmax, lmmaxd, use_fullgmat + write (888888, *) 'jlk_index', jlk_index + write (888888, *) 'hlk', hlk(:,:,ith) + write (888888, *) 'jlk', jlk(:,:,ith) + write (888888, *) 'hlk2', hlk2(:,:,ith) + write (888888, *) 'jlk2', jlk2(:,:,ith) + write (888888, *) 'gmatprefac', gmatprefactor + close (888888) + call rllsllsourceterms(nsra, nvec, eryd, rnew, irmdnew, nrmaxd, lmax, lmmaxd, use_fullgmat, jlk_index, hlk(:,:,ith), jlk(:,:,ith), hlk2(:,:,ith), jlk2(:,:,ith), gmatprefactor) if (check_custom_testopt('gmatprefac1')) gmatprefactor(:) = (1.0_dp, 0.0_dp) @@ -1226,6 +1243,7 @@ contains write (888888, '(A,I9,A,I9,A,2ES15.7)') '# dimension: 4*(LMAX+1)=', 4*(lmax+1)*(1+kBdG), ' IRMDNEW=', irmdnew, ' ; ERYD=', eryd write (888888, '(2ES21.9)') sum(hlk2(:, :, ith), 2) close (888888) + stop 'done writing rll source' end if #ifdef CPP_OMP !$omp end critical diff --git a/source/common/bfield.f90 b/source/common/bfield.f90 index f6d1f024f..d7f30a246 100644 --- a/source/common/bfield.f90 +++ b/source/common/bfield.f90 @@ -358,24 +358,24 @@ contains implicit none - type(type_bfield) , intent (in) :: bfield ! contains all relevant information about the magnetic fields - integer , intent (in) :: iatom ! atom index - integer , intent (in) :: lmax ! angular momentum cut-off - integer , intent (in) :: nspin ! number of spins - integer , intent (in) :: irmdnew ! number of radials point on the Cheby mesh - integer , intent (in) :: imt1 ! index muffin-tin radius - integer , intent (in) :: iend ! Number of nonzero gaunt coefficients - integer , intent (in) :: ncheb ! Number of Chebychev pannels for the new solver - real (kind=dp) , intent (in) :: theta ! polar angle of the magnetic moment - real (kind=dp) , intent (in) :: phi ! azimuthal angle of the magnetic moment - integer , dimension (1:(2*lmax+1)**2) , intent (in) :: ifunm ! pointer array for shapefun ! Check index and dimensions!!!!! - integer, dimension (ncleb, 4), intent (in) :: icleb !! Pointer array - real (kind=dp), dimension (ncleb), intent (in) :: cleb !! GAUNT coefficients (GAUNT) ! CHECK THE DIMENSION AND HOW IT IS USED!!! - real (kind=dp) , dimension (irmdnew, nfund) , intent (in) :: thetasnew ! shapefun on the Cheby mesh - character (len=*) , intent (in) :: mode !! either '1' or 'transpose', depending whether SOC potential is constructed for right or left solution - complex (kind=dp) , dimension(lmmaxd, lmmaxd, irmdnew) , intent (in) :: vnspll0 !! input potential in (l,m,s) basis - complex (kind=dp) , dimension(lmmaxd, lmmaxd, irmdnew) , intent (out) :: vnspll1 !! input potential in (l,m,s) basis - real(kind=dp) , dimension(1:(lmax+1)**2,1:(lmax+1)**2,1:irmdnew), intent(in) :: thetansll + type(type_bfield) , intent (in) :: bfield !! contains all relevant information about the magnetic fields + integer , intent (in) :: iatom !! atom index + integer , intent (in) :: lmax !! angular momentum cut-off + integer , intent (in) :: nspin !! number of spins + integer , intent (in) :: irmdnew !! number of radials point on the Cheby mesh + integer , intent (in) :: imt1 !! index muffin-tin radius + integer , intent (in) :: iend !! Number of nonzero gaunt coefficients + integer , intent (in) :: ncheb !! Number of Chebychev pannels for the new solver + real (kind=dp) , intent (in) :: theta !! polar angle of the magnetic moment + real (kind=dp) , intent (in) :: phi !! azimuthal angle of the magnetic moment + integer , dimension (1:(2*lmax+1)**2), intent (in) :: ifunm !! pointer array for shapefun ! Check index and dimensions!!!!! + integer, dimension (ncleb, 4), intent (in) :: icleb !! Pointer array + real (kind=dp), dimension (ncleb), intent (in) :: cleb !! GAUNT coefficients (GAUNT) ! CHECK THE DIMENSION AND HOW IT IS USED!!! + real (kind=dp) , dimension (irmdnew, nfund) , intent (in) :: thetasnew !! shapefun on the Cheby mesh + character (len=*) , intent (in) :: mode !! either '1' or 'transpose', depending whether SOC potential is constructed for right or left solution + complex (kind=dp) , dimension(lmmaxd, lmmaxd, irmdnew) , intent (in) :: vnspll0 !! input potential in (l,m,s) basis + complex (kind=dp) , dimension(lmmaxd, lmmaxd, irmdnew) , intent (out) :: vnspll1 !! input potential in (l,m,s) basis + real(kind=dp) , dimension(1:(lmax+1)**2,1:(lmax+1)**2,1:irmdnew), intent(in) :: thetansll !! !------------------------------------------------------------------------------------ ! local variables !------------------------------------------------------------------------------------ diff --git a/source/common/radial_solver_Chebychev/calcsph.f90 b/source/common/radial_solver_Chebychev/calcsph.f90 index 0fa1a7361..2675b9ab1 100644 --- a/source/common/radial_solver_Chebychev/calcsph.f90 +++ b/source/common/radial_solver_Chebychev/calcsph.f90 @@ -132,20 +132,9 @@ contains end if tmattemp = (0e0_dp, 0e0_dp) alphatemp = (0e0_dp, 0e0_dp) - ! if (i_BdG>0) jlktemp = conjg(jlktemp) - ! if (i_BdG>0) jlk2temp = conjg(jlk2temp) - !if (i_BdG>0) hlktemp = -conjg(hlktemp) - !if (i_BdG>0) hlk2temp = -conjg(hlk2temp) - !if (i_BdG>0) hlk2temp = hlktemp - !if (i_BdG>0) jlk2temp = jlktemp - !if (i_BdG>0) vll = -conjg(vll) - !write(*,*) 'vll', vll(1,1,10) - ! if (lval==0) write(*,*) 'sph rll1', hlktemp(1,10), jlktemp(1,10), hlk2temp(1,10), jlk2temp(1,10), use_sratrick call rllsll(rpan_intervall, rnew, vll, rlltemp, slltemp, tmattemp, ncheb, npan_tot, lmsize, lmsize2, nvec, irmdnew, nvec, jlk_indextemp, hlktemp, jlktemp, hlk2temp, & jlk2temp, gmatprefactor, use_sratrick, alphatemp, .true., .true., .true.) ! last three: calcrll/calcsll/calctmat - ! if (lval==0) write(*,*) 'sph rll2', rlltemp(1,1,10), slltemp(1,1,10),tmattemp - do ir = 1, irmdnew hlknew(lspin+lval+1, ir) = slltemp(1, 1, ir)/rnew(ir) jlknew(lspin+lval+1, ir) = rlltemp(1, 1, ir)/rnew(ir) diff --git a/source/common/radial_solver_Chebychev/create_newmesh.f90 b/source/common/radial_solver_Chebychev/create_newmesh.f90 index c440e471b..dcaea3dcb 100644 --- a/source/common/radial_solver_Chebychev/create_newmesh.f90 +++ b/source/common/radial_solver_Chebychev/create_newmesh.f90 @@ -33,9 +33,10 @@ contains subroutine create_newmesh(natyp, irm, ipand, irid, ntotd, nfund, ncheb, irmdnew, rmesh, irmin, ipan, ircut, r_log, npan_log, npan_eq, npan_log_at, npan_eq_at, npan_tot, rnew, & rpan_intervall, ipan_intervall, ncelld, ntcell, thetas, thetasnew) ! last three: optional arguments - use :: mod_profiling, only: memocc - use :: mod_interpolspline, only: interpolspline - use :: mod_datatypes, only: dp + use mod_profiling, only: memocc + use mod_interpolspline, only: interpolspline + use mod_datatypes, only: dp + use mod_runoptions, only: check_custom_testopt implicit none ! .. Input variables @@ -104,7 +105,11 @@ contains ! log panel rmin = rmesh(2, i1) rmax = r_log - fac = (rmax/rmin)**(1.d0/npan_log) + if (check_custom_testopt('newmesh_old_fac')) then + fac = 2.0_dp + else + fac = (rmax/rmin)**(1.d0/npan_log) + end if rval = 0e0_dp ishift = 0 if (r_log>rmesh(irmin(i1),i1)) then @@ -162,10 +167,6 @@ contains call chebmesh(npan_tot(i1), ncheb, rpan_intervall(0:,i1), rnew(1,i1)) -! do ir2 = 1,50 -! write(6,*) ir2, rnew(ir2,i1) -! end do - ! do interpolation only when optional arguments are given if (present(ntcell)) then ! interpolate shape function THETAS to new shape function THETASNEW @@ -183,10 +184,8 @@ contains call interpolspline(rmesh(imin:imax,i1), rnew(iminnew:imaxnew,i1), thetasin(imin-ircut(1,i1):imax-ircut(1,i1),lm1,icell), thetasnew(iminnew:imaxnew,lm1,icell), imax-imin+1, & imaxnew-iminnew+1) end do - end do - end if ! present(ntcell) end do ! i1 diff --git a/source/common/radial_solver_Chebychev/rll_global_solutions.F90 b/source/common/radial_solver_Chebychev/rll_global_solutions.F90 index b7d01afe2..261aec6f4 100644 --- a/source/common/radial_solver_Chebychev/rll_global_solutions.F90 +++ b/source/common/radial_solver_Chebychev/rll_global_solutions.F90 @@ -48,7 +48,7 @@ contains integer :: lbessel, use_sratrick1 ! dimensions etc. ! running indices - integer :: lm1, lm2 + integer :: l1, lm1, lm2 integer :: info, icheb, ipan, mn, nm ! source terms @@ -67,9 +67,9 @@ contains complex (kind=dp), allocatable :: work(:, :), allp(:, :, :), bllp(:, :, :), & ! eq. 5.9, 5.10 for reg. sol mrnvy(:, :, :), mrnvz(:, :, :), & ! *************** mrjvy(:, :, :), mrjvz(:, :, :) ! eq. 5.19-5.22 + complex (kind=dp), allocatable :: hlk_tmp(:, :), jlk_tmp(:, :), hlk2_tmp(:, :), jlk2_tmp(:, :) ! working arrays complex (kind=dp), allocatable :: yrf(:, :, :, :), & ! source terms (different array - zrf(:, :, :, :) ! ordering) - ! chebyshev arrays + zrf(:, :, :, :) ! ordering) chebyshev arrays real (kind=dp) :: c1(0:ncheb, 0:ncheb), rpanbound(0:npan), drpan2 real (kind=dp) :: cslc1(0:ncheb, 0:ncheb), & ! Integration matrix from left ( C*S_L*C^-1 in eq. 5.53) csrc1(0:ncheb, 0:ncheb), & ! Same from right ( C*S_R*C^-1 in eq. 5.54) @@ -99,11 +99,11 @@ contains ! if the SRA approximation is used: ! matrix A ( C 0) (same as in eq. 5.68) - ! ( B 1) + ! ( B 1) ! (C, B are matricies here) ! inverse of A is (C^-1 0 ) - ! (-B C^-1 1 ) + ! (-B C^-1 1 ) ! Thus, it is sufficient to only inverse the matrix C which saves computational ! time. This is refered to as the SRA trick. ! *********************************************************************** @@ -128,13 +128,13 @@ contains call chebint(cslc1, csrc1, slc1sum, c1, ncheb) - allocate (mrnvy(lmsize,lmsize,npan), mrnvz(lmsize,lmsize,npan)) - allocate (mrjvy(lmsize,lmsize,npan), mrjvz(lmsize,lmsize,npan)) - allocate (yrf(lmsize2,lmsize,0:ncheb,npan)) - allocate (zrf(lmsize2,lmsize,0:ncheb,npan)) + allocate(mrnvy(lmsize,lmsize,npan), mrnvz(lmsize,lmsize,npan)) + allocate(mrjvy(lmsize,lmsize,npan), mrjvz(lmsize,lmsize,npan)) + allocate(yrf(lmsize2,lmsize,0:ncheb,npan)) + allocate(zrf(lmsize2,lmsize,0:ncheb,npan)) - allocate (work(lmsize,lmsize)) - allocate (allp(lmsize,lmsize,0:npan), bllp(lmsize,lmsize,0:npan)) + allocate(work(lmsize,lmsize)) + allocate(allp(lmsize,lmsize,0:npan), bllp(lmsize,lmsize,0:npan)) if (idotime) call timing_start('local') @@ -143,28 +143,49 @@ contains ! $OMP& SHARED(tau,npan,drpan2,rpanbound,mrnvy,mrnvz,mrjvy,mrjvz,yrf) & ! $OMP& SHARED(zrf,nvec,lmsize,lmsize2,ncheb,jlk,jlk2,jlk_index,vll) & ! $OMP& SHARED(gmatprefactor,hlk,hlk2,cslc1,csrc1,slc1sum) & - ! $OMP& SHARED(use_sratrick, rmesh) + ! $OMP& SHARED(use_sratrick1, rmesh) & + ! $omp& firstprivate(idotime) thread_id = omp_get_thread_num() + + ! deactivate timing on all other threads except the main thread + if (idotime .and. thread_id/=0) idotime = .false. #endif + ! prefetch jlk and hlk to help compiler vectorize + allocate(hlk_tmp(lmsize*nvec, nrmax), jlk_tmp(lmsize*nvec, nrmax)) + allocate(hlk2_tmp(lmsize*nvec, nrmax), jlk2_tmp(lmsize*nvec, nrmax)) + do lm1 = 1, lmsize*nvec + l1 = jlk_index(lm1) + jlk_tmp(lm1, :) = jlk(l1,:) + hlk_tmp(lm1, :) = hlk(l1,:) + jlk2_tmp(lm1, :) = jlk2(l1,:) + hlk2_tmp(lm1, :) = hlk2(l1,:) + end do + ! loop over subintervals #ifdef CPP_HYBRID ! openMP pragmas added sachin, parallel region starts earlier to get allocations of arrays right - ! $OMP DO + ! $OMP DO schedule (dynamic) #endif do ipan = 1, npan drpan2 = (rpanbound(ipan)-rpanbound(ipan-1))/2.0_dp ! *(b-a)/2 in eq. 5.53, 5.54 call rll_local_solutions(vll, tau(0,ipan), drpan2, cslc1, slc1sum, mrnvy(1,1,ipan), & mrnvz(1,1,ipan), mrjvy(1,1,ipan), mrjvz(1,1,ipan), yrf(1,1,0,ipan), zrf(1,1,0,ipan), & - ncheb, ipan, lmsize, lmsize2, nrmax, nvec, hlk, jlk, hlk2, & - jlk2, gmatprefactor, use_sratrick1, idotime) + ncheb, ipan, lmsize, lmsize2, nrmax, nvec, hlk_tmp, jlk_tmp, hlk2_tmp, & + jlk2_tmp, gmatprefactor, use_sratrick1, idotime) end do ! ipan #ifdef CPP_HYBRID ! $OMP END DO +#endif + + ! clean working arrays + deallocate(hlk_tmp, jlk_tmp, jlk2_tmp, hlk2_tmp) + +#ifdef CPP_HYBRID ! $OMP END PARALLEL #endif ! end the big loop over the subintervals @@ -249,7 +270,7 @@ contains if (idotime) call timing_stop('checknan') if (idotime) call timing_stop('rll-glob') - deallocate (work, allp, bllp, mrnvy, mrnvz, mrjvy, mrjvz, yrf, zrf, stat=ierror) + deallocate(work, allp, bllp, mrnvy, mrnvz, mrjvy, mrjvz, yrf, zrf, stat=ierror) if (ierror/=0) stop '[rll-glob] ERROR in deallocating arrays' end subroutine rll_global_solutions diff --git a/source/common/radial_solver_Chebychev/rllsll.F90 b/source/common/radial_solver_Chebychev/rllsll.F90 index f435fd705..b282fde21 100644 --- a/source/common/radial_solver_Chebychev/rllsll.F90 +++ b/source/common/radial_solver_Chebychev/rllsll.F90 @@ -282,7 +282,7 @@ contains #endif call sll_local_solutions(vll, tau(0,ipan), drpan2, csrc1, slc1sum, & mihvy(1,1,ipan), mihvz(1,1,ipan), mijvy(1,1,ipan), mijvz(1,1,ipan), & - yif(1,1,0,ipan), zif(1,1,0,ipan), ncheb, ipan, lmsize, lmsize2, nrmax, nvec, & + yif(1,1,0,ipan), zif(1,1,0,ipan), ncheb, ipan, lmsize, lmsize2, nrmax, nvec, & hlk_tmp, jlk_tmp, hlk2_tmp, jlk2_tmp, gmatprefactor, use_sratrick, idotime) #if defined(CPP_HYBRID) !$omp end task diff --git a/source/common/radial_solver_Chebychev/rllsllsourceterms.f90 b/source/common/radial_solver_Chebychev/rllsllsourceterms.f90 index 3842f580c..77c02aa05 100644 --- a/source/common/radial_solver_Chebychev/rllsllsourceterms.f90 +++ b/source/common/radial_solver_Chebychev/rllsllsourceterms.f90 @@ -40,8 +40,7 @@ contains use mod_beshank, only: beshank, beshank_smallcomp use global_variables, only: korbit, kBdG, efermi_in use mod_BdG, only: t_BdG - use mod_runoptions, only: nosmallcomp - use mod_runoptions, only: check_custom_testopt + use mod_runoptions, only: nosmallcomp, check_custom_testopt implicit none diff --git a/source/common/radial_solver_Chebychev/sll_global_solutions.F90 b/source/common/radial_solver_Chebychev/sll_global_solutions.F90 index 27f079a34..195887b53 100644 --- a/source/common/radial_solver_Chebychev/sll_global_solutions.F90 +++ b/source/common/radial_solver_Chebychev/sll_global_solutions.F90 @@ -49,8 +49,8 @@ contains integer :: lbessel, use_sratrick1 ! dimensions etc., needed only for host code interface ! running indices - integer :: lm1, lm2 - integer :: icheb, ipan, mn, nm + integer :: l1, lm1, lm2 + integer :: icheb, ipan, mn integer :: info, ipiv(lmsize) integer :: iter_beta, niter_beta @@ -58,9 +58,11 @@ contains complex (kind=dp), intent(in) :: gmatprefactor(1+kBdG) ! prefactor of green function (non-rel: = kappa = sqrt e) ! non-rel: = kappa = sqrt e complex (kind=dp) :: hlk(lbessel, nrmax), jlk(lbessel, nrmax), hlk2(lbessel, nrmax), jlk2(lbessel, nrmax) + complex (kind=dp), allocatable :: hlk_tmp(:, :), jlk_tmp(:, :), hlk2_tmp(:, :), jlk2_tmp(:, :) ! working arrays integer :: jlk_index(2*lmsize) + ! quadruple precision arrays complex*32, allocatable :: qcllp(:, :, :), qdllp(:, :, :) complex*32, allocatable :: qmihvy(:, :), qmihvz(:, :), qmijvy(:, :), qmijvz(:, :) complex*32, allocatable :: qyif(:, :, :) @@ -108,11 +110,11 @@ contains ! if the SRA approximation is used: ! matrix A ( C 1) (same as in eq. 5.68) - ! ( B 0) + ! ( B 0) ! (C, B are matricies here) ! inverse of A is (C^-1 0 ) - ! (-B C^-1 1 ) + ! (-B C^-1 1 ) ! Thus, it is sufficient to only inverse the matrix C which saves computational ! time. This is refered to as the SRA trick. ! *********************************************************************** @@ -127,14 +129,14 @@ contains #endif if (idotime) call timing_start('sll-glob') - allocate (mihvy(lmsize,lmsize,npan), mihvz(lmsize,lmsize,npan)) - allocate (mijvy(lmsize,lmsize,npan), mijvz(lmsize,lmsize,npan)) - allocate (yif(lmsize2,lmsize,0:ncheb,npan)) - allocate (zif(lmsize2,lmsize,0:ncheb,npan)) - allocate (betainv(lmsize,lmsize), betainv_save(lmsize,lmsize)) - allocate (cllp(lmsize,lmsize,0:npan), dllp(lmsize,lmsize,0:npan)) - allocate (cllptemp(lmsize,lmsize), dllptemp(lmsize,lmsize)) - allocate (work(lmsize,lmsize)) + allocate(mihvy(lmsize,lmsize,npan), mihvz(lmsize,lmsize,npan)) + allocate(mijvy(lmsize,lmsize,npan), mijvz(lmsize,lmsize,npan)) + allocate(yif(lmsize2,lmsize,0:ncheb,npan)) + allocate(zif(lmsize2,lmsize,0:ncheb,npan)) + allocate(betainv(lmsize,lmsize), betainv_save(lmsize,lmsize)) + allocate(cllp(lmsize,lmsize,0:npan), dllp(lmsize,lmsize,0:npan)) + allocate(cllptemp(lmsize,lmsize), dllptemp(lmsize,lmsize)) + allocate(work(lmsize,lmsize)) do ipan = 1, npan do icheb = 0, ncheb @@ -152,28 +154,49 @@ contains ! $OMP& SHARED(tau,npan,drpan2,rpanbound,mihvy,mihvz,mijvy,mijvz,yif) & ! $OMP& SHARED(zif,nvec,lmsize,lmsize2,ncheb,jlk,jlk2,jlk_index) & ! $OMP& SHARED(vll,gmatprefactor,hlk,hlk2,cslc1,csrc1,slc1sum) & - ! $OMP& SHARED(use_sratrick, rmesh) + ! $OMP& SHARED(use_sratrick, rmesh) & + ! $omp& firstprivate(idotime) thread_id = omp_get_thread_num() + + ! deactivate timing on all other threads except the main thread + if (idotime .and. thread_id/=0) idotime = .false. #endif + ! prefetch jlk and hlk to help compiler vectorize + allocate(hlk_tmp(lmsize*nvec, nrmax), jlk_tmp(lmsize*nvec, nrmax)) + allocate(hlk2_tmp(lmsize*nvec, nrmax), jlk2_tmp(lmsize*nvec, nrmax)) + do lm1 = 1, lmsize*nvec + l1 = jlk_index(lm1) + jlk_tmp(lm1, :) = jlk(l1,:) + hlk_tmp(lm1, :) = hlk(l1,:) + jlk2_tmp(lm1, :) = jlk2(l1,:) + hlk2_tmp(lm1, :) = hlk2(l1,:) + end do + ! loop over subintervals #ifdef CPP_HYBRID ! openMP pragmas added sachin, parallel region starts earlier to get allocations of arrays right - ! $OMP DO + ! $OMP DO schedule (dynamic) #endif do ipan = 1, npan drpan2 = (rpanbound(ipan)-rpanbound(ipan-1))/2.d0 ! *(b-a)/2 in eq. 5.53, 5.54 call sll_local_solutions(vll, tau(0,ipan), drpan2, csrc1, slc1sum, & mihvy(1,1,ipan), mihvz(1,1,ipan), mijvy(1,1,ipan), mijvz(1,1,ipan), & - yif(1,1,0,ipan), zif(1,1,0,ipan), ncheb, ipan, lmsize, lmsize2, nrmax, nvec, & - hlk, jlk, hlk2, jlk2, gmatprefactor, use_sratrick1, idotime) + yif(1,1,0,ipan), zif(1,1,0,ipan), ncheb, ipan, lmsize, lmsize2, nrmax, nvec, & + hlk_tmp, jlk_tmp, hlk2_tmp, jlk2_tmp, gmatprefactor, use_sratrick1, idotime) end do ! ipan #ifdef CPP_HYBRID ! $OMP END DO +#endif + + ! clean working arrays + deallocate(hlk_tmp, jlk_tmp, jlk2_tmp, hlk2_tmp) + +#ifdef CPP_HYBRID ! $OMP END PARALLEL #endif ! end the big loop over the subintervals @@ -215,16 +238,16 @@ contains call zgetri(lmsize, betainv, lmsize, ipiv, work, lmsize*lmsize, info) if(use_cheby_quadprec) then - allocate (qcone, qczero) + allocate(qcone, qczero) qcone = (1.q0,0.0q0) qczero = (0.q0,0.0q0) - allocate (qmihvy(lmsize,lmsize), qmihvz(lmsize,lmsize)) - allocate (qmijvy(lmsize,lmsize), qmijvz(lmsize,lmsize)) - allocate (qyif(lmsize2,lmsize,0:ncheb)) - allocate (qbetainv(lmsize,lmsize), qbetainv_save(lmsize,lmsize)) - allocate (qsll(lmsize2,lmsize)) - allocate (qcllp(lmsize,lmsize,0:npan), qdllp(lmsize,lmsize,0:npan)) - allocate (qcllptemp(lmsize,lmsize), qdllptemp(lmsize,lmsize)) + allocate(qmihvy(lmsize,lmsize), qmihvz(lmsize,lmsize)) + allocate(qmijvy(lmsize,lmsize), qmijvz(lmsize,lmsize)) + allocate(qyif(lmsize2,lmsize,0:ncheb)) + allocate(qbetainv(lmsize,lmsize), qbetainv_save(lmsize,lmsize)) + allocate(qsll(lmsize2,lmsize)) + allocate(qcllp(lmsize,lmsize,0:npan), qdllp(lmsize,lmsize,0:npan)) + allocate(qcllptemp(lmsize,lmsize), qdllptemp(lmsize,lmsize)) qbetainv = betainv end if @@ -375,9 +398,9 @@ contains if (idotime) call timing_stop('checknan') if (idotime) call timing_stop('sll-glob') - deallocate (work, betainv, betainv_save, cllp, dllp, cllptemp, dllptemp, mihvy, mihvz, mijvy, mijvz, yif, zif, stat=ierror) + deallocate(work, betainv, betainv_save, cllp, dllp, cllptemp, dllptemp, mihvy, mihvz, mijvy, mijvz, yif, zif, stat=ierror) if (ierror/=0) stop '[sll-glob] ERROR in deallocating arrays' - if(use_cheby_quadprec) deallocate (qbetainv, qbetainv_save, qcllp, qdllp, qcllptemp, qdllptemp, qmihvy, qmihvz, qmijvy, qmijvz, qyif, qsll, stat=ierror) + if(use_cheby_quadprec) deallocate(qbetainv, qbetainv_save, qcllp, qdllp, qcllptemp, qdllptemp, qmihvy, qmihvz, qmijvy, qmijvz, qyif, qsll, stat=ierror) if (ierror/=0) stop '[sll-glob] ERROR in deallocating arrays' end subroutine sll_global_solutions -- GitLab