From 3dd019e2cc44cc09d7ddb05fd4bff3286a76a02b Mon Sep 17 00:00:00 2001
From: Philipp Ruessmann <p.ruessmann@fz-juelich.de>
Date: Mon, 4 Jul 2022 15:08:00 +0200
Subject: [PATCH] Fix merge of new version of radial solver

---
 source/KKRhost/main1c.F90                     |  15 +-
 source/KKRhost/main2.F90                      |   2 +-
 source/KKRhost/rhooutnew.F90                  |  39 ++-
 source/KKRhost/rhovalnew.F90                  | 281 ++++++++++--------
 source/KKRhost/tmat_newsolver.F90             | 142 +++++----
 source/common/bfield.f90                      |  36 +--
 .../radial_solver_Chebychev/calcsph.f90       |  11 -
 .../create_newmesh.f90                        |  19 +-
 .../rll_global_solutions.F90                  |  53 +++-
 .../common/radial_solver_Chebychev/rllsll.F90 |   2 +-
 .../rllsllsourceterms.f90                     |   3 +-
 .../sll_global_solutions.F90                  |  75 +++--
 12 files changed, 381 insertions(+), 297 deletions(-)

diff --git a/source/KKRhost/main1c.F90 b/source/KKRhost/main1c.F90
index 2fc07e0fb..c80aabcf3 100644
--- a/source/KKRhost/main1c.F90
+++ b/source/KKRhost/main1c.F90
@@ -34,8 +34,8 @@ contains
     use mod_timing
 #endif
     use mod_datatypes, only: dp
-    use mod_runoptions, only: calc_gmat_lm_full, fix_nonco_angles, relax_SpinAngle_Dirac, use_Chebychev_solver, &
-      use_decimation, use_qdos, write_DOS, write_complex_qdos, write_density_ascii, write_rho2ns, write_DOS_lm, &
+    use mod_runoptions, only: calc_gmat_lm_full, relax_SpinAngle_Dirac, use_Chebychev_solver, &
+      use_decimation, use_qdos, write_DOS, write_density_ascii, write_rho2ns, write_DOS_lm, &
       decouple_spin_cheby, disable_print_serialnumber, check_custom_testopt, write_BdG_tests, use_ldau, write_npy, &
       write_all_denlm
     use mod_constants, only: czero, pi
@@ -183,8 +183,7 @@ contains
     integer :: ierr, i_all
     integer :: ntot1
     integer :: ihelp
-    integer :: idim, nranks_local, myrank_ie_tmp
-    complex (kind=dp) :: dentot    ! qdos
+    integer :: nranks_local, myrank_ie_tmp
     integer, dimension (0:nranks-1) :: ntot_pt
     integer, dimension (0:nranks-1) :: ioff_pt
     complex (kind=dp), dimension (:, :), allocatable :: work
@@ -530,7 +529,6 @@ contains
               ! copy temporary arrays to proper place in big den and espv arrays
               den(0:lmaxd1, 1:ielast, 1:nqdos, ipot2+1:ipot2+nspin/(nspin-korbit)*(1+kBdG)**2) = dentemp(0:lmaxd1, 1:ielast, 1:nqdos, 1:nspin/(nspin-korbit)*(1+kBdG)**2)
               espv(0:lmaxd1, ipot2+1:ipot2+2/(nspin-korbit)*(1+kBdG)**2) = espv_work(0:lmaxd1, 1:2/(nspin-korbit)*(1+kBdG)**2)
-
 #ifdef CPP_TIMING
               call timing_pause('main1c - rhovalnew')
 #endif
@@ -685,7 +683,7 @@ contains
         ! move writeout of qdos file here
         if (use_qdos) then
           ! first communicate den array to write out qdos files
-          if (nranks > 1) call mympi_comm_qdos_den(lmaxd1, ielast, nqdos, npotd, kBdG, lmmaxd, den, denlm)
+          if (nranks > 1) call mympi_comm_qdos_den(lmaxd1, lmmaxd, ielast, nqdos, npotd, kBdG, den, denlm)
 
           ! now write out only on main rank
           if (myrank==master) then
@@ -747,7 +745,6 @@ contains
           call mympi_main1c_comm_newsosol2(lmaxd1, lmmaxd, ielast, nqdos, npotd, natyp, lmpotd, irmd, mmaxd, den, denlm, muorb, espv, r2nef, rho2ns, denefat, denef, denmatn, &
             angles_new, totmoment, bconstr, t_mpi_c_grid%mympi_comm_ie)
         end if
-
 #endif
 
 #ifdef CPP_TIMING
@@ -803,8 +800,6 @@ contains
       call timing_start('main1c - serial part')
 #endif
 
-
-
       ! In case of Lloyds formula renormalize valence charge
       if (lly>0) then
         lmaxp1 = lmax
@@ -1088,7 +1083,7 @@ contains
     complex (kind=dp), dimension (:), allocatable, intent(inout) :: cdosat1
     complex (kind=dp), dimension (:, :), allocatable, intent(inout) :: cdos_lly
     integer, intent(out) :: nqdos
-    integer :: i_stat, i1, iq
+    integer :: i_stat
 
     if (allocmode==1) then
 
diff --git a/source/KKRhost/main2.F90 b/source/KKRhost/main2.F90
index 5657e8f15..a49c79158 100644
--- a/source/KKRhost/main2.F90
+++ b/source/KKRhost/main2.F90
@@ -35,7 +35,7 @@ contains
     use mod_runoptions, only: disable_charge_neutrality, print_program_flow, relax_SpinAngle_Dirac, &
       search_Efermi, simulate_asa, slow_mixing_Efermi, symmetrize_potential_cubic, symmetrize_potential_madelung, &
       use_decimation, use_rigid_Efermi, use_semicore, use_spherical_potential_only, write_deci_tmat, write_kkrimp_input, &
-      write_madelung_file, write_potential_tests, write_rho2ns, use_Chebychev_solver, use_semi_circle_contour, &
+      write_madelung_file, write_potential_tests, write_rho2ns, use_Chebychev_solver, &
       write_denvis_input, no_core_density
     use global_variables, only: krel, ipand, npotd, natomimpd, lmxspd, iemxd, nspotd, irid, ngshd, linterface, &
       nfund, ncelld, irmd, nembd1, nembd, irmind, lmmaxd, wlength, natypd, naezd, lmpotd, lpotd, lmaxd, nspind, nspotd, &
diff --git a/source/KKRhost/rhooutnew.F90 b/source/KKRhost/rhooutnew.F90
index e07b498ec..a1251f473 100644
--- a/source/KKRhost/rhooutnew.F90
+++ b/source/KKRhost/rhooutnew.F90
@@ -25,7 +25,7 @@ contains
   !------------------------------------------------------------------------------------
   subroutine rhooutnew(nsra, lmax, gmatll, ek, lmpot, df, npan_tot, ncheb, cleb, icleb, iend, irmdnew, thetasnew, ifunm, imt1, lmsp, &
     rll, ull, rllleft, sllleft, cden, cdenlm, cdenns, rho2nsc, corbital, gflle_part, rpan_intervall, ipan_intervall, nspin, &
-    ek2, eryd, ie, ielast, iatom)
+    ek2, eryd, ie, ielast)
 
     use mod_mympi, only: myrank
     use mod_constants, only: cone,czero,pi
@@ -55,7 +55,6 @@ contains
     integer, intent (in) :: corbital
     integer, intent (in) :: npan_tot
     integer, intent (in) :: ie, ielast !! index of current energy point and maximum number of energy points
-    integer, intent(in) :: iatom !! atom index
     complex (kind=dp), intent (in) :: ek, ek2 !! kappa prefactor (sqrt(E) without relativistic)
     complex (kind=dp), intent (in) :: eryd !! complex energy point in Ry
     complex (kind=dp), intent (in) :: df
@@ -85,7 +84,6 @@ contains
     ! .. Local variables
     integer :: lmmax0d !! lm matrix size without spin doubling
     integer :: ir, jspin, lm1, lm2, lm3, m1, l1, j, ifun
-    integer :: i_stat
     integer :: i_BdG, j_BdG, soffset, lmoffset, lmoffset2 ! for BdG mode
     real (kind=dp) :: c0ll
     complex (kind=dp) :: cltdf, alpha, dftmp
@@ -93,7 +91,6 @@ contains
     integer, dimension (4) :: lmshift2
     complex (kind=dp), dimension (lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), 3) :: loperator
     complex (kind=dp), dimension (lmmaxd, lmmaxd) :: mat_in, mat_out
-    character (len=100) :: filename ! test writeout in BdG mode
 
     ! .. Local allocatable arrays
     complex (kind=dp), dimension (:), allocatable :: cwr ! lmlm-dos
@@ -179,16 +176,19 @@ contains
       if (kBdG>0) qnsi(1+lmmaxd:2*lmmaxd, 1+lmmaxd:2*lmmaxd) = ek2*qnsi(1+lmmaxd:2*lmmaxd, 1+lmmaxd:2*lmmaxd)
 
       ! wr = kappa * ull * sllleft
+      !!! call zgemm(..., ek, ull, ..., slleft, ..., czero, wr(1,1,ir), ...)
       call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), cone, pnsi, &
                  lmmaxd*(1+kBdG), qnsi, lmmaxd*(1+kBdG), czero, wr(1,1,ir), lmmaxd*(1+kBdG))
-
+      
       ! qnsi = rllleft * Gmat
+      !!! call zgemm(..., cone, rllleft, ..., gmatll, ..., czero, QNSI, lmmaxd)
       pnsi(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG)) = rllleft(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG), ir)
       call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), alpha, pnsi, &
                  lmmaxd*(1+kBdG), gmatll, lmmaxd*(1+kBdG), czero, qnsi, lmmaxd*(1+kBdG))
 
       !       ------ wr -----------    pnsi   --- qnsi -----
       ! wr = (kappa * ull * sllleft) + rll * (Gmat * rllleft)
+      !!! call zgemm(..., cone, rll, ..., QNSI, ..., cone, wr(1,1,ir), lmmaxd)
       pnsi(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG)) = rll(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG), ir)
       call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), cone, pnsi, &
                  lmmaxd*(1+kBdG), qnsi, lmmaxd*(1+kBdG), cone, wr(1,1,ir), lmmaxd*(1+kBdG))
@@ -204,16 +204,19 @@ contains
         if (kBdG>0) qnsi(1+lmmaxd:2*lmmaxd, 1+lmmaxd:2*lmmaxd) = ek2*qnsi(1+lmmaxd:2*lmmaxd, 1+lmmaxd:2*lmmaxd)
 
         ! wr = kappa * ull * -sllleft
+        !!! call zgemm(..., ek, ull, ..., -sllleft, ..., cone, wr(1,1,ir), lmmaxd)
         call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), cone, pnsi, &
                    lmmaxd*(1+kBdG), qnsi, lmmaxd*(1+kBdG), cone, wr(1,1,ir), lmmaxd*(1+kBdG))
 
         ! qnsi = -rllleft * Gmat
+        !!! call zgemm(..., cone, -rllleft, ..., gmatll, ..., czero, QNSI, lmmaxd)
         pnsi(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG)) = -rllleft(1+lmmaxd*(1+kBdG):2*lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG), ir)
         call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), cone, pnsi, &
-                   lmmaxd*(1+kBdG), gmatll, lmmaxd*(1+kBdG), czero, qnsi, lmmaxd*(1+kBdG))
+                  lmmaxd*(1+kBdG), gmatll, lmmaxd*(1+kBdG), czero, qnsi, lmmaxd*(1+kBdG))
 
         !       ------- wr -----------    pnsi   ---- qnsi -----
         ! wr = (kappa * ull * -sllleft) + rll * (Gmat * -rllleft)
+        !!! call zgemm(..., cone, rll, ..., QNSI, ..., cone, wr(1,1,ir), lmmaxd)
         pnsi(1:lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG)) = rll(1+lmmaxd*(1+kBdG):2*lmmaxd*(1+kBdG), 1:lmmaxd*(1+kBdG), ir)
         call zgemm('N', 'T', lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), cone, pnsi, &
                    lmmaxd*(1+kBdG), qnsi, lmmaxd*(1+kBdG), cone, wr(1,1,ir), lmmaxd*(1+kBdG))
@@ -280,9 +283,6 @@ contains
     end do ! IR
     !$omp end do
 
-    ! test uncomment this here since we now set it in rhovalnew from cdenlm
-    !if (kBdG>0) t_BdG%den_lm_ir(iatom, :, :, :) = den_lm_ir(:, :, :)
-
     ! deallocate working arrays
     call allocate_work_thread(-1, irmdnew, cwr, pnsi, qnsi)
 
@@ -493,7 +493,6 @@ contains
     use mod_datatypes, only: dp
     use mod_constants, only: czero, cone, kb
     use global_variables, only: efermi_in, kBdG, Temp_BdG, wfac_renorm
-    use mod_wunfiles, only: t_params
     use mod_runoptions, only: check_custom_testopt
     use mod_BdG, only: t_BdG
     implicit none
@@ -630,22 +629,22 @@ contains
     
     if (allocmode==1) then
       ! allocate and initialize to zero: wr, cwr, wr1, pnsi and qnsi arrays
-      allocate (wr(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew), stat=i_stat)
+      allocate(wr(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew), stat=i_stat)
       call memocc(i_stat, product(shape(wr))*kind(wr), 'WR', 'RHOOUTNEW')
       wr = czero
-      allocate (wr1(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew), stat=i_stat)
+      allocate(wr1(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew), stat=i_stat)
       call memocc(i_stat, product(shape(wr1))*kind(wr1), 'WR1', 'RHOOUTNEW')
       wr1 = czero
     elseif (allocmode==-1) then
       ! deallocate: wr, cwr, wr1, pnsi and qnsi arrays
       itmp = -product(shape(wr))*kind(wr)
       if (allocated(wr)) then
-        deallocate (wr, stat=i_stat)
+        deallocate(wr, stat=i_stat)
         call memocc(i_stat, itmp, 'WR', 'RHOOUTNEW')
       end if
       itmp = -product(shape(wr1))*kind(wr1)
       if (allocated(wr1)) then
-        deallocate (wr1, stat=i_stat)
+        deallocate(wr1, stat=i_stat)
         call memocc(i_stat, itmp, 'WR1', 'RHOOUTNEW')
       end if
     else
@@ -680,30 +679,30 @@ contains
     
     if (allocmode==1) then
       ! allocate and initialize to zero: wr, cwr, wr1, pnsi and qnsi arrays
-      allocate (cwr(irmdnew), stat=i_stat)
+      allocate(cwr(irmdnew), stat=i_stat)
       call memocc(i_stat, product(shape(cwr))*kind(cwr), 'CWR', 'RHOOUTNEW')
       cwr = czero
-      allocate (qnsi(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat)
+      allocate(qnsi(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat)
       call memocc(i_stat, product(shape(qnsi))*kind(qnsi), 'QNSI', 'RHOOUTNEW')
       qnsi = czero
-      allocate (pnsi(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat)
+      allocate(pnsi(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat)
       call memocc(i_stat, product(shape(pnsi))*kind(pnsi), 'PNSI', 'RHOOUTNEW')
       pnsi = czero
     elseif (allocmode==-1) then
       ! deallocate: wr, cwr, wr1, pnsi and qnsi arrays
       itmp = -product(shape(cwr))*kind(cwr)
       if (allocated(cwr)) then
-        deallocate (cwr, stat=i_stat)
+        deallocate(cwr, stat=i_stat)
         call memocc(i_stat, itmp, 'CWR', 'RHOOUTNEW')
       end if
       itmp = -product(shape(qnsi))*kind(qnsi)
       if (allocated(qnsi)) then
-        deallocate (qnsi, stat=i_stat)
+        deallocate(qnsi, stat=i_stat)
         call memocc(i_stat, itmp, 'QNSI', 'RHOOUTNEW')
       end if
       itmp = -product(shape(pnsi))*kind(pnsi)
       if (allocated(pnsi)) then
-        deallocate (pnsi, stat=i_stat)
+        deallocate(pnsi, stat=i_stat)
         call memocc(i_stat, itmp, 'PNSI', 'RHOOUTNEW')
       end if
     else
diff --git a/source/KKRhost/rhovalnew.F90 b/source/KKRhost/rhovalnew.F90
index 8aa3499ed..e50b3fa45 100644
--- a/source/KKRhost/rhovalnew.F90
+++ b/source/KKRhost/rhovalnew.F90
@@ -44,9 +44,9 @@ contains
     use mod_types, only: t_tgmat, t_inc
     use mod_mympi, only: myrank, master, nranks
     use mod_save_wavefun, only: t_wavefunctions, read_wavefunc
-    use mod_runoptions, only: calc_exchange_couplings, calc_gmat_lm_full, disable_tmat_sratrick, fix_nonco_angles, &
+    use mod_runoptions, only: calc_exchange_couplings, calc_gmat_lm_full, disable_tmat_sratrick, &
                               use_qdos, write_pkkr_operators, write_DOS_lm, set_cheby_nospeedup, &
-                              decouple_spin_cheby, disable_print_serialnumber, write_BdG_tests, check_custom_testopt, set_gmat_to_zero, write_npy, calc_chi_ns
+                              decouple_spin_cheby, write_BdG_tests, check_custom_testopt, set_gmat_to_zero, write_npy, calc_chi_ns
     use global_variables, only: lmmaxd, iemxd, ncleb, lmxspd, irmd, ntotd, nrmaxd, lmpotd, nspotd, nfund, korbit, mmaxd, nspind, kBdG, efermi_in, angles_cutoff
     use mod_constants, only: czero, cvlight, cone, pi, ci
     use mod_profiling, only: memocc 
@@ -62,7 +62,7 @@ contains
     use mod_vllmat, only: vllmat
     use mod_tmatnewsolver, only: prepare_socpotential, get_sourceterms
     use mod_BdG, only: t_BdG, reshape_BdG
-    use mod_qdos, only: read_qvec, write_qdos
+    use mod_qdos, only: write_qdos
     use m_npy, only: save_npy
     use mod_wunfiles, only: t_params
     use :: mod_torque, only: calc_torque
@@ -120,12 +120,11 @@ contains
     integer :: i_BdG, j_BdG, soffset, soffset2, soffset3 ! for BdG mode
     integer :: lmmax0d !! (lmax+1)**2
     integer :: lmaxd1
-    integer :: ir, irec, use_sratrick, nvec, lm1, lm2, ie, irmdnew, imt1, jspin, idim, iorb, l1
-    integer :: i_stat, i_all
-    integer :: iq, nqdos                                                                ! qdos ruess: number of qdos points
+    integer :: ir, irec, use_sratrick, nvec, lm1, lm2, ie, irmdnew, imt1, jspin, idim, iorb
+    integer :: iq, nqdos ! qdos
     integer :: lrecgflle, ierr     ! lmlm-dos
     integer :: lmlo, lmhi, is, js, mmax ! LDAU
-    integer :: ix, m1                                                                   ! qdos ruess
+    integer :: m1
     logical :: calcrll, calcsll, calctmat
 
 
@@ -163,7 +162,7 @@ contains
 
 
     ! .. Local allocatable arrays
-    real (kind=dp), dimension (:, :), allocatable :: qvec ! qdos ruess: q-vectors for qdos
+    real (kind=dp), dimension (:, :), allocatable :: qvec    ! qdos: q-vectors
     real (kind=dp), dimension (:, :, :), allocatable :: vins ! vins in the new mesh!! corresponds to the vinsnew in main1c but only for the two spin channels
     complex (kind=dp), dimension (:, :), allocatable :: tmatsph
     complex (kind=dp), dimension (:, :), allocatable :: cdentemp
@@ -198,11 +197,6 @@ contains
     complex (kind=dp), dimension (:, :, :, :), allocatable :: r2nefc_loop
     complex (kind=dp), dimension (:, :, :, :), allocatable :: rho2nsc_loop
 
-#ifdef CPP_MPI
-    complex (kind=dp), dimension (2) :: dentot                                                      ! qdos ruess
-    ! communication
-    complex (kind=dp), dimension (:, :, :, :), allocatable :: workc
-#endif
     ! OMP - number of threads, thread id
     integer :: nth, ith
     integer :: ie_start, ie_end, ie_num
@@ -305,29 +299,12 @@ contains
       call timing_pause('rhovalnew - prepare pot')
 #endif
 
-    nqdos = 1                                                                               ! qdos ruess
-    if (use_qdos) then                                                                      ! qdos ruess
-      ! Read BZ path for qdos calculation:                                                  ! qdos ruess
-      call read_qvec(qvec)                                                                  ! qdos ruess
-      nqdos = size(qvec, dim=2) ! find number of qvec                                       ! qdos ruess
-      ! Change allocation for GFLLE to be suitabel for qdos run                             ! qdos ruess
-      i_all = -product(shape(gflle))*kind(gflle)                                            ! qdos ruess
-      deallocate (gflle, stat=i_stat)                                                       ! qdos ruess
-      call memocc(i_stat, i_all, 'GFLLE', 'RHOVALNEW')                                      ! qdos ruess
-      i_all = -product(shape(den))*kind(den)                                                ! qdos ruess
-      deallocate (den, stat=i_stat)                                                         ! qdos ruess
-      call memocc(i_stat, i_all, 'DEN', 'RHOVALNEW')                                        ! qdos ruess
-      i_all = -product(shape(denlm))*kind(denlm)                                            ! qdos ruess
-      deallocate (denlm, stat=i_stat)                                                       ! qdos ruess
-      call memocc(i_stat, i_all, 'DENLM', 'RHOVALNEW')                                      ! qdos ruess
-      ! now reallocate with larger nqdos dimension                                          ! qdos ruess
-      allocate (gflle(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),ielast,nqdos), stat=i_stat)           ! qdos ruess
-      call memocc(i_stat, product(shape(gflle))*kind(gflle), 'GFLLE', 'RHOVALNEW')          ! qdos ruess
-      allocate (den(0:lmaxd1,ielast,nqdos,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat)   ! qdos ruess
-      call memocc(i_stat, product(shape(den))*kind(den), 'DEN', 'RHOVALNEW')                ! qdos ruess
-      allocate (denlm(lmmax0d,ielast,nqdos,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat)  ! qdos ruess
-      call memocc(i_stat, product(shape(denlm))*kind(denlm), 'DENLM', 'RHOVALNEW')          ! qdos ruess
-    end if ! use_qdos                                                                       ! qdos ruess
+    nqdos = 1
+    if (use_qdos) then
+      ! read qvec.dat and change size of arrays
+      call change_allocations_qdos(lmmaxd, kBdG, ielast, lmaxd1, nspin, korbit, &
+                                   lmmax0d, nqdos, qvec, gflle, den, denlm)
+    end if ! use_qdos
 
 #ifdef CPP_MPI
     iatom_myrank = iatom - t_mpi_c_grid%ioff_pt1(t_mpi_c_grid%myrank_ie) ! lmlm-dos ruess
@@ -472,20 +449,30 @@ contains
         ! faster calculation of RLL.
         ! no irregular solutions SLL are needed in self-consistent iterations
         ! because the density depends only on RLL, RLLLEFT and SLLLEFT
+        calctmat = .true.
+        calcrll = .true.
+        ! compute rll only (not sll since usually not needed)
+        if (calc_exchange_couplings .or. write_pkkr_operators) then
+          calcsll = .true.
+        else
+          calcsll = .false.
+        end if
+
         if (.not.set_cheby_nospeedup .and. .not. (calc_exchange_couplings .or. write_pkkr_operators)) then
           call rll_global_solutions(rpan_intervall, rnew, vnspll(:,:,:,ith), ull(:,:,:,ith), rll(:,:,:,ith), tmatll, ncheb, npan_tot, &
             lmmaxd*(1+kBdG), nvec*lmmaxd*(1+kBdG), nsra*(1+korbit)*(lmax+1)*(1+kBdG), irmdnew, nsra, jlk_index, &
             hlk(:,:,ith), jlk(:,:,ith), hlk2(:,:,ith), jlk2(:,:,ith), gmatprefactor, use_sratrick, alphall)
         else
-          calcsll = .false.
+          call rllsll(rpan_intervall, rnew, vnspll(:,:,:,ith), rll(:,:,:,ith), sll(:,:,:,ith), tmatll, ncheb, npan_tot, &
+            lmmaxd*(1+kBdG), nvec*lmmaxd*(1+kBdG), nsra*(lmax+1)*(1+korbit)*(1+kBdG), irmdnew, nsra, jlk_index, hlk(:,:,ith), jlk(:,:,ith), &
+            hlk2(:,:,ith), jlk2(:,:,ith), gmatprefactor, use_sratrick, alphall, calcrll, calcsll, calctmat)
+            ! with old rllsll interface we need this for rhooutnew
+            ull(:, :, :, ith) = rll(:, :, :, ith)
         end if
-        call rllsll(rpan_intervall, rnew, vnspll(:,:,:,ith), rll(:,:,:,ith), sll(:,:,:,ith), tmatll, ncheb, npan_tot, &
-          lmmaxd*(1+kBdG), nvec*lmmaxd*(1+kBdG), nsra*(lmax+1)*(1+korbit)*(1+kBdG), irmdnew, nsra, jlk_index, hlk(:,:,ith), jlk(:,:,ith), &
-          hlk2(:,:,ith), jlk2(:,:,ith), gmatprefactor, use_sratrick, alphall, calcrll, calcsll, calctmat)
         if (nsra==2) then
           ull(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = ull(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight
           rll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = rll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight
-          sll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = sll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight
+          if (calcsll) sll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = sll(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight
         end if
         if (write_BdG_tests .and. check_custom_testopt('rhovalnew_rll')) then
           write(22211, '(A,30i5)') '#', shape(rll(:,:,:,ith))
@@ -536,6 +523,11 @@ contains
         tmattemp = czero
         alphall = czero
         ! faster calculation of RLLLEFT and SLLLEFT.
+        ! compute rllleft and 
+        ! calculate sllleft 
+        calcrll = .true.
+        calcsll = .true.
+        calctmat = .false.
         if (.not.set_cheby_nospeedup .and. .not. (calc_exchange_couplings .or. write_pkkr_operators)) then
           call rll_global_solutions(rpan_intervall, rnew, vnspll(:,:,:,ith), ullleft(:,:,:,ith), rllleft(:,:,:,ith), tmattemp, &
             ncheb, npan_tot, lmmaxd*(1+kBdG), nvec*lmmaxd*(1+kBdG), nsra*(1+korbit)*(lmax+1)*(1+kBdG), irmdnew, nsra, jlk_index, &
@@ -547,6 +539,9 @@ contains
           call rllsll(rpan_intervall, rnew, vnspll(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), tmattemp, ncheb, npan_tot, &
             lmmaxd*(1+kBdG), nvec*lmmaxd*(1+kBdG), nsra*(1+korbit)*(lmax+1)*(1+kBdG), irmdnew, nsra, &
             jlk_index, hlk2(:,:,ith), jlk2(:,:,ith), hlk(:,:,ith), jlk(:,:,ith), gmatprefactor, use_sratrick, alphall, calcrll, calcsll, calctmat)
+            ! ! testing
+            ! ull(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = rllleft(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)
+            ! ull(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = ull(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight
         end if
         if (nsra==2) then
           ullleft(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith) = ullleft(lmmaxd*(1+kBdG)+1:nvec*lmmaxd*(1+kBdG), :, :, ith)/cvlight
@@ -609,7 +604,7 @@ contains
         call timing_start('rhovalnew - rhooutnew')
 #endif
         call rhooutnew(nsra, lmax, gmatll, ek, lmpotd, df, npan_tot, ncheb, cleb, icleb, iend, irmdnew, thetasnew, ifunm, imt1, lmsp, rll(:,:,:,ith), & ! SLL(:,:,:,ith), commented out since sll is not used in rhooutnew
-          ull(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), cden(:,:,:,ith), cdenlm(:,:,:,ith), cdenns(:,:,ith), rho2nsc_loop(:,:,:,ie), 0, gflle(:,:,ie,iq), rpan_intervall, ipan_intervall, nspin/(nspin-korbit), ek2, eryd, ie, ielast, iatom)
+          ull(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), cden(:,:,:,ith), cdenlm(:,:,:,ith), cdenns(:,:,ith), rho2nsc_loop(:,:,:,ie), 0, gflle(:,:,ie,iq), rpan_intervall, ipan_intervall, nspin/(nspin-korbit), ek2, eryd, ie, ielast)
 
 #ifdef CPP_TIMING
         call timing_pause('rhovalnew - rhooutnew')
@@ -783,7 +778,7 @@ contains
 #endif
         call rhooutnew(nsra, lmax, gmatll, ek, lmpotd, cone, npan_tot, ncheb, cleb, icleb, iend, irmdnew, thetasnew, ifunm, imt1, lmsp, rll(:,:,:,ith), & ! SLL(:,:,:,ith), ! commented out since sll is not used in rhooutnew
           ull(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), cden(:,:,:,ith), cdenlm(:,:,:,ith), cdenns(:,:,ith), r2nefc_loop(:,:,:,ith), 0, gflle_part(:,:,ith), rpan_intervall, &
-          ipan_intervall, nspin/(nspin-korbit), ek2, eryd, -1, ielast, iatom)
+          ipan_intervall, nspin/(nspin-korbit), ek2, eryd, -1, ielast)
 #ifdef CPP_TIMING
         call timing_pause('rhovalnew - rhooutnew')
 #endif
@@ -803,7 +798,7 @@ contains
         call timing_start('rhovalnew - rhooutnew')
 #endif
           call rhooutnew(nsra, lmax, gmatll, ek, lmpotd, cone, npan_tot, ncheb, cleb, icleb, iend, irmdnew, thetasnew, ifunm, imt1, lmsp, rll(:,:,:,ith), & ! SLL(:,:,:,ith), ! commented out since sll is not used in rhooutnew
-            ull(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), cden(:,:,:,ith), cdenlm(:,:,:,ith), cdenns(:,:,ith), r2orbc(:,:,:,ith), iorb, gflle_part(:,:,ith), rpan_intervall, ipan_intervall, nspin, ek2, eryd, ie, ielast, iatom)
+            ull(:,:,:,ith), rllleft(:,:,:,ith), sllleft(:,:,:,ith), cden(:,:,:,ith), cdenlm(:,:,:,ith), cdenns(:,:,ith), r2orbc(:,:,:,ith), iorb, gflle_part(:,:,ith), rpan_intervall, ipan_intervall, nspin, ek2, eryd, ie, ielast)
 #ifdef CPP_TIMING
         call timing_pause('rhovalnew - rhooutnew')
 #endif
@@ -960,7 +955,7 @@ contains
                          t_params%thetasnew(1:irmdnew,:,t_params%ntcell(iatom)), bconstr, &
                          t_params%fixdir(iatom), t_params%ntcell(iatom))
         if (t_inc%i_write>1) write (1337,'("calc_torque: myrank=",i8,"  iatom=",i8,"  bfield_constr=",3es16.8,"  mspin=",es16.8)') myrank, iatom, bconstr(1:3), bconstr(4)
-      end if
+      end if ! ltorque
       ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
       ! LDAU
       ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -995,8 +990,7 @@ contains
         ! omp: moved write-out of dos files out of parallel energy loop
         ! Write out lm-dos:
         if (write_DOS_lm) then 
-                                                      ! qdos ruess
-          call write_lmdos(ielast, ez, wez, denlm, iatom, nspin, lmmax0d, irmdnew, ispin)
+          call write_lmdos(ielast, ez, wez, denlm, iatom, nspin, lmmax0d, ispin)
         end if ! write_dos_lm
       end if  ! .not. use_qdos
 
@@ -1160,7 +1154,7 @@ contains
     use mod_datatypes, only: dp
     use mod_constants, only: czero
     use global_variables, only: irmd
-    use mod_profiling, only: memocc 
+    use mod_profiling, only: memocc
     
     implicit none
     ! interface
@@ -1206,80 +1200,80 @@ contains
     if (imode>0) then
 
       ! potential arrays
-      allocate (vnspll(nsra*lmmaxd*(1+kBdG),nsra*lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
+      allocate(vnspll(nsra*lmmaxd*(1+kBdG),nsra*lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(vnspll))*kind(vnspll), 'VNSPLL', 'RHOVALNEW')
-      allocate (vnspll0(lmmaxd,lmmaxd,irmdnew), stat=i_stat)
+      allocate(vnspll0(lmmaxd,lmmaxd,irmdnew), stat=i_stat)
       call memocc(i_stat, product(shape(vnspll0))*kind(vnspll0), 'VNSPLL0', 'RHOVALNEW')
-      allocate (vnspll1(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
+      allocate(vnspll1(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(vnspll1))*kind(vnspll1), 'VNSPLL1', 'RHOVALNEW')
-      allocate (vins(irmdnew,lmpotd,nspin/(nspin-korbit)), stat=i_stat)
+      allocate(vins(irmdnew,lmpotd,nspin/(nspin-korbit)), stat=i_stat)
       call memocc(i_stat, product(shape(vins))*kind(vins), 'VINS', 'RHOVALNEW')
      
       ! wave function source terms
-      allocate (hlk(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
+      allocate(hlk(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(hlk))*kind(hlk), 'HLK', 'RHOVALNEW')
-      allocate (jlk(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
+      allocate(jlk(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(jlk))*kind(jlk), 'JLK', 'RHOVALNEW')
-      allocate (hlk2(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
+      allocate(hlk2(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(hlk2))*kind(hlk2), 'HLK2', 'RHOVALNEW')
-      allocate (jlk2(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
+      allocate(jlk2(nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(jlk2))*kind(jlk2), 'JLK2', 'RHOVALNEW')
       ! sperical part of tmat
-      allocate (tmatsph(nspin/(nspin-korbit)*(lmax+1)*(1+kBdG),0:nth-1), stat=i_stat)
+      allocate(tmatsph(nspin/(nspin-korbit)*(lmax+1)*(1+kBdG),0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(tmatsph))*kind(tmatsph), 'TMATSPH', 'RHOVALNEW')
 
       ! regula and iregular wavefunctions
-      allocate (rll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
+      allocate(rll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(rll))*kind(rll), 'RLL', 'RHOVALNEW')
-      allocate (sll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
+      allocate(sll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(sll))*kind(sll), 'SLL', 'RHOVALNEW')
-      allocate (rllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
+      allocate(rllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(rllleft))*kind(rllleft), 'RLLLEFT', 'RHOVALNEW')
-      allocate (sllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
+      allocate(sllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(sllleft))*kind(sllleft), 'SLLLEFT', 'RHOVALNEW')
-      allocate (ull(nsra*lmmaxd,lmmaxd,irmdnew,0:nth-1), stat=i_stat)
+      allocate(ull(nsra*lmmaxd,lmmaxd,irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(ull))*kind(ull), 'ULL', 'RHOVALNEW')
-      allocate (ullleft(nsra*lmmaxd,lmmaxd,irmdnew,0:nth-1), stat=i_stat)
+      allocate(ullleft(nsra*lmmaxd,lmmaxd,irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(ullleft))*kind(ullleft), 'ULLLEFT', 'RHOVALNEW')
 
       ! complex density
-      allocate (cden(irmdnew,0:lmax,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat)
+      allocate(cden(irmdnew,0:lmax,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(cden))*kind(cden), 'CDEN', 'RHOVALNEW')
-      allocate (cdenlm(irmdnew,lmmax0d,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat)
+      allocate(cdenlm(irmdnew,lmmax0d,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(cdenlm))*kind(cdenlm), 'CDENLM', 'RHOVALNEW')
-      allocate (cdenns(irmdnew,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat)
+      allocate(cdenns(irmdnew,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(cdenns))*kind(cdenns), 'CDENNS', 'RHOVALNEW')
-      allocate (cdentemp(irmdnew,0:nth-1), stat=i_stat)
+      allocate(cdentemp(irmdnew,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(cdentemp))*kind(cdentemp), 'CDENTEMP', 'RHOVALNEW')
 
       ! real density 
-      allocate (den(0:lmaxd1,ielast,1,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat)
+      allocate(den(0:lmaxd1,ielast,1,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat)
       call memocc(i_stat, product(shape(den))*kind(den), 'DEN', 'RHOVALNEW')
-      allocate (denlm(lmmax0d,ielast,1,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat)
+      allocate(denlm(lmmax0d,ielast,1,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat)
       call memocc(i_stat, product(shape(denlm))*kind(denlm), 'DENLM', 'RHOVALNEW')
-      allocate (rho2nsc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat)
+      allocate(rho2nsc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat)
       call memocc(i_stat, product(shape(rho2nsc))*kind(rho2nsc), 'RHO2NSC', 'RHOVALNEW')
-      allocate (rho2nsc_loop(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,ielast), stat=i_stat)
+      allocate(rho2nsc_loop(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,ielast), stat=i_stat)
       call memocc(i_stat, product(shape(rho2nsc_loop))*kind(rho2nsc_loop), 'RHO2NSC_loop', 'RHOVALNEW')
-      allocate (rho2nsnew(irmd,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat)
+      allocate(rho2nsnew(irmd,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat)
       call memocc(i_stat, product(shape(rho2nsnew))*kind(rho2nsnew), 'RHO2NSNEW', 'RHOVALNEW')
-      allocate (r2nefc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat)
+      allocate(r2nefc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat)
       call memocc(i_stat, product(shape(r2nefc))*kind(r2nefc), 'R2NEFC', 'RHOVALNEW')
-      allocate (r2nefc_loop(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat)
+      allocate(r2nefc_loop(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(r2nefc_loop))*kind(r2nefc_loop), 'R2NEFC_loop', 'RHOVALNEW')
-      allocate (r2nefnew(irmd,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat)
+      allocate(r2nefnew(irmd,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2), stat=i_stat)
       call memocc(i_stat, product(shape(r2nefnew))*kind(r2nefnew), 'R2NEFNEW', 'RHOVALNEW')
-      allocate (r2orbc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat)
+      allocate(r2orbc(irmdnew,lmpotd,nspin/(nspin-korbit)*(1+korbit)*(1+kBdG)**2,0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(r2orbc))*kind(r2orbc), 'R2ORBC', 'RHOVALNEW')
-      allocate (rhotemp(irmdnew,lmpotd), stat=i_stat)
+      allocate(rhotemp(irmdnew,lmpotd), stat=i_stat)
       call memocc(i_stat, product(shape(rhotemp))*kind(rhotemp), 'RHOTEMP', 'RHOVALNEW')
-      allocate (rhonewtemp(irws,lmpotd), stat=i_stat)
+      allocate(rhonewtemp(irws,lmpotd), stat=i_stat)
       call memocc(i_stat, product(shape(rhonewtemp))*kind(rhonewtemp), 'RHONEWTEMP', 'RHOVALNEW')
 
       ! full green function (structural constants after Dyson equation)
-      allocate (gflle_part(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),0:nth-1), stat=i_stat)
+      allocate(gflle_part(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),0:nth-1), stat=i_stat)
       call memocc(i_stat, product(shape(gflle_part))*kind(gflle_part), 'GFLLE_PART', 'RHOVALNEW')
-      allocate (gflle(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),ielast,1), stat=i_stat)
+      allocate(gflle(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),ielast,1), stat=i_stat)
       call memocc(i_stat, product(shape(gflle))*kind(gflle), 'GFLLE', 'RHOVALNEW')
 
       ! initialize values 
@@ -1317,100 +1311,100 @@ contains
     else ! imode<0
 
       i_all = -product(shape(vins))*kind(vins)
-      deallocate (vins, stat=i_stat)
+      deallocate(vins, stat=i_stat)
       call memocc(i_stat, i_all, 'VINS', 'RHOVALNEW')
       i_all = -product(shape(vnspll0))*kind(vnspll0)
-      deallocate (vnspll0, stat=i_stat)
+      deallocate(vnspll0, stat=i_stat)
       call memocc(i_stat, i_all, 'VNSPLL0', 'RHOVALNEW')
       i_all = -product(shape(vnspll1))*kind(vnspll1)
-      deallocate (vnspll1, stat=i_stat)
+      deallocate(vnspll1, stat=i_stat)
       call memocc(i_stat, i_all, 'VNSPLL1', 'RHOVALNEW')
       i_all = -product(shape(vnspll))*kind(vnspll)
-      deallocate (vnspll, stat=i_stat)
+      deallocate(vnspll, stat=i_stat)
       call memocc(i_stat, i_all, 'VNSPLL', 'RHOVALNEW')
       i_all = -product(shape(hlk))*kind(hlk)
-      deallocate (hlk, stat=i_stat)
+      deallocate(hlk, stat=i_stat)
       call memocc(i_stat, i_all, 'HLK', 'RHOVALNEW')
       i_all = -product(shape(jlk))*kind(jlk)
-      deallocate (jlk, stat=i_stat)
+      deallocate(jlk, stat=i_stat)
       call memocc(i_stat, i_all, 'JLK', 'RHOVALNEW')
       i_all = -product(shape(hlk2))*kind(hlk2)
-      deallocate (hlk2, stat=i_stat)
+      deallocate(hlk2, stat=i_stat)
       call memocc(i_stat, i_all, 'HLK2', 'RHOVALNEW')
       i_all = -product(shape(jlk2))*kind(jlk2)
-      deallocate (jlk2, stat=i_stat)
+      deallocate(jlk2, stat=i_stat)
       call memocc(i_stat, i_all, 'JLK2', 'RHOVALNEW')
       i_all = -product(shape(tmatsph))*kind(tmatsph)
-      deallocate (tmatsph, stat=i_stat)
+      deallocate(tmatsph, stat=i_stat)
       call memocc(i_stat, i_all, 'TMATSPH', 'RHOVALNEW')
       i_all = -product(shape(rll))*kind(rll)
-      deallocate (rll, stat=i_stat)
+      deallocate(rll, stat=i_stat)
       call memocc(i_stat, i_all, 'RLL', 'RHOVALNEW')
       i_all = -product(shape(sll))*kind(sll)
-      deallocate (sll, stat=i_stat)
+      deallocate(sll, stat=i_stat)
       call memocc(i_stat, i_all, 'SLL', 'RHOVALNEW')
       i_all = -product(shape(rllleft))*kind(rllleft)
-      deallocate (rllleft, stat=i_stat)
+      deallocate(rllleft, stat=i_stat)
       call memocc(i_stat, i_all, 'RLLLEFT', 'RHOVALNEW')
       i_all = -product(shape(sllleft))*kind(sllleft)
-      deallocate (sllleft, stat=i_stat)
+      deallocate(sllleft, stat=i_stat)
       call memocc(i_stat, i_all, 'SLLLEFT', 'RHOVALNEW')
       i_all = -product(shape(cden))*kind(cden)
-      deallocate (cden, stat=i_stat)
+      deallocate(cden, stat=i_stat)
       call memocc(i_stat, i_all, 'CDEN', 'RHOVALNEW')
       i_all = -product(shape(cdenlm))*kind(cdenlm)
-      deallocate (cdenlm, stat=i_stat)
+      deallocate(cdenlm, stat=i_stat)
       call memocc(i_stat, i_all, 'CDENLM', 'RHOVALNEW')
       i_all = -product(shape(cdenns))*kind(cdenns)
-      deallocate (cdenns, stat=i_stat)
+      deallocate(cdenns, stat=i_stat)
       call memocc(i_stat, i_all, 'CDENNS', 'RHOVALNEW')
       i_all = -product(shape(rho2nsc))*kind(rho2nsc)
-      deallocate (rho2nsc, stat=i_stat)
+      deallocate(rho2nsc, stat=i_stat)
       call memocc(i_stat, i_all, 'RHO2NSC', 'RHOVALNEW')
       i_all = -product(shape(rho2nsc_loop))*kind(rho2nsc_loop)
-      deallocate (rho2nsc_loop, stat=i_stat)
+      deallocate(rho2nsc_loop, stat=i_stat)
       call memocc(i_stat, i_all, 'RHO2NSC_loop', 'RHOVALNEW')
       i_all = -product(shape(rho2nsnew))*kind(rho2nsnew)
-      deallocate (rho2nsnew, stat=i_stat)
+      deallocate(rho2nsnew, stat=i_stat)
       call memocc(i_stat, i_all, 'RHO2NSNEW', 'RHOVALNEW')
       i_all = -product(shape(r2nefc))*kind(r2nefc)
-      deallocate (r2nefc, stat=i_stat)
+      deallocate(r2nefc, stat=i_stat)
       call memocc(i_stat, i_all, 'R2NEFC', 'RHOVALNEW')
       i_all = -product(shape(r2nefc_loop))*kind(r2nefc_loop)
-      deallocate (r2nefc_loop, stat=i_stat)
+      deallocate(r2nefc_loop, stat=i_stat)
       call memocc(i_stat, i_all, 'R2NEFC_loop', 'RHOVALNEW')
       i_all = -product(shape(r2nefnew))*kind(r2nefnew)
-      deallocate (r2nefnew, stat=i_stat)
+      deallocate(r2nefnew, stat=i_stat)
       call memocc(i_stat, i_all, 'R2NEFNEW', 'RHOVALNEW')
       i_all = -product(shape(r2orbc))*kind(r2orbc)
-      deallocate (r2orbc, stat=i_stat)
+      deallocate(r2orbc, stat=i_stat)
       call memocc(i_stat, i_all, 'R2ORBC', 'RHOVALNEW')
       i_all = -product(shape(cdentemp))*kind(cdentemp)
-      deallocate (cdentemp, stat=i_stat)
+      deallocate(cdentemp, stat=i_stat)
       call memocc(i_stat, i_all, 'CDENTEMP', 'RHOVALNEW')
       i_all = -product(shape(gflle_part))*kind(gflle_part)
-      deallocate (gflle_part, stat=i_stat)
+      deallocate(gflle_part, stat=i_stat)
       call memocc(i_stat, i_all, 'GFLLE_PART', 'RHOVALNEW')
       i_all = -product(shape(gflle))*kind(gflle)
-      deallocate (gflle, stat=i_stat)
+      deallocate(gflle, stat=i_stat)
       call memocc(i_stat, i_all, 'GFLLE', 'RHOVALNEW')
       i_all = -product(shape(den))*kind(den)
-      deallocate (den, stat=i_stat)
+      deallocate(den, stat=i_stat)
       call memocc(i_stat, i_all, 'DEN', 'RHOVALNEW')
       i_all = -product(shape(denlm))*kind(denlm)
-      deallocate (denlm, stat=i_stat)
+      deallocate(denlm, stat=i_stat)
       call memocc(i_stat, i_all, 'DENLM', 'RHOVALNEW')
       i_all = -product(shape(rhotemp))*kind(rhotemp)
-      deallocate (rhotemp, stat=i_stat)
+      deallocate(rhotemp, stat=i_stat)
       call memocc(i_stat, i_all, 'RHOTEMP', 'RHOVALNEW')
       i_all = -product(shape(rhonewtemp))*kind(rhonewtemp)
-      deallocate (rhonewtemp, stat=i_stat)
+      deallocate(rhonewtemp, stat=i_stat)
       call memocc(i_stat, i_all, 'RHONEWTEMP', 'RHOVALNEW')
       i_all = -product(shape(ull))*kind(ull)
-      deallocate (ull, stat=i_stat)
+      deallocate(ull, stat=i_stat)
       call memocc(i_stat, i_all, 'ULL', 'RHOVALNEW')
       i_all = -product(shape(ullleft))*kind(ullleft)
-      deallocate (ullleft, stat=i_stat)
+      deallocate(ullleft, stat=i_stat)
       call memocc(i_stat, i_all, 'ULLLEFT', 'RHOVALNEW')
   
 
@@ -1426,19 +1420,19 @@ contains
   !> Deprecated: False 
   !> writes lmdos.AAA.SS.dat files where `AAA` and `S` are the atom and spin indices
   !-------------------------------------------------------------------------------
-  subroutine write_lmdos(ielast, ez, wez, denlm, iatom, nspin, lmmax0d, irmdnew, ispin)
+  subroutine write_lmdos(ielast, ez, wez, denlm, iatom, nspin, lmmax0d, ispin)
     use mod_datatypes, only: dp
     use mod_constants, only: pi, czero
-    use global_variables, only: kBdG, korbit, iemxd, lmmaxd, lambda_BdG
-    use mod_runoptions, only: write_DOS_lm, disable_print_serialnumber, write_complex_lmdos
+    use global_variables, only: kBdG, korbit, iemxd
+    use mod_runoptions, only: disable_print_serialnumber, write_complex_lmdos
     use mod_version_info, only: version_print_header
     use mod_rhooutnew, only: fix_integration_weight_BdG
-    use mod_BdG, only: t_BdG, get_new_cmat_at
+    use mod_BdG, only: get_new_cmat_at
     use m_npy, only: save_npy
     use mod_runoptions, only: check_custom_testopt
     implicit none
     ! interface
-    integer, intent(in) :: ielast, nspin, iatom, lmmax0d, irmdnew, ispin
+    integer, intent(in) :: ielast, nspin, iatom, lmmax0d, ispin
     complex (kind=dp), intent(in) :: ez(ielast)
     complex (kind=dp), dimension (iemxd), intent (in) :: wez
     complex (kind=dp), intent(in), allocatable :: denlm(:, :, :, :)
@@ -1448,9 +1442,6 @@ contains
     real (kind=dp), dimension(lmmax0d) :: den_integrated
     character (len=100) :: filename
 
-    complex (kind=dp) :: cmat_new(lmmaxd*(1+kBdG), lmmaxd*(1+kBdG), irmdnew)
-    integer :: ir
-
     integer :: iie
     integer, allocatable :: iemap(:)
 
@@ -1600,6 +1591,56 @@ contains
   end subroutine write_chi_ns
 
 
+  !-------------------------------------------------------------------------------
+  !> Summary: read qvec.dat and change allocations accordingly
+  !> Author: Philipp Ruessmann
+  !> Category: single-site, KKRhost, density
+  !> Deprecated: False 
+  !> read q-points for qos calculation from qvec.dat file and reallocate arrays 
+  !> that have the number of q-points
+  !-------------------------------------------------------------------------------
+  subroutine change_allocations_qdos(lmmaxd, kBdG, ielast, lmaxd1, nspin, korbit, lmmax0d, &
+                                     nqdos, qvec, gflle, den, denlm)
+    use mod_datatypes, only: dp
+    use mod_profiling, only: memocc
+    use mod_qdos, only: read_qvec
+    implicit none
+    integer, intent(in) :: lmmaxd, kBdG, ielast, lmaxd1, nspin, korbit, lmmax0d
+    integer, intent(out) :: nqdos
+    real (kind=dp), allocatable, intent(inout) :: qvec(:, :)
+    complex (kind=dp), allocatable, intent(inout) :: gflle(:, :, :, :)
+    complex (kind=dp), allocatable, intent(inout) :: den(:, :, :, :)
+    complex (kind=dp), allocatable, intent(inout) :: denlm(:, :, :, :)
+    ! local
+    integer :: i_all, i_stat
+
+    ! Read BZ path for qdos calculation:
+    call read_qvec(qvec)
+    nqdos = size(qvec, dim=2) ! find number of qvec
+
+    ! Change allocation for GFLLE to be suitabel for qdos run
+    i_all = -product(shape(gflle))*kind(gflle)
+    deallocate(gflle, stat=i_stat)
+    call memocc(i_stat, i_all, 'GFLLE', 'RHOVALNEW')
+    i_all = -product(shape(den))*kind(den)
+    deallocate(den, stat=i_stat)
+    call memocc(i_stat, i_all, 'DEN', 'RHOVALNEW')
+    i_all = -product(shape(denlm))*kind(denlm)
+    deallocate(denlm, stat=i_stat)
+    call memocc(i_stat, i_all, 'DENLM', 'RHOVALNEW')
+
+    ! now reallocate with larger nqdos dimension
+    allocate(gflle(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),ielast,nqdos), stat=i_stat)
+    call memocc(i_stat, product(shape(gflle))*kind(gflle), 'GFLLE', 'RHOVALNEW')
+    allocate(den(0:lmaxd1,ielast,nqdos,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat)
+    call memocc(i_stat, product(shape(den))*kind(den), 'DEN', 'RHOVALNEW')
+    allocate(denlm(lmmax0d,ielast,nqdos,nspin/(nspin-korbit)*(1+kBdG)**2), stat=i_stat)
+    call memocc(i_stat, product(shape(denlm))*kind(denlm), 'DENLM', 'RHOVALNEW')
+
+  end subroutine change_allocations_qdos
+
+
+
 end module mod_rhovalnew
 
 
diff --git a/source/KKRhost/tmat_newsolver.F90 b/source/KKRhost/tmat_newsolver.F90
index 30654c9dc..52ca490a3 100644
--- a/source/KKRhost/tmat_newsolver.F90
+++ b/source/KKRhost/tmat_newsolver.F90
@@ -65,7 +65,6 @@ contains
     use m_npy, only: save_npy
     use mod_BdG, only: t_BdG, reshape_BdG, reshape_pot_mat_BdG
     use mod_rll_global_solutions, only: rll_global_solutions
-    use mod_bfield, only: add_bfield
 
     implicit none
 
@@ -837,164 +836,165 @@ contains
 
     ! local
     integer :: i_stat
+    character (len=*), parameter :: routine_name = 'allocate_locals_tmat_newsolver'
 
 
     if (allocmode==1) then ! allocate and initialize
 
       ! potential arrays
       allocate (vnspll(nsra*lmmaxd*(1+kBdG),nsra*lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
-      call memocc(i_stat, product(shape(vnspll))*kind(vnspll), 'VNSPLL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(vnspll))*kind(vnspll), 'VNSPLL', routine_name)
       vnspll = czero
       allocate (vnspll0(lmmaxd,lmmaxd,irmdnew), stat=i_stat)
-      call memocc(i_stat, product(shape(vnspll0))*kind(vnspll0), 'VNSPLL0', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(vnspll0))*kind(vnspll0), 'VNSPLL0', routine_name)
       vnspll0 = czero
       allocate (vnspll1(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
-      call memocc(i_stat, product(shape(vnspll1))*kind(vnspll1), 'VNSPLL1', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(vnspll1))*kind(vnspll1), 'VNSPLL1', routine_name)
       vnspll1 = czero
       allocate (vnspll2(lmmaxd,lmmaxd,irmdnew), stat=i_stat)
-      call memocc(i_stat, product(shape(vnspll2))*kind(vnspll2), 'VNSPLL2', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(vnspll2))*kind(vnspll2), 'VNSPLL2', routine_name)
       vnspll2 = czero
 
       ! source terms (bessel and hankel functions)
       allocate (hlk(1:nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
-      call memocc(i_stat, product(shape(hlk))*kind(hlk), 'HLK', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(hlk))*kind(hlk), 'HLK', routine_name)
       hlk = czero
       allocate (jlk(1:nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
-      call memocc(i_stat, product(shape(jlk))*kind(jlk), 'JLK', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(jlk))*kind(jlk), 'JLK', routine_name)
       jlk = czero
       allocate (hlk2(1:nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
-      call memocc(i_stat, product(shape(hlk2))*kind(hlk2), 'HLK2', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(hlk2))*kind(hlk2), 'HLK2', routine_name)
       hlk2 = czero
       allocate (jlk2(1:nsra*(1+korbit)*(lmax+1)*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
-      call memocc(i_stat, product(shape(jlk2))*kind(jlk2), 'JLK2', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(jlk2))*kind(jlk2), 'JLK2', routine_name)
       jlk2 = czero
 
       ! Spherical part of tmatrix (used with SRATRICK)
       allocate (tmatsph(nspin/(nspin-korbit)*(lmax+1)*(1+kBdG),0:nth-1), stat=i_stat)
-      call memocc(i_stat, product(shape(tmatsph))*kind(tmatsph), 'TMATSPH', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(tmatsph))*kind(tmatsph), 'TMATSPH', routine_name)
       tmatsph = czero
 
       ! Regular and irregular wavefunctions
       allocate (rll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
-      call memocc(i_stat, product(shape(rll))*kind(rll), 'RLL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(rll))*kind(rll), 'RLL', routine_name)
       rll = czero
       allocate (sll(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
-      call memocc(i_stat, product(shape(sll))*kind(sll), 'SLL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(sll))*kind(sll), 'SLL', routine_name)
       sll = czero
       allocate (ull(nsra*lmmaxd,lmmaxd,irmdnew,0:nth-1), stat=i_stat)
-      call memocc(i_stat, product(shape(ull))*kind(ull), 'ULL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(ull))*kind(ull), 'ULL', routine_name)
       ull = czero
 
       ! Left regular and irregular wavefunctions (used here only in case of XCPL or saving of left wavefunctions)
       if (calc_exchange_couplings .or. (t_wavefunctions%save_rllleft .or. t_wavefunctions%save_sllleft .or. write_rhoq_input) .or. calc_wronskian) then
         allocate (rllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
-        call memocc(i_stat, product(shape(rllleft))*kind(rllleft), 'RLLLEFT', 'allocate_locals_tmat_newsolver')
+        call memocc(i_stat, product(shape(rllleft))*kind(rllleft), 'RLLLEFT', routine_name)
         rllleft = czero
         allocate (sllleft(nsra*lmmaxd*(1+kBdG),lmmaxd*(1+kBdG),irmdnew,0:nth-1), stat=i_stat)
-        call memocc(i_stat, product(shape(sllleft))*kind(sllleft), 'SLLLEFT', 'allocate_locals_tmat_newsolver')
+        call memocc(i_stat, product(shape(sllleft))*kind(sllleft), 'SLLLEFT', routine_name)
         sllleft = czero
       else
         allocate (rllleft(1,1,1,0:nth-1), stat=i_stat)
-        call memocc(i_stat, product(shape(rllleft))*kind(rllleft), 'RLLLEFT', 'allocate_locals_tmat_newsolver')
+        call memocc(i_stat, product(shape(rllleft))*kind(rllleft), 'RLLLEFT', routine_name)
         rllleft = czero
         allocate (sllleft(1,1,1,0:nth-1), stat=i_stat)
-        call memocc(i_stat, product(shape(sllleft))*kind(sllleft), 'SLLLEFT', 'allocate_locals_tmat_newsolver')
+        call memocc(i_stat, product(shape(sllleft))*kind(sllleft), 'SLLLEFT', routine_name)
         sllleft = czero
       end if                       ! ( calc_exchange_couplings .or. ... )
 
       allocate (vins(irmdnew,lmpot,nspin), stat=i_stat)
-      call memocc(i_stat, product(shape(vins))*kind(vins), 'VINS', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(vins))*kind(vins), 'VINS', routine_name)
       vins = 0.0_dp
       allocate (aux(lmmaxd,lmmaxd), stat=i_stat)
-      call memocc(i_stat, product(shape(aux))*kind(aux), 'AUX', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(aux))*kind(aux), 'AUX', routine_name)
       aux = czero
       allocate (ipiv(lmmaxd), stat=i_stat)
-      call memocc(i_stat, product(shape(ipiv))*kind(ipiv), 'IPIV', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(ipiv))*kind(ipiv), 'IPIV', routine_name)
       ipiv = 0
       allocate (tmat0(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat)
-      call memocc(i_stat, product(shape(tmat0))*kind(tmat0), 'TMAT0', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(tmat0))*kind(tmat0), 'TMAT0', routine_name)
       tmat0 = czero
       allocate (tmatll(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat)
-      call memocc(i_stat, product(shape(tmatll))*kind(tmatll), 'TMATLL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(tmatll))*kind(tmatll), 'TMATLL', routine_name)
       tmatll = czero
       allocate (alpha0(lmmaxd*(1+kBdG),lmmaxd*(1+kBdG)), stat=i_stat)
-      call memocc(i_stat, product(shape(alpha0))*kind(alpha0), 'ALPHA0', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(alpha0))*kind(alpha0), 'ALPHA0', routine_name)
       alpha0 = czero
       allocate (dtmatll(lmmaxd,lmmaxd), stat=i_stat)
-      call memocc(i_stat, product(shape(dtmatll))*kind(dtmatll), 'DTMATLL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(dtmatll))*kind(dtmatll), 'DTMATLL', routine_name)
       dtmatll = czero
       allocate (alphall(lmmaxd,lmmaxd), stat=i_stat)
-      call memocc(i_stat, product(shape(alphall))*kind(alphall), 'ALPHALL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(alphall))*kind(alphall), 'ALPHALL', routine_name)
       alphall = czero
       allocate (dalphall(lmmaxd,lmmaxd), stat=i_stat)
-      call memocc(i_stat, product(shape(dalphall))*kind(dalphall), 'DALPHALL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(dalphall))*kind(dalphall), 'DALPHALL', routine_name)
       dalphall = czero
       allocate (jlk_index(nsra*lmmaxd*(1+kBdG)), stat=i_stat)
-      call memocc(i_stat, product(shape(jlk_index))*kind(jlk_index), 'JLK_INDEX', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, product(shape(jlk_index))*kind(jlk_index), 'JLK_INDEX', routine_name)
       jlk_index = 0
 
     else ! allocmode/=1: deallocate arrays
 
       deallocate (vnspll, stat=i_stat)
-      call memocc(i_stat, -product(shape(vnspll))*kind(vnspll), 'VNSPLL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(vnspll))*kind(vnspll), 'VNSPLL', routine_name)
       deallocate (vnspll0, stat=i_stat)
-      call memocc(i_stat, -product(shape(vnspll0))*kind(vnspll0), 'VNSPLL0', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(vnspll0))*kind(vnspll0), 'VNSPLL0', routine_name)
       deallocate (vnspll1, stat=i_stat)
-      call memocc(i_stat, -product(shape(vnspll1))*kind(vnspll1), 'VNSPLL1', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(vnspll1))*kind(vnspll1), 'VNSPLL1', routine_name)
       deallocate (vnspll2, stat=i_stat)
-      call memocc(i_stat, -product(shape(vnspll2))*kind(vnspll2), 'VNSPLL1', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(vnspll2))*kind(vnspll2), 'VNSPLL1', routine_name)
 
       deallocate (hlk, stat=i_stat)
-      call memocc(i_stat, -product(shape(hlk))*kind(hlk), 'HLK', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(hlk))*kind(hlk), 'HLK', routine_name)
       deallocate (jlk, stat=i_stat)
-      call memocc(i_stat, -product(shape(jlk))*kind(jlk), 'JLK', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(jlk))*kind(jlk), 'JLK', routine_name)
       deallocate (hlk2, stat=i_stat)
-      call memocc(i_stat, -product(shape(hlk2))*kind(hlk2), 'HLK2', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(hlk2))*kind(hlk2), 'HLK2', routine_name)
       deallocate (jlk2, stat=i_stat)
-      call memocc(i_stat, -product(shape(jlk2))*kind(jlk2), 'JLK2', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(jlk2))*kind(jlk2), 'JLK2', routine_name)
 
       deallocate (tmatsph, stat=i_stat)
-      call memocc(i_stat, -product(shape(tmatsph))*kind(tmatsph), 'TMATSPH', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(tmatsph))*kind(tmatsph), 'TMATSPH', routine_name)
 
       deallocate (ull, stat=i_stat)
-      call memocc(i_stat, -product(shape(ull))*kind(ull), 'ULL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(ull))*kind(ull), 'ULL', routine_name)
       deallocate (rll, stat=i_stat)
-      call memocc(i_stat, -product(shape(rll))*kind(rll), 'RLL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(rll))*kind(rll), 'RLL', routine_name)
       deallocate (sll, stat=i_stat)
-      call memocc(i_stat, -product(shape(sll))*kind(sll), 'SLL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(sll))*kind(sll), 'SLL', routine_name)
 
       if (calc_exchange_couplings .or. (t_wavefunctions%save_rllleft .or. t_wavefunctions%save_sllleft .or. write_rhoq_input)) then
         deallocate (rllleft, stat=i_stat)
-        call memocc(i_stat, -product(shape(rllleft))*kind(rllleft), 'RLLLEFT', 'allocate_locals_tmat_newsolver')
+        call memocc(i_stat, -product(shape(rllleft))*kind(rllleft), 'RLLLEFT', routine_name)
         deallocate (sllleft, stat=i_stat)
-        call memocc(i_stat, -product(shape(sllleft))*kind(sllleft), 'SLLLEFT', 'allocate_locals_tmat_newsolver')
+        call memocc(i_stat, -product(shape(sllleft))*kind(sllleft), 'SLLLEFT', routine_name)
       else
         deallocate (rllleft, stat=i_stat)
-        call memocc(i_stat, -product(shape(rllleft))*kind(rllleft), 'RLLLEFT', 'allocate_locals_tmat_newsolver')
+        call memocc(i_stat, -product(shape(rllleft))*kind(rllleft), 'RLLLEFT', routine_name)
         deallocate (sllleft, stat=i_stat)
-        call memocc(i_stat, -product(shape(sllleft))*kind(sllleft), 'SLLLEFT', 'allocate_locals_tmat_newsolver')
+        call memocc(i_stat, -product(shape(sllleft))*kind(sllleft), 'SLLLEFT', routine_name)
       end if                       ! ( calc_exchange_couplings .or. ... )
 
       deallocate (vins, stat=i_stat)
-      call memocc(i_stat, -product(shape(vins))*kind(vins), 'VINS', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(vins))*kind(vins), 'VINS', routine_name)
       deallocate (aux, stat=i_stat)
-      call memocc(i_stat, -product(shape(aux))*kind(aux), 'AUX', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(aux))*kind(aux), 'AUX', routine_name)
       deallocate (ipiv, stat=i_stat)
-      call memocc(i_stat, -product(shape(ipiv))*kind(ipiv), 'IPIV', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(ipiv))*kind(ipiv), 'IPIV', routine_name)
       deallocate (tmat0, stat=i_stat)
-      call memocc(i_stat, -product(shape(tmat0))*kind(tmat0), 'TMAT0', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(tmat0))*kind(tmat0), 'TMAT0', routine_name)
       deallocate (tmatll, stat=i_stat)
-      call memocc(i_stat, -product(shape(tmatll))*kind(tmatll), 'TMATLL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(tmatll))*kind(tmatll), 'TMATLL', routine_name)
       deallocate (alpha0, stat=i_stat)
-      call memocc(i_stat, -product(shape(alpha0))*kind(alpha0), 'ALPHA0', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(alpha0))*kind(alpha0), 'ALPHA0', routine_name)
       deallocate (dtmatll, stat=i_stat)
-      call memocc(i_stat, -product(shape(dtmatll))*kind(dtmatll), 'DTMATLL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(dtmatll))*kind(dtmatll), 'DTMATLL', routine_name)
       deallocate (alphall, stat=i_stat)
-      call memocc(i_stat, -product(shape(alphall))*kind(alphall), 'ALPHALL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(alphall))*kind(alphall), 'ALPHALL', routine_name)
       deallocate (dalphall, stat=i_stat)
-      call memocc(i_stat, -product(shape(dalphall))*kind(dalphall), 'DALPHALL', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(dalphall))*kind(dalphall), 'DALPHALL', routine_name)
       deallocate (jlk_index, stat=i_stat)
-      call memocc(i_stat, -product(shape(jlk_index))*kind(jlk_index), 'JLK_INDEX', 'allocate_locals_tmat_newsolver')
+      call memocc(i_stat, -product(shape(jlk_index))*kind(jlk_index), 'JLK_INDEX', routine_name)
 
     end if ! allocmode ==1 or /=1
 
@@ -1058,12 +1058,14 @@ contains
 
       ! Add magnetic field
       if ( t_params%bfield%lbfield .or. t_params%bfield%lbfield_constr ) then
+        write(*,*) 'apply magnetic field to potential'
         imt1 = ipan_intervall(t_params%npan_log+t_params%npan_eq) + 1
         call add_bfield(t_params%bfield, i1, lmax, nspin, irmdnew, imt1, iend, ncheb, theta, phi, t_params%ifunm1(:, t_params%ntcell(i1)), &
-          t_params%icleb, t_params%cleb(:,1), t_params%thetasnew(1:irmdnew, :, t_params%ntcell(i1)), '1', vnspll2(:, :, :), &
-          vnspll1(1:lmmaxd,1:lmmaxd, :, ith), t_params%bfield%thetallmat(:, :, 1:irmdnew, t_params%ntcell(i1)))
-      else
-        vnspll1(1:lmmaxd,1:lmmaxd, 1:irmdnew, ith) = vnspll2(1:lmmaxd,1:lmmaxd, 1:irmdnew)
+          t_params%icleb, t_params%cleb(:,1), t_params%thetasnew(1:irmdnew, :, t_params%ntcell(i1)), '1', &
+          vnspll1(1:lmmaxd,1:lmmaxd, :, ith), vnspll2(:, :, :), & ! input and output potentials (not replace in-place)
+          t_params%bfield%thetallmat(:, :, 1:irmdnew, t_params%ntcell(i1)))
+          ! copy changed array back because add_bfield does not apply the change in-place
+          vnspll1(1:lmmaxd,1:lmmaxd, 1:irmdnew, ith) = vnspll2(1:lmmaxd,1:lmmaxd, 1:irmdnew)
       end if ! bfield
 
       ! add SOC for hole part at eryd=2*ef-eryd
@@ -1071,20 +1073,21 @@ contains
         call spinorbit_ham(lmax, lmmax0d, vins, rnew, 2*efermi_in-conjg(eryd), zat, cvlight, socscale, nspin, lmpot, theta, phi, ipan_intervall, rpan_intervall, npan_tot, ncheb, irmdnew, nrmaxd, &
           vnspll0(:,:,:), vnspll1(lmmaxd+1:2*lmmaxd,lmmaxd+1:2*lmmaxd,:,ith), mode)
 
-        ! Add magnetic field
+        ! Add magnetic field in hole block of potential (not the different part of the matrix vnspll1)
         if ( t_params%bfield%lbfield .or. t_params%bfield%lbfield_constr ) then
           imt1 = ipan_intervall(t_params%npan_log+t_params%npan_eq) + 1
           call add_bfield(t_params%bfield, i1, lmax, nspin, irmdnew, imt1, iend, ncheb, theta, phi, t_params%ifunm1(:, t_params%ntcell(i1)), &
-            t_params%icleb, t_params%cleb(:,1), t_params%thetasnew(1:irmdnew, :, t_params%ntcell(i1)), '1', vnspll2(:, :, :), &
-            vnspll1(lmmaxd+1:2*lmmaxd,lmmaxd+1:2*lmmaxd, :, ith), t_params%bfield%thetallmat(:, :, 1:irmdnew, t_params%ntcell(i1)))
-        else
+            t_params%icleb, t_params%cleb(:,1), t_params%thetasnew(1:irmdnew, :, t_params%ntcell(i1)), '1', & 
+            vnspll1(lmmaxd+1:2*lmmaxd,lmmaxd+1:2*lmmaxd, :, ith), vnspll2(:, :, :), & ! input and output potentials (not replace in-place)
+            t_params%bfield%thetallmat(:, :, 1:irmdnew, t_params%ntcell(i1)))
+          ! copy changed array back because add_bfield does not apply the change in-place
           vnspll1(lmmaxd+1:2*lmmaxd,lmmaxd+1:2*lmmaxd, 1:irmdnew, ith) = vnspll2(1:lmmaxd,1:lmmaxd, 1:irmdnew)
         end if ! bfield
 
       end if ! kBdG>0
 
       ! clean up working array for bfield
-      deallocate(vnspll2) 
+      deallocate(vnspll2)
 
     else ! decouple_spin_cheby
 
@@ -1093,6 +1096,8 @@ contains
 
     end if! decouple_spin_cheby
 
+    ! now vnspll1 contains the proper potential with BdG doubling
+
 
     ! test writeout of VNSPLL1
     if (write_BdG_tests .and. check_custom_testopt('write_potential')) then
@@ -1199,6 +1204,18 @@ contains
     else
       use_fullgmat = 1
     end if
+    
+    write (filename, '(A)') 'rll_source_input.dat'
+    open (888888, file=trim(filename), form='formatted')
+    write (888888, *) 'dims', nsra, nvec, eryd, rnew, irmdnew, nrmaxd, lmax, lmmaxd, use_fullgmat
+    write (888888, *) 'jlk_index', jlk_index
+    write (888888, *) 'hlk', hlk(:,:,ith)
+    write (888888, *) 'jlk', jlk(:,:,ith)
+    write (888888, *) 'hlk2', hlk2(:,:,ith)
+    write (888888, *) 'jlk2', jlk2(:,:,ith)
+    write (888888, *) 'gmatprefac', gmatprefactor
+    close (888888)
+
     call rllsllsourceterms(nsra, nvec, eryd, rnew, irmdnew, nrmaxd, lmax, lmmaxd, use_fullgmat, jlk_index, hlk(:,:,ith), jlk(:,:,ith), hlk2(:,:,ith), jlk2(:,:,ith), gmatprefactor)
     if (check_custom_testopt('gmatprefac1')) gmatprefactor(:) = (1.0_dp, 0.0_dp)
 
@@ -1226,6 +1243,7 @@ contains
       write (888888, '(A,I9,A,I9,A,2ES15.7)') '# dimension: 4*(LMAX+1)=', 4*(lmax+1)*(1+kBdG), ' IRMDNEW=', irmdnew, ' ; ERYD=', eryd
       write (888888, '(2ES21.9)') sum(hlk2(:, :, ith), 2)
       close (888888)
+      stop 'done writing rll source'
     end if
 #ifdef CPP_OMP
     !$omp end critical
diff --git a/source/common/bfield.f90 b/source/common/bfield.f90
index f6d1f024f..d7f30a246 100644
--- a/source/common/bfield.f90
+++ b/source/common/bfield.f90
@@ -358,24 +358,24 @@ contains
 
     implicit none
 
-    type(type_bfield)    , intent (in)    :: bfield       ! contains all relevant information about the magnetic fields
-    integer              , intent (in)    :: iatom        ! atom index
-    integer              , intent (in)    :: lmax         ! angular momentum cut-off
-    integer              , intent (in)    :: nspin        ! number of spins
-    integer              , intent (in)    :: irmdnew      ! number of radials point on the Cheby mesh
-    integer              , intent (in)    :: imt1         ! index muffin-tin radius
-    integer              , intent (in)    :: iend         ! Number of nonzero gaunt coefficients
-    integer              , intent (in)    :: ncheb        ! Number of Chebychev pannels for the new solver
-    real (kind=dp)       , intent (in)    :: theta        ! polar angle of the magnetic moment 
-    real (kind=dp)       , intent (in)    :: phi          ! azimuthal angle of the magnetic moment 
-    integer              , dimension (1:(2*lmax+1)**2)                     , intent (in) :: ifunm        ! pointer array for shapefun     ! Check index and dimensions!!!!!
-    integer, dimension (ncleb, 4), intent (in)                                  :: icleb !! Pointer array
-    real (kind=dp), dimension (ncleb), intent (in)                                  :: cleb !! GAUNT coefficients (GAUNT)    ! CHECK THE DIMENSION AND HOW IT IS USED!!!
-    real (kind=dp)       , dimension (irmdnew, nfund)     , intent (in) :: thetasnew    ! shapefun on the Cheby mesh
-    character (len=*)    , intent (in)                                          :: mode         !! either '1' or 'transpose', depending whether SOC potential is constructed for right or left solution
-    complex (kind=dp)    , dimension(lmmaxd, lmmaxd, irmdnew) , intent (in)    :: vnspll0       !! input potential in (l,m,s) basis
-    complex (kind=dp)    , dimension(lmmaxd, lmmaxd, irmdnew) , intent (out)   :: vnspll1       !! input potential in (l,m,s) basis
-    real(kind=dp) , dimension(1:(lmax+1)**2,1:(lmax+1)**2,1:irmdnew), intent(in)   :: thetansll
+    type(type_bfield)    , intent (in)    :: bfield  !! contains all relevant information about the magnetic fields
+    integer              , intent (in)    :: iatom   !! atom index
+    integer              , intent (in)    :: lmax    !! angular momentum cut-off
+    integer              , intent (in)    :: nspin   !! number of spins
+    integer              , intent (in)    :: irmdnew !! number of radials point on the Cheby mesh
+    integer              , intent (in)    :: imt1    !! index muffin-tin radius
+    integer              , intent (in)    :: iend    !! Number of nonzero gaunt coefficients
+    integer              , intent (in)    :: ncheb   !! Number of Chebychev pannels for the new solver
+    real (kind=dp)       , intent (in)    :: theta   !! polar angle of the magnetic moment 
+    real (kind=dp)       , intent (in)    :: phi     !! azimuthal angle of the magnetic moment 
+    integer              , dimension (1:(2*lmax+1)**2), intent (in)              :: ifunm     !! pointer array for shapefun     ! Check index and dimensions!!!!!
+    integer, dimension (ncleb, 4), intent (in)                                   :: icleb     !! Pointer array
+    real (kind=dp), dimension (ncleb), intent (in)                               :: cleb      !! GAUNT coefficients (GAUNT)    ! CHECK THE DIMENSION AND HOW IT IS USED!!!
+    real (kind=dp)       , dimension (irmdnew, nfund)     , intent (in)          :: thetasnew !! shapefun on the Cheby mesh
+    character (len=*)    , intent (in)                                           :: mode      !! either '1' or 'transpose', depending whether SOC potential is constructed for right or left solution
+    complex (kind=dp)    , dimension(lmmaxd, lmmaxd, irmdnew) , intent (in)      :: vnspll0   !! input potential in (l,m,s) basis
+    complex (kind=dp)    , dimension(lmmaxd, lmmaxd, irmdnew) , intent (out)     :: vnspll1   !! input potential in (l,m,s) basis
+    real(kind=dp) , dimension(1:(lmax+1)**2,1:(lmax+1)**2,1:irmdnew), intent(in) :: thetansll !! 
     !------------------------------------------------------------------------------------
     ! local variables
     !------------------------------------------------------------------------------------
diff --git a/source/common/radial_solver_Chebychev/calcsph.f90 b/source/common/radial_solver_Chebychev/calcsph.f90
index 0fa1a7361..2675b9ab1 100644
--- a/source/common/radial_solver_Chebychev/calcsph.f90
+++ b/source/common/radial_solver_Chebychev/calcsph.f90
@@ -132,20 +132,9 @@ contains
         end if
         tmattemp = (0e0_dp, 0e0_dp)
         alphatemp = (0e0_dp, 0e0_dp)
-        ! if (i_BdG>0) jlktemp = conjg(jlktemp)
-        ! if (i_BdG>0) jlk2temp = conjg(jlk2temp)
-        !if (i_BdG>0) hlktemp = -conjg(hlktemp)
-        !if (i_BdG>0) hlk2temp = -conjg(hlk2temp)
-        !if (i_BdG>0) hlk2temp = hlktemp
-        !if (i_BdG>0) jlk2temp = jlktemp
-        !if (i_BdG>0) vll = -conjg(vll)
-        !write(*,*) 'vll', vll(1,1,10)
-        ! if (lval==0) write(*,*) 'sph rll1', hlktemp(1,10), jlktemp(1,10), hlk2temp(1,10), jlk2temp(1,10), use_sratrick
         call rllsll(rpan_intervall, rnew, vll, rlltemp, slltemp, tmattemp, ncheb, npan_tot, lmsize, lmsize2, nvec, irmdnew, nvec, jlk_indextemp, hlktemp, jlktemp, hlk2temp, &
           jlk2temp, gmatprefactor, use_sratrick, alphatemp, .true., .true., .true.) ! last three: calcrll/calcsll/calctmat
 
-        ! if (lval==0) write(*,*) 'sph rll2', rlltemp(1,1,10), slltemp(1,1,10),tmattemp
-
         do ir = 1, irmdnew
           hlknew(lspin+lval+1, ir) = slltemp(1, 1, ir)/rnew(ir)
           jlknew(lspin+lval+1, ir) = rlltemp(1, 1, ir)/rnew(ir)
diff --git a/source/common/radial_solver_Chebychev/create_newmesh.f90 b/source/common/radial_solver_Chebychev/create_newmesh.f90
index c440e471b..dcaea3dcb 100644
--- a/source/common/radial_solver_Chebychev/create_newmesh.f90
+++ b/source/common/radial_solver_Chebychev/create_newmesh.f90
@@ -33,9 +33,10 @@ contains
   subroutine create_newmesh(natyp, irm, ipand, irid, ntotd, nfund, ncheb, irmdnew, rmesh, irmin, ipan, ircut, r_log, npan_log, npan_eq, npan_log_at, npan_eq_at, npan_tot, rnew, &
     rpan_intervall, ipan_intervall, ncelld, ntcell, thetas, thetasnew) ! last three: optional arguments
     
-    use :: mod_profiling, only: memocc
-    use :: mod_interpolspline, only: interpolspline
-    use :: mod_datatypes, only: dp
+    use mod_profiling, only: memocc
+    use mod_interpolspline, only: interpolspline
+    use mod_datatypes, only: dp
+    use mod_runoptions, only: check_custom_testopt
     implicit none
 
     ! .. Input variables
@@ -104,7 +105,11 @@ contains
       ! log panel
       rmin = rmesh(2, i1)
       rmax = r_log
-      fac = (rmax/rmin)**(1.d0/npan_log)
+      if (check_custom_testopt('newmesh_old_fac')) then
+        fac = 2.0_dp
+      else
+        fac = (rmax/rmin)**(1.d0/npan_log)
+      end if
       rval = 0e0_dp
       ishift = 0
       if (r_log>rmesh(irmin(i1),i1)) then
@@ -162,10 +167,6 @@ contains
 
       call chebmesh(npan_tot(i1), ncheb, rpan_intervall(0:,i1), rnew(1,i1))
 
-!     do ir2 = 1,50
-!     write(6,*) ir2, rnew(ir2,i1)
-!     end do
-
       ! do interpolation only when optional arguments are given
       if (present(ntcell)) then
         ! interpolate shape function THETAS to new shape function THETASNEW
@@ -183,10 +184,8 @@ contains
             call interpolspline(rmesh(imin:imax,i1), rnew(iminnew:imaxnew,i1), thetasin(imin-ircut(1,i1):imax-ircut(1,i1),lm1,icell), thetasnew(iminnew:imaxnew,lm1,icell), imax-imin+1, &
               imaxnew-iminnew+1)
           end do
-
         end do
 
-
       end if ! present(ntcell)
 
     end do ! i1
diff --git a/source/common/radial_solver_Chebychev/rll_global_solutions.F90 b/source/common/radial_solver_Chebychev/rll_global_solutions.F90
index b7d01afe2..261aec6f4 100644
--- a/source/common/radial_solver_Chebychev/rll_global_solutions.F90
+++ b/source/common/radial_solver_Chebychev/rll_global_solutions.F90
@@ -48,7 +48,7 @@ contains
     integer :: lbessel, use_sratrick1 ! dimensions etc.
 
     ! running indices
-    integer :: lm1, lm2
+    integer :: l1, lm1, lm2
     integer :: info, icheb, ipan, mn, nm
 
     ! source terms
@@ -67,9 +67,9 @@ contains
     complex (kind=dp), allocatable :: work(:, :), allp(:, :, :), bllp(:, :, :), & ! eq. 5.9, 5.10 for reg. sol
       mrnvy(:, :, :), mrnvz(:, :, :), & ! ***************
       mrjvy(:, :, :), mrjvz(:, :, :) ! eq. 5.19-5.22
+    complex (kind=dp), allocatable :: hlk_tmp(:, :), jlk_tmp(:, :), hlk2_tmp(:, :), jlk2_tmp(:, :) ! working arrays
     complex (kind=dp), allocatable :: yrf(:, :, :, :), & ! source terms (different array
-      zrf(:, :, :, :)            ! ordering)
-    ! chebyshev arrays
+      zrf(:, :, :, :)                                    ! ordering) chebyshev arrays
     real (kind=dp) :: c1(0:ncheb, 0:ncheb), rpanbound(0:npan), drpan2
     real (kind=dp) :: cslc1(0:ncheb, 0:ncheb), & ! Integration matrix from left ( C*S_L*C^-1 in eq. 5.53)
       csrc1(0:ncheb, 0:ncheb), & ! Same from right ( C*S_R*C^-1 in eq. 5.54)
@@ -99,11 +99,11 @@ contains
     ! if the SRA approximation is used:
 
     ! matrix A ( C 0)     (same as in eq. 5.68)
-    ! ( B 1)
+    !          ( B 1)
     ! (C, B are matricies here)
 
     ! inverse of A is   (C^-1    0 )
-    ! (-B C^-1 1 )
+    !                   (-B C^-1 1 )
     ! Thus, it is sufficient to only inverse the matrix C which saves computational
     ! time. This is refered to as the SRA trick.
     ! ***********************************************************************
@@ -128,13 +128,13 @@ contains
 
     call chebint(cslc1, csrc1, slc1sum, c1, ncheb)
 
-    allocate (mrnvy(lmsize,lmsize,npan), mrnvz(lmsize,lmsize,npan))
-    allocate (mrjvy(lmsize,lmsize,npan), mrjvz(lmsize,lmsize,npan))
-    allocate (yrf(lmsize2,lmsize,0:ncheb,npan))
-    allocate (zrf(lmsize2,lmsize,0:ncheb,npan))
+    allocate(mrnvy(lmsize,lmsize,npan), mrnvz(lmsize,lmsize,npan))
+    allocate(mrjvy(lmsize,lmsize,npan), mrjvz(lmsize,lmsize,npan))
+    allocate(yrf(lmsize2,lmsize,0:ncheb,npan))
+    allocate(zrf(lmsize2,lmsize,0:ncheb,npan))
 
-    allocate (work(lmsize,lmsize))
-    allocate (allp(lmsize,lmsize,0:npan), bllp(lmsize,lmsize,0:npan))
+    allocate(work(lmsize,lmsize))
+    allocate(allp(lmsize,lmsize,0:npan), bllp(lmsize,lmsize,0:npan))
 
     if (idotime) call timing_start('local')
 
@@ -143,28 +143,49 @@ contains
     ! $OMP& SHARED(tau,npan,drpan2,rpanbound,mrnvy,mrnvz,mrjvy,mrjvz,yrf) &
     ! $OMP& SHARED(zrf,nvec,lmsize,lmsize2,ncheb,jlk,jlk2,jlk_index,vll) &
     ! $OMP& SHARED(gmatprefactor,hlk,hlk2,cslc1,csrc1,slc1sum) &
-    ! $OMP& SHARED(use_sratrick, rmesh)
+    ! $OMP& SHARED(use_sratrick1, rmesh) &
+    ! $omp& firstprivate(idotime)
 
     thread_id = omp_get_thread_num()
+
+    ! deactivate timing on all other threads except the main thread
+    if (idotime .and. thread_id/=0) idotime = .false.
 #endif
 
+    ! prefetch jlk and hlk to help compiler vectorize
+    allocate(hlk_tmp(lmsize*nvec, nrmax), jlk_tmp(lmsize*nvec, nrmax))
+    allocate(hlk2_tmp(lmsize*nvec, nrmax), jlk2_tmp(lmsize*nvec, nrmax))
+    do lm1 = 1, lmsize*nvec
+      l1 = jlk_index(lm1)
+      jlk_tmp(lm1, :) = jlk(l1,:)
+      hlk_tmp(lm1, :) = hlk(l1,:)
+      jlk2_tmp(lm1, :) = jlk2(l1,:)
+      hlk2_tmp(lm1, :) = hlk2(l1,:)
+    end do
+
     ! loop over subintervals
 #ifdef CPP_HYBRID
     ! openMP pragmas added sachin, parallel region starts earlier to get allocations of arrays right
 
-    ! $OMP DO
+    ! $OMP DO schedule (dynamic)
 #endif
     do ipan = 1, npan
 
       drpan2 = (rpanbound(ipan)-rpanbound(ipan-1))/2.0_dp ! *(b-a)/2 in eq. 5.53, 5.54
       call rll_local_solutions(vll, tau(0,ipan), drpan2, cslc1, slc1sum, mrnvy(1,1,ipan),    & 
         mrnvz(1,1,ipan), mrjvy(1,1,ipan), mrjvz(1,1,ipan), yrf(1,1,0,ipan), zrf(1,1,0,ipan), &
-        ncheb, ipan, lmsize, lmsize2, nrmax, nvec, hlk, jlk, hlk2,                & 
-        jlk2, gmatprefactor, use_sratrick1, idotime)
+        ncheb, ipan, lmsize, lmsize2, nrmax, nvec, hlk_tmp, jlk_tmp, hlk2_tmp,               &
+        jlk2_tmp, gmatprefactor, use_sratrick1, idotime)
 
     end do                       ! ipan
 #ifdef CPP_HYBRID
     ! $OMP END DO
+#endif
+
+    ! clean working arrays
+    deallocate(hlk_tmp, jlk_tmp, jlk2_tmp, hlk2_tmp)
+
+#ifdef CPP_HYBRID
     ! $OMP END PARALLEL
 #endif
     ! end the big loop over the subintervals
@@ -249,7 +270,7 @@ contains
     if (idotime) call timing_stop('checknan')
     if (idotime) call timing_stop('rll-glob')
 
-    deallocate (work, allp, bllp, mrnvy, mrnvz, mrjvy, mrjvz, yrf, zrf, stat=ierror)
+    deallocate(work, allp, bllp, mrnvy, mrnvz, mrjvy, mrjvz, yrf, zrf, stat=ierror)
     if (ierror/=0) stop '[rll-glob] ERROR in deallocating arrays'
 
   end subroutine rll_global_solutions
diff --git a/source/common/radial_solver_Chebychev/rllsll.F90 b/source/common/radial_solver_Chebychev/rllsll.F90
index f435fd705..b282fde21 100644
--- a/source/common/radial_solver_Chebychev/rllsll.F90
+++ b/source/common/radial_solver_Chebychev/rllsll.F90
@@ -282,7 +282,7 @@ contains
 #endif
         call sll_local_solutions(vll, tau(0,ipan), drpan2, csrc1, slc1sum,               &
           mihvy(1,1,ipan), mihvz(1,1,ipan), mijvy(1,1,ipan), mijvz(1,1,ipan),            &
-          yif(1,1,0,ipan), zif(1,1,0,ipan), ncheb, ipan, lmsize, lmsize2, nrmax, nvec,       &
+          yif(1,1,0,ipan), zif(1,1,0,ipan), ncheb, ipan, lmsize, lmsize2, nrmax, nvec,   &
           hlk_tmp, jlk_tmp, hlk2_tmp, jlk2_tmp, gmatprefactor, use_sratrick, idotime)
 #if defined(CPP_HYBRID)
         !$omp end task
diff --git a/source/common/radial_solver_Chebychev/rllsllsourceterms.f90 b/source/common/radial_solver_Chebychev/rllsllsourceterms.f90
index 3842f580c..77c02aa05 100644
--- a/source/common/radial_solver_Chebychev/rllsllsourceterms.f90
+++ b/source/common/radial_solver_Chebychev/rllsllsourceterms.f90
@@ -40,8 +40,7 @@ contains
     use mod_beshank, only: beshank, beshank_smallcomp
     use global_variables, only: korbit, kBdG, efermi_in
     use mod_BdG, only: t_BdG
-    use mod_runoptions, only: nosmallcomp
-    use mod_runoptions, only: check_custom_testopt
+    use mod_runoptions, only: nosmallcomp, check_custom_testopt
 
     implicit none
 
diff --git a/source/common/radial_solver_Chebychev/sll_global_solutions.F90 b/source/common/radial_solver_Chebychev/sll_global_solutions.F90
index 27f079a34..195887b53 100644
--- a/source/common/radial_solver_Chebychev/sll_global_solutions.F90
+++ b/source/common/radial_solver_Chebychev/sll_global_solutions.F90
@@ -49,8 +49,8 @@ contains
     integer :: lbessel, use_sratrick1 ! dimensions etc., needed only for host code interface
 
     ! running indices
-    integer :: lm1, lm2
-    integer :: icheb, ipan, mn, nm
+    integer :: l1, lm1, lm2
+    integer :: icheb, ipan, mn
     integer :: info, ipiv(lmsize)
     integer :: iter_beta, niter_beta
 
@@ -58,9 +58,11 @@ contains
     complex (kind=dp), intent(in) :: gmatprefactor(1+kBdG) ! prefactor of green function (non-rel: = kappa = sqrt e)
     ! non-rel: = kappa = sqrt e
     complex (kind=dp) :: hlk(lbessel, nrmax), jlk(lbessel, nrmax), hlk2(lbessel, nrmax), jlk2(lbessel, nrmax)
+    complex (kind=dp), allocatable :: hlk_tmp(:, :), jlk_tmp(:, :), hlk2_tmp(:, :), jlk2_tmp(:, :) ! working arrays
 
     integer :: jlk_index(2*lmsize)
  
+    ! quadruple precision arrays
     complex*32, allocatable :: qcllp(:, :, :), qdllp(:, :, :)
     complex*32, allocatable :: qmihvy(:, :), qmihvz(:, :), qmijvy(:, :), qmijvz(:, :)
     complex*32, allocatable :: qyif(:, :, :)
@@ -108,11 +110,11 @@ contains
     ! if the SRA approximation is used:
 
     ! matrix A ( C 1)     (same as in eq. 5.68)
-    ! ( B 0)
+    !          ( B 0)
     ! (C, B are matricies here)
 
     ! inverse of A is   (C^-1    0 )
-    ! (-B C^-1 1 )
+    !                   (-B C^-1 1 )
     ! Thus, it is sufficient to only inverse the matrix C which saves computational
     ! time. This is refered to as the SRA trick.
     ! ***********************************************************************
@@ -127,14 +129,14 @@ contains
 #endif
     if (idotime) call timing_start('sll-glob')
 
-    allocate (mihvy(lmsize,lmsize,npan), mihvz(lmsize,lmsize,npan))
-    allocate (mijvy(lmsize,lmsize,npan), mijvz(lmsize,lmsize,npan))
-    allocate (yif(lmsize2,lmsize,0:ncheb,npan))
-    allocate (zif(lmsize2,lmsize,0:ncheb,npan))
-    allocate (betainv(lmsize,lmsize), betainv_save(lmsize,lmsize))
-    allocate (cllp(lmsize,lmsize,0:npan), dllp(lmsize,lmsize,0:npan))
-    allocate (cllptemp(lmsize,lmsize), dllptemp(lmsize,lmsize))
-    allocate (work(lmsize,lmsize))
+    allocate(mihvy(lmsize,lmsize,npan), mihvz(lmsize,lmsize,npan))
+    allocate(mijvy(lmsize,lmsize,npan), mijvz(lmsize,lmsize,npan))
+    allocate(yif(lmsize2,lmsize,0:ncheb,npan))
+    allocate(zif(lmsize2,lmsize,0:ncheb,npan))
+    allocate(betainv(lmsize,lmsize), betainv_save(lmsize,lmsize))
+    allocate(cllp(lmsize,lmsize,0:npan), dllp(lmsize,lmsize,0:npan))
+    allocate(cllptemp(lmsize,lmsize), dllptemp(lmsize,lmsize))
+    allocate(work(lmsize,lmsize))
 
     do ipan = 1, npan
       do icheb = 0, ncheb
@@ -152,28 +154,49 @@ contains
     ! $OMP& SHARED(tau,npan,drpan2,rpanbound,mihvy,mihvz,mijvy,mijvz,yif) &
     ! $OMP& SHARED(zif,nvec,lmsize,lmsize2,ncheb,jlk,jlk2,jlk_index) &
     ! $OMP& SHARED(vll,gmatprefactor,hlk,hlk2,cslc1,csrc1,slc1sum) &
-    ! $OMP& SHARED(use_sratrick, rmesh)
+    ! $OMP& SHARED(use_sratrick, rmesh) &
+    ! $omp& firstprivate(idotime)
 
     thread_id = omp_get_thread_num()
+
+    ! deactivate timing on all other threads except the main thread
+    if (idotime .and. thread_id/=0) idotime = .false.
 #endif
 
+    ! prefetch jlk and hlk to help compiler vectorize
+    allocate(hlk_tmp(lmsize*nvec, nrmax), jlk_tmp(lmsize*nvec, nrmax))
+    allocate(hlk2_tmp(lmsize*nvec, nrmax), jlk2_tmp(lmsize*nvec, nrmax))
+    do lm1 = 1, lmsize*nvec
+      l1 = jlk_index(lm1)
+      jlk_tmp(lm1, :) = jlk(l1,:)
+      hlk_tmp(lm1, :) = hlk(l1,:)
+      jlk2_tmp(lm1, :) = jlk2(l1,:)
+      hlk2_tmp(lm1, :) = hlk2(l1,:)
+    end do
+
     ! loop over subintervals
 #ifdef CPP_HYBRID
     ! openMP pragmas added sachin, parallel region starts earlier to get allocations of arrays right
 
-    ! $OMP DO
+    ! $OMP DO schedule (dynamic)
 #endif
     do ipan = 1, npan
 
       drpan2 = (rpanbound(ipan)-rpanbound(ipan-1))/2.d0 ! *(b-a)/2 in eq. 5.53, 5.54
       call sll_local_solutions(vll, tau(0,ipan), drpan2, csrc1, slc1sum,               &
         mihvy(1,1,ipan), mihvz(1,1,ipan), mijvy(1,1,ipan), mijvz(1,1,ipan),            &
-        yif(1,1,0,ipan), zif(1,1,0,ipan), ncheb, ipan, lmsize, lmsize2, nrmax, nvec,       &
-        hlk, jlk, hlk2, jlk2, gmatprefactor, use_sratrick1, idotime)
+        yif(1,1,0,ipan), zif(1,1,0,ipan), ncheb, ipan, lmsize, lmsize2, nrmax, nvec,   &
+        hlk_tmp, jlk_tmp, hlk2_tmp, jlk2_tmp, gmatprefactor, use_sratrick1, idotime)
 
     end do                       ! ipan
 #ifdef CPP_HYBRID
     ! $OMP END DO
+#endif
+
+    ! clean working arrays
+    deallocate(hlk_tmp, jlk_tmp, jlk2_tmp, hlk2_tmp)
+
+#ifdef CPP_HYBRID
     ! $OMP END PARALLEL
 #endif
     ! end the big loop over the subintervals
@@ -215,16 +238,16 @@ contains
     call zgetri(lmsize, betainv, lmsize, ipiv, work, lmsize*lmsize, info)
  
     if(use_cheby_quadprec) then
-      allocate (qcone, qczero)
+      allocate(qcone, qczero)
       qcone = (1.q0,0.0q0)
       qczero = (0.q0,0.0q0)
-      allocate (qmihvy(lmsize,lmsize), qmihvz(lmsize,lmsize))
-      allocate (qmijvy(lmsize,lmsize), qmijvz(lmsize,lmsize))
-      allocate (qyif(lmsize2,lmsize,0:ncheb))
-      allocate (qbetainv(lmsize,lmsize), qbetainv_save(lmsize,lmsize))
-      allocate (qsll(lmsize2,lmsize))
-      allocate (qcllp(lmsize,lmsize,0:npan), qdllp(lmsize,lmsize,0:npan))
-      allocate (qcllptemp(lmsize,lmsize), qdllptemp(lmsize,lmsize))
+      allocate(qmihvy(lmsize,lmsize), qmihvz(lmsize,lmsize))
+      allocate(qmijvy(lmsize,lmsize), qmijvz(lmsize,lmsize))
+      allocate(qyif(lmsize2,lmsize,0:ncheb))
+      allocate(qbetainv(lmsize,lmsize), qbetainv_save(lmsize,lmsize))
+      allocate(qsll(lmsize2,lmsize))
+      allocate(qcllp(lmsize,lmsize,0:npan), qdllp(lmsize,lmsize,0:npan))
+      allocate(qcllptemp(lmsize,lmsize), qdllptemp(lmsize,lmsize))
       qbetainv = betainv
     end if
 
@@ -375,9 +398,9 @@ contains
     if (idotime) call timing_stop('checknan')
     if (idotime) call timing_stop('sll-glob')
 
-    deallocate (work, betainv, betainv_save, cllp, dllp, cllptemp, dllptemp, mihvy, mihvz, mijvy, mijvz, yif, zif, stat=ierror)
+    deallocate(work, betainv, betainv_save, cllp, dllp, cllptemp, dllptemp, mihvy, mihvz, mijvy, mijvz, yif, zif, stat=ierror)
     if (ierror/=0) stop '[sll-glob] ERROR in deallocating arrays'
-    if(use_cheby_quadprec) deallocate (qbetainv, qbetainv_save, qcllp, qdllp, qcllptemp, qdllptemp, qmihvy, qmihvz, qmijvy, qmijvz, qyif, qsll, stat=ierror)
+    if(use_cheby_quadprec) deallocate(qbetainv, qbetainv_save, qcllp, qdllp, qcllptemp, qdllptemp, qmihvy, qmihvz, qmijvy, qmijvz, qyif, qsll, stat=ierror)
     if (ierror/=0) stop '[sll-glob] ERROR in deallocating arrays'
   end subroutine sll_global_solutions
 
-- 
GitLab