diff --git a/source/KKRhost/clsgen_tb.f90 b/source/KKRhost/clsgen_tb.f90
index 34927c8ff260a20615e8c1a0953404a242b57339..f2cd76b33395a0aca0f08abbd5be115b5954d2bd 100644
--- a/source/KKRhost/clsgen_tb.f90
+++ b/source/KKRhost/clsgen_tb.f90
@@ -39,13 +39,14 @@ contains
   !> atom.
   !-------------------------------------------------------------------------------
   subroutine clsgen_tb(naez, nemb, nvirt, rr, rbasis, kaoez, zat, cls, ncls, nacls, atom, ezoa, nlbasis, nrbasis, nleft, nright, zperleft, zperight, tleft, tright, rmtref, &
-    rmtrefat, vref, irefpot, nrefpot, rcls, rcut, rcutxy, alat, natyp, nclsd, nrd, naclsd, nrefd, nembd, linterface, nprincd, nprinc)
+    rmtrefat, vref, irefpot, nrefpot, rcls, rcut, rcutxy, alat, natyp, nclsd, nrd, naclsd, nrefd, nembd, linterface, nprincd, nprinc, invmod)
     ! ************************************************************************
     ! 
     use :: mod_datatypes, only: dp
     use :: mod_runoptions, only: write_tb_coupling, disable_print_serialnumber
     use :: mod_version_info, only: version_print_header
     use :: mod_dsort, only: dsort
+    use :: godfrin, only: t_godfrin ! GODFRIN MdSD
     implicit none
     ! .. arguments
     integer :: naez                !! number of atoms in EZ
@@ -59,6 +60,7 @@ contains
     integer :: nrefd
     integer, intent(in) :: nprincd !! user-supplied number of layers in a principal layer
     integer :: nprinc              !! Calculated number of layers in a principal layer
+    integer, intent(in) :: invmod  !! MdSD: to check if generalized block tridiagonal inversion is used
     integer :: natyp
     integer :: nembd
     real (kind=dp) :: alat         !! lattice constant A
@@ -456,7 +458,8 @@ contains
 
     ! Calculate number of layers in principal layer
     nprinc = 1
-    if (linterface) then
+    ! MdSD: except if using godfrin
+    if (linterface .and. invmod/=3) then
       do jatom = 1, naez           ! loop over rows
         do iat = 1, jatom - 1      ! loop over columns before the diagonal
           if (icouplmat(jatom,iat)==1) nprinc = max(nprinc, jatom-iat)
@@ -470,19 +473,40 @@ contains
 
     ! Check if the user-specified blocking is correct
     if (mod(naez,nprincd) == 0) then
-      nb = naez/nprincd
-      do ib=1,nb-1
-!       diagonal blocks
-        icouplmat(1+(ib-1)*nprincd:ib*nprincd,1+(ib-1)*nprincd:ib*nprincd) = 0
-!       subdiagonal blocks
-        icouplmat(1+ib*nprincd:(ib+1)*nprincd,1+(ib-1)*nprincd:ib*nprincd) = 0
-!       superdiagonal blocks
-        icouplmat(1+(ib-1)*nprincd:ib*nprincd,1+ib*nprincd:(ib+1)*nprincd) = 0
-      end do
-!     last diagonal blocks
-      icouplmat(1+(nb-1)*nprincd:nb*nprincd,1+(nb-1)*nprincd:nb*nprincd) = 0
+      ! MdSD: generalized blocking
+      if (invmod == 3) then
+        nb = t_godfrin%nb
+        do ib=1,nb-1
+!         diagonal blocks
+          icouplmat(1+sum(t_godfrin%bdims(1:ib-1)):sum(t_godfrin%bdims(1:ib)),1+sum(t_godfrin%bdims(1:ib-1)):sum(t_godfrin%bdims(1:ib))) = 0
+!         subdiagonal blocks
+          icouplmat(1+sum(t_godfrin%bdims(1:ib)):sum(t_godfrin%bdims(1:ib+1)),1+sum(t_godfrin%bdims(1:ib-1)):sum(t_godfrin%bdims(1:ib))) = 0
+!         superdiagonal blocks
+          icouplmat(1+sum(t_godfrin%bdims(1:ib-1)):sum(t_godfrin%bdims(1:ib)),1+sum(t_godfrin%bdims(1:ib)):sum(t_godfrin%bdims(1:ib+1))) = 0
+        end do
+!       last diagonal blocks
+        icouplmat(1+sum(t_godfrin%bdims(1:nb-1)):sum(t_godfrin%bdims(1:nb)),1+sum(t_godfrin%bdims(1:nb-1)):sum(t_godfrin%bdims(1:nb))) = 0
+!       additional blocks from periodic boundary conditions
+        if (t_godfrin%lper) then
+          icouplmat(1+sum(t_godfrin%bdims(1:0)):sum(t_godfrin%bdims(1:1)),1+sum(t_godfrin%bdims(1:nb-1)):sum(t_godfrin%bdims(1:nb))) = 0
+          icouplmat(1+sum(t_godfrin%bdims(1:nb-1)):sum(t_godfrin%bdims(1:nb)),1+sum(t_godfrin%bdims(1:0)):sum(t_godfrin%bdims(1:1))) = 0
+        end if
+      ! MdSD: standard blocking
+      else
+        nb = naez/nprincd
+        do ib=1,nb-1
+!         diagonal blocks
+          icouplmat(1+(ib-1)*nprincd:ib*nprincd,1+(ib-1)*nprincd:ib*nprincd) = 0
+!         subdiagonal blocks
+          icouplmat(1+ib*nprincd:(ib+1)*nprincd,1+(ib-1)*nprincd:ib*nprincd) = 0
+!         superdiagonal blocks
+          icouplmat(1+(ib-1)*nprincd:ib*nprincd,1+ib*nprincd:(ib+1)*nprincd) = 0
+        end do
+!       last diagonal blocks
+        icouplmat(1+(nb-1)*nprincd:nb*nprincd,1+(nb-1)*nprincd:nb*nprincd) = 0
+      end if
 !     mistakes were made?
-      if (any(icouplmat /= 0)) then
+      if (any(icouplmat /= 0) .and. invmod /= 0) then
         write (1337, *) 'CLSGEN_TB: WARNING --- User-supplied NPRINCD=', nprincd, ' is INCORRECT!'
         ! Write out the coupling matrix
         write (1337, *) 'Truncated coupling matrix:'
@@ -492,6 +516,8 @@ contains
       else
         nprinc = nprincd
       end if
+    else
+    ! MdSD: error message here?
     end if
 
     ! close clusters file
diff --git a/source/KKRhost/gfmask.f90 b/source/KKRhost/gfmask.f90
index f1b2d10e2af1d2e6e6ece3f3477f26b748938c89..86c41447817dacaffff0ae2bf05dbdde8d0c1011 100644
--- a/source/KKRhost/gfmask.f90
+++ b/source/KKRhost/gfmask.f90
@@ -87,7 +87,7 @@ contains
     ! (atomic sites) that are needed
 
     ! ======================================================================
-    if ((icc/=0) .and. (invmod==1)) then
+    if ((icc/=0) .and. invmod==1) then
       do i = 1, naez
         do j = 1, naez
           icouple(i, j) = 0
diff --git a/source/KKRhost/godfrin.F90 b/source/KKRhost/godfrin.F90
index 51e0cc0c12ebf87591192090da376eedf2f9762f..d05cf4ab5e2dabc610cbbcb089cc4f09f76eb56b 100644
--- a/source/KKRhost/godfrin.F90
+++ b/source/KKRhost/godfrin.F90
@@ -673,10 +673,10 @@ contains
     call zgemm('N', 'N', m, n, m, cone, delta, mb, aji(:,:,nb), mb, czero, tmp, mb)
     call zgemm('N', 'N', n, n, m, cone, aij(:,:,nb), mb, tmp, mb, czero, delta, mb)
     ! Delta = Delta + A(1,N) * B(N,1)
-    tmp = a(j0+1:j0+m, i0+1:i0+n)
+    tmp(1:m, 1:n) = a(j0+1:j0+m, i0+1:i0+n)
     call zgemm('N', 'N', n, n, m, cone, aij(:,:,nb), mb, tmp, mb, cone, delta, mb)
     ! Delta = Delta + B(1,N) * A(N,1)
-    tmp = a(i0+1:i0+n, j0+1:j0+m)
+    tmp(1:n, 1:m) = a(i0+1:i0+n, j0+1:j0+m)
     call zgemm('N', 'N', n, n, m, cone, tmp, mb, aji(:,:,nb), mb, cone, delta, mb)
     ! Delta = Delta + B(1,1) + 1
     delta(1:n, 1:n) = delta(1:n, 1:n) + a(i0+1:i0+n, i0+1:i0+n)
diff --git a/source/KKRhost/inversion.f90 b/source/KKRhost/inversion.f90
index c811027f36c4fe3e7feb4e9d6460abead93cdb94..f3be1e62df108cf03a1796cfeb6a8c0a5015775d 100644
--- a/source/KKRhost/inversion.f90
+++ b/source/KKRhost/inversion.f90
@@ -151,6 +151,8 @@ contains
       ! godfrin module
       ! -----------------------------------------------------------------
     else if (invmod==3) then
+      ! write (*, '("na=",i8,"  nb=",i8,"  ldiag=",l2,"  lper=",l2,"  lpardiso=",l2)') t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso
+      ! write (*, '("bdims(1:nb)=",100i8)') t_godfrin%bdims(:)
       call sparse_inverse(gllke, t_godfrin%na, t_godfrin%nb, t_godfrin%bdims, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso) ! GODFRIN Flaviano
       ! ------------------------------------------------------------------
     else
diff --git a/source/KKRhost/kkrmat01.F90 b/source/KKRhost/kkrmat01.F90
index ae581dc5ebb7e29d12a8d9770785aced61fa51ac..99636d5c79876267989af169509276ae57c08640 100644
--- a/source/KKRhost/kkrmat01.F90
+++ b/source/KKRhost/kkrmat01.F90
@@ -488,9 +488,9 @@ contains
         ! Actually -TAU, because TAU = (Deltat^-1 - Gref)^-1
         ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
         ! $omp single
-        if (lly/=0) then           ! If LLY, full inversion is needed
+        if (lly/=0 .and. invmod/=3) then           ! If LLY, full inversion is needed
           call inversion(gllke, 0, icheck) ! LLY
-        else
+        else ! MdSD: godfrin options are set to full inversion if LLY is true
           call inversion(gllke, invmod, icheck)
         end if
         ! $omp end single
diff --git a/source/KKRhost/main0.F90 b/source/KKRhost/main0.F90
index 023498f6842df8bb4f244c4ed15e80e21ab5295c..1036dd72c2906d3a5e5ceb4273497da588070038 100644
--- a/source/KKRhost/main0.F90
+++ b/source/KKRhost/main0.F90
@@ -509,6 +509,7 @@ contains
     use :: mod_testdim, only: testdim
     use :: mod_write_tbkkr_files, only: write_tbkkr_files
     use :: mod_writehoststructure, only: writehoststructure
+    use :: godfrin, only: t_godfrin ! GODFRIN MdSD
     ! array dimensions
     use :: global_variables, only: krel, nspind, nrefd, irmd, ntotd, ipand, ncelld, nrmaxd, nchebd, natypd, naezd, lmaxd, alm, lmmaxd, &
       almgf0, lmgf0d, ndim_slabinv, nprincd, nembd, nembd1, nembd2, irmind, irnsd, nofgij, natomimpd, lpotd, lmpotd, npotd, nfund, &
@@ -703,7 +704,7 @@ contains
     call clsgen_tb(naez,nemb,nvirt,rr,rbasis,kaoez,zat,cls,ncls,nacls,atom,ezoa,    &
       nlbasis,nrbasis,nleft,nright,zperleft,zperight,tleft,tright,rmtref,rmtrefat,  &
       vref,refpot,nref,rcls,rcutz,rcutxy,alat,natyp,nclsd,nrd,naclsd,nrefd,nembd,   &
-      linterface,nprincd,nprinc)
+      linterface,nprincd,nprinc,invmod)
 
     ! change nrefd to nref and reduce size of rmtre, vref accordingly
     ! do the same for ncls(d) with nacls and rcls arrays
@@ -712,7 +713,8 @@ contains
 
     nlayer = naez/nprinc
     ! overwrite nprincd if chosen too small (also updates array `icheck`)
-    if (nprincd<nprinc) then
+    ! MdSD: except if using godfrin
+    if (nprincd<nprinc .and. invmod/=3) then
       ! find nprincd such that it is as big as it needs to be while being as
       ! small as commensurability with the number of layers etc. allows
       ! for this we loop over all layers and look for the divisors of naez
@@ -754,6 +756,14 @@ contains
       icheck = 0
     end if ! nprincd<nprinc
 
+    ! MdSD: actual block sizes for godfrin matrix inversion
+    if (invmod == 3) then
+      t_godfrin%na = naez*lmmaxd
+      t_godfrin%bdims(:) = t_godfrin%bdims(:)*lmmaxd
+    end if
+    ! write (*, '("na=",i8,"  nb=",i8,"  ldiag=",l2,"  lper=",l2,"  lpardiso=",l2)') t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso
+    ! write (*, '("bdims(1:nb)=",100i8)') t_godfrin%bdims(:)
+
     ! store nlayerd for later use
     nlayerd = nlayer
 
diff --git a/source/KKRhost/rinput13.F90 b/source/KKRhost/rinput13.F90
index 1876c9e1550f2319925e9d93b579b64c5e18cbba..4a1e4b6dc03bd7c2b9710aeedb8ca75bcd426454 100644
--- a/source/KKRhost/rinput13.F90
+++ b/source/KKRhost/rinput13.F90
@@ -2845,7 +2845,7 @@ contains
     if (invmod==3) then
       write (111, *) 'Godfrin inversion scheme parameters'
       write (1337, *) 'Godfrin inversion scheme parameters'
-
+      ! MdSD: write couplings.dat file
       write_tb_coupling=.true.
 
       t_godfrin%na = naez
@@ -2854,13 +2854,49 @@ contains
       read (unit=uio, fmt=*, iostat=ier) t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso
       if (ier/=0) stop 'Error reading `GODFRIN` (nb, ldiag, lper, lpardiso): check your inputcard'
 
+      ! MdSD: nprincd is not used by godfrin
+      if (nprincd /= 1) then
+        write (111, fmt='(A)') 'rinput13: Warning! setting nprincd=1'
+        write (1337, fmt='(A)') 'rinput13: Warning! setting nprincd=1'
+        nprincd = 1
+      end if
+
+      ! MdSD: Lloyd's formula needs the full matrix inverse
+      if (use_lloyd .and. t_godfrin%ldiag) then
+        write (111, fmt='(A)') 'rinput13: Warning! use_lloyd=T, setting ldiag=F'
+        write (1337, fmt='(A)') 'rinput13: Warning! use_lloyd=T, setting ldiag=F'
+        t_godfrin%ldiag = .false.
+      end if
+
+      ! MdSD: for Jij's or connection to KKRimp also the full matrix inverse is needed
+      if (icc/=0 .and. t_godfrin%ldiag) then
+        write (111, fmt='(A)') 'rinput13: Warning! ICC/=0, setting ldiag=F'
+        write (1337, fmt='(A)') 'rinput13: Warning! ICC/=0, setting ldiag=F'
+        t_godfrin%ldiag = .false.
+      end if
+
+      ! MdSD: in 3D mode the sparse matrix has additional corner blocks
+      if (linterface) then ! 2D
+        if (t_godfrin%lper) then
+          write (111, fmt='(A)') 'rinput13: Warning! linterface=T, setting lper=F'
+          write (1337, fmt='(A)') 'rinput13: Warning! linterface=T, setting lper=F'
+          t_godfrin%lper = .false.
+        end if
+      else ! 3D
+        if (.not.t_godfrin%lper) then
+          write (111, fmt='(A)') 'rinput13: Warning! linterface=F, setting lper=T'
+          write (1337, fmt='(A)') 'rinput13: Warning! linterface=F, setting lper=T'
+          t_godfrin%lper = .true.
+        end if
+      end if
+
       call ioinput('GODFRIN         ', uio, 4, 7, ier)
       allocate (t_godfrin%bdims(t_godfrin%nb))
       read (unit=uio, fmt=*, iostat=ier) t_godfrin%bdims(:)
-      if (ier/=0) stop 'Error reading `GIDFRIN (bdims)`: check your inputcard'
+      if (ier/=0) stop 'Error reading `GODFRIN (bdims)`: check your inputcard'
 
       ! Inconsistency check
-      if (t_godfrin%na/=sum(t_godfrin%bdims)) stop 'godfrin: na /= sum(bdims)'
+      if (t_godfrin%na/=sum(t_godfrin%bdims)) stop 'godfrin: na/=sum(bdims)'
 
 #ifndef __INTEL_COMPILER
       ! can only use pardiso solver with intel mkl at the moment, probably only
@@ -2868,21 +2904,23 @@ contains
       if (t_godfrin%lpardiso) stop 'No pardiso library available. Try the intel compiler or fix the linking issues'
 #endif
 
-      write (111, fmt='(A100)') 'na, nb, ldiag, lper, lpardiso; then bdims(1:nb)'
-      write (1337, fmt='(A100)') 'na, nb, ldiag, lper, lpardiso; then bdims(1:nb)'
-      write (111, *) t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso
-      write (1337, *) t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso
-      write (111, fmt='(50(I0," "))') t_godfrin%bdims(:)
-      write (1337, fmt='(50(I0," "))') t_godfrin%bdims(:)
+      write (111, '("na=",i8,"  nb=",i8,"  ldiag=",l2,"  lper=",l2,"  lpardiso=",l2)') t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso
+      write (1337, '("na=",i8,"  nb=",i8,"  ldiag=",l2,"  lper=",l2,"  lpardiso=",l2)') t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso
+      write (111, '("bdims(1:nb)=",100i8)') t_godfrin%bdims(:)
+      write (1337, '("bdims(1:nb)=",100i8)') t_godfrin%bdims(:)
 
       ! multiply blocks by angular momentum dimension
-      t_godfrin%na = t_godfrin%na*lmmaxd
-      t_godfrin%bdims = t_godfrin%bdims*lmmaxd
-
-      if (icc/=0 .and. t_godfrin%ldiag) then
-        t_godfrin%ldiag = .false.
-        write (111, fmt='(A100)') 'rinput13: Warning! ICC/=0. Setting ldiag = T'
-        write (1337, fmt='(A100)') 'rinput13: Warning! ICC/=0. Setting ldiag = T'
+      ! MdSD: this has to happen after checking the correctness of the blocking
+      ! t_godfrin%na = t_godfrin%na*lmmaxd
+      ! t_godfrin%bdims = t_godfrin%bdims*lmmaxd
+      if (t_godfrin%nb == 1) then
+        write (111, fmt='(A)') 'rinput13: Warning! nb=1, setting invmod=0'
+        write (1337, fmt='(A)') 'rinput13: Warning! nb=1, setting invmod=0'
+        invmod = 0
+      else if (t_godfrin%nb == 2) then
+        write (111, fmt='(A)') 'rinput13: Warning! nb=2, setting lper=F'
+        write (1337, fmt='(A)') 'rinput13: Warning! nb=2, setting lper=F'
+        t_godfrin%lper = .false.
       end if
 
     end if
diff --git a/utils/block_partitioning/README b/utils/block_partitioning/README
new file mode 100644
index 0000000000000000000000000000000000000000..d7d99f867287935f152e911f289d3041bb9048d7
--- /dev/null
+++ b/utils/block_partitioning/README
@@ -0,0 +1,10 @@
+This is a helper tool to find a block tridiagonal partitioning of a sparse matrix.
+It reads in the file couplings.dat which is written in clsgen_tb.F90 and contains the couplings between the different sites in the unit cell according to the screening clusters.
+The program tries to find block partitions up to a maximum number of blocks in a brute force way.
+It's nice when it works.
+
+Compilation: ifort block_partitioning.F90 -o block_partitioning
+Input: couplings.dat
+Execution: ./block_partitioning
+
+An example couplings.dat is provided.
diff --git a/utils/block_partitioning/block_partitioning b/utils/block_partitioning/block_partitioning
new file mode 100755
index 0000000000000000000000000000000000000000..12b0b41a6a0925d4adc94cc9e59df3de98425b9a
Binary files /dev/null and b/utils/block_partitioning/block_partitioning differ
diff --git a/utils/block_partitioning/block_partitioning.F90 b/utils/block_partitioning/block_partitioning.F90
new file mode 100644
index 0000000000000000000000000000000000000000..b96c27d8c0911b37d18446c3cd13c8ff3d778295
--- /dev/null
+++ b/utils/block_partitioning/block_partitioning.F90
@@ -0,0 +1,177 @@
+  program block_partitioning
+! Given a banded matrix sparsity pattern,
+! an optimal partition into block tridiagonal structure is found
+
+  implicit none
+
+  integer, allocatable :: links(:,:), bsizes(:)
+  integer :: i, nc, nb
+
+! Read sparsity pattern
+  open(file='couplings.dat',unit=10,status='old')
+  read(10,*)  ! skip line
+  read(10,*) nc
+  allocate(links(nc,nc),bsizes(nc))
+  do i=1,nc
+    read(10,'(1000i1)') links(1:nc,i)
+  end do
+  close(10)
+  call block_partitioner(nc,links,nb,bsizes)
+  deallocate(links)
+
+
+  contains
+
+
+  subroutine block_partitioner(nc,links,nblocks,blocksizes)
+! Find an optimal block tridiagonal partition
+! for sparsity pattern given in links
+
+! Number of columns of matrix
+  integer, intent(in)  :: nc
+! Location of non-zero entries
+  integer, intent(in)  :: links(nc,nc)
+! Optimal number of blocks
+  integer, intent(out) :: nblocks
+! Sizes of each block, in (1:nblocks)
+  integer, intent(out) :: blocksizes(nc)
+! ----------------------------------------------------------------------
+  integer :: trialsizes(nc)
+  integer :: i, j, iup, idn, maxblock, minsize, sum2, minsum2, imax
+  logical :: lcheck
+  integer :: bandmin, bandmax, bandavg, banddev
+
+! Initial values
+  nblocks = 1; blocksizes(1) = nc; blocksizes(2:nc) = 0
+! Find bandwidth of each column
+  bandmax = 0; bandmin = nc; bandavg = 0; banddev = 0
+  do j=1,nc
+    iup = 0
+    do i=j,1,-1
+      if (links(i,j) == 1) iup = i
+    end do
+    idn = 0
+    do i=j,nc
+      if (links(i,j) == 1) idn = i
+    end do
+    if (idn-iup < bandmin) bandmin = idn-iup
+    if (idn-iup > bandmax) bandmax = idn-iup
+    bandavg = bandavg + idn - iup
+    banddev = banddev + (idn - iup)**2
+!    write(*,'("bandwidth: j, up, dn, diff=",4i8)') j, iup, idn, idn-iup
+  end do
+  bandavg = bandavg/nc
+  banddev = ceiling(sqrt(real(banddev/nc - bandavg**2)))
+  write(*,'("bandwidth: min, max, avg, dev=",4i8)') bandmin, bandmax, bandavg, banddev
+  call brute_partitioner(nc,links,bandmin,bandmax,bandavg,banddev,nblocks,blocksizes)
+! All done!
+  end subroutine block_partitioner
+
+
+  subroutine brute_partitioner(nc,links,bandmin,bandmax,bandavg,banddev,nblocks,blocksizes)
+! Brute force search for optimal block partition
+
+  implicit none
+
+! Size of full matrix
+  integer, intent(in)  :: nc
+! Sparsity pattern
+  integer, intent(in)  :: links(nc,nc)
+! Bandwidth statistics
+  integer, intent(in)  :: bandmin, bandmax, bandavg, banddev
+! Optimal number of blocks
+  integer, intent(out) :: nblocks
+! Sizes of each block, in (1:nblocks)
+  integer, intent(out) :: blocksizes(nc)
+! ----------------------------------------------------------------------
+  integer :: trialsizes(nc)
+  integer :: i, k, maxblock, minsize, sum2, minsum2, imax
+  logical :: lcheck
+  integer*8 :: j, jdiv, jrem, imax2i
+
+! Minimum block size is half of the minimum bandwith
+!  minsize = bandmin/2
+! Allowance for uneven bandwidth
+  minsize = (bandmin+banddev)/2
+! Maximum number of blocks
+  maxblock = nc/minsize
+  write(*,'("minsize, maxblock=",2i8)') minsize, maxblock
+! Find combinations of block sizes that add to nc
+  minsum2 = nc**2
+  do i=2,maxblock
+!   Sum of squares of block dimensions is bound by nc**2
+    imax = ceiling(sqrt(1.d0*(nc**2 - (i-1)*minsize**2))) - minsize
+!   Sum of block dimensions is bound by nc
+    imax = min(imax,max(nc-i*minsize,1))
+!   Empirical reduction
+    imax = min(imax,max(bandmax/2-minsize,1))
+!   Make sure the big integers don't explode
+    imax2i = imax**(i-1)
+    write(*,'("nblocks,imax=",2i8,i16)') i, imax, imax2i
+!   Go over all combinations, encoded in j
+!   Digits are decoded into jdiv
+    do j=0,imax2i
+      jrem = j
+      do k=i-1,1,-1
+        jdiv = jrem/(imax**(k-1))
+        jrem = mod(jrem,imax**(k-1))
+        trialsizes(k) = minsize + jdiv
+      end do
+      trialsizes(i) = nc - sum(trialsizes(1:i-1))
+      if (trialsizes(i) < minsize) cycle
+!     Do the blocks leave out any matrix elements?
+      call check_blocks(nc,links,i,trialsizes(1:i),lcheck)
+      if (lcheck) then
+        sum2 = sum(trialsizes(1:i)**2)
+        if (sum2 < minsum2) then
+          minsum2 = sum2
+          nblocks = i
+          blocksizes(1:i) = trialsizes(1:i)
+        end if
+      end if
+    end do
+    write(*,'("minsum2, nblocks, blocksizes=",1000i8)') minsum2, nblocks, blocksizes(1:nblocks)
+  end do
+! All done!
+  end subroutine brute_partitioner
+
+
+  pure subroutine check_blocks(nc,links,nblocks,blocksizes,lcheck)
+! Check if the given blocks are a tridiagonal partition
+
+  implicit none
+
+! Number of columns
+  integer, intent(in)  :: nc
+! Sparsity pattern
+  integer, intent(in)  :: links(nc,nc)
+! Number of blocks
+  integer, intent(in)  :: nblocks
+! Block sizes
+  integer, intent(in)  :: blocksizes(1:nblocks)
+! Is this a correct partition?
+  logical, intent(out) :: lcheck
+! ----------------------------------------------------------------------
+  integer :: ib, i0, i1, j0, j1
+  integer :: zeroed(nc,nc)
+
+  lcheck = .true.
+  zeroed(:,:) = links(:,:)
+  i0 = 1; i1 = blocksizes(1)
+  do ib=1,nblocks-1
+    i0 = sum(blocksizes(1:ib-1)) + 1
+    i1 = sum(blocksizes(1:ib))
+    j0 = i1 + 1
+    j1 = i1 + blocksizes(ib+1)
+    zeroed(i0:i1,i0:i1) = 0
+    zeroed(i0:i1,j0:j1) = 0
+    zeroed(j0:j1,i0:i1) = 0
+  end do
+  zeroed(j0:j1,j0:j1) = 0
+  if (any(zeroed(:,:) == 1)) lcheck = .false.
+! All done!
+  end subroutine check_blocks
+
+
+! All done!
+  end program block_partitioning
diff --git a/utils/block_partitioning/couplings.dat b/utils/block_partitioning/couplings.dat
new file mode 100644
index 0000000000000000000000000000000000000000..975fd7d8585aac1674316bb75de25fe400d1bce2
--- /dev/null
+++ b/utils/block_partitioning/couplings.dat
@@ -0,0 +1,108 @@
+# Couplings between atoms via gref
+     106
+1111111111110111111000110000000000000000000000000000000000000000000000000000000000000000000000000000000000
+1111111111111011111100010000000000000000000000000000000000000000000000000000000000000000000000000000000000
+1111111111111101111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+1111111111111110011111000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+1111111101111111001111100000000000000000000000000000000000000000000000000000000000000000000000000000000000
+1111111110111111000111110000000000000000000000000000000000000000000000000000000000000000000000000000000000
+1111111111011111100011110000000000000000000000000000000000000000000000000000000000000000000000000000000000
+1111111111101111110001110000000000000000000000000000000000000000000000000000000000000000000000000000000000
+1111011111111111111101111111000111000000000000000000000000000000000000000000000000000000000000000000000000
+1111101111111111111110111111100011000000000000000000000000000000000000000000000000000000000000000000000000
+1111110111111111111111011111110000000000000000000000000000000000000000000000000000000000000000000000000000
+1111111011111111111111100111111100000000000000000000000000000000000000000000000000000000000000000000000000
+0111111111111111011111110011111110000000000000000000000000000000000000000000000000000000000000000000000000
+1011111111111111101111110001111111000000000000000000000000000000000000000000000000000000000000000000000000
+1101111111111111110111111000011111000000000000000000000000000000000000000000000000000000000000000000000000
+1110111111111111111011111110001111000000000000000000000000000000000000000000000000000000000000000000000000
+1110001111110111111101111111101111111000110000000000000000000000000000000000000000000000000000000000000000
+1111000111111011111110111111110111111100010000000000000000000000000000000000000000000000000000000000000000
+1111100011111101111111011111111001111110000000000000000000000000000000000000000000000000000000000000000000
+0111110011111110111111101111111110011111000000000000000000000000000000000000000000000000000000000000000000
+0011111001111111011111110111111111001111100000000000000000000000000000000000000000000000000000000000000000
+0001111110111111101111111011111111000111110000000000000000000000000000000000000000000000000000000000000000
+1000111111011111110111111100111111100011110000000000000000000000000000000000000000000000000000000000000000
+1100011111101111111011111111011111110001110000000000000000000000000000000000000000000000000000000000000000
+0000000011100011111101111111101111111101111110001100000000000000000000000000000000000000000000000000000000
+0000000011110001111110111111110111111110111111000100000000000000000000000000000000000000000000000000000000
+0000000011111000111111011111111011111111011111100100000000000000000000000000000000000000000000000000000000
+0000000011111000111111011111111101111111011111110000000000000000000000000000000000000000000000000000000000
+0000000001111100111111101111111110111111100111110000000000000000000000000000000000000000000000000000000000
+0000000000111110011111110111111111011111110011111000000000000000000000000000000000000000000000000000000000
+0000000000011111101111111011111111101111110001111100000000000000000000000000000000000000000000000000000000
+0000000010001111110111111101111111110111111001111100000000000000000000000000000000000000000000000000000000
+0000000010001111110111111110111111110111111100111100000000000000000000000000000000000000000000000000000000
+0000000011000111111011111111011111111011111100011100000000000000000000000000000000000000000000000000000000
+0000000000000000111000111111101111111101111111011111100011000000000000000000000000000000000000000000000000
+0000000000000000111100011111110111111110111111101111110001000000000000000000000000000000000000000000000000
+0000000000000000111110001111111001111111011111110111111000000000000000000000000000000000000000000000000000
+0000000000000000011111001111111110111111101111111001111100000000000000000000000000000000000000000000000000
+0000000000000000001111100111111111011111110111111100111110000000000000000000000000000000000000000000000000
+0000000000000000000111111011111111101111111011111100011111000000000000000000000000000000000000000000000000
+0000000000000000100011111100111111110111111101111110001111000000000000000000000000000000000000000000000000
+0000000000000000110001111111011111111011111110111111000111000000000000000000000000000000000000000000000000
+0000000000000000000000001111000111111101111111011111110111111000110000000000000000000000000000000000000000
+0000000000000000000000001111100011111110111111101111111011111100010000000000000000000000000000000000000000
+0000000000000000000000001111110000111111011111111111111101111110000000000000000000000000000000000000000000
+0000000000000000000000000111111100111111101111111011111110011111000000000000000000000000000000000000000000
+0000000000000000000000000011111110011111110111111101111111001111100000000000000000000000000000000000000000
+0000000000000000000000000001111111101111111011111110111111000111110000000000000000000000000000000000000000
+0000000000000000000000001000011111110111111111111111011111100011110000000000000000000000000000000000000000
+0000000000000000000000001110001111111011111110111111101111110001110000000000000000000000000000000000000000
+0000000000000000000000000000000000111000111111011111111111111101111110001100000000000000000000000000000000
+0000000000000000000000000000000000111100011111101111111111111110111111000100000000000000000000000000000000
+0000000000000000000000000000000000111110001111110111111111111111011111100000000000000000000000000000000000
+0000000000000000000000000000000000011111001111111011111111111111100111110000000000000000000000000000000000
+0000000000000000000000000000000000001111100111111111111111011111110011111000000000000000000000000000000000
+0000000000000000000000000000000000000111111011111111111111101111110001111100000000000000000000000000000000
+0000000000000000000000000000000000100011111101111111111111110111111000111100000000000000000000000000000000
+0000000000000000000000000000000000110001111110111111111111111011111100011100000000000000000000000000000000
+0000000000000000000000000000000000000000001110001111110111111111111111011111100011000000000000000000000000
+0000000000000000000000000000000000000000001111000111111011111111111111101111110001000000000000000000000000
+0000000000000000000000000000000000000000001111100011111101111111111111110111111000000000000000000000000000
+0000000000000000000000000000000000000000000111110011111110111111111111111001111100000000000000000000000000
+0000000000000000000000000000000000000000000011111001111111111111110111111100111110000000000000000000000000
+0000000000000000000000000000000000000000000001111110111111111111111011111100011111000000000000000000000000
+0000000000000000000000000000000000000000001000111111011111111111111101111110001111000000000000000000000000
+0000000000000000000000000000000000000000001100011111101111111111111110111111000111000000000000000000000000
+0000000000000000000000000000000000000000000000000011100011111101111111111111110111111000110000000000000000
+0000000000000000000000000000000000000000000000000011110001111110111111111111111011111100010000000000000000
+0000000000000000000000000000000000000000000000000011111000111111011111111111111101111110000000000000000000
+0000000000000000000000000000000000000000000000000001111100111111101111111111111110011111000000000000000000
+0000000000000000000000000000000000000000000000000000111110011111111111111101111111001111100000000000000000
+0000000000000000000000000000000000000000000000000000011111101111111111111110111111000111110000000000000000
+0000000000000000000000000000000000000000000000000010001111110111111111111111011111100011110000000000000000
+0000000000000000000000000000000000000000000000000011000111111011111111111111101111110001110000000000000000
+0000000000000000000000000000000000000000000000000000000000111000111111011111111111111101111110001100000000
+0000000000000000000000000000000000000000000000000000000000111100011111101111111111111110111111000100000000
+0000000000000000000000000000000000000000000000000000000000111110001111110111111111111111011111100000000000
+0000000000000000000000000000000000000000000000000000000000011111001111111011111111111111100111110000000000
+0000000000000000000000000000000000000000000000000000000000001111100111111111111111011111110011111000000000
+0000000000000000000000000000000000000000000000000000000000000111111011111111111111101111110001111100000000
+0000000000000000000000000000000000000000000000000000000000100011111101111111111111110111111000111100000000
+0000000000000000000000000000000000000000000000000000000000110001111110111111111111111011111100011100000000
+0000000000000000000000000000000000000000000000000000000000000000001110001111110111111111111111011111100011
+0000000000000000000000000000000000000000000000000000000000000000001111000111111011111111111111101111110001
+0000000000000000000000000000000000000000000000000000000000000000001111100011111101111111111111110111111000
+0000000000000000000000000000000000000000000000000000000000000000000111110011111110111111111111111001111100
+0000000000000000000000000000000000000000000000000000000000000000000011111001111111111111110111111100111110
+0000000000000000000000000000000000000000000000000000000000000000000001111110111111111111111011111100011111
+0000000000000000000000000000000000000000000000000000000000000000001000111111011111111111111101111110001111
+0000000000000000000000000000000000000000000000000000000000000000001100011111101111111111111110111111000111
+0000000000000000000000000000000000000000000000000000000000000000000000000011100011111101111111111111110111
+0000000000000000000000000000000000000000000000000000000000000000000000000011110001111110111111111111111011
+0000000000000000000000000000000000000000000000000000000000000000000000000011111000111111011111111111111101
+0000000000000000000000000000000000000000000000000000000000000000000000000001111100111111101111111111111110
+0000000000000000000000000000000000000000000000000000000000000000000000000000111110011111111111111101111111
+0000000000000000000000000000000000000000000000000000000000000000000000000000011111101111111111111110111111
+0000000000000000000000000000000000000000000000000000000000000000000000000010001111110111111111111111011111
+0000000000000000000000000000000000000000000000000000000000000000000000000011000111111011111111111111101111
+0000000000000000000000000000000000000000000000000000000000000000000000000000000000111000111111011111111111
+0000000000000000000000000000000000000000000000000000000000000000000000000000000000111100011111101111111111
+0000000000000000000000000000000000000000000000000000000000000000000000000000000000111110001111110111111111
+0000000000000000000000000000000000000000000000000000000000000000000000000000000000011111001111111011111111
+0000000000000000000000000000000000000000000000000000000000000000000000000000000000001111100111111111111111
+0000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111011111111111111
+0000000000000000000000000000000000000000000000000000000000000000000000000000000000100011111101111111111111
+0000000000000000000000000000000000000000000000000000000000000000000000000000000000110001111110111111111111
diff --git a/utils/block_partitioning/out b/utils/block_partitioning/out
new file mode 100644
index 0000000000000000000000000000000000000000..c71ce1a7804434ef8bb2157f791afdcf12f6a2fb
--- /dev/null
+++ b/utils/block_partitioning/out
@@ -0,0 +1,14 @@
+bandwidth: min, max, avg, dev=      20      41      34      10
+minsize, maxblock=      15       7
+nblocks,imax=       2       5               5
+minsum2, nblocks, blocksizes=    7796       2      20      86
+nblocks,imax=       3       5              25
+minsum2, nblocks, blocksizes=    5658       3      16      19      71
+nblocks,imax=       4       5             125
+minsum2, nblocks, blocksizes=    3750       4      16      18      19      53
+nblocks,imax=       5       5             625
+minsum2, nblocks, blocksizes=    2566       5      16      18      16      19      37
+nblocks,imax=       6       5            3125
+minsum2, nblocks, blocksizes=    1894       6      16      18      16      16      19      21
+nblocks,imax=       7       1               1
+minsum2, nblocks, blocksizes=    1894       6      16      18      16      16      19      21