diff --git a/source/KKRhost/clsgen_tb.f90 b/source/KKRhost/clsgen_tb.f90 index 34927c8ff260a20615e8c1a0953404a242b57339..f2cd76b33395a0aca0f08abbd5be115b5954d2bd 100644 --- a/source/KKRhost/clsgen_tb.f90 +++ b/source/KKRhost/clsgen_tb.f90 @@ -39,13 +39,14 @@ contains !> atom. !------------------------------------------------------------------------------- subroutine clsgen_tb(naez, nemb, nvirt, rr, rbasis, kaoez, zat, cls, ncls, nacls, atom, ezoa, nlbasis, nrbasis, nleft, nright, zperleft, zperight, tleft, tright, rmtref, & - rmtrefat, vref, irefpot, nrefpot, rcls, rcut, rcutxy, alat, natyp, nclsd, nrd, naclsd, nrefd, nembd, linterface, nprincd, nprinc) + rmtrefat, vref, irefpot, nrefpot, rcls, rcut, rcutxy, alat, natyp, nclsd, nrd, naclsd, nrefd, nembd, linterface, nprincd, nprinc, invmod) ! ************************************************************************ ! use :: mod_datatypes, only: dp use :: mod_runoptions, only: write_tb_coupling, disable_print_serialnumber use :: mod_version_info, only: version_print_header use :: mod_dsort, only: dsort + use :: godfrin, only: t_godfrin ! GODFRIN MdSD implicit none ! .. arguments integer :: naez !! number of atoms in EZ @@ -59,6 +60,7 @@ contains integer :: nrefd integer, intent(in) :: nprincd !! user-supplied number of layers in a principal layer integer :: nprinc !! Calculated number of layers in a principal layer + integer, intent(in) :: invmod !! MdSD: to check if generalized block tridiagonal inversion is used integer :: natyp integer :: nembd real (kind=dp) :: alat !! lattice constant A @@ -456,7 +458,8 @@ contains ! Calculate number of layers in principal layer nprinc = 1 - if (linterface) then + ! MdSD: except if using godfrin + if (linterface .and. invmod/=3) then do jatom = 1, naez ! loop over rows do iat = 1, jatom - 1 ! loop over columns before the diagonal if (icouplmat(jatom,iat)==1) nprinc = max(nprinc, jatom-iat) @@ -470,19 +473,40 @@ contains ! Check if the user-specified blocking is correct if (mod(naez,nprincd) == 0) then - nb = naez/nprincd - do ib=1,nb-1 -! diagonal blocks - icouplmat(1+(ib-1)*nprincd:ib*nprincd,1+(ib-1)*nprincd:ib*nprincd) = 0 -! subdiagonal blocks - icouplmat(1+ib*nprincd:(ib+1)*nprincd,1+(ib-1)*nprincd:ib*nprincd) = 0 -! superdiagonal blocks - icouplmat(1+(ib-1)*nprincd:ib*nprincd,1+ib*nprincd:(ib+1)*nprincd) = 0 - end do -! last diagonal blocks - icouplmat(1+(nb-1)*nprincd:nb*nprincd,1+(nb-1)*nprincd:nb*nprincd) = 0 + ! MdSD: generalized blocking + if (invmod == 3) then + nb = t_godfrin%nb + do ib=1,nb-1 +! diagonal blocks + icouplmat(1+sum(t_godfrin%bdims(1:ib-1)):sum(t_godfrin%bdims(1:ib)),1+sum(t_godfrin%bdims(1:ib-1)):sum(t_godfrin%bdims(1:ib))) = 0 +! subdiagonal blocks + icouplmat(1+sum(t_godfrin%bdims(1:ib)):sum(t_godfrin%bdims(1:ib+1)),1+sum(t_godfrin%bdims(1:ib-1)):sum(t_godfrin%bdims(1:ib))) = 0 +! superdiagonal blocks + icouplmat(1+sum(t_godfrin%bdims(1:ib-1)):sum(t_godfrin%bdims(1:ib)),1+sum(t_godfrin%bdims(1:ib)):sum(t_godfrin%bdims(1:ib+1))) = 0 + end do +! last diagonal blocks + icouplmat(1+sum(t_godfrin%bdims(1:nb-1)):sum(t_godfrin%bdims(1:nb)),1+sum(t_godfrin%bdims(1:nb-1)):sum(t_godfrin%bdims(1:nb))) = 0 +! additional blocks from periodic boundary conditions + if (t_godfrin%lper) then + icouplmat(1+sum(t_godfrin%bdims(1:0)):sum(t_godfrin%bdims(1:1)),1+sum(t_godfrin%bdims(1:nb-1)):sum(t_godfrin%bdims(1:nb))) = 0 + icouplmat(1+sum(t_godfrin%bdims(1:nb-1)):sum(t_godfrin%bdims(1:nb)),1+sum(t_godfrin%bdims(1:0)):sum(t_godfrin%bdims(1:1))) = 0 + end if + ! MdSD: standard blocking + else + nb = naez/nprincd + do ib=1,nb-1 +! diagonal blocks + icouplmat(1+(ib-1)*nprincd:ib*nprincd,1+(ib-1)*nprincd:ib*nprincd) = 0 +! subdiagonal blocks + icouplmat(1+ib*nprincd:(ib+1)*nprincd,1+(ib-1)*nprincd:ib*nprincd) = 0 +! superdiagonal blocks + icouplmat(1+(ib-1)*nprincd:ib*nprincd,1+ib*nprincd:(ib+1)*nprincd) = 0 + end do +! last diagonal blocks + icouplmat(1+(nb-1)*nprincd:nb*nprincd,1+(nb-1)*nprincd:nb*nprincd) = 0 + end if ! mistakes were made? - if (any(icouplmat /= 0)) then + if (any(icouplmat /= 0) .and. invmod /= 0) then write (1337, *) 'CLSGEN_TB: WARNING --- User-supplied NPRINCD=', nprincd, ' is INCORRECT!' ! Write out the coupling matrix write (1337, *) 'Truncated coupling matrix:' @@ -492,6 +516,8 @@ contains else nprinc = nprincd end if + else + ! MdSD: error message here? end if ! close clusters file diff --git a/source/KKRhost/gfmask.f90 b/source/KKRhost/gfmask.f90 index f1b2d10e2af1d2e6e6ece3f3477f26b748938c89..86c41447817dacaffff0ae2bf05dbdde8d0c1011 100644 --- a/source/KKRhost/gfmask.f90 +++ b/source/KKRhost/gfmask.f90 @@ -87,7 +87,7 @@ contains ! (atomic sites) that are needed ! ====================================================================== - if ((icc/=0) .and. (invmod==1)) then + if ((icc/=0) .and. invmod==1) then do i = 1, naez do j = 1, naez icouple(i, j) = 0 diff --git a/source/KKRhost/godfrin.F90 b/source/KKRhost/godfrin.F90 index 51e0cc0c12ebf87591192090da376eedf2f9762f..d05cf4ab5e2dabc610cbbcb089cc4f09f76eb56b 100644 --- a/source/KKRhost/godfrin.F90 +++ b/source/KKRhost/godfrin.F90 @@ -673,10 +673,10 @@ contains call zgemm('N', 'N', m, n, m, cone, delta, mb, aji(:,:,nb), mb, czero, tmp, mb) call zgemm('N', 'N', n, n, m, cone, aij(:,:,nb), mb, tmp, mb, czero, delta, mb) ! Delta = Delta + A(1,N) * B(N,1) - tmp = a(j0+1:j0+m, i0+1:i0+n) + tmp(1:m, 1:n) = a(j0+1:j0+m, i0+1:i0+n) call zgemm('N', 'N', n, n, m, cone, aij(:,:,nb), mb, tmp, mb, cone, delta, mb) ! Delta = Delta + B(1,N) * A(N,1) - tmp = a(i0+1:i0+n, j0+1:j0+m) + tmp(1:n, 1:m) = a(i0+1:i0+n, j0+1:j0+m) call zgemm('N', 'N', n, n, m, cone, tmp, mb, aji(:,:,nb), mb, cone, delta, mb) ! Delta = Delta + B(1,1) + 1 delta(1:n, 1:n) = delta(1:n, 1:n) + a(i0+1:i0+n, i0+1:i0+n) diff --git a/source/KKRhost/inversion.f90 b/source/KKRhost/inversion.f90 index c811027f36c4fe3e7feb4e9d6460abead93cdb94..f3be1e62df108cf03a1796cfeb6a8c0a5015775d 100644 --- a/source/KKRhost/inversion.f90 +++ b/source/KKRhost/inversion.f90 @@ -151,6 +151,8 @@ contains ! godfrin module ! ----------------------------------------------------------------- else if (invmod==3) then + ! write (*, '("na=",i8," nb=",i8," ldiag=",l2," lper=",l2," lpardiso=",l2)') t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso + ! write (*, '("bdims(1:nb)=",100i8)') t_godfrin%bdims(:) call sparse_inverse(gllke, t_godfrin%na, t_godfrin%nb, t_godfrin%bdims, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso) ! GODFRIN Flaviano ! ------------------------------------------------------------------ else diff --git a/source/KKRhost/kkrmat01.F90 b/source/KKRhost/kkrmat01.F90 index ae581dc5ebb7e29d12a8d9770785aced61fa51ac..99636d5c79876267989af169509276ae57c08640 100644 --- a/source/KKRhost/kkrmat01.F90 +++ b/source/KKRhost/kkrmat01.F90 @@ -488,9 +488,9 @@ contains ! Actually -TAU, because TAU = (Deltat^-1 - Gref)^-1 ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! $omp single - if (lly/=0) then ! If LLY, full inversion is needed + if (lly/=0 .and. invmod/=3) then ! If LLY, full inversion is needed call inversion(gllke, 0, icheck) ! LLY - else + else ! MdSD: godfrin options are set to full inversion if LLY is true call inversion(gllke, invmod, icheck) end if ! $omp end single diff --git a/source/KKRhost/main0.F90 b/source/KKRhost/main0.F90 index 023498f6842df8bb4f244c4ed15e80e21ab5295c..1036dd72c2906d3a5e5ceb4273497da588070038 100644 --- a/source/KKRhost/main0.F90 +++ b/source/KKRhost/main0.F90 @@ -509,6 +509,7 @@ contains use :: mod_testdim, only: testdim use :: mod_write_tbkkr_files, only: write_tbkkr_files use :: mod_writehoststructure, only: writehoststructure + use :: godfrin, only: t_godfrin ! GODFRIN MdSD ! array dimensions use :: global_variables, only: krel, nspind, nrefd, irmd, ntotd, ipand, ncelld, nrmaxd, nchebd, natypd, naezd, lmaxd, alm, lmmaxd, & almgf0, lmgf0d, ndim_slabinv, nprincd, nembd, nembd1, nembd2, irmind, irnsd, nofgij, natomimpd, lpotd, lmpotd, npotd, nfund, & @@ -703,7 +704,7 @@ contains call clsgen_tb(naez,nemb,nvirt,rr,rbasis,kaoez,zat,cls,ncls,nacls,atom,ezoa, & nlbasis,nrbasis,nleft,nright,zperleft,zperight,tleft,tright,rmtref,rmtrefat, & vref,refpot,nref,rcls,rcutz,rcutxy,alat,natyp,nclsd,nrd,naclsd,nrefd,nembd, & - linterface,nprincd,nprinc) + linterface,nprincd,nprinc,invmod) ! change nrefd to nref and reduce size of rmtre, vref accordingly ! do the same for ncls(d) with nacls and rcls arrays @@ -712,7 +713,8 @@ contains nlayer = naez/nprinc ! overwrite nprincd if chosen too small (also updates array `icheck`) - if (nprincd<nprinc) then + ! MdSD: except if using godfrin + if (nprincd<nprinc .and. invmod/=3) then ! find nprincd such that it is as big as it needs to be while being as ! small as commensurability with the number of layers etc. allows ! for this we loop over all layers and look for the divisors of naez @@ -754,6 +756,14 @@ contains icheck = 0 end if ! nprincd<nprinc + ! MdSD: actual block sizes for godfrin matrix inversion + if (invmod == 3) then + t_godfrin%na = naez*lmmaxd + t_godfrin%bdims(:) = t_godfrin%bdims(:)*lmmaxd + end if + ! write (*, '("na=",i8," nb=",i8," ldiag=",l2," lper=",l2," lpardiso=",l2)') t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso + ! write (*, '("bdims(1:nb)=",100i8)') t_godfrin%bdims(:) + ! store nlayerd for later use nlayerd = nlayer diff --git a/source/KKRhost/rinput13.F90 b/source/KKRhost/rinput13.F90 index 1876c9e1550f2319925e9d93b579b64c5e18cbba..4a1e4b6dc03bd7c2b9710aeedb8ca75bcd426454 100644 --- a/source/KKRhost/rinput13.F90 +++ b/source/KKRhost/rinput13.F90 @@ -2845,7 +2845,7 @@ contains if (invmod==3) then write (111, *) 'Godfrin inversion scheme parameters' write (1337, *) 'Godfrin inversion scheme parameters' - + ! MdSD: write couplings.dat file write_tb_coupling=.true. t_godfrin%na = naez @@ -2854,13 +2854,49 @@ contains read (unit=uio, fmt=*, iostat=ier) t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso if (ier/=0) stop 'Error reading `GODFRIN` (nb, ldiag, lper, lpardiso): check your inputcard' + ! MdSD: nprincd is not used by godfrin + if (nprincd /= 1) then + write (111, fmt='(A)') 'rinput13: Warning! setting nprincd=1' + write (1337, fmt='(A)') 'rinput13: Warning! setting nprincd=1' + nprincd = 1 + end if + + ! MdSD: Lloyd's formula needs the full matrix inverse + if (use_lloyd .and. t_godfrin%ldiag) then + write (111, fmt='(A)') 'rinput13: Warning! use_lloyd=T, setting ldiag=F' + write (1337, fmt='(A)') 'rinput13: Warning! use_lloyd=T, setting ldiag=F' + t_godfrin%ldiag = .false. + end if + + ! MdSD: for Jij's or connection to KKRimp also the full matrix inverse is needed + if (icc/=0 .and. t_godfrin%ldiag) then + write (111, fmt='(A)') 'rinput13: Warning! ICC/=0, setting ldiag=F' + write (1337, fmt='(A)') 'rinput13: Warning! ICC/=0, setting ldiag=F' + t_godfrin%ldiag = .false. + end if + + ! MdSD: in 3D mode the sparse matrix has additional corner blocks + if (linterface) then ! 2D + if (t_godfrin%lper) then + write (111, fmt='(A)') 'rinput13: Warning! linterface=T, setting lper=F' + write (1337, fmt='(A)') 'rinput13: Warning! linterface=T, setting lper=F' + t_godfrin%lper = .false. + end if + else ! 3D + if (.not.t_godfrin%lper) then + write (111, fmt='(A)') 'rinput13: Warning! linterface=F, setting lper=T' + write (1337, fmt='(A)') 'rinput13: Warning! linterface=F, setting lper=T' + t_godfrin%lper = .true. + end if + end if + call ioinput('GODFRIN ', uio, 4, 7, ier) allocate (t_godfrin%bdims(t_godfrin%nb)) read (unit=uio, fmt=*, iostat=ier) t_godfrin%bdims(:) - if (ier/=0) stop 'Error reading `GIDFRIN (bdims)`: check your inputcard' + if (ier/=0) stop 'Error reading `GODFRIN (bdims)`: check your inputcard' ! Inconsistency check - if (t_godfrin%na/=sum(t_godfrin%bdims)) stop 'godfrin: na /= sum(bdims)' + if (t_godfrin%na/=sum(t_godfrin%bdims)) stop 'godfrin: na/=sum(bdims)' #ifndef __INTEL_COMPILER ! can only use pardiso solver with intel mkl at the moment, probably only @@ -2868,21 +2904,23 @@ contains if (t_godfrin%lpardiso) stop 'No pardiso library available. Try the intel compiler or fix the linking issues' #endif - write (111, fmt='(A100)') 'na, nb, ldiag, lper, lpardiso; then bdims(1:nb)' - write (1337, fmt='(A100)') 'na, nb, ldiag, lper, lpardiso; then bdims(1:nb)' - write (111, *) t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso - write (1337, *) t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso - write (111, fmt='(50(I0," "))') t_godfrin%bdims(:) - write (1337, fmt='(50(I0," "))') t_godfrin%bdims(:) + write (111, '("na=",i8," nb=",i8," ldiag=",l2," lper=",l2," lpardiso=",l2)') t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso + write (1337, '("na=",i8," nb=",i8," ldiag=",l2," lper=",l2," lpardiso=",l2)') t_godfrin%na, t_godfrin%nb, t_godfrin%ldiag, t_godfrin%lper, t_godfrin%lpardiso + write (111, '("bdims(1:nb)=",100i8)') t_godfrin%bdims(:) + write (1337, '("bdims(1:nb)=",100i8)') t_godfrin%bdims(:) ! multiply blocks by angular momentum dimension - t_godfrin%na = t_godfrin%na*lmmaxd - t_godfrin%bdims = t_godfrin%bdims*lmmaxd - - if (icc/=0 .and. t_godfrin%ldiag) then - t_godfrin%ldiag = .false. - write (111, fmt='(A100)') 'rinput13: Warning! ICC/=0. Setting ldiag = T' - write (1337, fmt='(A100)') 'rinput13: Warning! ICC/=0. Setting ldiag = T' + ! MdSD: this has to happen after checking the correctness of the blocking + ! t_godfrin%na = t_godfrin%na*lmmaxd + ! t_godfrin%bdims = t_godfrin%bdims*lmmaxd + if (t_godfrin%nb == 1) then + write (111, fmt='(A)') 'rinput13: Warning! nb=1, setting invmod=0' + write (1337, fmt='(A)') 'rinput13: Warning! nb=1, setting invmod=0' + invmod = 0 + else if (t_godfrin%nb == 2) then + write (111, fmt='(A)') 'rinput13: Warning! nb=2, setting lper=F' + write (1337, fmt='(A)') 'rinput13: Warning! nb=2, setting lper=F' + t_godfrin%lper = .false. end if end if diff --git a/utils/block_partitioning/README b/utils/block_partitioning/README new file mode 100644 index 0000000000000000000000000000000000000000..d7d99f867287935f152e911f289d3041bb9048d7 --- /dev/null +++ b/utils/block_partitioning/README @@ -0,0 +1,10 @@ +This is a helper tool to find a block tridiagonal partitioning of a sparse matrix. +It reads in the file couplings.dat which is written in clsgen_tb.F90 and contains the couplings between the different sites in the unit cell according to the screening clusters. +The program tries to find block partitions up to a maximum number of blocks in a brute force way. +It's nice when it works. + +Compilation: ifort block_partitioning.F90 -o block_partitioning +Input: couplings.dat +Execution: ./block_partitioning + +An example couplings.dat is provided. diff --git a/utils/block_partitioning/block_partitioning b/utils/block_partitioning/block_partitioning new file mode 100755 index 0000000000000000000000000000000000000000..12b0b41a6a0925d4adc94cc9e59df3de98425b9a Binary files /dev/null and b/utils/block_partitioning/block_partitioning differ diff --git a/utils/block_partitioning/block_partitioning.F90 b/utils/block_partitioning/block_partitioning.F90 new file mode 100644 index 0000000000000000000000000000000000000000..b96c27d8c0911b37d18446c3cd13c8ff3d778295 --- /dev/null +++ b/utils/block_partitioning/block_partitioning.F90 @@ -0,0 +1,177 @@ + program block_partitioning +! Given a banded matrix sparsity pattern, +! an optimal partition into block tridiagonal structure is found + + implicit none + + integer, allocatable :: links(:,:), bsizes(:) + integer :: i, nc, nb + +! Read sparsity pattern + open(file='couplings.dat',unit=10,status='old') + read(10,*) ! skip line + read(10,*) nc + allocate(links(nc,nc),bsizes(nc)) + do i=1,nc + read(10,'(1000i1)') links(1:nc,i) + end do + close(10) + call block_partitioner(nc,links,nb,bsizes) + deallocate(links) + + + contains + + + subroutine block_partitioner(nc,links,nblocks,blocksizes) +! Find an optimal block tridiagonal partition +! for sparsity pattern given in links + +! Number of columns of matrix + integer, intent(in) :: nc +! Location of non-zero entries + integer, intent(in) :: links(nc,nc) +! Optimal number of blocks + integer, intent(out) :: nblocks +! Sizes of each block, in (1:nblocks) + integer, intent(out) :: blocksizes(nc) +! ---------------------------------------------------------------------- + integer :: trialsizes(nc) + integer :: i, j, iup, idn, maxblock, minsize, sum2, minsum2, imax + logical :: lcheck + integer :: bandmin, bandmax, bandavg, banddev + +! Initial values + nblocks = 1; blocksizes(1) = nc; blocksizes(2:nc) = 0 +! Find bandwidth of each column + bandmax = 0; bandmin = nc; bandavg = 0; banddev = 0 + do j=1,nc + iup = 0 + do i=j,1,-1 + if (links(i,j) == 1) iup = i + end do + idn = 0 + do i=j,nc + if (links(i,j) == 1) idn = i + end do + if (idn-iup < bandmin) bandmin = idn-iup + if (idn-iup > bandmax) bandmax = idn-iup + bandavg = bandavg + idn - iup + banddev = banddev + (idn - iup)**2 +! write(*,'("bandwidth: j, up, dn, diff=",4i8)') j, iup, idn, idn-iup + end do + bandavg = bandavg/nc + banddev = ceiling(sqrt(real(banddev/nc - bandavg**2))) + write(*,'("bandwidth: min, max, avg, dev=",4i8)') bandmin, bandmax, bandavg, banddev + call brute_partitioner(nc,links,bandmin,bandmax,bandavg,banddev,nblocks,blocksizes) +! All done! + end subroutine block_partitioner + + + subroutine brute_partitioner(nc,links,bandmin,bandmax,bandavg,banddev,nblocks,blocksizes) +! Brute force search for optimal block partition + + implicit none + +! Size of full matrix + integer, intent(in) :: nc +! Sparsity pattern + integer, intent(in) :: links(nc,nc) +! Bandwidth statistics + integer, intent(in) :: bandmin, bandmax, bandavg, banddev +! Optimal number of blocks + integer, intent(out) :: nblocks +! Sizes of each block, in (1:nblocks) + integer, intent(out) :: blocksizes(nc) +! ---------------------------------------------------------------------- + integer :: trialsizes(nc) + integer :: i, k, maxblock, minsize, sum2, minsum2, imax + logical :: lcheck + integer*8 :: j, jdiv, jrem, imax2i + +! Minimum block size is half of the minimum bandwith +! minsize = bandmin/2 +! Allowance for uneven bandwidth + minsize = (bandmin+banddev)/2 +! Maximum number of blocks + maxblock = nc/minsize + write(*,'("minsize, maxblock=",2i8)') minsize, maxblock +! Find combinations of block sizes that add to nc + minsum2 = nc**2 + do i=2,maxblock +! Sum of squares of block dimensions is bound by nc**2 + imax = ceiling(sqrt(1.d0*(nc**2 - (i-1)*minsize**2))) - minsize +! Sum of block dimensions is bound by nc + imax = min(imax,max(nc-i*minsize,1)) +! Empirical reduction + imax = min(imax,max(bandmax/2-minsize,1)) +! Make sure the big integers don't explode + imax2i = imax**(i-1) + write(*,'("nblocks,imax=",2i8,i16)') i, imax, imax2i +! Go over all combinations, encoded in j +! Digits are decoded into jdiv + do j=0,imax2i + jrem = j + do k=i-1,1,-1 + jdiv = jrem/(imax**(k-1)) + jrem = mod(jrem,imax**(k-1)) + trialsizes(k) = minsize + jdiv + end do + trialsizes(i) = nc - sum(trialsizes(1:i-1)) + if (trialsizes(i) < minsize) cycle +! Do the blocks leave out any matrix elements? + call check_blocks(nc,links,i,trialsizes(1:i),lcheck) + if (lcheck) then + sum2 = sum(trialsizes(1:i)**2) + if (sum2 < minsum2) then + minsum2 = sum2 + nblocks = i + blocksizes(1:i) = trialsizes(1:i) + end if + end if + end do + write(*,'("minsum2, nblocks, blocksizes=",1000i8)') minsum2, nblocks, blocksizes(1:nblocks) + end do +! All done! + end subroutine brute_partitioner + + + pure subroutine check_blocks(nc,links,nblocks,blocksizes,lcheck) +! Check if the given blocks are a tridiagonal partition + + implicit none + +! Number of columns + integer, intent(in) :: nc +! Sparsity pattern + integer, intent(in) :: links(nc,nc) +! Number of blocks + integer, intent(in) :: nblocks +! Block sizes + integer, intent(in) :: blocksizes(1:nblocks) +! Is this a correct partition? + logical, intent(out) :: lcheck +! ---------------------------------------------------------------------- + integer :: ib, i0, i1, j0, j1 + integer :: zeroed(nc,nc) + + lcheck = .true. + zeroed(:,:) = links(:,:) + i0 = 1; i1 = blocksizes(1) + do ib=1,nblocks-1 + i0 = sum(blocksizes(1:ib-1)) + 1 + i1 = sum(blocksizes(1:ib)) + j0 = i1 + 1 + j1 = i1 + blocksizes(ib+1) + zeroed(i0:i1,i0:i1) = 0 + zeroed(i0:i1,j0:j1) = 0 + zeroed(j0:j1,i0:i1) = 0 + end do + zeroed(j0:j1,j0:j1) = 0 + if (any(zeroed(:,:) == 1)) lcheck = .false. +! All done! + end subroutine check_blocks + + +! All done! + end program block_partitioning diff --git a/utils/block_partitioning/couplings.dat b/utils/block_partitioning/couplings.dat new file mode 100644 index 0000000000000000000000000000000000000000..975fd7d8585aac1674316bb75de25fe400d1bce2 --- /dev/null +++ b/utils/block_partitioning/couplings.dat @@ -0,0 +1,108 @@ +# Couplings between atoms via gref + 106 +1111111111110111111000110000000000000000000000000000000000000000000000000000000000000000000000000000000000 +1111111111111011111100010000000000000000000000000000000000000000000000000000000000000000000000000000000000 +1111111111111101111110000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +1111111111111110011111000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +1111111101111111001111100000000000000000000000000000000000000000000000000000000000000000000000000000000000 +1111111110111111000111110000000000000000000000000000000000000000000000000000000000000000000000000000000000 +1111111111011111100011110000000000000000000000000000000000000000000000000000000000000000000000000000000000 +1111111111101111110001110000000000000000000000000000000000000000000000000000000000000000000000000000000000 +1111011111111111111101111111000111000000000000000000000000000000000000000000000000000000000000000000000000 +1111101111111111111110111111100011000000000000000000000000000000000000000000000000000000000000000000000000 +1111110111111111111111011111110000000000000000000000000000000000000000000000000000000000000000000000000000 +1111111011111111111111100111111100000000000000000000000000000000000000000000000000000000000000000000000000 +0111111111111111011111110011111110000000000000000000000000000000000000000000000000000000000000000000000000 +1011111111111111101111110001111111000000000000000000000000000000000000000000000000000000000000000000000000 +1101111111111111110111111000011111000000000000000000000000000000000000000000000000000000000000000000000000 +1110111111111111111011111110001111000000000000000000000000000000000000000000000000000000000000000000000000 +1110001111110111111101111111101111111000110000000000000000000000000000000000000000000000000000000000000000 +1111000111111011111110111111110111111100010000000000000000000000000000000000000000000000000000000000000000 +1111100011111101111111011111111001111110000000000000000000000000000000000000000000000000000000000000000000 +0111110011111110111111101111111110011111000000000000000000000000000000000000000000000000000000000000000000 +0011111001111111011111110111111111001111100000000000000000000000000000000000000000000000000000000000000000 +0001111110111111101111111011111111000111110000000000000000000000000000000000000000000000000000000000000000 +1000111111011111110111111100111111100011110000000000000000000000000000000000000000000000000000000000000000 +1100011111101111111011111111011111110001110000000000000000000000000000000000000000000000000000000000000000 +0000000011100011111101111111101111111101111110001100000000000000000000000000000000000000000000000000000000 +0000000011110001111110111111110111111110111111000100000000000000000000000000000000000000000000000000000000 +0000000011111000111111011111111011111111011111100100000000000000000000000000000000000000000000000000000000 +0000000011111000111111011111111101111111011111110000000000000000000000000000000000000000000000000000000000 +0000000001111100111111101111111110111111100111110000000000000000000000000000000000000000000000000000000000 +0000000000111110011111110111111111011111110011111000000000000000000000000000000000000000000000000000000000 +0000000000011111101111111011111111101111110001111100000000000000000000000000000000000000000000000000000000 +0000000010001111110111111101111111110111111001111100000000000000000000000000000000000000000000000000000000 +0000000010001111110111111110111111110111111100111100000000000000000000000000000000000000000000000000000000 +0000000011000111111011111111011111111011111100011100000000000000000000000000000000000000000000000000000000 +0000000000000000111000111111101111111101111111011111100011000000000000000000000000000000000000000000000000 +0000000000000000111100011111110111111110111111101111110001000000000000000000000000000000000000000000000000 +0000000000000000111110001111111001111111011111110111111000000000000000000000000000000000000000000000000000 +0000000000000000011111001111111110111111101111111001111100000000000000000000000000000000000000000000000000 +0000000000000000001111100111111111011111110111111100111110000000000000000000000000000000000000000000000000 +0000000000000000000111111011111111101111111011111100011111000000000000000000000000000000000000000000000000 +0000000000000000100011111100111111110111111101111110001111000000000000000000000000000000000000000000000000 +0000000000000000110001111111011111111011111110111111000111000000000000000000000000000000000000000000000000 +0000000000000000000000001111000111111101111111011111110111111000110000000000000000000000000000000000000000 +0000000000000000000000001111100011111110111111101111111011111100010000000000000000000000000000000000000000 +0000000000000000000000001111110000111111011111111111111101111110000000000000000000000000000000000000000000 +0000000000000000000000000111111100111111101111111011111110011111000000000000000000000000000000000000000000 +0000000000000000000000000011111110011111110111111101111111001111100000000000000000000000000000000000000000 +0000000000000000000000000001111111101111111011111110111111000111110000000000000000000000000000000000000000 +0000000000000000000000001000011111110111111111111111011111100011110000000000000000000000000000000000000000 +0000000000000000000000001110001111111011111110111111101111110001110000000000000000000000000000000000000000 +0000000000000000000000000000000000111000111111011111111111111101111110001100000000000000000000000000000000 +0000000000000000000000000000000000111100011111101111111111111110111111000100000000000000000000000000000000 +0000000000000000000000000000000000111110001111110111111111111111011111100000000000000000000000000000000000 +0000000000000000000000000000000000011111001111111011111111111111100111110000000000000000000000000000000000 +0000000000000000000000000000000000001111100111111111111111011111110011111000000000000000000000000000000000 +0000000000000000000000000000000000000111111011111111111111101111110001111100000000000000000000000000000000 +0000000000000000000000000000000000100011111101111111111111110111111000111100000000000000000000000000000000 +0000000000000000000000000000000000110001111110111111111111111011111100011100000000000000000000000000000000 +0000000000000000000000000000000000000000001110001111110111111111111111011111100011000000000000000000000000 +0000000000000000000000000000000000000000001111000111111011111111111111101111110001000000000000000000000000 +0000000000000000000000000000000000000000001111100011111101111111111111110111111000000000000000000000000000 +0000000000000000000000000000000000000000000111110011111110111111111111111001111100000000000000000000000000 +0000000000000000000000000000000000000000000011111001111111111111110111111100111110000000000000000000000000 +0000000000000000000000000000000000000000000001111110111111111111111011111100011111000000000000000000000000 +0000000000000000000000000000000000000000001000111111011111111111111101111110001111000000000000000000000000 +0000000000000000000000000000000000000000001100011111101111111111111110111111000111000000000000000000000000 +0000000000000000000000000000000000000000000000000011100011111101111111111111110111111000110000000000000000 +0000000000000000000000000000000000000000000000000011110001111110111111111111111011111100010000000000000000 +0000000000000000000000000000000000000000000000000011111000111111011111111111111101111110000000000000000000 +0000000000000000000000000000000000000000000000000001111100111111101111111111111110011111000000000000000000 +0000000000000000000000000000000000000000000000000000111110011111111111111101111111001111100000000000000000 +0000000000000000000000000000000000000000000000000000011111101111111111111110111111000111110000000000000000 +0000000000000000000000000000000000000000000000000010001111110111111111111111011111100011110000000000000000 +0000000000000000000000000000000000000000000000000011000111111011111111111111101111110001110000000000000000 +0000000000000000000000000000000000000000000000000000000000111000111111011111111111111101111110001100000000 +0000000000000000000000000000000000000000000000000000000000111100011111101111111111111110111111000100000000 +0000000000000000000000000000000000000000000000000000000000111110001111110111111111111111011111100000000000 +0000000000000000000000000000000000000000000000000000000000011111001111111011111111111111100111110000000000 +0000000000000000000000000000000000000000000000000000000000001111100111111111111111011111110011111000000000 +0000000000000000000000000000000000000000000000000000000000000111111011111111111111101111110001111100000000 +0000000000000000000000000000000000000000000000000000000000100011111101111111111111110111111000111100000000 +0000000000000000000000000000000000000000000000000000000000110001111110111111111111111011111100011100000000 +0000000000000000000000000000000000000000000000000000000000000000001110001111110111111111111111011111100011 +0000000000000000000000000000000000000000000000000000000000000000001111000111111011111111111111101111110001 +0000000000000000000000000000000000000000000000000000000000000000001111100011111101111111111111110111111000 +0000000000000000000000000000000000000000000000000000000000000000000111110011111110111111111111111001111100 +0000000000000000000000000000000000000000000000000000000000000000000011111001111111111111110111111100111110 +0000000000000000000000000000000000000000000000000000000000000000000001111110111111111111111011111100011111 +0000000000000000000000000000000000000000000000000000000000000000001000111111011111111111111101111110001111 +0000000000000000000000000000000000000000000000000000000000000000001100011111101111111111111110111111000111 +0000000000000000000000000000000000000000000000000000000000000000000000000011100011111101111111111111110111 +0000000000000000000000000000000000000000000000000000000000000000000000000011110001111110111111111111111011 +0000000000000000000000000000000000000000000000000000000000000000000000000011111000111111011111111111111101 +0000000000000000000000000000000000000000000000000000000000000000000000000001111100111111101111111111111110 +0000000000000000000000000000000000000000000000000000000000000000000000000000111110011111111111111101111111 +0000000000000000000000000000000000000000000000000000000000000000000000000000011111101111111111111110111111 +0000000000000000000000000000000000000000000000000000000000000000000000000010001111110111111111111111011111 +0000000000000000000000000000000000000000000000000000000000000000000000000011000111111011111111111111101111 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000111000111111011111111111 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000111100011111101111111111 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000111110001111110111111111 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000011111001111111011111111 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000001111100111111111111111 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111011111111111111 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000100011111101111111111111 +0000000000000000000000000000000000000000000000000000000000000000000000000000000000110001111110111111111111 diff --git a/utils/block_partitioning/out b/utils/block_partitioning/out new file mode 100644 index 0000000000000000000000000000000000000000..c71ce1a7804434ef8bb2157f791afdcf12f6a2fb --- /dev/null +++ b/utils/block_partitioning/out @@ -0,0 +1,14 @@ +bandwidth: min, max, avg, dev= 20 41 34 10 +minsize, maxblock= 15 7 +nblocks,imax= 2 5 5 +minsum2, nblocks, blocksizes= 7796 2 20 86 +nblocks,imax= 3 5 25 +minsum2, nblocks, blocksizes= 5658 3 16 19 71 +nblocks,imax= 4 5 125 +minsum2, nblocks, blocksizes= 3750 4 16 18 19 53 +nblocks,imax= 5 5 625 +minsum2, nblocks, blocksizes= 2566 5 16 18 16 19 37 +nblocks,imax= 6 5 3125 +minsum2, nblocks, blocksizes= 1894 6 16 18 16 16 19 21 +nblocks,imax= 7 1 1 +minsum2, nblocks, blocksizes= 1894 6 16 18 16 16 19 21