Commit 0649ba8f authored by Victor Yu

Add distributed band parallelization to Wstat

This feature reduces the memory required per image. Enable it by
passing `-nb <N>` (the number of band groups) on the command line (see the QE manual).
parent 9cdb9297
......@@ -54,7 +54,7 @@ stages:
- make conf PYT=python3 PYT_LDFLAGS="`python3-config --ldflags --embed`"
- make -j4 all
- cd test-suite
- make NP=$CI_NP NI=$CI_NI NT=$CI_NT
- make NP=$CI_NP NI=$CI_NI NB=$CI_NB NT=$CI_NT
artifacts:
when: on_failure
paths:
......@@ -83,6 +83,7 @@ gcc840_t:
variables:
CI_NP: 8
CI_NI: 1
CI_NB: 1
CI_NT: 1
extends:
- .template_bot_start
......@@ -92,7 +93,8 @@ gcc840_t:
gcc930_t:
variables:
CI_NP: 8
CI_NI: 1
CI_NI: 2
CI_NB: 2
CI_NT: 1
extends:
- .template_bot_start
......@@ -103,9 +105,10 @@ gcc930_t:
gcc840_t2:
variables:
CI_NP: 4
CI_NP: 8
CI_NI: 2
CI_NT: 1
CI_NB: 1
CI_NT: 2
only:
- schedules
extends:
......@@ -115,8 +118,9 @@ gcc840_t2:
gcc930_t2:
variables:
CI_NP: 2
CI_NI: 2
CI_NP: 8
CI_NI: 1
CI_NB: 2
CI_NT: 2
only:
- schedules
......
......@@ -9,6 +9,7 @@ MODFLAGS= $(MOD_FLAG)../../iotk/src $(MOD_FLAG)../../Modules $(MOD_FLAG)../../LA
$(MOD_FLAG)../Tools \
$(MOD_FLAG)../FFT_kernel \
$(MOD_FLAG)../Coulomb_kernel \
$(MOD_FLAG)../Para_kernel \
$(MOD_FLAG).
IFLAGS=
......
!
! Copyright (C) 2015-2017 M. Govoni
! Copyright (C) 2015-2017 M. Govoni
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
......@@ -7,7 +7,7 @@
!
! This file is part of WEST.
!
! Contributors to this file:
! Contributors to this file:
! Marco Govoni
!
!-----------------------------------------------------------------------
......@@ -40,7 +40,7 @@ SUBROUTINE apply_sternheimerop_to_m_wfcs(nbndval, psi, hpsi, e, alpha, m)
! input: the vector
! output: the operator applied to the vector
!
! Workspace
! Workspace
!
INTEGER :: ibnd, ig
COMPLEX(DP) :: za
......@@ -50,11 +50,15 @@ SUBROUTINE apply_sternheimerop_to_m_wfcs(nbndval, psi, hpsi, e, alpha, m)
! compute the product of the hamiltonian with the h vector
!
hpsi=(0.0_DP,0.0_DP)
!
!
IF(l_kinetic_only) THEN
CALL k_psi( npwx, npw, m, psi, hpsi )
ELSE
CALL h_psi( npwx, npw, m, psi, hpsi )
!
! use h_psi_, i.e. h_psi without band parallelization, as wstat
! handles band parallelization separately in dfpt_module
!
CALL h_psi_( npwx, npw, m, psi, hpsi )
ENDIF
!
! then we compute the operator H-epsilon S
......@@ -74,4 +78,4 @@ SUBROUTINE apply_sternheimerop_to_m_wfcs(nbndval, psi, hpsi, e, alpha, m)
!
CALL stop_clock ('stern')
!
END SUBROUTINE
END SUBROUTINE
This diff is collapsed.
......@@ -22,6 +22,7 @@ SUBROUTINE wstat_memory_report()
USE gvecs, ONLY : ngms
USE uspp, ONLY : nkb
USE control_flags, ONLY : gamma_only
USE mp_bands, ONLY : nbgrp
USE mp_world, ONLY : mpime,root
USE westcom, ONLY : nbnd_occ,n_pdep_basis,npwqx,logfile
USE distribution_center, ONLY : pert
......@@ -141,15 +142,15 @@ SUBROUTINE wstat_memory_report()
WRITE(stdout,'(5x,"[MEM] Allocated arrays ",5x,"est. size (Mb)", 5x,"dimensions")')
WRITE(stdout,'(5x,"[MEM] ----------------------------------------------------------")')
!
mem_partial = (1.0_DP/Mb)*complex_size*npwx*npol*nbnd_occ(1)
mem_partial = (1.0_DP/Mb)*complex_size*npwx*npol*((nbnd_occ(1)-1)/nbgrp+1)
WRITE( stdout, '(5x,"[MEM] dvpsi ",f10.2," Mb", 5x,"(",i7,",",i5,")")') &
mem_partial, npwx*npol, nbnd_occ(1)
mem_partial, npwx*npol, ((nbnd_occ(1)-1)/nbgrp+1)
IF( mpime == root ) CALL json%add( 'memory.dvpsi', mem_partial )
mem_tot = mem_tot + mem_partial
!
mem_partial = (1.0_DP/Mb)*complex_size*npwx*npol*nbnd_occ(1)
mem_partial = (1.0_DP/Mb)*complex_size*npwx*npol*((nbnd_occ(1)-1)/nbgrp+1)
WRITE( stdout, '(5x,"[MEM] dpsi ",f10.2," Mb", 5x,"(",i7,",",i5,")")') &
mem_partial, npwx*npol, nbnd_occ(1)
mem_partial, npwx*npol, ((nbnd_occ(1)-1)/nbgrp+1)
IF( mpime == root ) CALL json%add( 'memory.dpsi', mem_partial )
mem_tot = mem_tot + mem_partial
!
......
......@@ -4,6 +4,7 @@
export NP=2 # Number of MPI processes
export NI=1 # Number of images
export NB=1 # Number of band groups
export NT=1 # Number of OpenMP threads
#
......@@ -25,5 +26,6 @@ export WGET=wget -N -q
###### DO NOT TOUCH BELOW ######
export NIMAGE=${NI}
export NBAND=${NB}
export OMP_NUM_THREADS=${NT}
......@@ -15,7 +15,7 @@ pw:
${PARA_PREFIX_QE} ${BINDIR}/pw.x -i pw.in > pw.out 2> pw.err
wstat: pw
${PARA_PREFIX} ${BINDIR}/wstat.x -nimage ${NIMAGE} -i wstat.in > wstat.out 2> wstat.err
${PARA_PREFIX} ${BINDIR}/wstat.x -nimage ${NIMAGE} -nband ${NBAND} -i wstat.in > wstat.out 2> wstat.err
wfreq: wstat
${PARA_PREFIX} ${BINDIR}/wfreq.x -nimage ${NIMAGE} -i wfreq.in > wfreq.out 2> wfreq.err
......
......@@ -15,7 +15,7 @@ pw:
${PARA_PREFIX_QE} ${BINDIR}/pw.x -i pw.in > pw.out 2> pw.err
wstat: pw
${PARA_PREFIX} ${BINDIR}/wstat.x -nimage ${NIMAGE} -i wstat.in > wstat.out 2> wstat.err
${PARA_PREFIX} ${BINDIR}/wstat.x -nimage ${NIMAGE} -nband ${NBAND} -i wstat.in > wstat.out 2> wstat.err
wfreq: wstat
${PARA_PREFIX} ${BINDIR}/wfreq.x -nimage ${NIMAGE} -i wfreq.in > wfreq.out 2> wfreq.err
......
......@@ -15,7 +15,7 @@ pw:
${PARA_PREFIX_QE} ${BINDIR}/pw.x -i pw.in > pw.out 2> pw.err
wstat: pw
${PARA_PREFIX} ${BINDIR}/wstat.x -nimage ${NIMAGE} -i wstat.in > wstat.out 2> wstat.err
${PARA_PREFIX} ${BINDIR}/wstat.x -nimage ${NIMAGE} -nband ${NBAND} -i wstat.in > wstat.out 2> wstat.err
wfreq: wstat
${PARA_PREFIX} ${BINDIR}/wfreq.x -nimage ${NIMAGE} -i wfreq.in > wfreq.out 2> wfreq.err
......
......@@ -15,7 +15,7 @@ pw:
${PARA_PREFIX_QE} ${BINDIR}/pw.x -i pw.in > pw.out 2> pw.err
wstat: pw
${PARA_PREFIX} ${BINDIR}/wstat.x -nimage ${NIMAGE} -i wstat.in > wstat.out 2> wstat.err
${PARA_PREFIX} ${BINDIR}/wstat.x -nimage ${NIMAGE} -nband ${NBAND} -i wstat.in > wstat.out 2> wstat.err
wfreq: wstat
${PARA_PREFIX} ${BINDIR}/wfreq.x -nimage ${NIMAGE} -i wfreq.in > wfreq.out 2> wfreq.err
......
......@@ -15,7 +15,7 @@ pw:
${PARA_PREFIX_QE} ${BINDIR}/pw.x -i pw.in > pw.out 2> pw.err
wstat: pw
${PARA_PREFIX} ${BINDIR}/wstat.x -nimage ${NIMAGE} -i wstat.in > wstat.out 2> wstat.err
${PARA_PREFIX} ${BINDIR}/wstat.x -nimage ${NIMAGE} -nband ${NBAND} -i wstat.in > wstat.out 2> wstat.err
wfreq: wstat
${PARA_PREFIX} ${BINDIR}/wfreq.x -nimage ${NIMAGE} -i wfreq.in > wfreq.out 2> wfreq.err
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment