Differences

This shows you the differences between two versions of the page.

--- doku:vasp-vsc2 [2014/10/02 14:00] – [VASP + ELPA] ir
+++ doku:vasp-vsc2 [2022/02/01 21:10] (current) – removed goldenberg
@@ Line 1: / Line 1: @@
-====== VASP ======
-see also
-  * [[vasp-benchmarks|VASP benchmarks]]
-==== MPI SELECTOR ====
-To compile and run VASP successfully, use Intel MPI 4.0.1.007, which gives the best performance for VASP:
-<code>
-# mpi-selector --query
-default:impi_intel-4.0.1.007
-level:user
-</code>
-==== VASP + ELPA ====
-An optimized executable, which scales very nicely on VSC-2 (scaLAPACK has been replaced with  [[http://elpa.rzg.mpg.de/software|ELPA]] (Eigenvalue soLvers for Petaflop Applications)), is accessible for VASP users in /opt/sw/vasp.
-Please send an email to {{:doku:contact_vsc-red_crop.png?120|}} in order
- to be added to the corresponding user group.
-The ELPA developers highly appreciate it when their work is cited by users of the optimized library; see the README file of the library below:
-<code>
-*** Citing:
-  A description of some algorithms present in ELPA can be found in:
-  T. Auckenthaler, V. Blum, H.-J. Bungartz, T. Huckle, R. Johanni,
-  L. Kr\"amer, B. Lang, H. Lederer, and P. R. Willems,
-  "Parallel solution of partial symmetric eigenvalue problems from
-  electronic structure calculations",
-  Parallel Computing [volume], [page], (2011).
-  accepted for publication (May 11, 2011).
-  Please cite this paper when using ELPA. We also intend to publish an
-  overview description of the ELPA library as such, and ask you to
-  make appropriate reference to that as well, once it appears.
-</code>
-==== Compiling ====
-With the above mpi-selector setting, set the following library paths in your makefile:
-<code>
-OFLAG=-O2 -ip -ftz -fno-alias -msse3
-OFLAG_HIGH = $(OFLAG)
-OFLAG_NOOPT = -O1 -msse3 -ip -ftz
-OBJ_HIGH =
-OBJ_NOOPT =
-DEBUG  = -FR -O0
-INLINE = $(OFLAG) -ip
-MKL_PATH=$(MKLROOT)/lib/intel64
-# Use libgoto preferentially; it gives better performance:
-BLAS   = -Wl,--start-group /opt/goto/libgoto.a $(MKL_PATH)/libmkl_intel_thread.a $(MKL_PATH)/libmkl_core.a -Wl,--end-group  -liomp5
-LAPACK = /opt/goto/libgoto.a $(MKL_PATH)/libmkl_intel_lp64.a
-# Alternative BLAS and LAPACK:
-#BLAS   = $(MKL_PATH)/libmkl_intel_lp64.a $(MKL_PATH)/libmkl_intel_thread.a $(MKL_PATH)/libmkl_core.a -liomp5
-#LAPACK = $(MKL_PATH)/libmkl_intel_lp64.a $(MKL_PATH)/libmkl_intel_thread.a $(MKL_PATH)/libmkl_core.a
-BLACS=-lmkl_blacs_intelmpi_lp64
-SCA=$(MKL_PATH)/libmkl_scalapack_lp64.so $(BLACS)
-</code>
-and the compiler should be set to:
-<code>
-FC=mpiifort
-</code>
-VASP comes with its own FFTW library, which is sufficient for normal use. For non-collinear spin calculations, using the FFTW library (http://www.fftw.org/) located in /opt/fftw is recommended:
-vasp.5.2.12_mpi:
-<code>
-FFT3D=fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o /opt/intel/composerxe/mkl/lib/intel64/libfftw3xf_intel.a
-</code>
-or
-<code>
-FFT3D   = fftmpiw.o fftmpi_map.o  fftw3d.o  fft3dlib.o   ./libfftw3.a
-</code>
-vasp.5.2.12_mpi_ncl:
-<code>
-# FFT objects plus the static FFTW 3.3 archive; the archive must be named
-# with its .a suffix, otherwise the linker is handed a non-existent file.
-FFT3D   = fftmpiw.o fftmpi_map.o  fftw3d.o  fft3dlib.o   \
-          /opt/fftw/3.3/intel/lib/libfftw3.a
-</code>
-Complete Makefile for non collinear VASP 5.2.12 on VSC-2
-<code>
-.SUFFIXES: .inc .f .f90 .F
-#-----------------------------------------------------------------------
-# Makefile for Intel Fortran compiler for P4 systems
-#
-# The makefile was tested only under Linux on Intel platforms
-#-----------------------------------------------------------------------
-# all CPP processed fortran files have the extension .f
-SUFFIX=.f90
-#-----------------------------------------------------------------------
-# START CUSTOMIZATION HERE
-#-----------------------------------------------------------------------
-#-----------------------------------------------------------------------
-# whereis CPP ?? (I need CPP, can't use gcc with proper options)
-# the following works almost on all systems
-# possible cpp is located in a different directory
-#-----------------------------------------------------------------------
-# CPP_ =  ./preprocess <$*.F | /usr/bin/cpp -P -C -traditional >$*$(SUFFIX)
-# CPP_ =  /usr/bin/cpp <$*.F -P -C -traditional >$*$(SUFFIX)
-CPP_=fpp -f_com=no -free -w0 $*.F $*$(SUFFIX)
-#-----------------------------------------------------------------------
-# f90 compiler
-#-----------------------------------------------------------------------
-# simple version, use mpif77 wrapper
-# this works only if mpif77 has been compiled using the exactly
-# same fortran compiler
-FC=mpiifort
-#FCL=mpif90 -i-static
-FCL=mpiifort
-#-----------------------------------------------------------------------
-# general fortran flags  (there must be a trailing blank on this line)
-#-----------------------------------------------------------------------
-# INCS=-I$(VSC)/FFTW_Intel/include/fftw
-FFLAGS =  -FR -lowercase -assume byterecl
-#-----------------------------------------------------------------------
-# optimization
-# for some files a lower optimization level is explicitly selected
-# at the bottom
-#-----------------------------------------------------------------------
-OFLAG=-O2 -ip -ftz -fno-alias -msse3
-OFLAG_HIGH = $(OFLAG)
-OFLAG_NOOPT = -O1 -msse3 -ip -ftz
-OBJ_HIGH =
-OBJ_NOOPT =
-DEBUG  = -FR -O0
-INLINE = $(OFLAG) -ip
-#-----------------------------------------------------------------------
-# the following lines specify the position of BLAS  and LAPACK,
-# PBLAS and scaLAPACK
-#-----------------------------------------------------------------------
-# fastest Kazushige Goto's BLAS
-# http://www.cs.utexas.edu/users/kgoto/signup_first.html
-# mkl is almost as fast
-MKL_PATH=$(MKLROOT)/lib/intel64
-BLAS   = -Wl,--start-group /opt/goto/libgoto.a $(MKL_PATH)/libmkl_intel_thread.a $(MKL_PATH)/libmkl_core.a -Wl,--end-group  -liomp5
-LAPACK = /opt/goto/libgoto.a $(MKL_PATH)/libmkl_intel_lp64.a
-#BLAS  = $(MKL_PATH)/libmkl_intel_lp64.a $(MKL_PATH)/libmkl_intel_thread.a $(MKL_PATH)/libmkl_core.a -liomp5
-# LAPACK, use vasp.5.lib/lapack_double
-# optimized LAPACK does not improve the performance
-#LAPACK = ../vasp.5.lib/lapack_double.o
-#LAPACK= $(MKL_PATH)/libmkl_intel_lp64.a $(MKL_PATH)/libmkl_intel_thread.a $(MKL_PATH)/libmkl_core.a
-# location of BLACS and SCALAPACK
-# optional only required if SCA is defined below
-#BLACSdir      = $(VSC)/BLACS/BLACS/LIB
-#BLACS=$(BLACSdir)/blacsF77init_MPI-LINUX-OPENMPI-0.a $(BLACSdir)/blacs_MPI-LINUX-OPENMPI-0.a $(BLACSdir)/blacsF77init_MPI-LINUX-OPENMPI-0.a
-## For openmpi
-#BLACS=-lmkl_blacs_openmpi_lp64
-## For mpich
-BLACS=-lmkl_blacs_intelmpi_lp64
-## Just a test for qlogic
-#BLACS=-lmkl_blacs_intelmpi_lp64
-# BLACS and SCALAPACK libraries if available
-# if SCA is defined SCALAPACK will be used
-#SCA= $(VSC)/ScaLAPACK/scalapack-1.8.0/libscalapack.a $(BLACS)
-SCA=$(MKL_PATH)/libmkl_scalapack_lp64.so $(BLACS)
-#WANLIB=
-WANLIB=../wannier90-1.2/libwannier.a
-LINK=-mpif90_abi=intel11
-#-----------------------------------------------------------------------
-# END CUSTOMIZATION
-#-----------------------------------------------------------------------
-#-----------------------------------------------------------------------
-# options for CPP in parallel version (see also above):
-# NGZhalf               charge density   reduced in Z direction
-# wNGZhalf              gamma point only reduced in Z direction
-# scaLAPACK             use scaLAPACK (usually slower on 100 Mbit Net)
-#-----------------------------------------------------------------------
-# -DscaLAPACK is handed to the preprocessor only when SCA has a non-empty
-# definition; every other CPP flag is identical in both configurations.
-ifdef SCA
-CPP_SCA = -DscaLAPACK
-else
-CPP_SCA =
-endif
-CPP    = $(CPP_) -DMPI  -DHOST=\"IFC91_ompi\" -DIFC \
-     -Dkind8 -DCACHE_SIZE=4000 -DPGF90 -Davoidalloc \
-     -DMPI_BLOCK=8000  $(CPP_SCA) -Duse_collective -Duse_bse_te \
-     -DnoAugXCmeta -DVASP2WANNIER90
-#-----------------------------------------------------------------------
-# libraries for vasp
-#-----------------------------------------------------------------------
-# $(SCA) expands to nothing when scaLAPACK is not configured, so a single
-# definition yields the same link line as separate SCA / non-SCA branches.
-LIB     = -L../vasp.5.lib -ldmy  \
-      ../vasp.5.lib/linpack_double.o   \
-      $(WANLIB) $(SCA) $(LAPACK) $(BLAS)
-# FFT: fftmpi.o with fft3dlib of Juergen Furthmueller
-# must be used for this benchmark
-#rv,sgi FFT3D   = fftmpi.o fftmpi_map.o fft3dlib.o
-#FFT3D   =  fftmpi.o fftmpi_map.o  fft3dfurth.o fft3dlib.o
-#FFT3D   = fftmpiw.o fftmpi_map.o  fftw3d.o  fft3dlib.o   ./libfftw3.a
-#FFT3D   = fftmpi.o fftmpi_map.o  fftw3d.o  fft3dlib.o   ./libfftw3xf_intel.a
-#FFT3D   = fftmpi.o fftmpi_map.o  fftw3d.o  fft3dlib.o  /opt/intel/Compiler/11.0/083/mkl/lib/em64t/libfftw3xf_intel.a
-# FFT3D   = fftmpi.o fftmpi_map.o fftw3d.o fft3dlib.o $(VSC)/FFTW_Intel/lib/em64t/libfftw3xf_intel.a
-#FFT3D   = fftmpiw.o fftmpi_map.o  fftw3d.o  fft3dlib.o /opt/fftw/3.3/intel/lib/libfftw3.a
-FFT3D=fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o /opt/intel/composerxe/mkl/lib/intel64/libfftw3xf_intel.a
-#-----------------------------------------------------------------------
-# general rules and compile lines
-#-----------------------------------------------------------------------
-BASIC=   symmetry.o symlib.o   lattlib.o  random.o
-SOURCE=  base.o     mpi.o      smart_allocate.o      xml.o  \
-         constant.o jacobi.o   main_mpi.o  scala.o   \
-         asa.o      lattice.o  poscar.o   ini.o  mgrid.o  xclib.o  vdw_nl.o  xclib_grad.o \
-         radial.o   pseudo.o   gridq.o     ebs.o  \
-         mkpoints.o wave.o     wave_mpi.o  wave_high.o  \
-         $(BASIC)   nonl.o     nonlr.o    nonl_high.o dfast.o    choleski2.o \
-         mix.o      hamil.o    xcgrad.o   xcspin.o    potex1.o   potex2.o  \
-         constrmag.o cl_shift.o relativistic.o LDApU.o \
-         paw_base.o metagga.o  egrad.o    pawsym.o   pawfock.o  pawlhf.o   rhfatm.o  paw.o   \
-         mkpoints_full.o       charge.o   Lebedev-Laikov.o  stockholder.o dipol.o    pot.o \
-         dos.o      elf.o      tet.o      tetweight.o hamil_rot.o \
-         steep.o    chain.o    dyna.o     sphpro.o    us.o  core_rel.o \
-         aedens.o   wavpre.o   wavpre_noio.o broyden.o \
-         dynbr.o    rmm-diis.o reader.o   writer.o   tutor.o xml_writer.o \
-         brent.o    stufak.o   fileio.o   opergrid.o stepver.o  \
-         chgloc.o   fast_aug.o fock.o     mkpoints_change.o sym_grad.o \
-         mymath.o   internals.o dynconstr.o dimer_heyden.o dvvtrajectory.o vdwforcefield.o \
-         hamil_high.o nmr.o    pead.o     mlwf.o     subrot.o   subrot_scf.o \
-         force.o    pwlhf.o  gw_model.o optreal.o   davidson.o  david_inner.o \
-         electron.o rot.o  electron_all.o shm.o    pardens.o  paircorrection.o \
-         optics.o   constr_cell_relax.o   stm.o    finite_diff.o elpol.o    \
-         hamil_lr.o rmm-diis_lr.o  subrot_cluster.o subrot_lr.o \
-         lr_helper.o hamil_lrf.o   elinear_response.o ilinear_response.o \
-         linear_optics.o linear_response.o   \
-         setlocalpp.o  wannier.o electron_OEP.o electron_lhf.o twoelectron4o.o \
-         ratpol.o screened_2e.o wave_cacher.o chi_base.o wpot.o local_field.o \
-         ump2.o bse_te.o bse.o acfdt.o chi.o sydmat.o dmft.o \
-         rmm-diis_mlr.o  linear_response_NMR.o
-INC=
-# Link the vasp executable from main.o, the $(SOURCE) objects and the
-# $(FFT3D) objects; any existing binary is removed before linking.
-vasp: $(SOURCE) $(FFT3D) $(INC) main.o
-	rm -f vasp
-	$(FCL) -o vasp main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)
-# Same prerequisites and link line as "vasp", written to the file vasp_.
-vasp_: $(SOURCE) $(FFT3D) $(INC) main.o
-	rm -f vasp_
-	$(FCL) -o vasp_ main.o  $(SOURCE)   $(FFT3D) $(LIB) $(LINK)
-# Link makeparam from makeparam.o instead of main.o; main.F is listed as a
-# prerequisite, so a change to main.F relinks this tool as well.
-makeparam: $(SOURCE) $(FFT3D) makeparam.o main.F $(INC)
-	$(FCL) -o makeparam  $(LINK) makeparam.o $(SOURCE) $(FFT3D) $(LIB)
-# Small test programs linked against base.o, random.o and $(LIB).
-zgemmtest: zgemmtest.o base.o random.o $(INC)
-	$(FCL) -o zgemmtest $(LINK) zgemmtest.o random.o base.o $(LIB)
-dgemmtest: dgemmtest.o base.o random.o $(INC)
-	$(FCL) -o dgemmtest $(LINK) dgemmtest.o random.o base.o $(LIB)
-# FFT test program: needs the MPI, grid and allocation objects plus $(FFT3D).
-ffttest: base.o smart_allocate.o mpi.o mgrid.o random.o ffttest.o $(FFT3D) $(INC)
-	$(FCL) -o ffttest $(LINK) ffttest.o mpi.o mgrid.o random.o smart_allocate.o base.o $(FFT3D) $(LIB)
-# Link the kpoints tool from makekpoints.o and the full $(SOURCE) object list.
-kpoints: $(SOURCE) $(FFT3D) makekpoints.o main.F $(INC)
-	$(FCL) -o kpoints $(LINK) makekpoints.o $(SOURCE) $(FFT3D) $(LIB)
-# clean is a command, not a file: declare it phony so that a stray file
-# named "clean" cannot make this target look up to date.
-.PHONY: clean
-# Remove *.mod, *.f90, *.o and *.L files, then touch every .F source so
-# that all sources are newer than anything built from them.
-clean:
-	-rm -f *.mod *.f90 *.o *.L ; touch *.F
-# main is compiled with the $(DEBUG) flags instead of $(OFLAG).
-# $(FFLAGS) and $(DEBUG) are separated by an explicit blank so the command
-# line stays valid even when FFLAGS is defined without a trailing blank.
-main.o: main$(SUFFIX)
-	$(FC) $(FFLAGS) $(DEBUG)  $(INCS) -c main$(SUFFIX)
-xcgrad.o: xcgrad$(SUFFIX)
-	$(FC) $(FFLAGS) $(INLINE)  $(INCS) -c xcgrad$(SUFFIX)
-xcspin.o: xcspin$(SUFFIX)
-	$(FC) $(FFLAGS) $(INLINE)  $(INCS) -c xcspin$(SUFFIX)
-# makeparam is compiled with the $(DEBUG) flags instead of $(OFLAG).
-# $(FFLAGS) and $(DEBUG) are separated by an explicit blank so the command
-# line stays valid even when FFLAGS is defined without a trailing blank.
-makeparam.o: makeparam$(SUFFIX)
-	$(FC) $(FFLAGS) $(DEBUG)  $(INCS) -c makeparam$(SUFFIX)
-makeparam$(SUFFIX): makeparam.F main.F
-#
-# MIND: I do not have a full dependency list for the include
-# and MODULES: here are only the minimal basic dependencies
-# if one structure is changed then touch_dep must be called
-# with the corresponding name of the structure
-#
-base.o: base.inc base.F
-mgrid.o: mgrid.inc mgrid.F
-constant.o: constant.inc constant.F
-lattice.o: lattice.inc lattice.F
-setex.o: setexm.inc setex.F
-pseudo.o: pseudo.inc pseudo.F
-poscar.o: poscar.inc poscar.F
-mkpoints.o: mkpoints.inc mkpoints.F
-wave.o: wave.inc wave.F
-nonl.o: nonl.inc nonl.F
-nonlr.o: nonlr.inc nonlr.F
-# Objects listed in OBJ_HIGH are preprocessed and compiled with $(OFLAG_HIGH).
-# OBJ_HIGH is assigned an empty value in this makefile, so the rule matches
-# no file unless that list is filled in.
-$(OBJ_HIGH):
-	$(CPP)
-	$(FC) $(FFLAGS) $(OFLAG_HIGH) $(INCS) -c $*$(SUFFIX)
-# Objects listed in OBJ_NOOPT are compiled with $(OFLAG_NOOPT); this list is
-# likewise empty in this makefile.
-$(OBJ_NOOPT):
-	$(CPP)
-	$(FC) $(FFLAGS) $(OFLAG_NOOPT) $(INCS) -c $*$(SUFFIX)
-# NOTE(review): F77 and FFLAGS_F77 are not assigned anywhere in this
-# makefile; presumably make's built-in defaults are relied on - confirm.
-fft3dlib_f77.o: fft3dlib_f77.F
-	$(CPP)
-	$(F77) $(FFLAGS_F77) -c $*$(SUFFIX)
-# Default suffix rule: preprocess the .F file into $*$(SUFFIX), then compile
-# the preprocessed file with the general flags and $(OFLAG).
-.F.o:
-	$(CPP)
-	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
-# Preprocessing only: produce $*$(SUFFIX) from the .F file.
-.F$(SUFFIX):
-	$(CPP)
-# Compile an already preprocessed $(SUFFIX) file.
-$(SUFFIX).o:
-	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)
-# special rules
-#-----------------------------------------------------------------------
-# -tpp5|6|7 P, PII-PIII, PIV
-# -xW use SIMD (does not pay off on PII, since fft3d uses double prec)
-# all other options do not affect the code performance since -O1 is used
-# Per-file rules that take precedence over the generic .F.o suffix rule.
-# Each one runs $(CPP) and then compiles the preprocessed file with
-# "-FR -lowercase" plus an optimization level chosen for that file
-# (-O1, -O2, -O3 or $(OFLAG)) instead of the general $(FFLAGS) $(OFLAG).
-# Only fftmpiw.o additionally passes $(INCS), and wave_high.o is the only
-# rule here that invokes $(FC) rather than $(F77).
-fft3dlib.o : fft3dlib.F
-	$(CPP)
-	$(F77) -FR -lowercase -O2 -c $*$(SUFFIX)
-fft3dfurth.o : fft3dfurth.F
-	$(CPP)
-	$(F77) -FR -lowercase -O1 -c $*$(SUFFIX)
-fftw3d.o : fftw3d.F
-	$(CPP)
-	$(F77) -FR -lowercase -O1 -c $*$(SUFFIX)
-radial.o : radial.F
-	$(CPP)
-	$(F77) -FR -lowercase $(OFLAG) -c $*$(SUFFIX)
-fftmpi.o : fftmpi.F
-	$(CPP)
-	$(F77) -FR -lowercase -O1 -c $*$(SUFFIX)
-fftmpiw.o : fftmpiw.F
-	$(CPP)
-	$(F77) -FR -lowercase -O1 $(INCS) -c $*$(SUFFIX)
-symlib.o : symlib.F
-	$(CPP)
-	$(F77) -FR -lowercase $(OFLAG)  -c $*$(SUFFIX)
-symmetry.o : symmetry.F
-	$(CPP)
-	$(F77) -FR -lowercase $(OFLAG) -c $*$(SUFFIX)
-broyden.o : broyden.F
-	$(CPP)
-	$(F77) -FR -lowercase $(OFLAG) -c $*$(SUFFIX)
-dynbr.o : dynbr.F
-	$(CPP)
-	$(F77) -FR -lowercase $(OFLAG) -c $*$(SUFFIX)
-paw.o : paw.F
-	$(CPP)
-	$(F77) -FR -lowercase -O3 -c $*$(SUFFIX)
-cl_shift.o : cl_shift.F
-	$(CPP)
-	$(F77) -FR -lowercase -O3 -c $*$(SUFFIX)
-us.o : us.F
-	$(CPP)
-	$(F77) -FR -lowercase -O3 -c $*$(SUFFIX)
-wave.o : wave.F
-	$(CPP)
-	$(F77) -FR -lowercase -O3 -c $*$(SUFFIX)
-wave_high.o : wave_high.F
-	$(CPP)
-	$(FC) -FR -lowercase -O1 -c $*$(SUFFIX)
-LDApU.o : LDApU.F
-	$(CPP)
-	$(F77) -FR -lowercase -O3 -c $*$(SUFFIX)
-</code>