Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
Next revision | Both sides next revision
doku:ai_intro [2024/03/15 08:13] alindner | doku:ai_intro [2024/03/15 08:56] – Improve readability of example SLURM script mpfister
Line 17: Line 17:
 <file bash gpu_job_template.slurm> <file bash gpu_job_template.slurm>
 #!/bin/bash #!/bin/bash
-+ 
-#SBATCH --job-name=GPU_job  Job name+## Specify job name: 
 +#SBATCH --job-name=GPU_job 
 + 
 +## Specify GPU:
 ## For Nvidia A40: ## For Nvidia A40:
-#SBATCH --partition=zen2_0256_a40x2  # Type of hardware +##SBATCH --partition=zen2_0256_a40x2 
-#SBATCH --qos=zen2_0256_a40x2  # Quality of service, for VSC often identical to partition+##SBATCH --qos=zen2_0256_a40x2
 ## For Nvidia A100: ## For Nvidia A100:
-##SBATCH --partition=zen3_0512_a100x2  # Type of hardware +#SBATCH --partition=zen3_0512_a100x2 
-##SBATCH --qos=zen3_0512_a100x2  # Quality of service, for VSC often identical to partition +#SBATCH --qos=zen3_0512_a100x2 
-#SBATCH --time=0-01:00:00  Maximum run time in format days-hours:minutes:seconds (up to 3 days)+ 
 +## Specify run time limit in format days-hours:minutes:seconds (up to 3 days) 
 +## Note: Job will be killed once the run time limit is reached. 
 +## Shorter values might reduce queuing time. 
 +#SBATCH --time=0-01:00:00 
 + 
 +## Specify number of GPUs (1 or 2):
 #SBATCH --gres=gpu:1 # Number of GPUs #SBATCH --gres=gpu:1 # Number of GPUs
-##Optional: Get notified via mail when the job runs and finishes:+ 
 +## Optional: Get notified via mail when the job runs and finishes:
 ##SBATCH --mail-type=ALL    # BEGIN, END, FAIL, REQUEUE, ALL ##SBATCH --mail-type=ALL    # BEGIN, END, FAIL, REQUEUE, ALL
-##SBATCH --mail-user=martin.pfister@tuwien.ac.at+##SBATCH --mail-user=user@example.com 
 +  
 +# Start in a clean environment 
 +module purge
  
-module purge  Start in a clean environment+# List available GPUs:
 nvidia-smi nvidia-smi
 +
 +# Load conda:
 module load miniconda3 module load miniconda3
 eval "$(conda shell.bash hook)" eval "$(conda shell.bash hook)"
 +
 +# Load a conda environment with Python 3.11.6, PyTorch 2.1.0, TensorFlow 2.13.1 and other packages:
 +export XLA_FLAGS=--xla_gpu_cuda_data_dir=/gpfs/opt/sw/cuda-zen/spack-0.19.0/opt/spack/linux-almalinux8-zen/gcc-12.2.0/cuda-11.8.0-knnuyxtpma52vhp6zhj72nbjfbrcvb7f
 conda activate /opt/sw/jupyterhub/envs/conda/vsc5/jupyterhub-horovod-v1 conda activate /opt/sw/jupyterhub/envs/conda/vsc5/jupyterhub-horovod-v1
 +
 +# Run AI scripts:
 python -c "import torch;print(torch.__version__);print(torch.cuda.get_device_properties(0))" python -c "import torch;print(torch.__version__);print(torch.cuda.get_device_properties(0))"
 </file> </file>
  • doku/ai_intro.txt
  • Last modified: 2024/05/16 15:06
  • by mpfister