Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision Both sides next revision
doku:ai_intro [2024/03/15 08:18] alindner — doku:ai_intro [2024/03/15 08:56] – Improve readability of example SLURM script mpfister
Line 17: Line 17:
 <file bash gpu_job_template.slurm> <file bash gpu_job_template.slurm>
 #!/bin/bash #!/bin/bash
-+ 
-#SBATCH --job-name=GPU_job  Job name+## Specify job name: 
 +#SBATCH --job-name=GPU_job 
 + 
 +## Specify GPU:
 ## For Nvidia A40: ## For Nvidia A40:
-#SBATCH --partition=zen2_0256_a40x2  # Type of hardware +##SBATCH --partition=zen2_0256_a40x2 
-#SBATCH --qos=zen2_0256_a40x2  # Quality of service, for VSC often identical to partition+##SBATCH --qos=zen2_0256_a40x2
 ## For Nvidia A100: ## For Nvidia A100:
-##SBATCH --partition=zen3_0512_a100x2  # Type of hardware +#SBATCH --partition=zen3_0512_a100x2 
-##SBATCH --qos=zen3_0512_a100x2  # Quality of service, for VSC often identical to partition +#SBATCH --qos=zen3_0512_a100x2 
-#SBATCH --time=0-01:00:00  Maximum run time in format days-hours:minutes:seconds (up to 3 days)+ 
 +## Specify run time limit in format days-hours:minutes:seconds (up to 3 days) 
 +## Note: Job will be killed once the run time limit is reached. 
 +## Shorter values might reduce queuing time. 
 +#SBATCH --time=0-01:00:00 
 + 
 +## Specify number of GPUs (1 or 2):
 #SBATCH --gres=gpu: # Number of GPUs #SBATCH --gres=gpu: # Number of GPUs
-##Optional: Get notified via mail when the job runs and finishes:+ 
 +## Optional: Get notified via mail when the job runs and finishes:
 ##SBATCH --mail-type=ALL    # BEGIN, END, FAIL, REQUEUE, ALL ##SBATCH --mail-type=ALL    # BEGIN, END, FAIL, REQUEUE, ALL
-##SBATCH --mail-user=martin.pfister@tuwien.ac.at+##SBATCH --mail-user=user@example.com 
 +  
 +# Start in a clean environment 
 +module purge
  
-module purge  Start in a clean environment+List available GPUs:
 nvidia-smi nvidia-smi
 +
 +# Load conda:
 module load miniconda3 module load miniconda3
 eval "$(conda shell.bash hook)" eval "$(conda shell.bash hook)"
 +
 +# Load a conda environment with Python 3.11.6, PyTorch 2.1.0, TensorFlow 2.13.1 and other packages:
 +export XLA_FLAGS=--xla_gpu_cuda_data_dir=/gpfs/opt/sw/cuda-zen/spack-0.19.0/opt/spack/linux-almalinux8-zen/gcc-12.2.0/cuda-11.8.0-knnuyxtpma52vhp6zhj72nbjfbrcvb7f
 conda activate /opt/sw/jupyterhub/envs/conda/vsc5/jupyterhub-horovod-v1 conda activate /opt/sw/jupyterhub/envs/conda/vsc5/jupyterhub-horovod-v1
-python -c "import torch;print(torch.__version__);print(torch.cuda.get_device_properties(0))" 
  
-module list  Keep track of environment+Run AI scripts: 
 +python -c "import torch;print(torch.__version__);print(torch.cuda.get_device_properties(0))"
 </file> </file>
  
  • doku/ai_intro.txt
  • Last modified: 2024/05/08 08:44
  • by mpfister