#!/bin/bash
#
# SLURM batch script: run a PyTorch/TensorFlow job on an Nvidia GPU node.
# NOTE: all #SBATCH directives must appear before the first executable
# command, each on its own line, or sbatch silently ignores them.

## Specify job name:
#SBATCH --job-name=GPU_job

## Specify GPU:
## For Nvidia A40:
##SBATCH --partition=zen2_0256_a40x2
##SBATCH --qos=zen2_0256_a40x2
## For Nvidia A100:
#SBATCH --partition=zen3_0512_a100x2
#SBATCH --qos=zen3_0512_a100x2

## Specify run time limit in format days-hours:minutes:seconds (up to 3 days)
## Note: Job will be killed once the run time limit is reached.
## Shorter values might reduce queuing time.
#SBATCH --time=3-00:00:00

## Specify number of GPUs (1 or 2):
#SBATCH --gres=gpu:1                   # Number of GPUs

## Optional: Get notified via mail when the job runs and finishes:
##SBATCH --mail-type=ALL               # BEGIN, END, FAIL, REQUEUE, ALL
##SBATCH --mail-user=user@example.com

# Start in a clean environment
module purge

# List available GPUs:
nvidia-smi

# Load conda:
module load miniconda3
eval "$(conda shell.bash hook)"

# Load a conda environment with Python 3.11.6, PyTorch 2.1.0, TensorFlow 2.13.1 and other packages:
# Point XLA at the matching CUDA toolkit so TensorFlow/JAX can JIT-compile on GPU.
export XLA_FLAGS="--xla_gpu_cuda_data_dir=/gpfs/opt/sw/cuda-zen/spack-0.19.0/opt/spack/linux-almalinux8-zen/gcc-12.2.0/cuda-11.8.0-knnuyxtpma52vhp6zhj72nbjfbrcvb7f"
conda activate /opt/sw/jupyterhub/envs/conda/vsc5/jupyterhub-horovod-v1

# Run AI scripts:
python -c "import torch;print(torch.__version__);print(torch.cuda.get_device_properties(0))"