Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision | Next revision Both sides next revision | ||
doku:ai_intro [2024/03/15 08:18] – alindner | doku:ai_intro [2024/03/15 08:56] – Improve readability of example SLURM script mpfister | ||
---|---|---|---|
Line 17: | Line 17: | ||
<file bash gpu_job_template.slurm> | <file bash gpu_job_template.slurm> | ||
#!/bin/bash | #!/bin/bash | ||
- | # | + | |
- | #SBATCH --job-name=GPU_job | + | ## Specify job name: |
+ | #SBATCH --job-name=GPU_job | ||
+ | |||
+ | ## Specify GPU: | ||
## For Nvidia A40: | ## For Nvidia A40: | ||
- | #SBATCH --partition=zen2_0256_a40x2 | + | ##SBATCH --partition=zen2_0256_a40x2 |
- | #SBATCH --qos=zen2_0256_a40x2 | + | ##SBATCH --qos=zen2_0256_a40x2 |
## For Nvidia A100: | ## For Nvidia A100: | ||
- | ##SBATCH --partition=zen3_0512_a100x2 | + | #SBATCH --partition=zen3_0512_a100x2 |
- | ##SBATCH --qos=zen3_0512_a100x2 | + | #SBATCH --qos=zen3_0512_a100x2 |
- | #SBATCH --time=0-01: | + | |
+ | ## Specify run time limit: | ||
+ | ## Note: Job will be killed once the run time limit is reached. | ||
+ | ## Shorter values might reduce queuing time. | ||
+ | #SBATCH --time=0-01: | ||
+ | |||
+ | ## Specify number of GPUs (1 or 2): | ||
#SBATCH --gres=gpu: | #SBATCH --gres=gpu: | ||
- | ##Optional: Get notified via mail when the job runs and finishes: | + | |
+ | ## Optional: Get notified via mail when the job runs and finishes: | ||
##SBATCH --mail-type=ALL | ##SBATCH --mail-type=ALL | ||
- | ##SBATCH --mail-user=martin.pfister@tuwien.ac.at | + | ##SBATCH --mail-user=user@example.com |
+ | |||
+ | # Start in a clean environment | ||
+ | module purge | ||
- | module purge | + | # List available GPUs: |
nvidia-smi | nvidia-smi | ||
+ | |||
+ | # Load conda: | ||
module load miniconda3 | module load miniconda3 | ||
eval " | eval " | ||
+ | |||
+ | # Load a conda environment with Python 3.11.6, PyTorch 2.1.0, TensorFlow 2.13.1 and other packages: | ||
+ | export XLA_FLAGS=--xla_gpu_cuda_data_dir=/ | ||
conda activate / | conda activate / | ||
- | python -c " | ||
- | module list | + | # Run AI scripts: |
+ | python -c " | ||
</ | </ | ||