Difference between revisions of "Infrastructure/software/horovod"

From Nordic Language Processing Laboratory
Jump to: navigation, search
(Installation on Saga)
(Installation on Saga)
 
Line 5: Line 5:
  
 
<pre>
 
<pre>
module purge; module --ignore-cache load Python/3.7.2-GCCcore-8.2.0 NCCL/2.4.8-CUDA-10.0
+
module purge; module load Python/3.7.2-GCCcore-8.2.0
 +
module load OpenMPI/3.1.3-GCC-8.2.0-2.31.1 CUDA/10.0.130 NCCL/2.4.8-CUDA-10.0
 
/cluster/shared/nlpl/operation/python/initialize --version 0.18.2 horovod
 
/cluster/shared/nlpl/operation/python/initialize --version 0.18.2 horovod
 
</pre>
 
</pre>
  
 
<pre>
 
<pre>
module load nlpl-horovod/0.18.2/3.7
 
 
module load GCC/8.2.0-2.31.1 CMake/3.12.1
 
module load GCC/8.2.0-2.31.1 CMake/3.12.1
 +
module load nlpl-tensorflow/1.15.0/3.7 nlpl-horovod/0.18.2/3.7
 
\rm ~/pip.log; \
 
\rm ~/pip.log; \
 
HOROVOD_NCCL_HOME=/cluster/software/NCCL/2.4.8-CUDA-10.0 \
 
HOROVOD_NCCL_HOME=/cluster/software/NCCL/2.4.8-CUDA-10.0 \
 
   HOROVOD_GPU_ALLREDUCE=NCCL python3 -m pip install --no-cache-dir horovod --log ~/pip.log
 
   HOROVOD_GPU_ALLREDUCE=NCCL python3 -m pip install --no-cache-dir horovod --log ~/pip.log
 
</pre>
 
</pre>

Latest revision as of 10:37, 28 October 2019

Background

Installation on Saga

# Start from a clean module environment, then load the Python toolchain.
module purge; module load Python/3.7.2-GCCcore-8.2.0
# MPI, CUDA and NCCL modules; the NCCL version here matches HOROVOD_NCCL_HOME below.
module load OpenMPI/3.1.3-GCC-8.2.0-2.31.1 CUDA/10.0.130 NCCL/2.4.8-CUDA-10.0
# NLPL helper script, invoked for horovod 0.18.2.
# NOTE(review): presumably prepares the nlpl-horovod/0.18.2 module loaded below -- confirm against the script.
/cluster/shared/nlpl/operation/python/initialize --version 0.18.2 horovod
# Compiler and CMake modules.
# NOTE(review): presumably required because pip builds horovod from source -- confirm.
module load GCC/8.2.0-2.31.1 CMake/3.12.1
module load nlpl-tensorflow/1.15.0/3.7 nlpl-horovod/0.18.2/3.7
# Discard the log of any previous attempt before pip writes to it again.
# -f keeps a first run (no log yet) from reporting an error; the leading
# backslash bypasses any shell alias defined for rm.
\rm -f ~/pip.log; \
HOROVOD_NCCL_HOME=/cluster/software/NCCL/2.4.8-CUDA-10.0 \
  HOROVOD_GPU_ALLREDUCE=NCCL python3 -m pip install --no-cache-dir horovod --log ~/pip.log