#!/usr/bin/env bash
# install_devfair.sh

  1. # The script is installing seamless_communication (internal) + fairseq2 on AWS cluster.
  2. set -e
  3. set -x
  4. echo "Installing Conda"
  5. export TGT=`echo ~/seacom`
  6. rm -rf $TGT
  7. wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -qO /tmp/conda.sh
  8. bash /tmp/conda.sh -bp $TGT
  9. export CONDA=$TGT/bin/conda
  10. export CONDA_ACTIVATE=$TGT/bin/activate
  11. export ENV_N=sc_fr2
  12. echo "Next step will take ~15 minutes. Get some coffee"
  13. module add cuda/11.8
  14. $CONDA create -y -n ${ENV_N} python=3.10 pytorch=2.0.1 pytorch-cuda=11.8 torchvision torchaudio \
  15. compilers libsndfile==1.0.31 gcc==11.4.0 \
  16. --strict-channel-priority --override-channels \
  17. -c pytorch \
  18. -c nvidia \
  19. -c conda-forge
  20. echo "Setting LD_LIBRARY_PATH"
  21. . $CONDA_ACTIVATE activate ${ENV_N}
  22. if [ -z "$CONDA_PREFIX" ]; then
  23. echo "CONDA_PREFIX env var is not set!"
  24. exit 1
  25. else
  26. path=$CONDA_PREFIX/etc/conda/activate.d/env_vars.sh
  27. echo "export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH\n" >> ${path}
  28. fi
  29. . $CONDA_ACTIVATE activate ${ENV_N} # update env vars
  30. # Installing fairseq2.
  31. echo "Installing fairseq2"
  32. if [[ "${I_DONT_PLAN_TO_HACK_FAIRSEQ2:-No}" == "Yes" ]] ; then
  33. pip install fairseq2 \
  34. --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.0.1/cu118
  35. else
  36. # NOTICE: to compile CUDA kernels, you need NVCC. On AWS cluster an easy way would be to get a GPU container:
  37. # srun -N 1 --gres=gpu:1 --cpus-per-task=20 --partition seamless --time 2400 --pty /bin/bash -l
  38. cd $TGT
  39. git clone --recurse-submodules git@github.com:facebookresearch/fairseq2.git
  40. pip install -r fairseq2/fairseq2n/python/requirements-build.txt
  41. cd fairseq2
  42. pip install -e . # it will install public fairseq2n, we rewrite it below
  43. cd fairseq2n
  44. args="-GNinja\
  45. -DCMAKE_BUILD_TYPE=Release \
  46. -DCMAKE_CUDA_ARCHITECTURES=80-real;80-virtual\
  47. -DFAIRSEQ2N_INSTALL_STANDALONE=ON\
  48. -DFAIRSEQ2N_PERFORM_LTO=ON\
  49. -DFAIRSEQ2N_TREAT_WARNINGS_AS_ERRORS=OFF\
  50. -DFAIRSEQ2N_USE_CUDA=ON\
  51. -DFAIRSEQ2N_BUILD_PYTHON_BINDINGS=ON\
  52. -DFAIRSEQ2N_PYTHON_DEVEL=OFF"
  53. cmake ${args} -B build
  54. cmake --build build
  55. cd python && pip install .
  56. fi
  57. # Quick test
  58. python -c "from fairseq2n.bindings.data.string import CString as CString"
  59. # Has to go before fairseq2 to make sure that it will not reinstall fairseq2n
  60. echo "Installing seamless_communication"
  61. cd $TGT
  62. git clone git@github.com:fairinternal/seamless_communication.git
  63. cd seamless_communication
  64. pip install -e . # editable mode for hacking
  65. echo "One more time re-install fairseq2n (most propably overriden by seamless_communication)"
  66. cd $TGT/fairseq2/fairseq2n/python
  67. pip install .
  68. echo "Finished."
  69. echo "To activate the environment run: . $CONDA_ACTIVATE activate ${ENV_N}"
  70. echo "Location of seamless_communication checkout: $TGT/seamless_communication"