#!/usr/bin/env bash
#
# Installs seamless_communication (internal) + fairseq2 on the AWS cluster.
#
# This first stage:
#   1. removes any previous installation under ~/seacom,
#   2. installs a fresh Miniconda there,
#   3. creates the conda environment used by the rest of the script.
#
# Exported for the later stages: TGT, CONDA, CONDA_ACTIVATE, ENV_N.

# Abort on the first failing command and trace every command as it runs.
# NOTE(review): `set -u` is intentionally not enabled here; sourced conda
# activation hooks may reference unset variables -- verify before adding it.
set -e
set -x

echo "Installing Conda"
export TGT="${HOME}/seacom"

# ${TGT:?} makes the shell abort rather than run `rm -rf` on an empty path.
rm -rf -- "${TGT:?}"

# Download the installer into a private temporary directory instead of a
# fixed, world-shared /tmp path (which another user could own or pre-create).
installer_dir="$(mktemp -d)"
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
  -q -O "${installer_dir}/conda.sh"
# -b: batch (non-interactive) install, -p: installation prefix.
bash "${installer_dir}/conda.sh" -b -p "${TGT}"
rm -rf -- "${installer_dir:?}"

export CONDA="${TGT}/bin/conda"
export CONDA_ACTIVATE="${TGT}/bin/activate"
export ENV_N=sc_fr2

echo "Next step will take ~15 minutes. Get some coffee"
module add cuda/11.8
"${CONDA}" create -y -n "${ENV_N}" \
  python=3.10 pytorch=2.0.1 pytorch-cuda=11.8 torchvision torchaudio \
  compilers libsndfile==1.0.31 gcc==11.4.0 \
  --strict-channel-priority --override-channels \
  -c pytorch \
  -c nvidia \
  -c conda-forge
# Register an activation hook so that every `conda activate` of the new
# environment puts the environment's lib/ directory on LD_LIBRARY_PATH.
echo "Setting LD_LIBRARY_PATH"
. "${CONDA_ACTIVATE}" activate "${ENV_N}"
if [[ -z "${CONDA_PREFIX:-}" ]]; then
  echo "CONDA_PREFIX env var is not set!" >&2
  exit 1
fi

activate_hook_dir="${CONDA_PREFIX}/etc/conda/activate.d"
mkdir -p -- "${activate_hook_dir}"

# The LD_LIBRARY_PATH value visible right now (e.g. after `module add`) is
# baked into the hook. The ${VAR:+...} form only adds the ':' separator when
# there is something to append: a trailing ':' would create an empty entry,
# which the dynamic loader treats as the current working directory.
# printf is used because bash's builtin `echo "...\n"` writes a literal
# backslash-n into the file instead of a newline.
printf 'export LD_LIBRARY_PATH="%s"\n' \
  "${CONDA_PREFIX}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \
  >> "${activate_hook_dir}/env_vars.sh"

# Re-activate so the hook written above takes effect in this shell.
. "${CONDA_ACTIVATE}" activate "${ENV_N}"
# Installing fairseq2.
#
# Set I_DONT_PLAN_TO_HACK_FAIRSEQ2=Yes to install the pre-built nightly wheel;
# any other value (the default) clones fairseq2 and builds fairseq2n from
# source with CUDA enabled.
echo "Installing fairseq2"
if [[ "${I_DONT_PLAN_TO_HACK_FAIRSEQ2:-No}" == "Yes" ]]; then
  pip install fairseq2 \
    --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.0.1/cu118
else
  # NOTICE: to compile CUDA kernels, you need NVCC. On AWS cluster an easy way would be to get a GPU container:
  # srun -N 1 --gres=gpu:1 --cpus-per-task=20 --partition seamless --time 2400 --pty /bin/bash -l
  cd "${TGT}"
  git clone --recurse-submodules git@github.com:facebookresearch/fairseq2.git
  pip install -r fairseq2/fairseq2n/python/requirements-build.txt

  cd fairseq2
  pip install -e .  # it will install public fairseq2n, we rewrite it below

  cd fairseq2n
  # An array keeps each flag as exactly one argument; in particular the ';'
  # inside CMAKE_CUDA_ARCHITECTURES never depends on word-splitting rules.
  cmake_args=(
    -GNinja
    -DCMAKE_BUILD_TYPE=Release
    "-DCMAKE_CUDA_ARCHITECTURES=80-real;80-virtual"
    -DFAIRSEQ2N_INSTALL_STANDALONE=ON
    -DFAIRSEQ2N_PERFORM_LTO=ON
    -DFAIRSEQ2N_TREAT_WARNINGS_AS_ERRORS=OFF
    -DFAIRSEQ2N_USE_CUDA=ON
    -DFAIRSEQ2N_BUILD_PYTHON_BINDINGS=ON
    -DFAIRSEQ2N_PYTHON_DEVEL=OFF
  )
  cmake "${cmake_args[@]}" -B build
  cmake --build build

  # Kept as two separate statements: `set -e` does not abort on a failing
  # command that sits on the left-hand side of `&&`.
  cd python
  pip install .
fi

# Quick test
python -c "from fairseq2n.bindings.data.string import CString as CString"
# seamless_communication is installed after fairseq2. Its installation may
# replace the fairseq2n package, so a locally built fairseq2n is installed
# once more afterwards.
echo "Installing seamless_communication"
cd "${TGT}"
git clone git@github.com:fairinternal/seamless_communication.git
cd seamless_communication
pip install -e .  # editable mode for hacking

# The source checkout only exists when fairseq2 was built from source; with
# I_DONT_PLAN_TO_HACK_FAIRSEQ2=Yes there is nothing local to reinstall, and an
# unconditional `cd` would abort the script under `set -e`.
fairseq2n_python_dir="${TGT}/fairseq2/fairseq2n/python"
if [[ -d "${fairseq2n_python_dir}" ]]; then
  echo "One more time re-install fairseq2n (most propably overriden by seamless_communication)"
  cd "${fairseq2n_python_dir}"
  pip install .
fi

echo "Finished."
echo "To activate the environment run: . $CONDA_ACTIVATE activate ${ENV_N}"
echo "Location of seamless_communication checkout: $TGT/seamless_communication"