forked from deepspeedai/DeepSpeed
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinstall.sh
More file actions
executable file
·201 lines (181 loc) · 5.48 KB
/
install.sh
File metadata and controls
executable file
·201 lines (181 loc) · 5.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/bin/bash
# Stop at the first failing command; the ERR trap installed below reports
# the line on which it happened.
set -e

# Print the two-line failure banner.
# Arguments: $1 - line number to report (the trap passes $LINENO)
# Outputs:   writes the banner to stdout
err_report() {
  printf '%s\n' "Error on line $1" "Fail to install deepspeed"
}

trap 'err_report $LINENO' ERR
# Print the command-line help text.
# Outputs: the help text on stdout, preceded and followed by one blank line
#          (the same framing the previous echo-based version produced).
# The heredoc delimiter is quoted so nothing in the text is expanded.
usage() {
  cat <<'EOF'

Usage: install.sh [options...]
By default will install deepspeed and all third party dependencies across all machines listed in
hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally
[optional]
-d, --deepspeed_only Install only deepspeed and no third party dependencies
-t, --third_party_only Install only third party dependencies and not deepspeed
-l, --local_only Install only on local machine
-s, --pip_sudo Run pip with sudo (default: no sudo)
-m, --pip_mirror Use the specified pip mirror (default: the default pip mirror)
-H, --hostfile Path to MPI-style hostfile (default: /job/hostfile)
-a, --apex_commit Install a specific commit hash of apex, instead of the one deepspeed points to
-k, --skip_requirements Skip installing DeepSpeed requirements
-h, --help This help text

EOF
}
# ---- Default settings (overridden by the command-line options) ----

# What to install: both components unless -d / -t narrows the selection.
deepspeed_install=1
third_party_install=1

# Remember which of the mutually exclusive -d / -t flags was given.
ds_only=0
tp_only=0

# Where to install: every host in the hostfile unless -l is given.
local_only=0
hostfile="/job/hostfile"
# NOTE(review): entire_dlts_job is assigned here but not read by any other
# line of this script as shown.
entire_dlts_job=1

# How pip is invoked.
pip_sudo=0
pip_mirror=""
skip_requirements=0

# Optional apex commit to build instead of the one the submodule points to.
apex_commit=""
#######################################
# Parse the command-line options into the script's global settings and
# validate the combination.
# Globals written: deepspeed_install, third_party_install, ds_only, tp_only,
#                  local_only, pip_sudo, pip_mirror, apex_commit,
#                  skip_requirements, hostfile
# Arguments: the script's command-line arguments ("$@")
# Outputs:   help text on stdout for -h; error messages and usage on stderr
# Exits:     0 after -h/--help; 1 on an unknown option, an option that is
#            missing its value, a hostfile path that is not a regular file,
#            or -d combined with -t
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Options that take a value must actually be followed by one; otherwise
    # the later "shift 2" would fail under `set -e` with no useful message.
    case "$1" in
      -m|--pip_mirror|-a|--apex_commit|-H|--hostfile)
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires an argument" >&2
          usage >&2
          exit 1
        fi
        ;;
    esac

    case "$1" in
      -d|--deepspeed_only)
        deepspeed_install=1
        third_party_install=0
        ds_only=1
        shift
        ;;
      -t|--third_party_only)
        deepspeed_install=0
        third_party_install=1
        tp_only=1
        shift
        ;;
      -l|--local_only)
        local_only=1
        shift
        ;;
      -s|--pip_sudo)
        pip_sudo=1
        shift
        ;;
      -m|--pip_mirror)
        pip_mirror=$2
        shift 2
        ;;
      -a|--apex_commit)
        apex_commit=$2
        shift 2
        ;;
      -k|--skip_requirements)
        skip_requirements=1
        shift
        ;;
      -H|--hostfile)
        hostfile=$2
        # Quoted: an empty path or one containing spaces must be tested as a
        # single path, not dropped or split into several words.
        if [[ ! -f "$hostfile" ]]; then
          echo "User provided hostfile does not exist at $hostfile, exiting" >&2
          exit 1
        fi
        shift 2
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown argument: $1" >&2
        usage >&2
        exit 1
        ;;
    esac
  done

  # -d and -t each disable the other component, so both together would
  # silently let the last one win; reject the combination instead.
  if [[ "$ds_only" == "1" && "$tp_only" == "1" ]]; then
    echo "-d and -t are mutually exclusive, only choose one or none" >&2
    usage >&2
    exit 1
  fi
}

parse_args "$@"
# Regenerate deepspeed/git_version_info.py with the current short commit hash
# and branch name as reported by git, then show the result.
echo "Updating git hash/branch info"
{
  echo "git_hash = '$(git rev-parse --short HEAD)'"
  echo "git_branch = '$(git rev-parse --abbrev-ref HEAD)'"
} > deepspeed/git_version_info.py
cat deepspeed/git_version_info.py
# Prefix placed in front of every pip invocation: "sudo -H" when -s/--pip_sudo
# was given, otherwise empty.
PIP_SUDO=""
if [[ "$pip_sudo" == "1" ]]; then
  PIP_SUDO="sudo -H"
fi

# Base install command; " -i <mirror>" is appended only when a mirror was
# configured (the :+ expansion yields nothing for an empty pip_mirror).
PIP_INSTALL="pip install${pip_mirror:+ -i $pip_mirror}"
# Fall back to a local-only install when the hostfile is not a regular file.
# "$hostfile" is quoted so that an empty value or a path containing spaces is
# tested as one path. Unquoted, an empty value reduces the test to `[ ! -f ]`,
# which is false, so a missing hostfile would go unnoticed.
if [[ ! -f "$hostfile" ]]; then
  echo "No hostfile exists at $hostfile, installing locally"
  local_only=1
fi
# Ensure dependencies are installed locally, unless -k/--skip_requirements
# changed skip_requirements from its "0" setting.
case "$skip_requirements" in
  0)
    # PIP_SUDO and PIP_INSTALL hold whole command prefixes, so they are
    # expanded unquoted on purpose to split into separate words.
    $PIP_SUDO $PIP_INSTALL -r requirements.txt
    ;;
esac
# Build wheels
# Third-party stage: fetch the git submodules, build the apex wheel (optionally
# at a user-selected commit) and install it on this machine so the deepspeed
# build that follows can find it.
if [[ "$third_party_install" == "1" ]]; then
  echo "Checking out sub-module(s)"
  git submodule update --init --recursive

  echo "Building apex wheel"
  cd third_party/apex
  if [[ -n "$apex_commit" ]]; then
    echo "Installing a non-standard version of apex at commit: $apex_commit"
    git fetch
    # Quoted so the ref reaches git as exactly one argument (no word
    # splitting or glob expansion of the user-supplied value).
    git checkout "$apex_commit"
  fi
  python setup.py --cpp_ext --cuda_ext bdist_wheel
  # Return to the directory we came from; `cd -` also prints that directory.
  cd -

  echo "Installing apex locally so that deepspeed will build"
  # PIP_SUDO and PIP_INSTALL hold whole command prefixes, so they are
  # expanded unquoted on purpose to split into separate words.
  $PIP_SUDO pip uninstall -y apex
  $PIP_SUDO $PIP_INSTALL third_party/apex/dist/apex*.whl
fi
# Build the deepspeed wheel with setup.py unless deepspeed installation was
# disabled (deepspeed_install is "0" after -t/--third_party_only).
case "$deepspeed_install" in
  1)
    echo "Building deepspeed wheel"
    python setup.py bdist_wheel
    ;;
esac
# Final stage: install the wheel(s) built above, either on this machine only
# or on every host named in the hostfile (pdsh/pdcp with the ssh rcmd type).
#
# PIP_SUDO and PIP_INSTALL hold whole command prefixes ("sudo -H",
# "pip install -i <mirror>"), so local invocations expand them unquoted on
# purpose; in the remote case they are interpolated into the command string
# that pdsh runs on each host.
if [[ "$local_only" == "1" ]]; then
  if [[ "$deepspeed_install" == "1" ]]; then
    echo "Installing deepspeed"
    $PIP_SUDO pip uninstall -y deepspeed
    $PIP_SUDO $PIP_INSTALL dist/deepspeed*.whl
    # Import check: fails the script if the freshly installed package is broken.
    python -c 'import deepspeed; print("deepspeed info:", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'
    echo "Installation is successful"
  fi
else
  # NOTE(review): local_path is not read by any other line of this script as shown.
  local_path=$(pwd)

  # Quoted so an empty or space-containing path is tested as a single path
  # (unquoted, an empty value turns the test into `[ -f ]`, which is true).
  if [[ ! -f "$hostfile" ]]; then
    echo "hostfile not found, cannot proceed" >&2
    exit 1
  fi

  # Host list for `pdsh -w`: first column of every non-blank line that is not
  # a '#' comment, joined with commas.
  hosts=$(awk 'NF && $1 !~ /^#/ { print $1 }' "$hostfile" | paste -sd "," -)
  if [[ -z "$hosts" ]]; then
    echo "hostfile $hostfile lists no hosts, cannot proceed" >&2
    exit 1
  fi

  export PDSH_RCMD_TYPE=ssh
  tmp_wheel_path="/tmp/deepspeed_wheels"

  # Prepare the remote staging directory: clear wheels from an earlier run or
  # create the directory. `rm -f` keeps a directory without wheels from
  # producing an error.
  pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl; else mkdir -pv $tmp_wheel_path; fi"
  pdcp -w "$hosts" requirements.txt "${tmp_wheel_path}/"
  if [[ "$skip_requirements" == "0" ]]; then
    pdsh -w "$hosts" "$PIP_SUDO $PIP_INSTALL -r ${tmp_wheel_path}/requirements.txt"
  fi

  if [[ "$third_party_install" == "1" ]]; then
    pdsh -w "$hosts" "$PIP_SUDO pip uninstall -y apex"
    pdcp -w "$hosts" third_party/apex/dist/apex*.whl "$tmp_wheel_path/"
    pdsh -w "$hosts" "$PIP_SUDO $PIP_INSTALL $tmp_wheel_path/apex*.whl"
    pdsh -w "$hosts" 'python -c "import apex"'
  fi

  if [[ "$deepspeed_install" == "1" ]]; then
    echo "Installing deepspeed"
    pdsh -w "$hosts" "$PIP_SUDO pip uninstall -y deepspeed"
    pdcp -w "$hosts" dist/deepspeed*.whl "$tmp_wheel_path/"
    pdsh -w "$hosts" "$PIP_SUDO $PIP_INSTALL $tmp_wheel_path/deepspeed*.whl"
    pdsh -w "$hosts" "python -c 'import deepspeed; print(\"deepspeed info:\", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'"
    echo "Installation is successful"
  fi

  # Remove the staged files and the staging directory on every host.
  pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl $tmp_wheel_path/requirements.txt; rmdir $tmp_wheel_path; fi"
fi