Skip to content

Commit c551b88

Browse files
Changes and fixes for May/June/July 2018 web graphs
- fix creation of webgraph/bvgraph with input from multiple edge files
- update configuration
1 parent 9ddcafc commit c551b88

File tree

2 files changed

+14
-8
lines changed

2 files changed

+14
-8
lines changed

src/script/webgraph_ranking/process_webgraph.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ set -exo pipefail
169169

170170
if [ -d $EDGES ]; then
171171
# edges is a directory with multiple files
172+
sort_input=""
173+
for e in $EDGES/part-*.gz; do
174+
sort_input="$sort_input <(zcat $e)"
175+
done
172176
if ${USE_WEBGRAPH_BIG:-false}; then
173177
## TODO:
174178
## * option --threads not available in webgraph-big
@@ -184,15 +188,11 @@ if [ -d $EDGES ]; then
184188
## at it.unimi.dsi.big.webgraph.ArcListASCIIGraph.load(ArcListASCIIGraph.java:283)
185189
## at it.unimi.dsi.big.webgraph.ArcListASCIIGraph.load(ArcListASCIIGraph.java:279)
186190
## at it.unimi.dsi.big.webgraph.ArcListASCIIGraph.loadOffline(ArcListASCIIGraph.java:255)
187-
sort_input=""
188-
for e in $EDGES/part-*.gz; do
189-
sort_input="$sort_input <(zcat $e)"
190-
done
191191
_step bvgraph \
192192
bash -c "eval \"sort --batch-size=$SORT_BATCHES -t$'\t' -k1,1n -k2,2n --stable --merge $sort_input\" | $WG $WGP.BVGraph --once -g $WGP.ArcListASCIIGraph - $FULLNAME"
193193
else
194194
_step bvgraph \
195-
$WG $WGP.BVGraph --threads $THREADS -g $WGP.ArcListASCIIGraph <(zcat $EDGES/*.gz) $FULLNAME
195+
bash -c "$WG $WGP.BVGraph --threads $THREADS -g $WGP.ArcListASCIIGraph <(eval \"sort --batch-size=$SORT_BATCHES -t$'\t' -k1,1n -k2,2n --stable --merge $sort_input\") $FULLNAME"
196196
fi
197197
else
198198
if ${USE_WEBGRAPH_BIG:-false}; then

src/script/webgraph_ranking/webgraph_config.sh

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,15 @@ JOIN_RANKS_IN_MEMORY=${JOIN_RANKS_IN_MEMORY:-true}
2424
# => it's only an empirical value and possibly needs to be adjusted
2525
THREADS=2
2626
HYP_REG=4
27+
## on r4.8xlarge (244 GB)
28+
#THREADS=32
29+
#HYP_REG=4 # 4-5 for hostgraph, 10 for domain graph
2730
## on r4.16xlarge (488 GB)
28-
THREADS=64
29-
HYP_REG=5 # 4-6 for hostgraph, 10 for domain graph
31+
#THREADS=64
32+
#HYP_REG=5 # 4-6 for hostgraph, 10 for domain graph
33+
## on r5.12xlarge (384 GB)
34+
#THREADS=48
35+
#HYP_REG=5 # 4-6 for hostgraph, 10 for domain graph
3036
## on x1.16xlarge (976 GB)
3137
#THREADS=64
3238
#HYP_REG=9
@@ -62,5 +68,5 @@ export SORT_BUFFER_SIZE=${SORT_BUFFER_SIZE:-$MEM_10PERC}
6268

6369
# max. number of merge inputs
6470
# (should be not less than number of vertices / edges files to be merged)
65-
export SORT_BATCHES=${SORT_BATCHES:-160}
71+
export SORT_BATCHES=${SORT_BATCHES:-240}
6672

0 commit comments

Comments
 (0)