Skip to content

Commit

Permalink
feat: text export of outdegrees and indegrees
Browse files Browse the repository at this point in the history
Join outdegrees and indegrees with vertex names and write the result to
a text file. Also export the top-10k vertices by outdegree and by indegree.
  • Loading branch information
sebastian-nagel committed Jun 28, 2024
1 parent 25d59a4 commit 15917a1
Showing 1 changed file with 36 additions and 0 deletions.
36 changes: 36 additions & 0 deletions src/script/webgraph_ranking/process_webgraph.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,39 @@ function join_ranks_in_memory() (
| sort $SORTOPTS -t$'\t' -k1,1n --stable | gzip >$_OUT
)

function join_degrees() (
set -exo pipefail
_FULLNAME="$1"
_VERT="$2"
HEADER="#outdegree\t#indegree\t#host_rev"
if [ -n "$3" ]; then
HEADER="$HEADER\t$3"
fi
if [ -d $_VERT ]; then
# _VERT is a directory with multiple vertices files
_VERT="$_VERT/*.gz"
fi
zcat $_VERT \
| cut -f2- \
| paste $FULLNAME.outdegrees $FULLNAME.indegrees - \
| gzip >$FULLNAME-outdegrees-indegrees.txt.gz
# top-N out/indegrees
(echo -e "$HEADER";
set +o pipefail;
zcat $FULLNAME-outdegrees-indegrees.txt.gz \
| perl -aF'\t' -lne 'print if $F[0] > 1000' \
| sort -k1,1nr \
| head -10000) \
| gzip >$FULLNAME-outdegrees-indegrees-topout.txt.gz
(echo -e "$HEADER";
set +o pipefail;
zcat $FULLNAME-outdegrees-indegrees.txt.gz \
| perl -aF'\t' -lne 'print if $F[1] > 1000' \
| sort -k2,2nr \
| head -10000) \
| gzip >$FULLNAME-outdegrees-indegrees-topin.txt.gz
)

function connected_distrib() (
set -exo pipefail
NUM_NODES=$1
Expand Down Expand Up @@ -265,6 +298,9 @@ fi
# Run the Stats tool on the graph with --save-degrees.
# NOTE(review): presumably this writes the $FULLNAME.outdegrees and
# $FULLNAME.indegrees files read by join_degrees, and the $FULLNAME.stats
# file read below - confirm against the tool's documentation.
_step stats \
$WG $WGP.Stats --save-degrees $FULLNAME

# Export the degrees joined with the vertex names as text.
# NOTE(review): _step_bg is defined outside this section; presumably it runs
# the step in the background. The meaning of the argument "15" is not
# visible here - confirm.
_step_bg join_degrees 15 \
join_degrees $FULLNAME $VERTICES "$EXTRA_FIELDS_HEADER"

# Extract the value of the "nodes=" line from the stats file.
NODES=$(perl -lne 'print if s@^nodes=@@' $FULLNAME.stats)
# Pass the node count, $FULLNAME.wccsizes and the output file name
# to connected_distrib.
_step connected_distrib \
connected_distrib $NODES $FULLNAME.wccsizes $FULLNAME-connected-components-distrib.txt.gz
Expand Down

0 comments on commit 15917a1

Please sign in to comment.