From a7cd4e434be4fcd9ad9166db1dcbcfb059a69682 Mon Sep 17 00:00:00 2001 From: Thom Vaughan Date: Mon, 21 Oct 2024 09:12:59 +0000 Subject: [PATCH] chore: update hostgraph configuration for cc-main-2024-aug-sep-oct --- src/script/hostgraph/hostgraph_config.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/script/hostgraph/hostgraph_config.sh b/src/script/hostgraph/hostgraph_config.sh index fd3d1aa..b366580 100644 --- a/src/script/hostgraph/hostgraph_config.sh +++ b/src/script/hostgraph/hostgraph_config.sh @@ -12,7 +12,7 @@ ### saved as tuples # crawls to be processed -CRAWLS=("CC-MAIN-2024-30" "CC-MAIN-2024-33" "CC-MAIN-2024-38") +CRAWLS=("CC-MAIN-2024-33" "CC-MAIN-2024-38" "CC-MAIN-2024-42") INPUT_BASE_URL="s3://commoncrawl/" @@ -42,7 +42,7 @@ S3A_OUTPUT_PREFIX=s3a://commoncrawl-webgraph ################################################################################ # construct a merged graph of multiple monthly crawls -MERGE_NAME=cc-main-2024-jul-aug-sep +MERGE_NAME=cc-main-2024-aug-sep-oct # Naming convention should be the three months' crawls that are # used to generate this graph release. In the event of multiple months