@@ -8,12 +8,33 @@ import { type RedisCrawlState } from "./state.js";
88
99export async function parseSeeds (
1010 params : CrawlerArgs ,
11- crawlState : RedisCrawlState ,
11+ crawlState ? : RedisCrawlState ,
1212) : Promise < ScopedSeed [ ] > {
1313 let seeds = params . seeds as string [ ] ;
1414 const scopedSeeds : ScopedSeed [ ] = [ ] ;
1515
16- const seedFileDone = await crawlState . isSeedFileDone ( ) ;
16+ // Re-add seedFileDone from serialized state to Redis if present
17+ if ( params . state && params . state . seedFileDone && crawlState ) {
18+ await crawlState . markSeedFileDone ( ) ;
19+ }
20+
21+ let seedFileDone = false ;
22+ if ( crawlState ) {
23+ seedFileDone = await crawlState . isSeedFileDone ( ) ;
24+ }
25+
26+ // Re-add any seed file seeds stored in the serialized state to Redis
27+ if (
28+ params . state &&
29+ params . state . seedFileSeeds &&
30+ seedFileDone &&
31+ crawlState
32+ ) {
33+ for ( const seed of params . state . seedFileSeeds ) {
34+ const scopedSeed : ScopedSeed = JSON . parse ( seed ) ;
35+ await crawlState . addSeedFileSeed ( scopedSeed ) ;
36+ }
37+ }
1738
1839 if ( params . seedFile && ! seedFileDone ) {
1940 let seedFilePath = params . seedFile as string ;
@@ -54,7 +75,7 @@ export async function parseSeeds(
5475 try {
5576 const scopedSeed = new ScopedSeed ( { ...scopeOpts , ...newSeed } ) ;
5677 scopedSeeds . push ( scopedSeed ) ;
57- if ( params . seedFile ) {
78+ if ( params . seedFile && ! seedFileDone && crawlState ) {
5879 await crawlState . addSeedFileSeed ( scopedSeed ) ;
5980 logger . debug (
6081 "Pushed seed file seed to Redis" ,
@@ -80,7 +101,8 @@ export async function parseSeeds(
80101 }
81102 }
82103
83- if ( params . seedFile && seedFileDone ) {
104+ // If seed file was already successfully parsed, re-add seeds from Redis
105+ if ( params . seedFile && seedFileDone && crawlState ) {
84106 const seedFileScopedSeeds = await crawlState . getSeedFileSeeds ( ) ;
85107 for ( const seed of seedFileScopedSeeds ) {
86108 logger . debug (
@@ -105,7 +127,7 @@ export async function parseSeeds(
105127 logger . fatal ( "No valid seeds specified, aborting crawl" ) ;
106128 }
107129
108- if ( params . seedFile ) {
130+ if ( params . seedFile && crawlState ) {
109131 await crawlState . markSeedFileDone ( ) ;
110132 }
111133
0 commit comments