@@ -183,6 +183,7 @@ export type SaveState = {
183183 extraSeeds : string [ ] ;
184184 sitemapDone : boolean ;
185185 seedFileDone : boolean ;
186+ seedFileSeeds : string [ ] ;
186187} ;
187188
188189// ============================================================================
@@ -206,7 +207,10 @@ export class RedisCrawlState {
206207 esMap : string ;
207208
208209 sitemapDoneKey : string ;
210+
209211 seedFileDoneKey : string ;
212+ seedFileSeedsKey : string ;
213+ seedFileSeedsMap : string ;
210214
211215 waczFilename : string | null = null ;
212216
@@ -242,7 +246,10 @@ export class RedisCrawlState {
242246 this . esMap = this . key + ":esMap" ;
243247
244248 this . sitemapDoneKey = this . key + ":sitemapDone" ;
245- this . seedFileDoneKey = this . key + ":seedFileDone" ;
249+
250+ this . seedFileDoneKey = this . key + ":sfDone" ;
251+ this . seedFileSeedsKey = this . key + "sfSeeds" ;
252+ this . seedFileSeedsMap = this . key + ":sfMap" ;
246253
247254 this . _initLuaCommands ( this . redis ) ;
248255 }
@@ -736,6 +743,7 @@ return inx;
736743 const pending = await this . getPendingList ( ) ;
737744 const failed = await this . _iterListKeys ( this . fkey , seen ) ;
738745 const errors = await this . getErrorList ( ) ;
746+ const seedFileSeeds = await this . _iterListKeys ( this . seedFileSeedsKey , seen ) ;
739747 const extraSeeds = await this . _iterListKeys ( this . esKey , seen ) ;
740748 const sitemapDone = await this . isSitemapDone ( ) ;
741749 const seedFileDone = await this . isSeedFileDone ( ) ;
@@ -749,6 +757,7 @@ return inx;
749757 pending,
750758 sitemapDone,
751759 seedFileDone,
760+ seedFileSeeds,
752761 failed,
753762 errors,
754763 } ;
@@ -846,6 +855,13 @@ return inx;
846855 await this . redis . set ( this . dkey , state . finished . length ) ;
847856 }
848857
858+ if ( state . seedFileSeeds ) {
859+ for ( const seed of state . seedFileSeeds ) {
860+ const scopedSeed : ScopedSeed = JSON . parse ( seed ) ;
861+ await this . addSeedFileSeed ( scopedSeed ) ;
862+ }
863+ }
864+
849865 if ( state . extraSeeds ) {
850866 const origLen = seeds . length ;
851867
@@ -1041,6 +1057,14 @@ return inx;
10411057 return await this . redis . lpush ( this . pageskey , JSON . stringify ( data ) ) ;
10421058 }
10431059
/**
 * Record a seed that came from the seed file, skipping URLs already recorded.
 *
 * The seed's URL is first added to the `seedFileSeedsMap` set. Only when the
 * set reports a newly added member is the JSON-serialized seed appended to
 * the `seedFileSeedsKey` list.
 *
 * @param seed - seed to record; `seed.url` is used as the dedup key.
 */
async addSeedFileSeed(seed: ScopedSeed) {
  const newlyAdded = await this.redis.sadd(this.seedFileSeedsMap, seed.url);
  if (!(newlyAdded > 0)) {
    // URL was already present in the set: nothing to append.
    return;
  }
  // Append at the tail so the list keeps seeds in order for ids.
  await this.redis.rpush(this.seedFileSeedsKey, JSON.stringify(seed));
}
1067+
10441068 // add extra seeds from redirect
10451069 async addExtraSeed (
10461070 seeds : ScopedSeed [ ] ,
@@ -1094,6 +1118,16 @@ return inx;
10941118 return seeds [ newSeedId ] ;
10951119 }
10961120
/**
 * Read every entry of the `seedFileSeedsKey` list and return the entries
 * parsed from JSON, in list order.
 *
 * @returns the parsed seeds; an empty array when the list has no entries.
 */
async getSeedFileSeeds() {
  const serialized = await this.redis.lrange(this.seedFileSeedsKey, 0, -1);
  // Explicit arrow keeps JSON.parse from receiving map's index as a reviver.
  const parsedSeeds: ScopedSeed[] = serialized.map((entry) =>
    JSON.parse(entry),
  );
  return parsedSeeds;
}
1130+
10971131 async getExtraSeeds ( ) {
10981132 const seeds : ExtraRedirectSeed [ ] = [ ] ;
10991133 const res = await this . redis . lrange ( this . esKey , 0 , - 1 ) ;
0 commit comments