Skip to content

Commit 69eca37

Browse files
committed
Support re-adding seed file seeds from serialized state, fix typing
Allow crawlState to be undefined in parseSeeds for use in scope tests
1 parent 221d801 commit 69eca37

File tree

2 files changed

+27
-12
lines changed

2 files changed

+27
-12
lines changed

src/util/parseseeds.ts

Lines changed: 27 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,33 @@ import { type RedisCrawlState } from "./state.js";
88

99
export async function parseSeeds(
1010
params: CrawlerArgs,
11-
crawlState: RedisCrawlState,
11+
crawlState?: RedisCrawlState,
1212
): Promise<ScopedSeed[]> {
1313
let seeds = params.seeds as string[];
1414
const scopedSeeds: ScopedSeed[] = [];
1515

16-
const seedFileDone = await crawlState.isSeedFileDone();
16+
// Re-add seedFileDone from serialized state to Redis if present
17+
if (params.state && params.state.seedFileDone && crawlState) {
18+
await crawlState.markSeedFileDone();
19+
}
20+
21+
let seedFileDone = false;
22+
if (crawlState) {
23+
seedFileDone = await crawlState.isSeedFileDone();
24+
}
25+
26+
// Re-add any seeds from seed files from serialized state to Redis
27+
if (
28+
params.state &&
29+
params.state.seedFileSeeds &&
30+
seedFileDone &&
31+
crawlState
32+
) {
33+
for (const seed of params.state.seedFileSeeds) {
34+
const scopedSeed: ScopedSeed = JSON.parse(seed);
35+
await crawlState.addSeedFileSeed(scopedSeed);
36+
}
37+
}
1738

1839
if (params.seedFile && !seedFileDone) {
1940
let seedFilePath = params.seedFile as string;
@@ -54,7 +75,7 @@ export async function parseSeeds(
5475
try {
5576
const scopedSeed = new ScopedSeed({ ...scopeOpts, ...newSeed });
5677
scopedSeeds.push(scopedSeed);
57-
if (params.seedFile) {
78+
if (params.seedFile && !seedFileDone && crawlState) {
5879
await crawlState.addSeedFileSeed(scopedSeed);
5980
logger.debug(
6081
"Pushed seed file seed to Redis",
@@ -80,7 +101,8 @@ export async function parseSeeds(
80101
}
81102
}
82103

83-
if (params.seedFile && seedFileDone) {
104+
// If seed file was already successfully parsed, re-add seeds from Redis
105+
if (params.seedFile && seedFileDone && crawlState) {
84106
const seedFileScopedSeeds = await crawlState.getSeedFileSeeds();
85107
for (const seed of seedFileScopedSeeds) {
86108
logger.debug(
@@ -105,7 +127,7 @@ export async function parseSeeds(
105127
logger.fatal("No valid seeds specified, aborting crawl");
106128
}
107129

108-
if (params.seedFile) {
130+
if (params.seedFile && crawlState) {
109131
await crawlState.markSeedFileDone();
110132
}
111133

src/util/state.ts

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -855,13 +855,6 @@ return inx;
855855
await this.redis.set(this.dkey, state.finished.length);
856856
}
857857

858-
if (state.seedFileSeeds) {
859-
for (const seed of state.seedFileSeeds) {
860-
const scopedSeed: ScopedSeed = JSON.parse(seed);
861-
await this.addSeedFileSeed(scopedSeed);
862-
}
863-
}
864-
865858
if (state.extraSeeds) {
866859
const origLen = seeds.length;
867860

0 commit comments

Comments
 (0)