Skip to content

Commit

Permalink
fix(WebScraper/sitemap): await urlsHandler to fix race condition
Browse files: browse the repository at this point in the history
  • Loading branch information
mogery committed Dec 30, 2024
1 parent 8ae34a0 commit 71a8f74
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion apps/api/src/scraper/WebScraper/crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ export class WebCrawler {
// Get all links from the main domain's sitemap
sitemapCount += await getLinksFromSitemap(
{ sitemapUrl: mainDomainSitemapUrl, urlsHandler(urls) {
urlsHandler(urls.filter(link => {
return urlsHandler(urls.filter(link => {
try {
const linkUrl = new URL(link);
return linkUrl.hostname.endsWith(hostname);
Expand Down
6 changes: 5 additions & 1 deletion apps/api/src/scraper/WebScraper/sitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,11 @@ export async function getLinksFromSitemap(
)
.map((url) => url.loc[0]);
count += validUrls.length;
urlsHandler(validUrls);

const h = urlsHandler(validUrls);
if (h instanceof Promise) {
await h;
}
}

return count;
Expand Down

0 comments on commit 71a8f74

Please sign in to comment.