Skip to content

Commit 898cc54

Browse files
committed
fix: improve error handling in course scraping and syllabus fetching
1 parent: 242cdf9 · commit: 898cc54

File tree

2 files changed

+36
-8
lines changed

2 files changed

+36
-8
lines changed

src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,8 @@ const APIHandler = {
5959
try {
6060
const cache = await scrapeArchivedCourses(env, semester);
6161
await scrapeSyllabus(env, semester, cache);
62+
// await syncCoursesToAlgolia(env, semester);
63+
console.log("Scheduled tasks completed successfully.");
6264
resolve(void 0);
6365
} catch (error) {
6466
console.error("Error during scheduled task:", error);

src/scheduled/syllabus.ts

Lines changed: 34 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,11 @@ export const scrapeArchivedCourses = async (env: Env, semester: string) => {
5151
.then((res) => res.arrayBuffer())
5252
.then((arrayBuffer) =>
5353
new TextDecoder("big5").decode(new Uint8Array(arrayBuffer)),
54-
);
54+
)
55+
.catch((error) => {
56+
console.error("Failed to fetch landing page:", error);
57+
throw new Error("Failed to fetch landing page, please try again later");
58+
});
5559

5660

5761
// search for the text https://www.ccxp.nthu.edu.tw/ccxp/INQUIRE/JH/mod/auth_img/auth_img.php?ACIXSTORE=643u4hfveif4and3kbudjqusu7
@@ -62,7 +66,11 @@ export const scrapeArchivedCourses = async (env: Env, semester: string) => {
6266
const acixStore = acixStoreMatch[1];
6367

6468
const ocrResults = await fetch(`https://ocr.nthumods.com/?url=https://www.ccxp.nthu.edu.tw/ccxp/INQUIRE/JH/mod/auth_img/auth_img.php?ACIXSTORE=${acixStore}&d=3`)
65-
.then((res) => res.text());
69+
.then((res) => res.text())
70+
.catch((error) => {
71+
console.error("Failed to fetch OCR results:", error);
72+
throw new Error("Failed to fetch OCR results, please try again later");
73+
})
6674
if (ocrResults.length != 3) {
6775
throw new Error("OCR results are not valid, please try again later");
6876
}
@@ -154,7 +162,11 @@ export const scrapeArchivedCourses = async (env: Env, semester: string) => {
154162
}),
155163
method: "POST",
156164
cf: { cacheTtl: 0 }
157-
});
165+
})
166+
.catch((error) => {
167+
console.error(`Failed to fetch courses for ${department.code} ${yearSemester}:`, error);
168+
throw new Error(`Failed to fetch courses for ${department.code} ${yearSemester}, please try again later`);
169+
});
158170
return response;
159171
};
160172

@@ -168,7 +180,10 @@ export const scrapeArchivedCourses = async (env: Env, semester: string) => {
168180
.then((res) => res.arrayBuffer())
169181
.then((arrayBuffer) =>
170182
new TextDecoder("big5").decode(new Uint8Array(arrayBuffer)),
171-
);
183+
)
184+
.catch((error) => {
185+
console.error(`Failed to fetch courses for ${department.code} ${semester}:`, error);
186+
});
172187
const doc = parseHTML(text).document;
173188

174189

@@ -393,7 +408,14 @@ const downloadPDF = async (env: Env, url: string, c_key: string) => {
393408
//get url+c_key file as a arrayBuffer
394409
const file = await fetch(url, { cf: { cacheTtl: 0 } })
395410
.then((res) => res.arrayBuffer())
396-
.then((arrayBuffer) => Buffer.from(arrayBuffer));
411+
.then((arrayBuffer) => Buffer.from(arrayBuffer))
412+
.catch((error) => {
413+
console.error(`Failed to download PDF for ${c_key}:`, error);
414+
});
415+
if (!file) {
416+
console.error(`No file found for ${c_key}, skipping...`);
417+
return;
418+
}
397419
//save file to local fs
398420
// await fs.writeFileSync(c_key + '.pdf', file)
399421
await supabaseWithEnv(env.SUPABASE_URL, env.SUPABASE_SERVICE_ROLE_KEY).storage
@@ -477,7 +499,11 @@ export const scrapeSyllabus = async (
477499
.then((res) => res.arrayBuffer())
478500
.then((arrayBuffer) =>
479501
new TextDecoder("big5").decode(new Uint8Array(arrayBuffer)),
480-
);
502+
)
503+
.catch((error) => {
504+
console.error(`Failed to fetch syllabus for ${c_key}:`, error);
505+
return "";
506+
});
481507
return text;
482508
}; const courses = cachedCourses ?? await fetchCourses();
483509

@@ -521,7 +547,7 @@ export const scrapeSyllabus = async (
521547
};
522548

523549
// Process courses with concurrency limit of 50
524-
const concurrencyLimit = 20;
550+
const concurrencyLimit = 50;
525551
for (let i = 0; i < courses.length; i += concurrencyLimit) {
526552
const batch = courses.slice(i, i + concurrencyLimit);
527553
await Promise.all(batch.map(processCourse));
@@ -567,7 +593,7 @@ export const syncCoursesToAlgolia = async (env: Env, semester: string) => {
567593
console.log(`Saved ${algoliaChunk.length} courses to Algolia, taskID: ${taskIDs}`);
568594
})
569595
.catch((error) => {
570-
console.error("Error saving courses to Algolia:");
596+
console.error("Error saving courses to Algolia:", error);
571597
});
572598
}
573599

0 commit comments

Comments
 (0)