Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .eslintrc.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ module.exports = {
},
],
"@typescript-eslint/no-floating-promises": "error",
"@typescript-eslint/await-thenable": "error"
"@typescript-eslint/await-thenable": "error",
},
reportUnusedDisableDirectives: true,
};
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,4 @@ jobs:
run: |
sudo rm -rf ./test-crawls
mkdir test-crawls
sudo CI=true yarn test ./tests/saved-state.test.js ./tests/qa_compare.test.js
sudo CI=true yarn test ./tests/saved-state.test.ts ./tests/qa_compare.test.ts
18 changes: 18 additions & 0 deletions jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Jest configuration: test files are transformed by ts-jest in ESM mode
// using the dedicated tsconfig.test.json.
/** @type {import("ts-jest").JestConfigWithTsJest} **/
export default {
// Run tests in Jest's Node environment.
testEnvironment: "node",
// Treat .ts files as native ES modules.
extensionsToTreatAsEsm: [".ts"],
moduleNameMapper: {
// Strip a trailing ".js" from relative specifiers ("./x.js", "../x.js")
// before resolution; presumably so TypeScript sources that import with
// a .js suffix resolve to their .ts files -- confirm against the sources.
"^(\\.{1,2}/.*)\\.js$": "$1",
},
transform: {
// Route every .ts / .tsx file through ts-jest.
"^.+\\.tsx?$": [
"ts-jest",
{
// Have ts-jest emit ESM, matching extensionsToTreatAsEsm above.
useESM: true,
// Compile tests with the test-specific TypeScript project file.
tsconfig: "tsconfig.test.json",
},
],
},
// Default per-test timeout, in milliseconds (90 seconds).
testTimeout: 90000,
};
10 changes: 5 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,11 @@
},
"devDependencies": {
"@types/get-folder-size": "^3.0.4",
"@types/jest": "^30.0.0",
"@types/js-levenshtein": "^1.1.3",
"@types/js-yaml": "^4.0.8",
"@types/node": "^20.8.7",
"@types/md5": "^2.3.6",
"@types/node": "^25.5.0",
"@types/pixelmatch": "^5.2.6",
"@types/pngjs": "^6.0.4",
"@types/sax": "^1.2.7",
Expand All @@ -67,12 +69,10 @@
"md5": "^2.3.0",
"prettier": "3.0.3",
"puppeteer": "^24.4.0",
"ts-jest": "^29.4.9",
"type-fest": "^5.5.0",
"typescript": "^5.5.4"
},
"jest": {
"transform": {},
"testTimeout": 90000
},
"resolutions": {
"wrap-ansi": "7.0.0",
"warcio": "^2.4.9",
Expand Down
7 changes: 5 additions & 2 deletions src/util/storage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ export async function checkDiskUtilization(
collDir: string,
params: CrawlerArgs,
archiveDirSize: number,
dfOutput = null,
dfOutput: string | null = null,
doLog = true,
) {
const diskUsage: Record<string, string> = await getDiskUsage(
Expand Down Expand Up @@ -350,7 +350,10 @@ export async function getDFOutput(path: string) {
return res.stdout;
}

export async function getDiskUsage(path = "/crawls", dfOutput = null) {
export async function getDiskUsage(
path = "/crawls",
dfOutput: string | null = null,
) {
const result = dfOutput || (await getDFOutput(path));
const lines = result.split("\n");
const keys = lines[0].split(/\s+/gi);
Expand Down
Binary file removed tests/.DS_Store
Binary file not shown.
12 changes: 8 additions & 4 deletions tests/adblockrules.test.js → tests/adblockrules.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@ import child_process from "child_process";
import fs from "fs";
import yaml from "js-yaml";

function runCrawl(name, config, commandExtra = "") {
function runCrawl(
name: string,
config: Record<string, unknown>,
commandExtra = "",
) {
config.generateCDX = true;
config.depth = 0;
config.collection = name;

const configYaml = yaml.dump(config);

try {
const proc = child_process.execSync(
child_process.execSync(
`docker run -i -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --config stdin ${commandExtra}`,
{ input: configYaml, stdin: "inherit", encoding: "utf8" },
{ input: configYaml, stdio: ["pipe", "ignore", "ignore"], encoding: "utf8" },
);

//console.log(proc);
Expand All @@ -21,7 +25,7 @@ function runCrawl(name, config, commandExtra = "") {
}
}

function doesCDXContain(coll, value) {
function doesCDXContain(coll: string, value: string) {
const data = fs.readFileSync(
`test-crawls/collections/${coll}/indexes/index.cdxj`,
);
Expand Down
39 changes: 26 additions & 13 deletions tests/add-exclusion.test.js → tests/add-exclusion.test.ts
Original file line number Diff line number Diff line change
@@ -1,32 +1,46 @@
import { exec } from "child_process";
import { exec, ExecException } from "child_process";
import Redis from "ioredis";

/**
 * Pause for a fixed delay.
 *
 * @param ms - delay in milliseconds before the returned promise settles
 * @returns a promise that resolves (with no value) once the timer fires
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
import { sleep } from "./utils";

test("dynamically add exclusion while crawl is running", async () => {
let callback = null;
let callback:
| ((
error: ExecException | null,
stdout: NonSharedBuffer,
stderr: NonSharedBuffer,
) => void)
| null = null;

const p = new Promise((resolve) => {
callback = (error, stdout, stderr) => {
resolve({ error, stdout, stderr });
const p = new Promise<{
error: ExecException | null;
stdout: NonSharedBuffer;
stderr: NonSharedBuffer;
}>((resolve) => {
callback = (
error: ExecException | null,
stdout: NonSharedBuffer,
stderr: NonSharedBuffer,
) => {
resolve({ error, stdout, stderr } as const);
};
});

try {
exec(
"docker run -p 36382:6379 -e CRAWL_ID=test -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection add-exclusion --url https://old.webrecorder.net/ --scopeType prefix --limit 20 --logging debug --debugAccessRedis",
{ shell: "/bin/bash" },
callback,
{ shell: "/bin/bash", encoding: "buffer" },
callback!,
);
} catch (error) {
console.log(error);
}

await sleep(3000);

const redis = new Redis("redis://127.0.0.1:36382/0", { lazyConnect: true, retryStrategy: () => null })
const redis = new Redis("redis://127.0.0.1:36382/0", {
lazyConnect: true,
retryStrategy: () => null,
});

await redis.connect();

Expand All @@ -53,4 +67,3 @@ test("dynamically add exclusion while crawl is running", async () => {

expect(stdout.indexOf("Removing excluded URL") > 0).toBe(true);
});

10 changes: 4 additions & 6 deletions tests/basic_crawl.test.js → tests/basic_crawl.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ import child_process from "child_process";
import fs from "fs";
import path from "path";
import md5 from "md5";

const doValidate = process.argv.filter((x) => x.startsWith('-validate'))[0];
const testIf = (condition, ...args) => condition ? test(...args) : test.skip(...args);
import { doValidate, testIf } from "./utils";

test("ensure basic crawl run with docker run passes", async () => {
child_process.execSync(
Expand Down Expand Up @@ -38,9 +36,9 @@ test("check that individual WARCs have correct prefix and are under rollover siz

test("check that a combined warc file exists in the archive folder", () => {
const warcLists = fs.readdirSync("test-crawls/collections/wr-net");
var captureFound = 0;
let captureFound = 0;

for (var i = 0; i < warcLists.length; i++) {
for (let i = 0; i < warcLists.length; i++) {
if (warcLists[i].endsWith("_0.warc.gz")) {
captureFound = 1;
}
Expand All @@ -54,7 +52,7 @@ test("check that a combined warc file is under the rolloverSize", () => {
);
let rolloverSize = 0;

function getFileSize(filename) {
function getFileSize(filename: string) {
return fs.statSync(filename).size;
}

Expand Down
Loading
Loading