Skip to content

Commit f461147

Browse files
authored
feat: Extract screenshots from CWS and Firefox addons (#3)
1 parent 74249c1 commit f461147

14 files changed

+189
-7
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,7 @@ tsconfig.tsbuildinfo
1818
*.njsproj
1919
*.sln
2020
*.sw?
21+
22+
# .env files
23+
.env
24+
.env.*

bun.lockb

340 Bytes
Binary file not shown.

package.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919
"dataloader": "^2.2.2",
2020
"graphql": "^16.8.0",
2121
"linkedom": "^0.15.3",
22-
"picocolors": "^1.0.0"
22+
"picocolors": "^1.0.0",
23+
"radix3": "^1.1.2"
2324
},
2425
"devDependencies": {
2526
"@aklinker1/check": "^1.2.0",

src/apis/firefox-api.ts

+8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import consola from "consola";
2+
import { buildScreenshotUrl } from "../utils/urls";
23

34
export function createFirefoxApiClient() {
45
return {
@@ -29,6 +30,13 @@ export function createFirefoxApiClient() {
2930
storeUrl: json.url,
3031
version: json.current_version.version,
3132
dailyActiveUsers: json.average_daily_users,
33+
screenshots: (json.previews as any[]).map<Gql.Screenshot>(
34+
(preview, i) => ({
35+
index: i,
36+
rawUrl: preview.image_url,
37+
indexUrl: buildScreenshotUrl("firefox-addons", json.id, i),
38+
}),
39+
),
3240
};
3341
},
3442
};

src/crawlers/__tests__/chrome-crawler.test.ts

+14
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,20 @@ describe("Chrome Web Store Crawler", () => {
2222
"https://chromewebstore.google.com/detail/github-better-line-counts/ocfdgncpifmegplaglcnglhioflaimkd",
2323
version: expect.any(String),
2424
weeklyActiveUsers: expect.any(Number),
25+
screenshots: [
26+
{
27+
index: 0,
28+
indexUrl:
29+
"http://localhost:3000/api/rest/chrome-extensions/ocfdgncpifmegplaglcnglhioflaimkd/screenshots/0",
30+
rawUrl: expect.any(String),
31+
},
32+
{
33+
index: 1,
34+
indexUrl:
35+
"http://localhost:3000/api/rest/chrome-extensions/ocfdgncpifmegplaglcnglhioflaimkd/screenshots/1",
36+
rawUrl: expect.any(String),
37+
},
38+
],
2539
});
2640
});
2741
});

src/crawlers/chrome-crawler.ts

+26-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import consola from "consola";
22
import { HTMLAnchorElement, HTMLElement, parseHTML } from "linkedom";
3+
import { buildScreenshotUrl } from "../utils/urls";
34

45
export async function crawlExtension(
56
id: string,
@@ -21,7 +22,7 @@ export async function crawlExtension(
2122
const { document } = parseHTML(html);
2223

2324
// Uncomment to debug HTML
24-
// Bun.write("chrome.html", document.documentElement.outerHTML);
25+
// Bun.write("chrome.html", document.documentElement.outerHTML);
2526

2627
// Basic metadata
2728
const name = metaContent(document, "property=og:title")?.replace(
@@ -106,6 +107,23 @@ export async function crawlExtension(
106107
// const rating = extractNumber(ratingDiv.title); // "Average rating: 4.78 stars"
107108
// const reviewCount = extractNumber(ratingDiv.textContent); // "(1024)"
108109

110+
// <div
111+
// aria-label="Item media 1 screenshot"
112+
// data-media-url="https://lh3.googleusercontent.com/GUgh0ThX2FDPNvbaumYl4DqsUhsbYiCe-Hut9FoVEnkmTrXyA-sHbMk5jmZTj_t-dDP8rAmy6X6a6GNTCn9F8zo4VYU"
113+
// data-is-video="false"
114+
// data-slide-index="0"
115+
// >
116+
// Collect every non-video media slide as a screenshot. Malformed entries are
// normalized to sentinel values (`index: -1`, `rawUrl: ""`) so that the
// validation further down can detect them and abort the crawl.
const screenshots = [...document.querySelectorAll("div[data-media-url]")]
  .filter((div) => div.getAttribute("data-is-video") === "false")
  .map<Gql.Screenshot>((div) => {
    // A missing, empty, non-numeric, fractional or negative slide index
    // collapses to -1 (plain `Number(...)` would let NaN through unnoticed).
    const slideIndexAttr = div.getAttribute("data-slide-index");
    const slideIndex = slideIndexAttr ? Number(slideIndexAttr) : -1;
    const index =
      Number.isInteger(slideIndex) && slideIndex >= 0 ? slideIndex : -1;

    // Only append the size suffix when there is a URL to append it to;
    // otherwise the result would be the truthy string "null=s1280"/"=s1280".
    const mediaUrl = div.getAttribute("data-media-url");
    const rawUrl = mediaUrl ? `${mediaUrl}=s1280` : ""; // "s1280" gets the full resolution

    return {
      index,
      rawUrl,
      indexUrl: buildScreenshotUrl("chrome-extensions", id, index),
    };
  });
126+
109127
if (name == null) return;
110128
if (storeUrl == null) return;
111129
if (iconUrl == null) return;
@@ -114,6 +132,12 @@ export async function crawlExtension(
114132
if (version == null) return;
115133
if (shortDescription == null) return;
116134
if (longDescription == null) return;
135+
if (
136+
screenshots.some(
137+
(screenshot) => screenshot.index === -1 || !screenshot.rawUrl,
138+
)
139+
)
140+
return;
117141

118142
const result: Gql.ChromeExtension = {
119143
id,
@@ -127,6 +151,7 @@ export async function crawlExtension(
127151
longDescription,
128152
rating,
129153
reviewCount,
154+
screenshots,
130155
};
131156
consola.debug("Crawl results:", result);
132157
return result;

src/rest/getChromeScreenshot.ts

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import type { ChromeService } from "../services/chrome-service";
2+
import { RouteHandler } from "../utils/rest-router";
3+
4+
/**
 * Creates the REST handler that resolves a Chrome extension screenshot by its
 * index and redirects to the screenshot's raw URL.
 *
 * @param chrome Service used to look up the extension by `params.id`.
 * @returns A route handler that responds with a redirect to `rawUrl`, or with
 *   a 404 when the index is not a plain non-negative integer, the extension is
 *   not found, or it has no screenshot with that index.
 */
export const getChromeScreenshot =
  (chrome: ChromeService): RouteHandler<{ id: string; index: string }> =>
  async (params) => {
    // Reject anything that is not a canonical decimal index before doing any
    // lookup: `Number()` alone would also accept "0x1", "1e0", " 1", "1.0"...
    if (!/^\d+$/.test(params.index)) {
      return new Response(null, { status: 404 });
    }
    const index = Number(params.index);

    const extension = await chrome.getExtension(params.id);
    const screenshot = extension?.screenshots.find(
      (candidate) => candidate.index === index,
    );

    if (screenshot == null) return new Response(null, { status: 404 });
    return Response.redirect(screenshot.rawUrl);
  };

src/rest/getFirefoxScreenshot.ts

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import type { FirefoxService } from "../services/firefox-service";
2+
import { RouteHandler } from "../utils/rest-router";
3+
4+
/**
 * Creates the REST handler that resolves a Firefox addon screenshot by its
 * index and redirects to the screenshot's raw URL.
 *
 * @param firefox Service used to look up the addon by `params.id`.
 * @returns A route handler that responds with a redirect to `rawUrl`, or with
 *   a 404 when the index is not a plain non-negative integer, the addon is not
 *   found, or it has no screenshot with that index.
 */
export const getFirefoxScreenshot =
  (firefox: FirefoxService): RouteHandler<{ id: string; index: string }> =>
  async (params) => {
    // Reject anything that is not a canonical decimal index before doing any
    // lookup: `Number()` alone would also accept "0x1", "1e0", " 1", "1.0"...
    if (!/^\d+$/.test(params.index)) {
      return new Response(null, { status: 404 });
    }
    const index = Number(params.index);

    const addon = await firefox.getAddon(params.id);
    const screenshot = addon?.screenshots.find(
      (candidate) => candidate.index === index,
    );

    if (screenshot == null) return new Response(null, { status: 404 });
    return Response.redirect(screenshot.rawUrl);
  };

src/schema.gql

+17
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ type ChromeExtension {
3333
lastUpdated: String!
3434
rating: Float
3535
reviewCount: Int
36+
screenshots: [Screenshot!]!
3637
}
3738

3839
type FirefoxAddon {
@@ -47,4 +48,20 @@ type FirefoxAddon {
4748
lastUpdated: String!
4849
rating: Float
4950
reviewCount: Int
51+
screenshots: [Screenshot!]!
52+
}
53+
54+
type Screenshot {
55+
"""
56+
The screenshot's order.
57+
"""
58+
index: Int!
59+
"""
60+
The image's raw URL provided by the service. When screenshots are updated, this URL changes.
61+
"""
62+
rawUrl: String!
63+
"""
64+
URL to the image based on the index. If the raw URL changes, the `indexUrl` will remain constant, good for links in README.md files.
65+
"""
66+
indexUrl: String!
5067
}

src/server.ts

+22-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ import playgroundHtmlTemplate from "./public/playground.html";
55
import consola from "consola";
66
import { createChromeService } from "./services/chrome-service";
77
import { createFirefoxService } from "./services/firefox-service";
8+
import { createRestRouter } from "./utils/rest-router";
9+
import { getChromeScreenshot } from "./rest/getChromeScreenshot";
10+
import { getFirefoxScreenshot } from "./rest/getFirefoxScreenshot";
11+
import { SERVER_ORIGIN } from "./utils/urls";
812

913
const playgroundHtml = playgroundHtmlTemplate.replace(
1014
"{{VERSION}}",
@@ -22,6 +26,16 @@ export function createServer(config?: ServerConfig) {
2226
firefox,
2327
});
2428

29+
const restRouter = createRestRouter()
30+
.get(
31+
"/api/rest/chrome-extensions/:id/screenshots/:index",
32+
getChromeScreenshot(chrome),
33+
)
34+
.get(
35+
"/api/rest/firefox-addons/:id/screenshots/:index",
36+
getFirefoxScreenshot(firefox),
37+
);
38+
2539
const httpServer = Bun.serve({
2640
port,
2741
error(request) {
@@ -32,8 +46,15 @@ export function createServer(config?: ServerConfig) {
3246
return createResponse(undefined, { status: 204 });
3347
}
3448

49+
const url = new URL(req.url, SERVER_ORIGIN);
50+
51+
// REST
52+
if (url.pathname.startsWith("/api/rest")) {
53+
return restRouter.fetch(url, req);
54+
}
55+
3556
// GraphQL
36-
if (req.url.endsWith("/api")) {
57+
if (url.pathname.startsWith("/api")) {
3758
const data = await graphql.evaluateQuery(req);
3859

3960
return createResponse(JSON.stringify(data), {

src/services/chrome-service.ts

+7-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,11 @@ export function createChromeService() {
1616
});
1717

1818
return {
19-
getExtension: (id: string) => loader.load(id),
20-
getExtensions: async (ids: string[]) => {
19+
getExtension: (id: string): Promise<Gql.ChromeExtension | undefined> =>
20+
loader.load(id),
21+
getExtensions: async (
22+
ids: string[],
23+
): Promise<Array<Gql.ChromeExtension | undefined>> => {
2124
const result = await loader.loadMany(ids);
2225
return result.map((item, index) => {
2326
if (item instanceof Error) {
@@ -29,3 +32,5 @@ export function createChromeService() {
2932
},
3033
};
3134
}
35+
36+
export type ChromeService = ReturnType<typeof createChromeService>;

src/services/firefox-service.ts

+7-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@ export function createFirefoxService() {
1111
>(HOUR_MS, (ids) => Promise.all(ids.map((id) => firefox.getAddon(id))));
1212

1313
return {
14-
getAddon: (id: string | number) => loader.load(id),
15-
getAddons: async (ids: Array<string | number>) => {
14+
getAddon: (id: string | number): Promise<Gql.FirefoxAddon | undefined> =>
15+
loader.load(id),
16+
getAddons: async (
17+
ids: Array<string | number>,
18+
): Promise<Array<Gql.FirefoxAddon | undefined>> => {
1619
const result = await loader.loadMany(ids);
1720
return result.map((item) => {
1821
if (item == null) return undefined;
@@ -25,3 +28,5 @@ export function createFirefoxService() {
2528
},
2629
};
2730
}
31+
32+
export type FirefoxService = ReturnType<typeof createFirefoxService>;

src/utils/rest-router.ts

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import * as radix3 from "radix3";
2+
3+
export type RouteHandler<TParams = {}> = (
4+
params: TParams,
5+
url: URL,
6+
req: Request,
7+
) => Response | Promise<Response>;
8+
9+
export interface Route {
10+
method: string;
11+
handler: RouteHandler;
12+
}
13+
14+
/**
 * Creates a small chainable REST router on top of a radix3 path matcher.
 *
 * Handlers are registered per (method, path) pair. Several methods may be
 * registered on the same path; the special method `"ANY"` matches every
 * request method and is used as a fallback when no exact method is registered.
 *
 * @returns An object with `get` / `post` / `any` / `on` registration methods
 *   (each returns the router for chaining) and a `fetch(url, req)` method that
 *   dispatches to the matching handler or responds with a 404.
 */
export function createRestRouter() {
  // One radix node per path pattern, holding a method -> handler table. Storing
  // a single `{ method, handler }` per node would make a second registration on
  // the same path silently replace the first one.
  type MethodTable = { handlers: Map<string, RouteHandler<any>> };

  const r = radix3.createRouter<MethodTable>();
  const tablesByPath = new Map<string, MethodTable>();

  const register = (
    method: string,
    path: string,
    handler: RouteHandler<any>,
  ): void => {
    let table = tablesByPath.get(path);
    if (table == null) {
      table = { handlers: new Map() };
      tablesByPath.set(path, table);
      r.insert(path, table);
    }
    table.handlers.set(method, handler);
  };

  const router = {
    get(path: string, handler: RouteHandler<any>) {
      register("GET", path, handler);
      return router;
    },
    post(path: string, handler: RouteHandler<any>) {
      register("POST", path, handler);
      return router;
    },
    any(path: string, handler: RouteHandler<any>) {
      register("ANY", path, handler);
      return router;
    },
    on(method: string, path: string, handler: RouteHandler<any>) {
      register(method, path, handler);
      return router;
    },
    async fetch(url: URL, req: Request): Promise<Response> {
      const match = r.lookup(url.pathname);
      // Prefer the handler registered for the exact method, then "ANY".
      const handler =
        match?.handlers.get(req.method) ?? match?.handlers.get("ANY");
      if (match == null || handler == null) {
        return new Response(null, { status: 404 });
      }
      return await handler(match.params ?? {}, url, req);
    },
  };
  return router;
}

src/utils/urls.ts

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/**
 * Origin used to build absolute URLs that point back at this server. Read from
 * the `SERVER_ORIGIN` environment variable, falling back to
 * `http://localhost:3000`. Trailing slashes are stripped so that joining with
 * a `/api/...` path never produces a double slash.
 */
export const SERVER_ORIGIN = (
  process.env.SERVER_ORIGIN ?? "http://localhost:3000"
).replace(/\/+$/, "");

/**
 * Builds the stable, index-based REST URL of a screenshot.
 *
 * @param base Which store the item belongs to; becomes the path segment after
 *   `/api/rest/`.
 * @param id Store identifier of the extension/addon. Numbers are accepted in
 *   addition to strings (NOTE(review): the Firefox client passes `json.id`
 *   from an untyped payload — presumably numeric; confirm). The value is
 *   URL-encoded so it always occupies exactly one path segment.
 * @param index Position of the screenshot.
 * @returns `${SERVER_ORIGIN}/api/rest/<base>/<id>/screenshots/<index>`.
 */
export function buildScreenshotUrl(
  base: "chrome-extensions" | "firefox-addons",
  id: string | number,
  index: number,
): string {
  const encodedId = encodeURIComponent(id);
  return `${SERVER_ORIGIN}/api/rest/${base}/${encodedId}/screenshots/${index}`;
}

0 commit comments

Comments
 (0)