1
1
import consola from "consola" ;
2
2
import { HTMLAnchorElement , HTMLElement , parseHTML } from "linkedom" ;
3
+ import { buildScreenshotUrl } from "../utils/urls" ;
3
4
4
5
export async function crawlExtension (
5
6
id : string ,
@@ -21,7 +22,7 @@ export async function crawlExtension(
21
22
const { document } = parseHTML ( html ) ;
22
23
23
24
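// Debug aid: dumps the parsed HTML to "chrome.html". NOTE(review): the write below is currently enabled - re-comment it before merging.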
24
- // Bun.write("chrome.html", document.documentElement.outerHTML);
25
+ Bun . write ( "chrome.html" , document . documentElement . outerHTML ) ;
25
26
26
27
// Basic metadata
27
28
const name = metaContent ( document , "property=og:title" ) ?. replace (
@@ -106,6 +107,23 @@ export async function crawlExtension(
106
107
// const rating = extractNumber(ratingDiv.title); // "Average rating: 4.78 stars"
107
108
// const reviewCount = extractNumber(ratingDiv.textContent); // "(1024)"
108
109
110
+ // <div
111
+ // aria-label="Item media 1 screenshot"
112
+ // data-media-url="https://lh3.googleusercontent.com/GUgh0ThX2FDPNvbaumYl4DqsUhsbYiCe-Hut9FoVEnkmTrXyA-sHbMk5jmZTj_t-dDP8rAmy6X6a6GNTCn9F8zo4VYU"
113
+ // data-is-video="false"
114
+ // data-slide-index="0"
115
+ // >
116
+ const screenshots = [ ...document . querySelectorAll ( "div[data-media-url]" ) ]
117
+ . filter ( ( div ) => div . getAttribute ( "data-is-video" ) === "false" )
118
+ . map < Gql . Screenshot > ( ( div ) => {
119
+ const index = Number ( div . getAttribute ( "data-slide-index" ) || - 1 ) ;
120
+ return {
121
+ index,
122
+ rawUrl : div . getAttribute ( "data-media-url" ) + "=s1280" , // "s1280" gets the full resolution
123
+ indexUrl : buildScreenshotUrl ( "chrome-extensions" , id , index ) ,
124
+ } ;
125
+ } ) ;
126
+
109
127
if ( name == null ) return ;
110
128
if ( storeUrl == null ) return ;
111
129
if ( iconUrl == null ) return ;
@@ -114,6 +132,12 @@ export async function crawlExtension(
114
132
if ( version == null ) return ;
115
133
if ( shortDescription == null ) return ;
116
134
if ( longDescription == null ) return ;
135
+ if (
136
+ screenshots . some (
137
+ ( screenshot ) => screenshot . index === - 1 || ! screenshot . rawUrl ,
138
+ )
139
+ )
140
+ return ;
117
141
118
142
const result : Gql . ChromeExtension = {
119
143
id,
@@ -127,6 +151,7 @@ export async function crawlExtension(
127
151
longDescription,
128
152
rating,
129
153
reviewCount,
154
+ screenshots,
130
155
} ;
131
156
consola . debug ( "Crawl results:" , result ) ;
132
157
return result ;
0 commit comments