Skip to content

Commit fd5cd71

Browse files
authored
New Action in Scrapy-puppeteer-service (RecaptchaSolver) added (#31)
* Added new action (recaptcha_solver) * Fixed dependencies and removed extra code * Implemented RecaptchaSolver, added comments and description. Changed 2captcha token name. Started to think about fingerprints in browser. * Implemented RecaptchaSolver, added comments and description. Changed 2captcha token name. Added StealthPlugin * HotFix: fixed token_2captcha environment variable name * Deleted extra properties, handled "no token_2captcha" error (scrapy-puppeteer)
1 parent 31be376 commit fd5cd71

File tree

5 files changed

+477
-7
lines changed

5 files changed

+477
-7
lines changed

app.js

+33-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
const express = require('express');
2-
const puppeteer = require('puppeteer');
2+
const puppeteer = require('puppeteer-extra')
3+
4+
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha')
5+
const StealthPlugin = require('puppeteer-extra-plugin-stealth')
36
const cookieParser = require('cookie-parser');
47
const logger = require('morgan');
58
const bodyParser = require('body-parser');
@@ -13,6 +16,7 @@ const clickRouter = require('./routes/click');
1316
const actionRouter = require('./routes/action');
1417
const scrollRouter = require('./routes/scroll');
1518
const screenshotRouter = require('./routes/screenshot');
19+
const recaptchaSolverRouter = require('./routes/recaptcha_solver')
1620
const mhtmlRouter = require('./routes/mhtml');
1721
const harRouter = require('./routes/har');
1822
const closeContextRouter = require('./routes/close_context');
@@ -23,8 +27,35 @@ const HEADLESS = (process.env.HEADLESS || "true").toLowerCase() === "true";
2327
const CONNECT_TIMEOUT = parseInt(process.env.CONNECT_TIMEOUT) || 180000;
2428
const VIEWPORT_WIDTH = parseInt(process.env.VIEWPORT_WIDTH) || 1280;
2529
const VIEWPORT_HEIGHT = parseInt(process.env.VIEWPORT_HEIGHT) || 720;
30+
const TOKEN_2CAPTCHA = process.env.TOKEN_2CAPTCHA;
31+
const STEALTH_BROWSING = (process.env.STEALTH_BROWSING || "true").toLowerCase() === "true";
2632

2733
async function setupBrowser() {
34+
try {
35+
if (TOKEN_2CAPTCHA) { // If token is given then RecapcthaPlugin is activated
36+
puppeteer.use(
37+
RecaptchaPlugin({
38+
provider: {
39+
id: '2captcha',
40+
token: TOKEN_2CAPTCHA
41+
}
42+
})
43+
)
44+
}
45+
} catch (error) {
46+
console.error('Failed to proceed 2captcha token:', error);
47+
process.exit(1);
48+
}
49+
50+
try {
51+
if (STEALTH_BROWSING) { // Activate or not StealthPlugin
52+
puppeteer.use(StealthPlugin());
53+
}
54+
} catch (error) {
55+
console.error('Failed to enable StealthPlugin:', error);
56+
process.exit(1);
57+
}
58+
2859
try {
2960
//TODO add more params for puppeteer launch
3061
const browser = await puppeteer.launch(
@@ -36,7 +67,6 @@ async function setupBrowser() {
3667
browser.on('disconnected', setupBrowser);
3768
app.set('browser', browser);
3869
} catch (error) {
39-
console.error('Failed to start browser:', error);
4070
process.exit(1);
4171
}
4272
}
@@ -60,6 +90,7 @@ app.use('/click', clickRouter);
6090
app.use('/action', actionRouter);
6191
app.use('/scroll', scrollRouter);
6292
app.use('/screenshot', screenshotRouter);
93+
app.use('/recaptcha_solver', recaptchaSolverRouter);
6394
app.use('/mhtml', mhtmlRouter);
6495
app.use('/har', harRouter);
6596
app.use('/close_context', closeContextRouter);

helpers/utils.js

+1
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ exports.getBrowserPage = async function getBrowserPage(browser, request) {
128128
return newPage(context);
129129
}
130130
const { origin: proxyServer, username, password } = new URL(proxy);
131+
131132
const context = await browser.createIncognitoBrowserContext({ proxyServer });
132133
context[PROXY_URL_KEY] = proxy;
133134
const page = await newPage(context);

package.json

+4
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@
1111
"cookie-parser": "~1.4.6",
1212
"debug": "~4.3.4",
1313
"express": "~4.17.3",
14+
"fingerprint-injector": "^2.1.30",
1415
"morgan": "~1.10.0",
1516
"npm-run-all": "^4.1.5",
1617
"puppeteer": "^20.1.2",
18+
"puppeteer-extra": "^3.3.6",
19+
"puppeteer-extra-plugin-recaptcha": "^3.6.8",
20+
"puppeteer-extra-plugin-stealth": "^2.11.2",
1721
"puppeteer-proxy": "^2.1.2"
1822
}
1923
}

routes/recaptcha_solver.js

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
const express = require('express')
2+
const router = express.Router()
3+
const utils = require('../helpers/utils')
4+
5+
const DEFAULT_TIMEOUT = 1000; // 1 second
6+
7+
/*
8+
* This module introduces a new ability to puppeteer-service.
9+
* It is capable of solving recaptchas on the given web-page.
10+
* If there is no recaptcha on the page nothing bad will happen.
11+
* If there is recaptcha it solves it and then inserts the special code
12+
* into the page automatically.
13+
*
14+
* Returns useful information about recaptcha_solving.
15+
* For more information about return value visit
16+
* https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-recaptcha#result-object
17+
*/
18+
19+
/**
 * Recaptcha action run against an already opened page.
 *
 * When `request.body.solve_recaptcha` is truthy the page's `solveRecaptchas()`
 * is called, otherwise `findRecaptchas()` is called. Whatever that call
 * resolves to is returned under the `recaptcha_data` key, next to the fields
 * produced by `utils.formResponse`.
 *
 * @param page - page with possible recaptcha.
 * @param request - request to the page; reads `body.solve_recaptcha`,
 *                  `body.waitOptions` and `query.closePage`.
 * @returns the `utils.formResponse` result extended with `recaptcha_data`.
 */
async function action(page, request) {
    const recaptcha_data = request.body.solve_recaptcha
        ? await page.solveRecaptchas()
        : await page.findRecaptchas();

    // Fall back to the module-level default when the caller sent no waitOptions.
    const waitOptions = request.body.waitOptions || { timeout: DEFAULT_TIMEOUT };
    const pageResponse = await utils.formResponse(page, request.query.closePage, waitOptions);

    return { ...pageResponse, recaptcha_data };
}
42+
43+
/**
 * POST / - run the recaptcha action on an existing page.
 *
 * Responds with:
 *  - 400 when `contextId` or `pageId` is missing from the query string;
 *  - 501 when the TOKEN_2CAPTCHA environment variable is not set;
 *  - 400 when the request body is not an object or lacks `solve_recaptcha`;
 *  - otherwise the result of `utils.performAction(req, action)`, with the
 *    `scrapy-puppeteer-service-context-id` header set from `response.contextId`.
 *
 * Errors raised while performing the action are forwarded to `next`.
 */
router.post('/', async function (req, res, next) {
    // Validation failures end the request here. `next()` is deliberately NOT
    // called after sending: passing control on would let later middleware try
    // to write to a response that has already been sent.
    if (!req.query.contextId || !req.query.pageId) {
        res.status(400).send("No page in request");
        return;
    }

    if (!process.env.TOKEN_2CAPTCHA) {
        res.status(501).send("TOKEN_2CAPTCHA is not provided!");
        return;
    }

    // The `in` operator throws on undefined/primitive operands, so make sure
    // the body is an object before probing it.
    const body = req.body;
    const hasSolveFlag = body !== null && typeof body === 'object' && "solve_recaptcha" in body;
    if (!hasSolveFlag) {
        res.status(400).send("No solve_recaptcha parameter in request");
        return;
    }

    try {
        const response = await utils.performAction(req, action);
        res.header('scrapy-puppeteer-service-context-id', response.contextId);
        res.send(response);
    } catch (e) {
        next(e);
    }
});
73+
74+
module.exports = router;

0 commit comments

Comments
 (0)