Cookies improvements (#9)
* don't set cookies as extra headers

* fix typo

* use puppeteer-proxy (based on got) instead of node-fetch

* minor improvements

* version++
mxsnq authored Nov 24, 2020
1 parent 20c1ecd commit 34b66d8
Showing 12 changed files with 472 additions and 480 deletions.
17 changes: 0 additions & 17 deletions helpers/fetcher.js

This file was deleted.

68 changes: 28 additions & 40 deletions helpers/utils.js
@@ -1,4 +1,5 @@
-const fetch = require('./fetcher');
+const { proxyRequest } = require('puppeteer-proxy');
+const PAGE_PROXY_URL_KEY = 'puppeteer-service-proxy-url'
 
 async function findContextInBrowser(browser, contextId) {

@@ -67,37 +68,12 @@ async function newPage(context) {
     await page.setRequestInterception(true);
 
     // This is request interception in order to make request through proxies
-    page.on('request', async interceptedRequest => {
-        const schemaType = new URL(interceptedRequest.url()).protocol;
-
-        if ('puppeteer-service-proxy-url' in interceptedRequest.headers() && ['http:', 'https:'].indexOf(schemaType) !== -1) {
-            const options = {
-                method: interceptedRequest.method(),
-                headers: interceptedRequest.headers(),
-                body: interceptedRequest.postData(),
-            };
-
-            let proxy = options.headers['puppeteer-service-proxy-url'];
-            delete options.headers['puppeteer-service-proxy-url'];
-
-            fetch(interceptedRequest.url(), options, proxy)
-                .then(async (response) => {
-                    interceptedRequest.respond({
-                        status: response.statusCode,
-                        contentType: response.headers['content-type'],
-                        headers: response.headers,
-                        body: response.body,
-                    });
-                })
-                .catch((err) => {
-                    interceptedRequest.respond({
-                        status: 404,
-                        body: err.stack,
-                    });
-                });
-
+    page.on('request', async request => {
+        const { [PAGE_PROXY_URL_KEY]: proxyUrl } = page;
+        if (proxyUrl) {
+            proxyRequest({ page, proxyUrl, request });
         } else {
-            interceptedRequest.continue();
+            request.continue();
         }
     });

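The handler above replaces the hand-rolled node-fetch forwarding: puppeteer-proxy (built on got) fulfills each intercepted request through whatever proxy URL is attached to the page. Below is a minimal sketch of that flow outside the service, assuming puppeteer and puppeteer-proxy are installed; the proxy URL and target site are placeholders.

const puppeteer = require('puppeteer');
const { proxyRequest } = require('puppeteer-proxy');

// Same key the service uses to stash a per-page proxy URL on the Page object.
const PAGE_PROXY_URL_KEY = 'puppeteer-service-proxy-url';

(async () => {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();
    await page.setRequestInterception(true);

    page.on('request', async request => {
        const proxyUrl = page[PAGE_PROXY_URL_KEY];
        if (proxyUrl) {
            // got performs the request through the proxy and responds
            // to the intercepted request with the result.
            await proxyRequest({ page, proxyUrl, request });
        } else {
            await request.continue();
        }
    });

    // In the service this value comes from request.body.proxy (next hunk);
    // here it is a hard-coded placeholder.
    page[PAGE_PROXY_URL_KEY] = 'http://user:pass@127.0.0.1:8118';

    await page.goto('https://example.com');
    await browser.close();
})();

Because the proxy is resolved per page at request time, different pages in the same browser can route through different proxies, which the old scheme handled by smuggling the proxy URL through request headers.
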
@@ -126,23 +102,35 @@ exports.getBrowserPage = async function getBrowserPage(browser, contextId, pageId
     }
 };
 
-exports.perfomAction = async function perfomAction(request, action) {
-    let lock = request.app.get('lock');
-    let page = await exports.getBrowserPage(request.app.get('browser'), request.query.contextId, request.query.pageId);
+exports.performAction = async function performAction(request, action) {
+    const { contextId, pageId } = request.query;
+    const lock = request.app.get('lock');
+    const page = await exports.getBrowserPage(request.app.get('browser'), contextId, pageId);
     return lock.acquire(await page._target._targetId, async () => {
 
-        let extra_headers = {};
+        let extraHeaders = {};
 
         if ('body' in request && 'headers' in request.body) {
-            extra_headers = { ...request.body.headers };
+            extraHeaders = { ...request.body.headers };
         }
 
         if ('body' in request && 'proxy' in request.body) {
-            extra_headers['puppeteer-service-proxy-url'] = request.body.proxy
+            // TODO maybe we should map page ids to proxies instead
+            page[PAGE_PROXY_URL_KEY] = request.body.proxy;
         }
 
+        if ('cookie' in extraHeaders) {
+            // TODO set cookies from request body like headers
+            const url = request.body.url || page.url()
+            const cookies = extraHeaders.cookie.split(';').map(s => {
+                const [name, value] = s.trim().split(/=(.*)/, 2);
+                return { name, value, url };
+            });
+            delete extraHeaders.cookie;
+            await page.setCookie(...cookies);
+        }
+
-        if (Object.keys(extra_headers).length !== 0) {
-            await page.setExtraHTTPHeaders(extra_headers);
+        if (Object.keys(extraHeaders).length !== 0) {
+            await page.setExtraHTTPHeaders(extraHeaders);
         }
 
         return await action(page, request);

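With this change, a Cookie header arriving in request.body.headers is no longer forwarded via setExtraHTTPHeaders; it is parsed into cookie objects and applied with page.setCookie. A small sketch of just that parsing step, with a made-up header value and URL:

// Made-up inputs; in the service, `url` falls back to page.url() when
// request.body.url is absent.
const url = 'https://example.com/';
const cookieHeader = 'sessionid=abc123; theme=dark; token=a=b=c';

const cookies = cookieHeader.split(';').map(s => {
    // The capture group plus limit 2 keeps '=' characters inside the value,
    // so 'token=a=b=c' parses as { name: 'token', value: 'a=b=c' }.
    const [name, value] = s.trim().split(/=(.*)/, 2);
    return { name, value, url };
});

console.log(cookies);
// [ { name: 'sessionid', value: 'abc123', url: 'https://example.com/' },
//   { name: 'theme',     value: 'dark',   url: 'https://example.com/' },
//   { name: 'token',     value: 'a=b=c',  url: 'https://example.com/' } ]

// The service then calls `await page.setCookie(...cookies)` and deletes the
// cookie key so it is never sent as an extra HTTP header.
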
7 changes: 3 additions & 4 deletions package.json
@@ -1,6 +1,6 @@
 {
   "name": "scrapy-puppeteer-service",
-  "version": "0.0.4",
+  "version": "0.0.5",
   "private": true,
   "scripts": {
     "start": "node ./bin/www"
@@ -12,9 +12,8 @@
     "debug": "~2.6.9",
     "express": "~4.16.1",
     "morgan": "~1.9.1",
-    "node-fetch": "^2.6.1",
     "npm-run-all": "^4.1.5",
-    "proxy-agent": "^3.1.1",
-    "puppeteer": "^1.19.0"
+    "puppeteer": "^3.1.0",
+    "puppeteer-proxy": "^2.1.1"
   }
 }

2 changes: 1 addition & 1 deletion routes/action.js
@@ -36,7 +36,7 @@ router.post('/', async function (req, res, next) {
             throw new Error("Invalid action function");
         }
 
-        let response = await utils.perfomAction(req, action);
+        let response = await utils.performAction(req, action);
         res.header('scrapy-puppeteer-service-context-id', response.contextId);
         res.send(response);
     } catch (e) {

2 changes: 1 addition & 1 deletion routes/click.js
@@ -43,7 +43,7 @@ router.post('/', async function (req, res, next) {
     }
 
     try {
-        let response = await utils.perfomAction(req, action);
+        let response = await utils.performAction(req, action);
         res.header('scrapy-puppeteer-service-context-id', response.contextId);
         res.send(response)
     } catch (e) {

2 changes: 1 addition & 1 deletion routes/goback.js
@@ -18,7 +18,7 @@ router.post('/', async function (req, res, next) {
     }
 
     try {
-        let response = await utils.perfomAction(req, action);
+        let response = await utils.performAction(req, action);
         res.header('scrapy-puppeteer-service-context-id', response.contextId);
         res.send(response)
     } catch (e) {

2 changes: 1 addition & 1 deletion routes/goforward.js
@@ -18,7 +18,7 @@ router.post('/', async function (req, res, next) {
     }
 
     try {
-        let response = await utils.perfomAction(req, action);
+        let response = await utils.performAction(req, action);
         res.header('scrapy-puppeteer-service-context-id', response.contextId);
         res.send(response)
     } catch (e) {

2 changes: 1 addition & 1 deletion routes/goto.js
@@ -31,7 +31,7 @@ router.post('/', async function (req, res, next) {
     }
 
     try {
-        let response = await utils.perfomAction(req, action);
+        let response = await utils.performAction(req, action);
         res.header('scrapy-puppeteer-service-context-id', response.contextId);
         res.send(response);
     } catch (e) {

2 changes: 1 addition & 1 deletion routes/har.js
@@ -14,7 +14,7 @@ router.post('/', async function (req, res, next) {
     }
 
     try {
-        let response = await utils.perfomAction(req, action);
+        let response = await utils.performAction(req, action);
        res.header('scrapy-puppeteer-service-context-id', response.contextId);
         res.send(response);
     } catch (e) {

2 changes: 1 addition & 1 deletion routes/screenshot.js
@@ -18,7 +18,7 @@ async function action(page, request) {
 router.post('/', async function (req, res, next) {
 
     try {
-        let response = await utils.perfomAction(req, action);
+        let response = await utils.performAction(req, action);
         res.header('scrapy-puppeteer-service-context-id', response.contextId);
         res.send(response);
     } catch (e) {

2 changes: 1 addition & 1 deletion routes/scroll.js
@@ -29,7 +29,7 @@ async function action(page, request) {
 router.post('/', async function (req, res, next) {
 
     try {
-        let response = await utils.perfomAction(req, action);
+        let response = await utils.performAction(req, action);
         res.header('scrapy-puppeteer-service-context-id', response.contextId);
         res.send(response);
     } catch (e) {

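Taken together, the routes now expect cookies inside the forwarded headers and the proxy as a separate body field. A hypothetical client call is sketched below; the service address, port, and payload values are assumptions, and Node 18+ global fetch stands in for whatever HTTP client the caller actually uses.

(async () => {
    // contextId/pageId query parameters are omitted here; follow-up calls can
    // reuse the id returned in the response header read below.
    const res = await fetch('http://localhost:3000/goto', {
        method: 'POST',
        headers: { 'content-type': 'application/json' },
        body: JSON.stringify({
            url: 'https://example.com/login',
            headers: {
                'user-agent': 'Mozilla/5.0 (compatible; example-bot)',
                // Ends up in page.setCookie, not in setExtraHTTPHeaders.
                cookie: 'sessionid=abc123; csrftoken=xyz',
            },
            // Stored on the page and used by proxyRequest for every request.
            proxy: 'http://user:pass@127.0.0.1:8118',
        }),
    });

    console.log(res.headers.get('scrapy-puppeteer-service-context-id'));
})();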