diff --git a/src/modules/google.js b/src/modules/google.js
index 38ccad2..2d10422 100644
--- a/src/modules/google.js
+++ b/src/modules/google.js
@@ -13,13 +13,13 @@ class GoogleScraper extends Scraper {
const results = await this.page.evaluate(() => {
- let _text = (el, s) => {
+ // Return the text of the first descendant of `el` matching selector `s`, or undefined when nothing matches.
+ // When `onlyFirstTextNode` is truthy, return only the nodeValue of the matched element's first child node
+ // (undefined when the element has no child nodes) instead of its whole innerText.
+ let _text = (el, s, onlyFirstTextNode) => {
let n = el.querySelector(s);
if (n) {
- return n.innerText;
+ return (onlyFirstTextNode) ? (n.firstChild ? n.firstChild.nodeValue : undefined) : n.innerText;
} else {
- return '';
+ return;
}
};
@@ -29,7 +29,7 @@ class GoogleScraper extends Scraper {
if (n) {
return n.getAttribute(attr);
} else {
- return null;
+ return;
}
};
@@ -111,14 +111,14 @@ class GoogleScraper extends Scraper {
// parse right side product information
results.right_info.review = _attr(document, '#rhs .cu-container g-review-stars span', 'aria-label');
- let title_el = document.querySelector('#rhs .cu-container g-review-stars');
+ let title_el = document.querySelector('#rhs .cu-container .Q7Oxbd');
if (title_el) {
- results.right_info.review.title = title_el.parentNode.querySelector('div:first-child').innerText;
+ results.right_info.title = title_el.innerText;
}
- let num_reviews_el = document.querySelector('#rhs .cu-container g-review-stars');
+ let num_reviews_el = document.querySelector('#rhs .cu-container .PGDKUd');
if (num_reviews_el) {
- results.right_info.num_reviews = num_reviews_el.parentNode.querySelector('div:nth-of-type(2)').innerText;
+ results.right_info.num_reviews = num_reviews_el.innerText;
}
results.right_info.vendors = [];
@@ -127,20 +127,16 @@ class GoogleScraper extends Scraper {
document.querySelectorAll('#rhs .cu-container .rhsvw > div > div:nth-child(4) > div > div:nth-child(3) > div').forEach((el) => {
results.right_info.vendors.push({
price: _text(el, 'span:nth-of-type(1)'),
- merchant_name: _text(el, 'span:nth-child(3) a:nth-child(2)'),
+ merchant_name: _text(el, '.doUe3s0oL2B__jackpot-merchant a'),
merchant_ad_link: _attr(el, 'span:nth-child(3) a:first-child', 'href'),
- merchant_link: _attr(el, 'span:nth-child(3) a:nth-child(2)', 'href'),
+ merchant_link: _attr(el, 'span:nth-child(3) a:nth-child(2)', 'href'), // TODO this is not working anymore
source_name: _text(el, 'span:nth-child(4) a'),
source_link: _attr(el, 'span:nth-child(4) a', 'href'),
- info: _text(el, 'div span'),
- shipping: _text(el, 'span:last-child > span'),
+ info: _text(el, '.SdBHnc.e2CF7c'),
+ shipping: _text(el, '.JfwJme'),
})
});
- if (!results.right_info.title) {
- results.right_info = {};
- }
-
let right_side_info_el = document.getElementById('rhs');
if (right_side_info_el) {
@@ -151,26 +147,19 @@ class GoogleScraper extends Scraper {
}
}
- // parse top main column product information
- // #tvcap .pla-unit
- document.querySelectorAll('#tvcap .pla-unit').forEach((el) => {
+ // Parse Google Shopping top or left
+ document.querySelectorAll('.pla-unit').forEach((el) => {
let top_product = {
tracking_link: _attr(el, '.pla-unit-title a:first-child', 'href'),
link: _attr(el, '.pla-unit-title a:nth-child(2)', 'href'),
title: _text(el, '.pla-unit-title a:nth-child(2) span'),
- price: _text(el, '.pla-unit-title + div'),
- shipping: _text(el, '.pla-extensions-container div:nth-of-type(1)'),
- vendor_link: _attr(el,'.pla-extensions-container div > a', 'href'),
+ price: _text(el, '.pla-unit-title + div', true),
+ originalPrice: _text(el, '.pla-unit-title + div > span'),
+ shipping: _text(el, '.pla-extensions-container .cYBBsb'),
+ vendor_link: _attr(el,'.pla-extensions-container a.FfKHB', 'href'),
+ merchant_name: _text(el,'.LbUacb span:nth-child(1)'),
};
- let merchant_node = el.querySelector('.pla-unit-title');
- if (merchant_node) {
- let node = merchant_node.parentNode.querySelector('div > span');
- if (node) {
- top_product.merchant_name = node.innerText;
- }
- }
-
results.top_products.push(top_product);
});
diff --git a/src/modules/se_scraper.js b/src/modules/se_scraper.js
index 3a453ac..17ff117 100644
--- a/src/modules/se_scraper.js
+++ b/src/modules/se_scraper.js
@@ -31,8 +31,8 @@ module.exports = class Scraper {
this.proxy = config.proxy;
this.keywords = config.keywords;
- this.STANDARD_TIMEOUT = 10000;
- this.SOLVE_CAPTCHA_TIME = 45000;
+ // Take the timeouts from the config, falling back to the previous hard-coded values
+ // when the config does not define them (e.g. a scraper constructed directly in tests).
+ this.STANDARD_TIMEOUT = (config.standard_timeout !== undefined) ? config.standard_timeout : 10000;
+ this.SOLVE_CAPTCHA_TIME = (config.solve_captcha_time !== undefined) ? config.solve_captcha_time : 45000;
this.results = {};
this.result_rank = 1;
@@ -272,6 +272,12 @@ module.exports = class Scraper {
await this.page.screenshot({ path: `debug_se_scraper_${this.config.search_engine_name}_${keyword}.png` });
}
+ if (this.config.keep_html_on_error){
+ const html_error = await this.page.content();
+ e.html_on_error = html_error;
+ e.lastUrl = await this.page.evaluate(() => {return window.location.href;});
+ }
+
this.metadata.scraping_detected = await this.detected();
if (this.metadata.scraping_detected === true) {
diff --git a/src/node_scraper.js b/src/node_scraper.js
index 2dec432..b71fe61 100644
--- a/src/node_scraper.js
+++ b/src/node_scraper.js
@@ -139,6 +139,9 @@ class ScrapeManager {
//custom_func: resolve('examples/pluggable.js'),
custom_func: null,
throw_on_detection: false,
+ keep_html_on_error: false,
+ standard_timeout: 10000,
+ solve_captcha_time: 45000,
// List of proxies to use ['socks5://78.94.172.42:1080', 'http://localhost:1080']
proxies: null,
// a file with one proxy per line. Example:
diff --git a/test/keep_html_on_error.js b/test/keep_html_on_error.js
new file mode 100644
index 0000000..e731a41
--- /dev/null
+++ b/test/keep_html_on_error.js
@@ -0,0 +1,108 @@
+'use strict';
+const express = require('express');
+const { createLogger, transports } = require('winston');
+const http = require('http');
+const https = require('https');
+const assert = require('assert');
+const path = require('path');
+const keyCert = require('key-cert');
+const Promise = require('bluebird');
+const Proxy = require('http-mitm-proxy');
+
+const debug = require('debug')('se-scraper:test');
+const se_scraper = require('..');
+
+const httpPort = 3012;
+const httpsPort = httpPort + 1;
+const proxyPort = httpPort + 2;
+
+const fakeSearchEngine = express();
+fakeSearchEngine.get('/search', (req, res) => {
+ debug('q=%s', req.query.q);
+ const pageNumber = ((req.query.start/10) || 0) + 1;
+ res.sendFile(path.join(__dirname, 'mocks/google/' + req.query.q + '_page' + pageNumber + '.html'));
+});
+fakeSearchEngine.use(express.static('test/mocks/google', {extensions: ['html']}));
+
+describe('Config', function(){
+
+ let httpServer, httpsServer, proxy;
+ before(async function(){
+ // Mount our fake search engine on both the HTTP and the HTTPS server
+ httpServer = http.createServer(fakeSearchEngine);
+ httpsServer = https.createServer(await keyCert(), fakeSearchEngine);
+
+ proxy = Proxy();
+ proxy.onRequest((ctx, callback) => {
+ ctx.proxyToServerRequestOptions.host = 'localhost';
+ ctx.proxyToServerRequestOptions.port = (ctx.isSSL) ? httpsPort : httpPort;
+ ctx.proxyToServerRequestOptions.headers['X-Forwarded-Host'] = 'ProxiedThroughFakeEngine';
+ debug('Proxy request to %s', ctx.clientToProxyRequest.headers.host);
+ return callback();
+ });
+
+ await Promise.promisify(proxy.listen, {context: proxy})({port: proxyPort});
+ await Promise.promisify(httpServer.listen, {context: httpServer})(httpPort);
+ await Promise.promisify(httpsServer.listen, {context: httpsServer})(httpsPort);
+ debug('Fake http search engine servers started');
+ });
+
+ after(function(){
+ httpsServer.close();
+ httpServer.close();
+ proxy.close();
+ });
+
+ describe('keep_html_on_error', function(){
+
+ const testLogger = createLogger({
+ transports: [
+ new transports.Console({
+ level: 'error'
+ })
+ ]
+ });
+
+ /**
+ * Test the keep_html_on_error option: the thrown error must carry the page HTML
+ */
+ it('html_output single page single keyword', async function () {
+
+ const scrape_job = {
+ search_engine: 'google',
+ /* TODO refactor start_url
+ google_settings: {
+ start_url: 'http://localhost:' + httpPort
+ },
+ */
+ keywords: ['test error'],
+ };
+
+ var scraper = new se_scraper.ScrapeManager({
+ throw_on_detection: true,
+ keep_html_on_error: true,
+ logger: testLogger,
+ //clean_html_output: false,
+ //clean_data_images: false,
+ // TODO refactor start_url so we can use it instead of depending on the proxy for this test
+ proxies: ['http://localhost:' + proxyPort],
+ use_proxies_only: true,
+ standard_timeout: 500,
+ });
+ await scraper.start();
+ await assert.rejects(
+ async () => {
+ await scraper.scrape(scrape_job);
+ },
+ (error) => {
+ assert(error.html_on_error, 'Error is containing the html output');
+ return /#fbar/.test(error.message);
+ }
+ )
+ await scraper.quit();
+
+ });
+
+ });
+
+});
\ No newline at end of file
diff --git a/test/mocks/google/shopping 2_page1.html b/test/mocks/google/shopping 2_page1.html
new file mode 100644
index 0000000..4342228
--- /dev/null
+++ b/test/mocks/google/shopping 2_page1.html
@@ -0,0 +1,209 @@
+
cheap lacoste shoes - Recherche Google
Cliquez
ici si, d'ici quelques secondes, vous n'avez pas été redirigé.
Environ 55 700 000 résultats (0,48 secondes)
Rappel concernant les règles de confidentialité de Google
Me le rappeler plus tard Lire
Afficher les produits correspondants à cheap lacoste... 39,99 €
LaBoutiqueOffic...
49,90 €
LaBoutiqueOffic...
64,95 €
LaBoutiqueOffic...
Annonces Commandez Vite sur le Site Officiel. Livraison Express 48h dès 180€ d'achat !
Note associée à lacoste.com : 4,8 - Conditions de retour: 60 jours ou plus pour la plupart des articles
Chic et intemporel, vous porterez cet essentiel en toutes occasions.
L'Elégance d'une coupe Chemise : Découvrez Le Nouveau Paris Polo.
lacoste discount
lacoste shoes homme
lacoste usa
lacoste shoes femme
get the label lacoste
Résultats de recherche Résultats de recherche à proximité Les horaires ou les services proposés peuvent varier Résultats Web Browse cheap Lacoste trainers at low prices. Find smart shoes to look good and feel great. Get big brand footwear for less, now!
Get huge savings on our great range of Lacoste trainers , polos, t-shirts and more for men and women at MandM Direct. But hurry, once it's gone, it's gone!
Discover our collections on the official Lacoste online store: Clothes, shoes , bags and accessories for men, women and kids.
114 items - Save up to 75% on our huge range of Lacoste clothing including polos, t shirts and trainers for men, women & kids. Shop now and get great deals for ...
Cheap Lacoste trainer at Soletrader Outlet with at least 30% off the range, free delivery on orders over £50 and easy returns.
289 items - Free shipping BOTH ways on Lacoste , Shoes , Men from our vast selection of styles. Fast delivery, and 24/7/365 real-person service with a smile.
Lacoste Mens trainers sale ✅ now on with up to 60% off ✅ Huge discounts on High tops, Plimsolls and more ✅ from the biggest online sales & clearance outlet.
Discounted shoes , clothing, accessories and more at 6pm.com! ... 6pm - Your Premier Destination for Discount Fashion ... René Lacoste entered the legend of tennis when he and his teammates "The Musketeers", stole the Davis Cup away ... Results 1 - 43 of 43 - Incredible savings with up to 75% off in the Mens Lacoste Trainers Sale! The best prices in the UK on Cheap Lacoste Trainers and Shoes ...
2411 products - Find Lacoste sales up to 93% off from 38 stores. Shop the best Lacoste sale from the most popular stores.
Annonces Nouvelle Collection - Vêtements - Chaussures - Urban Sport - Accessoires Jusqu'à -70%. Choisissez les modèles qui conviennent le votre style, Vaste choix de tailles & modèles . Manches courtes. Coupe classique. Coupe slim. Types: Polo, T-Shirts, Sweatshirts.
Lacoste Chaussures 200 Modèles à Prix Promo. Déstockage - Lacoste Chaussures à prix web ! Modèles de Lacoste Chaussures à Prix Super Réduits. Vite: Lacoste Chaussures à Saisir ! Comparez plus de prix. Faites des économies. Enchères et prix fixe.
diff --git a/test/mocks/google/shopping right product review_page1.html b/test/mocks/google/shopping right product review_page1.html
new file mode 100644
index 0000000..bca8e73
--- /dev/null
+++ b/test/mocks/google/shopping right product review_page1.html
@@ -0,0 +1,220 @@
+
+
+lacoste 317 - Recherche Google
Cliquez
ici si, d'ici quelques secondes, vous n'avez pas été redirigé.
Environ 4 160 000 résultats (0,50 secondes)
Rappel concernant les règles de confidentialité de Google
Me le rappeler plus tard Lire
Résultats de recherche Filtres de recherche guidée Résultats Web Lacoste Chaymon 317 Hommes Baskets. Note : 5 - 1 avis
Basket Lacoste Avenir 317 2 SPW. Coloris : Noir. Référence : 734SPW0003024. Ancrée dans la plus pure tradition sportive de la marque, la chaussure Lacoste ...
Acheter des Lacoste L 881S en ligne à prix très bas ✓ Essayage Virtuel 3D ✓ 4 variantes de couleurs ✓ Déja à partir de ... Lacoste L881S 317 CRYSTAL/KHAKI.
Acheter des Lacoste L 2858 en ligne à prix très bas ✓ Essayage Virtuel 3D ✓ 5 variantes de couleurs ✓ Déja à partir de 74 ... Lacoste L2858 317 MATTE KHAKI ...
Commencez à voir les lieux où vous vous êtes rendu
Turn on Google Account settings to track places you visit. Turn on Location History to track places you visit. Activez l'enregistrement de l'activité sur le Web et les applications pour pouvoir effectuer le suivi des lieux où vous vous rendez.
Annuler OK
Résultats de recherche à proximité Les horaires ou les services proposés peuvent varier Résultats Web Chaussures Homme Lacoste 317 . Filtrer. Meilleures ventes, Moins cher, Plus cher, Date d'arrivée, Titre, À propos des modalités de tri. Meilleures ventes.
Lacoste Avenir 317 ,Basket femme Article: Avenir 317 Reference: 7-34SPW0003024 Manufacture: Lacoste Product category: Basket femme - Avenir 317 - Noir ...
Basket Lacoste LTR01 317 6SPM. Coloris : Bleu marine, Rouge, Blanc. Référence : 734SPM0035144. Partenaire idéale pour une tenue à la fois élégante et ...
Découvrez notre sélection de Baskets Lacoste L.IGHT 317 5 sur Sarenza. Livraison et retour toujours gratuits !
Les meilleures offres pour lacoste europa 317 1 spm leather trainers shoes mens sont sur ✓ Comparez les prix et les spécificités des produits neufs et ...
Résultats complémentaires
Partager
E-mail Cliquez pour copier le lien
Lien copié
Tous les types de Couleur Comparer les prix Tous les types de Couleur
Marque : Lacoste
Type : Verres correcteurs
Gamme : Homme
Types de montures : Cerclées
Plus de détails Les lunettes de vue Lacoste Lacoste L 2807 317 sont des lunettes de soleil por Homme. Couleur de la monture: Gris, Taille: Medium, Forme: Fullframe, Largeur de la monture: 134mm, Hauteur des verres: 40mm, Cerclées: Plastique. Largeur du Pont: 16mm. Fabricant: Marchon Germany GmbH. Style: Modern
Types de montures
Cerclées
Matériau de la monture
Monture en plastique
Avis des utilisateurs
il y a un an
N'ayant trouvé dans aucune boutique de ma ville des lunettes orange, j'ai effectué une recherche sur internet.
+J'ai trouvé et choisi ce modèle sur visio net pour ses couleurs et ses dimensions.
+Il correspond exactement à ce que je recherchais…
· Avis publié sur visio-net.fr
Une erreur s'est produite. Veuillez réessayer.
Plus d'avis Une erreur s'est produite. Veuillez réessayer.
Plus d'avis
Applications Google
\ No newline at end of file
diff --git a/test/mocks/google/shopping_page1.html b/test/mocks/google/shopping_page1.html
new file mode 100644
index 0000000..55a9a9d
--- /dev/null
+++ b/test/mocks/google/shopping_page1.html
@@ -0,0 +1,213 @@
+cheap lacoste shoes - Recherche Google
Cliquez
ici si, d'ici quelques secondes, vous n'avez pas été redirigé.
Environ 54 900 000 résultats (0,37 secondes)
Rappel concernant les règles de confidentialité de Google
Me le rappeler plus tard Lire
Annonces Découvrez et Commandez la Nouvelle Collection de Chaussures Lacoste . Livraison standard offerte dès 80€ d'achat. SAV : Mail ou Téléphone. Paiement Sécurisé. Retour Facile et Gratuit. Models: Polos, Chaussures, Robes, Pullover, Sacs, Accessoires, Pantalons.
Note associée à lacoste.com : 4,8 - Commandes correctes: 95 - 100 %
lacoste discount
lacoste shoes homme
lacoste usa
lacoste shoes femme
get the label lacoste
Résultats de recherche Résultats de recherche à proximité Les horaires ou les services proposés peuvent varier Résultats Web Browse cheap Lacoste trainers at low prices. Find smart shoes to look good and feel great. Get big brand footwear for less, now!
Get huge savings on our great range of Lacoste trainers , polos, t-shirts and more for men and women at MandM Direct. But hurry, once it's gone, it's gone!
114 items - Save up to 75% on our huge range of Lacoste clothing including polos, t shirts and trainers for men, women & kids. Shop now and get great deals for ...
Discover our collections on the official Lacoste online store: Clothes, shoes , bags and accessories for men, women and kids.
Cheap Lacoste trainer at Soletrader Outlet with at least 30% off the range, free delivery on orders over £50 and easy returns.
289 items - Free shipping BOTH ways on Lacoste , Shoes , Men from our vast selection of styles. Fast delivery, and 24/7/365 real-person service with a smile.
Lacoste Mens trainers sale ✅ now on with up to 60% off ✅ Huge discounts on High tops, Plimsolls and more ✅ from the biggest online sales & clearance outlet.
Discounted shoes , clothing, accessories and more at 6pm.com! ... 6pm - Your Premier Destination for Discount Fashion ... René Lacoste entered the legend of tennis when he and his teammates "The Musketeers", stole the Davis Cup away ... Results 1 - 44 of 44 - Incredible savings with up to 75% off in the Mens Lacoste Trainers Sale! The best prices in the UK on Cheap Lacoste Trainers and Shoes ...
2405 products - ... up to 77% off from 38 stores. Shop the best Lacoste sale from the most popular stores. ... Lacoste LT Fit sneakers - 12789862. Farfetch. 44 46.
Résultats complémentaires Acheter
Acheter
Résultats Shopping
39,99 €
LaBoutiqueOffi... LaBoutiqueOffici... LaBoutiqueOffici...
45,00 €
Chausport Chausport Chausport
44,99 €
GetTheLabel.c... GetTheLabel.com GetTheLabel.com
45,50 €65 €
Sarenza Sarenza Sarenza
58,00 €
Spartoo.com Spartoo.com Spartoo.com
55,00 €
Nike Officiel Nike Officiel Nike Officiel
Applications Google
diff --git a/test/mocks/google/test error_page1.html b/test/mocks/google/test error_page1.html
new file mode 100644
index 0000000..19e35b5
--- /dev/null
+++ b/test/mocks/google/test error_page1.html
@@ -0,0 +1 @@
+THIS IS A EMPTY PAGE TO THROW SOME ERROR IN SE-SCRAPER
diff --git a/test/modules/google.js b/test/modules/google.js
index 83c2ae3..d008e7a 100644
--- a/test/modules/google.js
+++ b/test/modules/google.js
@@ -120,4 +120,150 @@ describe('Module Google', function(){
});
});
-});
\ No newline at end of file
+ it('extract google shopping on right', function () {
+ const googleScraper = new GoogleScraper({
+ config: {
+ search_engine_name: 'google',
+ throw_on_detection: true,
+ keywords: ['shopping'],
+ logger: testLogger,
+ scrape_from_file: '',
+ num_pages: 1,
+ }
+ });
+ googleScraper.STANDARD_TIMEOUT = 500;
+ return googleScraper.run({page}).then(({results, metadata, num_requests}) => {
+ assert.strictEqual(num_requests, 1, 'One request should be done');
+ assert.strictEqual(results['shopping']['1'].results.length, 10, 'Must have 10 organic results parsed on page 1');
+ assert.deepEqual(results['shopping']['1'].top_products, [
+ {
+ 'link': 'https://www.laboutiqueofficielle.com/achat-baskets-basses/classic-series-baskets-317-blanc-144046.html?referer=gshopping&LGWCODE=3010559970809;160079;7403',
+ 'merchant_name': 'LaBoutiqueOffi...',
+ 'price': '39,99 €',
+ 'rank': 1,
+ 'title': 'Classic Series - Baskets 317 Blanc',
+ 'tracking_link': '/aclk?sa=l&ai=DChcSEwjJqLX1v4bqAhXJlBgKHYRrDO4YABAEGgJsZQ&sig=AOD64_1OEdvZgHU2YEMPI4JNdeTqLJTVjw&ctype=5&q=&ved=2ahUKEwjPmK31v4bqAhXLxYUKHe8BByEQ9A56BAgOEFU&adurl=',
+ 'vendor_link': 'https://www.google.com/search?tbm=shop&q=cheap%20lacoste%20shoes',
+ },
+ {
+ 'link': 'https://www.chausport.com/p/lacoste-carnaby-evo-noire-enfant-173257.html',
+ 'merchant_name': 'Chausport',
+ 'price': '45,00 €',
+ 'rank': 2,
+ 'title': 'Tennis Lacoste Carnaby Evo Noire Enfant 28',
+ 'tracking_link': '/aclk?sa=L&ai=DChcSEwjJqLX1v4bqAhXJlBgKHYRrDO4YABAFGgJsZQ&sig=AOD64_0lhZrLNYCENmxzquCMa5M4_D04ng&ctype=5&q=&ved=2ahUKEwjPmK31v4bqAhXLxYUKHe8BByEQ9A56BAgOEGA&adurl=',
+ 'vendor_link': 'http://www.choozen.fr/nf/gs-cheap%20lacoste%20shoes.htm?kpartnerid=96955353',
+ },
+ {
+ 'link': 'https://www.getthelabel.com/fr/p/lacoste-baskets-lerond-418/138256',
+ 'merchant_name': 'GetTheLabel.c...',
+ 'price': '44,99 €',
+ 'rank': 3,
+ 'title': 'Lacoste Baskets Lerond 418 Size 9 in Blanc pour Homme',
+ 'tracking_link': '/aclk?sa=l&ai=DChcSEwjJqLX1v4bqAhXJlBgKHYRrDO4YABAIGgJsZQ&sig=AOD64_13MoA9It0w-yp3GqriMf13OPLI8w&ctype=5&q=&ved=2ahUKEwjPmK31v4bqAhXLxYUKHe8BByEQ9A56BAgOEG0&adurl=',
+ 'vendor_link': 'https://highstreetone.com/?search=cheap%20lacoste%20shoes',
+ },
+ {
+ 'link': 'https://www.sarenza.com/lacoste-carnaby-evo-120-2-s834061-br918-t76-p0000227925#size=39-39',
+ 'merchant_name': 'Sarenza',
+ 'price': '45,50 €',
+ 'originalPrice': '65 €',
+ 'rank': 4,
+ 'title': 'Lacoste Carnaby Evo 120 2 Blanc - Baskets - Disponible en 39',
+ 'tracking_link': '/aclk?sa=l&ai=DChcSEwjJqLX1v4bqAhXJlBgKHYRrDO4YABANGgJsZQ&sig=AOD64_1Q6WUe8YXjhb-y_k0rErD2WUsTqQ&ctype=5&q=&ved=2ahUKEwjPmK31v4bqAhXLxYUKHe8BByEQ9A56BAgOEHk&adurl=',
+ 'vendor_link': 'https://www.feed-price.com/search/cheap%20lacoste%20shoes',
+ },
+ {
+ 'link': 'https://www.spartoo.com/Lacoste-CARNABY-EVO-BL-1-x4736301.php?track_id=adwo_fgl&sx=B&utm_source=froogle&utm_medium=comparateurs&utm_content=4736301&utm_campaign=adwo_fgl&size_id=158&fcsize=1&sx=B',
+ 'merchant_name': 'Spartoo.com',
+ 'price': '58,00 €',
+ 'rank': 5,
+ 'title': 'Lacoste CARNABY EVO BL 1 Baskets basses enfant (garcons)',
+ 'tracking_link': '/aclk?sa=l&ai=DChcSEwjJqLX1v4bqAhXJlBgKHYRrDO4YABAMGgJsZQ&sig=AOD64_0NfyG0tH5Pc7kPfADKcQflx78H1g&ctype=5&q=&ved=2ahUKEwjPmK31v4bqAhXLxYUKHe8BByEQ9A56BQgOEIcB&adurl=',
+ 'vendor_link': 'https://www.google.com/search?tbm=shop&q=cheap%20lacoste%20shoes',
+ },
+ {
+ 'link': 'https://www.nike.com/fr/t/nikecourt-royale-shoe-KyTwJwgV/749747-111',
+ 'merchant_name': 'Nike Officiel',
+ 'price': '55,00 €',
+ 'rank': 6,
+ 'title': 'Chaussure Nike Court Royale pour Homme - Blanc',
+ 'tracking_link': '/aclk?sa=l&ai=DChcSEwjJqLX1v4bqAhXJlBgKHYRrDO4YABASGgJsZQ&sig=AOD64_2KQENuVGnvXutmSUufDSa4FnTYsw&ctype=5&q=&ved=2ahUKEwjPmK31v4bqAhXLxYUKHe8BByEQ9A56BQgOEJIB&adurl=',
+ 'vendor_link': 'https://www.pricesearcher.com/css/search/?p=1&q=cheap%20lacoste%20shoes&utm_source=google&utm_medium=css',
+ }
+ ])
+ });
+ });
+
+ it('extract google shopping on top', function () {
+ const googleScraper = new GoogleScraper({
+ config: {
+ search_engine_name: 'google',
+ throw_on_detection: true,
+ keywords: ['shopping 2'],
+ logger: testLogger,
+ scrape_from_file: '',
+ num_pages: 1,
+ }
+ });
+ googleScraper.STANDARD_TIMEOUT = 500;
+ return googleScraper.run({page}).then(({results, metadata, num_requests}) => {
+ assert.strictEqual(num_requests, 1, 'One request should be done');
+ assert.strictEqual(results['shopping 2']['1'].results.length, 10, 'Must have 10 organic results parsed on page 1');
+ assert.deepEqual(results['shopping 2']['1'].top_products[2], {
+ "link": "https://www.zalando.fr/lacoste-sideline-cub-chaussons-pour-bebe-whitegreen-la216f003-k11.html?size=17&allophones=0",
+ "merchant_name": "Zalando.fr",
+ "price": "31,95 €",
+ "rank": 3,
+ 'shipping': 'Livraison gratuite',
+ "title": "Lacoste Sideline CUB Cadeau de naissance white/green, gender.kids.unisex, Taille: 17, Blanc - Imitation cuir/textile",
+ "tracking_link": "/aclk?sa=l&ai=DChcSEwjt7o3yj4nqAhVZhdUKHbshBNwYABASGgJ3cw&sig=AOD64_0usikwrH4jD5vqtbS7vVoCrNxMOg&ctype=5&q=&ved=2ahUKEwj0w4fyj4nqAhWZDGMBHY7HAzAQww96BAgOEFI&adurl=",
+ "vendor_link": "https://fr.shoptail.eu/cheap%20lacoste%20shoes",
+ })
+ });
+ });
+
+ it('shopping extract right one product', function () {
+ const googleScraper = new GoogleScraper({
+ config: {
+ search_engine_name: 'google',
+ throw_on_detection: true,
+ keywords: ['shopping right product review'],
+ logger: testLogger,
+ scrape_from_file: '',
+ num_pages: 1,
+ }
+ });
+ googleScraper.STANDARD_TIMEOUT = 500;
+ return googleScraper.run({page}).then(({results, metadata, num_requests}) => {
+ assert.strictEqual(num_requests, 1, 'One request should be done');
+ assert.strictEqual(results['shopping right product review']['1'].results.length, 9, 'Must have 9 organic results parsed on page 1');
+ assert.deepEqual(results['shopping right product review']['1'].right_info, {
+ title: 'Lacoste Lunettes',
+ 'num_reviews': '146 avis',
+ 'review': 'Note : 4,6 sur 5',
+ 'vendors': [
+ {
+ 'info': '317 · 2807',
+ 'merchant_ad_link': 'https://www.googleadservices.com/pagead/aclk?sa=L&ai=DChcSEwihq9C82ojqAhUIyrIKHbIHAx8YABACGgJscg&ohost=www.google.com&cid=CAASE-Roz5UHMJg95vk99OwXQnKbUG0&sig=AOD64_0Wfsw3t3eO_yEtq8lWRIjiF6EqZw&ctype=5&q=&ved=2ahUKEwjsqsi82ojqAhVFPBoKHY38DAIQ9A56BAgNEH0&adurl=',
+ 'merchant_name': 'Edel-Optics FR',
+ 'price': '102,75 €',
+ 'shipping': 'Livraison gratuite',
+ 'source_link': 'https://www.google.com/search?tbm=shop&q=lacoste%20317',
+ 'source_name': 'Par Google',
+ },
+ {
+ 'info': '317 · 2805',
+ 'merchant_ad_link': 'https://www.googleadservices.com/pagead/aclk?sa=L&ai=DChcSEwihq9C82ojqAhUIyrIKHbIHAx8YABADGgJscg&ohost=www.google.com&cid=CAASE-Roz5UHMJg95vk99OwXQnKbUG0&sig=AOD64_2R4Idoiqc783K8OLyv9W9YQTJfog&ctype=5&q=&ved=2ahUKEwjsqsi82ojqAhVFPBoKHY38DAIQ9A56BQgNEIEB&adurl=',
+ 'merchant_name': 'EasyLunettes.fr',
+ 'price': '75,00 €',
+ 'shipping': 'Livraison gratuite',
+ 'source_link': 'https://producthero.com/?utm_source=google&utm_medium=css&q=lacoste%20317',
+ 'source_name': 'Par Producthero',
+ }
+ ]
+ });
+ });
+ });
+
+});
diff --git a/test/proxy.js b/test/proxy.js
index c1092ea..209d782 100644
--- a/test/proxy.js
+++ b/test/proxy.js
@@ -21,7 +21,7 @@ fakeSearchEngine.set('trust proxy', 'loopback');
fakeSearchEngine.get('/test-proxy', (req, res) => {
debug('fake-search-engine req.hostname=%s', req.hostname);
//debug('req to', req.socket.localAddress, req.socket.localPort);
- res.send(req.hostname);
+ setTimeout(() => res.send(req.hostname), 100); // Delay the reply because of a race condition in the first test
});
describe('Config', function(){
diff --git a/test/scrape-manager.js b/test/scrape-manager.js
new file mode 100644
index 0000000..f20ee3f
--- /dev/null
+++ b/test/scrape-manager.js
@@ -0,0 +1,122 @@
+'use strict';
+const express = require('express');
+const { createLogger, transports } = require('winston');
+const http = require('http');
+const https = require('https');
+const assert = require('assert');
+const path = require('path');
+const keyCert = require('key-cert');
+const Promise = require('bluebird');
+const Proxy = require('http-mitm-proxy');
+
+const debug = require('debug')('se-scraper:test');
+const se_scraper = require('../');
+
+const httpPort = 3012;
+const httpsPort = httpPort + 1;
+const proxyPort = httpPort + 2;
+
+const fakeSearchEngine = express();
+fakeSearchEngine.get('/search', (req, res) => {
+ debug('q=%s', req.query.q);
+ const pageNumber = ((req.query.start/10) || 0) + 1;
+ res.sendFile(path.join(__dirname, 'mocks/google/' + req.query.q + '_page' + pageNumber + '.html'));
+});
+fakeSearchEngine.use(express.static('test/mocks/google', {extensions: ['html']}));
+
+describe('ScrapeManager', function(){
+
+ let httpServer, httpsServer, proxy;
+ before(async function(){
+ // Mount our fake search engine on both the HTTP and the HTTPS server
+ httpServer = http.createServer(fakeSearchEngine);
+ httpsServer = https.createServer(await keyCert(), fakeSearchEngine);
+
+ proxy = Proxy();
+ proxy.onRequest((ctx, callback) => {
+ ctx.proxyToServerRequestOptions.host = 'localhost';
+ ctx.proxyToServerRequestOptions.port = (ctx.isSSL) ? httpsPort : httpPort;
+ ctx.proxyToServerRequestOptions.headers['X-Forwarded-Host'] = 'ProxiedThroughFakeEngine';
+ debug('Proxy request to %s', ctx.clientToProxyRequest.headers.host);
+ return callback();
+ });
+
+ await Promise.promisify(proxy.listen, {context: proxy})({port: proxyPort});
+ await Promise.promisify(httpServer.listen, {context: httpServer})(httpPort);
+ await Promise.promisify(httpsServer.listen, {context: httpsServer})(httpsPort);
+ debug('Fake http search engine servers started');
+ });
+
+ after(function(){
+ httpsServer.close();
+ httpServer.close();
+ proxy.close();
+ });
+
+ describe('.quit()', function(){
+
+ const testLogger = createLogger({
+ transports: [
+ new transports.Console({
+ level: 'error'
+ })
+ ]
+ });
+
+ /**
+ * Test that .quit() correctly closes all opened Chrome instances
+ */
+ it('Ensure all chrome are closed after .quit() has been called', async function () {
+
+ const scrape_job = {
+ search_engine: 'google',
+ /* TODO refactor start_url
+ google_settings: {
+ start_url: 'http://localhost:' + httpPort
+ },
+ */
+ keywords: ['test keyword'],
+ };
+
+ var scraper = new se_scraper.ScrapeManager({
+ throw_on_detection: true,
+ logger: testLogger,
+ // TODO refactor start_url so we can use it instead of depending on the proxy for this test
+ proxies: ['http://localhost:' + proxyPort],
+ use_proxies_only: true,
+ });
+ await scraper.start();
+ const { results } = await scraper.scrape(scrape_job);
+ await scraper.quit();
+
+ // TODO check that all puppeteer Chrome instances are stopped here
+ });
+
+
+ it('Ensure all chrome are closed after .scrape() has been called on index module', async function () {
+
+ const scrape_job = {
+ search_engine: 'google',
+ /* TODO refactor start_url
+ google_settings: {
+ start_url: 'http://localhost:' + httpPort
+ },
+ */
+ keywords: ['test keyword'],
+ };
+
+ var results = await se_scraper.scrape({
+ throw_on_detection: true,
+ logger: testLogger,
+ // TODO refactor start_url so we can use it instead of depending on the proxy for this test
+ proxies: ['http://localhost:' + proxyPort],
+ use_proxies_only: true,
+ }, scrape_job);
+
+ // TODO check that all puppeteer Chrome instances are stopped here
+
+ });
+
+ });
+
+});
\ No newline at end of file