diff --git a/doctoshotgun.py b/doctoshotgun.py
index 6bbfddb..8e9096e 100755
--- a/doctoshotgun.py
+++ b/doctoshotgun.py
@@ -96,6 +96,25 @@ def iter_centers_ids(self):
             yield data['searchResultId']
 
     def get_next_page(self):
+        # French doctolib uses data-u attribute of span-element to create the link when user hovers span
+        for span in self.doc.xpath('//div[contains(@class, "next")]/span'):
+            if 'data-u' not in span.attrib:
+                continue
+
+            # How to find the corresponding javascript-code:
+            # Press F12 to open dev-tools, select elements-tab, find div.next, right click on element and enable break on substructure change
+            # Hover "Next" element and follow callstack upwards
+            # JavaScript:
+            # var t = (e = r()(e)).data("u")
+            # , n = atob(t.replace(/\s/g, '').split('').reverse().join(''));
+
+            import base64
+            href = base64.urlsafe_b64decode(''.join(span.attrib['data-u'].split())[::-1]).decode()
+            query = dict(parse.parse_qsl(parse.urlsplit(href).query))
+
+            if 'page' in query:
+                return int(query['page'])
+
         for a in self.doc.xpath('//div[contains(@class, "next")]/a'):
             href = a.attrib['href']
             query = dict(parse.parse_qsl(parse.urlsplit(href).query))
diff --git a/test_browser.py b/test_browser.py
index a4d19ca..d493fbc 100644
--- a/test_browser.py
+++ b/test_browser.py
@@ -2,11 +2,12 @@
 from requests.adapters import Response
 import responses
 from html import escape
+import lxml.html as html
 import json
 import datetime
 from woob.browser.browsers import Browser
 from woob.browser.exceptions import ServerError
-from doctoshotgun import DoctolibDE, DoctolibFR, CenterBookingPage
+from doctoshotgun import CentersPage, DoctolibDE, DoctolibFR, CenterBookingPage
 
 # globals
 FIXTURES_FOLDER = "test_fixtures"
@@ -114,6 +115,297 @@ def test_find_centers_de_returns_502_should_fail(tmp_path):
         pass
 
 
+@responses.activate
+def test_get_next_page_fr_should_return_2_on_page_1(tmp_path):
+    """
+    Check that get_next_page returns 2 when we are on page 1 and there is a next page available
+    """
+
+    """
+    Next (data-u decoded): /vaccination-covid-19-autres-professions-prioritaires/france?page=2&ref_visit_motive_ids%5B%5D=6970&ref_visit_motive_ids%5B%5D=7005
+    """
+
+    htmlString = """
+
+    """
+    doc = html.document_fromstring(htmlString)
+
+    response = Response()
+    response._content = b'{}'
+
+    centers_page = CentersPage(browser=Browser(), response=response)
+    centers_page.doc = doc
+    next_page = centers_page.get_next_page()
+    assert next_page == 2
+
+
+@responses.activate
+def test_get_next_page_fr_should_return_3_on_page_2(tmp_path):
+    """
+    Check that get_next_page returns 3 when we are on page 2 and next page is available
+    """
+
+    """
+    Previous (data-u decoded): /vaccination-covid-19-autres-professions-prioritaires/france?ref_visit_motive_ids%5B%5D=6970&ref_visit_motive_ids%5B%5D=7005
+    Next (data-u decoded): /vaccination-covid-19-autres-professions-prioritaires/france?page=3&ref_visit_motive_ids%5B%5D=6970&ref_visit_motive_ids%5B%5D=7005
+    """
+
+    htmlString = """
+
+    """
+    doc = html.document_fromstring(htmlString)
+
+    response = Response()
+    response._content = b'{}'
+
+    centers_page = CentersPage(browser=Browser(), response=response)
+    centers_page.doc = doc
+    next_page = centers_page.get_next_page()
+    assert next_page == 3
+
+
+@responses.activate
+def test_get_next_page_fr_should_return_4_on_page_3(tmp_path):
+    """
+    Check that get_next_page returns 4 when we are on page 3 and next page is available
+    """
+
+    """
+    Previous (data-u decoded): /vaccination-covid-19-autres-professions-prioritaires/france?page=2&ref_visit_motive_ids%5B%5D=6970&ref_visit_motive_ids%5B%5D=7005
+    Next (data-u decoded): /vaccination-covid-19-autres-professions-prioritaires/france?page=4&ref_visit_motive_ids%5B%5D=6970&ref_visit_motive_ids%5B%5D=7005
+    """
+
+    htmlString = """
+
+    """
+    doc = html.document_fromstring(htmlString)
+
+    response = Response()
+    response._content = b'{}'
+
+    centers_page = CentersPage(browser=Browser(), response=response)
+    centers_page.doc = doc
+    next_page = centers_page.get_next_page()
+    assert next_page == 4
+
+
+def test_get_next_page_fr_should_return_None_on_last_page(tmp_path):
+    """
+    Check that get_next_page returns None when we are on the last page
+    """
+    """
+    Previous (data-u decoded): /vaccination-covid-19-autres-professions-prioritaires/france?page=7&ref_visit_motive_ids%5B%5D=6970&ref_visit_motive_ids%5B%5D=7005
+    """
+
+    htmlString = """
+
+    """
+    doc = html.document_fromstring(htmlString)
+
+    response = Response()
+    response._content = b'{}'
+
+    centers_page = CentersPage(browser=Browser(), response=response)
+    centers_page.doc = doc
+    next_page = centers_page.get_next_page()
+    assert next_page is None
+
+
+@responses.activate
+def test_get_next_page_de_should_return_2_on_page_1(tmp_path):
+    """
+    Check that get_next_page returns 2 when we are on page 1 and next page is available
+    """
+
+    htmlString = """
+
+    """
+    doc = html.document_fromstring(htmlString)
+
+    response = Response()
+    response._content = b'{}'
+
+    centers_page = CentersPage(browser=Browser(), response=response)
+    centers_page.doc = doc
+    next_page = centers_page.get_next_page()
+    assert next_page == 2
+
+
+@responses.activate
+def test_get_next_page_de_should_return_3_on_page_2(tmp_path):
+    """
+    Check that get_next_page returns 3 when we are on page 2 and next page is available
+    """
+
+    htmlString = """
+
+    """
+    doc = html.document_fromstring(htmlString)
+
+    response = Response()
+    response._content = b'{}'
+
+    centers_page = CentersPage(browser=Browser(), response=response)
+    centers_page.doc = doc
+    next_page = centers_page.get_next_page()
+    assert next_page == 3
+
+
+@responses.activate
+def test_get_next_page_de_should_return_4_on_page_3(tmp_path):
+    """
+    Check that get_next_page returns 4 when we are on page 3 and next page is available
+    """
+
+    htmlString = """
+
+    """
+    doc = html.document_fromstring(htmlString)
+
+    response = Response()
+    response._content = b'{}'
+
+    centers_page = CentersPage(browser=Browser(), response=response)
+    centers_page.doc = doc
+    next_page = centers_page.get_next_page()
+    assert next_page == 4
+
+
+def test_get_next_page_de_should_return_None_on_last_page(tmp_path):
+    """
+    Check that get_next_page returns None when we are on the last page
+    """
+
+    htmlString = """
+
+    """
+    doc = html.document_fromstring(htmlString)
+
+    response = Response()
+    response._content = b'{}'
+
+    centers_page = CentersPage(browser=Browser(), response=response)
+    centers_page.doc = doc
+    next_page = centers_page.get_next_page()
+    assert next_page is None
+
+
 @responses.activate
 def test_book_slots_should_succeed(tmp_path):
     """