+import urllib.parse
 import pytest
-
+from unittest.mock import patch, Mock
 from scrapegraphai.docloaders.scrape_do import scrape_do_fetch
-from unittest.mock import Mock, patch
-
-class TestScrapeDoFetch:
-    @patch('scrapegraphai.docloaders.scrape_do.requests.get')
-    @patch('scrapegraphai.docloaders.scrape_do.os.getenv')
-    def test_scrape_do_fetch_with_proxy_geocode_and_super_proxy(self, mock_getenv, mock_get):
-        """
-        Test scrape_do_fetch function with proxy mode, geoCode, and super_proxy enabled.
-        This test verifies that the function correctly handles proxy settings,
-        geoCode parameter, and super_proxy flag when making a request.
-        """
-        # Mock environment variable
-        mock_getenv.return_value = "proxy.scrape.do:8080"
-
-        # Mock the response
-        mock_response = Mock()
-        mock_response.text = "Mocked response content"
-        mock_get.return_value = mock_response

-        # Test parameters
-        token = "test_token"
-        target_url = "https://example.com"
-        use_proxy = True
-        geoCode = "US"
-        super_proxy = True

-        # Call the function
-        result = scrape_do_fetch(token, target_url, use_proxy, geoCode, super_proxy)
+def test_scrape_do_fetch_without_proxy():
+    """
+    Test scrape_do_fetch function using API mode (without proxy).

-        # Assertions
-        assert result == "Mocked response content"
-        mock_get.assert_called_once()
-        call_args = mock_get.call_args
+    This test verifies that:
+    1. The function correctly uses the API mode when use_proxy is False.
+    2. The correct URL is constructed with the token and encoded target URL.
+    3. The function returns the expected response text.
+    """
+    token = "test_token"
+    target_url = "https://example.com"
+    encoded_url = urllib.parse.quote(target_url)
+    expected_response = "Mocked API response"
+
+    with patch("requests.get") as mock_get:
+        mock_response = Mock()
+        mock_response.text = expected_response
+        mock_get.return_value = mock_response

-        # Check if the URL is correct
-        assert call_args[0][0] == target_url
+        result = scrape_do_fetch(token, target_url, use_proxy=False)

-        # Check if proxies are set correctly
-        assert call_args[1]['proxies'] == {
-            "http": f"http://{token}:@proxy.scrape.do:8080",
-            "https": f"http://{token}:@proxy.scrape.do:8080",
-        }
+        expected_url = f"http://api.scrape.do?token={token}&url={encoded_url}"
+        mock_get.assert_called_once_with(expected_url)

-        # Check if verify is False
-        assert call_args[1]['verify'] is False
+        assert result == expected_response

-        # Check if params are set correctly
-        assert call_args[1]['params'] == {"geoCode": "US", "super": "true"}
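For context, the tests above pin down two behaviors of scrape_do_fetch: the removed test asserted proxy mode (requests.get called with the target URL, proxies built from the token and an environment-provided proxy endpoint, verify=False, and geoCode/super params), while the new test asserts API mode (a single requests.get call on http://api.scrape.do?token=...&url=<encoded target>). The sketch below is only an illustration of a function shape that would satisfy both sets of assertions; it is not the actual scrapegraphai implementation, and the environment variable name "PROXY_SERVER_URL", its default value, and the exact parameter handling are guesses.

import os
import urllib.parse

import requests


def scrape_do_fetch(token, target_url, use_proxy=False, geoCode=None, super_proxy=False):
    """Hypothetical sketch: fetch target_url through the scrape.do service."""
    if use_proxy:
        # Proxy mode: route the request through the scrape.do proxy endpoint,
        # authenticating with the token embedded in the proxy URL. The removed
        # test asserted verify=False and params {"geoCode": "US", "super": "true"}.
        # The env var name and fallback below are illustrative guesses.
        proxy_endpoint = os.getenv("PROXY_SERVER_URL", "proxy.scrape.do:8080")
        proxies = {
            "http": f"http://{token}:@{proxy_endpoint}",
            "https": f"http://{token}:@{proxy_endpoint}",
        }
        params = {"geoCode": geoCode, "super": str(super_proxy).lower()} if geoCode else {}
        response = requests.get(target_url, proxies=proxies, verify=False, params=params)
    else:
        # API mode: call the scrape.do HTTP API with the token and the
        # URL-encoded target, as the new test expects.
        encoded_url = urllib.parse.quote(target_url)
        url = f"http://api.scrape.do?token={token}&url={encoded_url}"
        response = requests.get(url)
    return response.text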