1
+ import unittest
2
+ from unittest .mock import patch , MagicMock
3
+ from urllib .parse import urlparse
4
+ import requests
5
+ from wikiteam3 .dumpgenerator .config import Config , OtherConfig
6
+ from wikiteam3 .utils .wiki_avoid import avoid_robots_disallow
7
+
8
+ # filepath: wikiteam3/utils/test_wiki_avoid.py
9
+
10
+
11
+
12
+
13
class TestAvoidRobotsDisallow(unittest.TestCase):
    """Exercise ``avoid_robots_disallow`` with its collaborators mocked out.

    Every test replaces ``sys.exit``, ``requests.get`` and
    ``urllib.robotparser.RobotFileParser`` (as reached through
    ``wikiteam3.utils.wiki_avoid``) and then checks how often each of
    those mocks was called.
    """

    _API_URL = "http://example.com/w/api.php"

    @staticmethod
    def _make_args(api):
        """Return a ``(config, other)`` pair whose ``config.api`` is *api*.

        ``other`` is a ``MagicMock`` carrying a real ``requests.Session``
        in its ``session`` attribute.
        """
        config = Config()
        config.api = api
        other = MagicMock()
        other.session = requests.Session()
        return config, other

    @staticmethod
    def _make_response(status_code, text=None):
        """Return a mocked HTTP response with the given status code.

        ``text`` is only assigned when provided, so a response built
        without it keeps the auto-generated ``MagicMock`` attribute.
        """
        fake_response = MagicMock()
        fake_response.status_code = status_code
        if text is not None:
            fake_response.text = text
        return fake_response

    @patch('wikiteam3.utils.wiki_avoid.sys.exit')
    @patch('wikiteam3.utils.wiki_avoid.requests.get')
    @patch('wikiteam3.utils.wiki_avoid.urllib.robotparser.RobotFileParser')
    def test_avoid_robots_disallow_allowed(self, mock_robotparser, mock_requests_get, mock_sys_exit):
        """A 200 response plus a parser that permits fetching must not exit."""
        config, other = self._make_args(self._API_URL)
        mock_requests_get.return_value = self._make_response(
            200, "User-agent: *\n Allow: /"
        )

        parser = MagicMock()
        parser.can_fetch.return_value = True
        mock_robotparser.return_value = parser

        avoid_robots_disallow(config, other)

        self.assertEqual(mock_requests_get.call_count, 1)
        self.assertEqual(parser.parse.call_count, 1)
        mock_sys_exit.assert_not_called()

    @patch('wikiteam3.utils.wiki_avoid.sys.exit')
    @patch('wikiteam3.utils.wiki_avoid.requests.get')
    @patch('wikiteam3.utils.wiki_avoid.urllib.robotparser.RobotFileParser')
    def test_avoid_robots_disallow_disallowed(self, mock_robotparser, mock_requests_get, mock_sys_exit):
        """A 200 response plus a parser that refuses fetching must exit with 20."""
        config, other = self._make_args(self._API_URL)
        mock_requests_get.return_value = self._make_response(
            200, "User-agent: wikiteam3\n Disallow: /"
        )

        parser = MagicMock()
        parser.can_fetch.return_value = False
        mock_robotparser.return_value = parser

        avoid_robots_disallow(config, other)

        self.assertEqual(mock_requests_get.call_count, 1)
        self.assertEqual(parser.parse.call_count, 1)
        mock_sys_exit.assert_called_once_with(20)

    @patch('wikiteam3.utils.wiki_avoid.sys.exit')
    @patch('wikiteam3.utils.wiki_avoid.requests.get')
    @patch('wikiteam3.utils.wiki_avoid.urllib.robotparser.RobotFileParser')
    def test_avoid_robots_disallow_error(self, mock_robotparser, mock_requests_get, mock_sys_exit):
        """An exception raised by the mocked ``requests.get`` must not exit."""
        config, other = self._make_args(self._API_URL)
        mock_requests_get.side_effect = Exception("Test exception")

        avoid_robots_disallow(config, other)

        self.assertEqual(mock_requests_get.call_count, 1)
        mock_robotparser.assert_called_once()
        mock_sys_exit.assert_not_called()

    @patch('wikiteam3.utils.wiki_avoid.sys.exit')
    @patch('wikiteam3.utils.wiki_avoid.requests.get')
    @patch('wikiteam3.utils.wiki_avoid.urllib.robotparser.RobotFileParser')
    def test_avoid_robots_disallow_robots_not_found(self, mock_robotparser, mock_requests_get, mock_sys_exit):
        """A 404 response for robots.txt must not exit."""
        config, other = self._make_args(self._API_URL)
        # No ``text`` is configured here; only the status code is set.
        mock_requests_get.return_value = self._make_response(404)

        avoid_robots_disallow(config, other)

        self.assertEqual(mock_requests_get.call_count, 1)
        mock_robotparser.assert_called_once()
        mock_sys_exit.assert_not_called()

    @patch('wikiteam3.utils.wiki_avoid.sys.exit')
    @patch('wikiteam3.utils.wiki_avoid.requests.get')
    @patch('wikiteam3.utils.wiki_avoid.urllib.robotparser.RobotFileParser')
    def test_avoid_robots_disallow_no_api_index(self, mock_robotparser, mock_requests_get, mock_sys_exit):
        """With ``config.api`` and ``config.index`` both None, nothing is fetched."""
        config, other = self._make_args(None)
        config.index = None

        avoid_robots_disallow(config, other)

        mock_requests_get.assert_not_called()
        mock_robotparser.assert_called_once()
        mock_sys_exit.assert_not_called()
119
+
120
# Allow running this test module directly as a script; when it is merely
# imported, the guard keeps the test runner from starting.
if __name__ == "__main__":
    unittest.main()
0 commit comments