-
Notifications
You must be signed in to change notification settings - Fork 0
/
configs.py
68 lines (42 loc) · 1.21 KB
/
configs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import random
# Log level name.
LOG_LEVEL = "INFO"

# Download delay (per the original note). The name suggests a request
# timeout; units are presumably seconds -- confirm against the caller.
TIMEOUT = 3

# Default HTTP request headers.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/60.0.3112.90 Safari/537.36"
    ),
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    ),
}

# Page encoding.
APPARENT_ENCODING = "UTF-8"
# --- Redis settings ---

# Redis connection URL.
# NOTE(review): the "[email protected]" segment looks like a redacted
# placeholder rather than a real password/host -- confirm the actual value
# before relying on this URL.
REDIS_CONFIG = "redis://luopx:[email protected]:6379"

# Redis key used for storage.
REDIS_KEY = "proxiespool"

# Initial score for entries in the sorted set.
INIT_SCORE = 10

# Maximum score in the redis sorted set.
MAX_SCORE = 100

# Minimum score; an entry is deleted once it goes past this bound.
MIN_SCORE = 0

# Amount the score is reduced by each time a proxy fails.
SCORE_DECREASE = 1

# Maximum number of proxies; beyond this count the crawler is blocked.
MAX_NUMBERS = 5000
# --- Tester settings ---

# List of test URLs plus an on/off switch.
# NOTE(review): presumably one URL is picked at random per check when the
# switch is on -- confirm against the tester. These two names do not follow
# the UPPER_SNAKE_CASE used by the other settings; they are kept as-is
# because other modules may import them.
RandomTestUrl = ["http://httpbin.org/get"]
Random_Test_ENABLE = True

# Test URL.
TEST_URL = "http://httpbin.org/get"

# Test timeout.
TEST_TIME_OUT = 8

# Status codes considered normal during testing.
VALID_STATUS = {200}

# Test threads (per the original note); the name suggests a batch size.
BATCH_TEST_SIZE = 64
# --- Scheduler settings ---

# Number of proxies in the proxy pool (per the original note); the name
# suggests this is the threshold that count is compared against.
IS_OVER_THRESHOLD = 2000

# Cycle values for the getter and the tester.
# NOTE(review): presumably intervals in seconds -- confirm against the code
# that reads them.
GETTER_CYCLE = 300
TESTER_CYCLE = 60

# Enable flags for the tester, getter and API.
TESTER_ENABLED = True
GETTER_ENABLED = True
API_ENABLED = True