This repository has been archived by the owner on Oct 22, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
cian.py
58 lines (49 loc) · 2.03 KB
/
cian.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import collections
import requests
import xlrd
# Immutable record describing one ad; Cian.get_ads() builds one per data row
# of the downloaded spreadsheet.
Ad = collections.namedtuple(
    'Ad',
    'id rooms address price phones description url',
)
class Cian:
    """Download a spreadsheet export from a URL and parse it into ``Ad`` records."""

    # Maps each ``Ad`` field to the title of the spreadsheet column holding it.
    # The titles are runtime data matched against the header row verbatim.
    COLUMNS = {
        'id': 'ID объявления',  # "ad ID"
        'rooms': 'Количество комнат',  # "number of rooms"
        'address': 'Адрес',  # "address"
        'price': 'Цена',  # "price"
        'phones': 'Телефоны',  # "phones"
        'description': 'Описание',  # "description"
        'url': 'Ссылка на объявление',  # "link to the ad"
    }

    # Sent with the download request in place of the default python-requests UA.
    USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:74.0) Gecko/20100101 Firefox/74.0'

    # Where the downloaded workbook is stored before it is parsed.
    XLSX_PATH = 'tmp/cian.xlsx'

    # Seconds to wait for the server; without it a stalled connection would
    # block the caller forever.
    TIMEOUT = 30

    def __init__(self, url):
        """Remember the URL the spreadsheet will be downloaded from.

        Args:
            url: Address of the spreadsheet export.
        """
        self.url = url

    def _download_xlsx(self):
        """Fetch the spreadsheet and save it to ``XLSX_PATH``.

        Returns:
            The path of the saved file, or ``None`` when the server answers
            with anything other than HTTP 200.

        Raises:
            requests.exceptions.RequestException: On connection errors or when
                the server does not respond within ``TIMEOUT`` seconds.
        """
        import os

        resp = requests.get(
            self.url,
            headers={'User-Agent': self.USER_AGENT},
            timeout=self.TIMEOUT,
        )
        if resp.status_code != 200:
            return None

        # The target directory is not guaranteed to exist on a fresh checkout.
        directory = os.path.dirname(self.XLSX_PATH)
        if directory:
            os.makedirs(directory, exist_ok=True)

        with open(self.XLSX_PATH, 'wb') as f:
            f.write(resp.content)
        return self.XLSX_PATH

    @classmethod
    def _locate_columns(cls, header):
        """Find the position of every required column in the header row.

        Args:
            header: List of header cell values (first row of the sheet).

        Returns:
            A dict mapping each ``Ad`` field name to its column index, or
            ``None`` if at least one required column title is absent.
        """
        columns = {}
        for field, title in cls.COLUMNS.items():
            try:
                columns[field] = header.index(title)
            except ValueError:
                return None
        return columns

    def get_ads(self):
        """Download the spreadsheet and convert its data rows into ``Ad`` tuples.

        Returns:
            A list with one ``Ad`` per row after the header. The list is empty
            when the download fails, the sheet has no rows, or any required
            column is missing from the header.
        """
        path = self._download_xlsx()
        if path is None:
            # Download failed; there is nothing to open.
            return []

        # NOTE(review): xlrd >= 2.0 reads only .xls files. If the download is
        # a real .xlsx workbook this needs xlrd < 2.0 -- confirm the pinned
        # version.
        sheet = xlrd.open_workbook(path).sheet_by_index(0)
        if sheet.nrows == 0:
            return []

        columns = self._locate_columns(sheet.row_values(0))
        if columns is None:
            return []

        ads = []
        for row in range(1, sheet.nrows):
            data = sheet.row_values(row)
            ads.append(Ad(**{field: data[index] for field, index in columns.items()}))
        return ads