main.py
import datetime

import click

from crawler import LukouCrawler
import dataset_processer


@click.command()
@click.argument('keywords', nargs=-1)
@click.option('--pages', '-P', '-p', default=1, help='Number of pages to crawl')
@click.option('--start-page', '-S', default=1, help='Starting page number')
@click.option('--search-type', '-T', default=0, help='Search type: 0: products, 1: articles, 3: albums, 4: group buys')
@click.option('--sort-type', '-t', default=0, help='Search order: 0: default, 3: newest first, 4: hottest')
@click.option('--sort', '-s', default='collect', help='Sort output by the given column(s); separate multiple names with |. Defaults to collect')
@click.option('--ascending', '-a', is_flag=True, help='Use with -s to sort in ascending order; default is descending')
def main(keywords, pages, start_page, search_type, sort_type, sort, ascending):
    keywords = ' '.join(keywords)
    if not keywords:
        raise click.UsageError('At least one search keyword is required.')
    # Timestamp for the output file name; ':' is replaced with '-'
    # because colons are not allowed in Windows file names.
    start_time_str = str(datetime.datetime.now().replace(
        microsecond=0)).replace(':', '-')
    crawler = LukouCrawler()
    result = crawler.crawle(keywords,
                            pages,
                            start_page,
                            search_type,
                            sort_type)
    # Post-process the crawled DataFrame: make feed URLs clickable in
    # Excel, drop unwanted columns, and sort by the requested column(s).
    result['feed_url'] = result['feed_url'].apply(
        dataset_processer.generate_excel_hyperlink)
    result = dataset_processer.filter_column(result)
    result = dataset_processer.order_by(result, sort, ascending)
    # Note: to_excel's `encoding` argument was deprecated in pandas 1.5
    # and removed in 2.0; drop it when upgrading pandas.
    result.to_excel(f'{keywords} {start_time_str}.xlsx',
                    index=False,
                    encoding='gb18030')


if __name__ == "__main__":
    main()
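
To sanity-check the command line interface end to end, click's bundled test runner can invoke main in-process. This is a minimal sketch: the keyword and option values below are arbitrary placeholders, and the run assumes crawler and dataset_processer are importable from the working directory.

# Smoke-test the CLI with click's test runner (click.testing ships with
# click itself). The keyword and options are placeholders, not real data.
from click.testing import CliRunner

from main import main

runner = CliRunner()
result = runner.invoke(main, ['phone case', '--pages', '2', '-s', 'collect', '-a'])
print(result.exit_code)  # 0 if the crawl and export succeeded
print(result.output)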
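
The dataset_processer module itself is not part of this file. Purely as a reading aid, here is one plausible shape for two of its helpers, inferred from the call sites above; the names come from main.py, but the signatures and behavior are assumptions, not the actual implementation.

# Hypothetical sketch of dataset_processer helpers, reconstructed from
# how main.py calls them; not the real module.
import pandas as pd


def generate_excel_hyperlink(url: str) -> str:
    # Wrap a URL in an Excel HYPERLINK formula so the exported cell
    # becomes a clickable link.
    return f'=HYPERLINK("{url}")'


def order_by(df: pd.DataFrame, sort: str, ascending: bool) -> pd.DataFrame:
    # `sort` is a '|'-separated list of column names, e.g. 'collect|price'.
    return df.sort_values(by=sort.split('|'), ascending=ascending)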