-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_card_info_manually.py
60 lines (46 loc) · 1.85 KB
/
get_card_info_manually.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""
Scrape the HearthstoneTopDecks website for failed cards, using manually entered card details.
Possible improvement: automate this process with if/else conditions,
e.g. if a card has the characteristics of a minion, treat it as a minion, and so on.
"""
# IMPORTING PACKAGES
# -------------------------------------- #
import pandas as pd
import os
from time import time
from utils import scraping as scr
# MAIN METHOD
# -------------------------------------- #
if __name__ == "__main__":
    # Keep track of runtime
    runtime_start = time()
    print("\nCommencing card scraper...")

    # Manually constructed dict of card page URLs. Each value is
    # [card type, attack, health], in the order the values are handed to
    # scr.scrape_card_manually below.
    # -------------------------------------- #
    failed_cards_dict = {
        "https://www.hearthstonetopdecks.com/cards/siegebreaker/": ["Minion", 5, 8],
        "https://www.hearthstonetopdecks.com/cards/subject-9/": ["Minion", 4, 4],
        "https://www.hearthstonetopdecks.com/cards/breath-of-the-infinite/": ["Spell", 0, 0],
    }

    # Scrape each listed card page, passing along the hand-entered values
    # -------------------------------------- #
    print("\nExtracting failed card information...")
    failed_card_list = [
        scr.scrape_card_manually(
            url,
            card_type=card_type,
            attack=attack,
            health=health,
        )
        for url, (card_type, attack, health) in failed_cards_dict.items()
    ]
    df = pd.DataFrame.from_records(failed_card_list)

    # Serialize and save the list for later use
    print("\nSaving failed cards in CSV...")
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs("data", exist_ok=True)
    df.to_csv("data/hstd_failed_cards.csv", index=False)

    # Keeping track of runtime.
    runtime_end = time()
    print(f"\nFinished in {round(runtime_end-runtime_start,2):,}s...")