-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapping_open_art_ai.py
105 lines (93 loc) · 3.46 KB
/
scrapping_open_art_ai.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from DataBase.database_actions import Database_Actions
from DataBase.database_models import (
AI_Generated_Images_Platforms_Info,
AI_Generated_Images_Scrapping_Info,
)
import requests
from datetime import datetime
import json
def fetch_api_results(next_id, timeout=30):
    """Fetch one page of the OpenArt community feed.

    Before the request is made, the cursor is written to
    ``Data/current_page_at_open_art_community.json`` so the value of the
    page being fetched is always on disk.

    Args:
        next_id: Cursor of the page to fetch. ``None`` requests the feed with
            an empty ``cursor`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the scraper forever.

    Returns:
        The decoded JSON body of the response, or ``None`` when saving the
        cursor, the HTTP request (including a non-2xx status or a timeout),
        or the JSON decoding fails.
    """
    try:
        print("===========================================")
        print(f"Going to fetch result for page {next_id}")
        print("===========================================")
        # Persist the cursor first so it is saved even if the request fails.
        with open("Data/current_page_at_open_art_community.json", "w") as f:
            json.dump({"current_page": next_id}, f, indent=4)

        cursor = "" if next_id is None else next_id
        url = f"https://openart.ai/api/feed/community?cursor={cursor}"

        response = requests.get(url, timeout=timeout)
        # Treat HTTP error statuses as failures instead of decoding an error
        # page and handing it to the caller as if it were feed data.
        response.raise_for_status()
        return response.json()
    except Exception as ex:
        # Best-effort boundary: callers rely on None to signal any failure.
        print("An Error Occured while fetching records : ", str(ex))
        return None
def extract_formatted_image_name(url):
    """Return the last path segment of *url* with "webp" swapped for "jpg".

    The value is converted to ``str`` first, so non-string inputs are
    accepted. Every occurrence of ``"webp"`` in the segment is replaced,
    not only a trailing extension.
    """
    # Everything after the final "/" (or the whole text when there is none).
    last_segment = str(url).rsplit("/", 1)[-1]
    return last_segment.replace("webp", "jpg")
def extract_formatted_image_id(image_name):
    """Return the part of *image_name* that precedes its first "."."""
    # partition() yields the whole text as the head when no "." is present.
    stem, _, _ = str(image_name).partition(".")
    return stem
# ---------------------------------------------------------------------------
# Script body: walk the OpenArt community feed page by page and record every
# image that is not yet in the local database.
# ---------------------------------------------------------------------------

database_actions = Database_Actions("sqlite:///DataBase/open_art_images.db")

# Id of the platform row that every scraped image is attached to.
# NOTE(review): indexing [0] assumes the "Open Art Community Feed" row already
# exists in the database — confirm it is seeded before this script runs.
platform_id = database_actions.get(
    AI_Generated_Images_Platforms_Info, name="Open Art Community Feed"
)[0].id
print(platform_id)

# Continue the running image counter from the entry returned by get_latest()
# (presumably the row with the highest image_number — verify in Database_Actions).
image_number = 0
latest_entry = database_actions.get_latest(
    AI_Generated_Images_Scrapping_Info,
    "image_number",
    platform_id=platform_id,
)
if latest_entry is not None:
    image_number = latest_entry.image_number

api_execution_flag = True
api_next_page_value = None

# Resume from the cursor saved by a previous run, if one can be read.
# Failure is reported and the scrape simply starts with no cursor.
try:
    with open("Data/current_page_at_open_art_community.json", "r") as f:
        file_data = json.load(f)
    print(file_data)
    api_next_page_value = file_data["current_page"]
except Exception as ex:
    print("An Error Occured while reading page id : ", str(ex))

total_pages = 0
while api_execution_flag:
    search_result = fetch_api_results(api_next_page_value)
    if search_result is None:
        # The fetch failed; stop instead of retrying in a tight loop.
        api_execution_flag = False
        continue
    total_pages += 1

    # Tolerate a payload without "items" rather than crashing with KeyError.
    images_list = search_result.get("items") or []
    for image_info in images_list:
        image_number += 1
        image_src = image_info["image_url"]
        image_name = extract_formatted_image_name(image_src)
        image_id = extract_formatted_image_id(image_name)
        image_promt = image_info["prompt"]

        # Check for duplicates first so no entity is built for a known image.
        entry_flag = database_actions.check_if_value_exists(
            AI_Generated_Images_Scrapping_Info,
            image_id=image_id,
            platform_id=platform_id,
        )
        if entry_flag:
            print("========= Image Already Exists ==============")
            continue

        # NOTE(review): page_number is always stored as 1 even though
        # total_pages is tracked — confirm this is intentional.
        database_entity = AI_Generated_Images_Scrapping_Info(
            date=datetime.now(),
            platform_id=platform_id,
            page_number=1,
            image_number=image_number,
            image_id=image_id,
            image_name=image_name,
            image_src=image_src,
            image_promt=image_promt,
            image_download_status=False,
        )
        database_actions.add(database_entity)
        print(
            f"Image with Id {image_id} successfully added to database from Page {total_pages}."
        )

    # A missing or empty cursor would make the next request use an empty
    # cursor again and restart from the first page forever, so stop here.
    api_next_page_value = search_result.get("nextCursor")
    if api_next_page_value is None or api_next_page_value == "":
        print("No further cursor returned; stopping.")
        api_execution_flag = False