-
Notifications
You must be signed in to change notification settings - Fork 0
/
pyflocolor_functions.py
193 lines (159 loc) · 7.79 KB
/
pyflocolor_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
from bs4 import BeautifulSoup
# imports the URL library, which helps download images from websites
import urllib.request
# imports the pandas library, which stores data into spreadsheet-like objects
import pandas as pd
# imports the os library, which is used to work with directories
import os
import pyflocolor_utils as pfc_utils
def setup_directories():
    """Create the AppData working folders used by the application.

    The folders are created relative to the current working directory.
    Folders that already exist are left as they are.
    """
    for folder in (
        'AppData/Images',
        'AppData/Occurrences',
        'AppData/Values',
        'AppData/Output',
        'AppData/Summary',
    ):
        # exist_ok=True makes repeated calls harmless
        os.makedirs(folder, exist_ok=True)
def download_images(occurrence_lists, image_num=100, re_download=False):
    """Download the images listed in each occurrence CSV to AppData/Images.

    Every entry of ``occurrence_lists`` is the name of a CSV file inside
    ``AppData/Occurrences``. Each CSV is expected to have the columns ``id``
    and ``image_url``. Images are saved as
    ``AppData/Images/<CSV name without extension>/<id>.jpg``.

    Args:
        occurrence_lists: CSV file names to process. Nothing is done when
            this is empty or None.
        image_num: Maximum number of rows, counted from the top of each CSV,
            whose images are downloaded.
        re_download: When False, an image that already exists on disk is
            skipped. When True it is fetched again and overwritten.

    Returns:
        None. One progress line is printed per downloaded image, and one
        line per image whose download failed.
    """
    if not occurrence_lists:
        return None

    total_files = len(occurrence_lists)
    for file_num, occurrences in enumerate(occurrence_lists, start=1):
        occurrence_df = pd.read_csv(rf"AppData/Occurrences/{occurrences}")
        species_name = os.path.splitext(occurrences)[0]
        os.makedirs(r"AppData/Images/" + species_name, exist_ok=True)
        dest = rf"AppData/Images/{species_name}/"

        # Never read more rows than the CSV actually has
        length = min(image_num, len(occurrence_df.index))

        for i in range(length):
            # Positional access, so the row labels of the dataframe do not matter
            target = dest + str(occurrence_df['id'].iloc[i]) + '.jpg'
            if os.path.exists(target) and not re_download:
                continue
            try:
                # Visit the image url and save the image under its id as a jpg
                urllib.request.urlretrieve(occurrence_df['image_url'].iloc[i], target)
            except Exception as err:
                # Best effort: a dead url, a malformed url or a network problem
                # must not stop the remaining downloads. Catching Exception
                # (rather than a bare except) still lets Ctrl+C and
                # SystemExit interrupt a long download run.
                print(f"File {file_num}/{total_files}: image {i + 1}/{length} skipped ({err})")
                continue
            # Rough progress bar: printed once per image that was downloaded
            print(f"File {file_num}/{total_files}: image {i + 1}/{length}")
    return None
# Takes an image path and a number of clusters k and creates an HTML file with the summary statistics and sample color.
# Note that an HTML file is the basic, bare-bones component of a static website. This provides a useful framework
# for displaying data and information in an organized way. If running this in Jupyter Notebook, the web browser
# will already be open and will open a new tab to render the HTML file.
def get_summary_visual(path, k):
    """Write ``image_summary.html`` for one image and return the file's path.

    The page contains the reviewer instructions, the original image and, for
    every entry returned by ``pfc_utils.image_summary_visualizer(path, k)``,
    the entry's label, its three average H/S/V values, a color swatch and a
    reference to the matching ``image_cluster_<n>.jpg`` file.

    Args:
        path: Path of the image to summarize. Backslashes are converted to
            forward slashes before the path is embedded in the page.
        k: Number of clusters, passed through to
            ``pfc_utils.image_summary_visualizer``.

    Returns:
        The path of the written file: the current working directory joined
        with ``image_summary.html``.
    """
    # Get the photo data
    summary = pfc_utils.image_summary_visualizer(path, k)
    # Forward slashes keep the path usable inside the file:/// URL below
    path = path.replace("\\", "/")

    # Opening of the html document
    start = """
<!DOCTYPE html>
<html>
<body>
"""

    # Instructions and the original image
    intro = f"""
<div><b>Instructions</b>: <br/>
Please review <b>all groups</b> before selecting the best ones! <br/>
Select the groups which most fully represent the <b>flower petals</b> without background imagery. <br/>
Enter the best groups as a <b>comma separated list</b>, e.g.: 1,5,9</div> <br/>
<div><b>Alternative commands</b>: <br/>
<b>X</b>: If the original image has <b>no flowers</b>, <b>has over- or under-saturated</b> flowers, or has <b>very
small flowers</b>, enter "X". <br/>
<b>+</b>: If the flowers are <b>not separating from the background</b> into a group, increase the group number by
entering "+". <br/>
<b>-</b>: If the flowers are <b>separating into too many groups</b>, try to decrease the group number by entering
"-", or enter all relevant groups. <br/>
<b>E</b>: If you're seeing the <b>same images repeatedly</b> and believe this species is complete, enter "E"
to move on.</div> <br/>
<div><b>Original Image</b></div>
<img src = "file:///{path}">
<br>
"""

    # One html fragment per summary entry; joined once at the end instead of
    # growing a string inside the loop
    sections = [intro]
    for key in summary:
        # The hsv values of this entry
        hsv_cluster = summary[key]
        # Convert the hsv values into RGB values for the color swatch
        rgb_cluster = pfc_utils.hsv_to_rgb(hsv_cluster)
        # NOTE(review): key[8:] is presumably the cluster number that follows
        # a "Cluster " prefix, and the file:/// URL below points at the
        # filesystem root - confirm where image_summary_visualizer writes
        # the cluster images.
        sections.append(f"""
<p>{key}</P>
<p>Average H: {hsv_cluster[0]}</p>
<p>Average S: {hsv_cluster[1]}</P>
<p>Average V: {hsv_cluster[2]}</p>
<div style ="background-color:rgb({rgb_cluster[0]},{rgb_cluster[1]},{rgb_cluster[2]});height:
50px;width: 50px;" ></div>
<img src = "file:///image_cluster_{key[8:]}.jpg">
<br>
""")

    # Closing of the html document
    end = """
</body>
</html>
"""

    # Concat all the elements together into a single, cohesive html document
    html = start + "".join(sections) + end

    # Write the html locally
    with open("image_summary.html", 'w') as file:
        file.write(html)

    # Build the path with the platform's own separator so the result is also
    # valid outside Windows
    return os.path.join(os.getcwd(), "image_summary.html")
def get_cluster_info(cluster_id, k):
    """Read the average HSV values of one cluster back from image_summary.html.

    The file ``image_summary.html`` in the current working directory is
    parsed. The first ``<p>`` whose text contains ``cluster <cluster_id>``
    (compared against the lower-cased text) is taken as the cluster heading.
    The ``<p>`` elements after it are read until the next one whose text
    starts with "cluster".

    Args:
        cluster_id: Identifier of the cluster to look up.
        k: Value stored unchanged in the last slot of the result.

    Returns:
        A list of seven items:
        ``[min_h, min_s, min_v, max_h, max_s, max_v, k]``.
        The first six items are None when the cluster, or the matching
        "Average" line, is not found.
    """
    import re  # only needed for the heading match below

    output = [None for _ in range(7)]
    with open('image_summary.html', 'r') as file:
        html_content = file.read()
    soup = BeautifulSoup(html_content, 'html.parser')

    # The (?!\d) lookahead stops "cluster 1" from also matching "cluster 10"
    heading_pattern = re.compile(re.escape(f'cluster {cluster_id}') + r'(?!\d)')

    def is_cluster_heading(text):
        # BeautifulSoup hands over None for tags without a single string child
        return text is not None and heading_pattern.search(text.lower()) is not None

    cluster_section = soup.find_all('p', string=is_cluster_heading)
    if cluster_section:
        # line prefix -> (slot of the minimum, slot of the maximum)
        slots = {
            'Average H:': (0, 3),
            'Average S:': (1, 4),
            'Average V:': (2, 5),
        }
        for sibling in cluster_section[0].find_next_siblings('p'):
            text = sibling.get_text()
            # The heading of the next cluster ends this cluster's section
            if text.lower().startswith('cluster'):
                break
            for prefix, (low, high) in slots.items():
                if not text.startswith(prefix):
                    continue
                value = float(text.split(': ')[1])
                # "is None" rather than truthiness: 0.0 is a valid stored value
                output[low] = value if output[low] is None else min(output[low], value)
                output[high] = value if output[high] is None else max(output[high], value)
                break
    output[6] = k
    return output