Skip to content

Commit 76e2efe

Browse files
authored
Bug fixes and small improvements (#40)
Fixes:
- Experiment status codes are now respected.
- SHIFT key for removing data points is not sticky anymore.
- Big PoC files are OK.
- Update tailwind config due to breaking node image upgrade.
- Increase RAM allowance for workers.
- Ignore .DS_Store files in experiments folder.

Improvements:
- Upgrade MongoDB image to v6.
- Add log rotation.
- Additional logging.
- Allow binary logging without external database.
2 parents 9e0e8f7 + 9cc24be commit 76e2efe

File tree

29 files changed

+279
-175
lines changed

29 files changed

+279
-175
lines changed

.devcontainer/devcontainer.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
"vscode": {
2222
"extensions": [
2323
"charliermarsh.ruff",
24+
"ms-python.debugpy",
25+
"ms-python.python",
2426
"Vue.volar"
2527
]
2628
}

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ nginx/ssl/keys/*
77
**/node_modules
88
**/junit.xml
99

10-
# Screenshots
10+
# Screenshots and logs
11+
logs/*.log.*
1112
logs/screenshots/*
1213
!logs/screenshots/.gitkeep
1314

.vscode/launch.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"configurations": [
77
{
88
"name": "BugHog",
9-
"type": "python",
9+
"type": "debugpy",
1010
"request": "launch",
1111
"program": "/app/bci/app.py",
1212
"purpose": [

Dockerfile

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM node:lts-alpine as ui-build-stage
1+
FROM node:22.12-alpine as ui-build-stage
22
WORKDIR /app
33
COPY /bci/web/vue/package*.json ./
44
RUN npm install
@@ -7,9 +7,16 @@ RUN npm run build
77

88

99
FROM openresty/openresty:1.27.1.1-bullseye AS nginx
10+
RUN apt update -y && \
11+
apt install -y curl && \
12+
rm -rf /var/lib/apt/lists/*
13+
RUN mkdir -p /www/data/js && \
14+
curl https://cdn.bokeh.org/bokeh/release/bokeh-3.6.1.min.js -o /www/data/js/bokeh.min.js && \
15+
curl https://cdn.bokeh.org/bokeh/release/bokeh-api-3.6.1.min.js -o /www/data/js/bokeh-api.min.js
1016
COPY ./nginx/start.sh /usr/local/bin/
1117
COPY ./nginx/config /etc/nginx/config
1218
COPY --from=ui-build-stage /app/dist /www/data
19+
COPY --from=ui-build-stage /app/node_modules/ace-builds/src-min-noconflict /www/data/node_modules/ace-builds/src-min-noconflict
1320
CMD ["start.sh"]
1421

1522

bci/browser/binary/binary.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import logging
44
import os
5+
import time
56
from abc import abstractmethod
67
from typing import Optional
78

@@ -53,7 +54,7 @@ def origin(self) -> str:
5354
raise AttributeError(f"Unknown binary origin for path '{self.get_bin_path()}'")
5455

5556
@staticmethod
56-
def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:
57+
def _list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:
5758
binaries = []
5859
for subfolder_path in os.listdir(os.path.join(bin_folder_path, 'downloaded')):
5960
bin_entry = {}
@@ -63,10 +64,10 @@ def list_downloaded_binaries(bin_folder_path: str) -> list[dict[str, str]]:
6364

6465
@staticmethod
6566
def list_artisanal_binaries(bin_folder_path: str, executable_name: str):
66-
return Binary.get_artisanal_manager(bin_folder_path, executable_name).get_artisanal_binaries_list()
67+
return Binary._get_artisanal_manager(bin_folder_path, executable_name).get_artisanal_binaries_list()
6768

6869
@staticmethod
69-
def get_artisanal_manager(bin_folder_path: str, executable_name: str) -> ArtisanalBuildManager:
70+
def _get_artisanal_manager(bin_folder_path: str, executable_name: str) -> ArtisanalBuildManager:
7071
return ArtisanalBuildManager(bin_folder_path, executable_name)
7172

7273
def fetch_binary(self):
@@ -80,8 +81,10 @@ def fetch_binary(self):
8081
return
8182
# Try to download binary
8283
elif self.is_available_online():
84+
start = time.time()
8385
self.download_binary()
84-
logger.info(f'Binary for {self.state.index} downloaded')
86+
elapsed_time = time.time() - start
87+
logger.info(f'Binary for {self.state.index} downloaded in {elapsed_time:.2f}s')
8588
BinaryCache.store_binary_files(self.get_potential_bin_path(), self.state)
8689
else:
8790
raise BuildNotAvailableError(self.browser_name, self.state)

bci/browser/binary/factory.py

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,13 @@
1-
from typing import Type
2-
31
from bci.browser.binary.binary import Binary
42
from bci.browser.binary.vendors.chromium import ChromiumBinary
53
from bci.browser.binary.vendors.firefox import FirefoxBinary
64
from bci.version_control.states.state import State
75

86

9-
def list_downloaded_binaries(browser):
10-
return __get_class(browser).list_downloaded_binaries()
11-
12-
13-
def list_artisanal_binaries(browser):
14-
return __get_class(browser).list_artisanal_binaries()
15-
16-
17-
def update_artisanal_binaries(browser):
18-
return __get_class(browser).get_artisanal_manager().update()
19-
20-
21-
def download_online_binary(browser, revision_number):
22-
return __get_class(browser).download_online_binary(revision_number)
23-
24-
25-
def binary_is_available(state: State) -> bool:
26-
return __has_available_binary_online(state) or __has_available_binary_artisanal(state)
27-
28-
29-
def __has_available_binary_online(state: State) -> bool:
30-
return __get_class(state.browser_name).has_available_binary_online()
31-
32-
33-
def __has_available_binary_artisanal(state: State) -> bool:
34-
return __get_class(state.browser_name).get_artisanal_manager().has_artisanal_binary_for(state)
35-
36-
377
def get_binary(state: State) -> Binary:
388
return __get_object(state)
399

4010

41-
def __get_class(browser_name: str) -> Type[Binary]:
42-
match browser_name:
43-
case 'chromium':
44-
return ChromiumBinary
45-
case 'firefox':
46-
return FirefoxBinary
47-
case _:
48-
raise ValueError(f'Unknown browser {browser_name}')
49-
50-
5111
def __get_object(state: State) -> Binary:
5212
match state.browser_name:
5313
case 'chromium':

bci/browser/binary/vendors/chromium.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,23 @@ def download_binary(self):
6868
bin_path = self.get_potential_bin_path()
6969
os.makedirs(os.path.dirname(bin_path), exist_ok=True)
7070
unzipped_folder_path = os.path.join(os.path.dirname(zip_file_path), "chrome-linux")
71+
self.__remove_unnecessary_files(unzipped_folder_path)
7172
util.safe_move_dir(unzipped_folder_path, os.path.dirname(bin_path))
7273
cli.execute_and_return_status("chmod -R a+x %s" % os.path.dirname(bin_path))
7374
# Remove temporary files in /tmp/COMMIT_POS
7475
shutil.rmtree(os.path.dirname(zip_file_path))
7576

77+
def __remove_unnecessary_files(self, binary_folder_path: str) -> None:
78+
"""
79+
Remove binary files that are not necessary for default usage of the browser.
80+
This is to improve performance, especially when caching binary files.
81+
82+
:param binary_folder_path: Path to the folder where the binary files are stored.
83+
"""
84+
locales_folder_path = os.path.join(binary_folder_path, 'locales')
85+
if os.path.isdir(locales_folder_path):
86+
util.remove_all_in_folder(locales_folder_path, except_files=['en-GB.pak', 'en-US.pak'])
87+
7688
def _get_version(self) -> str:
7789
command = "./chrome --version"
7890
if bin_path := self.get_bin_path():
@@ -86,11 +98,11 @@ def _get_version(self) -> str:
8698

8799
@staticmethod
88100
def list_downloaded_binaries() -> list[dict[str, str]]:
89-
return Binary.list_downloaded_binaries(BIN_FOLDER_PATH)
101+
return Binary._list_downloaded_binaries(BIN_FOLDER_PATH)
90102

91103
@staticmethod
92104
def get_artisanal_manager() -> ArtisanalBuildManager:
93-
return Binary.get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)
105+
return Binary._get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)
94106

95107
browser_version_to_driver_version = {
96108
'88': "88.0.4324.96",

bci/browser/binary/vendors/firefox.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,11 @@ def get_driver_version(self, browser_version):
9595

9696
@staticmethod
9797
def list_downloaded_binaries() -> list[dict[str, str]]:
98-
return Binary.list_downloaded_binaries(BIN_FOLDER_PATH)
98+
return Binary._list_downloaded_binaries(BIN_FOLDER_PATH)
9999

100100
@staticmethod
101101
def get_artisanal_manager() -> ArtisanalBuildManager:
102-
return Binary.get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)
102+
return Binary._get_artisanal_manager(BIN_FOLDER_PATH, EXECUTABLE_NAME)
103103

104104
browser_version_to_driver_version = {
105105
'84': "0.28.0",

bci/configuration.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,16 +70,17 @@ def initialize_folders():
7070
def get_database_params() -> DatabaseParameters:
7171
required_database_params = ['BCI_MONGO_HOST', 'BCI_MONGO_USERNAME', 'BCI_MONGO_DATABASE', 'BCI_MONGO_PASSWORD']
7272
missing_database_params = [param for param in required_database_params if os.getenv(param) in ['', None]]
73+
binary_cache_limit = int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0))
7374
if missing_database_params:
7475
logger.info(f'Could not find database parameters {missing_database_params}, using database container...')
75-
return container.run()
76+
return container.run(binary_cache_limit)
7677
else:
7778
database_params = DatabaseParameters(
7879
os.getenv('BCI_MONGO_HOST'),
7980
os.getenv('BCI_MONGO_USERNAME'),
8081
os.getenv('BCI_MONGO_PASSWORD'),
8182
os.getenv('BCI_MONGO_DATABASE'),
82-
int(os.getenv('BCI_BINARY_CACHE_LIMIT', 0)),
83+
binary_cache_limit,
8384
)
8485
logger.info(f"Found database environment variables '{database_params}'")
8586
return database_params
@@ -140,7 +141,7 @@ def configure_loggers():
140141
bci_logger.addHandler(stream_handler)
141142

142143
# Configure file handler
143-
file_handler = logging.handlers.RotatingFileHandler(f'/app/logs/{hostname}.log', mode='a', backupCount=2)
144+
file_handler = logging.handlers.RotatingFileHandler(f'/app/logs/{hostname}.log', mode='a', backupCount=3, maxBytes=8*1024*1024)
144145
file_handler.setLevel(logging.DEBUG)
145146
file_handler.setFormatter(Loggers.formatter)
146147
bci_logger.addHandler(file_handler)
@@ -154,8 +155,7 @@ def configure_loggers():
154155

155156
# Configure memory handler
156157
Loggers.memory_handler.setLevel(logging.INFO)
157-
buffer_formatter = logging.handlers.BufferingHandler(Loggers.formatter)
158-
Loggers.memory_handler.setFormatter(buffer_formatter)
158+
Loggers.memory_handler.setFormatter(Loggers.formatter)
159159
bci_logger.addHandler(Loggers.memory_handler)
160160

161161
# Log uncaught exceptions

bci/database/mongo/binary_cache.py

Lines changed: 55 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def write_from_db(file_path: str, grid_file_id: str) -> None:
6969
return True
7070

7171
@staticmethod
72-
def store_binary_files(binary_executable_path: str, state: State) -> bool:
72+
def store_binary_files(binary_executable_path: str, state: State):
7373
"""
7474
Stores the files in the folder of the given path in the database.
7575
@@ -80,35 +80,60 @@ def store_binary_files(binary_executable_path: str, state: State) -> bool:
8080
if MongoDB().binary_cache_limit <= 0:
8181
return False
8282

83-
while BinaryCache.__count_cached_binaries() >= MongoDB.binary_cache_limit:
83+
while BinaryCache.__count_cached_binaries() >= MongoDB().binary_cache_limit:
8484
if BinaryCache.__count_cached_binaries(state_type='revision') <= 0:
8585
# There are only version binaries in the cache, which will never be removed
8686
return False
8787
BinaryCache.__remove_least_used_revision_binary_files()
8888

89+
logger.debug(f"Caching binary files for {state}...")
8990
fs = MongoDB().gridfs
91+
9092
binary_folder_path = os.path.dirname(binary_executable_path)
93+
last_access_ts = datetime.datetime.now()
94+
def store_file(file_path: str) -> None:
95+
# Max chunk size is 16 MB (meta-data included)
96+
chunk_size = 1024 * 1024 * 15
97+
with open(file_path, 'rb') as file:
98+
file_id = fs.new_file(
99+
file_type='binary',
100+
browser_name=state.browser_name,
101+
state_type=state.type,
102+
state_index=state.index,
103+
relative_file_path=os.path.relpath(file_path, binary_folder_path),
104+
access_count=0,
105+
last_access_ts=last_access_ts,
106+
chunk_size=chunk_size
107+
)
108+
while chunk := file.read(chunk_size):
109+
file_id.write(chunk)
110+
file_id.close()
111+
91112
start_time = time.time()
92-
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
113+
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
114+
futures = []
93115
for root, _, files in os.walk(binary_folder_path):
94116
for file in files:
95117
file_path = os.path.join(root, file)
96-
with open(file_path, 'rb') as file:
97-
executor.submit(
98-
fs.put,
99-
file.read(),
100-
file_type='binary',
101-
browser_name=state.browser_name,
102-
state_type=state.type,
103-
state_index=state.index,
104-
relative_file_path=os.path.relpath(file_path, binary_folder_path),
105-
access_count=0,
106-
last_access_ts=datetime.datetime.now(),
107-
)
118+
future = executor.submit(store_file, file_path)
119+
futures.append(future)
120+
logger.debug(f"Number of files to cache: {len(futures)}")
108121
executor.shutdown(wait=True)
109-
elapsed_time = time.time() - start_time
110-
logger.debug(f'Stored binary in {elapsed_time:.2f}s')
111-
return True
122+
123+
futures_with_exception = [future for future in futures if future.exception() is not None]
124+
if futures_with_exception:
125+
logger.error(
126+
(
127+
f"Something went wrong caching binary files for {state}, "
128+
"Removing possibly imcomplete binary files from cache."
129+
),
130+
exc_info=futures_with_exception[0].exception()
131+
)
132+
BinaryCache.__remove_revision_binary_files(state.type, state.index)
133+
logger.debug(f"Removed possibly incomplete cached binary files for {state}.")
134+
else:
135+
elapsed_time = time.time() - start_time
136+
logger.debug(f'Stored binary in {elapsed_time:.2f}s')
112137

113138
@staticmethod
114139
def __count_cached_binaries(state_type: Optional[str] = None) -> int:
@@ -130,7 +155,6 @@ def __remove_least_used_revision_binary_files() -> None:
130155
"""
131156
Removes the least used revision binary files from the database.
132157
"""
133-
fs = MongoDB().gridfs
134158
files_collection = MongoDB().get_collection('fs.files')
135159

136160
grid_cursor = files_collection.find(
@@ -139,6 +163,16 @@ def __remove_least_used_revision_binary_files() -> None:
139163
)
140164
for state_doc in grid_cursor:
141165
state_index = state_doc['state_index']
142-
for grid_doc in files_collection.find({'state_index': state_index, 'state_type': 'revision'}):
143-
fs.delete(grid_doc['_id'])
166+
BinaryCache.__remove_revision_binary_files('revision', state_index)
144167
break
168+
169+
@staticmethod
170+
def __remove_revision_binary_files(state_type: str, state_index: int) -> None:
171+
"""
172+
Removes the binary files associated with the parameters.
173+
"""
174+
fs = MongoDB().gridfs
175+
files_collection = MongoDB().get_collection('fs.files')
176+
177+
for grid_doc in files_collection.find({'state_index': state_index, 'state_type': state_type}):
178+
fs.delete(grid_doc['_id'])

0 commit comments

Comments
 (0)