Skip to content

Commit

Permalink
Update generate_release.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
Alyetama committed May 5, 2022
1 parent 2ce9e63 commit 3110dbf
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 60 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/json2yolov5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
with:
name: artifacts
path: |
tasks.json.tzst
tasks.json
classes.json
dataset-YOLO*.tar
retention-days: 90
108 changes: 63 additions & 45 deletions generate_release.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import time
from glob import glob
from pathlib import Path
from platform import platform
from typing import Union, Optional

import wandb
Expand All @@ -22,13 +21,14 @@

class GenerateRelease:
def __init__(self, run_path: str, version: str, repo: Optional[str],
overwrite: bool = False, dataset: str = None) -> \
overwrite: bool = False, dataset_folder: str = None) -> \
None:
self.run_path = run_path
self.version = version
self.repo = repo
self.overwrite = overwrite
self.dataset = dataset
self.dataset_folder = dataset_folder
self.release_folder = f'releases/{self.version}'

@staticmethod
def find_file(run, fname: str) -> \
Expand Down Expand Up @@ -76,7 +76,6 @@ def get_assets(self, run) -> Union[tuple, None]:
files.
"""
logger.debug(run)
config_fname = f'{self.version}/{self.version}-config.json'

for k in [('val', -3), ('train', -3), ('path', -2)]:
relative_path = '/'.join(
Expand All @@ -85,29 +84,32 @@ def get_assets(self, run) -> Union[tuple, None]:

logger.debug(run.config)
print()
with open(config_fname, 'w') as j:
with open(f'{self.release_folder}/{self.version}-config.json',
'w') as j:
json.dump(run.config, j, indent=4)

classes_fname = f'{self.version}/{self.version}-classes.txt'
classes = run.config['data_dict']['names']

with open(classes_fname, 'w') as f:
with open(f'{self.release_folder}/{self.version}-classes.txt',
'w') as f:
f.writelines([f'{x}\n' for x in classes])

Path('releases').mkdir(exist_ok=True)
if self.dataset_folder:
shutil.copy2(f'{self.dataset_folder}/classes.json',
f'{self.release_folder}/{self.version}-classes.json')

with tarfile.open(f'{self.version}.tar.gz', 'w:gz') as tar:
tar.add(self.version, self.version)
with tarfile.open(f'{self.release_folder}/{self.version}-meta.tgz',
'w:gz') as tar:
tar.add(f'{self.release_folder}', arcname=self.version)

if Path(f'releases/{self.version}').exists() and self.overwrite:
shutil.rmtree(f'releases/{self.version}', ignore_errors=True)
shutil.move(self.version, 'releases')
shutil.move(f'{self.version}.tar.gz', 'releases')
for file in glob(f'{self.release_folder}/*'):
if Path(file).suffix != '.tgz':
Path(file).unlink()

files_to_move = [x for x in glob('releases/*') if not Path(x).is_dir()]
Path(f'releases/{self.version}').mkdir(exist_ok=True)
_ = [shutil.move(x, f'releases/{self.version}') for x in files_to_move]
return config_fname, classes_fname
if self.dataset_folder:
shutil.copy2(f'{self.dataset_folder}/tasks.json',
f'{self.release_folder}/{self.version}-tasks.json')
return

def release_notes(self, run, f1_score: float) -> str:
"""Creates a release notes file.
Expand Down Expand Up @@ -145,8 +147,7 @@ def release_notes(self, run, f1_score: float) -> str:
if run.config.get('dataset_name'):
_run.update({'Dataset name': run.config['dataset_name']})

with open(f'releases/{self.version}/{self.version}-notes.md',
'w') as f:
with open(f'{self.release_folder}/{self.version}-notes.md', 'w') as f:

for k, v in _run.items():
if k == 'W&B run URL' or v == '':
Expand Down Expand Up @@ -194,9 +195,16 @@ def release_notes(self, run, f1_score: float) -> str:
</details>''')
f.write(base_ml_framework_section)

f.write('\n<details>\n<summary>Classes</summary>\n\n```YAML'
'\n- ' + '\n- '.join(run.config['data_dict']['names']) +
'\n```\n')
if self.dataset_folder:
with open(f'{self.dataset_folder}/classes.json') as j:
f.write(
'\n<details>\n<summary>Classes</summary>\n\n```JSON\n'
f'{json.dumps(json.load(j), indent=4)}\n```\n')
else:
f.write('\n<details>\n<summary>Classes</summary>\n\n```YAML'
'\n- ' +
'\n- '.join(run.config['data_dict']['names']) +
'\n```\n')

hist_file, hist_url = self.find_file(run, 'hist.jpg')
f.write(f'\n<img src="{hist_url}" alt="classes-hist">'
Expand Down Expand Up @@ -242,7 +250,7 @@ def release_notes(self, run, f1_score: float) -> str:
[f' <td>{round(x, 4)}'
for x in val_results.values()]) + '</table>\n')

with open(f'releases/{self.version}/{self.version}-notes.md') as f:
with open(f'{self.release_folder}/{self.version}-notes.md') as f:
content = f.read()
return content

Expand All @@ -264,39 +272,50 @@ def generate(self) -> None:
api = wandb.Api()
run = api.from_path(self.run_path)

Path(self.version).mkdir(exist_ok=True)
if Path(f'{self.release_folder}').exists() and self.overwrite:
shutil.rmtree(f'{self.release_folder}', ignore_errors=True)

Path(f'{self.release_folder}').mkdir(exist_ok=True, parents=True)

p = run.summary['best/precision']
r = run.summary['best/recall']
f1_score = 2 * ((p * r) / (p + r))
logger.debug(f'{run.name}: {f1_score}')

_ = self.get_assets(run)
self.get_assets(run)
_ = self.release_notes(run=run, f1_score=f1_score)

artifact = api.artifact(f'{Path(self.run_path).parent}/'
f'run_{Path(self.run_path).name}_model:best')
_ = artifact.download('artifacts')
shutil.move(f'artifacts/best.pt', f'releases/{self.version}')
shutil.move(f'artifacts/best.pt',
f'{self.release_folder}/{self.version}-best_weights.pt')
shutil.rmtree('artifacts')

upload_logs(logs_file)
if self.dataset_folder:
dataset_file = glob(f'{self.dataset_folder}/dataset-YOLO*.tar')[0]
shutil.copy2(dataset_file, f'{self.release_folder}')

if self.repo:
files = [
f'releases/{self.version}/*{x}'
for x in ['.json', '.gz', '.txt']
]
print(f'{"-" * 40}\n')
for file in [
f'{self.version}-best_weights.pt',
f'{self.version}-tasks.json',
Path(dataset_file).name
]:
print(f'FILE="{self.release_folder}/{file}"; gpg --pinentry-mode '
f'loopback -c "$FILE" && rm "$FILE"\n')
print(f'{"-" * 40}')

if self.dataset:
files = files + [self.dataset]
upload_logs(logs_file)

if self.repo:
print(f'{"-" * 40}\n')
gh_cli_cmd = f'gh release create {self.version} -d -F ' \
f'"releases/{self.version}/{self.version}-notes.md" --title ' \
f'"{self.release_folder}/{self.version}-notes.md" --title ' \
f'"{self.version}" --repo ' \
f'{self.repo} {" ".join(files)}'
if 'macOS' in platform():
gh_cli_cmd = gh_cli_cmd + ' | xargs open'
f'{self.repo} {self.release_folder}/*.gpg ' \
f'{self.release_folder}/*.tgz'

print(gh_cli_cmd)
return

Expand Down Expand Up @@ -333,15 +352,14 @@ def opts() -> argparse.Namespace:
'--repo',
help='URL to the repository (i.e., [...].git)',
type=str)
parser.add_argument('-d',
'--dataset',
help='Path to the dataset used in the run '
'(.tar.gpg file)',
type=str)
parser.add_argument('--overwrite',
help='Overwrite if the release already exists on '
'the local disk',
action='store_true')
parser.add_argument('-D',
'--dataset-folder',
help='Path to the dataset/artifacts folder',
type=str)

return parser.parse_args()

Expand All @@ -353,5 +371,5 @@ def opts() -> argparse.Namespace:
version=args.version,
repo=args.repo,
overwrite=args.overwrite,
dataset=args.dataset)
dataset_folder=args.dataset_folder)
gr.generate()
11 changes: 6 additions & 5 deletions json2yolov5.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from model_utils.handlers import catch_keyboard_interrupt
from model_utils.minio_helper import BucketDoesNotExist, MinIO
from model_utils.mongodb_helper import get_tasks_from_mongodb
from model_utils.utils import compress_data, get_labels_count, get_project_ids
from model_utils.utils import tasks_data, get_labels_count, get_project_ids


class JSON2YOLO:
Expand Down Expand Up @@ -318,13 +318,16 @@ def iter_convert_to_yolo(t):
for k, v in d.items():
f.write(f'{k}: {v}\n')

compress_data(f'tasks.json.tzst')
tasks_data(f'tasks.json')

with open('classes.json', 'w') as f:
json.dump(get_labels_count(), f, indent=4)

folder_name = Path(self.output_dir).name
with tarfile.open(f'{folder_name}.tar', 'w') as tar:
ts = datetime.now().strftime('%m-%d-%Y_%H.%M.%S')
dataset_name = f'{folder_name}-{ts}.tar'

with tarfile.open(dataset_name, 'w') as tar:
tar.add(self.output_dir, folder_name)

if self.only_tar_file:
Expand All @@ -347,8 +350,6 @@ def iter_convert_to_yolo(t):
else:
upload_dataset = True
if upload_dataset:
ts = datetime.now().strftime('%m-%d-%Y_%H.%M.%S')
dataset_name = f'{folder_name}-{ts}.tar'
if self.copy_data_from and os.geteuid() != 0:
logger.error(
'Cannot run a local copy. Current user has no root '
Expand Down
12 changes: 3 additions & 9 deletions model_utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@
import json
import os
import shutil
import tempfile
from datetime import datetime
from pathlib import Path

import numpy as np
import ray
import requests
import zstandard
from loguru import logger
from minio.error import S3Error
from requests.structures import CaseInsensitiveDict
Expand Down Expand Up @@ -106,13 +104,9 @@ def iter_db(proj_id, j_min):
return sum(tasks, [])


def compress_data(output_path):
cctx = zstandard.ZstdCompressor(level=22)
with tempfile.TemporaryFile() as f:
f.write(json.dumps(get_data(False)).encode('utf-8'))
f.seek(0)
with open(output_path, 'wb') as fw:
cctx.copy_stream(f, fw)
def tasks_data(output_path):
with open(output_path, 'w') as j:
json.dump(get_data(False), j)
return


Expand Down

0 comments on commit 3110dbf

Please sign in to comment.