Skip to content

Commit

Permalink
Merge branch 'main' of github.com:dfm/tess-atlas
Browse files Browse the repository at this point in the history
  • Loading branch information
avivajpeyi committed Jul 28, 2023
2 parents 29d8fc7 + 92abefd commit 97597a0
Show file tree
Hide file tree
Showing 18 changed files with 148 additions and 85 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
.pytest_cache
tmp
test_jobgen
*.tar.gz
.virtual_documents
.DS_Store
Expand Down Expand Up @@ -28,7 +29,7 @@ test_notebooks/
**/test_notebooks
tests/test_notebooks/
src/tess_atlas/notebook_controllers/templates/*.ipynb

.tess-atlas-cache
/tests/test_jobgen/
case_studies

Expand Down
3 changes: 3 additions & 0 deletions src/tess_atlas/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from .utils import set_global_environ_vars

__all__ = []

Expand All @@ -10,3 +11,5 @@
__copyright__ = "Copyright 2020 TESS Atlas developers"
__contributors__ = "https://github.com/dfm/tess-atlas/graphs/contributors"
__website__ = "http://catalog.tess-atlas.cloud.edu.au"

set_global_environ_vars()
28 changes: 28 additions & 0 deletions src/tess_atlas/cli/plot_run_stats_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import click

from tess_atlas.logger import setup_logger
from tess_atlas.notebook_controllers.controllers.toi_notebook_controller.toi_run_stats_recorder import (
RUN_STATS_FILENAME,
TOIRunStatsRecorder,
)

PROG = "plot_run_stats"


@click.command(name=PROG, help="Plot the run stats from a run_stats.csv file")
@click.argument(
    "filename", type=click.Path(exists=True), default=RUN_STATS_FILENAME
)
def main(filename: str):
    """Command-line entry point that plots the contents of a run-stats CSV.

    Args:
        filename (str): Path to the run-stats CSV file; must already exist
            and defaults to ``RUN_STATS_FILENAME``.
    """
    # Configure logging before the recorder does any work.
    setup_logger()
    recorder = TOIRunStatsRecorder(filename)
    recorder.plot()
9 changes: 4 additions & 5 deletions src/tess_atlas/cli/run_toi_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os

from tess_atlas.file_management import TOI_DIR
from tess_atlas.logger import LOGGER_NAME, setup_logger
from tess_atlas.logger import LOGGER_NAME, setup_logger, timestamp
from tess_atlas.notebook_controllers.controllers.toi_notebook_controller import (
TOINotebookController,
)
Expand Down Expand Up @@ -45,9 +45,8 @@ def main():
LOGGER_NAME,
outdir=os.path.join(args.outdir, TOI_DIR.format(toi=args.toi_number)),
)
logger.info(
f"run_toi({args.toi_number}) {'quick' if args.quickrun else ''} {'setup' if args.setup else ''}"
)
stmt = f"run_toi({args.toi_number}) {'quick' if args.quickrun else ''} {'setup' if args.setup else ''}"
logger.info(stmt + f" [{timestamp()}]")
success, runtime = TOINotebookController.run_toi(
toi_number=args.toi_number,
outdir=args.outdir,
Expand All @@ -56,5 +55,5 @@ def main():
)
job_str = "setup" if args.setup else "execution"
logger.info(
f"TOI {args.toi_number} {job_str} complete: {success} ({runtime:.2f}s)"
f"TOI {args.toi_number} {job_str} complete: {success} ({runtime:.2f}s) [{timestamp()}]"
)
29 changes: 27 additions & 2 deletions src/tess_atlas/data/analysis_summary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import os
import warnings
from typing import Dict, List, Union

Expand Down Expand Up @@ -36,6 +37,18 @@ def _set_counts(self):
def __repr__(self):
return f"AnalysisSummary(Started[{self.n_analysed}], Pass[{self.n_successful_analyses}] + Failed[{self.n_failed_analyses}] = {self.n_total})"

@classmethod
def load(
    cls, notebook_dir: str, n_threads=1, clean=True
) -> "AnalysisSummary":
    """Return the analysis summary for ``notebook_dir``.

    The summary CSV inside ``notebook_dir`` is reused only when it already
    exists and ``clean`` is False. Otherwise the summary is rebuilt from
    the directory contents (forwarding ``n_threads``) and saved back to
    ``notebook_dir`` before being returned.
    """
    cached_csv = AnalysisSummary.fname(notebook_dir)
    if os.path.exists(cached_csv) and not clean:
        return cls.from_csv(cached_csv)

    rebuilt = cls.from_dir(notebook_dir, n_threads=n_threads)
    rebuilt.save(notebook_dir)
    return rebuilt

@classmethod
def from_dir(self, notebook_dir: str, n_threads=1) -> "AnalysisSummary":
"""Load the metadata from the output directory.
Expand All @@ -58,7 +71,7 @@ def from_dir(self, notebook_dir: str, n_threads=1) -> "AnalysisSummary":
return AnalysisSummary(df)

@classmethod
def load_from_csv(self, csv_path: str) -> "AnalysisSummary":
def from_csv(self, csv_path: str) -> "AnalysisSummary":
return AnalysisSummary(pd.read_csv(csv_path))

@property
Expand Down Expand Up @@ -89,10 +102,22 @@ def generate_summary_table(self) -> pd.DataFrame:
print("Num passed:", len(df[df["Status"] == Status.PASS.value]))
return df

def save_to_csv(self, csv_path: str):
def save(self, notebook_dir: str):
    """Write the summary table to the CSV inside ``notebook_dir``.

    The destination path comes from ``self.fname`` and the directory is
    created if it does not exist yet. The row index is not written.

    Returns:
        The path of the CSV file that was written.
    """
    destination = self.fname(notebook_dir)
    os.makedirs(notebook_dir, exist_ok=True)
    self._data.to_csv(destination, index=False)
    return destination

@staticmethod
def fname(notebook_dir: str) -> str:
# make sure notebook dir does not have a file extension
my_dir, fname = os.path.splitext(notebook_dir)
if fname:
raise ValueError(
"notebook_dir should not have a file extension: {notebook_dir}"
)
return os.path.join(notebook_dir, "analysis_summary.csv")


def _get_toi_metadict(fn: str) -> Dict[str, Union[str, bool, int, float]]:
return TOINotebookController(fn).get_meta_data()
10 changes: 6 additions & 4 deletions src/tess_atlas/logger.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import datetime
import logging
import os
import sys
from contextlib import contextmanager
from datetime import datetime
from pathlib import Path
from typing import Optional, Union

Expand Down Expand Up @@ -75,9 +75,7 @@ def setup_logger(

class DeltaTimeFormatter(logging.Formatter):
def format(self, record):
duration = datetime.datetime.utcfromtimestamp(
record.relativeCreated / 1000
)
duration = datetime.utcfromtimestamp(record.relativeCreated / 1000)
record.delta = duration.strftime("%H:%M:%S")
return super().format(record)

Expand All @@ -100,3 +98,7 @@ def all_logging_disabled(highest_level=logging.CRITICAL):
yield
finally:
logging.disable(previous_level)


def timestamp() -> str:
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    now = datetime.now()
    return now.strftime("%Y-%m-%d %H:%M:%S")
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class MenuPageController(NotebookController):

def _get_templatized_text(self, **kwargs):
summary_path = kwargs["summary_path"]
summary = AnalysisSummary.load_from_csv(summary_path)
summary = AnalysisSummary.from_csv(summary_path)
n_exofop_toi = len(
EXOFOP_DATA.get_toi_list(remove_toi_without_lk=False)
)
Expand All @@ -40,13 +40,10 @@ def execute(self, **kwargs) -> bool:


def run_menu_page(notebook_dir):
summary = AnalysisSummary.from_dir(notebook_dir)
summary_path = summary.save_to_csv(
os.path.join(notebook_dir, "summary.csv")
)
summary = AnalysisSummary.load(notebook_dir)
menu_notebook_fn = os.path.join(notebook_dir, "menu.ipynb")
processor = MenuPageController(menu_notebook_fn)
processor.generate(summary_path=summary_path)
processor.generate(summary_path=summary.fname(notebook_dir))
processor.execute()
logger.info(
f"Menu page generated [{processor.execution_success}]: {processor.notebook_path}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ class NotebookController:

def __init__(self, notebook_path):
self.notebook_path = notebook_path
self.execution_time = np.nan # TODO: read this from the notebook
self.execution_success = False # TODO: read this from the notebook
self.execution_time = np.nan
self.execution_success = False
os.makedirs(self.notebook_dir, exist_ok=True)

def generate(self, *args, **kwargs):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ def run_toi(
execution_successful = toi_nb_processor.notebook_exists
runtime = time.time() - t0
else:
# regenerate the notebook (in case changes were made to the template)
toi_nb_processor.generate(quickrun=quickrun)
execution_successful = toi_nb_processor.execute(quickrun=quickrun)
runtime = toi_nb_processor.execution_time
TOIRunStatsRecorder.save_stats(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,26 @@
import os
from datetime import datetime, timedelta

import click
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.dates import DateFormatter
from matplotlib.patches import Rectangle

from ....data.exofop import EXOFOP_DATA
from ....file_management import get_file_timestamp
from ....logger import LOGGER_NAME
from tess_atlas.data.exofop import EXOFOP_DATA
from tess_atlas.file_management import get_file_timestamp
from tess_atlas.logger import LOGGER_NAME, timestamp

logger = logging.getLogger(LOGGER_NAME)

RUN_STATS_FILENAME = "run_stats.csv"


def timestamp() -> str:
return datetime.now().strftime("%Y-%m-%d %H:%M:%S")


class TOIRunStatsRecorder:
def __init__(self, fname: str):
self.fname = fname
self.outdir = os.path.dirname(fname)
self.fname = os.path.abspath(fname)
self.outdir = os.path.dirname(self.fname)
if not os.path.isfile(self.fname):
self.__init_file()
self.file_timestamp: datetime = get_file_timestamp(
Expand Down Expand Up @@ -60,6 +55,7 @@ def save_stats(
file_last_modified = runstats.file_timestamp
runstats.__append(toi, success, job_type, runtime)
# if the filetimestamp is older than 30 minutes, then make a new plot
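# FIXME: this check does not work as intended -- timedelta's first positional argument is days, so timedelta(30 * 60) is 1800 days; use timedelta(minutes=30)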
if (datetime.now() - file_last_modified) > timedelta(30 * 60):
runstats.plot()

Expand All @@ -71,9 +67,9 @@ def data(self) -> pd.DataFrame:
self._data = self._data.rename(
columns={"timestamp": "end_time", "duration_in_s": "runtime"}
)
self._data["start_time"] = self._data[
"end_time"
] - pd.to_timedelta(self._data["runtime"], unit="s")
t1 = self._data["end_time"]
t0 = t1 - pd.to_timedelta(self._data["runtime"], unit="s")
self._data["start_time"] = t0
return self._data

def plot(self, savefig: bool = True):
Expand Down Expand Up @@ -190,11 +186,3 @@ def _plot_start_end_time(self, ax=None):
ax.set_xlim(d["start_time"].min(), d["end_time"].max())
# use datetime for x-axis
ax.xaxis.set_major_formatter(DateFormatter("%H:%M"))


@click.command()
@click.argument(
"filename", type=click.Path(exists=True), default=RUN_STATS_FILENAME
)
def cli_plot_run_stats(filename: str):
TOIRunStatsRecorder(filename).plot()
2 changes: 1 addition & 1 deletion src/tess_atlas/notebook_controllers/templates/menu_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from tess_atlas.data.analysis_summary import AnalysisSummary

init_notebook_mode(all_interactive=True)
summary_df = AnalysisSummary.load_from_csv(
summary_df = AnalysisSummary.from_csv(
"{{{SUMMARY_PATH}}}"
).generate_summary_table()

Expand Down
13 changes: 8 additions & 5 deletions src/tess_atlas/notebook_controllers/templates/toi_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,6 @@

notebook_initalisations()

# + tags=["exe", "hide-cell"]
import theano

print(theano.config)

# + [markdown] tags=["def"]
# # TESS Atlas fit for TOI {{{TOINUMBER}}}
#
Expand Down Expand Up @@ -112,6 +107,14 @@
TOI_NUMBER = {{{TOINUMBER}}}
logger = get_notebook_logger(outdir=f"toi_{TOI_NUMBER}_files")

# + tags=["exe", "hide-cell"]
import theano

from tess_atlas.utils import tabulate_global_environ_vars

logger.info("GLOBAL ENVS:\n" + tabulate_global_environ_vars())
logger.info(f"THEANO Config:\n{theano.config}")

# + [markdown] tags=["def"]
# ## Downloading Data
#
Expand Down
22 changes: 21 additions & 1 deletion src/tess_atlas/slurm_job_generator/file_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,28 @@ def make_slurm_file(
array_job: Optional[bool] = False,
command: Optional[str] = None,
email: Optional[str] = "",
tmp_mem: Optional[str] = "",
account: Optional[str] = "",
) -> str:
"""Make a slurm file for submitting a job to the cluster
:param outdir: Base output directory (will generate {outdir}/log_{jobname}, {outdir}/submit))
:param outdir: Base output directory (will generate {outdir}/log_{jobname})
:param module_loads: Module loads to include in the slurm file
:param jobname: Name of the job
:param cpu_per_task: Number of CPUs per task
:param time: Time limit for the job
:param mem: Memory limit for the job
:param submit_dir: Directory to save the slurm file to
:param partition: Partition to submit the job on
:param jobid: Job ID (for array jobs)
:param array_args: Array arguments (for array jobs)
:param array_job: Whether the job is an array job
:param command: Command to run
:param email: Email address to send notifications to
:param tmp_mem: Temporary mem (tmp dir accessible via $JOBFS) (eg 1000M, or 1G)
:param account: Account to charge the job to
"""
log_dir = os.path.abspath(mkdir(outdir, f"log_{jobname}"))
common_kwargs = dict(
Expand All @@ -51,6 +69,8 @@ def make_slurm_file(
array_job=str(array_job),
command=command,
email=email,
tmp_mem=tmp_mem,
account=account,
)
array_kwargs = dict(
array_end=None,
Expand Down
1 change: 1 addition & 0 deletions src/tess_atlas/slurm_job_generator/slurm_job_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def setup_jobs(
jobname=f"pe",
mem="1500MB",
command=cmd,
tmp_mem="500M",
)
)

Expand Down
18 changes: 9 additions & 9 deletions src/tess_atlas/slurm_job_generator/templates/slurm_template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@
#SBATCH --time={{time}}
#SBATCH --mem={{mem}}
#SBATCH --cpus-per-task={{cpu_per_task}}
{% if partition!="" -%} #SBATCH --partition={{partition}}
{% endif %}
{% if array_job=="True" -%} #SBATCH --array=0-{{array_end}}
{% endif %}
{% if email!="" -%} #SBATCH --mail-user={{email}}
{% endif %}
{% if email!="" -%} #SBATCH --mail-type=ALL
{% endif %}

{% if tmp_mem!="" -%} #SBATCH --tmp={{tmp_mem}}{% endif %}
{% if partition!="" -%} #SBATCH --partition={{partition}}{% endif %}
{% if array_job=="True" -%} #SBATCH --array=0-{{array_end}}{% endif %}
{% if email!="" -%} #SBATCH --mail-user={{email}}{% endif %}
{% if email!="" -%} #SBATCH --mail-type=ALL{% endif %}
{% if account!="" -%} #SBATCH --account={{account}}{% endif %}

module load {{module_loads}}

{{load_env}}
{% if array_job=="True" %}
ARRAY_ARGS=({{array_args}})
{% endif %}
echo "Job tmp path: $JOBFS"
export THEANO_FLAGS="base_compiledir=$JOBFS/.theano_base,compiledir=$JOBFS/.theano_compile"
export IPYTHONDIR=$JOBFS/.ipython
{{command}}
Loading

0 comments on commit 97597a0

Please sign in to comment.