Commit ad5e4a1
🎨🔥 format, delete old code, isort imports
- tested over the last few weeks on local branch
Henry committed Apr 24, 2024
1 parent 80a62d7 commit ad5e4a1
Showing 15 changed files with 82 additions and 261 deletions.
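Note: the import shuffles in this commit are standard isort output: within each section it groups plain `import` statements before `from` imports, alphabetizes both, and merges duplicate imports of the same module onto one line. Assuming no project-specific configuration, running `isort vaep/` reproduces them in place, and `isort --check vaep/` fails in CI if anything would change.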
3 changes: 1 addition & 2 deletions vaep/analyzers/__init__.py
@@ -1,7 +1,6 @@
 from types import SimpleNamespace
 
-from . import diff_analysis
-from . import compare_predictions
+from . import compare_predictions, diff_analysis
 
 __all__ = ['diff_analysis', 'compare_predictions', 'Analysis']
7 changes: 3 additions & 4 deletions vaep/analyzers/analyzers.py
@@ -207,7 +207,7 @@ def to_wide_format(
             self.is_wide_format = True
             return self.df
         self._df_wide = df_wide
-        print(f"Set attribute: df_wide")
+        print("Set attribute: df_wide")
         return df_wide
 
     def describe_peptides(self, sample_n: int = None):
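Note: the print() fix above resolves flake8's F541 ("f-string is missing placeholders"); an f-prefix on a literal without {} fields is inert. For contrast:

    name = 'df_wide'
    print(f'Set attribute: {name}')   # placeholder justifies the f-prefix
    print('Set attribute: df_wide')   # no placeholder, so a plain string suffices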
@@ -264,7 +264,6 @@ def plot_pca(self,):
             raise AttributeError('No metadata available, please set "df_meta" first.')
 
         PCs = self.get_PCA()
-        cols = list(PCs.columns)
 
         fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(
             15, 20), constrained_layout=True)
@@ -332,7 +331,7 @@ def get_dectection_limit(self):
 
     def __repr__(self):
         keys = sorted(self.__dict__)
-        items = ("{}".format(k, self.__dict__[k]) for k in keys)
+        items = ("{}".format(k) for k in keys)
         return "{} with attributes: {}".format(type(self).__name__, ", ".join(items))
 
     # def __dir__(self):
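Note: in the removed line, str.format silently ignores surplus positional arguments, so `self.__dict__[k]` was never rendered and only the key appeared; the replacement keeps the output identical while dropping the dead argument:

    >>> '{}'.format('key', 'value')   # the extra positional argument is ignored
    'key'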
@@ -438,7 +437,7 @@ def plot_date_map(df, ax,
     ax.set_ylabel(cols[1])
     path_collection = scatter_plot_w_dates(
         ax, df, dates=dates, errors='raise')
-    cbar = add_date_colorbar(path_collection, ax=ax)
+    _ = add_date_colorbar(path_collection, ax=ax)
 
 
 def plot_scatter(df, ax,
7 changes: 3 additions & 4 deletions vaep/analyzers/compare_predictions.py
@@ -1,9 +1,9 @@
 from __future__ import annotations
 
 from pathlib import Path
+from typing import List
 
 import pandas as pd
-from typing import List
 
 
 def load_predictions(pred_files: List, shared_columns=['observed']):
@@ -42,7 +42,7 @@ def load_split_prediction_by_modelkey(experiment_folder: Path,
     allow_missing : bool, optional
         Ignore missing pred files of requested model, default False
     shared_columns : List, optional
-        List of columns that are shared between all models, by default ['observed']
+        List of columns that are shared between all models, by default None
 
     Returns
     -------
@@ -60,8 +60,7 @@ def load_split_prediction_by_modelkey(experiment_folder: Path,
         else:
             raise FileNotFoundError(f'{file} does not exist')
     if to_remove:
-        for file in to_remove:
-            pred_files.remove(to_remove)
+        pred_files.remove(to_remove)
     return load_predictions(pred_files, shared_columns=shared_columns)


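Note: the deleted loop was broken; it iterated over `to_remove` but passed the whole container to `list.remove` on each pass, which raises ValueError as soon as the container itself is not an element of `pred_files`. The one-line replacement is correct if `to_remove` ends up holding a single entry; if several models may be missing, a filtering rewrite is the robust form. A sketch under that assumption, not the project's code:

    pred_files = [f for f in pred_files if f not in to_remove]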
1 change: 0 additions & 1 deletion vaep/databases/diseases.py
@@ -6,7 +6,6 @@
 
 def get_disease_association(doid: int, limit: int = 1000):
     params = {'type1': -26,
-              'type2': 'value2',
               'id1': f'DOID:{doid}',
               'type2': 9606,
               'limit': limit,
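Note: the deleted line was dead code. Python dict literals accept repeated keys, and the last occurrence silently wins, so 'type2' always mapped to 9606:

    >>> {'type2': 'value2', 'type2': 9606}
    {'type2': 9606}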
11 changes: 5 additions & 6 deletions vaep/io/__init__.py
@@ -3,7 +3,7 @@
 import pickle
 from collections import namedtuple
 from pathlib import Path, PurePath, PurePosixPath
-from typing import List, Tuple, Union
+from typing import Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -54,8 +54,7 @@ def search_subfolders(path='.', depth: int = 1, exclude_root: bool = False):
 
     def get_subfolders(path):
         return [x for x in path.iterdir()
-                if x.is_dir() and
-                not any(x.match(excl) for excl in EXCLUDED)
+                if x.is_dir() and not any(x.match(excl) for excl in EXCLUDED)
                 ]
 
     directories_previous = directories.copy()
@@ -81,7 +80,7 @@ def resolve_path(path: Union[str, Path], to: Union[str, Path] = '.') -> Path:
     return Path('/'.join(ret))
 
 
-def get_fname_from_keys(keys, folder=Path('.'), file_ext='.pkl', remove_duplicates=True):
+def get_fname_from_keys(keys, folder='.', file_ext='.pkl', remove_duplicates=True):
     if remove_duplicates:
         # https://stackoverflow.com/a/53657523/9684872
         keys = list(dict.fromkeys(keys))
@@ -150,8 +149,8 @@ def load_json(fname: Union[str, Path]) -> dict:
 
 
 def parse_dict(input_dict: dict,
-               types: List[Tuple] = [(PurePath, lambda p: str(PurePosixPath(p))),
-                                     (np.ndarray, lambda a: a.to_list())]):
+               types: Tuple[Tuple] = ((PurePath, lambda p: str(PurePosixPath(p))),
+                                      (np.ndarray, lambda a: a.to_list()))):
     """Transform a set of items (instances) to their string representation"""
     d = dict()
     for k, v in input_dict.items():
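Note: swapping the list default for a tuple sidesteps Python's shared-mutable-default pitfall: a default is evaluated once, when the def statement runs, and the same object is reused on every call. A minimal demonstration with illustrative names:

    def append_to(x, acc=[]):     # one list, created at definition time
        acc.append(x)
        return acc

    print(append_to(1))   # [1]
    print(append_to(2))   # [1, 2]: state leaks across calls

Incidentally, numpy arrays expose tolist(), not to_list(), so the second converter would raise AttributeError if it were ever reached; this commit leaves that line as it was.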
8 changes: 4 additions & 4 deletions vaep/io/datasplits.py
@@ -1,15 +1,15 @@
 from __future__ import annotations
-from dataclasses import dataclass, field
 
 import logging
+from dataclasses import dataclass, field
 from functools import partial
 from pathlib import Path
-from typing import Protocol
+from typing import Union
 
 import pandas as pd
-from pandas.core.algorithms import isin
 
+from vaep.io.format import class_full_module, classname
 from vaep.pandas import interpolate
-from vaep.io.format import classname, class_full_module
 
 logger = logging.getLogger(__name__)

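Note: dropping `from pandas.core.algorithms import isin` also removes a reach into pandas internals; `pandas.core` is private and gets reorganized between releases. The public spelling of the same check is the `.isin` method:

    import pandas as pd

    s = pd.Series(['train', 'val', 'test'])
    print(s.isin(['val', 'test']))   # stable, documented API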
103 changes: 1 addition & 102 deletions vaep/model.py
@@ -1,14 +1,9 @@
 import logging
 
 import numpy as np
 import pandas as pd
-
 import torch
-import torch.utils.data
-from torch.utils.data import Dataset
-from torch import nn
-from torch.nn import functional as F
 
 import fastai.collab as _fastai
 
 logger = logging.getLogger(__name__)

@@ -61,99 +56,3 @@ def get_latent_space(model_method_call: callable,
                                 columns=[f'latent dimension {i+1}'
                                          for i in range(M)])
     return latent_space
-
-
-# # Defining the model manuelly
-
-# import torch.nn as nn
-# d = 3
-
-# n_features= 10
-
-# class VAE(nn.Module):
-#     def __init__(self, d_input=n_features, d=d):
-#         super().__init__()
-
-#         self.d_input = d_input
-#         self.d_hidden = d
-
-#         self.encoder = nn.Sequential(
-#             nn.Linear(d_input, d ** 2),
-#             nn.ReLU(),
-#             nn.Linear(d ** 2, d * 2)
-#         )
-
-#         self.decoder = nn.Sequential(
-#             nn.Linear(d, d ** 2),
-#             nn.ReLU(),
-#             nn.Linear(d ** 2, self.d_input),
-#             nn.Sigmoid(),
-#         )
-
-#     def reparameterise(self, mu, logvar):
-#         if self.training:
-#             std = logvar.mul(0.5).exp_()
-#             eps = std.data.new(std.size()).normal_()
-#             return eps.mul(std).add_(mu)
-#         else:
-#             return mu
-
-#     def forward(self, x):
-#         mu_logvar = self.encoder(x.view(-1, self.d_input)).view(-1, 2, d)
-#         mu = mu_logvar[:, 0, :]
-#         logvar = mu_logvar[:, 1, :]
-#         z = self.reparameterise(mu, logvar)
-#         return self.decoder(z), mu, logvar

-# model = VAE().double().to(device)
-# model
-
-# # Training and testing the VAE
-
-# def loss_function(recon_batch, batch, mu, logvar, beta=1):
-#     BCE = nn.functional.binary_cross_entropy(
-#         recon_batch, batch, reduction='sum'
-#     )
-#     KLD = 0.5 * torch.sum(logvar.exp() - logvar - 1 + mu.pow(2))
-
-#     return BCE + beta * KLD
-
-# epochs = 10
-# codes = dict(μ=list(), logσ2=list())
-# for epoch in range(0, epochs + 1):
-#     # Training
-#     if epoch > 0:  # test untrained net first
-#         model.train()
-#         train_loss = 0
-#         for x in dl_train:
-#             x = x.to(device)
-#             # ===================forward=====================
-#             x_hat, mu, logvar = model(x)
-#             loss = loss_function(x_hat, x, mu, logvar)
-#             train_loss += loss.item()
-#             # ===================backward====================
-#             optimizer.zero_grad()
-#             loss.backward()
-#             optimizer.step()
-#         # ===================log========================
-#         print(f'====> Epoch: {epoch} Average loss: {train_loss / len(dl_train.dataset):.4f}')
-
-# # Testing
-
-# means, logvars = list(), list()
-# with torch.no_grad():
-#     model.eval()
-#     test_loss = 0
-#     for x in dl_valid:
-#         x = x.to(device)
-#         # ===================forward=====================
-#         x_hat, mu, logvar = model(x)
-#         test_loss += loss_function(x_hat, x, mu, logvar).item()
-#         # =====================log=======================
-#         means.append(mu.detach())
-#         logvars.append(logvar.detach())
-#     # ===================log========================
-#     codes['μ'].append(torch.cat(means))
-#     codes['logσ2'].append(torch.cat(logvars))
-#     test_loss /= len(dl_valid.dataset)
-#     print(f'====> Test set loss: {test_loss:.4f}')
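Note: the roughly one hundred deleted lines were a commented-out, hand-written VAE kept next to the fastai-based models. For reference, the pattern they implemented (the reparameterisation trick plus a BCE + KL divergence loss) fits in a few self-contained lines; dimensions and names below are illustrative, not the project's API:

    import torch
    import torch.nn as nn

    class TinyVAE(nn.Module):
        def __init__(self, d_input: int = 10, d: int = 3):
            super().__init__()
            self.d_input = d_input
            self.encoder = nn.Sequential(
                nn.Linear(d_input, d ** 2), nn.ReLU(), nn.Linear(d ** 2, 2 * d))
            self.decoder = nn.Sequential(
                nn.Linear(d, d ** 2), nn.ReLU(),
                nn.Linear(d ** 2, d_input), nn.Sigmoid())

        def forward(self, x):
            # encoder emits mean and log-variance; sample only while training
            mu, logvar = self.encoder(x.view(-1, self.d_input)).chunk(2, dim=-1)
            std = (0.5 * logvar).exp()
            z = mu + std * torch.randn_like(std) if self.training else mu
            return self.decoder(z), mu, logvar

    def vae_loss(recon, x, mu, logvar, beta=1.0):
        bce = nn.functional.binary_cross_entropy(recon, x, reduction='sum')
        kld = 0.5 * torch.sum(logvar.exp() - logvar - 1 + mu.pow(2))
        return bce + beta * kld

    x = torch.rand(4, 10)                  # toy batch scaled to [0, 1]
    recon, mu, logvar = TinyVAE()(x)
    print(vae_loss(recon, x, mu, logvar))  # scalar training objective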
38 changes: 20 additions & 18 deletions vaep/models/__init__.py
@@ -1,33 +1,33 @@
-from functools import reduce
 import json
 import logging
-from operator import mul
-from pathlib import Path
 import pickle
 import pprint
-from typing import Tuple, List, Callable, Union
+from functools import reduce
+from operator import mul
+from pathlib import Path
+from typing import Callable, List, Tuple
 
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import sklearn.metrics as sklm
 import torch
+from fastcore.foundation import L
 from fastai import learner
-import sklearn.metrics as sklm
 
-from . import ae
-from . import analysis
-from . import collab
-from . import vae
-from fastcore.foundation import L
-
 import vaep
 
+from . import ae, analysis, collab, vae
+
 logger = logging.getLogger(__name__)
 
+NUMPY_ONE = np.int64(1)
+
 
 def plot_loss(recorder: learner.Recorder,
-              norm_train: np.int64 = np.int64(1),
-              norm_val: np.int64 = np.int64(1),
+              norm_train: np.int64 = NUMPY_ONE,
+              norm_val: np.int64 = NUMPY_ONE,
               skip_start: int = 5,
              with_valid: bool = True,
              ax: plt.Axes = None) -> plt.Axes:
@@ -66,11 +66,13 @@ def plot_loss(recorder: learner.Recorder,
     return ax
 
 
+NORM_ONES = np.array([1, 1], dtype='int')
+
+
 def plot_training_losses(learner: learner.Learner,
                          name: str,
                          ax=None,
                          save_recorder: bool = True,
-                         norm_factors=np.array([1, 1], dtype='int'),
+                         norm_factors=NORM_ONES,
                          folder='figures',
                          figsize=(15, 8)):
     if ax is None:
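Note: hoisting `np.array([1, 1], dtype='int')` into NORM_ONES (like np.int64(1) into NUMPY_ONE above) makes an easy-to-miss fact explicit: a default is built once, when the def executes, and is then shared by every call. A sketch of the idiom with an illustrative function name:

    import numpy as np

    NORM_ONES = np.array([1, 1], dtype='int')   # created once, at import time

    def normalize(losses, norm_factors=NORM_ONES):
        # callers share the named default; pass a fresh array to override it
        return losses / norm_factors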
@@ -111,7 +113,7 @@ def __init__(self, recorder, name):
         self.iters = recorder.iters
         self.name = name
 
-    def save(self, folder=Path('.')):
+    def save(self, folder='.'):
         with open(Path(folder) / self.filename_tmp.format(self.name), 'wb') as f:
             pickle.dump(self, f)
 
@@ -310,7 +312,7 @@ def __repr__(self):
 
 
 def get_df_from_nested_dict(nested_dict,
-                            column_levels=['data_split', 'model', 'metric_name'],
+                            column_levels=('data_split', 'model', 'metric_name'),
                             row_name='subset'):
     metrics = {}
     for k, run_metrics in nested_dict.items():
24 changes: 13 additions & 11 deletions vaep/models/collab.py
@@ -3,16 +3,16 @@
 from typing import Tuple
 
 import pandas as pd
-
-from fastai.tabular.all import *
-from fastai.collab import *
 # import explicit objects for functional annotations
-from fastai.collab import (CollabDataLoaders, IndexSplitter, TabularCollab, Categorify, TransformBlock)
+from fastai.collab import *
+from fastai.collab import (Categorify, CollabDataLoaders, IndexSplitter,
+                           TabularCollab, TransformBlock)
+from fastai.tabular.all import *
 
-from . import analysis
-import vaep.io.datasplits
 import vaep.io.dataloaders
+import vaep.io.datasplits
+
+from . import analysis
 
 logger = logging.getLogger(__name__)

@@ -45,11 +45,11 @@ class CollabAnalysis(analysis.ModelAnalysis):
 
     def __init__(self,
                  datasplits: vaep.io.datasplits.DataSplits,
-                 sample_column='Sample ID',
-                 item_column='peptide',
-                 target_column='intensity',
-                 model_kwargs=dict(),
-                 batch_size=64):
+                 sample_column: str = 'Sample ID',
+                 item_column: str = 'peptide',
+                 target_column: str = 'intensity',
+                 model_kwargs: dict = None,
+                 batch_size: int = 64):
         if datasplits.val_y is not None:
             self.X, self.frac = combine_data(datasplits.train_X,
                                              datasplits.val_y)
@@ -81,6 +81,8 @@ def __init__(self,
                            splits=splits)
         self.dls = to.dataloaders(path='.', bs=self.batch_size)
         self.params = {}
+        if model_kwargs is None:
+            model_kwargs = {}
         self.model_kwargs = model_kwargs
         self.params['model_kwargs'] = self.model_kwargs

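Note: `model_kwargs=dict()` in the old signature is the mutable-default pitfall again: one dict created when the def runs and shared by every CollabAnalysis instance relying on the default. The two added lines are the standard None-sentinel idiom, which guarantees a fresh dict per call; trimmed to its essence:

    def __init__(self, model_kwargs: dict = None):
        if model_kwargs is None:
            model_kwargs = {}   # new object per call, no cross-instance sharing
        self.model_kwargs = model_kwargs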
(Diff for the remaining 6 of the 15 changed files was not loaded on this page.)
