diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 99472ac..f4b0fae 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/psf/black
-    rev: '24.2.0'
+    rev: '24.4.2'
     hooks:
       - id: black
       - id: black-jupyter
@@ -24,13 +24,13 @@
       - id: pydocstyle
         args: ['--convention=google', '--add-ignore=D100,D101,D102,D103,D104,D105,D106,D107']
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: 'v4.5.0'
+    rev: 'v4.6.0'
     hooks:
       - id: check-ast
       - id: end-of-file-fixer
       - id: trailing-whitespace
   - repo: https://github.com/sqlfluff/sqlfluff
-    rev: '3.0.0a5'
+    rev: '3.0.5'
     hooks:
       - id: sqlfluff-fix
       - id: sqlfluff-lint
diff --git a/Makefile b/Makefile
index a874a05..784d87a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 .EXPORT_ALL_VARIABLES:
 .PHONY: venv install pre-commit clean
 
-GLOBAL_PYTHON = $(shell python3.9 -c 'import sys; print(sys.executable)')
+GLOBAL_PYTHON = $(shell python3.10 -c 'import sys; print(sys.executable)')
 LOCAL_PYTHON = ./.venv/bin/python
 LOCAL_PRE_COMMIT = ./.venv/lib/python3.10/site-packages/pre_commit
 
diff --git a/models/__init__.py b/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/models/dataset.py b/models/dataset.py
new file mode 100644
index 0000000..dd7a22e
--- /dev/null
+++ b/models/dataset.py
@@ -0,0 +1,80 @@
+"""Load, split, flatten and standardize the MNIST images for the autoencoder."""
+
+from typing import Tuple
+
+import numpy as np
+import tensorflow as tf
+from sklearn.model_selection import train_test_split
+
+# Default seed for both splits, so the train/val/test partition is reproducible.
+RANDOM_STATE = 1900730
+
+
+def divide_data(
+    data: np.ndarray, test_size: float, random_state: int = RANDOM_STATE
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Split ``data`` into train, validation and test subsets.
+
+    ``test_size`` is applied twice: first to carve the test set out of
+    ``data``, then to carve the validation set out of what remains.
+
+    Args:
+        data: Samples to split along the first axis.
+        test_size: Size of the held-out part, forwarded to
+            ``train_test_split`` for both splits.
+        random_state: Seed forwarded to ``train_test_split`` for both splits.
+
+    Returns:
+        The ``(train, val, test)`` subsets.
+    """
+    train, test = train_test_split(data, test_size=test_size, random_state=random_state)
+    train, val = train_test_split(train, test_size=test_size, random_state=random_state)
+    return train, val, test
+
+
+def normalize_standarize(
+    data: np.ndarray, mean: np.ndarray, std: np.ndarray
+) -> np.ndarray:
+    """Standardize ``data`` as ``(data - mean) / (std + 1e-6)``.
+
+    The small constant keeps the division finite where ``std`` is zero.
+    """
+    return (data - mean) / (std + 1e-6)
+
+
+def flatten_images(images: np.ndarray) -> np.ndarray:
+    """Reshape ``images`` to 2-D, keeping the first axis and merging the rest."""
+    return images.reshape(images.shape[0], -1)
+
+
+def compute_mean_std(image_data: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+    """Return the mean and standard deviation of ``image_data`` along axis 0.
+
+    Both statistics are reshaped to ``(1, -1)`` row vectors.
+    """
+    mean = np.mean(image_data, axis=0)
+    std = np.std(image_data, axis=0)
+    return mean.reshape(1, -1), std.reshape(1, -1)
+
+
+def get_data(test_size: float = 0.15) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Load MNIST and return standardized, flattened train/val/test images.
+
+    The labels are discarded. The train and test images returned by Keras are
+    merged and re-split with ``divide_data``; the mean and standard deviation
+    are computed on the training split only and applied to all three splits.
+
+    Args:
+        test_size: Split size forwarded to ``divide_data``.
+
+    Returns:
+        The standardized ``(train, val, test)`` arrays.
+    """
+    (x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
+    images = np.vstack((x_train, x_test))  # (70000, 28, 28)
+
+    splits = [flatten_images(split) for split in divide_data(images, test_size)]
+    # Statistics come from the training split only, to avoid leaking val/test.
+    mean, std = compute_mean_std(splits[0])
+    train, val, test = (normalize_standarize(split, mean, std) for split in splits)
+    return train, val, test
diff --git a/models/model.py b/models/model.py
new file mode 100644
index 0000000..eb1b69b
--- /dev/null
+++ b/models/model.py
@@ -0,0 +1,101 @@
+"""Dense autoencoder built from the ``train`` section of the configuration."""
+
+from typing import Any, Dict, List
+
+from tensorflow.keras.layers import Dense, Input
+from tensorflow.keras.models import Model
+
+
+class AutoEncoder:
+    """Fully connected autoencoder configured from a parsed config mapping.
+
+    Attributes:
+        Encoder: Output tensor of the last encoder layer (``None`` until built).
+        Decoder: Output tensor of the final output layer (``None`` until built).
+        Model_Encoder: Keras model mapping the input to ``Encoder``.
+        AE: Keras model mapping the input to ``Decoder``.
+    """
+
+    def __init__(self, config: Dict[str, Any]):
+        """Store ``config`` and create the input and output layers.
+
+        Args:
+            config: Parsed configuration; all settings are read from its
+                ``"train"`` section.
+        """
+        self.config = config
+        self.input_shape = self.config["train"]["input_shape"]
+        self.Encoder = None
+        self.Decoder = None
+        self.AE = None
+        self.Model_Encoder = None
+        self.input_layer = Input(shape=(self.input_shape,))
+        # Maps the decoder output back to the input width; no activation is set.
+        self.output_layer = Dense(units=self.input_shape)
+
+    def build_model(self):
+        """Chain the encoder and decoder layers and create both Keras models."""
+        tensor = self.input_layer
+        for layer in self._get_encoder_layers():
+            tensor = layer(tensor)
+        self.Encoder = tensor
+
+        for layer in self._get_decoder_layers():
+            tensor = layer(tensor)
+        self.Decoder = self.output_layer(tensor)
+
+        self.Model_Encoder = Model(inputs=self.input_layer, outputs=self.Encoder)
+        self.AE = Model(inputs=self.input_layer, outputs=self.Decoder)
+
+    def fit(self, x_train, x_val):
+        """Compile the autoencoder and train it to reconstruct its input.
+
+        Args:
+            x_train: Training samples, used as both input and target.
+            x_val: Validation samples, used as both input and target.
+
+        Returns:
+            The value returned by ``Model.fit``.
+
+        Raises:
+            ValueError: If ``build_model`` has not been called yet.
+        """
+        if self.AE is None:
+            raise ValueError("Build The Model First")
+
+        train_cfg = self.config["train"]
+        self.AE.compile(
+            optimizer=train_cfg["optimizer"],
+            loss=train_cfg["loss"],
+            metrics=train_cfg["metrics"],
+        )
+        # summary() prints the table itself and returns None, so wrapping it in
+        # print() would only add a stray "None" line.
+        self.AE.summary()
+        # use_multiprocessing is documented for generator/Sequence input only,
+        # so it is not passed here.
+        return self.AE.fit(
+            x=x_train,
+            y=x_train,
+            batch_size=train_cfg["batches"],
+            epochs=train_cfg["epochs"],
+            validation_data=(x_val, x_val),
+            verbose=1,
+        )
+
+    def _get_dense_layers(self, part: str) -> List[Dense]:
+        """Create the ``Dense`` layers configured under ``autoencoder.<part>``."""
+        part_cfg = self.config["train"]["autoencoder"][part]
+        activation = part_cfg["activations"]
+        return [
+            Dense(units=units, activation=activation)
+            for units in part_cfg["layers_units"]
+        ]
+
+    def _get_encoder_layers(self):
+        """Return the encoder's ``Dense`` layers, in configuration order."""
+        return self._get_dense_layers("encoder")
+
+    def _get_decoder_layers(self):
+        """Return the decoder's ``Dense`` layers, in configuration order."""
+        return self._get_dense_layers("decoder")
diff --git a/models/params.yaml b/models/params.yaml
new file mode 100644
index 0000000..1876708
--- /dev/null
+++ b/models/params.yaml
@@ -0,0 +1,19 @@
+train:
+  input_shape: 784
+  batches: 32
+  optimizer: adam
+  metrics: ['accuracy']
+  loss: mean_squared_error
+  epochs: 50
+  autoencoder:
+    encoder:
+      layers_units: [512,256,128,64,32]
+      activations: relu
+    decoder:
+      layers_units: [64,128,256,512]
+      activations: relu
+
+data:
+  mean: 0
+  std: 255
+  random_status: 1900730
diff --git a/models/train.py b/models/train.py
new file mode 100644
index 0000000..c94005e
--- /dev/null
+++ b/models/train.py
@@ -0,0 +1,36 @@
+"""Train the autoencoder described by a YAML config and save the results."""
+
+import argparse
+
+import pandas as pd
+import yaml
+from dataset import get_data
+from model import AutoEncoder
+from tensorflow.keras.models import save_model
+
+
+def read_params(config_path):
+    """Load the YAML file at ``config_path`` and return its parsed content."""
+    with open(config_path, encoding="utf-8") as yaml_file:
+        return yaml.safe_load(yaml_file)
+
+
+def main():
+    """Parse ``--config``, train the autoencoder and write its outputs."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", default="params.yaml")
+    parsed_args = parser.parse_args()
+    print(parsed_args.config)
+    config = read_params(parsed_args.config)
+    train, val, _ = get_data()
+    print(config)
+
+    model = AutoEncoder(config)
+    model.build_model()
+    history = model.fit(train, val)
+    pd.DataFrame(history.history).to_csv("./history.csv")
+    save_model(model.AE, "./model_files/")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index 292a33b..0ec0498 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ description = "All You Need to Know about Computational Intelligence"
 authors = ["Ahmed Elsayed"]
 
 [tool.poetry.dependencies]
-python = ">=3.9,<3.10.dev0"
+python = ">=3.10,<3.11.dev0"
 pandas = "^1"
 tensorflow = "^2.15.0.post1"
 scipy = "^1.12.0"
@@ -16,6 +16,11 @@ pre-commit = "^3.7.0"
 visdom = "^0.2.4"
 notebook = "^7.1.3"
 jupyterlab = "^4.1.6"
+pipreqs = "0.4.0"
+tensorflow-datasets = "^4.9.4"
+update = "^0.0.1"
+scikit-learn = "^1.4.2"
+dvc = "^3.50.1"
 
 [tool.poetry.group.dev.dependencies]
 black = "*"