-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Solution Christian #2
base: main
Are you sure you want to change the base?
Changes from all commits
ad3493d
862754a
c3b5198
b867dcc
bf35edb
69ed3cd
0e0cc24
2f2e937
bfc5e42
1c21d44
978ba11
1cb793e
17b9ae8
31bfe4a
12761c5
f680e3f
533702e
67193ab
49b4d22
b2ea984
02f4f5f
c052d74
524c7a5
22b902d
182cbba
d752b2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
PYTHONPATH=lab |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,18 @@ | ||
Pipfile | ||
|
||
.vscode/ | ||
.vscode/ | ||
.ipynb_checkpoints | ||
__pycache__ | ||
.idea/ | ||
*.bundle | ||
*.csv | ||
*.joblib | ||
*.kvmodel | ||
*.npy | ||
*.pt | ||
*.png | ||
*.tgz | ||
.mypy_cache | ||
.ropeproject | ||
.coverage | ||
*.log |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
FROM python:3.9

# Lets Python resolve the `api` package from /code/lab (relative to WORKDIR)
ENV PYTHONPATH=lab
# Disable stdout/stderr buffering so application logs reach the container
# output immediately instead of waiting for the buffer to fill
ENV PYTHONUNBUFFERED=1

WORKDIR /code

# Copy only the dependency manifests first: the install layers below stay
# cached until Pipfile or Pipfile.lock change
COPY ./Pipfile /code/Pipfile
COPY ./Pipfile.lock /code/Pipfile.lock

RUN python -m pip install --upgrade pip
RUN pip install pipenv
# --system installs into the image's interpreter instead of a virtualenv
RUN pipenv install --clear --system

# Application code and the trained model artefacts
COPY ./lab/api /code/lab/api
COPY ./models /code/models

CMD ["./lab/api/launch.sh"]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
[[source]] | ||
url = "https://pypi.python.org/simple" | ||
verify_ssl = true | ||
name = "pypi" | ||
|
||
[packages] | ||
pandas = "==1.4.3" | ||
matplotlib = "==3.5.2" | ||
numpy = "==1.23.1" | ||
scikit-learn = "==1.1.1" | ||
fastapi = "==0.79.0" | ||
uvicorn = "==0.18.2" | ||
|
||
[dev-packages] | ||
ipykernel = "==6.15.1" | ||
seaborn = "==0.11.2" | ||
pytest = "==7.1.2" | ||
pylint = "==2.14.5" | ||
|
||
[requires] | ||
python_version = "3.9" |
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
""" | ||
This file contains the definition of the API, kept as simple as possible, to run inference with the model | ||
""" | ||
|
||
from fastapi import FastAPI | ||
|
||
from api.inference_engine.inference_engine import InferenceEngine | ||
from api.models.inference_models import InferenceRequest, InferenceResponse | ||
from api.settings import AppSettings | ||
|
||
# Module-level singletons shared by every request handler in this file.
app = FastAPI()
settings = AppSettings()
# The engine loads the model once, at import time, from the path in the settings
inference_engine = InferenceEngine(settings=settings)
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Un punto extra hubiese sido meter algo de security, como un API token simple. |
||
@app.get("/")
async def root():
    """
    Landing endpoint of the API

    Returns:
        dict: single ``message`` entry greeting the caller with the configured app name
    """
    greeting = "Welcome to " + settings.app_name
    return {"message": greeting}
|
||
|
||
# The payload travels in the request body. Bodies on GET requests are not
# reliably forwarded by HTTP clients and proxies, so the route is also served
# through POST; GET stays registered for existing callers.
@app.post("/inference/", response_model=InferenceResponse)
@app.get("/inference/", response_model=InferenceResponse)
async def inference(request: InferenceRequest) -> InferenceResponse:
    """
    This endpoint receives an inference request and returns its result

    Args:
        request (InferenceRequest): Inference received

    Returns:
        InferenceResponse: the request ``id`` together with the predicted price category

    Raises:
        HTTPException: (400) raised by the inference engine when the neighbourhood
            or the room type of the request is not a known category
    """
    price_category = inference_engine.inference(request)
    return InferenceResponse(id=request.id, price_category=price_category)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
""" | ||
This file contains all functions related with the inference process | ||
""" | ||
import pickle | ||
|
||
import numpy as np | ||
from fastapi import HTTPException | ||
|
||
from api.models.inference_models import InferenceRequest | ||
from api.settings import AppSettings | ||
|
||
|
||
class InferenceEngine:
    """
    This class encapsulates the use of the model for inference

    The pickled model is read once, when the engine is created, and reused by
    every call to ``inference``.
    """

    def __init__(self, settings: AppSettings) -> None:
        """
        Load the classifier referenced by the settings

        Args:
            settings (AppSettings): application settings; ``model_path`` is the pickle
                file to load and ``mapping_columns`` holds the category encodings

        Raises:
            OSError: if the model file cannot be opened
        """
        # NOTE: unpickling can execute arbitrary code, so the model file must
        # come from a trusted location.
        with open(settings.model_path, "rb") as model_file:
            pickle_info = pickle.load(model_file)

        # The first element of the pickled object is used as the classifier
        self.__clf = pickle_info[0]
        self.__settings = settings

    def inference(self, request: InferenceRequest) -> str:
        """
        Predict the price category of the listing described by the request

        Args:
            request (InferenceRequest): request inference

        Returns:
            str: label found in ``mapping_columns["price_category"]`` for the predicted class

        Raises:
            HTTPException: (400) if the neighbourhood or the room type is not a known category
        """
        # The classifier expects a 2-D input: a single row holding the features
        features = np.array([self.__preprocess_request(request=request)])

        pred = self.__clf.predict(features)
        price_category = int(pred[0])

        return self.__settings.mapping_columns["price_category"][price_category]

    def __encode_category(self, column: str, value: str, detail: str) -> int:
        """
        Translate a categorical value into the code stored in the settings mapping

        Args:
            column (str): key of ``mapping_columns`` to look into
            value (str): categorical value received in the request
            detail (str): error message returned to the client when ``value`` is unknown

        Returns:
            int: code associated with ``value``

        Raises:
            HTTPException: (400) if ``value`` is not present in the mapping
        """
        try:
            return self.__settings.mapping_columns[column][value]
        except KeyError as key_exc:
            raise HTTPException(status_code=400, detail=detail) from key_exc

    def __preprocess_request(self, request: InferenceRequest) -> list:
        """
        Extract and preprocess, from the request, the information relevant for the inference

        Args:
            request (InferenceRequest): Request sent to the api

        Returns:
            list: values for inference, ordered as neighbourhood, room type,
                accommodates, bathrooms, bedrooms

        Raises:
            HTTPException: (400) if the neighbourhood or the room type is not a known category
        """
        neighbourhood = self.__encode_category(
            "neighbourhood", request.neighbourhood, "Neighbourhood not valid"
        )
        room_type = self.__encode_category(
            "room_type", request.room_type, "Room type not valid"
        )

        # Same five columns, in the same order, as ConfigTrain.FEATURE_NAMES
        return [
            neighbourhood,
            room_type,
            request.accommodates,
            request.bathrooms,
            request.bedrooms,
        ]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#! /bin/bash
# Start the API with uvicorn, listening on every interface on port 80.
# `exec` replaces the shell with the server process, so signals sent to the
# container (e.g. SIGTERM on stop) are delivered to uvicorn itself.
exec python -m uvicorn api.app:app --host 0.0.0.0 --port 80
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Más que un cambio es que puedes lanzar este mismo comando desde Docker, por evitar más dependencias de código. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
""" | ||
This file contains all models used in inference requests | ||
""" | ||
|
||
from pydantic import BaseModel | ||
|
||
|
||
class InferenceRequest(BaseModel):
    """
    Body expected by the inference endpoint: the description of one listing
    """

    # Identifier chosen by the caller; it is echoed back in the response
    id: int

    # The inference engine builds its feature vector from neighbourhood,
    # room_type, accommodates, bathrooms and bedrooms; the other fields are
    # validated but not read by it.
    accommodates: int
    room_type: str
    beds: int
    bedrooms: int
    bathrooms: float
    neighbourhood: str

    # Amenities, declared as integers
    tv: int
    elevator: int
    internet: int

    # Location of the listing
    latitude: float
    longitude: float
|
||
|
||
class InferenceResponse(BaseModel):
    """
    Body returned by the inference endpoint
    """

    # Same ``id`` that was received in the request
    id: int
    # Price category predicted for the listing
    price_category: str
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
""" | ||
This file has the class definition for app settings | ||
""" | ||
from pydantic import BaseSettings | ||
|
||
|
||
class AppSettings(BaseSettings):
    """
    Settings of the inference API
    """

    # Name shown in the welcome message of the root endpoint
    app_name: str = "Inference API"

    # Pickle file loaded by the inference engine.
    # NOTE(review): the training date is hard-coded in the file name; since this
    # is a pydantic BaseSettings field it can presumably be overridden through
    # the environment (e.g. from Docker) - confirm against the pydantic docs.
    model_path: str = "models/random_forest_classifier_2022-08-04 08:31:07.734769.pkl"

    # Encodings used to build the model input (room_type, neighbourhood) and to
    # decode its output (price_category). The first two repeat the values of
    # ConfigPreprocess.MAPING_COLUMNS and have to stay in sync with them.
    mapping_columns = {
        "room_type": {
            "Shared room": 1,
            "Private room": 2,
            "Entire home/apt": 3,
            "Hotel room": 4,
        },
        "neighbourhood": {
            "Bronx": 1,
            "Queens": 2,
            "Staten Island": 3,
            "Brooklyn": 4,
            "Manhattan": 5,
        },
        "price_category": {
            0: "Low",
            1: "Mid",
            2: "High",
            3: "Lux",
        },
    }
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
""" | ||
This file contains the classes that encapsulate the configuration for preprocessing and training | ||
""" | ||
|
||
import numpy as np | ||
|
||
|
||
class DataRawColumns:
    """
    Column names of the raw listings dataframe
    """

    # Identifier and target
    ID = "id"
    PRICE = "price"

    # Location
    NEIGHBOURHOOD_GROUP_CLEANSED = "neighbourhood_group_cleansed"
    LATITUDE = "latitude"
    LONGITUDE = "longitude"

    # Kind of listing
    PROPERTY_TYPE = "property_type"
    ROOM_TYPE = "room_type"

    # Capacity and equipment
    ACCOMMODATES = "accommodates"
    BATHROOMS = "bathrooms"
    BATHROOMS_TEXT = "bathrooms_text"
    BEDROOMS = "bedrooms"
    BEDS = "beds"
    AMENITIES = "amenities"

    # Columns kept for training; BATHROOMS_TEXT is the only one left out
    SUBSET_TRAINING = [
        ID,
        BATHROOMS,
        NEIGHBOURHOOD_GROUP_CLEANSED,
        PROPERTY_TYPE,
        ROOM_TYPE,
        LATITUDE,
        LONGITUDE,
        ACCOMMODATES,
        BEDROOMS,
        BEDS,
        AMENITIES,
        PRICE,
    ]
|
||
|
||
class DataPreprocessColumns:
    """
    Column names of the preprocessed dataframe
    """

    # Identifier, target and derived target category
    ID = "id"
    PRICE = "price"
    CATEGORY = "category"

    # Location
    NEIGHBOURHOOD = "neighbourhood"
    LATITUDE = "latitude"
    LONGITUDE = "longitude"

    # Kind of listing
    PROPERTY_TYPE = "property_type"
    ROOM_TYPE = "room_type"

    # Capacity
    ACCOMMODATES = "accommodates"
    BATHROOMS = "bathrooms"
    BEDROOMS = "bedrooms"
    BEDS = "beds"

    # Amenity columns (capitalised names, unlike the other columns)
    TV = "TV"
    INTERNET = "Internet"
    AIR_CONDITIONING = "Air_conditioning"
    KITCHEN = "Kitchen"
    HEATING = "Heating"
    WIFI = "Wifi"
    ELEVATOR = "Elevator"
    BREAKFAST = "Breakfast"
|
||
|
||
class ConfigPreprocess:
    """
    Configuration values for the preprocessing step
    """

    # ---- Input and output files ----
    # NOTE(review): both paths are relative, so they depend on the directory
    # the process is started from; building them with pathlib would make them
    # work in every environment.
    RAW_FILE = "data/raw/listings.csv"
    PREPROCESS_FILE = "data/processed/new_processed_listings.csv"

    # ---- Price handling ----
    MIN_PRICE = 10
    # Five edges delimit four price intervals, one per entry of LABELS_PRICE
    BINS_PRICE = [10, 90, 180, 400, np.inf]
    LABELS_PRICE = [0, 1, 2, 3]

    # ---- Numeric codes for the categorical columns ----
    MAPING_COLUMNS = {
        DataPreprocessColumns.ROOM_TYPE: {
            "Shared room": 1,
            "Private room": 2,
            "Entire home/apt": 3,
            "Hotel room": 4,
        },
        DataPreprocessColumns.NEIGHBOURHOOD: {
            "Bronx": 1,
            "Queens": 2,
            "Staten Island": 3,
            "Brooklyn": 4,
            "Manhattan": 5,
        },
    }
|
||
|
||
class ConfigTrain:
    """
    Configuration values for the training process
    """

    # ---- Model input and target ----
    # The order of the features matters: the inference engine builds its input
    # vector with these same columns in this same order.
    FEATURE_NAMES = [
        DataPreprocessColumns.NEIGHBOURHOOD,
        DataPreprocessColumns.ROOM_TYPE,
        DataPreprocessColumns.ACCOMMODATES,
        DataPreprocessColumns.BATHROOMS,
        DataPreprocessColumns.BEDROOMS,
    ]
    FEATURE_CATEGORY = DataPreprocessColumns.CATEGORY

    # ---- Train / test split ----
    TEST_SIZE = 0.15
    RANDOM_STATE_SPLIT = 1

    # ---- Classifier hyper-parameters ----
    # presumably forwarded to the random forest classifier - the training code
    # is not part of this file
    N_ESTIMATORS = 500
    RANDOM_STATE_TRAIN = 0
    CLASS_WEIGHT = "balanced"
    N_JOBS = 4

    # ---- Output ----
    # Folder for model artefacts; the trailing slash is part of the value
    FOLDER_PATH = "models/"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Puedes añadir
ENV PYTHONUNBUFFERED=1
para forzar que el stdout salga al terminal (recomendado cuando corremos Python en Docker)