Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Ignore all CSV files
*.csv

# Recommended ignores for Python projects
__pycache__/
*.py[cod]
*.pyo
*.pyd
*.so
*.egg
*.egg-info/
dist/
build/
*.log

# Ignore Jupyter Notebook checkpoints
.ipynb_checkpoints/

# Ignore virtual environments
venv/
env/
ENV/
.venv/
.env/

# Ignore OS generated files
.DS_Store
Thumbs.db

# Ignore IDE/editor folders
.vscode/
.idea/
*.swp
*.swo
Binary file modified fastapi_reco/app/__pycache__/main.cpython-312.pyc
Binary file not shown.
43 changes: 19 additions & 24 deletions fastapi_reco/app/main.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,25 @@
from fastapi import FastAPI, Query
from app.recommender_content import ContentRecommender
from app.recommender_als import ALSRecommender
from app.logger import save_log
from typing import List
from pydantic import BaseModel

app = FastAPI() # ✅ 반드시 필요!
from recommender.als import ALSRecommender
from recommender.content import ContentRecommender
from recommender.hybrid import hybrid_recommend

content_model = ContentRecommender()
als_model = ALSRecommender()
app = FastAPI()

@app.get("/recommend/hybrid")
def hybrid_recommend(user_id: int = Query(...), post_id: int = Query(...), top_k: int = 5):
als_result = als_model.recommend(user_id, top_k)
content_result = content_model.recommend(post_id, top_k)
als_model = ALSRecommender(csv_path="mock_data/user_post.csv")
content_model = ContentRecommender(csv_path="mock_data/post_tags.csv")

result = {
"recommendations": {
"for_you": als_result,
"similar_to_this": content_result
}
}
class RecommendationResult(BaseModel):
    """Schema of one item in the list returned by the ``/recommend`` endpoint."""

    # Identifier of the recommended post.
    post_id: int
    # Ranking score of the recommendation.
    # NOTE(review): presumably the min-max scaled ALS score in [0, 1] -- confirm.
    score: float
    # Human-readable explanation of why the post was recommended.
    reason: str

save_log({
"user_id": user_id,
"post_id": post_id,
"top_k": top_k,
"result": result
})

return result
@app.get("/recommend", response_model=List[RecommendationResult])
def recommend(user_email: str = Query(...), top_k: int = Query(5)):
    """Return up to *top_k* hybrid recommendations for *user_email*.

    Args:
        user_email: E-mail address identifying the user (required query
            parameter).
        top_k: Maximum number of recommendations to return (defaults to 5).

    Returns:
        The list produced by ``hybrid_recommend``; an empty list when the
        recommendation pipeline raises.
    """
    # Function-scope import so the block does not depend on the file's
    # top-of-file import section.
    import logging

    try:
        return hybrid_recommend(user_email, als_model, content_model, top_k=top_k)
    except Exception:
        # Deliberately best-effort: clients still get an empty list, but the
        # failure is recorded (with traceback) instead of vanishing silently.
        logging.getLogger(__name__).exception(
            "hybrid recommendation failed (top_k=%r)", top_k
        )
        return []
12 changes: 0 additions & 12 deletions fastapi_reco/app/post_tags.csv

This file was deleted.

111 changes: 111 additions & 0 deletions fastapi_reco/app/recommender/als.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares
from typing import List, Dict, Any

class ALSRecommender:
    """Collaborative-filtering recommender built on implicit's ALS model.

    Interactions are read from a CSV file, users and posts are encoded as
    contiguous integer ids, and an ``AlternatingLeastSquares`` model is
    fitted on the resulting user x item preference matrix.
    """

    def __init__(self, csv_path="app/user_post.csv"):
        """Load the interaction CSV at *csv_path* and train the model.

        The CSV is expected to provide the ``user_email``, ``post_id`` and
        ``prefer`` columns, which are the ones read below.
        """
        df = pd.read_csv(csv_path)
        # Missing preferences count as 1, and nothing is weighted below 1.
        df["prefer"] = df["prefer"].fillna(1).clip(lower=1)

        self.user_encoder = LabelEncoder()
        self.item_encoder = LabelEncoder()
        df["user_id"] = self.user_encoder.fit_transform(df["user_email"])
        df["item_id"] = self.item_encoder.fit_transform(df["post_id"])

        self.df = df
        self.user_ids = df["user_id"].unique()
        self.scaler = MinMaxScaler()

        # Rows are users, columns are items. The explicit shape keeps the
        # matrix aligned with both encoders.
        self.user_item_matrix = csr_matrix(
            (
                df["prefer"].astype(np.float32),
                (df["user_id"], df["item_id"]),
            ),
            shape=(
                len(self.user_encoder.classes_),
                len(self.item_encoder.classes_),
            ),
        )

        self.model = AlternatingLeastSquares(
            factors=32,  # increased number of latent factors
            regularization=0.1,
            iterations=50,
            use_gpu=False,
        )
        # recommend() below uses the implicit >= 0.5 calling convention
        # (one user row in, (ids, scores) out). In that API fit() takes the
        # user x item matrix; fitting on the transposed item x user matrix
        # would swap the user and item factors.
        self.model.fit(self.user_item_matrix)

    def _get_recommendation_reason(self, user_id: int, item_id: int) -> str:
        """Build the user-facing explanation for recommending *item_id*.

        Returns the "similar to posts you showed interest in" message only
        when one of the item's nearest neighbours is a post the user has
        actually interacted with; otherwise the generic message.
        """
        seen_items = self.user_item_matrix[user_id].indices
        # similar_items returns an (ids, scores) pair, so the ids must be
        # inspected; the length of the pair itself is always 2.
        similar_ids, _ = self.model.similar_items(item_id, N=3)
        similar_ids = np.asarray(similar_ids)
        # The queried item is normally its own nearest neighbour; ignore it.
        neighbours = similar_ids[similar_ids != item_id]

        if np.isin(neighbours, seen_items).any():
            return "이 게시물은 당신이 관심을 보인 다른 게시물들과 유사합니다."
        return "이 게시물은 당신의 관심사와 잘 맞습니다."

    def _ensure_diversity(self, recommendations: List[Dict[str, Any]], top_k: int) -> List[Dict[str, Any]]:
        """Trim *recommendations* to *top_k* while mixing in lower-ranked items.

        The best ``top_k // 2`` items by score are always kept; the remaining
        slots are filled by sampling, without replacement, from the rest.
        Lists that already fit within *top_k* are returned unchanged.
        """
        if len(recommendations) <= top_k:
            return recommendations

        # Rank by score, best first.
        ranked = sorted(recommendations, key=lambda rec: rec["score"], reverse=True)

        # Keep the head as-is and pick the tail at random.
        keep = top_k // 2
        diverse_recs = ranked[:keep]
        remaining = ranked[keep:]

        if remaining:
            # Sample positions rather than the dicts themselves so the
            # result stays a plain list of the original dict objects.
            picks = np.random.choice(
                len(remaining),
                size=min(len(remaining), top_k - len(diverse_recs)),
                replace=False,
            )
            diverse_recs.extend(remaining[int(pos)] for pos in picks)

        return diverse_recs

    def recommend(self, user_id: int, top_k: int = 5) -> List[Dict[str, Any]]:
        """Recommend up to *top_k* unseen posts for the encoded *user_id*.

        Args:
            user_id: Integer id produced by ``self.user_encoder``.
            top_k: Maximum number of recommendations to return.

        Returns:
            A list of ``{"post_id", "score", "reason"}`` dicts. Scores are
            min-max scaled over the candidate set. The list is empty for an
            unknown user, a non-positive *top_k*, a user who has seen every
            post, or when the underlying model raises.
        """
        if top_k <= 0 or user_id not in self.user_ids:
            return []

        user_items = self.user_item_matrix[user_id]
        num_unseen = self.user_item_matrix.shape[1] - user_items.getnnz()
        if num_unseen <= 0:
            return []

        # Ask for extra candidates so the diversity step has a pool to
        # sample from, but never for more items than can be returned.
        safe_k = min(top_k * 2, num_unseen)

        try:
            item_ids, scores = self.model.recommend(
                user_id,
                user_items,
                N=safe_k,
                filter_already_liked_items=True,
            )
            if len(item_ids) == 0:
                return []

            # Normalize scores across the candidate set.
            scores = self.scaler.fit_transform(
                np.asarray(scores).reshape(-1, 1)
            ).flatten()
            # Decode all item ids in one call instead of once per item.
            post_ids = self.item_encoder.inverse_transform(item_ids)

            results = []
            for item_id, post_id, score in zip(item_ids, post_ids, scores):
                if np.isnan(score) or score < 0:
                    continue
                results.append({
                    "post_id": int(post_id),
                    "score": round(float(score), 3),
                    "reason": self._get_recommendation_reason(user_id, item_id),
                })

            # Enforce diversity on the final selection.
            return self._ensure_diversity(results, top_k)

        except Exception as e:
            # Best-effort: a model failure yields no recommendations.
            print(f"⚠️ ALS 추천 오류: {e}")
            return []
64 changes: 64 additions & 0 deletions fastapi_reco/app/recommender/content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from typing import List, Dict, Any
from sklearn.preprocessing import MinMaxScaler

class ContentRecommender:
    """Content-based recommender using TF-IDF cosine similarity between posts.

    Each post's title, content and tags are concatenated into one text,
    vectorized with TF-IDF, and compared pairwise with cosine similarity.
    """

    def __init__(self, csv_path="app/post_tags.csv"):
        """Load posts from *csv_path* and precompute the similarity matrix.

        The CSV is expected to provide the ``post_id``, ``title``,
        ``content`` and ``tags`` columns, which are the ones read below.
        """
        df = pd.read_csv(csv_path).fillna("")
        # Tags arrive comma-separated; store them space-separated so they
        # tokenize like ordinary words and can be split() later.
        df["tags"] = df["tags"].apply(lambda x: x.replace(",", " "))
        df["full_text"] = df["title"] + " " + df["content"] + " " + df["tags"]

        self.df = df
        self.post_ids = df["post_id"].values
        self.vectorizer = TfidfVectorizer(
            max_features=5000,
            ngram_range=(1, 2),
            min_df=2,
        )
        self.X = self.vectorizer.fit_transform(df["full_text"])
        self.similarity_matrix = cosine_similarity(self.X)
        self.scaler = MinMaxScaler()

    def _get_common_tags(self, post_id: int, similar_post_id: int) -> List[str]:
        """Return the tags shared by the two posts, in alphabetical order."""
        post_tags = set(self.df[self.df["post_id"] == post_id]["tags"].iloc[0].split())
        similar_post_tags = set(self.df[self.df["post_id"] == similar_post_id]["tags"].iloc[0].split())
        # Sorted so the reason text is stable across runs; plain set
        # iteration order for strings varies with hash randomization.
        return sorted(post_tags & similar_post_tags)

    def _get_recommendation_reason(self, post_id: int, similar_post_id: int) -> str:
        """Build the user-facing explanation for a similar-post suggestion."""
        common_tags = self._get_common_tags(post_id, similar_post_id)
        if common_tags:
            return f"이 게시물은 다음 태그들을 공유합니다: {', '.join(common_tags[:3])}"
        return "이 게시물은 비슷한 주제를 다루고 있습니다."

    def recommend(self, post_id: int, top_k: int = 3) -> List[Dict[str, Any]]:
        """Return up to *top_k* posts most similar to *post_id*.

        Args:
            post_id: Identifier of the reference post.
            top_k: Maximum number of similar posts to return.

        Returns:
            A list of ``{"post_id", "title", "similarity", "reason"}`` dicts,
            most similar first. ``similarity`` is min-max scaled over the
            ``2 * top_k`` best candidates. The list is empty for an unknown
            post, a non-positive *top_k*, or when no other post exists.
        """
        if top_k <= 0:
            return []

        # One vectorized pass covers both the membership test and the lookup.
        matches = np.flatnonzero(self.post_ids == post_id)
        if matches.size == 0:
            return []
        idx = int(matches[0])

        row = np.asarray(self.similarity_matrix[idx])
        # Stable descending order: ties keep their original row order.
        order = np.argsort(-row, kind="stable")
        # Drop the post itself and keep a wider candidate pool for scaling.
        candidates = order[order != idx][: top_k * 2]
        if candidates.size == 0:
            # Nothing to compare against; the scaler rejects empty input.
            return []

        # Normalize similarity scores across the candidate pool.
        scores = self.scaler.fit_transform(row[candidates].reshape(-1, 1)).flatten()

        # Only the returned items need a reason, so build just the top_k.
        results = []
        for i, score in zip(candidates[:top_k], scores):
            record = self.df.iloc[int(i)]
            similar_post_id = int(record["post_id"])
            results.append({
                "post_id": similar_post_id,
                "title": record["title"],
                "similarity": round(float(score), 3),
                "reason": self._get_recommendation_reason(post_id, similar_post_id),
            })

        return results
27 changes: 27 additions & 0 deletions fastapi_reco/app/recommender/hybrid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from recommender.als import ALSRecommender
from recommender.content import ContentRecommender


def hybrid_recommend(user_email, als: "ALSRecommender", content: "ContentRecommender", top_k=5):
    """Combine ALS recommendations with content-based explanations.

    ALS supplies the candidate posts for the user. For each candidate, the
    reason of its closest content-based match (if any) is appended in
    parentheses to the ALS reason.

    Args:
        user_email: E-mail address identifying the user.
        als: Trained collaborative-filtering recommender.
        content: Trained content-based recommender.
        top_k: Maximum number of recommendations to return.

    Returns:
        Up to *top_k* recommendation dicts with unique ``post_id`` values, in
        the order ALS produced them. Empty for an unknown user or a
        non-positive *top_k*.
    """
    if top_k <= 0:
        return []

    try:
        user_id = als.user_encoder.transform([user_email])[0]
    except ValueError:
        # The encoder rejects labels it was not fitted on, which here means
        # the user has no recorded interactions.
        return []

    final_recs = []
    seen_post_ids = set()

    # Request twice as many candidates so duplicates can be skipped.
    for rec in als.recommend(user_id, top_k=top_k * 2):
        post_id = rec["post_id"]
        if post_id in seen_post_ids:
            continue
        seen_post_ids.add(post_id)

        content_recs = content.recommend(post_id, top_k=1)
        if content_recs:
            enriched_reason = content_recs[0]["reason"]
            # Build a new dict rather than mutating the recommender's output.
            rec = {**rec, "reason": f"{rec['reason']} ({enriched_reason})"}

        final_recs.append(rec)
        if len(final_recs) >= top_k:
            break

    return final_recs
69 changes: 0 additions & 69 deletions fastapi_reco/app/recommender_als.py

This file was deleted.

Loading