From df60a62820e384740f7a10a3e02ec4fd29d10dbe Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Wed, 22 Nov 2023 21:05:29 +0900 Subject: [PATCH 1/2] =?UTF-8?q?[feat]=20=EC=97=B0=EB=A0=B9=20=ED=85=9C?= =?UTF-8?q?=ED=94=8C=EB=A6=BF=20=EB=8D=B0=EC=9D=B4=ED=84=B0=20=EC=88=98?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- model/ScrapResult.py | 42 ++++++++++++++++++++++++++++++++- routers/scrapResult.py | 53 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/model/ScrapResult.py b/model/ScrapResult.py index ddd3141..aac77bc 100644 --- a/model/ScrapResult.py +++ b/model/ScrapResult.py @@ -22,7 +22,47 @@ class GenderTemplateData(BaseModel): class AgeTemplateData(BaseModel): - ageDiversityIndex: float + class AgeRankingParagraphData(BaseModel): + class AgeRankingAllIndices(BaseModel): + metroId: int + rank: int + ageDiversityIndex: float + + ageDiversityIndex: float + allIndices: list[AgeRankingAllIndices] + + class AgeIndexHistoryParagraphData(BaseModel): + class AgeIndexHistoryIndexData(BaseModel): + year: int + unit: int + candidateCount: int + candidateDiversityIndex: float + candidateDiversityRank: int + electedDiversityIndex: float + electedDiversityRank: int + + mostRecentYear: int + history: list[AgeIndexHistoryIndexData] + + class AgeHistogramParagraphData(BaseModel): + class AgeHistogramAreaData(BaseModel): + localId: int + firstQuintile: int + lastQuintile: int + + year: int + candidateCount: int + electedCount: int + firstQuintile: int + lastQuintile: int + divArea: AgeHistogramAreaData + uniArea: AgeHistogramAreaData + + metroId: int + localId: int + rankingParagraph: AgeRankingParagraphData + indexHistoryParagraph: AgeIndexHistoryParagraphData + ageHistogramParagraph: AgeHistogramParagraphData class PartyTemplateData(BaseModel): diff --git a/routers/scrapResult.py b/routers/scrapResult.py index d9b7189..bc3e3b1 100644 --- a/routers/scrapResult.py +++ b/routers/scrapResult.py @@ -41,7 +41,58 @@ async def getLocalTemplateData( age_list = [councilor["age"] async for councilor in councilors] age_diversity_index = diversity.gini_simpson(age_list, stair=AGE_STAIR) return ScrapResult.AgeTemplateData.model_validate( - {"ageDiversityIndex": age_diversity_index} + { + "metroId": metroId, + "localId": localId, + "rankingParagraph": { + "ageDiversityIndex": age_diversity_index, + "allIndices": [ + {"metroId": 3, "rank": 2, "ageDiversityIndex": 0.9}, + {"metroId": 14, "rank": 7, "ageDiversityIndex": 0.4}, + {"metroId": 15, "rank": 18, "ageDiversityIndex": 0.2}, + ], + }, + "indexHistoryParagraph": { + "mostRecentYear": 2022, + "history": [ + { + "year": 2022, + "unit": 8, + "candidateCount": 80, + "candidateDiversityIndex": 0.11, + "candidateDiversityRank": 33, + "electedDiversityIndex": 0.42, + "electedDiversityRank": 12, + }, + { + "year": 2018, + "unit": 7, + "candidateCount": 70, + "candidateDiversityIndex": 0.73, + "candidateDiversityRank": 3, + "electedDiversityIndex": 0.85, + "electedDiversityRank": 2, + }, + ], + }, + "ageHistogramParagraph": { + "year": 2022, + "candidateCount": 80, + "electedCount": 16, + "firstQuintile": 66, + "lastQuintile": 29, + "divArea": { + "localId": 172, + "firstQuintile": 43, + "lastQuintile": 21, + }, + "uniArea": { + "localId": 63, + "firstQuintile": 84, + "lastQuintile": 56, + }, + }, + } ) case ScrapResult.FactorType.party: From a6a4123db3b038d9d1365d991664405d8dc0f76c Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Thu, 23 Nov 2023 23:55:23 +0900 Subject: [PATCH 2/2] =?UTF-8?q?[feat]=20=EC=97=B0=EB=A0=B9=20=ED=85=9C?= =?UTF-8?q?=ED=94=8C=EB=A6=BF=20=EB=8D=B0=EC=9D=B4=ED=84=B0=20=EC=8B=A4?= =?UTF-8?q?=EC=A0=9C=20=EB=8D=B0=EC=9D=B4=ED=84=B0=20=EB=B0=98=ED=99=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- model/MongoDB.py | 4 +- model/ScrapResult.py | 2 +- routers/ageHist.py | 55 +++++------- routers/scrapResult.py | 191 +++++++++++++++++++++++++++++------------ utils/diversity.py | 2 +- 5 files changed, 162 insertions(+), 92 deletions(-) diff --git a/model/MongoDB.py b/model/MongoDB.py index 7f79cc7..afc663e 100644 --- a/model/MongoDB.py +++ b/model/MongoDB.py @@ -10,13 +10,13 @@ def __init__(self): self.client = None self.council_db = None self.district_db = None - self.age_hist_db = None + self.stats_db = None def connect(self): self.client = AsyncIOMotorClient(os.getenv("MONGO_CONNECTION_URI")) self.council_db = AsyncIOMotorDatabase(self.client, "council") self.district_db = AsyncIOMotorDatabase(self.client, "district") - self.age_hist_db = AsyncIOMotorDatabase(self.client, "age_hist") + self.stats_db = AsyncIOMotorDatabase(self.client, "stats") def close(self): self.client.close() diff --git a/model/ScrapResult.py b/model/ScrapResult.py index aac77bc..d7d4dd4 100644 --- a/model/ScrapResult.py +++ b/model/ScrapResult.py @@ -24,7 +24,7 @@ class GenderTemplateData(BaseModel): class AgeTemplateData(BaseModel): class AgeRankingParagraphData(BaseModel): class AgeRankingAllIndices(BaseModel): - metroId: int + localId: int rank: int ageDiversityIndex: float diff --git a/routers/ageHist.py b/routers/ageHist.py index de38e7d..d10dfba 100644 --- a/routers/ageHist.py +++ b/routers/ageHist.py @@ -24,23 +24,16 @@ async def getMetroAgeHistData( } ) - match ageHistType: - case AgeHistDataTypes.elected: - collection_name = f"지선-당선_{year}_1level_{method}" - case AgeHistDataTypes.candidate: - collection_name = f"지선-후보_{year}_1level_{method}" - - if collection_name not in await MongoDB.client.age_hist_db.list_collection_names(): - return BasicResponse.ErrorResponse.model_validate( - { - "error": "CollectionNotExistError", - "code": BasicResponse.COLLECTION_NOT_EXIST_ERR, - "message": f"No collection with name f{collection_name}. Perhaps the year is wrong?", - } - ) - - histogram = await MongoDB.client.age_hist_db[collection_name].find_one( - {"metroId": metroId} + histogram = await MongoDB.client.stats_db["age_hist"].find_one( + { + "level": 1, + "councilorType": ( + "elected" if ageHistType == AgeHistDataTypes.elected else "candidate" + ), + "year": year, + "method": method, + "metroId": metroId, + } ) return MetroAgeHistData.model_validate( @@ -70,23 +63,17 @@ async def getLocalAgeHistData( } ) - match ageHistType: - case AgeHistDataTypes.elected: - collection_name = f"지선-당선_{year}_2level_{method}" - case AgeHistDataTypes.candidate: - collection_name = f"지선-후보_{year}_2level_{method}" - - if collection_name not in await MongoDB.client.age_hist_db.list_collection_names(): - return BasicResponse.ErrorResponse.model_validate( - { - "error": "CollectionNotExistError", - "code": BasicResponse.COLLECTION_NOT_EXIST_ERR, - "message": f"No collection with name f{collection_name}. Perhaps the year is wrong?", - } - ) - - histogram = await MongoDB.client.age_hist_db[collection_name].find_one( - {"metroId": metroId, "localId": localId} + histogram = await MongoDB.client.stats_db["age_hist"].find_one( + { + "level": 2, + "councilorType": ( + "elected" if ageHistType == AgeHistDataTypes.elected else "candidate" + ), + "year": year, + "method": method, + "metroId": metroId, + "localId": localId, + } ) return MetroAgeHistData.model_validate( diff --git a/routers/scrapResult.py b/routers/scrapResult.py index bc3e3b1..2ac16bb 100644 --- a/routers/scrapResult.py +++ b/routers/scrapResult.py @@ -1,7 +1,18 @@ +from typing import TypeVar from fastapi import APIRouter -from model import BasicResponse, MongoDB, ScrapResult +from model.BasicResponse import ErrorResponse, REGION_CODE_ERR +from model.MongoDB import client +from model.ScrapResult import ( + GenderTemplateData, + GenderChartDataPoint, + AgeTemplateData, + AgeChartDataPoint, + PartyTemplateData, + PartyChartDataPoint, + FactorType, + ChartData, +) from utils import diversity -from typing import TypeVar router = APIRouter(prefix="/localCouncil", tags=["localCouncil"]) @@ -11,45 +22,122 @@ @router.get("/template-data/{metroId}/{localId}") async def getLocalTemplateData( - metroId: int, localId: int, factor: ScrapResult.FactorType -) -> BasicResponse.ErrorResponse | ScrapResult.GenderTemplateData | ScrapResult.AgeTemplateData | ScrapResult.PartyTemplateData: + metroId: int, localId: int, factor: FactorType +) -> ErrorResponse | GenderTemplateData | AgeTemplateData | PartyTemplateData: if ( - await MongoDB.client.district_db["local_district"].find_one( + await client.district_db["local_district"].find_one( {"localId": localId, "metroId": metroId} ) is None ): - return BasicResponse.ErrorResponse.model_validate( + return ErrorResponse.model_validate( { "error": "RegionCodeError", - "code": BasicResponse.REGION_CODE_ERR, + "code": REGION_CODE_ERR, "message": f"No local district with metroId {metroId} and localId {localId}.", } ) - councilors = MongoDB.client.council_db["local_councilor"].find({"localId": localId}) + local_stat = await client.stats_db["diversity_index"].find_one({"localId": localId}) match factor: - case ScrapResult.FactorType.gender: - gender_list = [councilor["gender"] async for councilor in councilors] - gender_diversity_index = diversity.gini_simpson(gender_list) - return ScrapResult.GenderTemplateData.model_validate( - {"genderDiversityIndex": gender_diversity_index} + case FactorType.gender: + return GenderTemplateData.model_validate( + {"genderDiversityIndex": local_stat["genderDiversityIndex"]} ) - case ScrapResult.FactorType.age: - age_list = [councilor["age"] async for councilor in councilors] - age_diversity_index = diversity.gini_simpson(age_list, stair=AGE_STAIR) - return ScrapResult.AgeTemplateData.model_validate( + case FactorType.age: + # ============================ + # rankingParagraph + # ============================ + age_diversity_index = local_stat["ageDiversityIndex"] + + localIds_of_same_metroId = [ + doc["localId"] + async for doc in client.district_db["local_district"].find( + {"metroId": metroId} + ) + ] + all_indices = ( + await client.stats_db["diversity_index"] + .find({"localId": {"$in": localIds_of_same_metroId}}) + .to_list(500) + ) + all_indices.sort(key=lambda x: x["ageDiversityRank"]) + + # ============================ + # ageHistogramParagraph + # ============================ + age_stat_elected = ( + await client.stats_db["age_stat"] + .aggregate( + [ + { + "$match": { + "level": 2, + "councilorType": "elected", + "metroId": metroId, + "localId": localId, + } + }, + {"$sort": {"year": -1}}, + {"$limit": 1}, + ] + ) + .to_list(500) + )[0] + most_recent_year = age_stat_elected["year"] + age_stat_candidate = await client.stats_db["age_stat"].find_one( + { + "level": 2, + "councilorType": "candidate", + "metroId": metroId, + "localId": localId, + "year": most_recent_year, + } + ) + + divArea_id = ( + await client.stats_db["diversity_index"].find_one( + {"ageDiversityRank": 1} + ) + )["localId"] + divArea = await client.stats_db["age_stat"].find_one( + { + "level": 2, + "councilorType": "elected", + "localId": divArea_id, + "year": most_recent_year, + } + ) + + uniArea_id = ( + await client.stats_db["diversity_index"].find_one( + {"ageDiversityRank": 226} + ) + )["localId"] + uniArea = await client.stats_db["age_stat"].find_one( + { + "level": 2, + "councilorType": "elected", + "localId": uniArea_id, + "year": most_recent_year, + } + ) + + return AgeTemplateData.model_validate( { "metroId": metroId, "localId": localId, "rankingParagraph": { "ageDiversityIndex": age_diversity_index, "allIndices": [ - {"metroId": 3, "rank": 2, "ageDiversityIndex": 0.9}, - {"metroId": 14, "rank": 7, "ageDiversityIndex": 0.4}, - {"metroId": 15, "rank": 18, "ageDiversityIndex": 0.2}, + { + "localId": doc["localId"], + "rank": idx + 1, + "ageDiversityIndex": doc["ageDiversityIndex"], + } + for idx, doc in enumerate(all_indices) ], }, "indexHistoryParagraph": { @@ -76,68 +164,65 @@ async def getLocalTemplateData( ], }, "ageHistogramParagraph": { - "year": 2022, - "candidateCount": 80, - "electedCount": 16, - "firstQuintile": 66, - "lastQuintile": 29, + "year": most_recent_year, + "candidateCount": age_stat_candidate["data"][0]["population"], + "electedCount": age_stat_elected["data"][0]["population"], + "firstQuintile": age_stat_elected["data"][0]["firstquintile"], + "lastQuintile": age_stat_elected["data"][0]["lastquintile"], "divArea": { - "localId": 172, - "firstQuintile": 43, - "lastQuintile": 21, + "localId": divArea_id, + "firstQuintile": divArea["data"][0]["firstquintile"], + "lastQuintile": divArea["data"][0]["lastquintile"], }, "uniArea": { - "localId": 63, - "firstQuintile": 84, - "lastQuintile": 56, + "localId": uniArea_id, + "firstQuintile": uniArea["data"][0]["firstquintile"], + "lastQuintile": uniArea["data"][0]["lastquintile"], }, }, } ) - case ScrapResult.FactorType.party: - party_list = [councilor["jdName"] async for councilor in councilors] - party_diversity_index = diversity.gini_simpson(party_list) - return ScrapResult.PartyTemplateData.model_validate( + case FactorType.party: + party_diversity_index = local_stat["partyDiversityIndex"] + return PartyTemplateData.model_validate( {"partyDiversityIndex": party_diversity_index} ) T = TypeVar( "T", - ScrapResult.GenderChartDataPoint, - ScrapResult.AgeChartDataPoint, - ScrapResult.PartyChartDataPoint, + GenderChartDataPoint, + AgeChartDataPoint, + PartyChartDataPoint, ) @router.get("/chart-data/{metroId}/{localId}") async def getLocalChartData( - metroId: int, localId: int, factor: ScrapResult.FactorType -) -> BasicResponse.ErrorResponse | ScrapResult.ChartData[T]: + metroId: int, localId: int, factor: FactorType +) -> ErrorResponse | ChartData[T]: if ( - await MongoDB.client.district_db["local_district"].find_one( + await client.district_db["local_district"].find_one( {"localId": localId, "metroId": metroId} ) is None ): - return BasicResponse.ErrorResponse.model_validate( + return ErrorResponse.model_validate( { "error": "RegionCodeError", - "code": BasicResponse.REGION_CODE_ERR, + "code": REGION_CODE_ERR, "message": f"No local district with metroId {metroId} and localId {localId}.", } ) - councilors = MongoDB.client.council_db["local_councilor"].find({"localId": localId}) + councilors = client.council_db["local_councilor"].find({"localId": localId}) match factor: - case ScrapResult.FactorType.gender: + case FactorType.gender: gender_list = [councilor["gender"] async for councilor in councilors] gender_count = diversity.count(gender_list) - return ScrapResult.ChartData[ - ScrapResult.GenderChartDataPoint - ].model_validate( + return ChartData[GenderChartDataPoint].model_validate( { "data": [ {"gender": gender, "count": gender_count[gender]} @@ -146,10 +231,10 @@ async def getLocalChartData( } ) - case ScrapResult.FactorType.age: + case FactorType.age: age_list = [councilor["age"] async for councilor in councilors] age_count = diversity.count(age_list, stair=AGE_STAIR) - return ScrapResult.ChartData[ScrapResult.AgeChartDataPoint].model_validate( + return ChartData[AgeChartDataPoint].model_validate( { "data": [ { @@ -162,12 +247,10 @@ async def getLocalChartData( } ) - case ScrapResult.FactorType.party: + case FactorType.party: party_list = [councilor["jdName"] async for councilor in councilors] party_count = diversity.count(party_list) - return ScrapResult.ChartData[ - ScrapResult.PartyChartDataPoint - ].model_validate( + return ChartData[PartyChartDataPoint].model_validate( { "data": [ {"party": party, "count": party_count[party]} diff --git a/utils/diversity.py b/utils/diversity.py index 95536e0..12b23d1 100644 --- a/utils/diversity.py +++ b/utils/diversity.py @@ -19,7 +19,7 @@ def gini_simpson(data, stair=0, opts=True): """ counts = count(data, stair) total = sum(counts.values()) - gs_idx = 1 - sum((n / total) ** 2 for n in counts.values()) + gs_idx = 1 - sum((n / total) * ((n - 1) / (total - 1)) for n in counts.values()) if opts: num_cats = len([c for c in counts.values() if c > 0])