Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[fix] analysis 코드 수정 #79

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion analysis/age/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,15 @@
def run_by_excel(cluster_by, filenames, N=5, folder_name="To_be_filled"):
assert cluster_by in ["sdName", "wiwName"]
level = 1 if cluster_by == "sdName" else 2
datadir = os.path.join(BASE_DIR, "_data", folder_name)
# datadir = os.path.join(BASE_DIR, "_data", folder_name)
datadir = os.path.join(BASE_DIR, "_data")
df = pd.DataFrame()
for d in filenames:
df_new = pd.read_excel(os.path.join(datadir, d))
df = pd.concat([df, df_new])
df["sdName"] = df[["sdName", "wiwName"]].apply(
lambda x: "대구광역시" if x[1] == "군위군" else x[0], axis=1
)
if level == 1:
df = df[["sgId", "sdName", "name", "age", "gender"]]
else:
Expand Down
32 changes: 25 additions & 7 deletions analysis/diversity_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,28 @@ def gini_simpson(data, stair=0, opts=True):
total = sum(counts.values())
gs_idx = 1 - sum((n / total) * ((n - 1) / (total - 1)) for n in counts.values())

bins = None
if isinstance(data[0], int):
bins = [0 for _ in range(4)] # 39세 이하, 40대, 50대, 60세 이상
for age in data:
if age < 40:
bins[0] = 1
elif age < 50:
bins[1] = 1
elif age < 60:
bins[2] = 1
else:
bins[3] = 1
bins = sum(bins)

if opts:
num_cats = len([c for c in counts.values() if c > 0])
if num_cats <= 1:
return 0.0
max_gs_idx = (num_cats - 1) / num_cats * total / (total - 1)
gs_idx /= max_gs_idx
if gs_idx > 0.8 and bins and bins < 4:
return 0.8

return gs_idx

Expand Down Expand Up @@ -170,10 +186,12 @@ def save_to_mongo_metro(metroId: int, factor: str, stair=0, opts=True) -> None:
factor_field = {"age": "age", "gender": "gender", "party": "jdName"}
data = [
councilor[factor_field[factor]]
for councilor in client["council"]["metro_councilor"].find({"metroId": metroId})
for councilor in client["council"]["metropolitan_councilor"].find(
{"metroId": metroId}
)
]
# print(f"{metroId} {factor}")
# print(data)
print(f"{metroId} {factor}")
print(data)
client["stats"].get_collection("diversity_index").update_one(
{"metroId": metroId},
{"$set": {f"{factor}DiversityIndex": gini_simpson(data, stair, opts)}},
Expand Down Expand Up @@ -371,10 +389,10 @@ def main():
calculate_rank_metro("party")
calculate_age_diversity_rank_history_metro()

save_to_mongo_national("age", stair=10)
save_to_mongo_national("gender")
save_to_mongo_national("party")
calculate_age_diversity_rank_history_national()
# save_to_mongo_national("age", stair=10)
# save_to_mongo_national("gender")
# save_to_mongo_national("party")
# calculate_age_diversity_rank_history_national()


if __name__ == "__main__":
Expand Down
6 changes: 6 additions & 0 deletions analysis/gender_party_hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ def gender_hist(
df["wiwName"] = df[["sdName", "wiwName"]].apply(
lambda x: change_local_name(*x), axis=1
)
df["sdName"] = df[["sdName", "wiwName"]].apply(
lambda x: "대구광역시" if x[1] == "군위군" else x[0], axis=1
)

if level == 0:
df = df[["sgId", "name", "gender"]].groupby(by=["sgId", "gender"]).count()
Expand Down Expand Up @@ -153,6 +156,9 @@ def party_hist(councilor_type: str, level: int, is_elected: bool, filenames: lis
df["wiwName"] = df[["sdName", "wiwName"]].apply(
lambda x: change_local_name(*x), axis=1
)
df["sdName"] = df[["sdName", "wiwName"]].apply(
lambda x: "대구광역시" if x[1] == "군위군" else x[0], axis=1
)

if level == 0:
df = df[["sgId", "name", "jdName"]].groupby(by=["sgId", "jdName"]).count()
Expand Down