Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions code/jjm-dashboard-scraping/get-village-info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import requests
import pandas as pd
import os
from glob import glob

# Frame that collects scraped rows, and one HTTP session reused for every request.
df = pd.DataFrame()
s = requests.Session()

# Endpoint queried once per village, sent with a browser-like user agent.
url_search = 'https://ejalshakti.gov.in/jjmreport/JJMVillage_Profile.aspx/Bind_Fhtc_info'
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
}

# 'VillCode' column of the previously written combined output.
old_data = pd.read_csv('../../data/scraping/village-data-odisha.csv')
villagesCollected = old_data['VillCode']

# Starting points for the scrape: keep only rows whose 'Collected' flag is 0.
# reset_index() keeps the old row labels as an extra 'index' column.
first_villcodes = pd.read_csv('../../data/scraping/odisha-first-villcode.csv')
not_yet_collected = first_villcodes['Collected'] == 0
search_parameters = first_villcodes.loc[not_yet_collected].reset_index()

# Scrape village profiles, one ``search_parameters`` row at a time.
#
# Each row supplies a state code, a district code and a starting village
# code.  Village codes are probed in steps of 10 until 15 consecutive probes
# return no data; the rows gathered for that district are then merged into
# its CSV under ../../data/scraping/odisha/.


def _encode_code(code):
    """Return *code* as text with every '0' replaced by '%3A', as sent in the payload."""
    return str(code).replace('0', '%3A')


def _stored_code(value):
    """Return a code read back from CSV as text, undoing float parsing (12.0 -> '12')."""
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _drop_unnamed(frame):
    """Return *frame* without the columns whose name starts with 'Unnamed'."""
    return frame.loc[:, ~frame.columns.str.contains('^Unnamed')]


# ``value in Series`` tests the index labels, not the stored values, so the
# membership test has to run against a real set of the stored codes.
collected_codes = {_stored_code(code) for code in villagesCollected.dropna()}

for _, row in search_parameters.iterrows():
    StCode = row['StCode']
    VillCode = row['VillCode']
    # State and district do not change while probing villages: encode once.
    payloadStCode = _encode_code(StCode)
    payloadDtCode = _encode_code(row['DtCode'])

    # Rows for this district only, so other districts never end up in its file.
    district_df = pd.DataFrame()
    misses = 0  # consecutive probes that returned no data
    while misses < 15:
        print(VillCode)
        payloadVillCode = _encode_code(VillCode)
        VillCode = VillCode + 10  # next candidate, whatever happens below

        if payloadVillCode in collected_codes:
            # Known village: skip the request but count it as a hit, exactly
            # as a successful re-fetch would, so the stop rule is unaffected.
            misses = 0
            continue

        payload = {
            "Cat": "11",
            "DtCode11": payloadDtCode,  # This also varies a bit
            "Param": "11",
            "StCode11": payloadStCode,
            "SubCat": "11",
            "VillCode": payloadVillCode,
        }
        req = s.post(url_search, headers=headers, json=payload)
        info = req.json()
        df_new = pd.DataFrame(info['d'])
        if df_new.empty:
            misses += 1
            print(misses)
            continue

        # Tag the rows with the codes that produced them.
        df_new['VillCode'] = payloadVillCode
        df_new['DtCode'] = payloadDtCode
        df_new['StCode'] = StCode
        print('clean data')
        district_df = _drop_unnamed(pd.concat([district_df, df_new]))
        misses = 0

    if district_df.empty:
        continue

    file = '../../data/scraping/odisha/village-data-' + payloadDtCode + '.csv'
    if os.path.exists(file):
        print('combining with old data')
        # Strip the index column written by earlier runs; left in place it
        # makes every old row differ from the freshly scraped ones.
        df_old = _drop_unnamed(pd.read_csv(file, low_memory=False))
        district_df = pd.concat([district_df, df_old])
    # index=False stops a new 'Unnamed' column being added on every run.
    district_df.drop_duplicates().to_csv(file, index=False)


# Stitch every per-district CSV into one frame.  The empty seed frame keeps
# the result an empty DataFrame when the folder holds no CSV files.
district_frames = [pd.DataFrame()]
for csv_path in glob(os.path.join('../../data/scraping/odisha', '*.csv')):
    print(csv_path)
    # Assumes every file has the same columns
    district_frames.append(pd.read_csv(csv_path))
df = pd.concat(district_frames)

# Drop the 'Unnamed...' columns, then exact duplicate rows.
named_columns = ~df.columns.str.contains('^Unnamed')
df = df.loc[:, named_columns].drop_duplicates()

# Left-join onto the village list so every listed village keeps a row.
village_names = pd.read_csv('../../data/scraping/village-list-odisha.csv')
df = pd.merge(village_names, df, how='left', on='VillageId')

df.to_csv('../../data/scraping/village-data-odisha.csv')
53 changes: 53 additions & 0 deletions code/jjm-dashboard-scraping/list-villages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import requests
import pandas as pd
import os
import json
from random import randint
from time import sleep

# One HTTP session reused for every request, sent with a browser-like user agent.
s = requests.Session()
headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'}

# List states
# (the URL literal used to start with a stray space)
url = "https://ejalshakti.gov.in/jjmreport/JJMIndia.aspx/JJM_StateDistrictSearch"
payload = {
    "StCode": "11",
    "Name": "1",
}

# A single request is enough: the same POST used to be sent a second time and
# its response was never read.
req = s.post(url, headers=headers, json=payload)
info = req.json()
states_df = pd.DataFrame(info['d'])

# Don't overwrite: states-list.csv was manually edited to include the 'StCode' column
# states_df.to_csv('../../data/scraping/states-list.csv')

# District codes and village-name search terms; both are sent to the endpoint verbatim.
dtcode_list = ['4951','4881', '4891','4921','4941','4991','4931','4%3A61','48%3A1','4971','4%3A%3A1','4%3A81','4%3A51','4%3A71','4%3A31','4%3A41','4811','4%3A21','4841','49%3A1','4911','4821','4%3A11','4861','4%3A91','4871','4961','4981','4831','4851']
alphabetsearch_lst = ['B1','C1','D1','E1','F1','G1','H1','I1','J1','K1','L1','M1','N1','O1','P1','Q1','R1','S1','T1','U1','V1','W1','X1','Y1','Z1','%5B1']
url = 'https://ejalshakti.gov.in/jjmreport/JJMIndia.aspx/Bind_search_Village'

# Query every (district, search term) pair for state '321' and keep the
# non-empty answers.  The empty seed frame keeps the result an empty
# DataFrame when no query returns anything.
village_frames = [pd.DataFrame()]
for district_code in dtcode_list:
    print(district_code)
    for search_term in alphabetsearch_lst:
        print(search_term)
        query = dict(DtCode=district_code, StCode='321', VillageName=search_term)
        response = s.post(url, headers=headers, json=query)
        matches = pd.DataFrame(response.json()['d'])
        if matches.empty:
            continue
        # Record which district/state query produced these rows.
        village_frames.append(matches.assign(DtCode=district_code, StCode='321'))

villages_df = pd.concat(village_frames).drop_duplicates()
villages_df.to_csv('../../data/scraping/village-list-odisha.csv')