forked from tanyagoel2018/extreme_user_detection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
01_ Data collection
1 lines (1 loc) · 4.79 KB
/
01_ Data collection
1
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "01_ Data collection",
      "provenance": [
        {"file_id": "14aNercQOETm7JAsdcXeZIvD4SVUR5LRP", "timestamp": 1595014332684},
        {"file_id": "10JxKMjuSU0kCDoo5rNwcOHAuuAqcdGRo", "timestamp": 1594988522837}
      ],
      "collapsed_sections": []
    },
    "kernelspec": {"name": "python3", "display_name": "Python 3"}
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {"id": "48tQtOdqZtqc", "colab_type": "code", "colab": {}},
      "source": [
        "import csv\n",
        "import os\n",
        "import time\n",
        "\n",
        "import pandas as pd\n",
        "import tweepy"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {"id": "4HQaOw6dZxVS", "colab_type": "code", "colab": {}},
      "source": [
        "# Twitter API credentials are read from environment variables so that no\n",
        "# secret is stored in the notebook. A missing variable raises KeyError here,\n",
        "# before any request is made.\n",
        "key = os.environ['TWITTER_API_KEY']\n",
        "secret = os.environ['TWITTER_API_SECRET']\n",
        "token = os.environ['TWITTER_ACCESS_TOKEN']\n",
        "token_secret = os.environ['TWITTER_ACCESS_TOKEN_SECRET']"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {"id": "7wnvEX0yaFNz", "colab_type": "code", "colab": {}},
      "source": [
        "# Build an authenticated client; wait_on_rate_limit=True is passed so the\n",
        "# client waits instead of failing when a rate limit is reached.\n",
        "auth = tweepy.OAuthHandler(key, secret)\n",
        "auth.set_access_token(token, token_secret)\n",
        "api = tweepy.API(auth, wait_on_rate_limit=True)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {"id": "KP_z5E5AaQ1a", "colab_type": "text"},
      "source": ["Connection made"]
    },
    {
      "cell_type": "code",
      "metadata": {"id": "Gr4OHt3sFBH8", "colab_type": "code", "colab": {}},
      "source": [
        "# Search terms (handles and hashtags), one line per politician.\n",
        "queries = [\n",
        "    '@nsitharaman', '#nsitharaman',\n",
        "    '@ashokgehlot51', '#ashokgehlot51', '#ashok_gehlot', '#GehlotvsPilot', '#gehlotvspilot', '#Gehlot_vs_Pilot', '#Ashok_Gehlot',\n",
        "    '@ArvindKejriwal', '#ArvindKejriwal', '#arvindkejriwal', '#Arvind_Kejriwal', '#arvind_kejriwal', '#CMofDelhi',\n",
        "    '@JM_Scindia', '#JM_Scindia', '#jm_scindia', '#jmscindia',\n",
        "    '@VasundharaBJP', '#VasundharaBJP', '#vasundharaBJP', '#vasundharabjp',\n",
        "    '@iChiragPaswan', '#iChiragPaswan', '#ichiragpaswan',\n",
        "    '@irvpaswan', '#irvpaswan',\n",
        "    '@Dev_Fadnavis', '#Dev_Fadnavis', '#dev_fadnavis', '#devfadnavis', '#DevFadnavis', '#Devendra_Fadnavis', '#devendra_fadnavis', '#DevendraFadnavis', '#devendrafadnavis', '#CMofMaharashtra',\n",
        "]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {"id": "Ws2dBs9A8ubC", "colab_type": "text"},
      "source": [
        "'#Rahul_Gandhi'+'#rahul_gandhi'+'#RahulGandhi'+'#Rahulgandhi'+'#rahulgandhi'+'#rahulGandhi'+'#Gandhi'+'#gandhi'\n",
        "'#SachinPilotKeSaath'+ '#SachinPilotSacked' +'@SachinPilot' + '#SachinPilot' + '#Sachin_Pilot' + '#sachin_pilot' + '#GehlotVsPilot' + '#Sachin_pilot'+ '@yadavtejashwi' + '@NitishKumar' + '#nitishkumarcmbihar' + '@cmbihar' + '#cmbihar' + '@DrMadanMohanJha' + '@INCBihar' + '@narendramodi' + '@PMOIndia' + '#narendramodi' + '#NarendraModi' + '#Narendramodi' + '#PmModiLeadingTheWorld'+ '#AmitShah' + '@AmitShah' + '@AmitShahOffice' + '#homeministry' + '#amitshahstandwithdelhi' + '#AmitShahDoJusticeForSSR' + '@Mayawati' + '#Mayawati'\n",
        "Priyanka Gandhi\n",
        "Mamata Banerjee\n",
        "Akhilesh Yadav\n",
        "Rajnath Singh\n",
        "Manish Sisodia\n",
        "Smriti Irani\n",
        "Ravishanka Shukla\n",
        "Nitin Gadkari\n",
        "Nirmala Sitharaman\n",
        "Ashok Gehlot\n",
        "Arvind Kejriwal\n",
        "Jyotiraditya Scindia\n",
        "Vasundhara Raje Scindia\n",
        "Chirag Paswan\n",
        "Ram Vilas Paswan\n",
        "Devendra Fadnavis\n",
        "\n",
        "Other queries were also used; this notebook is only sample code."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "A5LFGjS8XZ0W",
        "colab_type": "code",
        "colab": {"base_uri": "https://localhost:8080/", "height": 35},
        "executionInfo": {
          "status": "ok",
          "timestamp": 1595236517351,
          "user_tz": -330,
          "elapsed": 2158,
          "user": {"displayName": "Bhanupriya Sharma", "photoUrl": "", "userId": "03440780144815987617"}
        },
        "outputId": "e8325a4b-86dd-4147-e639-2d7e77502f5e"
      },
      "source": [
        "ql = len(queries)\n",
        "ql"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {"text/plain": ["38"]},
          "metadata": {"tags": []},
          "execution_count": 19
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {"id": "I-Z16ACDO8p1", "colab_type": "code", "colab": {}},
      "source": [
        "# Complete loop for data collection & storing in a CSV file.\n",
        "#\n",
        "# Every query is searched once per pass; each returned tweet is appended to\n",
        "# `tweet_list` and written as one CSV row. Rows are not de-duplicated, so a\n",
        "# tweet returned by several queries or passes appears more than once.\n",
        "N_ROUNDS = 5              # passes over the full query list\n",
        "TWEETS_PER_QUERY = 100    # `count` requested from the search endpoint\n",
        "PAUSE_SECONDS = 15 * 60   # pause between two passes\n",
        "OUTPUT_CSV = 'data_file.csv'\n",
        "\n",
        "tweet_list = []\n",
        "header = [\n",
        "    'created_at', 'id', 'id_str', 'full_text', 'truncated', 'display_text_range',\n",
        "    'entities', 'extended_entities', 'metadata', 'source',\n",
        "    'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id',\n",
        "    'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo',\n",
        "    'coordinates', 'place', 'contributors', 'is_quote_status', 'retweet_count',\n",
        "    'favorite_count', 'favorited', 'retweeted', 'possibly_sensitive', 'lang',\n",
        "    'retweeted_status', 'quoted_status_id_str', 'quoted_status_id', 'quoted_status',\n",
        "]\n",
        "\n",
        "# tweepy < 4 exposes the search endpoint as `API.search`; tweepy >= 4 renamed\n",
        "# it to `API.search_tweets`. Use whichever the installed version provides.\n",
        "search = getattr(api, 'search_tweets', None) or api.search\n",
        "\n",
        "# The file is opened once (mode 'w' starts a fresh file with a header row)\n",
        "# instead of being reopened for every tweet. utf-8 is set explicitly so that\n",
        "# non-ASCII tweet text does not depend on the platform's default encoding.\n",
        "with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as file:\n",
        "    # extrasaction='ignore': a tweet payload may carry keys that are not in\n",
        "    # `header`; with the default ('raise') DictWriter aborts with ValueError\n",
        "    # on the first such tweet. Keys missing from a payload are left empty.\n",
        "    writer = csv.DictWriter(file, fieldnames=header, extrasaction='ignore')\n",
        "    writer.writeheader()\n",
        "\n",
        "    for c in range(N_ROUNDS):\n",
        "        for query in queries:\n",
        "            search_result = search(q=query, count=TWEETS_PER_QUERY, tweet_mode='extended')\n",
        "            for tweet in search_result:\n",
        "                tweet_list.append(tweet._json)\n",
        "                writer.writerow(tweet._json)\n",
        "            # Push the rows of this query to disk so an interruption during a\n",
        "            # later request or pause does not lose them.\n",
        "            file.flush()\n",
        "        if c < N_ROUNDS - 1:  # nothing follows the last pass, so do not wait\n",
        "            time.sleep(PAUSE_SECONDS)"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}