Skip to content

Commit

Permalink
new events with group names, images. frontend tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
ionox0 committed Nov 1, 2016
1 parent d150e2f commit 26ee6b0
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 21 deletions.
2 changes: 1 addition & 1 deletion scraper/events_data.json

Large diffs are not rendered by default.

34 changes: 15 additions & 19 deletions scraper/get_events_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,56 +19,52 @@
}

fileDir = os.path.dirname(os.path.realpath('__file__'))
filename = os.path.join(fileDir, 'data/pages_with_ids.json')

pages_file = os.path.join(fileDir, 'pages_data.json')



class FacebookScraper:

def get_groups(self):
    """Placeholder for retrieving the list of Facebook groups/pages.

    Not implemented: returns None unconditionally. The group/page data
    consumed by get_events() appears to come from pages_data.json,
    presumably produced by scraper/groups_scraper.js — TODO confirm.
    """

    return

def get_events(self, groups_file):
def get_events(self):
start_date = datetime.datetime.now().strftime("%Y-%m-%d")
events = []

with open(filename) as data_file:
with open(pages_file) as data_file:
pages_data = json.load(data_file)

# For every Columbia Page:
for i, page in enumerate(pages_data):
pprint(page['node_id'])
pprint(page['url'])
pprint(page['group_name'])

url = 'https://graph.facebook.com/v2.8/'
url = url + page['node_id']
url = url + str(page['group_id'])
url = url + '/events?'
url = url + 'since='
url = url + start_date
url = url + '&access_token=EAACEdEose0cBALFkO6rUmGl01Qt864YOXOWv67Lg2FgRQbsqeq8B3HnevZCFlTsW9jmuIX4nMedvZALi9DBLXj06O5K8b9AA3hazmm4UUAsXDcl5hZBEFHx6ZCiiEDFBi4peoF8Pxj7yyhPYqtmnv0x8m5JcGlBu7LfQtS6HywZDZD'
url = url + '&access_token=EAACEdEose0cBAAJ5TssjmvbHicr5vUuSRSZCe8Ba2TM61LRLdRr1jzy9qglJYW6qoAUGk7zvxmDMCbziRLJQIrtgCZAvslzFILt6VVJUIKNr4fMGgtpMlLAsOMcYXwGrSP203J6axkZCZBKnONWkn9tRvv2xVDYY6pW0h89uzAZDZD'
url = url + '&debug=all&format=json&method=get&pretty=0&suppress_http_code=1'
url = url + '&fields=name,place,start_time,description,cover,photos.limit(1),picture'

response = requests.get(url, headers=headers)
data = response.json()
print(response.json())
response_data = requests.get(url, headers=headers).json()
print(response_data)

# Skipping some nodes that have urls instead of ids:
if 'http' in page['node_id']:
if 'error' in response_data:
continue

# Add all that Page's events:
for d in response.json()['data']:
for d in response_data['data']:
event = {}
event['id'] = d['id']
event['title'] = d['name']
event['page_id'] = page['node_id']
event['group_url'] = page['url']
event['datetime'] = d['start_time']
event['group_id'] = page['group_id']
event['group'] = page['group']
event['group_url'] = page['group_url']
if 'description' in d:
event['description'] = d['description']
event['datetime'] = d['start_time']
if 'place' in d:
event['location'] = d['place']['name']
if 'cover' in d:
Expand All @@ -87,4 +83,4 @@ def get_events(self, groups_file):

if __name__ == "__main__":
scraper = FacebookScraper()
scraper.get_events('dummy')
scraper.get_events()
2 changes: 1 addition & 1 deletion scraper/groups_scraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ page.open("http://www.facebook.com/login.php", function(status) {
for (var i = 0; i < pages_els.length; i++) {
pages.push({
group_id: JSON.parse(pages_els[i].getAttribute('data-bt')).id,
group_name: pages_els[i].querySelector('._5d-5').innerHTML,
group: pages_els[i].querySelector('._5d-5').innerHTML,
group_url: pages_els[i].querySelector('a').href
})
}
Expand Down

0 comments on commit 26ee6b0

Please sign in to comment.