-
Notifications
You must be signed in to change notification settings - Fork 0
/
TEMP_imdb.py
93 lines (60 loc) · 1.98 KB
/
TEMP_imdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# -*- coding: utf-8 -*-
"""Untitled1.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1BnnfCuT_sMcyFvlYxkW5Z6Wzaz_BX5Ei
"""
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup
# Interactive IMDb lookup: search for a title, let the user pick one of the
# results, then print that title's rating, cast and story line.
#
# NOTE(review): every CSS class / id / itemprop used below is the one this
# script was originally written against. Confirm they still match the markup
# IMDb currently serves; when an element is missing the script now prints a
# placeholder instead of crashing.

baseImdb = 'https://www.imdb.com/'
searchBase = 'https://www.imdb.com/find?q='

# Seconds to wait for IMDb before giving up; without a timeout a stalled
# connection would hang the script indefinitely.
REQUEST_TIMEOUT = 10

# Placeholder printed when an expected element is absent from the page.
MISSING = 'N/A'


def build_search_url(movie_name):
    """Return the IMDb search URL for *movie_name*.

    The name is URL-encoded with ``quote_plus`` so spaces become ``+`` and
    reserved characters (``&``, ``#``, ...) cannot break the query string.
    Leading and trailing whitespace is dropped.
    """
    return searchBase + quote_plus(movie_name.strip())


def parse_selection(raw_choice, total):
    """Convert a 1-based menu choice into a 0-based list index.

    Args:
        raw_choice: Text typed by the user.
        total: Number of entries that were listed.

    Returns:
        The zero-based index of the chosen entry.

    Raises:
        ValueError: If *raw_choice* is not an integer or lies outside
            ``1..total``. Rejecting 0 and negatives matters because they
            would otherwise silently index from the end of the list.
    """
    choice = int(raw_choice)
    if not 1 <= choice <= total:
        raise ValueError(
            'expected a number between 1 and {}, got {}'.format(total, choice)
        )
    return choice - 1


def fetch_soup(url):
    """Download *url* and return its content parsed with ``html.parser``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx response (via ``raise_for_status``).
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


def parse_search_results(search_soup):
    """Return ``(label, href)`` pairs for each search hit on the page.

    A hit is a ``<td class="result_text">`` cell; cells that contain no
    link with an ``href`` are skipped.
    """
    results = []
    for cell in search_soup.find_all('td', {'class': 'result_text'}):
        anchor = cell.find('a')
        href = anchor.get('href') if anchor is not None else None
        if href:
            results.append((cell.get_text(), href))
    return results


def _node_text(node):
    """Return the text of *node*, or the ``MISSING`` placeholder for None."""
    return node.get_text() if node is not None else MISSING


def parse_movie_details(movie_soup):
    """Extract title, rating, rating count, cast and story line.

    Returns:
        A dict with keys ``title``, ``rating``, ``rating_count``, ``cast``
        (list of strings) and ``story_line``. Text fields fall back to
        ``MISSING`` and ``cast`` to an empty list when the corresponding
        element is not found.
    """
    # The heading may contain non-breaking spaces; show them as plain spaces.
    title = _node_text(movie_soup.find('h1')).replace('\xa0', ' ')
    rating = _node_text(movie_soup.find('span', {'itemprop': 'ratingValue'}))
    rating_count = _node_text(
        movie_soup.find('span', {'itemprop': 'ratingCount'})
    )

    # Cast names are read from the title attribute of the images in the
    # cast table; images without a title are ignored.
    cast = []
    cast_table = movie_soup.find('table', {'class': 'cast_list'})
    if cast_table is not None:
        cast = [
            img.get('title')
            for img in cast_table.find_all('img')
            if img.get('title')
        ]

    # Story line: first <span> inside the "inline" div of the story section.
    story_line = MISSING
    story_section = movie_soup.find(
        'div', attrs={'class': 'article', 'id': 'titleStoryLine'}
    )
    if story_section is not None:
        inline_div = story_section.find('div', {'class': 'inline'})
        if inline_div is not None:
            story_line = _node_text(inline_div.find('span'))

    # The original also read div#titleDidYouKnow but never printed it (the
    # print was commented out), so that lookup is omitted here.
    return {
        'title': title,
        'rating': rating,
        'rating_count': rating_count,
        'cast': cast,
        'story_line': story_line,
    }


def print_details(details):
    """Print the dict produced by ``parse_movie_details``."""
    print('\n\nDETAILS:\n')
    print('Title : ', details['title'])
    print('Rating = ', details['rating'])
    print('Total Rating Count = ', details['rating_count'])
    print('\nCast of Movie \n')
    for member in details['cast']:
        print('==> ', member)
    print('\nSTORY LINE\n', details['story_line'])


def main():
    """Run the interactive search -> select -> show-details flow."""
    movie_name = input('Enter the movie to search :')
    results = parse_search_results(fetch_soup(build_search_url(movie_name)))
    if not results:
        raise SystemExit('No results found for: {}'.format(movie_name))

    for number, (label, _href) in enumerate(results, start=1):
        print(number, '. ', label)

    raw_choice = input('Enter the number to search details for : ')
    try:
        index = parse_selection(raw_choice, len(results))
    except ValueError as err:
        raise SystemExit('Invalid selection: {}'.format(err)) from None

    # Result links are site-relative, so resolve them against the base URL.
    page_url = urljoin(baseImdb, results[index][1])
    print('Page Link = ', page_url)

    print_details(parse_movie_details(fetch_soup(page_url)))


if __name__ == '__main__':
    main()