-
Notifications
You must be signed in to change notification settings - Fork 123
WIP: movie_json_data analysis #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 7 commits
abc5228
733d731
f28b89e
6611c9c
60c8749
c362e57
8f24778
fd4f923
984e281
6081887
b6f000d
5aa1779
bdbb2b5
c111967
828673a
c372cb0
46e5e1a
cfcaf23
4e1bec6
145e778
3624a7a
d9affe4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| /img/ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| import load_db as ldb | ||
if __name__ == "__main__":
    # Start the database load only when this file is run directly as the
    # program's entry point; a plain import must not trigger it.
    ldb.main()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| import json | ||
| import pandas as pd | ||
|
|
||
|
|
||
| # def load_json(data): | ||
| # Load JSON | ||
| #x=pd.DataFrame() | ||
def load(path="../json/top-rated-movies-02.json"):
    """Read the movie feed from a JSON file into a DataFrame.

    Args:
        path: Location of the JSON file. Defaults to the top-rated-movies
            feed, resolved relative to the current working directory.

    Returns:
        A ``pandas.DataFrame`` built directly from the decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8: without it, open() falls back to the
    # locale's code page (e.g. cp1252 on Windows) and non-ASCII characters
    # in the feed would be mis-decoded.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    return pd.DataFrame(data)
|
|
||
| #print(load(x)) | ||
| # JSON to DataFrame | ||
# Characters that only appear because a list was rendered with str().
_LIST_REPR_JUNK = str.maketrans("", "", "[]'\"")


def _flatten_list_cell(value):
    """Return *value* as one lower-cased, comma-separated string."""
    if isinstance(value, (list, tuple)):
        # Join the real items rather than scrubbing the list's repr, so
        # apostrophes inside a name ("O'Toole") are not stripped out.
        return ",".join(str(item).lower().strip() for item in value)
    # Anything that is not a list keeps the text-scrubbing fallback.
    text = str(value).lower().strip()
    return text.translate(_LIST_REPR_JUNK).replace(", ", ",")


def json_to_df():
    """Load the movie feed and convert its columns to database-ready types.

    Returns:
        The DataFrame produced by ``load()`` with these columns rewritten:
        ``genres`` and ``actors`` as lower-cased comma-separated strings,
        ``ratings`` as text, ``duration`` as an integer and ``imdbRating``
        as a float.

    Raises:
        KeyError: If one of the expected columns is missing from the feed.
        ValueError: If a duration or rating cannot be converted to a number.
    """
    dataframe = load()

    # genres/actors become a single "a,b,c" string per movie: each is stored
    # in one text column and the loader later splits it on "," to fill the
    # per-genre and per-actor relationship tables.
    dataframe['genres'] = dataframe['genres'].apply(_flatten_list_cell)

    # Kept as text without further processing.
    # NOTE(review): presumably a list of individual votes - confirm.
    dataframe['ratings'] = dataframe['ratings'].astype('str')

    # Strip the "PT" prefix and "M" suffix so a value such as "PT142M" is
    # left as the bare number, which is then converted to an integer.
    dataframe['duration'] = dataframe['duration'].astype('str').apply(
        lambda x: x.strip().replace("PT", "").replace("M", "")).astype(int)

    dataframe['imdbRating'] = dataframe['imdbRating'].astype('float')

    dataframe['actors'] = dataframe['actors'].apply(_flatten_list_cell)
    return dataframe
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
| import pyodbc | ||
| import teardown as db | ||
| import extract_transform as dp | ||
|
|
||
|
|
||
def main():
    """Rebuild the movie tables in Movies_DB and load the transformed feed.

    Steps, in order: drop the tables left by a previous run, build the
    transformed DataFrame, create the three tables, then insert one row per
    movie plus one row per (movie, actor) and per (movie, genre) pair.

    Raises:
        pyodbc.Error: If connecting or any SQL statement fails. The cursor
            and connection are closed before the error propagates.
    """
    # Start from a clean slate so the load can be re-run (for example when a
    # new daily feed arrives).
    db.dbcleanup()

    # Extract the JSON feed and apply the column transformations.
    dataframe = dp.json_to_df()

    # The SERVER part is a raw string because it contains a backslash;
    # "\M" in a normal literal is an invalid escape sequence.
    conn = pyodbc.connect(
        'DRIVER={ODBC Driver 17 for SQL Server};'
        'TrustServerCertificate=No;'
        'DATABASE=Movies_DB;'
        'WSID=LAPTOP-BLDSMT2E;'
        'APP={Microsoft® Windows® Operating System};'
        'Trusted_Connection=Yes;'
        r'SERVER=(localdb)\MSSQLLocalDB;'
        'Description=movies')
    try:
        cursor = conn.cursor()
        try:
            # Create the tables.
            cursor.execute("""
                CREATE TABLE [Top_rated_Movie] (
                    Id int NOT NULL CONSTRAINT [Id] PRIMARY KEY,
                    title char (100) NULL ,
                    [year] [date] NULL ,
                    [genres] [nvarchar] (50) NULL ,
                    [duration] [int] NULL ,
                    [releaseDate] [nvarchar] (50) NULL ,
                    [actors] [nvarchar] (500) NULL ,
                    [imdbRating] [varchar] (50) NULL ,
                ) ON [PRIMARY]
            """)
            cursor.execute("""
                CREATE TABLE [Movie_Actor_Relationship] (
                    [movieID] [int] NOT NULL ,
                    [actor] [nvarchar] (500) NULL ,
                    [imdbRating] [float] (50) NULL ,
                )
            """)
            cursor.execute("""
                CREATE TABLE [Movie_Genre_Relationship] (
                    [movieID] [int] NOT NULL ,
                    [genre] [nvarchar] (50) NULL ,
                    [imdbRating] [float] (50) NULL ,
                )
            """)
            conn.commit()

            # Insert the data. The DataFrame index doubles as the movie id in
            # all three tables.
            for index, row in dataframe.iterrows():
                cursor.execute(
                    "INSERT INTO dbo.Top_rated_Movie(Id,title,year,genres,actors,duration,releaseDate,imdbRating) values (?,?,?,?,?,?,?,?)",
                    index, row.title, row.year, row['genres'], row['actors'], row.duration, row.releaseDate,
                    row.imdbRating)
                # actors/genres arrive as comma-separated strings; fan each
                # one out into its own relationship row.
                for actor in row['actors'].split(","):
                    cursor.execute(
                        "INSERT INTO dbo.Movie_Actor_Relationship(movieID,actor,imdbRating) values (?,?,?)",
                        index, actor, row.imdbRating)
                for genre in row['genres'].split(","):
                    cursor.execute(
                        "INSERT INTO dbo.Movie_Genre_Relationship(movieID,genre,imdbRating) values (?,?,?)",
                        index, genre, row.imdbRating)

            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when a statement above failed.
        conn.close()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| import pandas as pd | ||
| import json | ||
|
|
||
| import pyodbc | ||
| from dask.dataframe.methods import values | ||
|
|
||
| import pyodbc | ||
|
|
||
|
|
||
def dbcleanup():
    """Drop the three movie tables from Movies_DB if they exist.

    Raises:
        pyodbc.Error: If connecting or a DROP statement fails. The
            connection is closed before the error propagates.
    """
    # The SERVER part is a raw string because it contains a backslash;
    # "\M" in a normal literal is an invalid escape sequence.
    conn = pyodbc.connect(
        'DRIVER={ODBC Driver 17 for SQL Server};'
        'TrustServerCertificate=No;'
        'DATABASE=Movies_DB;'
        'WSID=LAPTOP-BLDSMT2E;'
        'APP={Microsoft® Windows® Operating System};'
        'Trusted_Connection=Yes;'
        r'SERVER=(localdb)\MSSQLLocalDB;'
        'Description=movies')
    try:
        cursor = conn.cursor()
        cursor.execute("DROP TABLE IF EXISTS [dbo].[Top_rated_Movie]")
        cursor.execute("DROP TABLE IF EXISTS [dbo].[Movie_Actor_Relationship]")
        cursor.execute("DROP TABLE IF EXISTS [dbo].[Movie_Genre_Relationship]")
        conn.commit()
    finally:
        # Always release the connection, even when a statement above failed.
        conn.close()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing end of line.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added end of line.