FEND16 · dayanajoseph3091 · Dec 13, 2020 · Dec 13, 2020 · Dec 15, 2020 · Dec 15, 2020
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+/img/
diff --git a/code/__main__.py b/code/__main__.py
@@ -0,0 +1,4 @@
+import load_db as ldb
+if __name__ == "__main__":
+    # Start the load pipeline only when this file is the program entry point,
+    # never as a side effect of being imported.
+    ldb.main()
diff --git a/code/extract_transform.py b/code/extract_transform.py
@@ -0,0 +1,33 @@
+import json
+import pandas as pd
+
+
+# def load_json(data):
+    # Load JSON
+#x=pd.DataFrame()
+def load(path="../json/top-rated-movies-02.json"):
+    """Read the movie JSON feed and return it as a DataFrame.
+
+    Args:
+        path: Location of the JSON file. Defaults to the top-rated-movies
+            feed; a relative path is resolved against the current working
+            directory.
+
+    Returns:
+        pandas.DataFrame built from the parsed JSON document.
+
+    Raises:
+        OSError: if the file cannot be opened.
+        json.JSONDecodeError: if the file does not contain valid JSON.
+    """
+    # JSON interchange text is UTF-8. Without an explicit encoding, open()
+    # falls back to the locale default (e.g. cp1252 on Windows), which garbles
+    # or rejects non-ASCII titles and names.
+    with open(path, encoding="utf-8") as f:
+        data = json.load(f)
+    return pd.DataFrame(data)
+
+#print(load(x))
+# JSON to DataFrame
+def _join_lowercase(value):
+    """Collapse a list-valued cell into one lowercase, comma-separated string."""
+    if isinstance(value, (list, tuple)):
+        # Join the items directly instead of stringifying the list and then
+        # deleting brackets and quotes: that approach also removed apostrophes
+        # that belong to a name (e.g. "Peter O'Toole").
+        return ",".join(str(item).strip().lower() for item in value)
+    # Anything that is not a list keeps the text-based cleanup.
+    text = str(value).lower().strip()
+    for char in "[]'\"":
+        text = text.replace(char, "")
+    return text.replace(", ", ",")
+
+
+def json_to_df():
+    """Load the movie feed and convert its columns for the database load.
+
+    Conversions applied to the DataFrame returned by load():
+      * genres / actors: lowercase, comma-separated string.
+      * ratings: string representation of the cell.
+      * duration: "PT" and "M" removed, remainder parsed as int.
+      * imdbRating: float.
+
+    Returns:
+        The converted pandas.DataFrame.
+
+    Raises:
+        KeyError: if one of the converted columns is missing.
+        ValueError: if a duration does not reduce to an integer or an
+            imdbRating cannot be converted to float.
+    """
+    dataframe = load()
+
+    dataframe['genres'] = dataframe['genres'].apply(_join_lowercase)
+    dataframe['actors'] = dataframe['actors'].apply(_join_lowercase)
+    dataframe['ratings'] = dataframe['ratings'].astype('str')
+
+    # Only the "PT" prefix and the "M" suffix are removed before int parsing.
+    # NOTE(review): presumably every duration is minutes-only (e.g. "PT89M");
+    # a value with an hours part such as "PT1H30M" would raise ValueError.
+    dataframe['duration'] = dataframe['duration'].astype('str').apply(
+        lambda x: x.strip().replace("PT", "").replace("M", "")).astype(int)
+    dataframe['imdbRating'] = dataframe['imdbRating'].astype('float')
+    return dataframe
diff --git a/code/load_db.py b/code/load_db.py
@@ -0,0 +1,69 @@
+import pyodbc
+import teardown as db
+import extract_transform as dp
+
+
+def _create_tables(cursor):
+    """Create the movie table and the two relationship tables."""
+    cursor.execute('\n'
+                   '\n'
+                   ' CREATE TABLE [Top_rated_Movie] (\n'
+                   '	Id int NOT NULL CONSTRAINT [Id] PRIMARY KEY,\n '
+                   'title char (100) NULL ,\n'
+                   '	[year] [date] NULL ,\n'
+                   '[genres] [nvarchar] (50) NULL ,\n'
+                   '[duration] [int] NULL ,\n'
+                   '[releaseDate] [nvarchar] (50) NULL ,\n'
+                   '[actors] [nvarchar] (500) NULL ,\n'
+                   '[imdbRating] [varchar] (50) NULL ,\n'
+                   ') ON [PRIMARY]\n'
+                   '\n'
+                   '               ')
+
+    cursor.execute('\n'
+                   '\n'
+                   ' CREATE TABLE [Movie_Actor_Relationship] (\n'
+                   '	[movieID] [int] NOT NULL , '
+                   '	[actor] [nvarchar] (500) NULL ,\n'
+                   '	[imdbRating] [float] (50) NULL ,\n'
+                   ') \n'
+                   '\n'
+                   '               ')
+
+    cursor.execute('\n'
+                   '\n'
+                   '              CREATE TABLE [Movie_Genre_Relationship] (\n'
+                   '	[movieID] [int] NOT NULL , '
+                   '	[genre] [nvarchar] (50) NULL ,\n'
+                   '	[imdbRating] [float] (50) NULL ,\n'
+                   ') \n'
+                   '\n'
+                   '               ')
+
+
+def _insert_movies(cursor, dataframe):
+    """Insert each DataFrame row plus one relationship row per actor and genre."""
+    for index, row in dataframe.iterrows():
+        # The DataFrame index label doubles as the movie's primary key.
+        cursor.execute(
+            "INSERT INTO dbo.Top_rated_Movie(Id,title,year,genres,actors,duration,releaseDate,imdbRating) values (?,?,?,?,?,?,?,?)",
+            index, row.title, row.year, row['genres'], row['actors'], row.duration, row.releaseDate, row.imdbRating)
+        for actor in row['actors'].split(","):
+            # "".split(",") yields [""]; skip blanks so a movie without
+            # actors does not get an empty relationship row.
+            if not actor:
+                continue
+            cursor.execute("INSERT INTO dbo.Movie_Actor_Relationship(movieID,actor,imdbRating) values (?,?,?)", index,
+                           actor, row.imdbRating)
+        for genre in row['genres'].split(","):
+            if not genre:
+                continue
+            cursor.execute("INSERT INTO dbo.Movie_Genre_Relationship(movieID,genre,imdbRating) values (?,?,?)", index,
+                           genre, row.imdbRating)
+
+
+def main():
+    """Rebuild the movie tables from the JSON feed.
+
+    Steps: drop any existing tables (so the load can be re-run for a new
+    feed), extract and transform the JSON into a DataFrame, create the three
+    tables, then insert every movie with its actor and genre relationships.
+    The table creation and the inserts are committed separately.
+
+    Raises:
+        pyodbc.Error: if connecting or any SQL statement fails. The cursor
+            and connection are closed before the error propagates.
+    """
+    # For rerun purposes (in case of a new daily feed)
+    db.dbcleanup()
+
+    # Extract JSON and transform
+    dataframe = dp.json_to_df()
+
+    # The backslash is escaped explicitly: "\M" is not a valid escape sequence
+    # and triggers a warning on current Python versions.
+    conn = pyodbc.connect(
+        'DRIVER={ODBC Driver 17 for SQL Server};TrustServerCertificate=No;DATABASE=Movies_DB;WSID=LAPTOP-BLDSMT2E;APP={Microsoft® Windows® Operating System};Trusted_Connection=Yes;SERVER=(localdb)\\MSSQLLocalDB;Description=movies')
+    try:
+        cursor = conn.cursor()
+        try:
+            _create_tables(cursor)
+            conn.commit()
+
+            _insert_movies(cursor, dataframe)
+            conn.commit()
+        finally:
+            cursor.close()
+    finally:
+        # Always release the connection, even when a statement fails.
+        conn.close()
diff --git a/code/teardown.py b/code/teardown.py
@@ -0,0 +1,20 @@
+import pandas as pd
+import json
+
+import pyodbc
+from dask.dataframe.methods import values
+
+import pyodbc
+
+
+def dbcleanup():
+    """Drop the three movie tables if they exist and commit the change.
+
+    Raises:
+        pyodbc.Error: if connecting or a DROP statement fails. The cursor and
+            connection are closed before the error propagates.
+    """
+    # The backslash is escaped explicitly: "\M" is not a valid escape sequence
+    # and triggers a warning on current Python versions.
+    conn = pyodbc.connect(
+        'DRIVER={ODBC Driver 17 for SQL Server};TrustServerCertificate=No;DATABASE=Movies_DB;WSID=LAPTOP-BLDSMT2E;APP={Microsoft® Windows® Operating System};Trusted_Connection=Yes;SERVER=(localdb)\\MSSQLLocalDB;Description=movies')
+    try:
+        cursor = conn.cursor()
+        try:
+            cursor.execute("DROP TABLE IF EXISTS [dbo].[Top_rated_Movie]")
+            cursor.execute("DROP TABLE IF EXISTS  [dbo].[Movie_Actor_Relationship]")
+            cursor.execute("DROP TABLE IF EXISTS  [dbo].[Movie_Genre_Relationship]")
+            cursor.commit()
+        finally:
+            cursor.close()
+    finally:
+        # Always release the connection, even when a DROP fails.
+        conn.close()