FEND16 · dayanajoseph3091 · Dec 13, 2020 · Dec 13, 2020 · Dec 15, 2020 · Dec 15, 2020
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+/img/
diff --git a/Insights.pptx b/Insights.pptx
diff --git a/code/__main__.py b/code/__main__.py
@@ -0,0 +1,7 @@
+import load_db as ldb
+import sys
+
+if __name__ == '__main__':
+    # Run the database load only when this file is the program entry point.
+    ldb.main()
+    raise SystemExit(0)  # explicit success status; what sys.exit(0) raises
diff --git a/code/config_handler.py b/code/config_handler.py
@@ -0,0 +1,49 @@
+from configparser import ConfigParser
+
+import pyodbc
+
+
+def load_config():
+    """Return a ConfigParser after reading '../dev.ini' (relative to the cwd)."""
+    config = ConfigParser()
+    config.read('../dev.ini')  # read() skips a missing file; lookups fail later
+    return config
+
+
+def get_SQLCONFIG():
+    """Read the SQL Server settings from the ``[db]`` section of the config.
+
+    Returns the tuple (driver, database, trusted_connection, server).
+    """
+    config = load_config()
+    option_names = ("driver", "database", "trusted_connection", "server")
+    return tuple(config.get("db", name) for name in option_names)
+
+
+def ms_sql_connection():
+    """Open a pyodbc connection to SQL Server from the ``[db]`` config values."""
+    driver_, db_name, trusted_, server_ = get_SQLCONFIG()
+    # Forward trusted_connection too, so the ini setting selects the auth mode.
+    conn_info = ('DRIVER=' + driver_ + ';TrustServerCertificate=No;'
+                 'DATABASE=' + db_name + ';SERVER=' + server_
+                 + ';Trusted_Connection=' + trusted_)
+    return pyodbc.connect(conn_info)
+
+
+# Location of the movie JSON feed, as configured in the ini file.
+def json_path():
+    """Return the ``movie_list`` option of the ``[json]`` config section."""
+    return load_config().get("json", "movie_list")
+
+# ---- Disabled one-off snippet: writes the [db] section below to ../dev.ini ----
+
+# config = ConfigParser()
+# config['db'] = {
+#     "driver": "{ODBC Driver 17 for SQL Server}",
+#     "database": "Movies_DB",
+#     "trusted_Connection": "yes",
+#     "server": "(localdb)\MSSQLLocalDB"
+#
+# }
+# with open('../dev.ini', 'w') as f:
+#     config.write(f)
diff --git a/code/extract_transform.py b/code/extract_transform.py
@@ -0,0 +1,36 @@
+import json
+import pandas as pd
+import config_handler as ch
+
+
+def load():
+    """Read the movie JSON file named in the config and return it as a DataFrame."""
+    # JSON is UTF-8 by specification; the platform default encoding (e.g.
+    # cp1252 on Windows) would garble or reject non-ASCII text in the feed.
+    with open(ch.json_path(), encoding="utf-8") as f:
+        data = json.load(f)
+    return pd.DataFrame(data)
+
+
+# JSON to DataFrame
+def json_to_df():
+    """Load the movie data and normalise the columns the loader writes out.
+
+    ``genres`` and ``actors`` become lower-case comma-separated strings,
+    ``duration`` becomes integer minutes and ``imdbRating`` a float.
+    """
+    def flatten(text):
+        # Strip brackets and both quote styles left by stringifying the cell
+        # (names like Genelia D'Souza are why double quotes are removed too).
+        text = text.lower().strip()
+        for unwanted in ("[", "]", "\'", "\""):
+            text = text.replace(unwanted, "")
+        return text.replace(", ", ",")
+
+    frame = load()
+    for column in ('genres', 'actors'):
+        frame[column] = frame[column].astype('str').apply(flatten)
+    # extract relevant duration PT89M --> 89
+    frame['duration'] = frame['duration'].astype('str').apply(
+        lambda raw: raw.strip().replace("PT", "").replace("M", "")).astype(int)
+    frame['imdbRating'] = frame['imdbRating'].astype('float')
+    return frame
diff --git a/code/load_db.py b/code/load_db.py
@@ -0,0 +1,63 @@
+import pyodbc
+import teardown as td
+import extract_transform as et
+import config_handler as db_
+
+
+# import pyodbc
+# from configparser import ConfigParser
+
+
+def main():
+    """Rebuild the three movie tables and load them from the transformed JSON.
+
+    Existing tables are dropped first so the load can be rerun; every movie then
+    gets one Top_rated_Movie row plus one row per actor and one per genre.
+    """
+    td.db_cleanup()
+    dataframe = et.json_to_df()
+
+    conn = db_.ms_sql_connection()
+    try:  # close the connection even when a statement fails part-way through
+        cursor = conn.cursor()
+        # Create Tables
+        cursor.execute("""CREATE TABLE [Top_rated_Movie] (
+                   Id int NOT NULL CONSTRAINT [Id] PRIMARY KEY,
+                   title char (100) NULL ,
+                   [year] [date] NULL ,
+                   [genres] [nvarchar] (50) NULL ,
+                   [duration] [int] NULL ,
+                   [releaseDate] [nvarchar] (50) NULL ,
+                   [actors] [nvarchar] (500) NULL ,
+                   [imdbRating] [varchar] (50) NULL ,
+                   ) ON [PRIMARY] """)
+        cursor.execute("""CREATE TABLE [Movie_Actor_Relationship] (
+                   	[movieID] [int] NOT NULL , 
+                   	[actor] [nvarchar] (500) NULL ,
+                   	[imdbRating] [float] (50) NULL ,) 
+                   """)
+        cursor.execute("""
+                    CREATE TABLE [Movie_Genre_Relationship] (
+                   	[movieID] [int] NOT NULL , 
+                   	[genre] [nvarchar] (50) NULL ,
+                   	[imdbRating] [float] (50) NULL ,
+                   )""")
+        conn.commit()
+
+        # The DataFrame index serves as the movie id in all three tables.
+        for index, row in dataframe.iterrows():
+            cursor.execute(
+                """INSERT INTO dbo.Top_rated_Movie(Id,title,year,genres,actors,duration,
+            releaseDate,imdbRating) values (?,?,?,?,?,?,?,?)""",
+                index, row.title, row.year, row['genres'], row['actors'], row.duration,
+                row.releaseDate, row.imdbRating)
+            for actor in row['actors'].split(","):
+                cursor.execute("""INSERT INTO dbo.Movie_Actor_Relationship(movieID,actor,imdbRating) values (?,?,?)""",
+                               index, actor, row.imdbRating)
+            for genre in row['genres'].split(","):
+                cursor.execute("""INSERT INTO dbo.Movie_Genre_Relationship(movieID,genre,imdbRating) values (?,?,?)""",
+                               index, genre, row.imdbRating)
+        conn.commit()
+        cursor.close()
+    finally:
+        conn.close()
diff --git a/code/teardown.py b/code/teardown.py
@@ -0,0 +1,11 @@
+import config_handler as db
+
+
+def db_cleanup():
+    """Drop the three movie tables if they exist, then commit and disconnect."""
+    cursor = db.ms_sql_connection().cursor()
+    cursor.execute("DROP TABLE IF EXISTS [dbo].[Top_rated_Movie]")
+    cursor.execute("DROP TABLE IF EXISTS  [dbo].[Movie_Actor_Relationship]")
+    cursor.execute("DROP TABLE IF EXISTS  [dbo].[Movie_Genre_Relationship]")
+    cursor.commit()
+    cursor.connection.close()  # the cursor keeps a reference to its connection
diff --git a/dev.ini b/dev.ini
@@ -0,0 +1,8 @@
+[db]
+driver = {ODBC Driver 17 for SQL Server}
+database = Movies_DB
+trusted_connection = yes
+server = (localdb)\MSSQLLocalDB
+
+[json]
+movie_list= ../json/top-rated-movies-02.json
diff --git a/modeling_reporting/Insights.twbx b/modeling_reporting/Insights.twbx
diff --git a/modeling_reporting/upwork.pptx b/modeling_reporting/upwork.pptx