feat: Adding Simple Frequency Table

brotherzhafif · Oct 12, 2024 · b929b19 · b929b19
1 parent a3f7bca
commit b929b19
Show file tree

Hide file tree

Showing 2 changed files with 95 additions and 35 deletions.
diff --git a/FrequencyTable.py b/FrequencyTable.py
@@ -36,8 +36,13 @@ def __init__(self, dataset):
         # Formula for Standard Deviation
         self.deviation = (self.variance ** 0.5)
 
-
-
+        # Formula to find Dataset Skewness
+        self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
+
+        # Formula to find Dataset Kurtosis
+        self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
+                (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
+
     # Populate Grouped Table Frequency Data Method
     def PopulateGrouped(self):
         # Initiating Used List
@@ -72,8 +77,8 @@ def PopulateGrouped(self):
             top.append(current_number)
 
             # Append Class Bottom Limit
-            current_bot_limit = old_number - 0.5
-            bottom_limit.append(current_bot_limit)
+            current_bottom_limit = old_number - 0.5
+            bottom_limit.append(current_bottom_limit)
 
             # Append Class Top Limit
             current_top_limit = current_number + 0.5
@@ -88,7 +93,7 @@ def PopulateGrouped(self):
             data_range.append(current_data_range)
 
             # Adding Data Range Limit Of The Class Frequency
-            current_data_limit = f"{current_bot_limit} ~ {current_top_limit}"
+            current_data_limit = f"{current_bottom_limit} ~ {current_top_limit}"
             data_limit.append(current_data_limit)   
 
             # Adding Data Midpoint of The Class Frequency
@@ -99,7 +104,7 @@ def PopulateGrouped(self):
             current_bot_cumulative_frequency = self.find_frequency(self.lowest, old_number)
             bot_cumulative_frequency.append(current_bot_cumulative_frequency)
 
-            # Adding Bottom Cumulative Frequency of The Class 
+            # Adding Top Cumulative Frequency of The Class 
             current_top_cumulative_frequency = self.find_frequency(old_number, self.highest)
             top_cumulative_frequency.append(current_top_cumulative_frequency)
 
@@ -112,25 +117,72 @@ def PopulateGrouped(self):
         mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
         mode = [data_range[i] for i in mode_index]
 
-        # Formula to find Dataset Skewness
-        skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
-
-        # Formula to find Dataset  
-        kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
-                (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
-
         # Append Processed Data into Data Attributes
-        self.grouped = ProcessedData(bottom, top, bottom_limit, top_limit, 
+        self.grouped = ProcessedData(None, bottom, top, bottom_limit, top_limit, 
                                      frequency, data_range, data_limit, data_midpoint, 
                                      bot_cumulative_frequency, top_cumulative_frequency, 
-                                     relative_frequency, skewness, kurtosis, mode)
+                                     relative_frequency, mode)
+
+    def PopulateSimple(self):
+        # Remove Duplicates and Sort the Data
+        data = sorted(set(self.dataset))
+
+        # Initiating Used Variable
+        top_limit = []
+        bottom_limit = []
+        frequency = []
+        top_cumulative_frequency = []
+        bot_cumulative_frequency = []
+        relative_frequency = []
+        mode = []
+
+        for current_class in data:
+            # Compute the Top and Bottom Limits of the Class
+            current_top_limit = current_class + 0.5
+            current_bottom_limit = current_class - 0.5
 
+            # Append the Top and Bottom Limits of the Class
+            top_limit.append(current_top_limit)
+            bottom_limit.append(current_bottom_limit)
+
+            # Calculate Current Class Frequency 
+            current_frequency = self.dataset.count(current_class)
+            frequency.append(current_frequency)
+
+            # Calculate Current Class Bottom Cumulative Frequency
+            current_bot_cumulative_frequency = self.find_frequency(self.lowest, current_class)
+            bot_cumulative_frequency.append(current_bot_cumulative_frequency)
+
+            # Calculate Current Class Top Cumulative Frequency
+            current_top_cumulative_frequency = self.find_frequency(current_class, self.highest)
+            top_cumulative_frequency.append(current_top_cumulative_frequency)
+
+            # Calculate Current Class Relative Frequency 
+            current_relative_frequency = np.round((current_frequency / self.length) * 100)
+            relative_frequency.append(current_relative_frequency)
+
+        # Find the Mode
+        mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
+        mode = [data[i] for i in mode_index]
+
+        # Create the ProcessedData Object
+        self.simple = ProcessedData(data, None, None, bottom_limit, top_limit, 
+                                    frequency, None, None, None, 
+                                    bot_cumulative_frequency, top_cumulative_frequency, 
+                                    relative_frequency, mode)
+
     # Base 5 Rounding
     def roundy(self, x, base = 5):
         return base * round(x/base)
 
     # Function To Find Frequency in Dataset with Desired Range (Top and Down Limit)
     def find_frequency(self, bot, top):
+        try:
+            bot = int(bot)
+            top = int(top)
+        except (ValueError, TypeError) as e:
+            print(f"Error converting to int: {e}")
+
         total_frequency = 0
         for i in range(bot, top + 1):
             frequency = self.dataset.count(i)
@@ -140,7 +192,8 @@ def find_frequency(self, bot, top):
 # Processed Data Assignment 
 class ProcessedData:
     # Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R) 
-    def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, skew, kurt, mode):
+    def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
+        self.classval = data
         self.bottom = bot
         self.top = top
         self.bottom_limit = bot_L
@@ -155,8 +208,6 @@ def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, skew,
         self.relative_frequency = RF
 
         self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ]
-        self.skewness = skew
-        self.kurtosis = kurt
         self.mode = mode
 
 
diff --git a/Main.py b/Main.py
@@ -4,37 +4,45 @@
 import tabulate as tabulate
 
 # Raw Data
-dataset = (
-  58, 67, 45, 89, 72, 60, 76, 93, 
-  55, 48, 62, 85, 79, 56, 41, 90, 
-  77, 54, 68, 82, 46, 73, 57, 92, 
-  81, 53, 66, 74, 64, 52, 91, 78, 
-  49, 87, 88, 50, 69, 84, 43, 65, 
-  83, 70, 44, 61, 75, 80, 71, 63, 47,51)
+dataset = (1,1,1,4,6,7,3,6,7,1,2,2,5,3,1,8,3,2)
 
 # Initiate Object From The Raw Data
 data = ft.FrequencyTable(dataset)
 
 # Processing Raw Data to Frequency Grouped Frequency Table
-data.PopulateGrouped()
+data.PopulateSimple()
 
 # Transform The Data To A Frequency Table
 # Initiating The Data Using Pandas
+# df = pd.DataFrame(
+#     {
+#         "Class Interval" : data.grouped.ranges,
+#         "Class Limit" : data.grouped.limit,
+#         "Frequency" : data.grouped.frequency,
+#         "Midpoint" : data.grouped.midpoint,
+
+#         "C <" : data.grouped.bottom_limit,
+#         "CF <" : data.grouped.bottom_cumulative_frequency,
+#         "C >" : data.grouped.top_limit,
+#         "CF >" : data.grouped.top_cumulative_frequency,
+#         "Relative Frequency" : data.grouped.percentage_relative_frequency
+#     }
+# )
+
 df = pd.DataFrame(
     {
-        "Class Interval" : data.grouped.ranges,
-        "Class Limit" : data.grouped.limit,
-        "Frequency" : data.grouped.frequency,
-        "Midpoint" : data.grouped.midpoint,
+        "Class" : data.simple.classval,
+        "Frequency" : data.simple.frequency,
 
-        "C <" : data.grouped.bottom_limit,
-        "CF <" : data.grouped.bottom_cumulative_frequency,
-        "C >" : data.grouped.top_limit,
-        "CF >" : data.grouped.top_cumulative_frequency,
-        "Relative Frequency" : data.grouped.percentage_relative_frequency
+        "C <" : data.simple.bottom_limit,
+        "CF <" : data.simple.bottom_cumulative_frequency,
+        "C >" : data.simple.top_limit,
+        "CF >" : data.simple.top_cumulative_frequency,
+        "Relative Frequency" : data.simple.percentage_relative_frequency
     }
 )
 
+
 # Converting Pandas Data Into Tabulate
 table = tabulate.tabulate(
     df,
@@ -45,3 +53,4 @@
 # Print Output Data
 print(table)
 
+