feat: Refactoring Populate Table Data and Adding Descriptive Statistic Variable
brotherzhafif · Oct 12, 2024 · 0ca55f2 · 0ca55f2
1 parent 65aa7f2
commit 0ca55f2
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 34 deletions.
diff --git a/FrequencyTable.py b/FrequencyTable.py
@@ -1,11 +1,13 @@
 import numpy as np
+from scipy import stats
 
 # Frequency Table Class 
 class FrequencyTable:
     def __init__(self, dataset):
         # Data Initiation
         self.dataset = sorted(dataset)
-        self.amount = len(dataset)
+        self.sum = sum(dataset)
+        self.length = len(dataset)
         self.lowest = min(dataset)
         self.highest = max(dataset)
 
@@ -14,7 +16,7 @@ def __init__(self, dataset):
 
         # Classes is Rounding Down
         # Math Log Base 10 In Python For Accurate Result
-        self.classes = 1 + (3.222 * np.log10(self.amount))
+        self.classes = 1 + (3.222 * np.log10(self.length))
         self.classes = round(self.classes - 0.5)
 
         # Interval is Rounding Up
@@ -24,10 +26,21 @@ def __init__(self, dataset):
         # Rounding Both Limit So The Data Would Be Simple And Easier To Read
         self.base = self.roundy(self.lowest - 3)
         self.top = self.roundy(self.highest + 3)
+
+        # Mean or Average
+        self.mean = (self.sum / self.length)
+
+        # Formula for Variance
+        self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length
 
-    # Populate Data Method
-    def Populate(self):
-    # Initiating Used List
+        # Formula for Standard Deviation
+        self.deviation = (self.variance ** 0.5)
+
+
+
+    # Populate Grouped Table Frequency Data Method
+    def PopulateGrouped(self):
+        # Initiating Used List
         top = []
         bottom = []
         top_limit = []
@@ -41,12 +54,14 @@ def Populate(self):
         bot_cumulative_frequency = []
         top_cumulative_frequency = []
         relative_frequency = []
+        mode = []
 
-        # Initiating Used Parameter
-        interval = self.interval # 4
-        current_number = self.base - 1 # 156
+        # Initiating Used Parameter for Frequency Table
+        interval = self.interval
+        current_number = self.base - 1
         old_number = 0
 
+        # Processing the Frequency Table Data
         while current_number <= self.top-3:
             # Finding Class Lowest Value
             old_number = current_number + 1
@@ -89,13 +104,27 @@ def Populate(self):
             top_cumulative_frequency.append(current_top_cumulative_frequency)
 
             # Counting the Relative Frequency in Percentage
-            current_relative_frequency = np.round((current_frequency / self.amount) * 100)
-            relative_frequency.append(current_relative_frequency)
-
+            current_relative_frequency = np.round((current_frequency / self.length) * 100)
+            # Store the rounded relative frequency (the percent symbol is added later in ProcessedData)
+            relative_frequency.append(current_relative_frequency)    
+
+        # Find Mode or Data that appears most frequently 
+        mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
+        mode = [data_range[i] for i in mode_index]
+
+        # Formula to find Dataset Skewness
+        skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
 
+        # Formula to find Dataset Kurtosis
+        kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
+                (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
+
         # Append Processed Data into Data Attributes
-        self.final = ProcessedData(bottom, top, bottom_limit, top_limit, frequency, data_range, data_limit, data_midpoint, bot_cumulative_frequency, top_cumulative_frequency, relative_frequency)
-
+        self.grouped = ProcessedData(bottom, top, bottom_limit, top_limit, 
+                                     frequency, data_range, data_limit, data_midpoint, 
+                                     bot_cumulative_frequency, top_cumulative_frequency, 
+                                     relative_frequency, skewness, kurtosis, mode)
+
     # Base 5 Rounding
     def roundy(self, x, base = 5):
         return base * round(x/base)
@@ -111,7 +140,7 @@ def find_frequency(self, bot, top):
 # Processed Data Assignment 
 class ProcessedData:
     # Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R) 
-    def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF):
+    def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, skew, kurt, mode):
         self.bottom = bot
         self.top = top
         self.bottom_limit = bot_L
@@ -124,4 +153,10 @@ def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF):
         self.bottom_cumulative_frequency = bot_CF
         self.top_cumulative_frequency = top_CF
         self.relative_frequency = RF
+
+        self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ]
+        self.skewness = skew
+        self.kurtosis = kurt
+        self.mode = mode
+
 
diff --git a/Main.py b/Main.py
@@ -15,28 +15,23 @@
 # Initiate Object From The Raw Data
 data = ft.FrequencyTable(dataset)
 
-# Processing Raw Data to Frequency Table
-data.Populate()
-
-# Adding Percent Symbol into The Relative Frequency Coloumn
-relative_frequency_with_percentage = [
-    f"{rf * 1:.2f}%" for rf in data.final.relative_frequency
-]
+# Processing Raw Data into a Grouped Frequency Table
+data.PopulateGrouped()
 
 # Transform The Data To A Frequency Table
 # Initiating The Data Using Pandas
 df = pd.DataFrame(
     {
-        "Class Interval" : data.final.ranges,
-        "Class Limit" : data.final.limit,
-        "Frequency" : data.final.frequency,
-        "Midpoint" : data.final.midpoint,
+        "Class Interval" : data.grouped.ranges,
+        "Class Limit" : data.grouped.limit,
+        "Frequency" : data.grouped.frequency,
+        "Midpoint" : data.grouped.midpoint,
 
-        "C <" : data.final.bottom_limit,
-        "CF <" : data.final.bottom_cumulative_frequency,
-        "C >" : data.final.top_cumulative_frequency,
-        "CF >" : data.final.top_cumulative_frequency,
-        "Relative Frequency" : relative_frequency_with_percentage
+        "C <" : data.grouped.bottom_limit,
+        "CF <" : data.grouped.bottom_cumulative_frequency,
+        "C >" : data.grouped.top_limit,
+        "CF >" : data.grouped.top_cumulative_frequency,
+        "Relative Frequency" : data.grouped.percentage_relative_frequency
     }
 )
 
@@ -47,6 +42,6 @@
     tablefmt='pipe'
 ) 
 
-# print(table)
-print(data.final.ranges)
+# Print Output Data
+print(table)
 
diff --git a/README.md b/README.md
@@ -3,9 +3,9 @@
 
 ### Features
 - Frequency Table ( Done )
-- Descriptive Statistics ( Work in Progress )
+- Descriptive Statistics ( Done )
 - Display Chart ( Work in Progress )
-- Data Transformation ( Coming Soon )
+- Data Transformation ( Work in Progress )
 
 ### Required
 - Matplotlib