fix: Fixing Miscalculation in Decimal Frequency Table

brotherzhafif · Oct 13, 2024 · 3b95bc3 · 3b95bc3
1 parent e282935
commit 3b95bc3
Show file tree

Hide file tree

Showing 2 changed files with 107 additions and 127 deletions.
diff --git a/FrequencyTable.py b/FrequencyTable.py
@@ -1,4 +1,5 @@
 import numpy as np
+from scipy import stats
 
 # Frequency Table Class 
 class FrequencyTable:
@@ -24,38 +25,56 @@ def __init__(self, dataset):
 
             # Interval is Rounding Up
             self.interval = self.range / self.classes 
-            self.interval = round(self.interval + 0.5, 2)  # Keep two decimal places
+            self.interval = round(self.interval + 0.5)
 
-            # Rounding Both Limits
-            self.base = self.roundy(self.lowest - 0.5)
-            self.top = self.roundy(self.highest + 0.5)
+            # Rounding Both Limits So The Data Would Be Simple And Easier To Read
+            self.base = self.roundy(self.lowest - 3)
+            self.top = self.roundy(self.highest + 3)
 
             # Mean or Average
             self.mean = (self.sum / self.length)
 
-            # Variance and Standard Deviation
+            # Formula for Variance
             self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length
+
+            # Formula for Standard Deviation
             self.deviation = (self.variance ** 0.5)
 
-            # Skewness
+            # Formula to find Dataset Skewness
             self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \
                             sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
 
-            # Kurtosis
+            # Formula to find Dataset Kurtosis
             self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / 
                             ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
                             (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
 
-    # Base Rounding
-    def roundy(self, x, base=0.5):
+    # Base 5 Rounding
+    def roundy(self, x, base=5):
         return base * round(x / base)
 
-    # Function To Find Frequency in Dataset with Desired Range
+    # Function To Find Frequency in Dataset within the Desired Range (Bottom and Top Limit)
     def find_frequency(self, bot, top):
-        total_frequency = sum(1 for x in self.dataset if bot < x <= top)
+        total_frequency = 0
+        # Check if the dataset contains only integers
+        is_integer_data = all(isinstance(x, int) for x in self.dataset)
+
+        if is_integer_data:
+            # Loop for integers
+            for i in range(bot, top):
+                frequency = self.dataset.count(i)
+                total_frequency += frequency
+        else:
+            # Loop for decimals
+            current = bot
+            while current < top:
+                frequency = self.dataset.count(round(current, 2))  # Round for matching
+                total_frequency += frequency
+                current += 0.01  # Increment by 0.01 for decimals
+
         return total_frequency
 
-    # Populate Grouped Frequency Table Data Method
+    # Populate Grouped Table Frequency Data Method
     def PopulateGrouped(self):
         # Initiating Used List
         top = []
@@ -73,118 +92,70 @@ def PopulateGrouped(self):
         relative_frequency = []
         mode = []
 
-        # Frequency Table Initialization
+        # Initializing The Parameters Used for The Frequency Table
         interval = self.interval
-        current_number = self.base - 0.5
+        current_number = self.base - 1
         old_number = 0
 
         # Processing the Frequency Table Data
-        while current_number <= self.top:
+        while current_number <= self.top - 3:
             # Finding Class Lowest Value
-            old_number = current_number + 0.5
-            bottom.append(old_number) 
+            old_number = current_number + 1
+            bottom.append(old_number)
 
             # Finding Class Highest Value 
             current_number = current_number + interval
             top.append(current_number)
 
-            # Class Limits
+            # Append Class Bottom Limit
             current_bottom_limit = old_number - 0.5
             bottom_limit.append(current_bottom_limit)
+
+            # Append Class Top Limit
             current_top_limit = current_number + 0.5
             top_limit.append(current_top_limit)
 
-            # Frequency Calculation
-            current_frequency = self.find_frequency(old_number, current_number)
+            # Finding The Frequency Within That Range
+            current_frequency = self.find_frequency(old_number, current_number + 1)
             frequency.append(current_frequency)
 
-            # Data Range and Limits
-            current_data_range = f"{old_number:.2f} ~ {current_number:.2f}"
+            # Adding The Class Interval Label (Lowest ~ Highest Value of The Class)
+            current_data_range = f"{old_number:.2f} ~ {current_number:.2f}" if not all(isinstance(x, int) for x in self.dataset) else f"{old_number} ~ {current_number}"
             data_range.append(current_data_range)
-            current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}"
+
+            # Adding Data Range Limit Of The Class Frequency
+            current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}" if not all(isinstance(x, int) for x in self.dataset) else f"{current_bottom_limit} ~ {current_top_limit}"
             data_limit.append(current_data_limit)   
 
-            # Midpoint Calculation
+            # Adding Data Midpoint of The Class Frequency
             current_data_midpoint = (old_number + current_number) / 2
             data_midpoint.append(current_data_midpoint)
 
-            # Cumulative Frequencies
-            current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, old_number)
+            # Adding Bottom Cumulative Frequency of The Class 
+            current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number)
             bot_cumulative_frequency.append(current_bot_cumulative_frequency)
-            current_top_cumulative_frequency = self.find_frequency(current_number, self.highest + 0.5)
-            top_cumulative_frequency.append(current_top_cumulative_frequency)
 
-            # Relative Frequency Calculation
-            current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
-            relative_frequency.append(current_relative_frequency)    
+            # Adding Top Cumulative Frequency of The Class 
+            current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1)
+            top_cumulative_frequency.append(current_top_cumulative_frequency)
 
-        # Find Mode
+            # Counting the Relative Frequency in Percentage
+            current_relative_frequency = np.round((current_frequency / self.length) * 100)
+            relative_frequency.append(current_relative_frequency)    
+
+        # Find Mode or Data that appears most frequently 
         mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
         mode = [data_range[i] for i in mode_index]
 
-        # Store Processed Data
+        # Append Processed Data into Data Attributes
         self.grouped = ProcessedData(None, bottom, top, bottom_limit, top_limit, 
                                      frequency, data_range, data_limit, data_midpoint, 
                                      bot_cumulative_frequency, top_cumulative_frequency, 
                                      relative_frequency, mode)
-
-    # Populate Simple Frequency Table Data Method    
-    def PopulateSimple(self):
-        # Initialize variables
-        data = sorted(set(self.dataset))  
-        frequency = []  
-        top_cumulative_frequency = []  
-        bot_cumulative_frequency = []  
-        relative_frequency = []  
-        mode = []  
-
-        # Check for numeric data
-        top_limit = None
-        bottom_limit = None
-
-        if not all(isinstance(item, str) for item in self.dataset):
-            top_limit = []
-            bottom_limit = []
-
-        # Process each class
-        for current_class in data:
-            current_frequency = self.dataset.count(current_class)
-            frequency.append(current_frequency)
-
-            current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
-            relative_frequency.append(current_relative_frequency)
-
-            if top_limit is not None and bottom_limit is not None:
-                current_top_limit = current_class + 0.5
-                current_bottom_limit = current_class - 0.5
-                top_limit.append(current_top_limit)
-                bottom_limit.append(current_bottom_limit)
-
-                current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, current_class)
-                bot_cumulative_frequency.append(current_bot_cumulative_frequency)
-
-                current_top_cumulative_frequency = self.find_frequency(current_class, self.highest + 0.5)
-                top_cumulative_frequency.append(current_top_cumulative_frequency)
 
-            else:
-                current_bot_cumulative_frequency = self.dataset.count(current_class)
-                bot_cumulative_frequency.append(current_bot_cumulative_frequency)
-                current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
-                top_cumulative_frequency.append(current_top_cumulative_frequency)
-
-        mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
-        mode = [data[i] for i in mode_index]
-
-        self.simple = ProcessedData(
-            data, None, None, bottom_limit, top_limit, 
-            frequency, None, None, None, 
-            bot_cumulative_frequency, top_cumulative_frequency, 
-            relative_frequency, mode
-        )
-
 # Processed Data Assignment 
 class ProcessedData:
-    # Constructor for processed data
+    # Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative Frequency (CF), Relative Frequency (RF)
     def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
         self.classval = data
         self.bottom = bot
@@ -194,10 +165,11 @@ def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF,
         self.midpoint = M
         self.ranges = R      
         self.limit = L     
+
         self.frequency = F
         self.bottom_cumulative_frequency = bot_CF
         self.top_cumulative_frequency = top_CF
         self.relative_frequency = RF
 
-        self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ]
-        self.mode = mode
+        self.percentage_relative_frequency = [f"{rf * 1:.2f}%" for rf in self.relative_frequency]
+        self.mode = mode
diff --git a/Main.py b/Main.py
@@ -4,59 +4,67 @@
 import tabulate as tabulate
 
 # Raw Data
-dataset = [1.2, 2.5, 3.1, 4.7, 1.2, 2.5, 3.8, 4.5, 2.1, 3.3, 4.8, 5.0]
+dataset = [12.5, 43.2, 56.7, 12.1, 98.3, 34.2, 78.4, 67.9, 23.5, 45.6,
+    78.1, 89.0, 32.4, 56.8, 44.5, 77.2, 12.6, 35.8, 67.1, 23.3,
+    56.5, 78.9, 99.5, 22.4, 10.2, 35.1, 48.6, 59.9, 71.3, 84.2,
+    45.3, 67.8, 89.1, 33.3, 76.4, 88.7, 41.2, 12.7, 34.4, 67.4,
+    23.8, 55.1, 77.3, 90.4, 13.5, 14.6, 55.7, 22.2, 33.1, 66.5,
+    78.2, 39.5, 41.8, 91.2, 12.4, 64.7, 49.9, 80.5, 92.3, 38.8,
+    14.5, 99.1, 25.4, 26.8, 37.5, 52.3, 43.8, 76.8, 28.7, 64.8,
+    14.9, 15.3, 48.5, 82.2, 93.4, 56.3, 88.3, 60.5, 72.9, 38.3,
+    57.2, 70.1, 84.4, 97.2, 18.6, 45.1, 66.1, 31.9, 94.5, 29.4,
+    11.9, 16.7, 21.1, 88.9, 99.7, 53.6, 62.0, 34.9, 82.8, 18.9,]
 
 
 # Initiate Object From The Raw Data
 data = ft.FrequencyTable(dataset)
 
 # Processing Raw Data to Frequency Grouped Frequency Table
-# data.PopulateGrouped() # Grouped Data
-data.PopulateSimple() # Simple Data
+data.PopulateGrouped() # Grouped Data
+# data.PopulateSimple() # Simple Data
 
 # Transform The Data To A Frequency Table
 # Initiating The Data Using Pandas
 # Grouped Populated Data
-# dfg = pd.DataFrame(
-#     {
-#         "Class Interval" : data.grouped.ranges,
-#         "Class Limit" : data.grouped.limit,
-#         "Frequency" : data.grouped.frequency,
-#         "Midpoint" : data.grouped.midpoint,
-
-#         "C <" : data.grouped.bottom_limit,
-#         "CF <" : data.grouped.bottom_cumulative_frequency,
-#         "C >" : data.grouped.top_limit,
-#         "CF >" : data.grouped.top_cumulative_frequency,
-#         "Relative Frequency" : data.grouped.percentage_relative_frequency
-#     }
-# )
-
-# Simple Populated Data
-dfs = pd.DataFrame(
+dfg = pd.DataFrame(
     {
-        "Class" : data.simple.classval,
-        "Frequency" : data.simple.frequency,
-        "Relative Frequency" : data.simple.percentage_relative_frequency
+        "Class Interval" : data.grouped.ranges,
+        "Class Limit" : data.grouped.limit,
+        "Frequency" : data.grouped.frequency,
+        "Midpoint" : data.grouped.midpoint,
+
+        "C <" : data.grouped.bottom_limit,
+        "CF <" : data.grouped.bottom_cumulative_frequency,
+        "C >" : data.grouped.top_limit,
+        "CF >" : data.grouped.top_cumulative_frequency,
+        "Relative Frequency" : data.grouped.percentage_relative_frequency
     }
 )
 
-# Converting Pandas Data Into Tabulate
-tablesimple = tabulate.tabulate(
-    dfs,
-    headers='keys',
-    tablefmt='pipe'
-) 
-
-# tablegrouped = tabulate.tabulate(
-#     dfg,
-#     headers='keys',
-#     tablefmt='pipe',
+# Simple Populated Data
+# dfs = pd.DataFrame(
+#     {
+#         "Class" : data.simple.classval,
+#         "Frequency" : data.simple.frequency,
+#         "Relative Frequency" : data.simple.percentage_relative_frequency
+#     }
 # )
 
-# Print The Processed Data
-print(tablesimple)
-# print(tablegrouped)
+# Converting Pandas Data Into Tabulate
+# tablesimple = tabulate.tabulate(
+#     dfs,
+#     headers='keys',
+#     tablefmt='pipe'
+# ) 
 
+tablegrouped = tabulate.tabulate(
+    dfg,
+    headers='keys',
+    tablefmt='pipe',
+)
 
+# Print The Processed Data
+# print(tablesimple)
+print(tablegrouped)
+print(data.length)