From 3b95bc34fb18bf9d616e905bf085c261403366b6 Mon Sep 17 00:00:00 2001
From: brotherzhafif
Date: Sun, 13 Oct 2024 12:59:11 +0700
Subject: [PATCH] fix: Correct miscalculation in the decimal frequency table

---
 FrequencyTable.py | 152 +++++++++++++++++++---------------------------
 Main.py           |  82 ++++++++++++++-----------
 2 files changed, 107 insertions(+), 127 deletions(-)

diff --git a/FrequencyTable.py b/FrequencyTable.py
index f5137f4..e7b91a6 100644
--- a/FrequencyTable.py
+++ b/FrequencyTable.py
@@ -1,4 +1,5 @@
 import numpy as np
+from scipy import stats
 
 # Frequency Table Class
 class FrequencyTable:
@@ -24,38 +25,56 @@ def __init__(self, dataset):
         # Interval is Rounding Up
         self.interval = self.range / self.classes
-        self.interval = round(self.interval + 0.5, 2)  # Keep two decimal places
+        self.interval = round(self.interval + 0.5)
 
-        # Rounding Both Limits
-        self.base = self.roundy(self.lowest - 0.5)
-        self.top = self.roundy(self.highest + 0.5)
+        # Round Both Limits So The Class Boundaries Stay Simple And Easy To Read
+        self.base = self.roundy(self.lowest - 3)
+        self.top = self.roundy(self.highest + 3)
 
         # Mean or Average
         self.mean = (self.sum / self.length)
 
-        # Variance and Standard Deviation
+        # Formula for Variance
         self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length
+
+        # Formula for Standard Deviation
         self.deviation = (self.variance ** 0.5)
 
-        # Skewness
+        # Formula for Dataset Skewness
         self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \
             sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
 
-        # Kurtosis
+        # Formula for Dataset Kurtosis
         self.kurtosis = (self.length * (self.length + 1) *
             sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) /
             ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
             (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
 
-    # Base Rounding
-    def roundy(self, x, base=0.5):
+    # Base 5 Rounding
+    def roundy(self, x, base=5):
         return base * round(x / base)
 
-    # Function To Find Frequency in Dataset with Desired Range
+    # Function To Find Frequency in Dataset Within a Desired Range (Bottom and Top Limit)
     def find_frequency(self, bot, top):
-        total_frequency = sum(1 for x in self.dataset if bot < x <= top)
+        total_frequency = 0
+        # Check if the dataset contains only integers
+        is_integer_data = all(isinstance(x, int) for x in self.dataset)
+
+        if is_integer_data:
+            # Loop for integers
+            for i in range(bot, top):
+                frequency = self.dataset.count(i)
+                total_frequency += frequency
+        else:
+            # Loop for decimals, assuming values carry at most two decimal places
+            current = bot
+            while current < top:
+                frequency = self.dataset.count(round(current, 2))  # Round so two-decimal values match
+                total_frequency += frequency
+                current += 0.01  # Increment by 0.01 for decimals
+
         return total_frequency
 
-    # Populate Grouped Frequency Table Data Method
+    # Method to Populate the Grouped Frequency Table Data
     def PopulateGrouped(self):
         # Initiating Used List
         top = []
@@ -73,118 +92,70 @@ def PopulateGrouped(self):
         relative_frequency = []
         mode = []
 
-        # Frequency Table Initialization
+        # Initialize the Parameters Used for the Frequency Table
         interval = self.interval
-        current_number = self.base - 0.5
+        current_number = self.base - 1
         old_number = 0
 
         # Processing the Frequency Table Data
-        while current_number <= self.top:
+        while current_number <= self.top - 3:
             # Finding Class Lowest Value
-            old_number = current_number + 0.5
-            bottom.append(old_number)
+            old_number = current_number + 1
+            bottom.append(old_number)
 
             # Finding Class Highest Value
             current_number = current_number + interval
             top.append(current_number)
 
-            # Class Limits
+            # Append Class Bottom Limit
             current_bottom_limit = old_number - 0.5
             bottom_limit.append(current_bottom_limit)
+
+            # Append Class Top Limit
             current_top_limit = current_number + 0.5
             top_limit.append(current_top_limit)
 
-            # Frequency Calculation
-            current_frequency = self.find_frequency(old_number, current_number)
+            # Find the Frequency Within This Class Range
+            current_frequency = self.find_frequency(old_number, current_number + 1)
             frequency.append(current_frequency)
 
-            # Data Range and Limits
-            current_data_range = f"{old_number:.2f} ~ {current_number:.2f}"
+            # Add the Class Interval Range (Lower ~ Upper Bound)
+            current_data_range = f"{old_number:.2f} ~ {current_number:.2f}" if not all(isinstance(x, int) for x in self.dataset) else f"{old_number} ~ {current_number}"
             data_range.append(current_data_range)
-            current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}"
+
+            # Add the Class Limit Range (Lower ~ Upper Limit)
+            current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}" if not all(isinstance(x, int) for x in self.dataset) else f"{current_bottom_limit} ~ {current_top_limit}"
             data_limit.append(current_data_limit)
 
-            # Midpoint Calculation
+            # Add the Midpoint of the Class
             current_data_midpoint = (old_number + current_number) / 2
             data_midpoint.append(current_data_midpoint)
 
-            # Cumulative Frequencies
-            current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, old_number)
+            # Adding Bottom Cumulative Frequency of The Class
+            current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number)
             bot_cumulative_frequency.append(current_bot_cumulative_frequency)
-            current_top_cumulative_frequency = self.find_frequency(current_number, self.highest + 0.5)
-            top_cumulative_frequency.append(current_top_cumulative_frequency)
 
-            # Relative Frequency Calculation
-            current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
-            relative_frequency.append(current_relative_frequency)
+            # Adding Top Cumulative Frequency of The Class
+            current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1)
+            top_cumulative_frequency.append(current_top_cumulative_frequency)
 
-        # Find Mode
+            # Compute the Relative Frequency as a Percentage
+            current_relative_frequency = np.round((current_frequency / self.length) * 100)
+            relative_frequency.append(current_relative_frequency)
+
+        # Find the Mode, the Class Range That Appears Most Frequently
         mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
         mode = [data_range[i] for i in mode_index]
 
-        # Store Processed Data
+        # Store the Processed Data in the grouped Attribute
         self.grouped = ProcessedData(None, bottom, top, bottom_limit, top_limit,
                                      frequency, data_range, data_limit, data_midpoint,
                                      bot_cumulative_frequency, top_cumulative_frequency,
                                      relative_frequency, mode)
-
-    # Populate Simple Frequency Table Data Method
-    def PopulateSimple(self):
-        # Initialize variables
-        data = sorted(set(self.dataset))
-        frequency = []
-        top_cumulative_frequency = []
-        bot_cumulative_frequency = []
-        relative_frequency = []
-        mode = []
-
-        # Check for numeric data
-        top_limit = None
-        bottom_limit = None
-
-        if not all(isinstance(item, str) for item in self.dataset):
-            top_limit = []
-            bottom_limit = []
-
-        # Process each class
-        for current_class in data:
-            current_frequency = self.dataset.count(current_class)
-            frequency.append(current_frequency)
-
-            current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
-            relative_frequency.append(current_relative_frequency)
-
-            if top_limit is not None and bottom_limit is not None:
-                current_top_limit = current_class + 0.5
-                current_bottom_limit = current_class - 0.5
-                top_limit.append(current_top_limit)
-                bottom_limit.append(current_bottom_limit)
-
-                current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, current_class)
-                bot_cumulative_frequency.append(current_bot_cumulative_frequency)
-
-                current_top_cumulative_frequency = self.find_frequency(current_class, self.highest + 0.5)
-                top_cumulative_frequency.append(current_top_cumulative_frequency)
-            else:
-                current_bot_cumulative_frequency = self.dataset.count(current_class)
-                bot_cumulative_frequency.append(current_bot_cumulative_frequency)
-                current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
-                top_cumulative_frequency.append(current_top_cumulative_frequency)
-
-        mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
-        mode = [data[i] for i in mode_index]
-
-        self.simple = ProcessedData(
-            data, None, None, bottom_limit, top_limit,
-            frequency, None, None, None,
-            bot_cumulative_frequency, top_cumulative_frequency,
-            relative_frequency, mode
-        )
-
 # Processed Data Assignment
 class ProcessedData:
-    # Constructor for processed data
+    # Abbreviations: Frequency (F), Ranges (R), Limit (L), Midpoint (M), Cumulative Frequency (CF), Relative Frequency (RF)
     def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
         self.classval = data
         self.bottom = bot
@@ -194,10 +165,11 @@ def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF,
         self.midpoint = M
         self.ranges = R
         self.limit = L
+        self.frequency = F
         self.bottom_cumulative_frequency = bot_CF
         self.top_cumulative_frequency = top_CF
         self.relative_frequency = RF
-        self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ]
-        self.mode = mode
\ No newline at end of file
+        self.percentage_relative_frequency = [f"{rf:.2f}%" for rf in self.relative_frequency]
+        self.mode = mode
diff --git a/Main.py b/Main.py
index 4bfe858..fafaac4 100644
--- a/Main.py
+++ b/Main.py
@@ -4,59 +4,67 @@
 import tabulate as tabulate
 
 # Raw Data
-dataset = [1.2, 2.5, 3.1, 4.7, 1.2, 2.5, 3.8, 4.5, 2.1, 3.3, 4.8, 5.0]
+dataset = [12.5, 43.2, 56.7, 12.1, 98.3, 34.2, 78.4, 67.9, 23.5, 45.6,
+           78.1, 89.0, 32.4, 56.8, 44.5, 77.2, 12.6, 35.8, 67.1, 23.3,
+           56.5, 78.9, 99.5, 22.4, 10.2, 35.1, 48.6, 59.9, 71.3, 84.2,
+           45.3, 67.8, 89.1, 33.3, 76.4, 88.7, 41.2, 12.7, 34.4, 67.4,
+           23.8, 55.1, 77.3, 90.4, 13.5, 14.6, 55.7, 22.2, 33.1, 66.5,
+           78.2, 39.5, 41.8, 91.2, 12.4, 64.7, 49.9, 80.5, 92.3, 38.8,
+           14.5, 99.1, 25.4, 26.8, 37.5, 52.3, 43.8, 76.8, 28.7, 64.8,
+           14.9, 15.3, 48.5, 82.2, 93.4, 56.3, 88.3, 60.5, 72.9, 38.3,
+           57.2, 70.1, 84.4, 97.2, 18.6, 45.1, 66.1, 31.9, 94.5, 29.4,
+           11.9, 16.7, 21.1, 88.9, 99.7, 53.6, 62.0, 34.9, 82.8, 18.9,]
 
 # Initiate Object From The Raw Data
 data = ft.FrequencyTable(dataset)
 
 # Processing Raw Data to Frequency Grouped Frequency Table
-# data.PopulateGrouped()   # Grouped Data
-data.PopulateSimple()      # Simple Data
+data.PopulateGrouped()     # Grouped Data
+# data.PopulateSimple()    # Simple Data
 
 # Transform The Data To A Frequency Table
 # Initiating The Data Using Pandas
 
 # Grouped Populated Data
-# dfg = pd.DataFrame(
-#     {
-#         "Class Interval"     : data.grouped.ranges,
-#         "Class Limit"        : data.grouped.limit,
-#         "Frequency"          : data.grouped.frequency,
-#         "Midpoint"           : data.grouped.midpoint,
-
-#         "C <"                : data.grouped.bottom_limit,
-#         "CF <"               : data.grouped.bottom_cumulative_frequency,
-#         "C >"                : data.grouped.top_limit,
-#         "CF >"               : data.grouped.top_cumulative_frequency,
-#         "Relative Frequency" : data.grouped.percentage_relative_frequency
-#     }
-# )
-
-# Simple Populated Data
-dfs = pd.DataFrame(
+dfg = pd.DataFrame(
     {
-        "Class"              : data.simple.classval,
-        "Frequency"          : data.simple.frequency,
-        "Relative Frequency" : data.simple.percentage_relative_frequency
+        "Class Interval"     : data.grouped.ranges,
+        "Class Limit"        : data.grouped.limit,
+        "Frequency"          : data.grouped.frequency,
+        "Midpoint"           : data.grouped.midpoint,
+
+        "C <"                : data.grouped.bottom_limit,
+        "CF <"               : data.grouped.bottom_cumulative_frequency,
+        "C >"                : data.grouped.top_limit,
+        "CF >"               : data.grouped.top_cumulative_frequency,
+        "Relative Frequency" : data.grouped.percentage_relative_frequency
    }
 )
 
-# Converting Pandas Data Into Tabulate
-tablesimple = tabulate.tabulate(
-    dfs,
-    headers='keys',
-    tablefmt='pipe'
-)
-
-# tablegrouped = tabulate.tabulate(
-#     dfg,
-#     headers='keys',
-#     tablefmt='pipe',
+# Simple Populated Data
+# dfs = pd.DataFrame(
+#     {
+#         "Class"              : data.simple.classval,
+#         "Frequency"          : data.simple.frequency,
+#         "Relative Frequency" : data.simple.percentage_relative_frequency
+#     }
 # )
 
-# Print The Processed Data
-print(tablesimple)
-# print(tablegrouped)
+# Converting Pandas Data Into Tabulate
+# tablesimple = tabulate.tabulate(
+#     dfs,
+#     headers='keys',
+#     tablefmt='pipe'
+# )
+tablegrouped = tabulate.tabulate(
+    dfg,
+    headers='keys',
+    tablefmt='pipe',
+)
+
+# Print The Processed Data
+# print(tablesimple)
+print(tablegrouped)
+print(data.length)   # Total number of data points
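
A quick way to confirm the decimal miscalculation is gone is to check that the grouped classes account for every observation. The snippet below is a minimal verification sketch, not part of the patch: the file name check_frequencies.py is made up, and it assumes the patched FrequencyTable.py from this diff sits in the same folder and exposes the attributes used above (PopulateGrouped, grouped.ranges, grouped.frequency, grouped.relative_frequency, length).

# check_frequencies.py - hypothetical sanity check, not part of the patch
import FrequencyTable as ft

# Small decimal sample in the same style as the dataset in Main.py
dataset = [12.5, 43.2, 56.7, 12.1, 98.3, 34.2, 78.4, 67.9, 23.5, 45.6,
           78.1, 89.0, 32.4, 56.8, 44.5, 77.2, 12.6, 35.8, 67.1, 23.3]

data = ft.FrequencyTable(dataset)
data.PopulateGrouped()

# Every observation should fall into exactly one class, so the class
# frequencies must add up to the dataset length and the relative
# frequencies to roughly 100 percent.
print("class ranges        :", data.grouped.ranges)
print("class frequencies   :", data.grouped.frequency)
print("counted observations:", sum(data.grouped.frequency), "of", data.length)
print("relative total      :", sum(data.grouped.relative_frequency), "%")

If the counted total ever falls short of data.length, the 0.01 stepping in find_frequency is the first place to look, since it only matches values with at most two decimal places.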