From e2829353ff0944eabde2a1b6880bcc351f1c8148 Mon Sep 17 00:00:00 2001 From: brotherzhafif Date: Sun, 13 Oct 2024 11:13:07 +0700 Subject: [PATCH] feat: Adding Float/Decimal Data Handler --- FrequencyTable.py | 136 +++++++++++++++++----------------------------- Main.py | 14 +---- 2 files changed, 51 insertions(+), 99 deletions(-) diff --git a/FrequencyTable.py b/FrequencyTable.py index 241ab19..f5137f4 100644 --- a/FrequencyTable.py +++ b/FrequencyTable.py @@ -1,5 +1,4 @@ import numpy as np -from scipy import stats # Frequency Table Class class FrequencyTable: @@ -25,49 +24,38 @@ def __init__(self, dataset): # Interval is Rounding Up self.interval = self.range / self.classes - self.interval = round(self.interval + 0.5) + self.interval = round(self.interval + 0.5, 2) # Keep two decimal places - # Rounding Both Limits So The Data Would Be Simple And Easier To Read - self.base = self.roundy(self.lowest - 3) - self.top = self.roundy(self.highest + 3) + # Rounding Both Limits + self.base = self.roundy(self.lowest - 0.5) + self.top = self.roundy(self.highest + 0.5) # Mean or Average self.mean = (self.sum / self.length) - # Formula for Variance + # Variance and Standard Deviation self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length - - # Formula for Standard Deviation self.deviation = (self.variance ** 0.5) - # Formula to find Dataset Skewness + # Skewness self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \ sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset) - # Formula to find Dataset Kurtosis + # Kurtosis self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \ (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3)) - # Base 5 Rounding - def roundy(self, x, base=5): + # Base Rounding + def roundy(self, x, base=0.5): return base * round(x / base) - - # Function To Find Frequency in Dataset with Desired Range (Top and Down Limit) + + # Function To Find Frequency in Dataset with Desired Range def find_frequency(self, bot, top): - try: - bot = int(bot) - top = int(top) - except (ValueError, TypeError) as e: - print(f"Error converting to int: {e}") - - total_frequency = 0 - for i in range(bot, top): - frequency = self.dataset.count(i) - total_frequency = total_frequency + frequency + total_frequency = sum(1 for x in self.dataset if bot < x <= top) return total_frequency - - # Populate Grouped Table Frequency Data Method + + # Populate Grouped Frequency Table Data Method def PopulateGrouped(self): # Initiating Used List top = [] @@ -85,129 +73,108 @@ def PopulateGrouped(self): relative_frequency = [] mode = [] - # Initiating Used Parameter for Frequency Table + # Frequency Table Initialization interval = self.interval - current_number = self.base - 1 + current_number = self.base - 0.5 old_number = 0 # Processing the Frequency Table Data - while current_number <= self.top-3: + while current_number <= self.top: # Finding Class Lowest Value - old_number = current_number + 1 - bottom.append(old_number) # 155 + old_number = current_number + 0.5 + bottom.append(old_number) # Finding Class Highest Value current_number = current_number + interval top.append(current_number) - # Append Class Bottom Limit + # Class Limits current_bottom_limit = old_number - 0.5 bottom_limit.append(current_bottom_limit) - - # Append Class Top Limit current_top_limit = current_number + 0.5 top_limit.append(current_top_limit) - # Finding The Frequency That Range - current_frequency = self.find_frequency(old_number, current_number + 1) + # Frequency Calculation + current_frequency = self.find_frequency(old_number, current_number) frequency.append(current_frequency) - # Adding The Number Range From Both Frequency - current_data_range = f"{old_number} ~ {current_number}" + # Data Range and Limits + current_data_range = f"{old_number:.2f} ~ {current_number:.2f}" data_range.append(current_data_range) - - # Adding Data Range Limit Of The Class Frequency - current_data_limit = f"{current_bottom_limit} ~ {current_top_limit}" + current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}" data_limit.append(current_data_limit) - # Adding Data Midpoint of The Class Frequency + # Midpoint Calculation current_data_midpoint = (old_number + current_number) / 2 data_midpoint.append(current_data_midpoint) - # Adding Bottom Cumulative Frequency of The Class - current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number) + # Cumulative Frequencies + current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, old_number) bot_cumulative_frequency.append(current_bot_cumulative_frequency) - - # Adding Top Cumulative Frequency of The Class - current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1) + current_top_cumulative_frequency = self.find_frequency(current_number, self.highest + 0.5) top_cumulative_frequency.append(current_top_cumulative_frequency) - - # Counting the Relative Frequency in Percentage - current_relative_frequency = np.round((current_frequency / self.length) * 100) - # Adding Percent Symbol into The Relative Frequency Coloumn + + # Relative Frequency Calculation + current_relative_frequency = np.round((current_frequency / self.length) * 100, 2) relative_frequency.append(current_relative_frequency) - # Find Mode or Data that appears most frequently + # Find Mode mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)] mode = [data_range[i] for i in mode_index] - # Append Processed Data into Data Attributes + # Store Processed Data self.grouped = ProcessedData(None, bottom, top, bottom_limit, top_limit, frequency, data_range, data_limit, data_midpoint, bot_cumulative_frequency, top_cumulative_frequency, relative_frequency, mode) - # Populate Simple Table Frequency Data Method + # Populate Simple Frequency Table Data Method def PopulateSimple(self): - # Initialize general variables - data = sorted(set(self.dataset)) # Remove duplicates and sort the data - frequency = [] # To store the frequency of each class - top_cumulative_frequency = [] # To store top cumulative frequency for each class - bot_cumulative_frequency = [] # To store bottom cumulative frequency for each class - relative_frequency = [] # To store relative frequency for each class - mode = [] # To store the mode(s) - - # Variables specifically for numeric data + # Initialize variables + data = sorted(set(self.dataset)) + frequency = [] + top_cumulative_frequency = [] + bot_cumulative_frequency = [] + relative_frequency = [] + mode = [] + + # Check for numeric data top_limit = None bottom_limit = None - # Check if the dataset is not entirely string-based (for numeric data) if not all(isinstance(item, str) for item in self.dataset): - # Initialize limits for numeric data top_limit = [] bottom_limit = [] - # Single loop to process both numeric and string data + # Process each class for current_class in data: - # Calculate the frequency of the current class current_frequency = self.dataset.count(current_class) frequency.append(current_frequency) - # Calculate the relative frequency for the current class - current_relative_frequency = np.round((current_frequency / self.length) * 100) + current_relative_frequency = np.round((current_frequency / self.length) * 100, 2) relative_frequency.append(current_relative_frequency) - # If the data is numeric, calculate limits and cumulative frequencies if top_limit is not None and bottom_limit is not None: - # Calculate top and bottom limits for numeric data current_top_limit = current_class + 0.5 current_bottom_limit = current_class - 0.5 top_limit.append(current_top_limit) bottom_limit.append(current_bottom_limit) - # Calculate bottom cumulative frequency for numeric data - current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, current_class) + current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, current_class) bot_cumulative_frequency.append(current_bot_cumulative_frequency) - # Calculate top cumulative frequency for numeric data - current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1) + current_top_cumulative_frequency = self.find_frequency(current_class, self.highest + 0.5) top_cumulative_frequency.append(current_top_cumulative_frequency) else: - # If the data is string-based, calculate cumulative frequencies - # Calculate bottom cumulative frequency for strings current_bot_cumulative_frequency = self.dataset.count(current_class) bot_cumulative_frequency.append(current_bot_cumulative_frequency) - - # Calculate top cumulative frequency for strings current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency top_cumulative_frequency.append(current_top_cumulative_frequency) - # Find the mode (the class with the highest frequency) mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)] mode = [data[i] for i in mode_index] - # Create the ProcessedData object based on the data type self.simple = ProcessedData( data, None, None, bottom_limit, top_limit, frequency, None, None, None, @@ -217,7 +184,7 @@ def PopulateSimple(self): # Processed Data Assignment class ProcessedData: - # Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R) + # Constructor for processed data def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode): self.classval = data self.bottom = bot @@ -227,13 +194,10 @@ def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, self.midpoint = M self.ranges = R self.limit = L - self.frequency = F self.bottom_cumulative_frequency = bot_CF self.top_cumulative_frequency = top_CF self.relative_frequency = RF self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ] - self.mode = mode - - + self.mode = mode \ No newline at end of file diff --git a/Main.py b/Main.py index aea8777..4bfe858 100644 --- a/Main.py +++ b/Main.py @@ -4,19 +4,7 @@ import tabulate as tabulate # Raw Data -dataset = [ - 'Mango', 'Pineapple', 'Banana', 'Banana', 'Pineapple', 'Banana', - 'Banana', 'Grapes', 'Pear', 'Pineapple', 'Orange', 'Strawberry', - 'Orange', 'Mango', 'Banana', 'Pineapple', 'Orange', 'Banana', - 'Strawberry', 'Pear', 'Apple', 'Banana', 'Pineapple', 'Orange', - 'Mango', 'Apple', 'Pear', 'Pear', 'Pear', 'Grapes', 'Pear', - 'Orange', 'Grapes', 'Strawberry', 'Mango', 'Orange', 'Orange', - 'Mango', 'Pear', 'Strawberry', 'Pear', 'Orange', 'Mango', - 'Mango', 'Pear', 'Grapes', 'Apple', 'Mango', 'Pineapple', - 'Strawberry', 'Strawberry', 'Grapes', 'Apple', 'Banana', - 'Grapes', 'Banana', 'Strawberry', 'Mango', 'Strawberry', - 'Orange', 'Pear', 'Grapes', 'Orange', 'Apple' -] +dataset = [1.2, 2.5, 3.1, 4.7, 1.2, 2.5, 3.8, 4.5, 2.1, 3.3, 4.8, 5.0] # Initiate Object From The Raw Data