From 0ca55f23193b6f197dcc109bdd638c6651d2d707 Mon Sep 17 00:00:00 2001 From: brotherzhafif Date: Sat, 12 Oct 2024 22:42:39 +0700 Subject: [PATCH] feat: Refactoring Populate Table Data and Adding Descriptive Statistic Variable --- FrequencyTable.py | 63 ++++++++++++++++++++++++++++++++++++----------- Main.py | 31 ++++++++++------------- README.md | 4 +-- 3 files changed, 64 insertions(+), 34 deletions(-) diff --git a/FrequencyTable.py b/FrequencyTable.py index dc73898..0d3201c 100644 --- a/FrequencyTable.py +++ b/FrequencyTable.py @@ -1,11 +1,13 @@ import numpy as np +from scipy import stats # Frequency Table Class class FrequencyTable: def __init__(self, dataset): # Data Initiation self.dataset = sorted(dataset) - self.amount = len(dataset) + self.sum = sum(dataset) + self.length = len(dataset) self.lowest = min(dataset) self.highest = max(dataset) @@ -14,7 +16,7 @@ def __init__(self, dataset): # Classes is Rounding Down # Math Log Base 10 In Python For Accurate Result - self.classes = 1 + (3.222 * np.log10(self.amount)) + self.classes = 1 + (3.222 * np.log10(self.length)) self.classes = round(self.classes - 0.5) # Interval is Rounding Up @@ -24,10 +26,21 @@ def __init__(self, dataset): # Rounding Both Limit So The Data Would Be Simple And Easier To Read self.base = self.roundy(self.lowest - 3) self.top = self.roundy(self.highest + 3) + + # Mean or Average + self.mean = (self.sum / self.length) + + # Formula for Variance + self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length - # Populate Data Method - def Populate(self): - # Initiating Used List + # Formula for Standard Deviation + self.deviation = (self.variance ** 0.5) + + + + # Populate Grouped Table Frequency Data Method + def PopulateGrouped(self): + # Initiating Used List top = [] bottom = [] top_limit = [] @@ -41,12 +54,14 @@ def Populate(self): bot_cumulative_frequency = [] top_cumulative_frequency = [] relative_frequency = [] + mode = [] - # Initiating Used Parameter - interval = 
self.interval # 4 - current_number = self.base - 1 # 156 + # Initiating Used Parameter for Frequency Table + interval = self.interval + current_number = self.base - 1 old_number = 0 + # Processing the Frequency Table Data while current_number <= self.top-3: # Finding Class Lowest Value old_number = current_number + 1 @@ -89,13 +104,27 @@ def Populate(self): top_cumulative_frequency.append(current_top_cumulative_frequency) # Counting the Relative Frequency in Percentage - current_relative_frequency = np.round((current_frequency / self.amount) * 100) - relative_frequency.append(current_relative_frequency) - + current_relative_frequency = np.round((current_frequency / self.length) * 100) + # Adding Percent Symbol into The Relative Frequency Column + relative_frequency.append(current_relative_frequency) + + # Find Mode or Data that appears most frequently + mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)] + mode = [data_range[i] for i in mode_index] + + # Formula to find Dataset Skewness + skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset) + # Formula to find Dataset Kurtosis + kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \ + (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3)) + # Append Processed Data into Data Attributes - self.final = ProcessedData(bottom, top, bottom_limit, top_limit, frequency, data_range, data_limit, data_midpoint, bot_cumulative_frequency, top_cumulative_frequency, relative_frequency) - + self.grouped = ProcessedData(bottom, top, bottom_limit, top_limit, + frequency, data_range, data_limit, data_midpoint, + bot_cumulative_frequency, top_cumulative_frequency, + relative_frequency, skewness, kurtosis, mode) + # Base 5 Rounding def roundy(self, x, base = 5): return base * round(x/base) @@ -111,7
+140,7 @@ def find_frequency(self, bot, top): # Processed Data Assignment class ProcessedData: # Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R) - def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF): + def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, skew, kurt, mode): self.bottom = bot self.top = top self.bottom_limit = bot_L @@ -124,4 +153,10 @@ def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF): self.bottom_cumulative_frequency = bot_CF self.top_cumulative_frequency = top_CF self.relative_frequency = RF + + self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ] + self.skewness = skew + self.kurtosis = kurt + self.mode = mode + diff --git a/Main.py b/Main.py index b970de4..4bba857 100644 --- a/Main.py +++ b/Main.py @@ -15,28 +15,23 @@ # Initiate Object From The Raw Data data = ft.FrequencyTable(dataset) -# Processing Raw Data to Frequency Table -data.Populate() - -# Adding Percent Symbol into The Relative Frequency Coloumn -relative_frequency_with_percentage = [ - f"{rf * 1:.2f}%" for rf in data.final.relative_frequency -] +# Processing Raw Data to Grouped Frequency Table +data.PopulateGrouped() # Transform The Data To A Frequency Table # Initiating The Data Using Pandas df = pd.DataFrame( { - "Class Interval" : data.final.ranges, - "Class Limit" : data.final.limit, - "Frequency" : data.final.frequency, - "Midpoint" : data.final.midpoint, + "Class Interval" : data.grouped.ranges, + "Class Limit" : data.grouped.limit, + "Frequency" : data.grouped.frequency, + "Midpoint" : data.grouped.midpoint, - "C <" : data.final.bottom_limit, - "CF <" : data.final.bottom_cumulative_frequency, - "C >" : data.final.top_cumulative_frequency, - "CF >" : data.final.top_cumulative_frequency, - "Relative Frequency" : relative_frequency_with_percentage + "C <" : data.grouped.bottom_limit, + "CF <" : 
data.grouped.bottom_cumulative_frequency, + "C >" : data.grouped.top_limit, + "CF >" : data.grouped.top_cumulative_frequency, + "Relative Frequency" : data.grouped.percentage_relative_frequency } ) @@ -47,6 +42,6 @@ tablefmt='pipe' ) -# print(table) -print(data.final.ranges) +# Print Output Data +print(table) diff --git a/README.md b/README.md index c786d53..56ee761 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ ### Features - Frequency Table ( Done ) -- Descriptive Statistics ( Work in Progress ) +- Descriptive Statistics ( Done ) - Display Chart ( Work in Progress ) -- Data Transformation ( Coming Soon ) +- Data Transformation ( Work in Progress ) ### Required - Matplotlib