From de4fe9a5c5ff21ad8e7b9b8abcb2f3e34c344016 Mon Sep 17 00:00:00 2001 From: rscuad Date: Fri, 18 Oct 2024 18:17:55 +0700 Subject: [PATCH] refactor: Separate Each Class into Each Module --- Statistic_Calculation.py => Calculations.py | 9 +- FrequencyTable.py | 98 +++++---------------- Main.py | 6 +- README.md | 16 ++-- Summary.py | 16 ++++ 5 files changed, 51 insertions(+), 94 deletions(-) rename Statistic_Calculation.py => Calculations.py (83%) create mode 100644 Summary.py diff --git a/Statistic_Calculation.py b/Calculations.py similarity index 83% rename from Statistic_Calculation.py rename to Calculations.py index c640cbc..df69cf3 100644 --- a/Statistic_Calculation.py +++ b/Calculations.py @@ -1,5 +1,4 @@ -# Statistical Calculations Class -class Statistic_Calculation: +class Describe: @staticmethod def mean(dataset): return sum(dataset) / len(dataset) @@ -22,14 +21,14 @@ def kurtosis(dataset, mean, deviation): n = len(dataset) return (n * (n + 1) * sum(((x - mean) / deviation) ** 4 for x in dataset) / ((n - 1) * (n - 2) * (n - 3))) - (3 * (n - 1) ** 2) / ((n - 2) * (n - 3)) - + @staticmethod def median(dataset): sorted_data = sorted(dataset) n = len(sorted_data) mid = n // 2 - if n % 2 == 0: # If even, return the average of the two middle numbers + if n % 2 == 0: return (sorted_data[mid - 1] + sorted_data[mid]) / 2 - else: # If odd, return the middle number + else: return sorted_data[mid] diff --git a/FrequencyTable.py b/FrequencyTable.py index 57be1c7..c2f5ba3 100644 --- a/FrequencyTable.py +++ b/FrequencyTable.py @@ -1,85 +1,31 @@ import numpy as np +from Calculations import Describe +from Summary import Result -class StatisticalCalculations: - @staticmethod - def mean(dataset): - return sum(dataset) / len(dataset) - - @staticmethod - def variance(dataset, mean): - return sum((x - mean) ** 2 for x in dataset) / len(dataset) - - @staticmethod - def standard_deviation(variance): - return variance ** 0.5 - - @staticmethod - def skewness(dataset, mean, deviation): - n = len(dataset) - return (n / ((n - 1) * (n - 2))) * sum(((x - mean) / deviation) ** 3 for x in dataset) - - @staticmethod - def kurtosis(dataset, mean, deviation): - n = len(dataset) - return (n * (n + 1) * sum(((x - mean) / deviation) ** 4 for x in dataset) / - ((n - 1) * (n - 2) * (n - 3))) - (3 * (n - 1) ** 2) / ((n - 2) * (n - 3)) - - @staticmethod - def median(dataset): - sorted_data = sorted(dataset) - n = len(sorted_data) - mid = n // 2 - - if n % 2 == 0: # If even, return the average of the two middle numbers - return (sorted_data[mid - 1] + sorted_data[mid]) / 2 - else: # If odd, return the middle number - return sorted_data[mid] - -# Processed Data Assignment -class ProcessedData: - def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode): - self.top = top - self.limit = L - self.ranges = R - self.bottom = bot - self.midpoint = M - self.frequency = F - self.classval = data - self.top_limit = top_L - self.bottom_limit = bot_L - self.mode = mode - self.bottom_cumulative_frequency = bot_CF - self.top_cumulative_frequency = top_CF - self.relative_frequency = RF - self.percentage_relative_frequency = [f"{rf:.2f}%" for rf in self.relative_frequency] - -# Frequency Table Class -class FrequencyTable: +class Classify: def __init__(self, dataset): - # Check for mixed data types if any(isinstance(item, str) for item in dataset) and any(isinstance(item, (int, float)) for item in dataset): raise ValueError("Data is corrupted: contains both numeric and string values.") - # Data Initiation self.dataset = sorted(dataset) self.length = len(dataset) self.lowest = min(dataset) if isinstance(dataset[0], (int, float)) else None self.highest = max(dataset) if isinstance(dataset[0], (int, float)) else None - if self.lowest is not None: # Only calculate classes for numeric data - self.calculate_statistics() - self.calculate_classes() + if self.lowest is not None: + self._calculate_statistics() + self._calculate_classes() - def calculate_statistics(self): + def _calculate_statistics(self): self.sum = sum(self.dataset) - self.mean = StatisticalCalculations.mean(self.dataset) - self.median = StatisticalCalculations.median(self.dataset) - self.variance = StatisticalCalculations.variance(self.dataset, self.mean) - self.deviation = StatisticalCalculations.standard_deviation(self.variance) - self.skewness = StatisticalCalculations.skewness(self.dataset, self.mean, self.deviation) - self.kurtosis = StatisticalCalculations.kurtosis(self.dataset, self.mean, self.deviation) - - def calculate_classes(self): + self.mean = Describe.mean(self.dataset) + self.median = Describe.median(self.dataset) + self.variance = Describe.variance(self.dataset, self.mean) + self.deviation = Describe.standard_deviation(self.variance) + self.skewness = Describe.skewness(self.dataset, self.mean, self.deviation) + self.kurtosis = Describe.kurtosis(self.dataset, self.mean, self.deviation) + + def _calculate_classes(self): self.classes = 1 + (3.222 * np.log10(self.length)) self.classes = round(self.classes - 0.5) self.range = self.highest - self.lowest @@ -113,8 +59,9 @@ def populate_grouped(self): # Initiating Variables for Frequency Table current_number = self.base - 1 + top_cumulative_freq = 1 - while True: + while top_cumulative_freq != 0: old_number = current_number + 1 self.bottom.append(old_number) @@ -144,15 +91,12 @@ def populate_grouped(self): current_relative_frequency = np.round((current_frequency / self.length) * 100) self.relative_frequency.append(current_relative_frequency) - if current_frequency == 0: - break - # Find Mode mode_index = [i for i, val in enumerate(self.frequency) if val == max(self.frequency)] self.mode = [self.data_range[i] for i in mode_index] - # Create ProcessedData object - self.grouped = ProcessedData(None, self.bottom, self.top, self.bottom_limit, self.top_limit, + # Create Result object + self.grouped = Result(None, self.bottom, self.top, self.bottom_limit, self.top_limit, self.frequency, self.data_range, self.data_limit, self.data_midpoint, self.bot_cumulative_frequency, self.top_cumulative_frequency, self.relative_frequency, self.mode, @@ -184,8 +128,8 @@ def populate_simple(self): mode_index = [i for i, val in enumerate(self.frequency) if val == max(self.frequency)] self.mode = [unique_data[i] for i in mode_index] - # Create ProcessedData object - self.simple = ProcessedData( + # Create Result object + self.simple = Result( unique_data, None, None, self.bottom_limit, self.top_limit, self.frequency, None, None, None, self.bot_cumulative_frequency, self.top_cumulative_frequency, diff --git a/Main.py b/Main.py index 8486767..180a993 100644 --- a/Main.py +++ b/Main.py @@ -1,5 +1,5 @@ # EXAMPLE PROGRAM -import FrequencyTable as ft +from FrequencyTable import Classify import tabulate as tabulate import Transform as tf import pandas as pd @@ -18,9 +18,8 @@ 12.9, 12.8, 12.7, 12.6, 12.5, 12.4 ] - # Initiate Object From The Raw Data -data = ft.FrequencyTable(dataset) +data = Classify(dataset) data.populate_simple() # Simple Data # Simple Populated Data @@ -67,6 +66,7 @@ headers='keys', tablefmt='pipe', ) +print(data.interval) print(tablegrouped) print(data.grouped.mode) print(data.mean) diff --git a/README.md b/README.md index 5335c32..fca7a2d 100644 --- a/README.md +++ b/README.md @@ -2,26 +2,24 @@ > My Statistic Tools made with Python ### Features -- Frequency Table -- Descriptive Statistics -- Display Chart -- Data Transformation +- Auto Classify Data Into Frequency Table +- Descriptive Statistics Calculation +- Many Data Transformation Calculation ### Required -- Matplotlib -- Matplotlib_Venn - Numpy -- Tabulate -- Pandas - Scipy +- Tabulate ( Optional for printing Frequency Table ) +- Pandas ( Optional for printing Frequency Table ) ### Structure - Program Structure + Main.py [ Main Control file ] + FrequencyTable.py [ Frequency Table Module ] - + Chart.py [ Chart Display Module ] + Transform.py [ Data Transformation Module ] + + Calculations.py [ Descriptive Statistic Module ] + + Summary.py [ Processed Data Append in Here ] ### How to Use - For detailed Documentation, please visit [Pythistic Wiki](https://github.com/brotherzhafif/Pythistic/wiki) diff --git a/Summary.py b/Summary.py new file mode 100644 index 0000000..4717707 --- /dev/null +++ b/Summary.py @@ -0,0 +1,16 @@ +class Result: + def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode): + self.top = top + self.limit = L + self.ranges = R + self.bottom = bot + self.midpoint = M + self.frequency = F + self.classval = data + self.top_limit = top_L + self.bottom_limit = bot_L + self.mode = mode + self.bottom_cumulative_frequency = bot_CF + self.top_cumulative_frequency = top_CF + self.relative_frequency = RF + self.percentage_relative_frequency = [f"{rf:.2f}%" for rf in self.relative_frequency]