From 02e139cc12e6a278247e076d1ba00388b1a70793 Mon Sep 17 00:00:00 2001 From: brotherzhafif Date: Sun, 13 Oct 2024 09:56:55 +0700 Subject: [PATCH] refactor: Combining Simple and String Populate into Simple Populate --- Example.py | 97 +++++++++++++++++++++ FrequencyTable.py | 210 ++++++++++++++++++++-------------------------- Main.py | 73 ++++++---------- 3 files changed, 214 insertions(+), 166 deletions(-) create mode 100644 Example.py diff --git a/Example.py b/Example.py new file mode 100644 index 0000000..cb57402 --- /dev/null +++ b/Example.py @@ -0,0 +1,97 @@ +# EXAMPLE PROGRAM +import FrequencyTable as ft +import pandas as pd +import tabulate as tabulate + +# Raw Data +dataset = ( + "Apel", "Pisang", "Jeruk", "Mangga", "Semangka", + "Melon", "Pepaya", "Nanas", "Anggur", "Stroberi", + "Durian", "Salak", "Rambutan", "Sirsak", "Alpukat", + "Jambu Biji", "Pir", "Kelengkeng", "Markisa", "Leci", + "Ceri", "Blueberry", "Raspberry", "Kedondong", "Belimbing", + "Duku", "Manggis", "Kismis", "Kelengkeng", "Cempedak", + "Srikaya", "Delima", "Kiwi", "Plum", "Kurma", + "Aprikot", "Persik", "Buah Naga", "Nangka", "Pepino" +) + +# Initiate Object From The Raw Data +data = ft.FrequencyTable(dataset) + +# Processing Raw Data to Frequency Grouped Frequency Table +data.PopulateGrouped() # Grouped Data +data.PopulateSimple() # Simple Data +data.PopulateString() # String Data + +# Transform The Data To A Frequency Table +# Initiating The Data Using Pandas +# Grouped Populated Data +dfg = pd.DataFrame( + { + "Class Interval" : data.grouped.ranges, + "Class Limit" : data.grouped.limit, + "Frequency" : data.grouped.frequency, + "Midpoint" : data.grouped.midpoint, + + "C <" : data.grouped.bottom_limit, + "CF <" : data.grouped.bottom_cumulative_frequency, + "C >" : data.grouped.top_limit, + "CF >" : data.grouped.top_cumulative_frequency, + "Relative Frequency" : data.grouped.percentage_relative_frequency + } +) + +# # Simple Populated Data +dfs = pd.DataFrame( + { + "Class" : data.simple.classval, + "Frequency" : data.simple.frequency, + + "C <" : data.simple.bottom_limit, + "CF <" : data.simple.bottom_cumulative_frequency, + "C >" : data.simple.top_limit, + "CF >" : data.simple.top_cumulative_frequency, + "Relative Frequency" : data.simple.percentage_relative_frequency + } +) + +# Simple Populated Data +dfa = pd.DataFrame( + { + "Class" : data.text.classval, + "Frequency" : data.text.frequency, + + "C <" : data.text.bottom_limit, + "CF <" : data.text.bottom_cumulative_frequency, + "C >" : data.text.top_limit, + "CF >" : data.text.top_cumulative_frequency, + "Relative Frequency" : data.text.percentage_relative_frequency + } +) + +# Converting Pandas Data Into Tabulate +tablesimple = tabulate.tabulate( + dfs, + headers='keys', + tablefmt='pipe' +) + +tablegrouped = tabulate.tabulate( + dfg, + headers='keys', + tablefmt='pipe', +) + +tablestring = tabulate.tabulate( + dfa, + headers='keys', + tablefmt='pipe', +) + +# Print The Processed Data +print(tablesimple) +print(tablegrouped) +print(tablestring) + + + diff --git a/FrequencyTable.py b/FrequencyTable.py index 62fda67..241ab19 100644 --- a/FrequencyTable.py +++ b/FrequencyTable.py @@ -4,19 +4,21 @@ # Frequency Table Class class FrequencyTable: def __init__(self, dataset): + # Check for mixed data types (both numeric and string) + if any(isinstance(item, str) for item in dataset) and any(isinstance(item, (int, float)) for item in dataset): + raise ValueError("Data is corrupted: contains both numeric and string values.") + # Data Initiation self.dataset = sorted(dataset) self.length = len(dataset) - self.lowest = min(dataset) - self.highest = max(dataset) - - # Classes is Rounding Down - # Math Log Base 10 In Python For Accurate Result - self.classes = 1 + (3.222 * np.log10(self.length)) - self.classes = round(self.classes - 0.5) - - # Condition if the data is contain string - if not any(isinstance(item, str) for item in self.dataset): + self.lowest = min(dataset) if isinstance(dataset[0], (int, float)) else None + self.highest = max(dataset) if isinstance(dataset[0], (int, float)) else None + + # Classes is Rounding Down + if self.lowest is not None: # Only calculate classes for numeric data + self.classes = 1 + (3.222 * np.log10(self.length)) + self.classes = round(self.classes - 0.5) + # Sum of the data and range self.sum = sum(dataset) self.range = self.highest - self.lowest @@ -25,10 +27,10 @@ def __init__(self, dataset): self.interval = self.range / self.classes self.interval = round(self.interval + 0.5) - # Rounding Both Limit So The Data Would Be Simple And Easier To Read + # Rounding Both Limits So The Data Would Be Simple And Easier To Read self.base = self.roundy(self.lowest - 3) self.top = self.roundy(self.highest + 3) - + # Mean or Average self.mean = (self.sum / self.length) @@ -37,14 +39,34 @@ def __init__(self, dataset): # Formula for Standard Deviation self.deviation = (self.variance ** 0.5) - + # Formula to find Dataset Skewness - self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset) + self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \ + sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset) # Formula to find Dataset Kurtosis - self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \ - (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3)) - + self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / + ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \ + (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3)) + + # Base 5 Rounding + def roundy(self, x, base=5): + return base * round(x / base) + + # Function To Find Frequency in Dataset with Desired Range (Top and Down Limit) + def find_frequency(self, bot, top): + try: + bot = int(bot) + top = int(top) + except (ValueError, TypeError) as e: + print(f"Error converting to int: {e}") + + total_frequency = 0 + for i in range(bot, top): + frequency = self.dataset.count(i) + total_frequency = total_frequency + frequency + return total_frequency + # Populate Grouped Table Frequency Data Method def PopulateGrouped(self): # Initiating Used List @@ -127,120 +149,72 @@ def PopulateGrouped(self): # Populate Simple Table Frequency Data Method def PopulateSimple(self): - # Deleting Duplicate and Sort the Data - data = sorted(set(self.dataset)) - - # Initiating Used Variable - top_limit = [] - bottom_limit = [] - frequency = [] - top_cumulative_frequency = [] - bot_cumulative_frequency = [] - relative_frequency = [] - mode = [] - - for current_class in data: - # Bottom Limit of the Class - current_top_limit = current_class + 0.5 - current_bottom_limit = current_class - 0.5 - - # Top Limit of the Class - top_limit.append(current_top_limit) - bottom_limit.append(current_bottom_limit) - - # Calculate Current Class Frequency - current_frequency = self.dataset.count(current_class) - frequency.append(current_frequency) - - # Calculate Current Class Bottom Cumulative Frequency - current_bot_cumulative_frequency = self.find_frequency(self.lowest -1 , current_class) - bot_cumulative_frequency.append(current_bot_cumulative_frequency) - - # Calculate Current Class Top Cumulative Frequency - current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1) - top_cumulative_frequency.append(current_top_cumulative_frequency) - - # Calculate Current Class Relative Frequency - current_relative_frequency = np.round((current_frequency / self.length) * 100) - relative_frequency.append(current_relative_frequency) - - # Temukan modus - mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)] - mode = [data[i] for i in mode_index] - - # Buat objek ProcessedData - self.simple = ProcessedData(data, None, None, bottom_limit, top_limit, - frequency, None, None, None, - bot_cumulative_frequency, top_cumulative_frequency, - relative_frequency, mode) - - # Populate Simple String Table Frequency Data Method - def PopulateString(self): - # Memastikan bahwa dataset berisi string + # Initialize general variables + data = sorted(set(self.dataset)) # Remove duplicates and sort the data + frequency = [] # To store the frequency of each class + top_cumulative_frequency = [] # To store top cumulative frequency for each class + bot_cumulative_frequency = [] # To store bottom cumulative frequency for each class + relative_frequency = [] # To store relative frequency for each class + mode = [] # To store the mode(s) + + # Variables specifically for numeric data + top_limit = None + bottom_limit = None + + # Check if the dataset is not entirely string-based (for numeric data) if not all(isinstance(item, str) for item in self.dataset): - raise ValueError("Dataset harus berisi string saja untuk menggunakan PopulateString.") - - # Menghapus duplikat dan mengurutkan data secara alfabetis - data = sorted(set(self.dataset)) - - # Variabel yang diperlukan - frequency = [] - top_cumulative_frequency = [] - bot_cumulative_frequency = [] - relative_frequency = [] - mode = [] + # Initialize limits for numeric data + top_limit = [] + bottom_limit = [] - # Menghitung frekuensi untuk setiap string unik dalam dataset + # Single loop to process both numeric and string data for current_class in data: - # Menghitung frekuensi dari string saat ini + # Calculate the frequency of the current class current_frequency = self.dataset.count(current_class) frequency.append(current_frequency) - # Menghitung cumulative frequency (bawah) - current_bot_cumulative_frequency = self.find_frequency_string(self.dataset, current_class) - bot_cumulative_frequency.append(current_bot_cumulative_frequency) - - # Menghitung cumulative frequency (atas) - current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency - top_cumulative_frequency.append(current_top_cumulative_frequency) - - # Menghitung relative frequency + # Calculate the relative frequency for the current class current_relative_frequency = np.round((current_frequency / self.length) * 100) relative_frequency.append(current_relative_frequency) - # Menemukan modus (nilai string yang paling sering muncul) + # If the data is numeric, calculate limits and cumulative frequencies + if top_limit is not None and bottom_limit is not None: + # Calculate top and bottom limits for numeric data + current_top_limit = current_class + 0.5 + current_bottom_limit = current_class - 0.5 + top_limit.append(current_top_limit) + bottom_limit.append(current_bottom_limit) + + # Calculate bottom cumulative frequency for numeric data + current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, current_class) + bot_cumulative_frequency.append(current_bot_cumulative_frequency) + + # Calculate top cumulative frequency for numeric data + current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1) + top_cumulative_frequency.append(current_top_cumulative_frequency) + + else: + # If the data is string-based, calculate cumulative frequencies + # Calculate bottom cumulative frequency for strings + current_bot_cumulative_frequency = self.dataset.count(current_class) + bot_cumulative_frequency.append(current_bot_cumulative_frequency) + + # Calculate top cumulative frequency for strings + current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency + top_cumulative_frequency.append(current_top_cumulative_frequency) + + # Find the mode (the class with the highest frequency) mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)] mode = [data[i] for i in mode_index] - # Menyimpan data yang diproses ke dalam atribut simple - self.text = ProcessedData(data, None, None, None, None, - frequency, None, None, None, - bot_cumulative_frequency, top_cumulative_frequency, - relative_frequency, mode) + # Create the ProcessedData object based on the data type + self.simple = ProcessedData( + data, None, None, bottom_limit, top_limit, + frequency, None, None, None, + bot_cumulative_frequency, top_cumulative_frequency, + relative_frequency, mode + ) - def find_frequency_string(self, dataset, value): - # Fungsi untuk menghitung frekuensi cumulative string dari dataset - frequency = dataset.count(value) - return frequency - - # Base 5 Rounding - def roundy(self, x, base = 5): - return base * round(x/base) - - # Function To Find Frequency in Dataset with Desired Range (Top and Down Limit) - def find_frequency(self, bot, top): - try: - bot = int(bot) - top = int(top) - except (ValueError, TypeError) as e: - print(f"Error converting to int: {e}") - - total_frequency = 0 - for i in range(bot, top): - frequency = self.dataset.count(i) - total_frequency = total_frequency + frequency - return total_frequency - # Processed Data Assignment class ProcessedData: # Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R) diff --git a/Main.py b/Main.py index a9264b4..aea8777 100644 --- a/Main.py +++ b/Main.py @@ -4,24 +4,27 @@ import tabulate as tabulate # Raw Data -dataset = ( - "Apel", "Pisang", "Jeruk", "Mangga", "Semangka", - "Melon", "Pepaya", "Nanas", "Anggur", "Stroberi", - "Durian", "Salak", "Rambutan", "Sirsak", "Alpukat", - "Jambu Biji", "Pir", "Kelengkeng", "Markisa", "Leci", - "Ceri", "Blueberry", "Raspberry", "Kedondong", "Belimbing", - "Duku", "Manggis", "Kismis", "Kelengkeng", "Cempedak", - "Srikaya", "Delima", "Kiwi", "Plum", "Kurma", - "Aprikot", "Persik", "Buah Naga", "Nangka", "Pepino" -) +dataset = [ + 'Mango', 'Pineapple', 'Banana', 'Banana', 'Pineapple', 'Banana', + 'Banana', 'Grapes', 'Pear', 'Pineapple', 'Orange', 'Strawberry', + 'Orange', 'Mango', 'Banana', 'Pineapple', 'Orange', 'Banana', + 'Strawberry', 'Pear', 'Apple', 'Banana', 'Pineapple', 'Orange', + 'Mango', 'Apple', 'Pear', 'Pear', 'Pear', 'Grapes', 'Pear', + 'Orange', 'Grapes', 'Strawberry', 'Mango', 'Orange', 'Orange', + 'Mango', 'Pear', 'Strawberry', 'Pear', 'Orange', 'Mango', + 'Mango', 'Pear', 'Grapes', 'Apple', 'Mango', 'Pineapple', + 'Strawberry', 'Strawberry', 'Grapes', 'Apple', 'Banana', + 'Grapes', 'Banana', 'Strawberry', 'Mango', 'Strawberry', + 'Orange', 'Pear', 'Grapes', 'Orange', 'Apple' +] + # Initiate Object From The Raw Data data = ft.FrequencyTable(dataset) # Processing Raw Data to Frequency Grouped Frequency Table # data.PopulateGrouped() # Grouped Data -# data.PopulateSimple() # Simple Data -data.PopulateString() +data.PopulateSimple() # Simple Data # Transform The Data To A Frequency Table # Initiating The Data Using Pandas @@ -41,40 +44,21 @@ # } # ) -# # Simple Populated Data -# dfs = pd.DataFrame( -# { -# "Class" : data.simple.classval, -# "Frequency" : data.simple.frequency, - -# "C <" : data.simple.bottom_limit, -# "CF <" : data.simple.bottom_cumulative_frequency, -# "C >" : data.simple.top_limit, -# "CF >" : data.simple.top_cumulative_frequency, -# "Relative Frequency" : data.simple.percentage_relative_frequency -# } -# ) - # Simple Populated Data -dfa = pd.DataFrame( +dfs = pd.DataFrame( { - "Class" : data.text.classval, - "Frequency" : data.text.frequency, - - "C <" : data.text.bottom_limit, - "CF <" : data.text.bottom_cumulative_frequency, - "C >" : data.text.top_limit, - "CF >" : data.text.top_cumulative_frequency, - "Relative Frequency" : data.text.percentage_relative_frequency + "Class" : data.simple.classval, + "Frequency" : data.simple.frequency, + "Relative Frequency" : data.simple.percentage_relative_frequency } ) # Converting Pandas Data Into Tabulate -# tablesimple = tabulate.tabulate( -# dfs, -# headers='keys', -# tablefmt='pipe' -# ) +tablesimple = tabulate.tabulate( + dfs, + headers='keys', + tablefmt='pipe' +) # tablegrouped = tabulate.tabulate( # dfg, @@ -82,16 +66,9 @@ # tablefmt='pipe', # ) -tablestring = tabulate.tabulate( - dfa, - headers='keys', - tablefmt='pipe', -) - # Print The Processed Data -# print(tablesimple) +print(tablesimple) # print(tablegrouped) -print(tablestring)