diff --git a/FrequencyTable.py b/FrequencyTable.py index f212625..62fda67 100644 --- a/FrequencyTable.py +++ b/FrequencyTable.py @@ -6,43 +6,45 @@ class FrequencyTable: def __init__(self, dataset): # Data Initiation self.dataset = sorted(dataset) - self.sum = sum(dataset) self.length = len(dataset) self.lowest = min(dataset) self.highest = max(dataset) - - # Counting Data Range - self.range = self.highest - self.lowest - - # Classes is Rounding Down + + # Classes is Rounding Down # Math Log Base 10 In Python For Accurate Result self.classes = 1 + (3.222 * np.log10(self.length)) self.classes = round(self.classes - 0.5) + + # Condition if the data is contain string + if not any(isinstance(item, str) for item in self.dataset): + # Sum of the data and range + self.sum = sum(dataset) + self.range = self.highest - self.lowest - # Interval is Rounding Up - self.interval = self.range / self.classes - self.interval = round(self.interval + 0.5) + # Interval is Rounding Up + self.interval = self.range / self.classes + self.interval = round(self.interval + 0.5) - # Rounding Both Limit So The Data Would Be Simple And Easier To Read - self.base = self.roundy(self.lowest - 3) - self.top = self.roundy(self.highest + 3) - - # Mean or Average - self.mean = (self.sum / self.length) + # Rounding Both Limit So The Data Would Be Simple And Easier To Read + self.base = self.roundy(self.lowest - 3) + self.top = self.roundy(self.highest + 3) + + # Mean or Average + self.mean = (self.sum / self.length) - # Formula for Variance - self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length + # Formula for Variance + self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length - # Formula for Standard Deviation - self.deviation = (self.variance ** 0.5) - - # Formula to find Dataset Skewness - self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset) + # Formula for Standard Deviation + self.deviation = (self.variance ** 0.5) + + # Formula to find Dataset Skewness + self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset) - # Formula to find Dataset - self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \ - (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3)) - + # Formula to find Dataset Kurtosis + self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \ + (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3)) + # Populate Grouped Table Frequency Data Method def PopulateGrouped(self): # Initiating Used List @@ -85,7 +87,7 @@ def PopulateGrouped(self): top_limit.append(current_top_limit) # Finding The Frequency That Range - current_frequency = self.find_frequency(old_number, current_number) + current_frequency = self.find_frequency(old_number, current_number + 1) frequency.append(current_frequency) # Adding The Number Range From Both Frequency @@ -101,11 +103,11 @@ def PopulateGrouped(self): data_midpoint.append(current_data_midpoint) # Adding Bottom Cumulative Frequency of The Class - current_bot_cumulative_frequency = self.find_frequency(self.lowest, old_number) + current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number) bot_cumulative_frequency.append(current_bot_cumulative_frequency) # Adding Top Cumulative Frequency of The Class - current_top_cumulative_frequency = self.find_frequency(old_number, self.highest) + current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1) top_cumulative_frequency.append(current_top_cumulative_frequency) # Counting the Relative Frequency in Percentage @@ -122,7 +124,8 @@ def PopulateGrouped(self): frequency, data_range, data_limit, data_midpoint, bot_cumulative_frequency, top_cumulative_frequency, relative_frequency, mode) - + + # Populate Simple Table Frequency Data Method def PopulateSimple(self): # Deleting Duplicate and Sort the Data data = sorted(set(self.dataset)) @@ -150,11 +153,11 @@ def PopulateSimple(self): frequency.append(current_frequency) # Calculate Current Class Bottom Cumulative Frequency - current_bot_cumulative_frequency = self.find_frequency(self.lowest, current_class) + current_bot_cumulative_frequency = self.find_frequency(self.lowest -1 , current_class) bot_cumulative_frequency.append(current_bot_cumulative_frequency) # Calculate Current Class Top Cumulative Frequency - current_top_cumulative_frequency = self.find_frequency(current_class, self.highest) + current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1) top_cumulative_frequency.append(current_top_cumulative_frequency) # Calculate Current Class Relative Frequency @@ -170,7 +173,56 @@ def PopulateSimple(self): frequency, None, None, None, bot_cumulative_frequency, top_cumulative_frequency, relative_frequency, mode) - + + # Populate Simple String Table Frequency Data Method + def PopulateString(self): + # Memastikan bahwa dataset berisi string + if not all(isinstance(item, str) for item in self.dataset): + raise ValueError("Dataset harus berisi string saja untuk menggunakan PopulateString.") + + # Menghapus duplikat dan mengurutkan data secara alfabetis + data = sorted(set(self.dataset)) + + # Variabel yang diperlukan + frequency = [] + top_cumulative_frequency = [] + bot_cumulative_frequency = [] + relative_frequency = [] + mode = [] + + # Menghitung frekuensi untuk setiap string unik dalam dataset + for current_class in data: + # Menghitung frekuensi dari string saat ini + current_frequency = self.dataset.count(current_class) + frequency.append(current_frequency) + + # Menghitung cumulative frequency (bawah) + current_bot_cumulative_frequency = self.find_frequency_string(self.dataset, current_class) + bot_cumulative_frequency.append(current_bot_cumulative_frequency) + + # Menghitung cumulative frequency (atas) + current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency + top_cumulative_frequency.append(current_top_cumulative_frequency) + + # Menghitung relative frequency + current_relative_frequency = np.round((current_frequency / self.length) * 100) + relative_frequency.append(current_relative_frequency) + + # Menemukan modus (nilai string yang paling sering muncul) + mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)] + mode = [data[i] for i in mode_index] + + # Menyimpan data yang diproses ke dalam atribut simple + self.text = ProcessedData(data, None, None, None, None, + frequency, None, None, None, + bot_cumulative_frequency, top_cumulative_frequency, + relative_frequency, mode) + + def find_frequency_string(self, dataset, value): + # Fungsi untuk menghitung frekuensi cumulative string dari dataset + frequency = dataset.count(value) + return frequency + # Base 5 Rounding def roundy(self, x, base = 5): return base * round(x/base) @@ -184,7 +236,7 @@ def find_frequency(self, bot, top): print(f"Error converting to int: {e}") total_frequency = 0 - for i in range(bot, top + 1): + for i in range(bot, top): frequency = self.dataset.count(i) total_frequency = total_frequency + frequency return total_frequency diff --git a/Main.py b/Main.py index 71ef907..a9264b4 100644 --- a/Main.py +++ b/Main.py @@ -4,17 +4,29 @@ import tabulate as tabulate # Raw Data -dataset = (1,1,1,4,6,7,3,6,7,1,2,2,5,3,1,8,3,2) +dataset = ( + "Apel", "Pisang", "Jeruk", "Mangga", "Semangka", + "Melon", "Pepaya", "Nanas", "Anggur", "Stroberi", + "Durian", "Salak", "Rambutan", "Sirsak", "Alpukat", + "Jambu Biji", "Pir", "Kelengkeng", "Markisa", "Leci", + "Ceri", "Blueberry", "Raspberry", "Kedondong", "Belimbing", + "Duku", "Manggis", "Kismis", "Kelengkeng", "Cempedak", + "Srikaya", "Delima", "Kiwi", "Plum", "Kurma", + "Aprikot", "Persik", "Buah Naga", "Nangka", "Pepino" +) # Initiate Object From The Raw Data data = ft.FrequencyTable(dataset) # Processing Raw Data to Frequency Grouped Frequency Table -data.PopulateSimple() +# data.PopulateGrouped() # Grouped Data +# data.PopulateSimple() # Simple Data +data.PopulateString() # Transform The Data To A Frequency Table # Initiating The Data Using Pandas -# df = pd.DataFrame( +# Grouped Populated Data +# dfg = pd.DataFrame( # { # "Class Interval" : data.grouped.ranges, # "Class Limit" : data.grouped.limit, @@ -29,28 +41,57 @@ # } # ) -df = pd.DataFrame( +# # Simple Populated Data +# dfs = pd.DataFrame( +# { +# "Class" : data.simple.classval, +# "Frequency" : data.simple.frequency, + +# "C <" : data.simple.bottom_limit, +# "CF <" : data.simple.bottom_cumulative_frequency, +# "C >" : data.simple.top_limit, +# "CF >" : data.simple.top_cumulative_frequency, +# "Relative Frequency" : data.simple.percentage_relative_frequency +# } +# ) + +# Simple Populated Data +dfa = pd.DataFrame( { - "Class" : data.simple.classval, - "Frequency" : data.simple.frequency, + "Class" : data.text.classval, + "Frequency" : data.text.frequency, - "C <" : data.simple.bottom_limit, - "CF <" : data.simple.bottom_cumulative_frequency, - "C >" : data.simple.top_limit, - "CF >" : data.simple.top_cumulative_frequency, - "Relative Frequency" : data.simple.percentage_relative_frequency + "C <" : data.text.bottom_limit, + "CF <" : data.text.bottom_cumulative_frequency, + "C >" : data.text.top_limit, + "CF >" : data.text.top_cumulative_frequency, + "Relative Frequency" : data.text.percentage_relative_frequency } ) - # Converting Pandas Data Into Tabulate -table = tabulate.tabulate( - df, +# tablesimple = tabulate.tabulate( +# dfs, +# headers='keys', +# tablefmt='pipe' +# ) + +# tablegrouped = tabulate.tabulate( +# dfg, +# headers='keys', +# tablefmt='pipe', +# ) + +tablestring = tabulate.tabulate( + dfa, headers='keys', - tablefmt='pipe' -) + tablefmt='pipe', +) + +# Print The Processed Data +# print(tablesimple) +# print(tablegrouped) +print(tablestring) -# Print Output Data -print(table)