diff --git a/FrequencyTable.py b/FrequencyTable.py
index e7b91a6..305117a 100644
--- a/FrequencyTable.py
+++ b/FrequencyTable.py
@@ -152,6 +152,77 @@ def PopulateGrouped(self):
             frequency, data_range, data_limit, data_midpoint,
             bot_cumulative_frequency, top_cumulative_frequency,
             relative_frequency, mode)
+
+    # Populate Simple Table Frequency Data Method
+    def PopulateSimple(self):
+        """Build the simple (ungrouped) frequency table in ``self.simple``.
+
+        Each distinct value of ``self.dataset`` becomes one class. Per class
+        the method records the frequency, the relative frequency (rounded to
+        a whole percent of ``self.length``) and a bottom and a top
+        cumulative frequency. The mode is every class that reaches the
+        highest frequency.
+
+        A dataset containing only strings gets no class limits, and its
+        cumulative frequencies follow the sorted class order. Any other
+        dataset is treated as numeric: each class gets limits at +/- 0.5
+        and its cumulative frequencies come from ``self.find_frequency``.
+        """
+        from collections import Counter
+
+        # Count every class in one pass instead of list.count() per class
+        counts = Counter(self.dataset)
+        data = sorted(counts)  # Distinct classes in ascending order
+        frequency = [counts[current_class] for current_class in data]
+        relative_frequency = [
+            np.round((current_frequency / self.length) * 100)
+            for current_frequency in frequency
+        ]
+        top_cumulative_frequency = []  # To store top cumulative frequency for each class
+        bot_cumulative_frequency = []  # To store bottom cumulative frequency for each class
+
+        if all(isinstance(item, str) for item in self.dataset):
+            # String data has no class limits
+            top_limit = None
+            bottom_limit = None
+
+            # Running totals over the sorted classes: "bottom" is the number
+            # of items in earlier classes, "top" the number in later classes.
+            # NOTE(review): this mirrors the numeric branch on the assumption
+            # that find_frequency(a, b) counts a <= x < b - confirm.
+            total = sum(frequency)
+            counted = 0
+            for current_frequency in frequency:
+                bot_cumulative_frequency.append(counted)
+                counted += current_frequency
+                top_cumulative_frequency.append(total - counted)
+        else:
+            # Numeric data: class limits sit half a unit around each class
+            top_limit = [current_class + 0.5 for current_class in data]
+            bottom_limit = [current_class - 0.5 for current_class in data]
+
+            for current_class in data:
+                bot_cumulative_frequency.append(
+                    self.find_frequency(self.lowest - 1, current_class))
+                top_cumulative_frequency.append(
+                    self.find_frequency(current_class + 1, self.highest + 1))
+
+        # Find the mode (every class tied for the highest frequency)
+        highest_frequency = max(frequency, default=0)
+        mode = [
+            current_class
+            for current_class, current_frequency in zip(data, frequency)
+            if current_frequency == highest_frequency
+        ]
+
+        # Create the ProcessedData object based on the data type
+        self.simple = ProcessedData(
+            data, None, None, bottom_limit, top_limit,
+            frequency, None, None, None,
+            bot_cumulative_frequency, top_cumulative_frequency,
+            relative_frequency, mode
+        )
+
 
 # Processed Data Assignment
 class ProcessedData:
diff --git a/Main.py b/Main.py
index fafaac4..aea8777 100644
--- a/Main.py
+++ b/Main.py
@@ -4,67 +4,71 @@ import tabulate as tabulate
 
 
 # Raw Data
-dataset = [12.5, 43.2, 56.7, 12.1, 98.3, 34.2, 78.4, 67.9, 23.5, 45.6,
-           78.1, 89.0, 32.4, 56.8, 44.5, 77.2, 12.6, 35.8, 67.1, 23.3,
-           56.5, 78.9, 99.5, 22.4, 10.2, 35.1, 48.6, 59.9, 71.3, 84.2,
-           45.3, 67.8, 89.1, 33.3, 76.4, 88.7, 41.2, 12.7, 34.4, 67.4,
-           23.8, 55.1, 77.3, 90.4, 13.5, 14.6, 55.7, 22.2, 33.1, 66.5,
-           78.2, 39.5, 41.8, 91.2, 12.4, 64.7, 49.9, 80.5, 92.3, 38.8,
-           14.5, 99.1, 25.4, 26.8, 37.5, 52.3, 43.8, 76.8, 28.7, 64.8,
-           14.9, 15.3, 48.5, 82.2, 93.4, 56.3, 88.3, 60.5, 72.9, 38.3,
-           57.2, 70.1, 84.4, 97.2, 18.6, 45.1, 66.1, 31.9, 94.5, 29.4,
-           11.9, 16.7, 21.1, 88.9, 99.7, 53.6, 62.0, 34.9, 82.8, 18.9,]
+dataset = [
+    'Mango', 'Pineapple', 'Banana', 'Banana', 'Pineapple', 'Banana',
+    'Banana', 'Grapes', 'Pear', 'Pineapple', 'Orange', 'Strawberry',
+    'Orange', 'Mango', 'Banana', 'Pineapple', 'Orange', 'Banana',
+    'Strawberry', 'Pear', 'Apple', 'Banana', 'Pineapple', 'Orange',
+    'Mango', 'Apple', 'Pear', 'Pear', 'Pear', 'Grapes', 'Pear',
+    'Orange', 'Grapes', 'Strawberry', 'Mango', 'Orange', 'Orange',
+    'Mango', 'Pear', 'Strawberry', 'Pear', 'Orange', 'Mango',
+    'Mango', 'Pear', 'Grapes', 'Apple', 'Mango', 'Pineapple',
+    'Strawberry', 'Strawberry', 'Grapes', 'Apple', 'Banana',
+    'Grapes', 'Banana', 'Strawberry', 'Mango', 'Strawberry',
+    'Orange', 'Pear', 'Grapes', 'Orange', 'Apple'
+]
 
 
 # Initiate Object From The Raw Data
 data = ft.FrequencyTable(dataset)
 
 # Processing Raw Data to Frequency Grouped Frequency Table
-data.PopulateGrouped() # Grouped Data
-# data.PopulateSimple() # Simple Data
+# data.PopulateGrouped() # Grouped Data
+data.PopulateSimple() # Simple Data
 
 # Transform The Data To A Frequency Table
 # Initiating The Data Using Pandas
 
 # Grouped Populated Data
-dfg = pd.DataFrame(
-    {
-        "Class Interval" : data.grouped.ranges,
-        "Class Limit" : data.grouped.limit,
-        "Frequency" : data.grouped.frequency,
-        "Midpoint" : data.grouped.midpoint,
-
-        "C <" : data.grouped.bottom_limit,
-        "CF <" : data.grouped.bottom_cumulative_frequency,
-        "C >" : data.grouped.top_limit,
-        "CF >" : data.grouped.top_cumulative_frequency,
-        "Relative Frequency" : data.grouped.percentage_relative_frequency
-    }
-)
-
-# Simple Populated Data
-# dfs = pd.DataFrame(
+# dfg = pd.DataFrame(
 #     {
-#         "Class" : data.simple.classval,
-#         "Frequency" : data.simple.frequency,
-#         "Relative Frequency" : data.simple.percentage_relative_frequency
+#         "Class Interval" : data.grouped.ranges,
+#         "Class Limit" : data.grouped.limit,
+#         "Frequency" : data.grouped.frequency,
+#         "Midpoint" : data.grouped.midpoint,
+
+#         "C <" : data.grouped.bottom_limit,
+#         "CF <" : data.grouped.bottom_cumulative_frequency,
+#         "C >" : data.grouped.top_limit,
+#         "CF >" : data.grouped.top_cumulative_frequency,
+#         "Relative Frequency" : data.grouped.percentage_relative_frequency
 #     }
 # )
 
-# Converting Pandas Data Into Tabulate
-# tablesimple = tabulate.tabulate(
-#     dfs,
-#     headers='keys',
-#     tablefmt='pipe'
-# )
+# Simple Populated Data
+dfs = pd.DataFrame(
+    {
+        "Class" : data.simple.classval,
+        "Frequency" : data.simple.frequency,
+        "Relative Frequency" : data.simple.percentage_relative_frequency
+    }
+)
 
-tablegrouped = tabulate.tabulate(
-    dfg,
+# Converting Pandas Data Into Tabulate
+tablesimple = tabulate.tabulate(
+    dfs,
     headers='keys',
-    tablefmt='pipe',
-)
+    tablefmt='pipe'
+)
+
+# tablegrouped = tabulate.tabulate(
+#     dfg,
+#     headers='keys',
+#     tablefmt='pipe',
+# )
 
 # Print The Processed Data
-# print(tablesimple)
-print(tablegrouped)
-print(data.length)
+print(tablesimple)
+# print(tablegrouped)
+
+