fix: Returning Missing Simple Frequency Table Method
brotherzhafif committed Oct 13, 2024
1 parent 3b95bc3 commit cb20062
Showing 2 changed files with 118 additions and 45 deletions.
69 changes: 69 additions & 0 deletions FrequencyTable.py
@@ -152,6 +152,75 @@ def PopulateGrouped(self):
            frequency, data_range, data_limit, data_midpoint,
            bot_cumulative_frequency, top_cumulative_frequency,
            relative_frequency, mode)

    # Populate Simple Frequency Table Data Method
    def PopulateSimple(self):
        # Initialize general variables
        data = sorted(set(self.dataset))  # Remove duplicates and sort the data
        frequency = []                    # To store the frequency of each class
        top_cumulative_frequency = []     # To store top cumulative frequency for each class
        bot_cumulative_frequency = []     # To store bottom cumulative frequency for each class
        relative_frequency = []           # To store relative frequency for each class
        mode = []                         # To store the mode(s)

        # Variables specifically for numeric data
        top_limit = None
        bottom_limit = None

        # Check if the dataset is not entirely string-based (for numeric data)
        if not all(isinstance(item, str) for item in self.dataset):
            # Initialize limits for numeric data
            top_limit = []
            bottom_limit = []

        # Single loop to process both numeric and string data
        for current_class in data:
            # Calculate the frequency of the current class
            current_frequency = self.dataset.count(current_class)
            frequency.append(current_frequency)

            # Calculate the relative frequency for the current class
            current_relative_frequency = np.round((current_frequency / self.length) * 100)
            relative_frequency.append(current_relative_frequency)

            # If the data is numeric, calculate limits and cumulative frequencies
            if top_limit is not None and bottom_limit is not None:
                # Calculate top and bottom limits for numeric data
                current_top_limit = current_class + 0.5
                current_bottom_limit = current_class - 0.5
                top_limit.append(current_top_limit)
                bottom_limit.append(current_bottom_limit)

                # Calculate bottom cumulative frequency for numeric data
                current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, current_class)
                bot_cumulative_frequency.append(current_bot_cumulative_frequency)

                # Calculate top cumulative frequency for numeric data
                current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
                top_cumulative_frequency.append(current_top_cumulative_frequency)

            else:
                # If the data is string-based, calculate cumulative frequencies
                # Calculate bottom cumulative frequency for strings
                current_bot_cumulative_frequency = self.dataset.count(current_class)
                bot_cumulative_frequency.append(current_bot_cumulative_frequency)

                # Calculate top cumulative frequency for strings
                current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
                top_cumulative_frequency.append(current_top_cumulative_frequency)

        # Find the mode (the class with the highest frequency)
        mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
        mode = [data[i] for i in mode_index]

        # Create the ProcessedData object based on the data type
        self.simple = ProcessedData(
            data, None, None, bottom_limit, top_limit,
            frequency, None, None, None,
            bot_cumulative_frequency, top_cumulative_frequency,
            relative_frequency, mode
        )


# Processed Data Assignment
class ProcessedData:
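For orientation, the bookkeeping that the new PopulateSimple method performs for a string dataset (distinct classes, per-class frequency, rounded relative frequency, and mode) can be sketched standalone with collections.Counter. This is an illustrative sketch only, not code from the repository; the helper name simple_frequency_table is hypothetical.

from collections import Counter

# Illustrative sketch: mirrors the simple-table bookkeeping, not the repository's API
def simple_frequency_table(dataset):
    counts = Counter(dataset)                 # frequency of each distinct class
    classes = sorted(counts)                  # distinct classes, sorted
    total = len(dataset)
    frequency = [counts[c] for c in classes]
    relative_frequency = [round(100 * f / total) for f in frequency]
    peak = max(frequency)
    mode = [c for c, f in zip(classes, frequency) if f == peak]
    return classes, frequency, relative_frequency, mode

print(simple_frequency_table(['Mango', 'Pear', 'Pear', 'Apple', 'Pear']))
# (['Apple', 'Mango', 'Pear'], [1, 1, 3], [20, 20, 60], ['Pear'])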
94 changes: 49 additions & 45 deletions Main.py
@@ -4,67 +4,71 @@
import tabulate as tabulate

# Raw Data
dataset = [12.5, 43.2, 56.7, 12.1, 98.3, 34.2, 78.4, 67.9, 23.5, 45.6,
           78.1, 89.0, 32.4, 56.8, 44.5, 77.2, 12.6, 35.8, 67.1, 23.3,
           56.5, 78.9, 99.5, 22.4, 10.2, 35.1, 48.6, 59.9, 71.3, 84.2,
           45.3, 67.8, 89.1, 33.3, 76.4, 88.7, 41.2, 12.7, 34.4, 67.4,
           23.8, 55.1, 77.3, 90.4, 13.5, 14.6, 55.7, 22.2, 33.1, 66.5,
           78.2, 39.5, 41.8, 91.2, 12.4, 64.7, 49.9, 80.5, 92.3, 38.8,
           14.5, 99.1, 25.4, 26.8, 37.5, 52.3, 43.8, 76.8, 28.7, 64.8,
           14.9, 15.3, 48.5, 82.2, 93.4, 56.3, 88.3, 60.5, 72.9, 38.3,
           57.2, 70.1, 84.4, 97.2, 18.6, 45.1, 66.1, 31.9, 94.5, 29.4,
           11.9, 16.7, 21.1, 88.9, 99.7, 53.6, 62.0, 34.9, 82.8, 18.9,]
dataset = [
    'Mango', 'Pineapple', 'Banana', 'Banana', 'Pineapple', 'Banana',
    'Banana', 'Grapes', 'Pear', 'Pineapple', 'Orange', 'Strawberry',
    'Orange', 'Mango', 'Banana', 'Pineapple', 'Orange', 'Banana',
    'Strawberry', 'Pear', 'Apple', 'Banana', 'Pineapple', 'Orange',
    'Mango', 'Apple', 'Pear', 'Pear', 'Pear', 'Grapes', 'Pear',
    'Orange', 'Grapes', 'Strawberry', 'Mango', 'Orange', 'Orange',
    'Mango', 'Pear', 'Strawberry', 'Pear', 'Orange', 'Mango',
    'Mango', 'Pear', 'Grapes', 'Apple', 'Mango', 'Pineapple',
    'Strawberry', 'Strawberry', 'Grapes', 'Apple', 'Banana',
    'Grapes', 'Banana', 'Strawberry', 'Mango', 'Strawberry',
    'Orange', 'Pear', 'Grapes', 'Orange', 'Apple'
]


# Initiate Object From The Raw Data
data = ft.FrequencyTable(dataset)

# Processing Raw Data to Frequency Grouped Frequency Table
data.PopulateGrouped() # Grouped Data
# data.PopulateSimple() # Simple Data
# data.PopulateGrouped() # Grouped Data
data.PopulateSimple() # Simple Data

# Transform The Data To A Frequency Table
# Initiating The Data Using Pandas
# Grouped Populated Data
dfg = pd.DataFrame(
    {
        "Class Interval" : data.grouped.ranges,
        "Class Limit" : data.grouped.limit,
        "Frequency" : data.grouped.frequency,
        "Midpoint" : data.grouped.midpoint,

        "C <" : data.grouped.bottom_limit,
        "CF <" : data.grouped.bottom_cumulative_frequency,
        "C >" : data.grouped.top_limit,
        "CF >" : data.grouped.top_cumulative_frequency,
        "Relative Frequency" : data.grouped.percentage_relative_frequency
    }
)

# Simple Populated Data
# dfs = pd.DataFrame(
# dfg = pd.DataFrame(
# {
# "Class" : data.simple.classval,
# "Frequency" : data.simple.frequency,
# "Relative Frequency" : data.simple.percentage_relative_frequency
# "Class Interval" : data.grouped.ranges,
# "Class Limit" : data.grouped.limit,
# "Frequency" : data.grouped.frequency,
# "Midpoint" : data.grouped.midpoint,

# "C <" : data.grouped.bottom_limit,
# "CF <" : data.grouped.bottom_cumulative_frequency,
# "C >" : data.grouped.top_limit,
# "CF >" : data.grouped.top_cumulative_frequency,
# "Relative Frequency" : data.grouped.percentage_relative_frequency
# }
# )

# Converting Pandas Data Into Tabulate
# tablesimple = tabulate.tabulate(
#     dfs,
#     headers='keys',
#     tablefmt='pipe'
# )
# Simple Populated Data
dfs = pd.DataFrame(
    {
        "Class" : data.simple.classval,
        "Frequency" : data.simple.frequency,
        "Relative Frequency" : data.simple.percentage_relative_frequency
    }
)

tablegrouped = tabulate.tabulate(
dfg,
# Converting Pandas Data Into Tabulate
tablesimple = tabulate.tabulate(
dfs,
headers='keys',
tablefmt='pipe',
)
tablefmt='pipe'
)

# tablegrouped = tabulate.tabulate(
#     dfg,
#     headers='keys',
#     tablefmt='pipe',
# )

# Print The Processed Data
# print(tablesimple)
print(tablegrouped)
print(data.length)
print(tablesimple)
# print(tablegrouped)
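As a reference for the rendering step above, tabulate converts a pandas DataFrame into a pipe-style Markdown table when called with headers='keys' and tablefmt='pipe', which is what Main.py does with dfs. The frame below is a small hypothetical example, not output produced from this commit's fruit dataset.

import pandas as pd
import tabulate

# Hypothetical miniature frame; the real script builds dfs from data.simple
df = pd.DataFrame({
    "Class": ["Apple", "Banana", "Mango"],
    "Frequency": [2, 3, 1],
    "Relative Frequency": [33.0, 50.0, 17.0],
})

# headers='keys' takes column names from the DataFrame; tablefmt='pipe' emits Markdown pipes
print(tabulate.tabulate(df, headers='keys', tablefmt='pipe'))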


