Skip to content

Commit

Permalink
fix: Fixing Miscalculation in Decimal Frequency Table
Browse files Browse the repository at this point in the history
  • Loading branch information
brotherzhafif committed Oct 13, 2024
1 parent e282935 commit 3b95bc3
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 127 deletions.
152 changes: 62 additions & 90 deletions FrequencyTable.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from scipy import stats

# Frequency Table Class
class FrequencyTable:
Expand All @@ -24,38 +25,56 @@ def __init__(self, dataset):

# Interval is Rounding Up
self.interval = self.range / self.classes
self.interval = round(self.interval + 0.5, 2) # Keep two decimal places
self.interval = round(self.interval + 0.5)

# Rounding Both Limits
self.base = self.roundy(self.lowest - 0.5)
self.top = self.roundy(self.highest + 0.5)
# Rounding Both Limits So The Data Would Be Simple And Easier To Read
self.base = self.roundy(self.lowest - 3)
self.top = self.roundy(self.highest + 3)

# Mean or Average
self.mean = (self.sum / self.length)

# Variance and Standard Deviation
# Formula for Variance
self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length

# Formula for Standard Deviation
self.deviation = (self.variance ** 0.5)

# Skewness
# Formula to find Dataset Skewness
self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \
sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)

# Kurtosis
# Formula to find Dataset Kurtosis
self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) /
((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
(3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))

# Base Rounding
def roundy(self, x, base=5):
    """Round ``x`` to the nearest multiple of ``base``.

    Args:
        x: Number to round.
        base: Step to snap to; defaults to 5. Must be non-zero, because
            ``x`` is divided by it.

    Returns:
        ``base * round(x / base)``. The result is an ``int`` when ``base``
        is an ``int`` and a ``float`` when ``base`` is a ``float``.
    """
    # Exact ties are resolved by the built-in round().
    return base * round(x / base)

# Function To Find Frequency in Dataset with Desired Range
# Function To Find Frequency in Dataset with Desired Range (Top and Down Limit)
def find_frequency(self, bot, top):
    """Count the dataset values that fall in the half-open range [bot, top).

    Args:
        bot: Inclusive lower bound.
        top: Exclusive upper bound.

    Returns:
        Number of items ``x`` in ``self.dataset`` with ``bot <= x < top``.
        An empty dataset, or a range with ``top <= bot``, yields 0.
    """
    # Compare each value directly instead of enumerating candidate values:
    # - stepping by 0.01 accumulates floating-point error at the upper bound
    #   and misses values with more than two decimal places;
    # - range(bot, top) raises TypeError when a bound is a float.
    # A single pass also avoids one full list.count() scan per step.
    return sum(1 for x in self.dataset if bot <= x < top)

# Populate Grouped Frequency Table Data Method
# Populate Grouped Table Frequency Data Method
def PopulateGrouped(self):
# Initiating Used List
top = []
Expand All @@ -73,118 +92,70 @@ def PopulateGrouped(self):
relative_frequency = []
mode = []

# Frequency Table Initialization
# Initiating Used Parameter for Frequency Table
interval = self.interval
current_number = self.base - 0.5
current_number = self.base - 1
old_number = 0

# Processing the Frequency Table Data
while current_number <= self.top:
while current_number <= self.top - 3:
# Finding Class Lowest Value
old_number = current_number + 0.5
bottom.append(old_number)
old_number = current_number + 1
bottom.append(old_number)

# Finding Class Highest Value
current_number = current_number + interval
top.append(current_number)

# Class Limits
# Append Class Bottom Limit
current_bottom_limit = old_number - 0.5
bottom_limit.append(current_bottom_limit)

# Append Class Top Limit
current_top_limit = current_number + 0.5
top_limit.append(current_top_limit)

# Frequency Calculation
current_frequency = self.find_frequency(old_number, current_number)
# Finding The Frequency That Range
current_frequency = self.find_frequency(old_number, current_number + 1)
frequency.append(current_frequency)

# Data Range and Limits
current_data_range = f"{old_number:.2f} ~ {current_number:.2f}"
# Adding The Number Range From Both Frequency
current_data_range = f"{old_number:.2f} ~ {current_number:.2f}" if not all(isinstance(x, int) for x in self.dataset) else f"{old_number} ~ {current_number}"
data_range.append(current_data_range)
current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}"

# Adding Data Range Limit Of The Class Frequency
current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}" if not all(isinstance(x, int) for x in self.dataset) else f"{current_bottom_limit} ~ {current_top_limit}"
data_limit.append(current_data_limit)

# Midpoint Calculation
# Adding Data Midpoint of The Class Frequency
current_data_midpoint = (old_number + current_number) / 2
data_midpoint.append(current_data_midpoint)

# Cumulative Frequencies
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, old_number)
# Adding Bottom Cumulative Frequency of The Class
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)
current_top_cumulative_frequency = self.find_frequency(current_number, self.highest + 0.5)
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Relative Frequency Calculation
current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
relative_frequency.append(current_relative_frequency)
# Adding Top Cumulative Frequency of The Class
current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1)
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Find Mode
# Counting the Relative Frequency in Percentage
current_relative_frequency = np.round((current_frequency / self.length) * 100)
relative_frequency.append(current_relative_frequency)

# Find Mode or Data that appears most frequently
mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
mode = [data_range[i] for i in mode_index]

# Store Processed Data
# Append Processed Data into Data Attributes
self.grouped = ProcessedData(None, bottom, top, bottom_limit, top_limit,
frequency, data_range, data_limit, data_midpoint,
bot_cumulative_frequency, top_cumulative_frequency,
relative_frequency, mode)

# Populate Simple Frequency Table Data Method
def PopulateSimple(self):
# Build the simple (ungrouped) frequency table: one class per distinct value
# in self.dataset. The result is stored on self.simple as a ProcessedData.
# Initialize variables
# Distinct dataset values in sorted order; each one becomes a class.
data = sorted(set(self.dataset))
frequency = []
top_cumulative_frequency = []
bot_cumulative_frequency = []
relative_frequency = []
# Placeholder only; reassigned below from the frequency list.
mode = []

# Check for numeric data
# The limits stay None when every item is a str; otherwise they become lists
# that are filled per class below.
top_limit = None
bottom_limit = None

if not all(isinstance(item, str) for item in self.dataset):
top_limit = []
bottom_limit = []

# Process each class
for current_class in data:
# Number of occurrences of this class value in the dataset.
current_frequency = self.dataset.count(current_class)
frequency.append(current_frequency)

# Share of the dataset as a percentage, rounded to two decimals.
current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
relative_frequency.append(current_relative_frequency)

# Branch taken when the limit lists were created above (not all-string data).
if top_limit is not None and bottom_limit is not None:
# Class limits sit 0.5 above and below the class value.
current_top_limit = current_class + 0.5
current_bottom_limit = current_class - 0.5
top_limit.append(current_top_limit)
bottom_limit.append(current_bottom_limit)

# Count between (lowest - 0.5) and this class via find_frequency.
# NOTE(review): whether the class itself is included depends on
# find_frequency's boundary convention — confirm against that method.
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, current_class)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)

# Count between this class and (highest + 0.5) via find_frequency.
# NOTE(review): same boundary-convention caveat as above.
current_top_cumulative_frequency = self.find_frequency(current_class, self.highest + 0.5)
top_cumulative_frequency.append(current_top_cumulative_frequency)

else:
# NOTE(review): this stores the class's own count, not a running total —
# confirm that this is the intended "cumulative" value for string data.
current_bot_cumulative_frequency = self.dataset.count(current_class)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)
# Sum of the frequencies collected so far minus this class's own count.
current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Mode: every class whose frequency equals the maximum (ties are all kept).
mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
mode = [data[i] for i in mode_index]

# Arguments are positional; None is passed for the bot, top, R, L and M
# parameters of ProcessedData.__init__, which this method does not compute.
self.simple = ProcessedData(
data, None, None, bottom_limit, top_limit,
frequency, None, None, None,
bot_cumulative_frequency, top_cumulative_frequency,
relative_frequency, mode
)

# Processed Data Assignment
class ProcessedData:
# Constructor for processed data
# Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R)
def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
self.classval = data
self.bottom = bot
Expand All @@ -194,10 +165,11 @@ def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF,
self.midpoint = M
self.ranges = R
self.limit = L

self.frequency = F
self.bottom_cumulative_frequency = bot_CF
self.top_cumulative_frequency = top_CF
self.relative_frequency = RF

self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ]
self.mode = mode
self.percentage_relative_frequency = [f"{rf * 1:.2f}%" for rf in self.relative_frequency]
self.mode = mode
82 changes: 45 additions & 37 deletions Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,59 +4,67 @@
import tabulate as tabulate

# Raw Data
dataset = [1.2, 2.5, 3.1, 4.7, 1.2, 2.5, 3.8, 4.5, 2.1, 3.3, 4.8, 5.0]
dataset = [12.5, 43.2, 56.7, 12.1, 98.3, 34.2, 78.4, 67.9, 23.5, 45.6,
78.1, 89.0, 32.4, 56.8, 44.5, 77.2, 12.6, 35.8, 67.1, 23.3,
56.5, 78.9, 99.5, 22.4, 10.2, 35.1, 48.6, 59.9, 71.3, 84.2,
45.3, 67.8, 89.1, 33.3, 76.4, 88.7, 41.2, 12.7, 34.4, 67.4,
23.8, 55.1, 77.3, 90.4, 13.5, 14.6, 55.7, 22.2, 33.1, 66.5,
78.2, 39.5, 41.8, 91.2, 12.4, 64.7, 49.9, 80.5, 92.3, 38.8,
14.5, 99.1, 25.4, 26.8, 37.5, 52.3, 43.8, 76.8, 28.7, 64.8,
14.9, 15.3, 48.5, 82.2, 93.4, 56.3, 88.3, 60.5, 72.9, 38.3,
57.2, 70.1, 84.4, 97.2, 18.6, 45.1, 66.1, 31.9, 94.5, 29.4,
11.9, 16.7, 21.1, 88.9, 99.7, 53.6, 62.0, 34.9, 82.8, 18.9,]


# Initiate Object From The Raw Data
data = ft.FrequencyTable(dataset)

# Processing Raw Data Into a Grouped Frequency Table
# data.PopulateGrouped() # Grouped Data
data.PopulateSimple() # Simple Data
data.PopulateGrouped() # Grouped Data
# data.PopulateSimple() # Simple Data

# Transform The Data To A Frequency Table
# Initiating The Data Using Pandas
# Grouped Populated Data
# dfg = pd.DataFrame(
# {
# "Class Interval" : data.grouped.ranges,
# "Class Limit" : data.grouped.limit,
# "Frequency" : data.grouped.frequency,
# "Midpoint" : data.grouped.midpoint,

# "C <" : data.grouped.bottom_limit,
# "CF <" : data.grouped.bottom_cumulative_frequency,
# "C >" : data.grouped.top_limit,
# "CF >" : data.grouped.top_cumulative_frequency,
# "Relative Frequency" : data.grouped.percentage_relative_frequency
# }
# )

# Simple Populated Data
dfs = pd.DataFrame(
dfg = pd.DataFrame(
{
"Class" : data.simple.classval,
"Frequency" : data.simple.frequency,
"Relative Frequency" : data.simple.percentage_relative_frequency
"Class Interval" : data.grouped.ranges,
"Class Limit" : data.grouped.limit,
"Frequency" : data.grouped.frequency,
"Midpoint" : data.grouped.midpoint,

"C <" : data.grouped.bottom_limit,
"CF <" : data.grouped.bottom_cumulative_frequency,
"C >" : data.grouped.top_limit,
"CF >" : data.grouped.top_cumulative_frequency,
"Relative Frequency" : data.grouped.percentage_relative_frequency
}
)

# Converting Pandas Data Into Tabulate
tablesimple = tabulate.tabulate(
dfs,
headers='keys',
tablefmt='pipe'
)

# tablegrouped = tabulate.tabulate(
# dfg,
# headers='keys',
# tablefmt='pipe',
# Simple Populated Data
# dfs = pd.DataFrame(
# {
# "Class" : data.simple.classval,
# "Frequency" : data.simple.frequency,
# "Relative Frequency" : data.simple.percentage_relative_frequency
# }
# )

# Print The Processed Data
print(tablesimple)
# print(tablegrouped)
# Converting Pandas Data Into Tabulate
# tablesimple = tabulate.tabulate(
# dfs,
# headers='keys',
# tablefmt='pipe'
# )

tablegrouped = tabulate.tabulate(
dfg,
headers='keys',
tablefmt='pipe',
)

# Print The Processed Data
# print(tablesimple)
print(tablegrouped)
print(data.length)

0 comments on commit 3b95bc3

Please sign in to comment.