Skip to content

Commit

Permalink
feat: Adding Float/Decimal Data Handler
Browse files Browse the repository at this point in the history
  • Loading branch information
brotherzhafif committed Oct 13, 2024
1 parent c68605d commit e282935
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 99 deletions.
136 changes: 50 additions & 86 deletions FrequencyTable.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np
from scipy import stats

# Frequency Table Class
class FrequencyTable:
Expand All @@ -25,49 +24,38 @@ def __init__(self, dataset):

# Interval is Rounding Up
self.interval = self.range / self.classes
self.interval = round(self.interval + 0.5)
self.interval = round(self.interval + 0.5, 2) # Keep two decimal places

# Rounding Both Limits So The Data Would Be Simple And Easier To Read
self.base = self.roundy(self.lowest - 3)
self.top = self.roundy(self.highest + 3)
# Rounding Both Limits
self.base = self.roundy(self.lowest - 0.5)
self.top = self.roundy(self.highest + 0.5)

# Mean or Average
self.mean = (self.sum / self.length)

# Formula for Variance
# Variance and Standard Deviation
self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length

# Formula for Standard Deviation
self.deviation = (self.variance ** 0.5)

# Formula to find Dataset Skewness
# Skewness
self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \
sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)

# Formula to find Dataset Kurtosis
# Kurtosis
self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) /
((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
(3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))

def roundy(self, x, base=0.5):
    """Round ``x`` to the nearest multiple of ``base``.

    Args:
        x: The number to round.
        base: The step to round to. Defaults to 0.5; pass ``base=5`` to
            get rounding to multiples of five.

    Returns:
        ``base * round(x / base)``. Ties are resolved by Python's built-in
        ``round`` (round-half-to-even), e.g. ``roundy(0.25)`` gives ``0.0``.

    Raises:
        ZeroDivisionError: If ``base`` is zero.
    """
    # Scale into units of `base`, round to a whole number of units,
    # then scale back.
    steps = round(x / base)
    return base * steps
def find_frequency(self, bot, top):
    """Count the dataset values that fall inside ``(bot, top]``.

    The bounds are compared as given, without integer coercion, so float
    and decimal data are counted correctly.

    Args:
        bot: Lower bound of the range (exclusive).
        top: Upper bound of the range (inclusive).

    Returns:
        The number of items ``x`` in ``self.dataset`` for which
        ``bot < x <= top`` holds.
    """
    # Half-open interval: a value equal to `bot` is NOT counted,
    # a value equal to `top` is.
    return sum(1 for x in self.dataset if bot < x <= top)
# Populate Grouped Table Frequency Data Method

# Populate Grouped Frequency Table Data Method
def PopulateGrouped(self):
# Initiating Used List
top = []
Expand All @@ -85,129 +73,108 @@ def PopulateGrouped(self):
relative_frequency = []
mode = []

# Initiating Used Parameter for Frequency Table
# Frequency Table Initialization
interval = self.interval
current_number = self.base - 1
current_number = self.base - 0.5
old_number = 0

# Processing the Frequency Table Data
while current_number <= self.top-3:
while current_number <= self.top:
# Finding Class Lowest Value
old_number = current_number + 1
bottom.append(old_number) # 155
old_number = current_number + 0.5
bottom.append(old_number)

# Finding Class Highest Value
current_number = current_number + interval
top.append(current_number)

# Append Class Bottom Limit
# Class Limits
current_bottom_limit = old_number - 0.5
bottom_limit.append(current_bottom_limit)

# Append Class Top Limit
current_top_limit = current_number + 0.5
top_limit.append(current_top_limit)

# Finding The Frequency That Range
current_frequency = self.find_frequency(old_number, current_number + 1)
# Frequency Calculation
current_frequency = self.find_frequency(old_number, current_number)
frequency.append(current_frequency)

# Adding The Number Range From Both Frequency
current_data_range = f"{old_number} ~ {current_number}"
# Data Range and Limits
current_data_range = f"{old_number:.2f} ~ {current_number:.2f}"
data_range.append(current_data_range)

# Adding Data Range Limit Of The Class Frequency
current_data_limit = f"{current_bottom_limit} ~ {current_top_limit}"
current_data_limit = f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}"
data_limit.append(current_data_limit)

# Adding Data Midpoint of The Class Frequency
# Midpoint Calculation
current_data_midpoint = (old_number + current_number) / 2
data_midpoint.append(current_data_midpoint)

# Adding Bottom Cumulative Frequency of The Class
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number)
# Cumulative Frequencies
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, old_number)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)

# Adding Top Cumulative Frequency of The Class
current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1)
current_top_cumulative_frequency = self.find_frequency(current_number, self.highest + 0.5)
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Counting the Relative Frequency in Percentage
current_relative_frequency = np.round((current_frequency / self.length) * 100)
# Adding Percent Symbol into The Relative Frequency Column

# Relative Frequency Calculation
current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
relative_frequency.append(current_relative_frequency)

# Find Mode or Data that appears most frequently
# Find Mode
mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
mode = [data_range[i] for i in mode_index]

# Append Processed Data into Data Attributes
# Store Processed Data
self.grouped = ProcessedData(None, bottom, top, bottom_limit, top_limit,
frequency, data_range, data_limit, data_midpoint,
bot_cumulative_frequency, top_cumulative_frequency,
relative_frequency, mode)

# Populate Simple Table Frequency Data Method
# Populate Simple Frequency Table Data Method
def PopulateSimple(self):
# Initialize general variables
data = sorted(set(self.dataset)) # Remove duplicates and sort the data
frequency = [] # To store the frequency of each class
top_cumulative_frequency = [] # To store top cumulative frequency for each class
bot_cumulative_frequency = [] # To store bottom cumulative frequency for each class
relative_frequency = [] # To store relative frequency for each class
mode = [] # To store the mode(s)

# Variables specifically for numeric data
# Initialize variables
data = sorted(set(self.dataset))
frequency = []
top_cumulative_frequency = []
bot_cumulative_frequency = []
relative_frequency = []
mode = []

# Check for numeric data
top_limit = None
bottom_limit = None

# Check if the dataset is not entirely string-based (for numeric data)
if not all(isinstance(item, str) for item in self.dataset):
# Initialize limits for numeric data
top_limit = []
bottom_limit = []

# Single loop to process both numeric and string data
# Process each class
for current_class in data:
# Calculate the frequency of the current class
current_frequency = self.dataset.count(current_class)
frequency.append(current_frequency)

# Calculate the relative frequency for the current class
current_relative_frequency = np.round((current_frequency / self.length) * 100)
current_relative_frequency = np.round((current_frequency / self.length) * 100, 2)
relative_frequency.append(current_relative_frequency)

# If the data is numeric, calculate limits and cumulative frequencies
if top_limit is not None and bottom_limit is not None:
# Calculate top and bottom limits for numeric data
current_top_limit = current_class + 0.5
current_bottom_limit = current_class - 0.5
top_limit.append(current_top_limit)
bottom_limit.append(current_bottom_limit)

# Calculate bottom cumulative frequency for numeric data
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, current_class)
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 0.5, current_class)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)

# Calculate top cumulative frequency for numeric data
current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
current_top_cumulative_frequency = self.find_frequency(current_class, self.highest + 0.5)
top_cumulative_frequency.append(current_top_cumulative_frequency)

else:
# If the data is string-based, calculate cumulative frequencies
# Calculate bottom cumulative frequency for strings
current_bot_cumulative_frequency = self.dataset.count(current_class)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)

# Calculate top cumulative frequency for strings
current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Find the mode (the class with the highest frequency)
mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
mode = [data[i] for i in mode_index]

# Create the ProcessedData object based on the data type
self.simple = ProcessedData(
data, None, None, bottom_limit, top_limit,
frequency, None, None, None,
Expand All @@ -217,7 +184,7 @@ def PopulateSimple(self):

# Processed Data Assignment
class ProcessedData:
# Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R)
# Constructor for processed data
def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
self.classval = data
self.bottom = bot
Expand All @@ -227,13 +194,10 @@ def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF,
self.midpoint = M
self.ranges = R
self.limit = L

self.frequency = F
self.bottom_cumulative_frequency = bot_CF
self.top_cumulative_frequency = top_CF
self.relative_frequency = RF

self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ]
self.mode = mode


self.mode = mode
14 changes: 1 addition & 13 deletions Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,7 @@
import tabulate as tabulate

# Raw Data
dataset = [
'Mango', 'Pineapple', 'Banana', 'Banana', 'Pineapple', 'Banana',
'Banana', 'Grapes', 'Pear', 'Pineapple', 'Orange', 'Strawberry',
'Orange', 'Mango', 'Banana', 'Pineapple', 'Orange', 'Banana',
'Strawberry', 'Pear', 'Apple', 'Banana', 'Pineapple', 'Orange',
'Mango', 'Apple', 'Pear', 'Pear', 'Pear', 'Grapes', 'Pear',
'Orange', 'Grapes', 'Strawberry', 'Mango', 'Orange', 'Orange',
'Mango', 'Pear', 'Strawberry', 'Pear', 'Orange', 'Mango',
'Mango', 'Pear', 'Grapes', 'Apple', 'Mango', 'Pineapple',
'Strawberry', 'Strawberry', 'Grapes', 'Apple', 'Banana',
'Grapes', 'Banana', 'Strawberry', 'Mango', 'Strawberry',
'Orange', 'Pear', 'Grapes', 'Orange', 'Apple'
]
dataset = [1.2, 2.5, 3.1, 4.7, 1.2, 2.5, 3.8, 4.5, 2.1, 3.3, 4.8, 5.0]


# Initiate Object From The Raw Data
Expand Down

0 comments on commit e282935

Please sign in to comment.