Skip to content

Commit

Permalink
feat: Refactoring Populate Table Data and Adding Descriptive Statisti…
Browse files Browse the repository at this point in the history
…c Variable
  • Loading branch information
brotherzhafif committed Oct 12, 2024
1 parent 65aa7f2 commit 0ca55f2
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 34 deletions.
63 changes: 49 additions & 14 deletions FrequencyTable.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import numpy as np
from scipy import stats

# Frequency Table Class
class FrequencyTable:
def __init__(self, dataset):
# Data Initiation
self.dataset = sorted(dataset)
self.amount = len(dataset)
self.sum = sum(dataset)
self.length = len(dataset)
self.lowest = min(dataset)
self.highest = max(dataset)

Expand All @@ -14,7 +16,7 @@ def __init__(self, dataset):

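# Number of Classes, Rounded Down
# Uses Log Base 10 (np.log10) For An Accurate Result
# NOTE(review): Sturges' rule is usually written 1 + 3.322 * log10(n); confirm the 3.222 coefficient below is intended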
self.classes = 1 + (3.222 * np.log10(self.amount))
self.classes = 1 + (3.222 * np.log10(self.length))
self.classes = round(self.classes - 0.5)

# Interval is Rounding Up
Expand All @@ -24,10 +26,21 @@ def __init__(self, dataset):
# Rounding Both Limits So The Data Is Simpler And Easier To Read
self.base = self.roundy(self.lowest - 3)
self.top = self.roundy(self.highest + 3)

# Mean or Average
self.mean = (self.sum / self.length)

# Formula for Variance
self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length

# Populate Data Method
def Populate(self):
# Initiating Used List
# Formula for Standard Deviation
self.deviation = (self.variance ** 0.5)



# Populate Grouped Table Frequency Data Method
def PopulateGrouped(self):
# Initiating Used List
top = []
bottom = []
top_limit = []
Expand All @@ -41,12 +54,14 @@ def Populate(self):
bot_cumulative_frequency = []
top_cumulative_frequency = []
relative_frequency = []
mode = []

# Initiating Used Parameter
interval = self.interval # 4
current_number = self.base - 1 # 156
# Initiating Used Parameter for Frequency Table
interval = self.interval
current_number = self.base - 1
old_number = 0

# Processing the Frequency Table Data
while current_number <= self.top-3:
# Finding Class Lowest Value
old_number = current_number + 1
Expand Down Expand Up @@ -89,13 +104,27 @@ def Populate(self):
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Counting the Relative Frequency in Percentage
current_relative_frequency = np.round((current_frequency / self.amount) * 100)
relative_frequency.append(current_relative_frequency)

current_relative_frequency = np.round((current_frequency / self.length) * 100)
# Storing the Relative Frequency (the Percent Symbol is Added Later in ProcessedData)
relative_frequency.append(current_relative_frequency)

# Find Mode or Data that appears most frequently
mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
mode = [data_range[i] for i in mode_index]

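# Formula to find Dataset Skewness (Adjusted Sample Skewness)
# NOTE(review): this adjusted formula normally uses the sample standard deviation (n - 1 divisor),
# while self.deviation is derived from the population variance (divided by n) - confirm which is intended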
skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)

# Formula to find Dataset Kurtosis (Sample Excess Kurtosis)
kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
(3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))

# Append Processed Data into Data Attributes
self.final = ProcessedData(bottom, top, bottom_limit, top_limit, frequency, data_range, data_limit, data_midpoint, bot_cumulative_frequency, top_cumulative_frequency, relative_frequency)

self.grouped = ProcessedData(bottom, top, bottom_limit, top_limit,
frequency, data_range, data_limit, data_midpoint,
bot_cumulative_frequency, top_cumulative_frequency,
relative_frequency, skewness, kurtosis, mode)

# Round a Value to the Nearest Multiple of `base` (5 by Default)
def roundy(self, x, base = 5):
    """Return ``x`` rounded to the nearest multiple of ``base``.

    The quotient ``x / base`` is rounded with the built-in ``round`` and
    then scaled back up by ``base``.
    """
    nearest_step = round(x / base)
    return base * nearest_step
Expand All @@ -111,7 +140,7 @@ def find_frequency(self, bot, top):
# Processed Data Assignment
class ProcessedData:
# Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative Frequency (CF), Relative Frequency (RF)
def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF):
def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, skew, kurt, mode):
self.bottom = bot
self.top = top
self.bottom_limit = bot_L
Expand All @@ -124,4 +153,10 @@ def __init__(self, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF):
self.bottom_cumulative_frequency = bot_CF
self.top_cumulative_frequency = top_CF
self.relative_frequency = RF

self.percentage_relative_frequency = [ f"{rf * 1:.2f}%" for rf in self.relative_frequency ]
self.skewness = skew
self.kurtosis = kurt
self.mode = mode


31 changes: 13 additions & 18 deletions Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,23 @@
# Initiate Object From The Raw Data
data = ft.FrequencyTable(dataset)

# Processing Raw Data to Frequency Table
data.Populate()

# Adding Percent Symbol into The Relative Frequency Coloumn
relative_frequency_with_percentage = [
f"{rf * 1:.2f}%" for rf in data.final.relative_frequency
]
# Processing Raw Data into a Grouped Frequency Table
data.PopulateGrouped()

# Transform The Data To A Frequency Table
# Initiating The Data Using Pandas
df = pd.DataFrame(
{
"Class Interval" : data.final.ranges,
"Class Limit" : data.final.limit,
"Frequency" : data.final.frequency,
"Midpoint" : data.final.midpoint,
"Class Interval" : data.grouped.ranges,
"Class Limit" : data.grouped.limit,
"Frequency" : data.grouped.frequency,
"Midpoint" : data.grouped.midpoint,

"C <" : data.final.bottom_limit,
"CF <" : data.final.bottom_cumulative_frequency,
"C >" : data.final.top_cumulative_frequency,
"CF >" : data.final.top_cumulative_frequency,
"Relative Frequency" : relative_frequency_with_percentage
"C <" : data.grouped.bottom_limit,
"CF <" : data.grouped.bottom_cumulative_frequency,
"C >" : data.grouped.top_limit,
"CF >" : data.grouped.top_cumulative_frequency,
"Relative Frequency" : data.grouped.percentage_relative_frequency
}
)

Expand All @@ -47,6 +42,6 @@
tablefmt='pipe'
)

# print(table)
print(data.final.ranges)
# Print Output Data
print(table)

4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
### Features
- Frequency Table ( Done )
- Descriptive Statistics ( Work in Progress )
- Descriptive Statistics ( Done )
- Display Chart ( Work in Progress )
- Data Transformation ( Coming Soon )
- Data Transformation ( Work in Progress )

### Required
- Matplotlib
Expand Down

0 comments on commit 0ca55f2

Please sign in to comment.