Skip to content

Commit

Permalink
refactor: Combining Simple and String Populate into Simple Populate
Browse files Browse the repository at this point in the history
  • Loading branch information
brotherzhafif committed Oct 13, 2024
1 parent 9592684 commit 02e139c
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 166 deletions.
97 changes: 97 additions & 0 deletions Example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# EXAMPLE PROGRAM
import FrequencyTable as ft
import pandas as pd
import tabulate as tabulate

# Raw Data
# A tuple of fruit names used as the sample dataset.
dataset = (
    "Apel", "Pisang", "Jeruk", "Mangga", "Semangka",
    "Melon", "Pepaya", "Nanas", "Anggur", "Stroberi",
    "Durian", "Salak", "Rambutan", "Sirsak", "Alpukat",
    "Jambu Biji", "Pir", "Kelengkeng", "Markisa", "Leci",
    "Ceri", "Blueberry", "Raspberry", "Kedondong", "Belimbing",
    "Duku", "Manggis", "Kismis", "Kelengkeng", "Cempedak",
    "Srikaya", "Delima", "Kiwi", "Plum", "Kurma",
    "Aprikot", "Persik", "Buah Naga", "Nangka", "Pepino",
)


def _cumulative_columns(result):
    """Return the limit / cumulative-frequency columns shared by every table.

    ``result`` is one of the populated result objects on the frequency
    table (``data.grouped``, ``data.simple`` or ``data.text``).
    """
    return {
        "C <": result.bottom_limit,
        "CF <": result.bottom_cumulative_frequency,
        "C >": result.top_limit,
        "CF >": result.top_cumulative_frequency,
        "Relative Frequency": result.percentage_relative_frequency,
    }


def _to_pipe_table(frame):
    """Render a DataFrame as a pipe-formatted table using its column names."""
    return tabulate.tabulate(frame, headers='keys', tablefmt='pipe')


# Build the frequency-table object from the raw data
data = ft.FrequencyTable(dataset)

# Populate each kind of frequency table from the raw data
# NOTE(review): the commit title says the string populate was merged into
# PopulateSimple -- confirm PopulateString() and data.text still exist.
data.PopulateGrouped()  # Grouped Data
data.PopulateSimple()   # Simple Data
data.PopulateString()   # String Data

# Turn the populated results into pandas DataFrames
# Grouped Populated Data
grouped_columns = {
    "Class Interval": data.grouped.ranges,
    "Class Limit": data.grouped.limit,
    "Frequency": data.grouped.frequency,
    "Midpoint": data.grouped.midpoint,
}
grouped_columns.update(_cumulative_columns(data.grouped))
dfg = pd.DataFrame(grouped_columns)

# Simple Populated Data
simple_columns = {
    "Class": data.simple.classval,
    "Frequency": data.simple.frequency,
}
simple_columns.update(_cumulative_columns(data.simple))
dfs = pd.DataFrame(simple_columns)

# String Populated Data
string_columns = {
    "Class": data.text.classval,
    "Frequency": data.text.frequency,
}
string_columns.update(_cumulative_columns(data.text))
dfa = pd.DataFrame(string_columns)

# Convert each DataFrame into a pipe-formatted text table
tablesimple = _to_pipe_table(dfs)
tablegrouped = _to_pipe_table(dfg)
tablestring = _to_pipe_table(dfa)

# Print The Processed Data
for rendered in (tablesimple, tablegrouped, tablestring):
    print(rendered)



210 changes: 92 additions & 118 deletions FrequencyTable.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,21 @@
# Frequency Table Class
class FrequencyTable:
def __init__(self, dataset):
# Check for mixed data types (both numeric and string)
if any(isinstance(item, str) for item in dataset) and any(isinstance(item, (int, float)) for item in dataset):
raise ValueError("Data is corrupted: contains both numeric and string values.")

# Data Initiation
self.dataset = sorted(dataset)
self.length = len(dataset)
self.lowest = min(dataset)
self.highest = max(dataset)

# Classes is Rounding Down
# Math Log Base 10 In Python For Accurate Result
self.classes = 1 + (3.222 * np.log10(self.length))
self.classes = round(self.classes - 0.5)

# Condition if the data is contain string
if not any(isinstance(item, str) for item in self.dataset):
self.lowest = min(dataset) if isinstance(dataset[0], (int, float)) else None
self.highest = max(dataset) if isinstance(dataset[0], (int, float)) else None

# Classes is Rounding Down
if self.lowest is not None: # Only calculate classes for numeric data
self.classes = 1 + (3.222 * np.log10(self.length))
self.classes = round(self.classes - 0.5)

# Sum of the data and range
self.sum = sum(dataset)
self.range = self.highest - self.lowest
Expand All @@ -25,10 +27,10 @@ def __init__(self, dataset):
self.interval = self.range / self.classes
self.interval = round(self.interval + 0.5)

# Rounding Both Limit So The Data Would Be Simple And Easier To Read
# Rounding Both Limits So The Data Would Be Simple And Easier To Read
self.base = self.roundy(self.lowest - 3)
self.top = self.roundy(self.highest + 3)

# Mean or Average
self.mean = (self.sum / self.length)

Expand All @@ -37,14 +39,34 @@ def __init__(self, dataset):

# Formula for Standard Deviation
self.deviation = (self.variance ** 0.5)

# Formula to find Dataset Skewness
self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \
sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)

# Formula to find Dataset Kurtosis
self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
(3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))

self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) /
((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
(3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))

# Base 5 Rounding
def roundy(self, x, base=5):
    """Round ``x`` to a multiple of ``base``.

    Args:
        x: The number to round.
        base: The step the result must be a multiple of (default 5).

    Returns:
        ``base`` multiplied by the built-in ``round`` of ``x / base``.
    """
    steps = round(x / base)
    return base * steps

# Function To Find Frequency in Dataset Within a Desired Range (Bottom Limit Inclusive, Top Limit Exclusive)
def find_frequency(self, bot, top):
    """Count dataset entries equal to an integer in the range ``[bot, top)``.

    Both limits are converted with ``int()`` first, so fractional limits
    are truncated. Only entries that compare equal to one of the integers
    ``bot, bot + 1, ..., top - 1`` are counted; non-integer entries in
    ``self.dataset`` never match.

    Args:
        bot: Inclusive lower limit; must be convertible to ``int``.
        top: Exclusive upper limit; must be convertible to ``int``.

    Returns:
        The total number of matching entries in ``self.dataset``.

    Raises:
        TypeError: If either limit cannot be converted to ``int``.
    """
    # Local import so the method does not depend on the file's import block.
    from collections import Counter

    try:
        lower = int(bot)
        upper = int(top)
    except (ValueError, TypeError) as exc:
        # Fail right away with a clear message instead of printing and then
        # crashing inside range() with an unrelated error.
        raise TypeError(
            f"find_frequency limits must be convertible to int: {exc}"
        ) from exc

    # Tally the dataset once rather than rescanning it for every integer.
    occurrences = Counter(self.dataset)
    return sum(occurrences[value] for value in range(lower, upper))

# Populate Grouped Table Frequency Data Method
def PopulateGrouped(self):
# Initiating Used List
Expand Down Expand Up @@ -127,120 +149,72 @@ def PopulateGrouped(self):

# Populate Simple Table Frequency Data Method
def PopulateSimple(self):
# Deleting Duplicate and Sort the Data
data = sorted(set(self.dataset))

# Initiating Used Variable
top_limit = []
bottom_limit = []
frequency = []
top_cumulative_frequency = []
bot_cumulative_frequency = []
relative_frequency = []
mode = []

for current_class in data:
# Bottom Limit of the Class
current_top_limit = current_class + 0.5
current_bottom_limit = current_class - 0.5

# Top Limit of the Class
top_limit.append(current_top_limit)
bottom_limit.append(current_bottom_limit)

# Calculate Current Class Frequency
current_frequency = self.dataset.count(current_class)
frequency.append(current_frequency)

# Calculate Current Class Bottom Cumulative Frequency
current_bot_cumulative_frequency = self.find_frequency(self.lowest -1 , current_class)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)

# Calculate Current Class Top Cumulative Frequency
current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Calculate Current Class Relative Frequency
current_relative_frequency = np.round((current_frequency / self.length) * 100)
relative_frequency.append(current_relative_frequency)

# Temukan modus
mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
mode = [data[i] for i in mode_index]

# Buat objek ProcessedData
self.simple = ProcessedData(data, None, None, bottom_limit, top_limit,
frequency, None, None, None,
bot_cumulative_frequency, top_cumulative_frequency,
relative_frequency, mode)

# Populate Simple String Table Frequency Data Method
def PopulateString(self):
# Memastikan bahwa dataset berisi string
# Initialize general variables
data = sorted(set(self.dataset)) # Remove duplicates and sort the data
frequency = [] # To store the frequency of each class
top_cumulative_frequency = [] # To store top cumulative frequency for each class
bot_cumulative_frequency = [] # To store bottom cumulative frequency for each class
relative_frequency = [] # To store relative frequency for each class
mode = [] # To store the mode(s)

# Variables specifically for numeric data
top_limit = None
bottom_limit = None

# Check if the dataset is not entirely string-based (for numeric data)
if not all(isinstance(item, str) for item in self.dataset):
raise ValueError("Dataset harus berisi string saja untuk menggunakan PopulateString.")

# Menghapus duplikat dan mengurutkan data secara alfabetis
data = sorted(set(self.dataset))

# Variabel yang diperlukan
frequency = []
top_cumulative_frequency = []
bot_cumulative_frequency = []
relative_frequency = []
mode = []
# Initialize limits for numeric data
top_limit = []
bottom_limit = []

# Menghitung frekuensi untuk setiap string unik dalam dataset
# Single loop to process both numeric and string data
for current_class in data:
# Menghitung frekuensi dari string saat ini
# Calculate the frequency of the current class
current_frequency = self.dataset.count(current_class)
frequency.append(current_frequency)

# Menghitung cumulative frequency (bawah)
current_bot_cumulative_frequency = self.find_frequency_string(self.dataset, current_class)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)

# Menghitung cumulative frequency (atas)
current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Menghitung relative frequency
# Calculate the relative frequency for the current class
current_relative_frequency = np.round((current_frequency / self.length) * 100)
relative_frequency.append(current_relative_frequency)

# Menemukan modus (nilai string yang paling sering muncul)
# If the data is numeric, calculate limits and cumulative frequencies
if top_limit is not None and bottom_limit is not None:
# Calculate top and bottom limits for numeric data
current_top_limit = current_class + 0.5
current_bottom_limit = current_class - 0.5
top_limit.append(current_top_limit)
bottom_limit.append(current_bottom_limit)

# Calculate bottom cumulative frequency for numeric data
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, current_class)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)

# Calculate top cumulative frequency for numeric data
current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
top_cumulative_frequency.append(current_top_cumulative_frequency)

else:
# If the data is string-based, calculate cumulative frequencies
# Calculate bottom cumulative frequency for strings
current_bot_cumulative_frequency = self.dataset.count(current_class)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)

# Calculate top cumulative frequency for strings
current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Find the mode (the class with the highest frequency)
mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
mode = [data[i] for i in mode_index]

# Menyimpan data yang diproses ke dalam atribut simple
self.text = ProcessedData(data, None, None, None, None,
frequency, None, None, None,
bot_cumulative_frequency, top_cumulative_frequency,
relative_frequency, mode)
# Create the ProcessedData object based on the data type
self.simple = ProcessedData(
data, None, None, bottom_limit, top_limit,
frequency, None, None, None,
bot_cumulative_frequency, top_cumulative_frequency,
relative_frequency, mode
)

def find_frequency_string(self, dataset, value):
    """Return how many times ``value`` occurs in ``dataset``.

    Args:
        dataset: A sequence that provides a ``count`` method.
        value: The entry to count.

    Returns:
        The result of ``dataset.count(value)``.
    """
    return dataset.count(value)

# Base 5 Rounding
def roundy(self, x, base=5):
    """Round ``x`` to a multiple of ``base``.

    Args:
        x: The number to round.
        base: The step the result must be a multiple of (default 5).

    Returns:
        ``base`` multiplied by the built-in ``round`` of ``x / base``.
    """
    steps = round(x / base)
    return base * steps

# Function To Find Frequency in Dataset Within a Desired Range (Bottom Limit Inclusive, Top Limit Exclusive)
def find_frequency(self, bot, top):
    """Count dataset entries equal to an integer in the range ``[bot, top)``.

    Both limits are converted with ``int()`` first, so fractional limits
    are truncated. Only entries that compare equal to one of the integers
    ``bot, bot + 1, ..., top - 1`` are counted; non-integer entries in
    ``self.dataset`` never match.

    Args:
        bot: Inclusive lower limit; must be convertible to ``int``.
        top: Exclusive upper limit; must be convertible to ``int``.

    Returns:
        The total number of matching entries in ``self.dataset``.

    Raises:
        TypeError: If either limit cannot be converted to ``int``.
    """
    # Local import so the method does not depend on the file's import block.
    from collections import Counter

    try:
        lower = int(bot)
        upper = int(top)
    except (ValueError, TypeError) as exc:
        # Fail right away with a clear message instead of printing and then
        # crashing inside range() with an unrelated error.
        raise TypeError(
            f"find_frequency limits must be convertible to int: {exc}"
        ) from exc

    # Tally the dataset once rather than rescanning it for every integer.
    occurrences = Counter(self.dataset)
    return sum(occurrences[value] for value in range(lower, upper))

# Processed Data Assignment
class ProcessedData:
# Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R)
Expand Down
Loading

0 comments on commit 02e139c

Please sign in to comment.