Skip to content

Commit

Permalink
feat: Adding Populate Text Frequency Table
Browse files Browse the repository at this point in the history
  • Loading branch information
brotherzhafif committed Oct 13, 2024
1 parent b929b19 commit 9592684
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 52 deletions.
120 changes: 86 additions & 34 deletions FrequencyTable.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,45 @@ class FrequencyTable:
def __init__(self, dataset):
# Data Initiation
self.dataset = sorted(dataset)
self.sum = sum(dataset)
self.length = len(dataset)
self.lowest = min(dataset)
self.highest = max(dataset)

# Counting Data Range
self.range = self.highest - self.lowest

# Classes is Rounding Down

# Classes is Rounding Down
# Math Log Base 10 In Python For Accurate Result
self.classes = 1 + (3.222 * np.log10(self.length))
self.classes = round(self.classes - 0.5)

# Condition if the data is contain string
if not any(isinstance(item, str) for item in self.dataset):
# Sum of the data and range
self.sum = sum(dataset)
self.range = self.highest - self.lowest

# Interval is Rounding Up
self.interval = self.range / self.classes
self.interval = round(self.interval + 0.5)
# Interval is Rounding Up
self.interval = self.range / self.classes
self.interval = round(self.interval + 0.5)

# Rounding Both Limit So The Data Would Be Simple And Easier To Read
self.base = self.roundy(self.lowest - 3)
self.top = self.roundy(self.highest + 3)

# Mean or Average
self.mean = (self.sum / self.length)
# Rounding Both Limit So The Data Would Be Simple And Easier To Read
self.base = self.roundy(self.lowest - 3)
self.top = self.roundy(self.highest + 3)
# Mean or Average
self.mean = (self.sum / self.length)

# Formula for Variance
self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length
# Formula for Variance
self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length

# Formula for Standard Deviation
self.deviation = (self.variance ** 0.5)

# Formula to find Dataset Skewness
self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
# Formula for Standard Deviation
self.deviation = (self.variance ** 0.5)
# Formula to find Dataset Skewness
self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)

# Formula to find Dataset
self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
(3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
# Formula to find Dataset Kurtosis
self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
(3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))

# Populate Grouped Table Frequency Data Method
def PopulateGrouped(self):
# Initiating Used List
Expand Down Expand Up @@ -85,7 +87,7 @@ def PopulateGrouped(self):
top_limit.append(current_top_limit)

# Finding The Frequency That Range
current_frequency = self.find_frequency(old_number, current_number)
current_frequency = self.find_frequency(old_number, current_number + 1)
frequency.append(current_frequency)

# Adding The Number Range From Both Frequency
Expand All @@ -101,11 +103,11 @@ def PopulateGrouped(self):
data_midpoint.append(current_data_midpoint)

# Adding Bottom Cumulative Frequency of The Class
current_bot_cumulative_frequency = self.find_frequency(self.lowest, old_number)
current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)

# Adding Top Cumulative Frequency of The Class
current_top_cumulative_frequency = self.find_frequency(old_number, self.highest)
current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1)
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Counting the Relative Frequency in Percentage
Expand All @@ -122,7 +124,8 @@ def PopulateGrouped(self):
frequency, data_range, data_limit, data_midpoint,
bot_cumulative_frequency, top_cumulative_frequency,
relative_frequency, mode)


# Populate Simple Table Frequency Data Method
def PopulateSimple(self):
# Deleting Duplicate and Sort the Data
data = sorted(set(self.dataset))
Expand Down Expand Up @@ -150,11 +153,11 @@ def PopulateSimple(self):
frequency.append(current_frequency)

# Calculate Current Class Bottom Cumulative Frequency
current_bot_cumulative_frequency = self.find_frequency(self.lowest, current_class)
current_bot_cumulative_frequency = self.find_frequency(self.lowest -1 , current_class)
bot_cumulative_frequency.append(current_bot_cumulative_frequency)

# Calculate Current Class Top Cumulative Frequency
current_top_cumulative_frequency = self.find_frequency(current_class, self.highest)
current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
top_cumulative_frequency.append(current_top_cumulative_frequency)

# Calculate Current Class Relative Frequency
Expand All @@ -170,7 +173,56 @@ def PopulateSimple(self):
frequency, None, None, None,
bot_cumulative_frequency, top_cumulative_frequency,
relative_frequency, mode)


# Populate Simple String Table Frequency Data Method
def PopulateString(self):
    """Build the frequency table for a dataset made up solely of strings.

    The result is stored in ``self.text`` as a ``ProcessedData`` holding,
    per unique string (in alphabetical order): its frequency, the number
    of items sorting strictly before it (bottom cumulative frequency),
    the number of items sorting strictly after it (top cumulative
    frequency) and its relative frequency in percent, plus the list of
    modes. The cumulative columns follow the same strict "less than" /
    "greater than" convention as the numeric simple table.

    Raises:
        ValueError: If any item in the dataset is not a string.
    """
    from collections import Counter

    # Make sure the dataset contains only strings
    if not all(isinstance(item, str) for item in self.dataset):
        raise ValueError("Dataset harus berisi string saja untuk menggunakan PopulateString.")

    # Count every string once, then list the unique values alphabetically
    counts = Counter(self.dataset)
    data = sorted(counts)

    # Columns of the resulting table
    frequency = []
    bot_cumulative_frequency = []
    top_cumulative_frequency = []
    relative_frequency = []

    # Running number of items that belong to the classes handled so far
    seen = 0
    for current_class in data:
        current_frequency = counts[current_class]
        frequency.append(current_frequency)

        # Items strictly before the current class
        bot_cumulative_frequency.append(seen)
        seen += current_frequency

        # Items strictly after the current class
        top_cumulative_frequency.append(self.length - seen)

        # Relative frequency in percent
        relative_frequency.append(np.round((current_frequency / self.length) * 100))

    # Mode: every string that reaches the highest frequency
    highest_frequency = max(frequency, default=0)
    mode = [label for label, count in zip(data, frequency)
            if count == highest_frequency]

    # Store the processed columns in the `text` attribute
    self.text = ProcessedData(data, None, None, None, None,
                              frequency, None, None, None,
                              bot_cumulative_frequency, top_cumulative_frequency,
                              relative_frequency, mode)

def find_frequency_string(self, dataset, value):
    """Return how many times ``value`` occurs in ``dataset``.

    This is a plain occurrence count of one value, not a running
    (cumulative) total.
    """
    return dataset.count(value)

# Base 5 Rounding
def roundy(self, x, base = 5):
    """Round ``x`` to a multiple of ``base`` (5 by default).

    Ties are resolved by the built-in ``round()``.
    """
    multiples = round(x / base)
    return base * multiples
Expand All @@ -184,7 +236,7 @@ def find_frequency(self, bot, top):
print(f"Error converting to int: {e}")

total_frequency = 0
for i in range(bot, top + 1):
for i in range(bot, top):
frequency = self.dataset.count(i)
total_frequency = total_frequency + frequency
return total_frequency
Expand Down
77 changes: 59 additions & 18 deletions Main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,29 @@
import tabulate as tabulate

# Raw Data
dataset = (1,1,1,4,6,7,3,6,7,1,2,2,5,3,1,8,3,2)
dataset = (
"Apel", "Pisang", "Jeruk", "Mangga", "Semangka",
"Melon", "Pepaya", "Nanas", "Anggur", "Stroberi",
"Durian", "Salak", "Rambutan", "Sirsak", "Alpukat",
"Jambu Biji", "Pir", "Kelengkeng", "Markisa", "Leci",
"Ceri", "Blueberry", "Raspberry", "Kedondong", "Belimbing",
"Duku", "Manggis", "Kismis", "Kelengkeng", "Cempedak",
"Srikaya", "Delima", "Kiwi", "Plum", "Kurma",
"Aprikot", "Persik", "Buah Naga", "Nangka", "Pepino"
)

# Initiate Object From The Raw Data
data = ft.FrequencyTable(dataset)

# Processing Raw Data Into a Frequency Table
data.PopulateSimple()
# data.PopulateGrouped() # Grouped Data
# data.PopulateSimple() # Simple Data
data.PopulateString()

# Transform The Data To A Frequency Table
# Initiating The Data Using Pandas
# df = pd.DataFrame(
# Grouped Populated Data
# dfg = pd.DataFrame(
# {
# "Class Interval" : data.grouped.ranges,
# "Class Limit" : data.grouped.limit,
Expand All @@ -29,28 +41,57 @@
# }
# )

df = pd.DataFrame(
# # Simple Populated Data
# dfs = pd.DataFrame(
# {
# "Class" : data.simple.classval,
# "Frequency" : data.simple.frequency,

# "C <" : data.simple.bottom_limit,
# "CF <" : data.simple.bottom_cumulative_frequency,
# "C >" : data.simple.top_limit,
# "CF >" : data.simple.top_cumulative_frequency,
# "Relative Frequency" : data.simple.percentage_relative_frequency
# }
# )

# String (Text) Populated Data
dfa = pd.DataFrame(
{
"Class" : data.simple.classval,
"Frequency" : data.simple.frequency,
"Class" : data.text.classval,
"Frequency" : data.text.frequency,

"C <" : data.simple.bottom_limit,
"CF <" : data.simple.bottom_cumulative_frequency,
"C >" : data.simple.top_limit,
"CF >" : data.simple.top_cumulative_frequency,
"Relative Frequency" : data.simple.percentage_relative_frequency
"C <" : data.text.bottom_limit,
"CF <" : data.text.bottom_cumulative_frequency,
"C >" : data.text.top_limit,
"CF >" : data.text.top_cumulative_frequency,
"Relative Frequency" : data.text.percentage_relative_frequency
}
)


# Converting Pandas Data Into Tabulate
table = tabulate.tabulate(
df,
# tablesimple = tabulate.tabulate(
# dfs,
# headers='keys',
# tablefmt='pipe'
# )

# tablegrouped = tabulate.tabulate(
# dfg,
# headers='keys',
# tablefmt='pipe',
# )

tablestring = tabulate.tabulate(
dfa,
headers='keys',
tablefmt='pipe'
)
tablefmt='pipe',
)

# Print The Processed Data
# print(tablesimple)
# print(tablegrouped)
print(tablestring)

# Print Output Data
print(table)


0 comments on commit 9592684

Please sign in to comment.