feat: Adding Populate Text Frequency Table

brotherzhafif · Oct 13, 2024 · 9592684 · 9592684
1 parent b929b19
commit 9592684
Show file tree

Hide file tree

Showing 2 changed files with 145 additions and 52 deletions.
diff --git a/FrequencyTable.py b/FrequencyTable.py
@@ -6,43 +6,45 @@ class FrequencyTable:
     def __init__(self, dataset):
         # Data Initiation
         self.dataset = sorted(dataset)
-        self.sum = sum(dataset)
         self.length = len(dataset)
         self.lowest = min(dataset)
         self.highest = max(dataset)
-
-        # Counting Data Range
-        self.range = self.highest - self.lowest 
-
-        # Classes is Rounding Down
+
+        # Number of Classes, Rounded Down
         # Math Log Base 10 In Python For Accurate Result
         self.classes = 1 + (3.222 * np.log10(self.length))
         self.classes = round(self.classes - 0.5)
+
+        # Numeric statistics are computed only when the dataset contains no strings
+        if not any(isinstance(item, str) for item in self.dataset):  
+            # Sum of the data and range
+            self.sum = sum(dataset)
+            self.range = self.highest - self.lowest
 
-        # Interval is Rounding Up
-        self.interval = self.range / self.classes 
-        self.interval = round(self.interval + 0.5)
+            # Interval is Rounding Up
+            self.interval = self.range / self.classes 
+            self.interval = round(self.interval + 0.5)
 
-        # Rounding Both Limit So The Data Would Be Simple And Easier To Read
-        self.base = self.roundy(self.lowest - 3)
-        self.top = self.roundy(self.highest + 3)
-
-        # Mean or Average
-        self.mean = (self.sum / self.length)
+            # Rounding Both Limit So The Data Would Be Simple And Easier To Read
+            self.base = self.roundy(self.lowest - 3)
+            self.top = self.roundy(self.highest + 3)
+            
+            # Mean or Average
+            self.mean = (self.sum / self.length)
 
-        # Formula for Variance
-        self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length
+            # Formula for Variance
+            self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length
 
-        # Formula for Standard Deviation
-        self.deviation = (self.variance ** 0.5)
-
-        # Formula to find Dataset Skewness
-        self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
+            # Formula for Standard Deviation
+            self.deviation = (self.variance ** 0.5)
+            
+            # Formula to find Dataset Skewness
+            self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
 
-        # Formula to find Dataset  
-        self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
-                (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
-                   
+            # Formula to find Dataset Kurtosis
+            self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
+                    (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
+
     # Populate Grouped Table Frequency Data Method
     def PopulateGrouped(self):
         # Initiating Used List
@@ -85,7 +87,7 @@ def PopulateGrouped(self):
             top_limit.append(current_top_limit)
 
             # Finding The Frequency That Range
-            current_frequency = self.find_frequency(old_number, current_number)
+            current_frequency = self.find_frequency(old_number, current_number + 1)
             frequency.append(current_frequency)
 
             # Adding The Number Range From Both Frequency
@@ -101,11 +103,11 @@ def PopulateGrouped(self):
             data_midpoint.append(current_data_midpoint)
 
             # Adding Bottom Cumulative Frequency of The Class 
-            current_bot_cumulative_frequency = self.find_frequency(self.lowest, old_number)
+            current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, old_number)
             bot_cumulative_frequency.append(current_bot_cumulative_frequency)
 
             # Adding Top Cumulative Frequency of The Class 
-            current_top_cumulative_frequency = self.find_frequency(old_number, self.highest)
+            current_top_cumulative_frequency = self.find_frequency(current_number + 1, self.highest + 1)
             top_cumulative_frequency.append(current_top_cumulative_frequency)
 
             # Counting the Relative Frequency in Percentage
@@ -122,7 +124,8 @@ def PopulateGrouped(self):
                                      frequency, data_range, data_limit, data_midpoint, 
                                      bot_cumulative_frequency, top_cumulative_frequency, 
                                      relative_frequency, mode)
-
+
+    # Populate Simple Table Frequency Data Method    
     def PopulateSimple(self):
         # Deleting Duplicate and Sort the Data
         data = sorted(set(self.dataset))
@@ -150,11 +153,11 @@ def PopulateSimple(self):
             frequency.append(current_frequency)
 
             # Calculate Current Class Bottom Cumulative Frequency
-            current_bot_cumulative_frequency = self.find_frequency(self.lowest, current_class)
+            current_bot_cumulative_frequency = self.find_frequency(self.lowest -1 , current_class)
             bot_cumulative_frequency.append(current_bot_cumulative_frequency)
 
             # Calculate Current Class Top Cumulative Frequency
-            current_top_cumulative_frequency = self.find_frequency(current_class, self.highest)
+            current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
             top_cumulative_frequency.append(current_top_cumulative_frequency)
 
             # Calculate Current Class Relative Frequency 
@@ -170,7 +173,56 @@ def PopulateSimple(self):
                                     frequency, None, None, None, 
                                     bot_cumulative_frequency, top_cumulative_frequency, 
                                     relative_frequency, mode)
-
+
+    # Populate Simple String Table Frequency Data Method 
+    def PopulateString(self):
+        # Memastikan bahwa dataset berisi string
+        if not all(isinstance(item, str) for item in self.dataset):
+            raise ValueError("Dataset harus berisi string saja untuk menggunakan PopulateString.")
+
+        # Menghapus duplikat dan mengurutkan data secara alfabetis
+        data = sorted(set(self.dataset))
+
+        # Variabel yang diperlukan
+        frequency = []
+        top_cumulative_frequency = []
+        bot_cumulative_frequency = []
+        relative_frequency = []
+        mode = []
+
+        # Menghitung frekuensi untuk setiap string unik dalam dataset
+        for current_class in data:
+            # Menghitung frekuensi dari string saat ini
+            current_frequency = self.dataset.count(current_class)
+            frequency.append(current_frequency)
+
+            # Menghitung cumulative frequency (bawah)
+            current_bot_cumulative_frequency = self.find_frequency_string(self.dataset, current_class)
+            bot_cumulative_frequency.append(current_bot_cumulative_frequency)
+
+            # Menghitung cumulative frequency (atas)
+            current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
+            top_cumulative_frequency.append(current_top_cumulative_frequency)
+
+            # Menghitung relative frequency
+            current_relative_frequency = np.round((current_frequency / self.length) * 100)
+            relative_frequency.append(current_relative_frequency)
+
+        # Menemukan modus (nilai string yang paling sering muncul)
+        mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
+        mode = [data[i] for i in mode_index]
+
+        # Menyimpan data yang diproses ke dalam atribut simple
+        self.text = ProcessedData(data, None, None, None, None, 
+                                         frequency, None, None, None, 
+                                         bot_cumulative_frequency, top_cumulative_frequency, 
+                                         relative_frequency, mode)
+
+    def find_frequency_string(self, dataset, value):
+        # Fungsi untuk menghitung frekuensi cumulative string dari dataset
+        frequency = dataset.count(value)
+        return frequency
+
     # Base 5 Rounding
     def roundy(self, x, base = 5):
         """Round ``x`` to a multiple of ``base`` (5 unless overridden).

         The quotient ``x / base`` is rounded with the built-in ``round``,
         which resolves exact halves to the even neighbour, and is then
         scaled back up by ``base``.
         """
         multiples = round(x / base)
         return base * multiples
@@ -184,7 +236,7 @@ def find_frequency(self, bot, top):
             print(f"Error converting to int: {e}")
 
         total_frequency = 0
-        for i in range(bot, top + 1):
+        for i in range(bot, top):
             frequency = self.dataset.count(i)
             total_frequency = total_frequency + frequency
         return total_frequency

diff --git a/Main.py b/Main.py
@@ -4,17 +4,29 @@
 import tabulate as tabulate
 
 # Raw Data
-dataset = (1,1,1,4,6,7,3,6,7,1,2,2,5,3,1,8,3,2)
+dataset = (
+           "Apel", "Pisang", "Jeruk", "Mangga", "Semangka", 
+    "Melon", "Pepaya", "Nanas", "Anggur", "Stroberi",
+    "Durian", "Salak", "Rambutan", "Sirsak", "Alpukat",
+    "Jambu Biji", "Pir", "Kelengkeng", "Markisa", "Leci",
+    "Ceri", "Blueberry", "Raspberry", "Kedondong", "Belimbing",
+    "Duku", "Manggis", "Kismis", "Kelengkeng", "Cempedak",
+    "Srikaya", "Delima", "Kiwi", "Plum", "Kurma", 
+    "Aprikot", "Persik", "Buah Naga", "Nangka", "Pepino"
+)
 
 # Initiate Object From The Raw Data
 data = ft.FrequencyTable(dataset)
 
 # Processing Raw Data Into a Frequency Table
-data.PopulateSimple()
+# data.PopulateGrouped() # Grouped Data
+# data.PopulateSimple() # Simple Data
+data.PopulateString()
 
 # Transform The Data To A Frequency Table
 # Initiating The Data Using Pandas
-# df = pd.DataFrame(
+# Grouped Populated Data
+# dfg = pd.DataFrame(
 #     {
 #         "Class Interval" : data.grouped.ranges,
 #         "Class Limit" : data.grouped.limit,
@@ -29,28 +41,57 @@
 #     }
 # )
 
-df = pd.DataFrame(
+# # Simple Populated Data
+# dfs = pd.DataFrame(
+#     {
+#         "Class" : data.simple.classval,
+#         "Frequency" : data.simple.frequency,
+
+#         "C <" : data.simple.bottom_limit,
+#         "CF <" : data.simple.bottom_cumulative_frequency,
+#         "C >" : data.simple.top_limit,
+#         "CF >" : data.simple.top_cumulative_frequency,
+#         "Relative Frequency" : data.simple.percentage_relative_frequency
+#     }
+# )
+
+# String (Text) Populated Data
+dfa = pd.DataFrame(
     {
-        "Class" : data.simple.classval,
-        "Frequency" : data.simple.frequency,
+        "Class" : data.text.classval,
+        "Frequency" : data.text.frequency,
 
-        "C <" : data.simple.bottom_limit,
-        "CF <" : data.simple.bottom_cumulative_frequency,
-        "C >" : data.simple.top_limit,
-        "CF >" : data.simple.top_cumulative_frequency,
-        "Relative Frequency" : data.simple.percentage_relative_frequency
+        "C <" : data.text.bottom_limit,
+        "CF <" : data.text.bottom_cumulative_frequency,
+        "C >" : data.text.top_limit,
+        "CF >" : data.text.top_cumulative_frequency,
+        "Relative Frequency" : data.text.percentage_relative_frequency
     }
 )
 
-
 # Converting Pandas Data Into Tabulate
-table = tabulate.tabulate(
-    df,
+# tablesimple = tabulate.tabulate(
+#     dfs,
+#     headers='keys',
+#     tablefmt='pipe'
+# ) 
+
+# tablegrouped = tabulate.tabulate(
+#     dfg,
+#     headers='keys',
+#     tablefmt='pipe',
+# )
+
+tablestring = tabulate.tabulate(
+    dfa,
     headers='keys',
-    tablefmt='pipe'
-) 
+    tablefmt='pipe',
+)
+
+# Print The Processed Data
+# print(tablesimple)
+# print(tablegrouped)
+print(tablestring)
 
-# Print Output Data
-print(table)