refactor: Combining Simple and String Populate into Simple Populate

brotherzhafif · Oct 13, 2024 · 02e139c · 02e139c
1 parent 9592684
commit 02e139c
Show file tree

Hide file tree

Showing 3 changed files with 214 additions and 166 deletions.
diff --git a/Example.py b/Example.py
@@ -0,0 +1,97 @@
+# EXAMPLE PROGRAM
+import FrequencyTable as ft
+import pandas as pd
+import tabulate as tabulate
+
+# Raw Data
+dataset = (
+           "Apel", "Pisang", "Jeruk", "Mangga", "Semangka", 
+    "Melon", "Pepaya", "Nanas", "Anggur", "Stroberi",
+    "Durian", "Salak", "Rambutan", "Sirsak", "Alpukat",
+    "Jambu Biji", "Pir", "Kelengkeng", "Markisa", "Leci",
+    "Ceri", "Blueberry", "Raspberry", "Kedondong", "Belimbing",
+    "Duku", "Manggis", "Kismis", "Kelengkeng", "Cempedak",
+    "Srikaya", "Delima", "Kiwi", "Plum", "Kurma", 
+    "Aprikot", "Persik", "Buah Naga", "Nangka", "Pepino"
+)
+
+# Initiate Object From The Raw Data
+data = ft.FrequencyTable(dataset)
+
+# Process the Raw Data Into Grouped, Simple, and String Frequency Tables
+data.PopulateGrouped() # Grouped Data
+data.PopulateSimple() # Simple Data
+data.PopulateString() # String Data
+
+# Transform The Data To A Frequency Table
+# Initiating The Data Using Pandas
+# Grouped Populated Data
+dfg = pd.DataFrame(
+    {
+        "Class Interval" : data.grouped.ranges,
+        "Class Limit" : data.grouped.limit,
+        "Frequency" : data.grouped.frequency,
+        "Midpoint" : data.grouped.midpoint,
+
+        "C <" : data.grouped.bottom_limit,
+        "CF <" : data.grouped.bottom_cumulative_frequency,
+        "C >" : data.grouped.top_limit,
+        "CF >" : data.grouped.top_cumulative_frequency,
+        "Relative Frequency" : data.grouped.percentage_relative_frequency
+    }
+)
+
+# Simple Populated Data
+dfs = pd.DataFrame(
+    {
+        "Class" : data.simple.classval,
+        "Frequency" : data.simple.frequency,
+
+        "C <" : data.simple.bottom_limit,
+        "CF <" : data.simple.bottom_cumulative_frequency,
+        "C >" : data.simple.top_limit,
+        "CF >" : data.simple.top_cumulative_frequency,
+        "Relative Frequency" : data.simple.percentage_relative_frequency
+    }
+)
+
+# String Populated Data
+dfa = pd.DataFrame(
+    {
+        "Class" : data.text.classval,
+        "Frequency" : data.text.frequency,
+
+        "C <" : data.text.bottom_limit,
+        "CF <" : data.text.bottom_cumulative_frequency,
+        "C >" : data.text.top_limit,
+        "CF >" : data.text.top_cumulative_frequency,
+        "Relative Frequency" : data.text.percentage_relative_frequency
+    }
+)
+
+# Converting Pandas Data Into Tabulate
+tablesimple = tabulate.tabulate(
+    dfs,
+    headers='keys',
+    tablefmt='pipe'
+) 
+
+tablegrouped = tabulate.tabulate(
+    dfg,
+    headers='keys',
+    tablefmt='pipe',
+)
+
+tablestring = tabulate.tabulate(
+    dfa,
+    headers='keys',
+    tablefmt='pipe',
+)
+
+# Print The Processed Data
+print(tablesimple)
+print(tablegrouped)
+print(tablestring)
+
+
+
diff --git a/FrequencyTable.py b/FrequencyTable.py
@@ -4,19 +4,21 @@
 # Frequency Table Class 
 class FrequencyTable:
     def __init__(self, dataset):
+        # Check for mixed data types (both numeric and string)
+        if any(isinstance(item, str) for item in dataset) and any(isinstance(item, (int, float)) for item in dataset):
+            raise ValueError("Data is corrupted: contains both numeric and string values.")
+
         # Data Initiation
         self.dataset = sorted(dataset)
         self.length = len(dataset)
-        self.lowest = min(dataset)
-        self.highest = max(dataset)
-
-         # Classes is Rounding Down
-        # Math Log Base 10 In Python For Accurate Result
-        self.classes = 1 + (3.222 * np.log10(self.length))
-        self.classes = round(self.classes - 0.5)
-
-        # Condition if the data is contain string
-        if not any(isinstance(item, str) for item in self.dataset):  
+        self.lowest = min(dataset) if isinstance(dataset[0], (int, float)) else None
+        self.highest = max(dataset) if isinstance(dataset[0], (int, float)) else None
+
+        # Number of Classes, Rounded Down
+        if self.lowest is not None:  # Only calculate classes for numeric data
+            self.classes = 1 + (3.222 * np.log10(self.length))
+            self.classes = round(self.classes - 0.5)
+
             # Sum of the data and range
             self.sum = sum(dataset)
             self.range = self.highest - self.lowest
@@ -25,10 +27,10 @@ def __init__(self, dataset):
             self.interval = self.range / self.classes 
             self.interval = round(self.interval + 0.5)
 
-            # Rounding Both Limit So The Data Would Be Simple And Easier To Read
+            # Rounding Both Limits So The Data Would Be Simple And Easier To Read
             self.base = self.roundy(self.lowest - 3)
             self.top = self.roundy(self.highest + 3)
-            
+
             # Mean or Average
             self.mean = (self.sum / self.length)
 
@@ -37,14 +39,34 @@ def __init__(self, dataset):
 
             # Formula for Standard Deviation
             self.deviation = (self.variance ** 0.5)
-            
+
             # Formula to find Dataset Skewness
-            self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
+            self.skewness = (self.length / ((self.length - 1) * (self.length - 2))) * \
+                            sum(((x - self.mean) / self.deviation) ** 3 for x in self.dataset)
 
             # Formula to find Dataset Kurtosis
-            self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
-                    (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
-
+            self.kurtosis = (self.length * (self.length + 1) * sum(((x - self.mean) / self.deviation) ** 4 for x in self.dataset) / 
+                            ((self.length - 1) * (self.length - 2) * (self.length - 3))) - \
+                            (3 * (self.length - 1) ** 2) / ((self.length - 2) * (self.length - 3))
+
+    # Base 5 Rounding
+    def roundy(self, x, base=5):
+        return base * round(x / base)
+
+    # Count How Many Dataset Values Fall in the Integer Range bot..top (bot Inclusive, top Exclusive)
+    def find_frequency(self, bot, top):
+        try:
+            bot = int(bot)
+            top = int(top)
+        except (ValueError, TypeError) as e:
+            print(f"Error converting to int: {e}")
+
+        total_frequency = 0
+        for i in range(bot, top):
+            frequency = self.dataset.count(i)
+            total_frequency = total_frequency + frequency
+        return total_frequency
+
     # Populate Grouped Table Frequency Data Method
     def PopulateGrouped(self):
         # Initiating Used List
@@ -127,120 +149,72 @@ def PopulateGrouped(self):
 
     # Populate Simple Table Frequency Data Method    
     def PopulateSimple(self):
-        # Deleting Duplicate and Sort the Data
-        data = sorted(set(self.dataset))
-
-        # Initiating Used Variable
-        top_limit = []
-        bottom_limit = []
-        frequency = []
-        top_cumulative_frequency = []
-        bot_cumulative_frequency = []
-        relative_frequency = []
-        mode = []
-
-        for current_class in data:
-            # Bottom Limit of the Class
-            current_top_limit = current_class + 0.5
-            current_bottom_limit = current_class - 0.5
-
-            # Top Limit of the Class
-            top_limit.append(current_top_limit)
-            bottom_limit.append(current_bottom_limit)
-
-            # Calculate Current Class Frequency 
-            current_frequency = self.dataset.count(current_class)
-            frequency.append(current_frequency)
-
-            # Calculate Current Class Bottom Cumulative Frequency
-            current_bot_cumulative_frequency = self.find_frequency(self.lowest -1 , current_class)
-            bot_cumulative_frequency.append(current_bot_cumulative_frequency)
-
-            # Calculate Current Class Top Cumulative Frequency
-            current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
-            top_cumulative_frequency.append(current_top_cumulative_frequency)
-
-            # Calculate Current Class Relative Frequency 
-            current_relative_frequency = np.round((current_frequency / self.length) * 100)
-            relative_frequency.append(current_relative_frequency)
-
-        # Temukan modus
-        mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
-        mode = [data[i] for i in mode_index]
-
-        # Buat objek ProcessedData
-        self.simple = ProcessedData(data, None, None, bottom_limit, top_limit, 
-                                    frequency, None, None, None, 
-                                    bot_cumulative_frequency, top_cumulative_frequency, 
-                                    relative_frequency, mode)
-
-    # Populate Simple String Table Frequency Data Method 
-    def PopulateString(self):
-        # Memastikan bahwa dataset berisi string
+        # Initialize general variables
+        data = sorted(set(self.dataset))  # Remove duplicates and sort the data
+        frequency = []  # To store the frequency of each class
+        top_cumulative_frequency = []  # To store top cumulative frequency for each class
+        bot_cumulative_frequency = []  # To store bottom cumulative frequency for each class
+        relative_frequency = []  # To store relative frequency for each class
+        mode = []  # To store the mode(s)
+
+        # Variables specifically for numeric data
+        top_limit = None
+        bottom_limit = None
+
+        # Check if the dataset is not entirely string-based (for numeric data)
         if not all(isinstance(item, str) for item in self.dataset):
-            raise ValueError("Dataset harus berisi string saja untuk menggunakan PopulateString.")
-
-        # Menghapus duplikat dan mengurutkan data secara alfabetis
-        data = sorted(set(self.dataset))
-
-        # Variabel yang diperlukan
-        frequency = []
-        top_cumulative_frequency = []
-        bot_cumulative_frequency = []
-        relative_frequency = []
-        mode = []
+            # Initialize limits for numeric data
+            top_limit = []
+            bottom_limit = []
 
-        # Menghitung frekuensi untuk setiap string unik dalam dataset
+        # Single loop to process both numeric and string data
         for current_class in data:
-            # Menghitung frekuensi dari string saat ini
+            # Calculate the frequency of the current class
             current_frequency = self.dataset.count(current_class)
             frequency.append(current_frequency)
 
-            # Menghitung cumulative frequency (bawah)
-            current_bot_cumulative_frequency = self.find_frequency_string(self.dataset, current_class)
-            bot_cumulative_frequency.append(current_bot_cumulative_frequency)
-
-            # Menghitung cumulative frequency (atas)
-            current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
-            top_cumulative_frequency.append(current_top_cumulative_frequency)
-
-            # Menghitung relative frequency
+            # Calculate the relative frequency for the current class
             current_relative_frequency = np.round((current_frequency / self.length) * 100)
             relative_frequency.append(current_relative_frequency)
 
-        # Menemukan modus (nilai string yang paling sering muncul)
+            # If the data is numeric, calculate limits and cumulative frequencies
+            if top_limit is not None and bottom_limit is not None:
+                # Calculate top and bottom limits for numeric data
+                current_top_limit = current_class + 0.5
+                current_bottom_limit = current_class - 0.5
+                top_limit.append(current_top_limit)
+                bottom_limit.append(current_bottom_limit)
+
+                # Calculate bottom cumulative frequency for numeric data
+                current_bot_cumulative_frequency = self.find_frequency(self.lowest - 1, current_class)
+                bot_cumulative_frequency.append(current_bot_cumulative_frequency)
+
+                # Calculate top cumulative frequency for numeric data
+                current_top_cumulative_frequency = self.find_frequency(current_class + 1, self.highest + 1)
+                top_cumulative_frequency.append(current_top_cumulative_frequency)
+
+            else:
+                # If the data is string-based, calculate cumulative frequencies
+                # Bottom cumulative frequency for strings (NOTE: currently just this class's own count)
+                current_bot_cumulative_frequency = self.dataset.count(current_class)
+                bot_cumulative_frequency.append(current_bot_cumulative_frequency)
+
+                # Top cumulative frequency for strings: sum of frequencies so far minus the value above
+                current_top_cumulative_frequency = sum(frequency) - current_bot_cumulative_frequency
+                top_cumulative_frequency.append(current_top_cumulative_frequency)
+
+        # Find the mode (the class with the highest frequency)
         mode_index = [i for i, val in enumerate(frequency) if val == max(frequency)]
         mode = [data[i] for i in mode_index]
 
-        # Menyimpan data yang diproses ke dalam atribut simple
-        self.text = ProcessedData(data, None, None, None, None, 
-                                         frequency, None, None, None, 
-                                         bot_cumulative_frequency, top_cumulative_frequency, 
-                                         relative_frequency, mode)
+        # Create the ProcessedData object based on the data type
+        self.simple = ProcessedData(
+            data, None, None, bottom_limit, top_limit, 
+            frequency, None, None, None, 
+            bot_cumulative_frequency, top_cumulative_frequency, 
+            relative_frequency, mode
+        )
 
-    def find_frequency_string(self, dataset, value):
-        # Fungsi untuk menghitung frekuensi cumulative string dari dataset
-        frequency = dataset.count(value)
-        return frequency
-
-    # Base 5 Rounding
-    def roundy(self, x, base = 5):
-        return base * round(x/base)
-
-    # Function To Find Frequency in Dataset with Desired Range (Top and Down Limit)
-    def find_frequency(self, bot, top):
-        try:
-            bot = int(bot)
-            top = int(top)
-        except (ValueError, TypeError) as e:
-            print(f"Error converting to int: {e}")
-
-        total_frequency = 0
-        for i in range(bot, top):
-            frequency = self.dataset.count(i)
-            total_frequency = total_frequency + frequency
-        return total_frequency
-
 # Processed Data Assignment 
 class ProcessedData:
     # Limit (L), Frequency (F), Ranges (R), Midpoint (M), Cumulative (C), Relative (R)