Skip to content

Commit

Permalink
refactor: Separate Each Class into Each Module
Browse files Browse the repository at this point in the history
  • Loading branch information
rscuad committed Oct 18, 2024
1 parent 1f43387 commit de4fe9a
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 94 deletions.
9 changes: 4 additions & 5 deletions Statistic_Calculation.py → Calculations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Statistical Calculations Class
class Statistic_Calculation:
class Describe:
@staticmethod
def mean(dataset):
    """Return the arithmetic mean of ``dataset``.

    The sum of the values is divided by the number of values, so an
    empty dataset raises ``ZeroDivisionError``.
    """
    total = sum(dataset)
    count = len(dataset)
    return total / count
Expand All @@ -22,14 +21,14 @@ def kurtosis(dataset, mean, deviation):
n = len(dataset)
return (n * (n + 1) * sum(((x - mean) / deviation) ** 4 for x in dataset) /
((n - 1) * (n - 2) * (n - 3))) - (3 * (n - 1) ** 2) / ((n - 2) * (n - 3))

@staticmethod
def median(dataset):
    """Return the median of ``dataset``.

    The values are sorted into a new list, so the input is not modified.
    For an odd number of values the middle one is returned; for an even
    number the two middle values are averaged.
    """
    sorted_data = sorted(dataset)
    n = len(sorted_data)
    mid = n // 2

    if n % 2 == 0:
        # Even count: average the two values that straddle the centre.
        return (sorted_data[mid - 1] + sorted_data[mid]) / 2
    # Odd count: the centre value is the median.
    return sorted_data[mid]
98 changes: 21 additions & 77 deletions FrequencyTable.py
Original file line number Diff line number Diff line change
@@ -1,85 +1,31 @@
import numpy as np
from Calculations import Describe
from Summary import Result

class StatisticalCalculations:
    """Stateless helpers for descriptive statistics.

    Every method is a static method operating on a sequence of numbers.
    The derived measures take the already computed mean / deviation as
    arguments instead of recomputing them.
    """

    @staticmethod
    def mean(dataset):
        """Return the sum of the values divided by their count."""
        total = sum(dataset)
        return total / len(dataset)

    @staticmethod
    def variance(dataset, mean):
        """Return the average squared distance from ``mean`` (divides by n)."""
        squared_gaps = [(value - mean) ** 2 for value in dataset]
        return sum(squared_gaps) / len(dataset)

    @staticmethod
    def standard_deviation(variance):
        """Return the square root of ``variance``."""
        return pow(variance, 0.5)

    @staticmethod
    def skewness(dataset, mean, deviation):
        """Return the size-adjusted skewness of ``dataset``.

        Sums the cubed standardized values and scales the result by
        ``n / ((n - 1) * (n - 2))``.
        """
        count = len(dataset)
        scale = count / ((count - 1) * (count - 2))
        cubed_total = sum(((value - mean) / deviation) ** 3 for value in dataset)
        return scale * cubed_total

    @staticmethod
    def kurtosis(dataset, mean, deviation):
        """Return the size-adjusted kurtosis of ``dataset``.

        Scales the sum of the fourth powers of the standardized values and
        subtracts the ``3 * (n - 1) ** 2 / ((n - 2) * (n - 3))`` term.
        """
        count = len(dataset)
        fourth_total = sum(((value - mean) / deviation) ** 4 for value in dataset)
        numerator = count * (count + 1) * fourth_total
        denominator = (count - 1) * (count - 2) * (count - 3)
        correction = (3 * (count - 1) ** 2) / ((count - 2) * (count - 3))
        return numerator / denominator - correction

    @staticmethod
    def median(dataset):
        """Return the middle value of the sorted data.

        With an even number of values the two middle values are averaged.
        """
        ordered = sorted(dataset)
        middle, is_odd = divmod(len(ordered), 2)
        if is_odd:
            return ordered[middle]
        return (ordered[middle - 1] + ordered[middle]) / 2

# Processed Data Assignment
class ProcessedData:
    """Plain container for the columns of a finished frequency table.

    The constructor only stores its arguments under descriptive attribute
    names and derives a formatted percentage column from ``RF``.
    """

    def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
        """Store every column on the instance.

        ``RF`` must be iterable: each entry is formatted with two decimals
        and a trailing ``%`` into ``percentage_relative_frequency``.
        """
        self.top = top                              # stored from ``top``
        self.limit = L                              # stored from ``L``
        self.ranges = R                             # stored from ``R``
        self.bottom = bot                           # stored from ``bot``
        self.midpoint = M                           # stored from ``M``
        self.frequency = F                          # stored from ``F``
        self.classval = data                        # stored from ``data``
        self.top_limit = top_L                      # stored from ``top_L``
        self.bottom_limit = bot_L                   # stored from ``bot_L``
        self.mode = mode
        self.bottom_cumulative_frequency = bot_CF
        self.top_cumulative_frequency = top_CF
        self.relative_frequency = RF

        # Human-readable copy of the relative frequencies, e.g. "12.50%".
        formatted = []
        for rf in RF:
            formatted.append(f"{rf:.2f}%")
        self.percentage_relative_frequency = formatted

# Frequency Table Class
class FrequencyTable:
class Classify:
def __init__(self, dataset):
    """Validate ``dataset`` and derive its basic properties.

    Stores a sorted copy of the data and its length. For numeric data it
    also stores the lowest and highest values, then computes the
    descriptive statistics and the class layout. For non-numeric data
    ``lowest`` and ``highest`` stay ``None`` and both calculations are
    skipped.

    Raises:
        ValueError: If ``dataset`` is empty, or if it mixes strings with
            numeric values.
    """
    # An empty dataset previously surfaced as a bare IndexError from
    # ``dataset[0]``; reject it up front with a clear message instead.
    if len(dataset) == 0:
        raise ValueError("Data is empty: at least one value is required.")

    # Check for mixed data types
    has_text = any(isinstance(item, str) for item in dataset)
    has_numbers = any(isinstance(item, (int, float)) for item in dataset)
    if has_text and has_numbers:
        raise ValueError("Data is corrupted: contains both numeric and string values.")

    # Data Initiation
    self.dataset = sorted(dataset)
    self.length = len(dataset)

    # The first element decides whether the data is treated as numeric.
    is_numeric = isinstance(dataset[0], (int, float))
    self.lowest = min(dataset) if is_numeric else None
    self.highest = max(dataset) if is_numeric else None

    # Only numeric data gets statistics and class boundaries.
    if self.lowest is not None:
        self._calculate_statistics()
        self._calculate_classes()

def _calculate_statistics(self):
    """Compute the descriptive statistics of ``self.dataset``.

    Sets ``sum``, ``mean``, ``median``, ``variance``, ``deviation``,
    ``skewness`` and ``kurtosis`` on the instance via the ``Describe``
    helpers. The order matters: the variance needs the mean, the
    deviation needs the variance, and skewness / kurtosis need both the
    mean and the deviation.
    """
    self.sum = sum(self.dataset)
    self.mean = Describe.mean(self.dataset)
    self.median = Describe.median(self.dataset)
    self.variance = Describe.variance(self.dataset, self.mean)
    self.deviation = Describe.standard_deviation(self.variance)
    self.skewness = Describe.skewness(self.dataset, self.mean, self.deviation)
    self.kurtosis = Describe.kurtosis(self.dataset, self.mean, self.deviation)


def _calculate_classes(self):
self.classes = 1 + (3.222 * np.log10(self.length))
self.classes = round(self.classes - 0.5)
self.range = self.highest - self.lowest
Expand Down Expand Up @@ -113,8 +59,9 @@ def populate_grouped(self):

# Initiating Variables for Frequency Table
current_number = self.base - 1
top_cumulative_freq = 1

while True:
while top_cumulative_freq != 0:
old_number = current_number + 1
self.bottom.append(old_number)

Expand Down Expand Up @@ -144,15 +91,12 @@ def populate_grouped(self):
current_relative_frequency = np.round((current_frequency / self.length) * 100)
self.relative_frequency.append(current_relative_frequency)

if current_frequency == 0:
break

# Find Mode
mode_index = [i for i, val in enumerate(self.frequency) if val == max(self.frequency)]
self.mode = [self.data_range[i] for i in mode_index]

# Create ProcessedData object
self.grouped = ProcessedData(None, self.bottom, self.top, self.bottom_limit, self.top_limit,
# Create Result object
self.grouped = Result(None, self.bottom, self.top, self.bottom_limit, self.top_limit,
self.frequency, self.data_range, self.data_limit, self.data_midpoint,
self.bot_cumulative_frequency, self.top_cumulative_frequency,
self.relative_frequency, self.mode,
Expand Down Expand Up @@ -184,8 +128,8 @@ def populate_simple(self):
mode_index = [i for i, val in enumerate(self.frequency) if val == max(self.frequency)]
self.mode = [unique_data[i] for i in mode_index]

# Create ProcessedData object
self.simple = ProcessedData(
# Create Result object
self.simple = Result(
unique_data, None, None, self.bottom_limit, self.top_limit,
self.frequency, None, None, None,
self.bot_cumulative_frequency, self.top_cumulative_frequency,
Expand Down
6 changes: 3 additions & 3 deletions Main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# EXAMPLE PROGRAM
import FrequencyTable as ft
from FrequencyTable import Classify
import tabulate as tabulate
import Transform as tf
import pandas as pd
Expand All @@ -18,9 +18,8 @@
12.9, 12.8, 12.7, 12.6, 12.5, 12.4
]


# Initiate Object From The Raw Data
data = ft.FrequencyTable(dataset)
data = Classify(dataset)
data.populate_simple() # Simple Data

# Simple Populated Data
Expand Down Expand Up @@ -67,6 +66,7 @@
headers='keys',
tablefmt='pipe',
)
print(data.interval)
print(tablegrouped)
print(data.grouped.mode)
print(data.mean)
Expand Down
16 changes: 7 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,24 @@
> My statistics tools, made with Python
### Features
- Frequency Table
- Descriptive Statistics
- Display Chart
- Data Transformation
- Auto Classify Data Into Frequency Table
- Descriptive Statistics Calculation
- Various Data Transformation Calculations

### Required
- Matplotlib
- Matplotlib_Venn
- Numpy
- Tabulate
- Pandas
- Scipy
- Tabulate ( Optional for printing Frequency Table )
- Pandas ( Optional for printing Frequency Table )

### Structure
- Program Structure

+ Main.py [ Main Control file ]
+ FrequencyTable.py [ Frequency Table Module ]
+ Chart.py [ Chart Display Module ]
+ Transform.py [ Data Transformation Module ]
+ Calculations.py [ Descriptive Statistics Module ]
+ Summary.py [ Holds the Processed Data ]

### How to Use
- For detailed Documentation, please visit [Pythistic Wiki](https://github.com/brotherzhafif/Pythistic/wiki)
Expand Down
16 changes: 16 additions & 0 deletions Summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
class Result:
    """Holds the processed columns of a frequency table.

    Each constructor argument is stored unchanged under a longer attribute
    name; ``percentage_relative_frequency`` is derived from ``RF``.
    """

    def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
        """Copy the given columns onto the instance.

        ``RF`` must be iterable because every entry is rendered as a
        two-decimal percentage string.
        """
        # Class boundaries, ranges and midpoints.
        self.top = top
        self.limit = L
        self.ranges = R
        self.bottom = bot
        self.midpoint = M

        # Frequencies and the values they belong to.
        self.frequency = F
        self.classval = data

        # Class limits.
        self.top_limit = top_L
        self.bottom_limit = bot_L

        self.mode = mode

        # Cumulative and relative frequencies.
        self.bottom_cumulative_frequency = bot_CF
        self.top_cumulative_frequency = top_CF
        self.relative_frequency = RF

        # Same values as ``relative_frequency``, formatted like "12.50%".
        self.percentage_relative_frequency = [f"{rf:.2f}%" for rf in RF]

0 comments on commit de4fe9a

Please sign in to comment.