Skip to content

Commit

Permalink
feat: Uploading as Library to PyPI
Browse files Browse the repository at this point in the history
  • Loading branch information
brotherzhafif committed Oct 15, 2024
1 parent 5a550df commit 11e24a5
Show file tree
Hide file tree
Showing 6 changed files with 494 additions and 0 deletions.
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 BrotherZhafif

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
109 changes: 109 additions & 0 deletions pythistic/Chart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Chart.py
import matplotlib.pyplot as plt
import numpy as np
from matplotlib_venn import venn2, venn3

class Chart:
    """Prepare one matplotlib chart at a time and display it on request.

    Every chart method opens a new figure, remembers it in ``self.figure``
    and applies the title / axis labels supplied to the constructor.
    Nothing is displayed until :meth:`show` is called.
    """

    def __init__(self, title="", xlabel="", ylabel=""):
        """Store the labels used by every chart prepared with this object.

        Args:
            title: Chart title; skipped when empty.
            xlabel: X-axis label; skipped when empty.
            ylabel: Y-axis label; skipped when empty.
        """
        self.title = title
        self.xlabel = xlabel
        self.ylabel = ylabel
        # Stays None until one of the chart methods has been called.
        self.figure = None

    def _apply_common_properties(self):
        """Apply the non-empty title and axis labels to the current pyplot axes."""
        if self.title:
            plt.title(self.title)
        if self.xlabel:
            plt.xlabel(self.xlabel)
        if self.ylabel:
            plt.ylabel(self.ylabel)

    def box(self, x_values, y_values, is_range=False):
        """Prepare a bar chart of ``y_values`` labelled by ``x_values``.

        Args:
            x_values: One tick label per bar.
            y_values: Bar heights.
            is_range: When True the labels are used as given (e.g. range
                strings); otherwise each label is converted with ``str``.
        """
        self.figure = plt.figure(figsize=(10, 6))
        positions = range(len(y_values))

        plt.bar(positions, y_values, width=0.5, alpha=0.7, color='b')

        tick_labels = x_values if is_range else [str(x) for x in x_values]
        plt.xticks(positions, tick_labels)

        self._apply_common_properties()
        plt.grid(axis='y')

    def line(self, x_values, y_values, is_range=False):
        """Prepare a line chart with circular markers.

        Args:
            x_values: X coordinates (or class midpoints when ``is_range``).
            y_values: Y coordinates.
            is_range: When True ``x_values`` is materialised into a list
                before plotting; the values themselves are not modified.
        """
        self.figure = plt.figure(figsize=(10, 6))
        if is_range:
            x_values = list(x_values)

        plt.plot(x_values, y_values, marker='o')
        self._apply_common_properties()
        plt.grid()

    def scatter(self, x_values, y_values, is_range=False):
        """Prepare a scatter plot.

        Args:
            x_values: X coordinates (or class midpoints when ``is_range``).
            y_values: Y coordinates.
            is_range: When True ``x_values`` is materialised into a list
                before plotting; the values themselves are not modified.
        """
        self.figure = plt.figure(figsize=(10, 6))
        if is_range:
            x_values = list(x_values)

        plt.scatter(x_values, y_values, alpha=0.6, edgecolors='w', s=100)
        self._apply_common_properties()
        plt.grid()

    def pie(self, data, labels):
        """Prepare a pie chart showing the percentage share of each value.

        Only the title is applied; axis labels are not used for pie charts.
        """
        self.figure = plt.figure(figsize=(8, 8))
        plt.pie(data, labels=labels, autopct='%1.1f%%', startangle=140)
        if self.title:
            plt.title(self.title)

    def heatmap(self, data, annot=True, cmap='viridis'):
        """Prepare a heatmap of 2D ``data`` with a colour bar.

        Args:
            data: 2D matrix-like data passed to ``plt.imshow``.
            annot: When True every cell is annotated with its value.
            cmap: Name of the colour map passed to ``plt.imshow``.
        """
        self.figure = plt.figure(figsize=(12, 8))
        plt.imshow(data, cmap=cmap, aspect='auto')
        if annot:
            # ndenumerate yields ((row, col), value); text wants (x=col, y=row).
            for (row, col), value in np.ndenumerate(data):
                plt.text(col, row, f'{value}', ha='center', va='center', color='white')
        self._apply_common_properties()
        plt.colorbar()  # Show color scale.

    def venn(self, sets, set_labels):
        """Prepare a Venn diagram for two or three sets.

        Raises:
            ValueError: If ``sets`` does not contain exactly 2 or 3 items.
        """
        self.figure = plt.figure(figsize=(8, 8))
        if len(sets) == 2:
            venn2(sets, set_labels)
        elif len(sets) == 3:
            venn3(sets, set_labels)
        else:
            raise ValueError("Only 2 or 3 sets can be displayed in a Venn diagram.")
        if self.title:
            plt.title(self.title)

    @staticmethod
    def _pareto_series(data, labels):
        """Return ``(values, labels, cumulative_percentages)`` sorted by value, descending.

        Sorting uses the value only, so labels never have to be comparable
        with each other; entries with equal values keep their input order.

        Raises:
            ValueError: If there is no data to plot.
        """
        pairs = sorted(zip(data, labels), key=lambda pair: pair[0], reverse=True)
        if not pairs:
            raise ValueError("Pareto chart requires at least one data point.")

        values, names = zip(*pairs)
        total = sum(values)

        # Single running total instead of re-summing a growing slice per bar.
        cumulative_percentage = []
        running = 0
        for value in values:
            running += value
            # An all-zero dataset has no meaningful share; report 0% rather
            # than dividing by zero.
            cumulative_percentage.append(running / total * 100 if total else 0.0)
        return values, names, cumulative_percentage

    def pareto(self, data, labels):
        """Prepare a Pareto chart: bars by value plus a cumulative-percentage line.

        Args:
            data: Numeric values, one per label.
            labels: Category labels matching ``data``.

        Raises:
            ValueError: If ``data`` / ``labels`` are empty.
        """
        values, names, cumulative_percentage = self._pareto_series(data, labels)

        self.figure, ax1 = plt.subplots(figsize=(12, 8))
        ax1.bar(names, values, color='b', alpha=0.6)
        ax1.set_xlabel(self.xlabel)
        ax1.set_ylabel(self.ylabel)

        # Second y-axis sharing the same x-axis for the cumulative line.
        ax2 = ax1.twinx()
        ax2.plot(names, cumulative_percentage, color='r', marker='D', linestyle='-', linewidth=2)
        ax2.set_ylabel('Cumulative Percentage')
        ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: f'{y:.0f}%'))

        if self.title:
            ax1.set_title(self.title)

    def show(self):
        """Display the prepared chart, or print a hint if none was prepared."""
        if self.figure is not None:
            plt.show()
        else:
            print("No chart has been prepared. Please call a chart method first.")
259 changes: 259 additions & 0 deletions pythistic/FrequencyTable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
import numpy as np

# Module-level working lists for frequency table processing.
# FrequencyTable.reset() empties them and the Populate* methods append to them.

# Class boundaries and their continuity-corrected limits
top, bottom = [], []
top_limit, bottom_limit = [], []

# Per-class frequency and its textual / numeric descriptions
frequency = []
data_range, data_limit, data_midpoint = [], [], []

# Derived per-class statistics
bot_cumulative_frequency, top_cumulative_frequency = [], []
relative_frequency = []
mode = []

# Frequency Table Class
class FrequencyTable:
    """Descriptive statistics and simple / grouped frequency tables for a dataset.

    The dataset must be all-numeric (``int`` / ``float``) or all-text.
    Numeric datasets additionally get class count, interval, mean, variance,
    standard deviation, skewness and kurtosis. Call :meth:`PopulateSimple`
    or :meth:`PopulateGrouped` to build ``self.simple`` / ``self.grouped``.
    """

    def __init__(self, dataset):
        """Validate the dataset and compute its summary statistics.

        Args:
            dataset: Iterable of numbers or of strings (not mixed).

        Raises:
            ValueError: If the dataset is empty or mixes numbers and strings.
        """
        # Materialise once so one-shot iterables can be traversed repeatedly.
        dataset = list(dataset)
        if not dataset:
            raise ValueError("Dataset is empty.")

        contains_text = any(isinstance(item, str) for item in dataset)
        contains_numbers = any(isinstance(item, (int, float)) for item in dataset)
        if contains_text and contains_numbers:
            raise ValueError("Data is corrupted: contains both numeric and string values.")

        # Data Initiation
        self.dataset = sorted(dataset)
        self.length = len(dataset)
        is_numeric = isinstance(dataset[0], (int, float))
        self.lowest = min(dataset) if is_numeric else None
        self.highest = max(dataset) if is_numeric else None

        if self.lowest is None:
            # Text data: only the simple frequency table is available.
            return

        # Number of classes, rounded down. Clamped to 1 because
        # round(1.0 - 0.5) is 0 for a single observation (round-half-to-even).
        # NOTE(review): Sturges' rule is usually quoted with 3.322; confirm
        # whether 3.222 is intentional.
        raw_classes = 1 + (3.222 * np.log10(self.length))
        self.classes = max(1, int(round(raw_classes - 0.5)))

        # Sum of the data and range
        self.sum = sum(dataset)
        self.range = self.highest - self.lowest

        # Class interval, rounded up. Clamped to 1: a zero interval (constant
        # data) would keep PopulateGrouped from ever advancing.
        # NOTE(review): round(x + 0.5) is round-half-to-even, so an exact odd
        # quotient is bumped by one while an exact even one is not.
        self.interval = max(1, int(round(self.range / self.classes + 0.5)))

        # Rounding Both Limits So The Data Would Be Simple And Easier To Read
        self.base = self.roundy(self.lowest - 3)
        self.top = self.roundy(self.highest + 3)

        # Mean or Average
        self.mean = self.sum / self.length

        # Population variance (divides by n) and standard deviation
        self.variance = sum((x - self.mean) ** 2 for x in dataset) / self.length
        self.deviation = self.variance ** 0.5

        # Skewness / kurtosis use small-sample correction factors that are
        # undefined for n <= 2 / n <= 3 and for zero spread; report NaN there
        # instead of failing construction with ZeroDivisionError.
        # NOTE(review): these correction factors are normally paired with the
        # sample (n - 1) standard deviation; the population one is used here.
        n = self.length
        if self.deviation > 0:
            z_scores = [(x - self.mean) / self.deviation for x in self.dataset]
        else:
            z_scores = None

        if z_scores is not None and n > 2:
            self.skewness = (n / ((n - 1) * (n - 2))) * sum(z ** 3 for z in z_scores)
        else:
            self.skewness = float("nan")

        if z_scores is not None and n > 3:
            self.kurtosis = (n * (n + 1) * sum(z ** 4 for z in z_scores) /
                             ((n - 1) * (n - 2) * (n - 3))) - \
                            (3 * (n - 1) ** 2) / ((n - 2) * (n - 3))
        else:
            self.kurtosis = float("nan")

    # Base 5 Rounding
    def roundy(self, x, base=5):
        """Round ``x`` to the nearest multiple of ``base``."""
        return base * round(x / base)

    # Function to Reset Frequency Table Data
    def reset(self):
        """Empty every module-level working list used while building a table."""
        for working_list in (top, bottom, top_limit, bottom_limit, frequency,
                             data_range, data_limit, data_midpoint,
                             bot_cumulative_frequency, top_cumulative_frequency,
                             relative_frequency, mode):
            working_list.clear()

    # Function To Find Frequency in Dataset with Desired Range (Top and Down Limit)
    def find_frequency(self, bot, top):
        """Count the dataset values ``v`` with ``bot <= v < top``.

        A direct comparison is used rather than probing every integer or every
        0.01 step between the bounds, so values with any number of decimals
        are counted exactly once and the cost does not depend on the width of
        the range.
        """
        return sum(1 for value in self.dataset if bot <= value < top)

    # Populate Grouped Table Frequency Data Method
    def PopulateGrouped(self):
        """Build the grouped frequency table and store it in ``self.grouped``.

        Classes start at ``self.base`` and are ``self.interval`` wide; classes
        are added until no data lies above the last one. For text data an
        error message is printed and ``self.grouped`` is left untouched.
        """
        if any(isinstance(item, str) for item in self.dataset):
            # Kept as a printed message (not an exception) for compatibility.
            print("Error: Text data is not allowed for grouped frequency tables. Please provide numeric data only.")
            return

        self.reset()  # Reset the frequency table data before processing

        is_integer_data = all(isinstance(x, int) for x in self.dataset)
        interval = self.interval
        current_number = self.base - 1
        remaining_above = 1  # sentinel so the loop body runs at least once

        while remaining_above != 0:
            # Class lowest and highest value
            old_number = current_number + 1
            current_number = current_number + interval
            bottom.append(old_number)
            top.append(current_number)

            # Class limits: half a unit outside the class bounds
            current_bottom_limit = old_number - 0.5
            current_top_limit = current_number + 0.5
            bottom_limit.append(current_bottom_limit)
            top_limit.append(current_top_limit)

            # Frequency of values in [old_number, current_number + 1)
            current_frequency = self.find_frequency(old_number, current_number + 1)
            frequency.append(current_frequency)

            # Human-readable class range and class limits
            if is_integer_data:
                data_range.append(f"{old_number} ~ {current_number}")
                data_limit.append(f"{current_bottom_limit} ~ {current_top_limit}")
            else:
                data_range.append(f"{old_number:.2f} ~ {current_number:.2f}")
                data_limit.append(f"{current_bottom_limit:.2f} ~ {current_top_limit:.2f}")

            # Class midpoint
            data_midpoint.append((old_number + current_number) / 2)

            # Number of values below this class
            bot_cumulative_frequency.append(
                self.find_frequency(self.lowest - 1, old_number))

            # Number of values above this class; zero ends the loop
            remaining_above = self.find_frequency(current_number + 1, self.highest + 1)
            top_cumulative_frequency.append(remaining_above)

            # Relative frequency in percent, rounded to a whole number
            relative_frequency.append(np.round((current_frequency / self.length) * 100))

        # Mode: every class range that reaches the highest frequency
        highest_frequency = max(frequency)
        modes = [data_range[i] for i, val in enumerate(frequency) if val == highest_frequency]

        # Hand ProcessedData copies: the module-level lists are cleared by the
        # next Populate* call and would otherwise empty this stored result.
        self.grouped = ProcessedData(None, list(bottom), list(top),
                                     list(bottom_limit), list(top_limit),
                                     list(frequency), list(data_range),
                                     list(data_limit), list(data_midpoint),
                                     list(bot_cumulative_frequency),
                                     list(top_cumulative_frequency),
                                     list(relative_frequency), modes)

    # Populate Simple Table Frequency Data Method
    def PopulateSimple(self):
        """Build the per-value frequency table and store it in ``self.simple``.

        Works for numeric and text data. Numeric data also gets class limits
        (value -/+ 0.5) and cumulative counts of the values below / above each
        distinct value.
        """
        from collections import Counter

        self.reset()  # Reset the frequency table data before processing

        data = sorted(set(self.dataset))  # Remove duplicates and sort the data
        counts = Counter(self.dataset)
        is_text_data = all(isinstance(item, str) for item in self.dataset)

        # Limits only exist for numeric data; they stay empty for text.
        class_top_limits = []
        class_bottom_limits = []

        seen_before = 0  # total frequency of the classes already processed
        for current_class in data:
            current_frequency = counts[current_class]
            frequency.append(current_frequency)

            # Relative frequency in percent, rounded to a whole number
            relative_frequency.append(np.round((current_frequency / self.length) * 100))

            if not is_text_data:
                class_top_limits.append(current_class + 0.5)
                class_bottom_limits.append(current_class - 0.5)

                # Values strictly below the current value
                bot_cumulative_frequency.append(seen_before)

                # Values strictly above the current value. Taking the
                # complement also counts floats that lie less than one unit
                # above the current value.
                top_cumulative_frequency.append(
                    self.length - seen_before - current_frequency)
            else:
                # NOTE(review): for text data "bottom" is the class's own
                # frequency and "top" the total of the preceding classes;
                # kept as-is, confirm this is the intended definition.
                bot_cumulative_frequency.append(current_frequency)
                top_cumulative_frequency.append(seen_before)

            seen_before += current_frequency

        # Mode: every value that reaches the highest frequency
        highest_frequency = max(frequency)
        modes = [data[i] for i, val in enumerate(frequency) if val == highest_frequency]

        # Copies for the same reason as in PopulateGrouped: the module-level
        # lists are cleared by the next Populate* call.
        self.simple = ProcessedData(
            data, None, None, class_bottom_limits, class_top_limits,
            list(frequency), None, None, None,
            list(bot_cumulative_frequency), list(top_cumulative_frequency),
            list(relative_frequency), modes
        )

# Processed Data Assignment
class ProcessedData:
    """Plain container for the columns of one finished frequency table.

    Constructor argument abbreviations: Limit (L), Frequency (F), Ranges (R),
    Midpoint (M), Cumulative Frequency (CF), Relative Frequency (RF).
    """

    def __init__(self, data, bot, top, bot_L, top_L, F, R, L, M, bot_CF, top_CF, RF, mode):
        # Class values and class bounds
        self.classval = data
        self.bottom, self.top = bot, top
        self.bottom_limit, self.top_limit = bot_L, top_L

        # Textual / numeric class descriptions
        self.midpoint = M
        self.ranges = R
        self.limit = L

        # Frequencies
        self.frequency = F
        self.bottom_cumulative_frequency = bot_CF
        self.top_cumulative_frequency = top_CF
        self.relative_frequency = RF

        # Relative frequencies rendered as text with two decimals and a '%'.
        formatted = []
        for rf in self.relative_frequency:
            formatted.append("{:.2f}%".format(rf * 1))
        self.percentage_relative_frequency = formatted

        self.mode = mode
Loading

0 comments on commit 11e24a5

Please sign in to comment.