-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpca.py
74 lines (58 loc) · 2.23 KB
/
pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
'''Question : Create a python class PCA in “pca.py” to implement PCA (Principal component analysis).
The deliverable is a class that can be used as follows:
from pca import PCA
… .
… .
pca_model = PCA(n_component=2)
pca_model.fit(x)
print(pca_model.variance_ratio)
print(pca_model.transform(data))'''
from numpy import mean, std, cov
from numpy.linalg import eig
import numpy as np
class PCA:
    """Principal component analysis via eigen-decomposition of the covariance matrix.

    Parameters
    ----------
    n_components : int or None, default None
        Number of leading (highest-variance) principal components to keep.
        ``None`` keeps every component.
    whiten : bool, default False
        When true, each centered feature is divided by its standard
        deviation before the decomposition, and again in ``transform``.

    Attributes (set by ``fit``)
    ---------------------------
    mean_ : ndarray, shape (n_features,)
        Per-feature mean of the training data.
    stdv_ : ndarray, shape (n_features,)
        Per-feature standard deviation; only set when ``whiten`` is true.
    eigval : ndarray, shape (k,)
        Retained eigenvalues of the covariance matrix, in descending order.
    eigvect : ndarray, shape (n_features, k)
        Matching eigenvectors, one per column. The sign of each column is
        arbitrary.
    total_variance_ : float
        Sum of all eigenvalues before truncation.
    """

    def __init__(self, n_components=None, whiten=False):
        self.n_components = n_components
        self.whiten = bool(whiten)

    def fit(self, X):
        """Learn the principal axes of ``X`` and return ``self``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, has fewer than two samples, or
            ``n_components`` is smaller than 1.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if X.shape[0] < 2:
            raise ValueError("at least two samples are required to estimate the covariance")
        if self.n_components is not None and self.n_components < 1:
            raise ValueError("n_components must be a positive integer or None")

        # Center the data.
        self.mean_ = X.mean(axis=0)
        X = X - self.mean_

        # Optional per-feature scaling.
        if self.whiten:
            stdv = X.std(axis=0)
            # A constant feature has zero spread; dividing by 1 keeps it at
            # zero instead of producing NaN/inf.
            stdv[stdv == 0] = 1.0
            self.stdv_ = stdv
            X = X / self.stdv_

        # atleast_2d: np.cov returns a 0-d array for a single feature.
        covariance = np.atleast_2d(np.cov(X, rowvar=False))

        # The covariance matrix is symmetric, so eigh is the right solver:
        # it guarantees real eigenvalues and orthonormal eigenvectors.
        eigval, eigvect = np.linalg.eigh(covariance)

        # Sort in descending order BEFORE truncating, so the components that
        # are kept are the ones explaining the most variance.
        order = np.argsort(eigval)[::-1]
        # Covariance is positive semi-definite; negatives are round-off.
        eigval = np.clip(eigval[order], 0.0, None)
        eigvect = eigvect[:, order]

        # Remember the full variance so variance_ratio stays a fraction of
        # the total even after truncation.
        self.total_variance_ = float(eigval.sum())

        if self.n_components is not None:
            eigval = eigval[:self.n_components]
            eigvect = eigvect[:, :self.n_components]

        self.eigval = eigval
        self.eigvect = eigvect
        return self

    def transform(self, X):
        """Project ``X`` onto the retained principal axes.

        ``X`` is centered with the training mean (and scaled with the
        training standard deviation when ``whiten`` is true) before being
        multiplied by the eigenvector matrix.

        Returns
        -------
        ndarray, shape (n_samples, k)
        """
        X = np.asarray(X, dtype=float) - self.mean_
        if self.whiten:
            X = X / self.stdv_
        return X @ self.eigvect

    @property
    def variance_ratio(self):
        """Fraction of the total variance explained by each retained component."""
        total = self.total_variance_
        if total == 0:
            # Constant data: nothing to explain, avoid 0/0.
            return np.zeros_like(self.eigval)
        return self.eigval / total