# softmax_layer.py
from typing import Tuple

import numpy as np


class SoftMaxLayer:
    """
    A fully connected layer followed by a softmax activation. It maps its
    input to one logit per class and converts those logits into probabilities
    with the softmax function. Each neuron in the layer represents a class,
    and the output of each neuron is the probability that the given input
    belongs to that class.
    """

    def __init__(self, input_size: int, output_size: int) -> None:
        """
        Initializes the softmax layer with random weights and zero biases.

        Parameters:
        - input_size: The number of input nodes.
        - output_size: The number of output nodes (classes).
        """
        self.input_size = input_size
        self.output_size = output_size
        # Scale the random weights down by the input size so the initial
        # logits stay small.
        self.weights: np.ndarray = np.random.randn(input_size, output_size) / input_size
        self.biases: np.ndarray = np.zeros(output_size)
        self.original_input_shape: Tuple[int, ...] = ()
        self.flattened_input: np.ndarray = np.array([])
        self.output_logits: np.ndarray = np.array([])

    def forward(self, input_tensor: np.ndarray) -> np.ndarray:
        """
        Performs the forward pass of the softmax layer.

        Parameters:
        - input_tensor: The input tensor, typically an image flattened into a 1D array.

        Returns:
        - The probabilities of each class.
        """
        # Preserve the original shape so backward() can restore it.
        self.original_input_shape = input_tensor.shape
        input_flattened = input_tensor.flatten()
        self.flattened_input = input_flattened  # Stored for backpropagation
        logits = np.dot(input_flattened, self.weights) + self.biases
        self.output_logits = logits  # Stored for backpropagation
        # Subtract the maximum logit before exponentiating so that np.exp
        # cannot overflow; softmax is invariant to this shift.
        exp_logits = np.exp(logits - np.max(logits))
        probabilities = exp_logits / np.sum(exp_logits, axis=0)
        return probabilities
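
    # A quick sanity check of the shift invariance used in forward(), kept
    # here as a hedged sketch in comments rather than as part of the layer:
    # for logits z = [1, 2, 3], softmax(z) and softmax(z - max(z)) give the
    # same probabilities, e.g.
    #
    #   z = np.array([1.0, 2.0, 3.0])
    #   p1 = np.exp(z) / np.sum(np.exp(z))
    #   p2 = np.exp(z - z.max()) / np.sum(np.exp(z - z.max()))
    #   np.allclose(p1, p2)  # True, both approx [0.090, 0.245, 0.665]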

    def backward(
        self, gradient_of_loss: np.ndarray, learning_rate: float
    ) -> np.ndarray:
        """
        Performs the backward pass of the softmax layer, updating the weights
        and biases based on the gradient of the loss.

        Parameters:
        - gradient_of_loss: The gradient of the loss with respect to the output of this layer.
        - learning_rate: The learning rate to use for the updates.

        Returns:
        - The gradient of the loss with respect to the input of this layer.
        """
        # With a cross-entropy loss, gradient_of_loss is nonzero only at the
        # true class, so this loop performs the update for that single index
        # and returns.
        for i, gradient in enumerate(gradient_of_loss):
            if gradient == 0:
                continue
            # Shift by the maximum logit for numerical stability; every
            # expression below is a ratio of exponentials, so the shift
            # cancels out.
            exp_logits = np.exp(self.output_logits - np.max(self.output_logits))
            sum_exp_logits = np.sum(exp_logits)
            # Gradient of every softmax output with respect to logit i.
            gradient_softmax_output = -exp_logits[i] * exp_logits / (sum_exp_logits**2)
            gradient_softmax_output[i] = (
                exp_logits[i] * (sum_exp_logits - exp_logits[i]) / (sum_exp_logits**2)
            )
            # Gradients of the logits with respect to parameters and input.
            gradient_logits_weights = self.flattened_input
            gradient_logits_biases = 1
            gradient_logits_input = self.weights
            # Chain rule: propagate the loss gradient through the softmax.
            gradient_loss_logits = gradient * gradient_softmax_output
            gradient_loss_weights = np.outer(
                gradient_logits_weights, gradient_loss_logits
            )
            gradient_loss_biases = gradient_loss_logits * gradient_logits_biases
            gradient_loss_input = gradient_logits_input.dot(gradient_loss_logits)
            # Gradient-descent update of the parameters.
            self.weights -= learning_rate * gradient_loss_weights
            self.biases -= learning_rate * gradient_loss_biases
            return gradient_loss_input.reshape(self.original_input_shape)
        return np.zeros(self.original_input_shape)
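
    # Derivation note for the Jacobian computed in backward(): with
    # s_k = exp(z_k) / sum_j exp(z_j), the partial derivatives are
    #
    #   d s_k / d z_i = -s_k * s_i            for k != i
    #   d s_i / d z_i =  s_i * (1 - s_i)
    #
    # which, expressed in terms of exp_logits and sum_exp_logits, matches the
    # two expressions assigned to gradient_softmax_output above.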

    def serialize(self) -> dict:
        """
        Serializes the layer to a dictionary of its weights and biases.

        Returns:
        - A dictionary containing the layer's weights and biases.
        """
        return {
            "type": "SoftmaxLayer",
            "input_size": self.input_size,
            "output_size": self.output_size,
            "weights": self.weights.tolist(),
            "biases": self.biases.tolist(),
        }

    @staticmethod
    def deserialize(data: dict) -> "SoftMaxLayer":
        """
        Deserializes the layer from a dictionary.

        Parameters:
        - data: A dictionary containing the layer's input size, output size, weights and biases.

        Returns:
        - A new SoftMaxLayer instance.
        """
        # Read the flat dictionary produced by serialize().
        layer = SoftMaxLayer(data["input_size"], data["output_size"])
        layer.weights = np.array(data["weights"])
        layer.biases = np.array(data["biases"])
        return layer
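

# A minimal usage sketch (not part of the layer itself): the input size,
# learning rate, and the cross-entropy gradient below are illustrative
# assumptions, not values taken from the rest of the project.
if __name__ == "__main__":
    np.random.seed(0)
    layer = SoftMaxLayer(input_size=4, output_size=3)

    x = np.random.randn(2, 2)  # any tensor that flattens to 4 values
    probabilities = layer.forward(x)
    print("probabilities:", probabilities, "sum:", probabilities.sum())

    # Gradient of a cross-entropy loss w.r.t. the probabilities, assuming the
    # true class is 0: -1 / p[true_class] at that index, zero elsewhere.
    grad = np.zeros(3)
    grad[0] = -1.0 / probabilities[0]
    grad_input = layer.backward(grad, learning_rate=0.01)
    print("input gradient shape:", grad_input.shape)  # matches x.shape

    # Round-trip through serialize/deserialize.
    restored = SoftMaxLayer.deserialize(layer.serialize())
    assert np.allclose(restored.weights, layer.weights)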