'''
Source: Natural Language Processing with Classification and Vector Spaces, Week 4, Coursera Assignment
Description: Gradient descent for English-to-French embedding translation, minimizing the Frobenius-norm loss.
'''
import numpy as np
def compute_loss(X, Y, R):
    '''
    Inputs:
        X: a matrix of dimension (m,n) where the rows are the English embeddings.
        Y: a matrix of dimension (m,n) where the rows correspond to the French embeddings.
        R: a matrix of dimension (n,n) - transformation matrix from English to French vector space embeddings.
    Outputs:
        loss: a scalar - the value of the loss function for the given X, Y and R.
    '''
    # m is the number of rows in X
    m = X.shape[0]
    # diff is XR - Y
    diff = np.dot(X, R) - Y
    # diff_squared is the element-wise square of the difference
    diff_squared = diff ** 2
    # sum_diff_squared is the sum of the squared elements
    sum_diff_squared = np.sum(diff_squared)
    # loss is sum_diff_squared divided by the number of examples (m)
    loss = sum_diff_squared / m
    return loss
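
# Equivalently, the loss is the squared Frobenius norm of XR - Y divided by m,
# L(R) = (1/m) * ||XR - Y||_F^2. A minimal one-line check, assuming plain NumPy
# (np.linalg.norm defaults to the Frobenius norm for matrices):
#     np.linalg.norm(np.dot(X, R) - Y) ** 2 / X.shape[0]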
def compute_gradient(X, Y, R):
    '''
    Inputs:
        X: a matrix of dimension (m,n) where the rows are the English embeddings.
        Y: a matrix of dimension (m,n) where the rows correspond to the French embeddings.
        R: a matrix of dimension (n,n) - transformation matrix from English to French vector space embeddings.
    Outputs:
        gradient: a matrix of dimension (n,n) - gradient of the loss function L with respect to R.
    '''
    # m is the number of rows in X
    m = X.shape[0]
    # gradient is (2/m) * X^T (XR - Y)
    gradient = np.dot(X.transpose(), np.dot(X, R) - Y) * (2 / m)
    return gradient
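
# Derivation sketch: writing L(R) = (1/m) * trace((XR - Y)^T (XR - Y)) and
# differentiating with respect to R gives dL/dR = (2/m) * X^T (XR - Y),
# which is the matrix computed in compute_gradient above.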
def align_embeddings(X, Y, train_steps=100, learning_rate=0.0003, verbose=True,
                     compute_loss=compute_loss, compute_gradient=compute_gradient):
    '''
    Inputs:
        X: a matrix of dimension (m,n) where the rows are the English embeddings.
        Y: a matrix of dimension (m,n) where the rows correspond to the French embeddings.
        train_steps: positive int - the number of gradient descent steps to take.
        learning_rate: positive float - the step size for each gradient descent update.
    Outputs:
        R: a matrix of dimension (n,n) - the projection matrix that minimizes the Frobenius norm ||XR - Y||^2 / m
    '''
    np.random.seed(129)
    # the number of columns in X is the number of dimensions for a word vector (e.g. 300)
    # R is a square matrix with length equal to the number of dimensions in the word embedding
    R = np.random.rand(X.shape[1], X.shape[1])
    for i in range(train_steps):
        if verbose and i % 25 == 0:
            print(f"loss at iteration {i} is: {compute_loss(X, Y, R):.4f}")
        # use the function defined above to compute the gradient
        gradient = compute_gradient(X, Y, R)
        # update R by subtracting the learning rate times the gradient
        R -= learning_rate * gradient
    return R
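
# For reference (not in the original assignment): the unconstrained minimizer of
# ||XR - Y||_F^2 is the least-squares solution R* = pinv(X) Y, so the loss reached
# by gradient descent can be sanity-checked against compute_loss(X, Y, R*).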
if __name__ == '__main__':
    # Testing the align_embeddings function
    np.random.seed(129)
    m = 10
    n = 5
    X = np.random.rand(m, n)
    Y = np.random.rand(m, n) * .1
    R = align_embeddings(X, Y, 100, 0.0003, True, compute_loss, compute_gradient)
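    # Hedged sanity checks (a minimal sketch, not part of the original
    # Coursera assignment; assumes only NumPy).
    # 1) The loss should match a direct Frobenius-norm computation.
    assert np.isclose(compute_loss(X, Y, R),
                      np.linalg.norm(np.dot(X, R) - Y) ** 2 / m)
    # 2) The analytic gradient should agree with a finite-difference estimate
    #    for a single entry of R.
    eps = 1e-6
    R_plus = R.copy()
    R_plus[0, 0] += eps
    fd = (compute_loss(X, Y, R_plus) - compute_loss(X, Y, R)) / eps
    print(f"analytic grad[0,0]: {compute_gradient(X, Y, R)[0, 0]:.6f}, "
          f"finite difference: {fd:.6f}")
    # 3) Gradient descent should approach the closed-form least-squares loss.
    R_star = np.dot(np.linalg.pinv(X), Y)
    print(f"GD loss: {compute_loss(X, Y, R):.6f}, "
          f"closed-form loss: {compute_loss(X, Y, R_star):.6f}")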