-
Notifications
You must be signed in to change notification settings - Fork 6
/
activation_functions_quad.py
159 lines (143 loc) · 5.6 KB
/
activation_functions_quad.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
## activation_functions_qaud.py
##
## Definitions of ReLU activation function and its quadratic upper and lower bounds
##
## Copyright (C) 2018, Huan Zhang <[email protected]> and contributors
##
## This program is licenced under the BSD 2-Clause License,
## contained in the LICENCE file in this directory.
## See CREDITS for a list of contributors.
##
import numpy as np
from numba import jit
def get_area(a, b, c, lower, upper):
f = lambda x: 1.0 / 3.0 * a * x * x * x + 1.0 / 2.0 * b * x * x + c * x
return f(upper) - f(lower)
@jit(nopython=True)
def get_upper_quad_parameterized(u, l, theta):
a = 1.0 / (u - l) + (1.0 / (u * l)) * theta
b = (1.0 / (u * l)) * ((l * l * u) / (l - u) - (l + u) * theta)
c = theta
return a, b, c
def get_lower_quad_parameterized_lgtu(u, l, theta):
a = theta / (u * u - l * u)
b = -a * l
c = 0
print("u={:.3f}, l={:.3f}, theta={:.3f}, a={:.3f}, b={:.3f}, c={:.3f}, ".format(u, l, theta, a, b, c), end='')
area1 = - get_area(a, b, c, l, 0)
area2 = 0.5*u*u - get_area(a, b, c, 0, u)
area = area1 + area2
area_other = 0.5*u*u - theta / (u*u - l*u) * (1.0 / 6.0 * l*l*l + 1.0 / 3.0 * u*u*u - 0.5 * l * u*u)
print("area1={:.3f}, area2={:.3f}, area={:.3f} ({:.3f}), reference={:.3f}".format(area1, area2, area, area_other, 0.5*u*u))
return a, b, c
def get_lower_quad_parameterized_lltu(u, l, theta):
a = (l - theta) / (u*l - l*l)
b = (u*theta - l*l) / (u*l - l*l)
c = 0
print("u={:.3f}, l={:.3f}, theta={:.3f}, a={:.3f}, b={:.3f}, c={:.3f}, ".format(u, l, theta, a, b, c), end='')
area1 = - get_area(a, b, c, l, 0)
area2 = 0.5*u*u - get_area(a, b, c, 0, u)
area = area1 + area2
area_other = 0.5*u*u + 1.0/(u*l-l*l)*(1.0/3.0*(l-theta)*l*l*l + 1.0/2.0*(u*theta-l*l)*l*l - 1.0/3.0*(l-theta)*u*u*u - 0.5*(u*theta-l*l)*u*u)
print("area1={:.3f}, area2={:.3f}, area={:.3f} ({:.3f}), reference={:.3f}".format(area1, area2, area, area_other, 0.5*l*l))
return a, b, c
@jit(nopython=True)
def get_lower_quad_parameterized(u, l, theta):
a = theta / (u * u - l * u)
b = -a * l
c = 0
"""
print("u={:.3f}, l={:.3f}, theta={:.3f}, a={:.3f}, b={:.3f}, c={:.3f}, ".format(u, l, theta, a, b, c), end='')
area1 = - get_area(a, b, c, l, 0)
area2 = 0.5*u*u - get_area(a, b, c, 0, u)
area = area1 + area2
area_other = 0.5*u*u - theta / (u*u - l*u) * (1.0 / 6.0 * l*l*l + 1.0 / 3.0 * u*u*u - 0.5 * l * u*u)
print("area1={:.3f}, area2={:.3f}, area={:.3f} ({:.3f}), reference={:.3f}".format(area1, area2, area, area_other, min(0.5*u*u, 0.5*l*l)))
"""
return a, b, c
@jit(nopython=True)
def get_best_lower_quad(u, l):
if abs(l) >= abs(u):
return 0, 0, 0
if abs(l) >= 2.0 * abs(u):
return 0, 0, 0
else:
return get_lower_quad_parameterized(u, l, u)
else:
# return 0, 1, 0
if abs(u) >= 2.0 * abs(l):
return 0, 1, 0
else:
return get_lower_quad_parameterized(u, l, u)
@jit(nopython=True)
def get_best_upper_quad(u, l):
if abs(l) > abs(u):
bias = l / (l - u) + 2 * l / (u - l)
k = 2.0 / u - (l + u) / (u * l)
theta = - bias / k
else:
bias = l / (l - u) + 2 * u / (u - l)
k = 2.0 / l - (l + u) / (u * l)
theta = (1.0 - bias) / k
# print("l = {}, u = {}, bias = {}, k = {}, theta = {}".format(l, u, bias, k, theta))
# assert theta >= 0
# assert theta <= (- u * l) / (u - l)
a, b, c = get_upper_quad_parameterized(u, l, theta)
# print("slope at l: {}".format(2 * a * l + b))
# print("slope at u: {}".format(2 * a * u + b))
return a, b, c
def plot_parameterized(func, u, l, theta, label, linestyle='--'):
x = np.linspace(l * 1.3, u * 1.3, 1000)
a, b, c = func(u, l, theta)
y = a * x**2 + b*x + c
plt.plot(x, y, linestyle=linestyle, label=label)
if __name__ == "__main__":
import matplotlib
import matplotlib.pyplot as plt
import sys
if len(sys.argv) == 3:
l = float(sys.argv[1])
u = float(sys.argv[2])
else:
l = -1.0
u = 1.75
assert l <= 0
assert u >= 0
x = np.linspace(l * 1.3, u * 1.3, 1000)
# plot relu
plt.plot([l,0,u], [0, 0, u], linestyle='--', label="ReLU")
# get best upper quad bound
a, b, c = get_best_upper_quad(u, l)
y = a * x**2 + b*x + c
best_theta = c
upper = (- u * l) / (u - l)
print("valid theta: 0 - {}, best {}".format(upper, best_theta))
lower = c / 2
plt.plot(x, y, linestyle='-.', label="Best upper")
# slope u/(u-l) linear lower bound
plt.plot([l, 0, u], [l*u / (u - l), 0, u * u / (u - l)], linestyle=':', label="linear lower")
# slope u/(u-l) linear lower bound
plt.plot([l, 0, u], [0, -l*u / (u - l), u], linestyle=':', label="linear upper")
"""
for theta in np.linspace(lower, upper, 3):
if abs(theta - best_theta) < 1e-5:
continue
a, b, c = get_upper_quad_parameterized(u, l, theta)
y = a * x**2 + b*x + c
plt.plot(x, y, label="theta = {}".format(theta))
"""
if abs(u) < abs(l):
for theta in np.linspace(0, u, 10):
plot_parameterized(get_lower_quad_parameterized_lgtu, u, l, theta, "theta={}".format(theta))
else:
for theta in np.linspace(l, 0, 10):
plot_parameterized(get_lower_quad_parameterized_lltu, u, l, theta, "theta={}".format(theta))
# get best lower quad bound
a, b, c = get_best_lower_quad(u, l)
y = a * x**2 + b*x + c
plt.plot(x, y, linestyle='-.', label="Best lower")
lim = max(abs(l), abs(u))
plt.xlim(-lim, lim)
plt.ylim(-lim*0.3, lim)
plt.legend()
plt.show()