-
Notifications
You must be signed in to change notification settings - Fork 0
/
float_adder_double.sv
266 lines (222 loc) · 8.75 KB
/
float_adder_double.sv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
// Author: Alex Ghandhi
/* Double-Precision Floating-Point Adder Unit
Calculates the sum of two 64-bit normalized floats
Inputs:
a: first input double
b: second input double
Outputs:
out: sum of input doubles
inexact: raised if truncation occurred
overflow: raised if overflow occurred
underflow: raised if underflow occurred
zero: raised if result is zero
Parameters:
FLOAT_SIZE: bit-length of floating point value
EXPONENT_SIZE: bit-length of exponent portion
MANTISSA_SIZE: bit-length of mantissa portion
BIAS: bias for exponent
*/
module float_adder_double #(
    parameter FLOAT_SIZE = 64,
    EXPONENT_SIZE = 11,
    MANTISSA_SIZE = 52,
    BIAS = 1023
) (
    a,
    b,
    out,
    overflow,
    underflow,
    inexact,
    zero
);
  // Combinational adder for two floats laid out as [ sign | exponent | mantissa ].
  // Both operands are always treated as normalized: a hidden leading 1 is
  // prepended to each mantissa, so zero/denormal/Inf/NaN encodings on the inputs
  // get no special handling. Results are truncated, not rounded.
  //
  // Flags:
  //   overflow  - the same-sign carry pushed the exponent past EXPONENT_SIZE bits
  //   underflow - renormalizing after cancellation needed more exponent than
  //               was available (true borrow of the exponent subtraction)
  //   inexact   - at least one non-zero bit was discarded
  //   zero      - the mantissa result is all zeros; out is then forced to +0

  // IO Declaration
  input logic [FLOAT_SIZE-1:0] a, b;
  output logic [FLOAT_SIZE-1:0] out;
  output logic overflow, underflow, inexact, zero;

  // Width of a mantissa extended with the hidden bit plus one carry bit
  localparam SIG_WIDTH = MANTISSA_SIZE + 2;
  // Exponent width plus overflow bit plus borrow bit
  localparam E_WIDE = EXPONENT_SIZE + 2;

  // Bias stored with extra bits for over/underflow checks.
  // NOTE(review): not referenced by any other logic in this module.
  logic [EXPONENT_SIZE+1:0] bias;
  assign bias = BIAS[EXPONENT_SIZE+1:0];

  // Float Components for inputs and output
  logic sign_a, sign_b, sign_out;
  logic [EXPONENT_SIZE-1:0] exponent_a, exponent_b, exponent_out;
  logic [MANTISSA_SIZE-1:0] mantissa_a, mantissa_b, mantissa_out;

  // Define the output float
  assign out = {sign_out, exponent_out, mantissa_out};

  // Wire the float inputs to their components
  // format: [ S | E | M ]
  assign sign_a = a[FLOAT_SIZE-1];
  assign sign_b = b[FLOAT_SIZE-1];
  assign exponent_a = a[FLOAT_SIZE-2:MANTISSA_SIZE];
  assign exponent_b = b[FLOAT_SIZE-2:MANTISSA_SIZE];
  assign mantissa_a = a[MANTISSA_SIZE-1:0];
  assign mantissa_b = b[MANTISSA_SIZE-1:0];

  // Internal Control Signals
  logic EB_greater, E_equal, MA_greater, MX_greater, sameSign, m_add_adjust;
  logic [1:0] inexact_portions;  // [0]: alignment shift, [1]: carry renormalization
  logic [EXPONENT_SIZE:0] E_shamt;
  logic [MANTISSA_SIZE+1:0] mantissa_x, mantissa_y;

  // Logic for Leading Zero fix
  logic [6:0] leadingZeros, leadingZeros_raw;

  // Simple Control Signal Definitions
  assign inexact = |inexact_portions;  // OR the bits together
  assign MA_greater = (mantissa_a > mantissa_b) ? 1'b1 : 1'b0;
  assign MX_greater = (mantissa_x > mantissa_y) ? 1'b1 : 1'b0;
  assign sameSign = ~(sign_a ^ sign_b);

  // Sign Unit
  // The result takes the sign of the operand with the larger magnitude:
  // larger exponent first, mantissa comparison as the tie-breaker.
  // An exactly-zero result is reported as +0.
  always_comb begin
    if (zero) begin
      sign_out = 1'b0;
    end else if (EB_greater) begin
      sign_out = sign_b;
    end else if (~E_equal) begin
      sign_out = sign_a;
    end else begin
      sign_out = MA_greater ? sign_a : sign_b;
    end
  end  // Sign Unit

  ///////////////////////////
  // EXPONENT CALCULATIONS //
  ///////////////////////////

  // Intermediate Logic
  logic [EXPONENT_SIZE:0] E_base, E_diff_raw, E_m_add;
  logic [E_WIDE-1:0] E_final;

  // Signal Assignments
  assign E_diff_raw = {1'b0, exponent_a} - {1'b0, exponent_b};
  // The MSB of the widened difference is the borrow, i.e. exponent_b > exponent_a
  assign EB_greater = E_diff_raw[EXPONENT_SIZE];
  assign E_equal = (E_diff_raw == 0) ? 1'b1 : 1'b0;
  // Absolute exponent difference = alignment shift amount
  assign E_shamt = EB_greater ? (~E_diff_raw + 1) : E_diff_raw;
  assign E_base = EB_greater ? {1'b0, exponent_b} : {1'b0, exponent_a};
  assign E_m_add = m_add_adjust ? (E_base + 1) : E_base;
  assign overflow = E_m_add[EXPONENT_SIZE];

  // Final adjustment (handle leading zeros for mantissa subtraction).
  // The subtraction is carried out one bit wider than E_m_add so that the
  // borrow is distinct from the overflow bit; otherwise every overflow would
  // also be reported as an underflow.
  assign E_final = {1'b0, E_m_add} - E_WIDE'(leadingZeros);
  // An exact zero is not an underflow, and must be encoded with exponent 0.
  assign underflow = E_final[E_WIDE-1] & ~zero;
  assign exponent_out = zero ? {EXPONENT_SIZE{1'b0}} : E_final[EXPONENT_SIZE-1:0];

  ///////////////////////////
  // MANTISSA CALCULATIONS //
  ///////////////////////////

  // EXPONENT ADJUSTMENT STAGE
  // In this stage, we shift the mantissas so that they represent the same
  // exponent, allowing for a simple arithmetic operation in the next stage.
  // Our input a and b will be converted to x and y: x is the operand with the
  // larger (or equal) exponent, y is the other operand after alignment.

  // Intermediate Logic
  logic [MANTISSA_SIZE+1:0] exponentShiftIn, exponentShiftedBits;
  logic [MANTISSA_SIZE+1:0] shiftedOutMask;
  assign exponentShiftIn = EB_greater ? {2'b01, mantissa_a} : {2'b01, mantissa_b};

  // Logic assignments
  assign mantissa_x = EB_greater ? {2'b01, mantissa_b} : {2'b01, mantissa_a};
  // Adjust the lower mantissa by the difference in exponents
  assign mantissa_y = exponentShiftIn >> E_shamt;

  // Exactly the low E_shamt bits are discarded by the right shift above.
  // The mask has ones in those positions only: it is empty for a shift of 0
  // and all ones once the shift reaches the vector width (the left shift of
  // the all-ones vector then yields zero), which also accounts for the hidden
  // leading 1 being lost.
  assign shiftedOutMask = ~({SIG_WIDTH{1'b1}} << E_shamt);
  assign exponentShiftedBits = exponentShiftIn & shiftedOutMask;
  assign inexact_portions[0] = |exponentShiftedBits;

  // MANTISSA ADDITION/SUBTRACTION STAGE

  // Intermediate Logic
  logic [MANTISSA_SIZE+1:0] mantissa_add, mantissa_add_shift;
  logic [MANTISSA_SIZE:0]
      mantissa_sub, mantissa_add_ready, mantissa_sub_ready;

  // Handle the addition logic flow
  assign mantissa_add = mantissa_x + mantissa_y;
  assign mantissa_add_shift = mantissa_add >> 1;
  // If this is 1, we need to adjust the exponent and mantissa to renormalize
  assign m_add_adjust = sameSign & mantissa_add[MANTISSA_SIZE+1];
  // The renormalizing right shift drops bit 0 of the sum
  assign inexact_portions[1] = m_add_adjust & mantissa_add[0];

  always_comb begin : mantissaAddResult
    if (m_add_adjust)
      mantissa_add_ready = mantissa_add_shift[MANTISSA_SIZE:0];
    else mantissa_add_ready = mantissa_add[MANTISSA_SIZE:0];
  end

  // Handle the subtraction logic flow: always larger minus smaller so the
  // difference is a non-negative magnitude.
  always_comb begin : mantissaSubInputs
    if (MX_greater)
      mantissa_sub = mantissa_x[MANTISSA_SIZE:0] - mantissa_y[MANTISSA_SIZE:0];
    else
      mantissa_sub = mantissa_y[MANTISSA_SIZE:0] - mantissa_x[MANTISSA_SIZE:0];
  end

  // DOUBLE-PRECISION Specific: handle leading zero fix
  // This assumes that MANTISSA_SIZE = 52
  // Note that we pad the input to hit the bitlength, this is subtracted
  // when setting the leadingZeros value
  // NOTE(review): clz_64 is defined outside this module; presumably it returns
  // the number of leading zero bits of its 64-bit input - confirm.
  clz_64 leadingZeroCounter (
      .in ({11'b000_0000_0000, mantissa_sub}),
      .out(leadingZeros_raw)
  );
  // No renormalizing left shift is needed on the addition path
  assign leadingZeros = sameSign ? 7'd0 : leadingZeros_raw - 7'd11;

  // Left-shift adjustment for subtraction
  assign mantissa_sub_ready = mantissa_sub << leadingZeros;

  // Set the zero control signal
  assign zero = sameSign ? ~|mantissa_add : ~|mantissa_sub;

  // Determine mantissa output (the hidden bit is dropped)
  always_comb begin : getMantissaOutput
    if (sameSign) mantissa_out = mantissa_add_ready[MANTISSA_SIZE-1:0];
    else mantissa_out = mantissa_sub_ready[MANTISSA_SIZE-1:0];
  end
endmodule  // float_adder_double
// Testbench for Double-Precision Floats
module float_adder_double_tb ();
  parameter DELAY = 100;

  // Signals bound to the DUT ports by name
  logic [63:0] a, b, out;
  logic overflow, underflow, inexact, zero;

  // Device under test; every port connects to the same-named signal above
  float_adder_double dut (.*);

  // Print both operands, the DUT result and whichever status flags are raised
  task showResult();
    $display("a: %e\nb: %e", $bitstoreal(a), $bitstoreal(b));
    $display("a+b: %e", $bitstoreal(out));
    $display("%s%s%s%s", overflow ? "OVERFLOW " : "",
             underflow ? "UNDERFLOW" : "", zero ? "ZERO" : "",
             inexact ? "INEXACT" : "");
  endtask

  // Stimulus
  initial begin
    // Phase 1: 30 vectors with every bit of both operands randomized
    repeat (30) begin : randomTesting
      a[63:32] = $urandom();
      a[31:0] = $urandom();
      b[63:32] = $urandom();
      b[31:0] = $urandom();
      #(DELAY);
      showResult();
    end

    // Phase 2: pin both exponent fields to 1023 so only the sign and
    // mantissa vary across the next 20 vectors
    a[62:52] = 11'd1023;
    b[62:52] = 11'd1023;
    repeat (20) begin : zeroExponent
      // Random signs
      a[63] = $urandom();
      b[63] = $urandom();
      // Random mantissas
      a[51:32] = $urandom();
      a[31:0] = $urandom();
      b[51:32] = $urandom();
      b[31:0] = $urandom();
      #(DELAY);
      showResult();
    end

    // Phase 3: b is a with only the sign bit inverted; the zero flag must rise
    repeat (20) begin : testZero
      a[63:32] = $urandom();
      a[31:0] = $urandom();
      b = {~a[63], a[62:0]};
      #(DELAY);
      assert (zero);
    end

    $stop();
  end
endmodule  // float_adder_double_tb