-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathvertex_transform.sv
197 lines (159 loc) · 5.46 KB
/
vertex_transform.sv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
/*
WORLD MEMORY: 3x vertex_memory
2^14 words, all sequential data
one word:
21 bits 63:43 x (2's complement)
21 bits 42:22 y (2's complement)
21 bits 21: 1 z (2's complement, negative = behind camera)
1 bit 0 end-of-data flag: set to 1 for every valid vertex; calculation stops at the first word that reads as all zeros
**
RASTER MEMORY: 3x vertex_memory_bins
2^16 (max) words, in pieces depending on address bits
memory address format: {bin y-index (7 bits), bin number x (7 bits), sequential (2 bits)}
one word: (36 bits)
10 bits 35:26 x (unsigned) (overscan a bit to allow for triangles to be off the screen a bit)
10 bits 25:16 y (unsigned)
16 bits 15: 0 z (unsigned)
then, also 1x vertex_memory_norms
memory address format: same
one word: (32 bits)
10 bits 31:22 normx
10 bits 21:12 normy
10 bits 11: 2 normz
1 bit detection
1 bit for good luck
---
handshake: once done=1, start must be seen low and then high again (a 0 -> 1 transition) to restart the transform
while done=1, raster reads are guaranteed one clock cycle from address to data (directly to and from M9K)
---
debug:
world memory:
(1000, 1500, 10)
(125, 100, 1)
(200, 250, 2)
after division:
(100, 150)
(125, 100)
(100, 125)
after shifting:
(500, 412)
(525, 362)
(500, 387)
*/
//vertex_transform
//Walks world memory one triangle (3 vertices, one per memory instance) at a time,
//divides x and y by z, and writes the result plus a per-triangle normal into raster memory.
//
//Parameters:
//  screenWidth, screenHeight - used only to derive shiftX / shiftY below
//Ports:
//  start     - restart handshake, see doneState1 / doneState2
//  done      - high while the state machine sits in doneState1 / doneState2
//  bin_y, bin_x, bin_seq - concatenated into the raster memory read address
//  triangle  - read data of the three raster memories (one vertex each)
//  norms     - read data of the normals memory
//  BOARD_CLK - single clock for all registers and memories
module vertex_transform #(screenWidth = 800, screenHeight = 525) (
    input logic start,
    output logic done,
    input logic[6:0] bin_y,
    input logic[6:0] bin_x,
    input logic[1:0] bin_seq,
    output logic[35:0] triangle[3],
    output logic[31:0] norms,
    input logic BOARD_CLK
);
    //params
    //NOTE(review): shiftX / shiftY are never referenced in this module, although the header
    //comment's debug trace shows an "after shifting" step - confirm where the shift is applied
    parameter shiftX = screenWidth / 2;
    parameter shiftY = screenHeight / 2;

    //signals
    logic[63:0] world_memory_out[3];
    logic[35:0] raster_memory_in[3];
    logic[31:0] raster_memory_norms_in;
    //NOTE(review): raster_memory_in_addr is never assigned anywhere in this module, so the
    //write address of the raster and norm memories is undriven - binning logic still missing?
    logic[15:0] raster_memory_in_addr;
    logic raster_memory_wren;

    //triangle edge vectors, one entry per axis, shared by all cross-product multipliers
    //axis index follows the world word layout: 0 = bits 21:1 (z), 1 = bits 42:22 (y), 2 = bits 63:43 (x)
    logic[20:0] edge_vec1[3]; //vertex 0 - vertex 1
    logic[20:0] edge_vec2[3]; //vertex 0 - vertex 2

    //registers and their next's
    enum logic[7:0] {
        initState,
        calcState1,
        calcState2,
        calcState3,
        saveState,
        doneState1,
        doneState2
    } state = initState, state_next;
    logic[13:0] world_memory_addr, world_memory_addr_next;

    genvar i;
    generate //3 vertexes in triangle
        for(i = 0; i < 3; i++) begin: vertex_memory_generate
            //worldspace vertices (write port tied off, read only for now)
            vertex_memory world_memory (.clock(BOARD_CLK), //todo: write to world memory? keyboard controls?
                .wraddress(14'd0), .data(64'd0), .wren(1'd0),
                .rdaddress(world_memory_addr), .q(world_memory_out[i]));

            //raster space bins
            vertex_memory_bins raster_memory(.clock(BOARD_CLK), .wraddress(raster_memory_in_addr), .data(raster_memory_in[i]), .wren(raster_memory_wren),
                .rdaddress({bin_y, bin_x, bin_seq}), .q(triangle[i]));

            //two divisions per vertex
            vertex_divide divx(.clock(BOARD_CLK),
                .numer(world_memory_out[i][63:43]), //x
                .denom(world_memory_out[i][21:1]), //z
                .quotient(raster_memory_in[i][35:26]),//into x (todo: not just blindly truncate MSB's?)
                .remain());
            vertex_divide divy(.clock(BOARD_CLK),
                .numer(world_memory_out[i][42:22]), //y
                .denom(world_memory_out[i][21:1]), //z
                .quotient(raster_memory_in[i][25:16]),//into y
                .remain());

            //and just assign z, right shifted to fit (keeps the upper 16 of the 21 z bits)
            assign raster_memory_in[i][15:0] = world_memory_out[i][21:6];
        end
    endgenerate

    //raster space norms, written and read at the same addresses as the raster bins
    vertex_memory_norms raster_norms(.clock(BOARD_CLK), .wraddress(raster_memory_in_addr), .data(raster_memory_norms_in), .wren(raster_memory_wren),
        .rdaddress({bin_y, bin_x, bin_seq}), .q(norms));

    always_ff @(posedge BOARD_CLK) begin
        state <= state_next;
        world_memory_addr <= world_memory_addr_next;
    end

    //next state logic
    always_comb begin
        //defaults: hold state and address, no write, not done
        state_next = state;
        world_memory_addr_next = world_memory_addr;
        raster_memory_wren = 0;
        done = 0;
        unique case(state)
            initState: begin
                //initially we start at zero
                world_memory_addr_next = 0;
                state_next = calcState1;
            end
            calcState1: begin
                //halt check: an all-zero word from world memory 0 marks the end of the data
                //NOTE(review): whether the word for the current address is already on q here
                //depends on the memory IP's read latency - confirm against the IP settings
                if (world_memory_out[0] == 0) state_next = doneState1;
                else state_next = calcState2;
            end
            //these we are just waiting for the divider to catch up with everything
            calcState2: state_next = calcState3;
            calcState3: state_next = saveState;
            saveState: begin
                //save everything to raster memory
                raster_memory_wren = 1;
                //begin loading next world memory
                world_memory_addr_next = world_memory_addr + 1;
                state_next = calcState1;
            end
            doneState1: begin
                //wait until start is low, just in case we finished more quickly than anticipated
                if (start == 0) state_next = doneState2;
                done = 1;
            end
            doneState2: begin
                //now we wait until start is high again to start again
                if (start == 1) state_next = initState;
                done = 1;
            end
            default: ;
        endcase
    end

    //calculating the norms, 6 multipliers required
    //this does a cross product of the two edge vectors:
    //  norm[i] = edge_vec1[i+1] * edge_vec2[i+2] - edge_vec1[i+2] * edge_vec2[i+1]   (indices mod 3)
    generate //this generate loops thru the 3 dimensions, not the 3 vertices
        for(i = 0; i < 3; i++) begin: norm_generate
            logic[41:0] A, B;

            //continuous assignments: an initializer on the declaration would only be evaluated once,
            //so the edges would not follow world_memory_out
            //differences are kept at the 21-bit component width (wraps on overflow)
            assign edge_vec1[i] = world_memory_out[0][i*21+1 +: 21] - world_memory_out[1][i*21+1 +: 21];
            assign edge_vec2[i] = world_memory_out[0][i*21+1 +: 21] - world_memory_out[2][i*21+1 +: 21];

            //operands come from the OTHER two axes; (i+2)%3 and (i+1)%3 replace (i-1)%3 and (i-2)%3
            //because % keeps the sign of its first operand and would yield negative indices
            //presumably vertex_multiply is a 21x21 -> 42 bit multiplier; verify against the IP
            vertex_multiply mult_A(.clock(BOARD_CLK), .dataa(edge_vec1[(i+1)%3]), .datab(edge_vec2[(i+2)%3]), .result(A));
            vertex_multiply mult_B(.clock(BOARD_CLK), .dataa(edge_vec1[(i+2)%3]), .datab(edge_vec2[(i+1)%3]), .result(B));

            //only the low 10 bits of A - B are stored, the MSB's are truncated
            //truncating those MSB's might not be the best option here... but tbh most of them are zeros... right?
            //NOTE(review): raster_memory_norms_in[1:0] (detection / spare bits) are never driven in this module
            assign raster_memory_norms_in[i*10+2 +: 10] = A - B;
        end
    endgenerate
endmodule