From 5ff09249a61d561ba008669076e883df79aa6c6e Mon Sep 17 00:00:00 2001 From: sy2002 Date: Wed, 5 Aug 2015 01:37:21 +0200 Subject: [PATCH] fixed BRAM timing, optimized MOVE --- test_programs/regbank.asm | 12 +-- test_programs/regbank.lis | 164 ++++++++++++++++++------------------- test_programs/regbank.out | 168 ++++++++++++++++++-------------------- test_programs/regbank.rom | 8 +- vhdl/block_ram.vhd | 61 +++++++++----- vhdl/env1_globals.vhd | 8 +- vhdl/mmio_mux.vhd | 4 +- vhdl/qnice_cpu.vhd | 94 +++++++++------------ 8 files changed, 251 insertions(+), 268 deletions(-) diff --git a/test_programs/regbank.asm b/test_programs/regbank.asm index 532e5020..6d6b3b51 100644 --- a/test_programs/regbank.asm +++ b/test_programs/regbank.asm @@ -11,7 +11,7 @@ ; Everything works correct, if the TIL displays the following sequence in ; a loop: 8080, 0000, 1700, 0000 ; -; done by sy2002 on August, 1st/2nd 2015 +; done by sy2002 on August 2015 IO$TIL_BASE .EQU 0xFF10 ; Address of TIL-display @@ -27,7 +27,7 @@ CHECK_R1 .EQU 0x1700 ; 256 x 23 = 5.888 = 0x1700 ; memory locations STACK_TOP .EQU 0x8020 ; top of the stack, initial SP -VAR_DIFF .EQU 0x8000 ; variable in memory to store the +VAR_DIFF .EQU 0x8000 ; variable in RAM to store the ; difference between a register ; value and the expected value @@ -35,10 +35,6 @@ VAR_DIFF .EQU 0x8000 ; variable in memory to store the MOVE 0x8020, R13 ; setup stack pointer - MOVE 0x1111, R0 - MOVE 0x2222, R1 - RBRA DISPLAY_LOOP, 1 - OR 0xFF00, R14 ; activate highest register page MOVE 0x0100, R8 ; loop through 256 banks MOVE 0x0001, R9 ; we need to sub 1 often @@ -75,11 +71,11 @@ CHECK_LOOP ADD R10, R14 ; next bank ; output results to TIL -DISPLAY_LOOP AND 0x00FF, R14 ; switch back to reg bank 0 + AND 0x00FF, R14 ; switch back to reg bank 0 MOVE IO$TIL_BASE, R12 ; TIL MMIO display address ; display register R0 and the difference to the expected value - MOVE R0, R8 ; register = R8 + DISPLAY_LOOP MOVE R0, R8 ; register = R8 MOVE CHECK_R0, R9 ; expected value = R9 RSUB DISPLAY_REG, 1 ; call sub routine diff --git a/test_programs/regbank.lis b/test_programs/regbank.lis index 36926e82..a321d1e8 100644 --- a/test_programs/regbank.lis +++ b/test_programs/regbank.lis @@ -11,7 +11,7 @@ 000011 ; Everything works correct, if the TIL displays the following sequence in 000012 ; a loop: 8080, 0000, 1700, 0000 000013 ; -000014 ; done by sy2002 on August, 1st/2nd 2015 +000014 ; done by sy2002 on August 2015 000015 000016 IO$TIL_BASE .EQU 0xFF10 ; Address of TIL-display 000017 @@ -27,7 +27,7 @@ 000027 000028 ; memory locations 000029 STACK_TOP .EQU 0x8020 ; top of the stack, initial SP -000030 VAR_DIFF .EQU 0x8000 ; variable in memory to store the +000030 VAR_DIFF .EQU 0x8000 ; variable in RAM to store the 000031 ; difference between a register 000032 ; value and the expected value 000033 @@ -35,89 +35,85 @@ 000035 000036 0000 0FB4 8020 MOVE 0x8020, R13 ; setup stack pointer 000037 -000038 0002 0F80 1111 MOVE 0x1111, R0 -000039 0004 0F84 2222 MOVE 0x2222, R1 -000040 0006 FFA0 0024 RBRA DISPLAY_LOOP, 1 -000041 -000042 0008 AFB8 FF00 OR 0xFF00, R14 ; activate highest register page -000043 000A 0FA0 0100 MOVE 0x0100, R8 ; loop through 256 banks -000044 000C 0FA4 0001 MOVE 0x0001, R9 ; we need to sub 1 often -000045 000E 0FA8 0100 MOVE NEXT_BANK, R10 ; we need to sub 0x100 often -000046 0010 0FAC 0017 MOVE 23, R11 ; we need to move 23 often -000047 -000048 ; fill registers throughout 256 registerbanks with meaningful values -000049 0012 0800 BANK_LOOP MOVE R8, R0 ; move 256 downto 1 in all R0's -000050 0013 0B04 MOVE R11, R1 ; move 23 in all R1's -000051 0014 3A38 SUB R10, R14 ; previous register bank -000052 0015 3920 SUB R9, R8 ; decrease loop counter -000053 0016 FFAB FFFA RBRA BANK_LOOP, !Z ; loop 256 downto 1 (0 exits) +000038 0002 AFB8 FF00 OR 0xFF00, R14 ; activate highest register page +000039 0004 0FA0 0100 MOVE 0x0100, R8 ; loop through 256 banks +000040 0006 0FA4 0001 MOVE 0x0001, R9 ; we need to sub 1 often +000041 0008 0FA8 0100 MOVE NEXT_BANK, R10 ; we need to sub 0x100 often +000042 000A 0FAC 0017 MOVE 23, R11 ; we need to move 23 often +000043 +000044 ; fill registers throughout 256 registerbanks with meaningful values +000045 000C 0800 BANK_LOOP MOVE R8, R0 ; move 256 downto 1 in all R0's +000046 000D 0B04 MOVE R11, R1 ; move 23 in all R1's +000047 000E 3A38 SUB R10, R14 ; previous register bank +000048 000F 3920 SUB R9, R8 ; decrease loop counter +000049 0010 FFAB FFFA RBRA BANK_LOOP, !Z ; loop 256 downto 1 (0 exits) +000050 +000051 ; calculate check sums over all registers and store the results in bank 0 +000052 0012 0FA0 00FF MOVE 0x00FF, R8 ; loop only through 255 as we +000053 0014 9FB8 00FF AND 0x00FF, R14 ; are adding everything to bank 0 000054 -000055 ; calculate check sums over all registers and store the results in bank 0 -000056 0018 0FA0 00FF MOVE 0x00FF, R8 ; loop only through 255 as we -000057 001A 9FB8 00FF AND 0x00FF, R14 ; are adding everything to bank 0 -000058 -000059 001C 1A38 CHECK_LOOP ADD R10, R14 ; next bank -000060 -000061 001D 0030 MOVE R0, R12 ; use R12 as temp for R0 -000062 001E 0E2C MOVE R14, R11 ; save current bank page -000063 001F 9FB8 00FF AND 0x00FF, R14 ; back to bank 0 -000064 0021 1C00 ADD R12, R0 ; accumulate check sum in R0 -000065 0022 0B38 MOVE R11, R14 ; restore current bank page -000066 -000067 0023 0130 MOVE R1, R12 ; use R12 as temp for R1 -000068 0024 0E2C MOVE R14, R11 ; save current bank page -000069 0025 9FB8 00FF AND 0x00FF, R14 ; back to bank 0 -000070 0027 1C04 ADD R12, R1 ; accumulate check sum in R1 -000071 0028 0B38 MOVE R11, R14 ; restore current bank page +000055 0016 1A38 CHECK_LOOP ADD R10, R14 ; next bank +000056 +000057 0017 0030 MOVE R0, R12 ; use R12 as temp for R0 +000058 0018 0E2C MOVE R14, R11 ; save current bank page +000059 0019 9FB8 00FF AND 0x00FF, R14 ; back to bank 0 +000060 001B 1C00 ADD R12, R0 ; accumulate check sum in R0 +000061 001C 0B38 MOVE R11, R14 ; restore current bank page +000062 +000063 001D 0130 MOVE R1, R12 ; use R12 as temp for R1 +000064 001E 0E2C MOVE R14, R11 ; save current bank page +000065 001F 9FB8 00FF AND 0x00FF, R14 ; back to bank 0 +000066 0021 1C04 ADD R12, R1 ; accumulate check sum in R1 +000067 0022 0B38 MOVE R11, R14 ; restore current bank page +000068 +000069 0023 3920 SUB R9, R8 ; decrease loop counter +000070 0024 FFAB FFF0 RBRA CHECK_LOOP, !Z ; loop 255 downto 1 (0 exits) +000071 000072 -000073 0029 3920 SUB R9, R8 ; decrease loop counter -000074 002A FFAB FFF0 RBRA CHECK_LOOP, !Z ; loop 255 downto 1 (0 exits) -000075 +000073 ; output results to TIL +000074 0026 9FB8 00FF AND 0x00FF, R14 ; switch back to reg bank 0 +000075 0028 0FB0 FF10 MOVE IO$TIL_BASE, R12 ; TIL MMIO display address 000076 -000077 ; output results to TIL -000078 002C 9FB8 00FF DISPLAY_LOOP AND 0x00FF, R14 ; switch back to reg bank 0 -000079 002E 0FB0 FF10 MOVE IO$TIL_BASE, R12 ; TIL MMIO display address -000080 -000081 ; display register R0 and the difference to the expected value -000082 0030 0020 MOVE R0, R8 ; register = R8 -000083 0031 0FA4 8080 MOVE CHECK_R0, R9 ; expected value = R9 -000084 0033 FFB0 0008 RSUB DISPLAY_REG, 1 ; call sub routine -000085 -000086 ; dito R1 -000087 0035 0120 MOVE R1, R8 -000088 0036 0FA4 1700 MOVE CHECK_R1, R9 -000089 0038 FFB0 0003 RSUB DISPLAY_REG, 1 +000077 ; display register R0 and the difference to the expected value +000078 002A 0020 DISPLAY_LOOP MOVE R0, R8 ; register = R8 +000079 002B 0FA4 8080 MOVE CHECK_R0, R9 ; expected value = R9 +000080 002D FFB0 0008 RSUB DISPLAY_REG, 1 ; call sub routine +000081 +000082 ; dito R1 +000083 002F 0120 MOVE R1, R8 +000084 0030 0FA4 1700 MOVE CHECK_R1, R9 +000085 0032 FFB0 0003 RSUB DISPLAY_REG, 1 +000086 +000087 0034 FFA0 FFF4 RBRA DISPLAY_LOOP, 1 +000088 +000089 0036 E000 HALT 000090 -000091 003A FFA0 FFF0 RBRA DISPLAY_LOOP, 1 -000092 -000093 003C E000 HALT -000094 -000095 ; sub routine to display the register value and the expected value -000096 ; the sub routine uses another sub routine so this is also a nice first -000097 ; test of stacked sub routine calls -000098 ; input: R8 = register, R9 = expected value, R12 = TIL BASE -000099 003D 1FB8 0100 DISPLAY_REG ADD NEXT_BANK, R14 ; next register bank -000100 003F 0831 MOVE R8, @R12 ; display value on TIL -000101 0040 FFB0 000A RSUB DELAY, 1 ; wait 1 second -000102 0042 0F80 8000 MOVE VAR_DIFF, R0 ; memory location of variable -000103 0044 0801 MOVE R8, @R0 ; store register value in var -000104 0045 3901 SUB R9, @R0 ; subtract expected value -000105 0046 0071 MOVE @R0, @R12 ; display difference reg vs. expct -000106 0047 FFB0 0003 RSUB DELAY, 1 ; wait 1 second -000107 0049 3FB8 0100 SUB NEXT_BANK, R14 ; previous register bank -000108 004B 0DBC MOVE @R13++, R15 ; return from sub routine -000109 -000110 ; sub routine to wait for about 1sec -000111 004C 1FB8 0100 DELAY ADD NEXT_BANK, R14 ; next register bank -000112 004E 0F84 07D0 MOVE WAIT_CYCLES2, R1 ; outer wait cycles (2.000) -000113 0050 0F80 1388 WAIT_LOOP2 MOVE WAIT_CYCLES1, R0 ; inner wait cycles (5.000) -000114 0052 3F80 0001 WAIT_LOOP1 SUB 1, R0 ; dec inner wait cycles and ... -000115 0054 FFAB FFFC RBRA WAIT_LOOP1, !Z ; ... repeat if not zero -000116 0056 3F84 0001 SUB 1, R1 ; dec outer wait cycles and ... -000117 0058 FFAB FFF6 RBRA WAIT_LOOP2, !Z ; ... repeat if not zero -000118 005A 3FB8 0100 SUB NEXT_BANK, R14 ; previous register bank -000119 005C 0DBC MOVE @R13++, R15 ; return from sub routine -000120 +000091 ; sub routine to display the register value and the expected value +000092 ; the sub routine uses another sub routine so this is also a nice first +000093 ; test of stacked sub routine calls +000094 ; input: R8 = register, R9 = expected value, R12 = TIL BASE +000095 0037 1FB8 0100 DISPLAY_REG ADD NEXT_BANK, R14 ; next register bank +000096 0039 0831 MOVE R8, @R12 ; display value on TIL +000097 003A FFB0 000A RSUB DELAY, 1 ; wait 1 second +000098 003C 0F80 8000 MOVE VAR_DIFF, R0 ; memory location of variable +000099 003E 0801 MOVE R8, @R0 ; store register value in var +000100 003F 3901 SUB R9, @R0 ; subtract expected value +000101 0040 0071 MOVE @R0, @R12 ; display difference reg vs. expct +000102 0041 FFB0 0003 RSUB DELAY, 1 ; wait 1 second +000103 0043 3FB8 0100 SUB NEXT_BANK, R14 ; previous register bank +000104 0045 0DBC MOVE @R13++, R15 ; return from sub routine +000105 +000106 ; sub routine to wait for about 1sec +000107 0046 1FB8 0100 DELAY ADD NEXT_BANK, R14 ; next register bank +000108 0048 0F84 07D0 MOVE WAIT_CYCLES2, R1 ; outer wait cycles (2.000) +000109 004A 0F80 1388 WAIT_LOOP2 MOVE WAIT_CYCLES1, R0 ; inner wait cycles (5.000) +000110 004C 3F80 0001 WAIT_LOOP1 SUB 1, R0 ; dec inner wait cycles and ... +000111 004E FFAB FFFC RBRA WAIT_LOOP1, !Z ; ... repeat if not zero +000112 0050 3F84 0001 SUB 1, R1 ; dec outer wait cycles and ... +000113 0052 FFAB FFF6 RBRA WAIT_LOOP2, !Z ; ... repeat if not zero +000114 0054 3FB8 0100 SUB NEXT_BANK, R14 ; previous register bank +000115 0056 0DBC MOVE @R13++, R15 ; return from sub routine +000116 EQU-list: @@ -128,6 +124,6 @@ STACK_TOP : 0x8020 VAR_DIFF : 0x8000 Label-list: -------------------------------------------------------------------------------------------------------- -BANK_LOOP : 0x0012 CHECK_LOOP : 0x001C DISPLAY_LOOP : 0x002C -DISPLAY_REG : 0x003D DELAY : 0x004C WAIT_LOOP2 : 0x0050 -WAIT_LOOP1 : 0x0052 +BANK_LOOP : 0x000C CHECK_LOOP : 0x0016 DISPLAY_LOOP : 0x002A +DISPLAY_REG : 0x0037 DELAY : 0x0046 WAIT_LOOP2 : 0x004A +WAIT_LOOP1 : 0x004C diff --git a/test_programs/regbank.out b/test_programs/regbank.out index 28a24e77..55c1c735 100644 --- a/test_programs/regbank.out +++ b/test_programs/regbank.out @@ -1,93 +1,87 @@ 0x0000 0x0FB4 0x0001 0x8020 -0x0002 0x0F80 -0x0003 0x1111 -0x0004 0x0F84 -0x0005 0x2222 -0x0006 0xFFA0 -0x0007 0x0024 -0x0008 0xAFB8 -0x0009 0xFF00 -0x000A 0x0FA0 -0x000B 0x0100 -0x000C 0x0FA4 -0x000D 0x0001 -0x000E 0x0FA8 -0x000F 0x0100 -0x0010 0x0FAC -0x0011 0x0017 -0x0012 0x0800 -0x0013 0x0B04 -0x0014 0x3A38 -0x0015 0x3920 -0x0016 0xFFAB -0x0017 0xFFFA -0x0018 0x0FA0 -0x0019 0x00FF -0x001A 0x9FB8 -0x001B 0x00FF -0x001C 0x1A38 -0x001D 0x0030 +0x0002 0xAFB8 +0x0003 0xFF00 +0x0004 0x0FA0 +0x0005 0x0100 +0x0006 0x0FA4 +0x0007 0x0001 +0x0008 0x0FA8 +0x0009 0x0100 +0x000A 0x0FAC +0x000B 0x0017 +0x000C 0x0800 +0x000D 0x0B04 +0x000E 0x3A38 +0x000F 0x3920 +0x0010 0xFFAB +0x0011 0xFFFA +0x0012 0x0FA0 +0x0013 0x00FF +0x0014 0x9FB8 +0x0015 0x00FF +0x0016 0x1A38 +0x0017 0x0030 +0x0018 0x0E2C +0x0019 0x9FB8 +0x001A 0x00FF +0x001B 0x1C00 +0x001C 0x0B38 +0x001D 0x0130 0x001E 0x0E2C 0x001F 0x9FB8 0x0020 0x00FF -0x0021 0x1C00 +0x0021 0x1C04 0x0022 0x0B38 -0x0023 0x0130 -0x0024 0x0E2C -0x0025 0x9FB8 -0x0026 0x00FF -0x0027 0x1C04 -0x0028 0x0B38 -0x0029 0x3920 -0x002A 0xFFAB -0x002B 0xFFF0 -0x002C 0x9FB8 -0x002D 0x00FF -0x002E 0x0FB0 -0x002F 0xFF10 -0x0030 0x0020 -0x0031 0x0FA4 -0x0032 0x8080 -0x0033 0xFFB0 -0x0034 0x0008 -0x0035 0x0120 -0x0036 0x0FA4 -0x0037 0x1700 -0x0038 0xFFB0 -0x0039 0x0003 -0x003A 0xFFA0 -0x003B 0xFFF0 -0x003C 0xE000 -0x003D 0x1FB8 -0x003E 0x0100 -0x003F 0x0831 -0x0040 0xFFB0 -0x0041 0x000A -0x0042 0x0F80 -0x0043 0x8000 -0x0044 0x0801 -0x0045 0x3901 -0x0046 0x0071 -0x0047 0xFFB0 -0x0048 0x0003 -0x0049 0x3FB8 -0x004A 0x0100 -0x004B 0x0DBC -0x004C 0x1FB8 -0x004D 0x0100 -0x004E 0x0F84 -0x004F 0x07D0 -0x0050 0x0F80 -0x0051 0x1388 -0x0052 0x3F80 -0x0053 0x0001 -0x0054 0xFFAB -0x0055 0xFFFC -0x0056 0x3F84 -0x0057 0x0001 -0x0058 0xFFAB -0x0059 0xFFF6 -0x005A 0x3FB8 -0x005B 0x0100 -0x005C 0x0DBC +0x0023 0x3920 +0x0024 0xFFAB +0x0025 0xFFF0 +0x0026 0x9FB8 +0x0027 0x00FF +0x0028 0x0FB0 +0x0029 0xFF10 +0x002A 0x0020 +0x002B 0x0FA4 +0x002C 0x8080 +0x002D 0xFFB0 +0x002E 0x0008 +0x002F 0x0120 +0x0030 0x0FA4 +0x0031 0x1700 +0x0032 0xFFB0 +0x0033 0x0003 +0x0034 0xFFA0 +0x0035 0xFFF4 +0x0036 0xE000 +0x0037 0x1FB8 +0x0038 0x0100 +0x0039 0x0831 +0x003A 0xFFB0 +0x003B 0x000A +0x003C 0x0F80 +0x003D 0x8000 +0x003E 0x0801 +0x003F 0x3901 +0x0040 0x0071 +0x0041 0xFFB0 +0x0042 0x0003 +0x0043 0x3FB8 +0x0044 0x0100 +0x0045 0x0DBC +0x0046 0x1FB8 +0x0047 0x0100 +0x0048 0x0F84 +0x0049 0x07D0 +0x004A 0x0F80 +0x004B 0x1388 +0x004C 0x3F80 +0x004D 0x0001 +0x004E 0xFFAB +0x004F 0xFFFC +0x0050 0x3F84 +0x0051 0x0001 +0x0052 0xFFAB +0x0053 0xFFF6 +0x0054 0x3FB8 +0x0055 0x0100 +0x0056 0x0DBC diff --git a/test_programs/regbank.rom b/test_programs/regbank.rom index 81c2fc99..dab1ae03 100644 --- a/test_programs/regbank.rom +++ b/test_programs/regbank.rom @@ -1,11 +1,5 @@ 0000111110110100 1000000000100000 -0000111110000000 -0001000100010001 -0000111110000100 -0010001000100010 -1111111110100000 -0000000000100100 1010111110111000 1111111100000000 0000111110100000 @@ -57,7 +51,7 @@ 1111111110110000 0000000000000011 1111111110100000 -1111111111110000 +1111111111110100 1110000000000000 0001111110111000 0000000100000000 diff --git a/vhdl/block_ram.vhd b/vhdl/block_ram.vhd index 0eff7234..e7dd6033 100644 --- a/vhdl/block_ram.vhd +++ b/vhdl/block_ram.vhd @@ -1,8 +1,10 @@ -- Block RAM (synchronous) --- read and write on rising clock edge --- the RAM is initialized to zero on system start --- can be directly connected to a bus, as it goes high impedance on low chip enable and on writing --- can directly control the CPU's WAIT_FOR_DATA line +-- * read and write on falling clock edge; falling edge is chosen, because QNICE CPU's FSM +-- is generating control signals on rising edges; so there is enough time for the signals to settle +-- and therefore we do not need to waste a cycle +-- * the RAM is initialized to zero on system start +-- * can be directly connected to a bus, as it goes high impedance on low chip enable and on writing +-- * can directly control the CPU's (or any bus arbiter's) WAIT_FOR_DATA line -- inspired by http://vhdlguru.blogspot.de/2011/01/block-and-distributed-rams-on-xilinx.html -- done by sy2002 in August 2015 @@ -35,9 +37,10 @@ signal bram : bram_t := (others => x"baba"); signal output : std_logic_vector(15 downto 0); -signal counter : std_logic_vector(1 downto 0) := "00"; - +signal counter : std_logic := '1'; -- important to be initialized to one signal address_old : std_logic_vector(15 downto 0) := (others => 'U'); +signal we_old : std_logic := '0'; +signal async_reset : std_logic; begin @@ -56,6 +59,7 @@ begin end if; address_old <= address; + we_old <= we; end if; end process; @@ -71,25 +75,40 @@ begin -- generate a busy signal for one clock cycle, because this is -- the read delay that this block RAM is having - -- output high impedance when ce = 0 so that the busy line can be - -- part of a bus - manage_busy : process (clk, ce, we, counter, address, address_old) + manage_busy : process (clk, async_reset) begin - if rising_edge(ce) and we = '0' and counter = "00" then - counter <= "01"; - elsif falling_edge(clk) and counter = "01" then - counter <= "10"; - elsif rising_edge(clk) and counter = "10" then - counter <= "00"; - elsif address_old /= address and we = '0' and counter = "00" then - counter <= "01"; + if rising_edge(clk) then + if ce = '1' then + counter <= not counter; + else + counter <= '1'; -- reverse logic because busy needs to be "immediatelly" one when needed + end if; + end if; + + if async_reset = '1' then + counter <= '1'; end if; - + end process; + + -- address changes or changes between reading and writing are + -- re-triggering the busy-cycle as this means a new operation for the BRAM + manage_busy_on_changes : process (ce, we, we_old, address, address_old) + begin if ce = '1' then - busy <= counter(0) or counter(1); + if we /= we_old then + async_reset <= '1'; + elsif address /= address_old then + async_reset <= '1'; + else + async_reset <= '0'; + end if; else - busy <= 'Z'; - end if; + async_reset <= '0'; + end if; end process; + + with ce select + busy <= counter when '1', + 'Z' when others; end beh; diff --git a/vhdl/env1_globals.vhd b/vhdl/env1_globals.vhd index 43b7d572..31c97c1d 100644 --- a/vhdl/env1_globals.vhd +++ b/vhdl/env1_globals.vhd @@ -10,16 +10,16 @@ use IEEE.STD_LOGIC_1164.all; package env1_globals is -- file name and file size (in lines) of the file that is converted to the ROM located at 0x0000 -constant ROM_FILE : string := "../test_programs/bram.rom"; -constant ROM_SIZE : integer := 10; +constant ROM_FILE : string := "../test_programs/regbank.rom"; +constant ROM_SIZE : integer := 87; -- size of lower register bank: should be 256 -- set to 16 during development for faster synthesis, routing, etc. -constant SHADOW_REGFILE_SIZE : integer := 16; +constant SHADOW_REGFILE_SIZE : integer := 256; -- size of the block RAM in 16bit words: should be 32768 -- set to 256 during development for tracability during simulation -constant BLOCK_RAM_SIZE : integer := 256; +constant BLOCK_RAM_SIZE : integer := 32768; end env1_globals; diff --git a/vhdl/mmio_mux.vhd b/vhdl/mmio_mux.vhd index 26df7405..ed25e984 100644 --- a/vhdl/mmio_mux.vhd +++ b/vhdl/mmio_mux.vhd @@ -85,8 +85,8 @@ begin -- RAM is enabled when the address is in ($8000..$FEFF) and -- when a write attempt only occurs while the data is valid ram_enable_i <= addr(15) - and not (addr(14) and addr(13) and addr(12) and addr(11) and addr(10) and addr(9) and addr(8)) - and not (data_dir and not data_valid); + and not (addr(14) and addr(13) and addr(12) and addr(11) and addr(10) and addr(9) and addr(8)); + -- and not (data_dir and not data_valid); ram_enable <= ram_enable_i; end Behavioral; diff --git a/vhdl/qnice_cpu.vhd b/vhdl/qnice_cpu.vhd index a80e6a0e..b4ecd206 100644 --- a/vhdl/qnice_cpu.vhd +++ b/vhdl/qnice_cpu.vhd @@ -110,21 +110,13 @@ type tCPU_States is (cs_reset, cs_decode, cs_exeprep_get_src_indirect, - cs_exeprep_get_src_indirect2, - cs_exeprep_get_src_indirect3, cs_exeprep_get_dst_indirect, - cs_exeprep_get_dst_indirect2, - cs_exeprep_get_dst_indirect3, cs_execute, cs_exepost_store_dst_indirect, - cs_exepost_store_dst_indirect2, - cs_exepost_store_dst_indirect3, cs_exepost_sub, - cs_exepost_sub2, - cs_exepost_sub3, cs_exepost_prepfetch, cs_halt, @@ -388,7 +380,9 @@ begin end if; -- in case of a branch, Dst_Mode would contain garbage, therefore perform an explicit check - elsif Opcode /= opcBRA and Dst_Mode /= amDirect then + -- optimization: in case of MOVE the destination value is ignored anyway, so we can skip + -- the whole indirect parameter fetch in this case + elsif Opcode /= opcBRA and Dst_Mode /= amDirect and Opcode /= opcMOVE then fsmNextCpuState <= cs_exeprep_get_dst_indirect; -- pre decrement for destination register @@ -415,16 +409,13 @@ begin fsmNextCpuState <= cs_halt; end if; - when cs_exeprep_get_src_indirect => - fsmSrc_Value <= DATA_FROM_Bus; - - when cs_exeprep_get_src_indirect2 => + when cs_exeprep_get_src_indirect => -- add wait cycles, if necessary (e.g. due to slow RAM) - --if WAIT_FOR_DATA = '1' then - --fsmNextCpuState <= cs_exeprep_get_src_indirect; + if WAIT_FOR_DATA = '1' then + fsmNextCpuState <= cs_exeprep_get_src_indirect; -- data from bus is available - --else + else -- read the indirect value from the bus and store it fsmSrc_Value <= DATA_FROM_Bus; @@ -444,7 +435,9 @@ begin end if; -- decode the destination addressing mode (and avoid garbage due to a branch opcode) - if Opcode /= opcBRA and Dst_Mode /= amDirect then + -- optimization: in case of MOVE the destination value is ignored anyway, so we can skip + -- the whole indirect parameter fetch in this case + if Opcode /= opcBRA and Dst_Mode /= amDirect and Opcode /= opcMOVE then -- this code is nearly identical to the above-mentioned code -- within "elsif Dst_Mode /= amDirect then" fsmNextCpuState <= cs_exeprep_get_dst_indirect; @@ -463,20 +456,18 @@ begin fsmCpuAddr <= reg_read_data2; end if; end if; - --end if; + end if; - when cs_exeprep_get_dst_indirect | cs_exeprep_get_dst_indirect2 | cs_exeprep_get_dst_indirect3 => + when cs_exeprep_get_dst_indirect => -- add wait cycles, if necessary (e.g. due to slow RAM) - -- optimization for MOVE: the dst value is discarded in a "move to indirect" - -- scenario, so we can spare one CPU cycle in this case by ignoring WAIT_FOR_DATA - --if WAIT_FOR_DATA = '1' and Opcode /= opcMOVE then - --fsmNextCpuState <= cs_exeprep_get_dst_indirect; + if WAIT_FOR_DATA = '1' then + fsmNextCpuState <= cs_exeprep_get_dst_indirect; -- data from bus is available - --else + else -- read the indirect value from the bus and store it fsmDst_Value <= DATA_FROM_Bus; - --end if; + end if; when cs_execute => @@ -573,26 +564,27 @@ begin fsmCpuDataDirCtrl <= '1'; fsmCpuDataValid <= '1'; - -- perform post increment - if Dst_Mode = amIndirPostInc then - -- special handling of SP, SR and PC as they are not stored in the register file - case Dst_RegNo is - when x"D" => fsmSP <= SP + 1; - when x"E" => fsmSR <= SR + 1; - when x"F" => fsmPC <= PC + 1; - when others => - fsm_reg_write_addr <= Dst_RegNo; - fsm_reg_write_data <= reg_read_data2 + 1; - fsm_reg_write_en <= '1'; - end case; - end if; + -- add wait cycles if necessary + if WAIT_FOR_DATA = '1' then + fsmNextCpuState <= cs_exepost_store_dst_indirect; - when cs_exepost_store_dst_indirect2 | cs_exepost_store_dst_indirect3 => - fsmDataToBus <= DATA_To_Bus; - fsmCpuDataDirCtrl <= '1'; - fsmCpuDataValid <= '1'; + else + -- perform post increment + if Dst_Mode = amIndirPostInc then + -- special handling of SP, SR and PC as they are not stored in the register file + case Dst_RegNo is + when x"D" => fsmSP <= SP + 1; + when x"E" => fsmSR <= SR + 1; + when x"F" => fsmPC <= PC + 1; + when others => + fsm_reg_write_addr <= Dst_RegNo; + fsm_reg_write_data <= reg_read_data2 + 1; + fsm_reg_write_en <= '1'; + end case; + end if; + end if; - when cs_exepost_sub | cs_exepost_sub2 | cs_exepost_sub3 => + when cs_exepost_sub => fsmDataToBus <= DATA_To_Bus; fsmCpuDataDirCtrl <= '1'; fsmCpuDataValid <= '1'; @@ -623,19 +615,11 @@ begin when cs_reset => cpu_state_next <= cs_fetch; when cs_fetch => cpu_state_next <= cs_decode; when cs_decode => cpu_state_next <= cs_execute; - when cs_exeprep_get_src_indirect => cpu_state_next <= cs_exeprep_get_src_indirect2; - --when cs_exeprep_get_src_indirect2 => cpu_state_next <= cs_exeprep_get_src_indirect3; - when cs_exeprep_get_src_indirect2 => cpu_state_next <= cs_execute; - when cs_exeprep_get_dst_indirect => cpu_state_next <= cs_exeprep_get_dst_indirect2; - --when cs_exeprep_get_dst_indirect2 => cpu_state_next <= cs_exeprep_get_dst_indirect3; - when cs_exeprep_get_dst_indirect2 => cpu_state_next <= cs_execute; + when cs_exeprep_get_src_indirect => cpu_state_next <= cs_execute; + when cs_exeprep_get_dst_indirect => cpu_state_next <= cs_execute; when cs_execute => cpu_state_next <= cs_fetch; - when cs_exepost_store_dst_indirect => cpu_state_next <= cs_exepost_store_dst_indirect2; - --when cs_exepost_store_dst_indirect2 => cpu_state_next <= cs_exepost_store_dst_indirect3; - when cs_exepost_store_dst_indirect2 => cpu_state_next <= cs_exepost_prepfetch; - when cs_exepost_sub => cpu_state_next <= cs_exepost_sub2; - --when cs_exepost_sub2 => cpu_state_next <= cs_exepost_sub3; - when cs_exepost_sub2 => cpu_state_next <= cs_exepost_prepfetch; + when cs_exepost_store_dst_indirect => cpu_state_next <= cs_exepost_prepfetch; + when cs_exepost_sub => cpu_state_next <= cs_exepost_prepfetch; when cs_exepost_prepfetch => cpu_state_next <= cs_fetch; when cs_halt => cpu_state_next <= cs_halt; when others => cpu_state_next <= cpu_state;