Skip to content

Commit

Permalink
gpu_neon: rework enh. res. texturing hack
Browse files Browse the repository at this point in the history
  • Loading branch information
notaz committed Aug 22, 2024
1 parent 8489010 commit ee060c5
Show file tree
Hide file tree
Showing 8 changed files with 191 additions and 79 deletions.
2 changes: 0 additions & 2 deletions plugins/gpu_neon/psx_gpu/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,5 @@
#include "vector_types.h"
#include "psx_gpu.h"

#define unlikely(x) __builtin_expect((x), 0)

#endif

63 changes: 60 additions & 3 deletions plugins/gpu_neon/psx_gpu/psx_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -560,8 +560,9 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
y##set##_b.e[1] = vertex->b \


void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
vertex_struct *b, vertex_struct *c)
void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu,
const vertex_struct * __restrict__ a, const vertex_struct * __restrict__ b,
const vertex_struct * __restrict__ c)
{
u32 triangle_area = psx_gpu->triangle_area;
u32 winding_mask_scalar;
Expand Down Expand Up @@ -1163,6 +1164,8 @@ static void setup_spans_debug_check(psx_gpu_struct *psx_gpu,
setup_spans_set_x4(alternate, down, alternate_active); \
height -= 4; \
} while(height > 0); \
if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)) \
span_uvrg_offset[height - 1].low = span_uvrg_offset[height - 2].low; \
} \


Expand Down Expand Up @@ -1216,6 +1219,8 @@ static void setup_spans_debug_check(psx_gpu_struct *psx_gpu,
setup_spans_set_x4(alternate, up, alternate_active); \
height -= 4; \
} \
if (psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_V) \
psx_gpu->span_uvrg_offset[0].low = psx_gpu->span_uvrg_offset[1].low; \
} \

#define index_left 0
Expand Down Expand Up @@ -1452,13 +1457,53 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
setup_spans_set_x4(none, down, no);
height_minor_b -= 4;
}
if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V))
{
span_uvrg_offset[height_minor_b - 1].low =
span_uvrg_offset[height_minor_b - 2].low;
}
}

left_split_triangles++;
}

#endif

/*
 * Enhanced-resolution texturing hack: recomputes one texel coordinate of
 * `block` from the span's starting UV and the per-pixel UV gradient, and
 * overwrites that lane of block->uv.
 *
 * The patched lane (`offset`) is the one just below the lowest set bit of the
 * span's right_mask (lane 7 when none of the low 8 bits are set). The value
 * stored there is evaluated one pixel to the left of that lane, so the lane
 * ends up with the UV of the pixel before it.
 *
 * psx_gpu:          GPU state (hacks_active, uvrg_dx, texture masks, ...)
 * block:            block whose uv lane is patched; NOTE(review): presumably
 *                   the last block emitted for this span - confirm at callers
 * span_edge_data:   edge data of the span (num_blocks, right_mask, left_x)
 * span_uvrg_offset: this span's entry inside psx_gpu->span_uvrg_offset
 *
 * Without AHACK_TEXTURE_ADJ_U only the first and the last span are touched,
 * and only when the pixel offset within the span (before left_x is added) is
 * not negative.
 *
 * Kept noinline and non-static: the NEON assembly calls it by name.
 *
 * TODO (from the original author): this is a hacky mess; ideally rendering
 * would be changed so that this hack is not needed at all.
 */
void __attribute__((noinline))
setup_blocks_uv_adj_hack(psx_gpu_struct *psx_gpu, block_struct *block,
 edge_data_struct *span_edge_data, vec_4x32u *span_uvrg_offset)
{
  // index of this span within the span array
  size_t span_i = span_uvrg_offset - psx_gpu->span_uvrg_offset;
  if (span_i != 0 && span_i != psx_gpu->num_spans - 1
      && !(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_U))
    return;

  u32 num_blocks = span_edge_data->num_blocks - 1;
  // bit 8 acts as a sentinel so ctz is defined for an empty mask (-> lane 7)
  s32 offset = __builtin_ctz(span_edge_data->right_mask | 0x100) - 1;
  // pixel position (relative to the span start) one left of the patched lane
  s32 toffset = 8 * num_blocks + offset - 1;
  if (toffset < 0 && !(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_U))
    return;

  toffset += span_edge_data->left_x;
  s32 u_dx = psx_gpu->uvrg_dx.low.e[0];
  s32 v_dx = psx_gpu->uvrg_dx.low.e[1];
  u32 u = span_uvrg_offset->low.e[0];
  u32 v = span_uvrg_offset->low.e[1];
  // Multiply in unsigned arithmetic: the gradient times the pixel offset can
  // exceed the s32 range, and signed overflow is undefined behaviour. The
  // low 32 bits (all that is used) are the same as with a wrapping signed
  // multiply.
  u += (u32)u_dx * (u32)toffset;
  v += (u32)v_dx * (u32)toffset;
  // drop the 16 fractional bits and wrap to the texture window
  u = (u >> 16) & psx_gpu->texture_mask_width;
  v = (v >> 16) & psx_gpu->texture_mask_height;
  if (!(psx_gpu->render_state_base & (TEXTURE_MODE_16BPP << 8))) {
    // 4bpp 8bpp are swizzled
    u32 u_ = u;
    u = (u & 0x0f) | ((v & 0x0f) << 4);
    v = (v & 0xf0) | (u_ >> 4);
  }
  // a set bit 0 in right_mask would give lane -1; that must never happen
  assert(offset >= 0);
  //assert(block->uv.e[offset] == ((v << 8) | u));
  block->uv.e[offset] = (v << 8) | u;
}

#define dither_table_entry_normal(value) \
(value) \
Expand Down Expand Up @@ -1868,6 +1913,14 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,

#define setup_blocks_store_draw_mask_untextured_direct(_block, bits) \

// untextured variant: expands to nothing (picked through the
// setup_blocks_uv_adj_hack_##texturing token paste in the block builder)
#define setup_blocks_uv_adj_hack_untextured(_block, edge_data, uvrg_offset) \

// textured variant: if either texture-adjust hack bit is set in
// psx_gpu->hacks_active, pass the block on to setup_blocks_uv_adj_hack().
// Expects a `psx_gpu` pointer to be in scope where the macro is expanded.
#define setup_blocks_uv_adj_hack_textured(_block, edge_data, uvrg_offset)      \
{                                                                              \
  if (unlikely(psx_gpu->hacks_active &                                         \
   (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)))                               \
  {                                                                            \
    setup_blocks_uv_adj_hack(psx_gpu, _block, edge_data, uvrg_offset);         \
  }                                                                            \
}                                                                              \

#define setup_blocks_add_blocks_indirect() \
num_blocks += span_num_blocks; \
Expand Down Expand Up @@ -1938,6 +1991,8 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \
setup_blocks_store_##shading##_##texturing(sw, dithering, target, edge); \
setup_blocks_store_draw_mask_##texturing##_##target(block, \
span_edge_data->right_mask); \
setup_blocks_uv_adj_hack_##texturing(block, span_edge_data, \
span_uvrg_offset); \
\
block++; \
} \
Expand Down Expand Up @@ -5016,8 +5071,10 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN;

psx_gpu->saved_hres = 256;
psx_gpu->hacks_active = 0;

// check some offset
// check some offsets, asm relies on these
psx_gpu->reserved_a[(offsetof(psx_gpu_struct, test_mask) == 0) - 1] = 0;
psx_gpu->reserved_a[(offsetof(psx_gpu_struct, blocks) == psx_gpu_blocks_offset) - 1] = 0;
}

Expand Down
13 changes: 12 additions & 1 deletion plugins/gpu_neon/psx_gpu/psx_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,17 @@

#define SPAN_DATA_BLOCKS_SIZE 32

/* Bit flags kept in psx_gpu_struct.hacks_active. Defined outside the */
/* __ASSEMBLER__ guard so the assembly sources can test them as well. */
#define AHACK_TEXTURE_ADJ_U (1 << 0)
#define AHACK_TEXTURE_ADJ_V (1 << 1)

#ifndef __ASSEMBLER__

#include "vector_types.h"

// Branch hint: tells the compiler that x is expected to evaluate to 0.
// Guarded so that an already existing unlikely() definition is kept.
#ifndef unlikely
#define unlikely(x) __builtin_expect((x), 0)
#endif

typedef enum
{
PRIMITIVE_TYPE_TRIANGLE = 0,
Expand Down Expand Up @@ -189,6 +196,7 @@ typedef struct
// enhancement stuff
u16 *enhancement_buf_ptr; // main alloc
u16 *enhancement_current_buf_ptr; // offset into above, 4 bufs
u32 hacks_active; // AHACK_TEXTURE_ADJ_U ...
u32 saved_hres;
s16 saved_viewport_start_x;
s16 saved_viewport_start_y;
Expand All @@ -205,7 +213,7 @@ typedef struct

// Align up to 64 byte boundary to keep the upcoming buffers cache line
// aligned, also make reachable with single immediate addition
u8 reserved_a[184 + 9*4 - 9*sizeof(void *)];
u8 reserved_a[184 + 8*4 - 9*sizeof(void *)];

// 8KB
block_struct blocks[MAX_BLOCKS_PER_ROW];
Expand Down Expand Up @@ -256,6 +264,9 @@ u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2);
void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu);
void flush_render_block_buffer(psx_gpu_struct *psx_gpu);

void setup_blocks_uv_adj_hack(psx_gpu_struct *psx_gpu, block_struct *block,
edge_data_struct *span_edge_data, vec_4x32u *span_uvrg_offset);

void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram);
u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command);
Expand Down
59 changes: 56 additions & 3 deletions plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@
#ifdef __MACH__
#define flush_render_block_buffer _flush_render_block_buffer
#define update_texture_8bpp_cache _update_texture_8bpp_cache
#define setup_blocks_uv_adj_hack _setup_blocks_uv_adj_hack
#endif

@ r0: psx_gpu
Expand Down Expand Up @@ -543,6 +544,7 @@ function(compute_all_gradients)

#define uvrg q14
#define uvrg_dy q15
#define uv d28

#define alternate_x_16 d4

Expand Down Expand Up @@ -925,6 +927,14 @@ function(compute_all_gradients)
subs height, height, #4; \
bhi 2b; \
\
nop; \
ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]; \
tst temp, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V); \
beq 1f; \
add temp, span_uvrg_offset, height, lsl #4; \
vldr uv, [temp, #(-16*2)]; \
vstr uv, [temp, #(-16)]; \
\
1: \


Expand Down Expand Up @@ -986,6 +996,14 @@ function(compute_all_gradients)
subs height, height, #4; \
bhi 2b; \
\
nop; \
ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]; \
tst temp, #AHACK_TEXTURE_ADJ_V; \
beq 1f; \
add temp, psx_gpu, #psx_gpu_span_uvrg_offset_offset; \
vldr uv, [temp, #16]; \
vstr uv, [temp, #0]; \
\
1: \


Expand Down Expand Up @@ -1216,6 +1234,14 @@ function(setup_spans_up_down)
subs height_minor_b, height_minor_b, #4
bhi 2b

nop
ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]
tst temp, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)
beq 1f
add temp, span_uvrg_offset, height, lsl #4
vldr uv, [temp, #(-16*2)]
vstr uv, [temp, #(-16)]

1:
setup_spans_epilogue()

Expand Down Expand Up @@ -1256,6 +1282,7 @@ function(setup_spans_up_down)

#define uvrg_dx_ptr r2
#define texture_mask_ptr r3
#define hacks_active r6
#define dither_shift r8
#define dither_row r10

Expand All @@ -1273,6 +1300,7 @@ function(setup_spans_up_down)
#define color_b r5

#undef uvrg
#undef uv

#define u_block q0
#define v_block q1
Expand Down Expand Up @@ -1350,6 +1378,26 @@ function(setup_spans_up_down)

#define setup_blocks_texture_unswizzled() \

/* Calls the C helper setup_blocks_uv_adj_hack(psx_gpu, block, span_edge_data, */
/* span_uvrg_offset) when either texture-adjust hack bit is set in the         */
/* hacks_active register; otherwise branches straight to local label 91.       */
#define setup_blocks_uv_adj_hack_textured(hacks_active) \
tst hacks_active, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V); \
beq 91f; \
/* see flush_render_block_buffer below for a reg saving note */ \
vpush { texture_mask }; \
vpush { uvrg_dx4 }; \
\
/* keep the caller-saved core registers (and lr) across the C call */ \
stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
/* NOTE(review): span_uvrg_offset is staged through r12, presumably because */ \
/* its register is overwritten while r1/r2 are being set up - confirm */ \
mov r12, span_uvrg_offset; \
/* r1 = block argument; NOTE(review): block_ptr_a - 64 is presumably the */ \
/* start of the block that was just stored - confirm */ \
sub r1, block_ptr_a, #64; \
mov r2, span_edge_data; \
mov r3, r12; \
bl setup_blocks_uv_adj_hack; /* psx_gpu=r0 */ \
ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
\
vpop { uvrg_dx4 }; \
vpop { texture_mask }; \
/* rebuild uvrg_dx8 as 2 * uvrg_dx4; NOTE(review): presumably because its */ \
/* register is not preserved by the call above - confirm */ \
vadd.u32 uvrg_dx8, uvrg_dx4, uvrg_dx4; \
91: \


#define setup_blocks_shaded_textured_builder(swizzling) \
.align 3; \
Expand Down Expand Up @@ -1575,6 +1623,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \
vld1.u32 { test_mask }, [psx_gpu, :128]; \
vdup.u8 draw_mask, right_mask; \
\
ldr hacks_active, [psx_gpu, #psx_gpu_hacks_active_offset]; \
vmov.u32 fb_mask_ptrs[0], right_mask; \
vtst.u16 draw_mask, draw_mask, test_mask; \
vzip.u8 u_whole_8, v_whole_8; \
Expand All @@ -1585,6 +1634,8 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \
vst1.u32 { dither_offsets }, [block_ptr_b, :128], c_32; \
vst1.u32 { b_whole_8, fb_mask_ptrs }, [block_ptr_a, :128], c_32; \
\
setup_blocks_uv_adj_hack_textured(hacks_active); \
\
1: \
add span_uvrg_offset, span_uvrg_offset, #16; \
add span_b_offset, span_b_offset, #4; \
Expand All @@ -1599,7 +1650,8 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \
ldmia sp!, { r4 - r11, pc }; \
\
2: \
/* TODO: Load from psx_gpu instead of saving/restoring these */\
/* this callee-save reg saving may look unnecessary but it actually is */ \
/* because the callee violates the ABI */ \
vpush { texture_mask }; \
vpush { uvrg_dx4 }; \
\
Expand Down Expand Up @@ -1776,6 +1828,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \
vld1.u32 { test_mask }, [psx_gpu, :128]; \
vdup.u8 draw_mask, right_mask; \
\
ldr hacks_active, [psx_gpu, #psx_gpu_hacks_active_offset]; \
vmov.u32 fb_mask_ptrs[0], right_mask; \
vtst.u16 draw_mask, draw_mask, test_mask; \
vzip.u8 u_whole_8, v_whole_8; \
Expand All @@ -1786,6 +1839,8 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \
vst1.u32 { dither_offsets }, [block_ptr_b, :128], c_32; \
vst1.u32 { b_whole_8, fb_mask_ptrs }, [block_ptr_a, :128], c_32; \
\
setup_blocks_uv_adj_hack_textured(hacks_active); \
\
1: \
add span_uvrg_offset, span_uvrg_offset, #16; \
add span_edge_data, span_edge_data, #8; \
Expand All @@ -1798,7 +1853,6 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \
ldmia sp!, { r4 - r11, pc }; \
\
2: \
/* TODO: Load from psx_gpu instead of saving/restoring these */\
vpush { texture_mask }; \
vpush { uvrg_dx4 }; \
\
Expand Down Expand Up @@ -2334,7 +2388,6 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \
ldmia sp!, { r4 - r11, pc }; \
\
2: \
/* TODO: Load from psx_gpu instead of saving/restoring these */\
vpush { rg_dx4 }; \
\
stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
Expand Down
1 change: 1 addition & 0 deletions plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#define psx_gpu_texture_mask_width_offset 0xfa
#define psx_gpu_texture_mask_height_offset 0xfb
#define psx_gpu_reciprocal_table_ptr_offset 0x108
#define psx_gpu_hacks_active_offset 0x114
#define psx_gpu_blocks_offset 0x200
#define psx_gpu_span_uvrg_offset_offset 0x2200
#define psx_gpu_span_edge_data_offset 0x4200
Expand Down
1 change: 1 addition & 0 deletions plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ int main()
//WRITE_OFFSET(f, clut_settings);
//WRITE_OFFSET(f, texture_settings);
WRITE_OFFSET(f, reciprocal_table_ptr);
WRITE_OFFSET(f, hacks_active);
WRITE_OFFSET(f, blocks);
WRITE_OFFSET(f, span_uvrg_offset);
WRITE_OFFSET(f, span_edge_data);
Expand Down
Loading

0 comments on commit ee060c5

Please sign in to comment.