Skip to content

Commit

Permalink
loongarch: sljit_emit_atomic_load/store implementation
Browse files Browse the repository at this point in the history
assumes all atomics are at least 32bit aligned through setting
SLJIT_ATOMIC_MIN_WIDTH.

only the bits that correspond to the operand size are taken
into consideration, with the rest presumed to be 0.

operations with lower alignment are undefined.
  • Loading branch information
Xiao-Tao authored and carenas committed Jun 10, 2023
1 parent 818706a commit 073f7de
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 26 deletions.
1 change: 1 addition & 0 deletions sljit_src/sljitConfigInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
#define SLJIT_MASKED_SHIFT 1
#define SLJIT_MASKED_SHIFT32 1
#define SLJIT_ATOMIC_MIN_WIDTH 32

#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)

Expand Down
5 changes: 4 additions & 1 deletion sljit_src/sljitLir.h
Original file line number Diff line number Diff line change
Expand Up @@ -1804,7 +1804,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler
- the memory operation (op) and the base address (stored in mem_reg)
passed to the load/store operations must be the same (the mem_reg
can be a different register, only its value must be the same)
- an store must always follow a load for the same transaction.
- a store must always follow a load for the same transaction, but
loads might be abandoned
- if the CPU defines a minimum bit width supported then the memory
address must be aligned to it (SLJIT_ATOMIC_MIN_WIDTH)
op must be between SLJIT_MOV and SLJIT_MOV_P, excluding all
signed loads such as SLJIT_MOV32_S16
Expand Down
62 changes: 50 additions & 12 deletions sljit_src/sljitNativeLOONGARCH_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
/*
LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):
| Format name | Composition |
| Format name | Composition |
| 2R | Opcode + Rj + Rd |
| 3R | Opcode + Rk + Rj + Rd |
| 4R | Opcode + Ra + Rk + Rj + Rd |
Expand Down Expand Up @@ -2449,8 +2449,10 @@ static sljit_ins get_jump_instruction(sljit_s32 type)
{
switch (type) {
case SLJIT_EQUAL:
case SLJIT_ATOMIC_NOT_STORED:
return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
case SLJIT_NOT_EQUAL:
case SLJIT_ATOMIC_STORED:
return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
case SLJIT_LESS:
case SLJIT_GREATER:
Expand Down Expand Up @@ -2734,6 +2736,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
src_r = dst_r;
break;
case SLJIT_ATOMIC_STORED:
case SLJIT_ATOMIC_NOT_STORED:
FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
src_r = dst_r;
invert ^= 0x1;
break;
case SLJIT_OVERFLOW:
case SLJIT_NOT_OVERFLOW:
if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
Expand Down Expand Up @@ -2933,15 +2941,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler
sljit_s32 dst_reg,
sljit_s32 mem_reg)
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(op);
SLJIT_UNUSED_ARG(dst_reg);
SLJIT_UNUSED_ARG(mem_reg);
sljit_ins ins = LL_W;

CHECK_ERROR();
CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

return SLJIT_ERR_UNSUPPORTED;
op = GET_OPCODE(op);
switch (op) {
case SLJIT_MOV_P:
case SLJIT_MOV:
ins = LL_D;
break;
}

FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg)));

switch (op) {
case SLJIT_MOV_U8:
return push_inst(compiler, ANDI | RD(dst_reg) | RJ(dst_reg) | IMM_I12(0xff));
case SLJIT_MOV_U16:
return push_inst(compiler, BSTRPICK_W | RD(dst_reg) | RJ(dst_reg) | (15 << 16));
}
return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
Expand All @@ -2950,16 +2971,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler
sljit_s32 mem_reg,
sljit_s32 temp_reg)
{
SLJIT_UNUSED_ARG(compiler);
SLJIT_UNUSED_ARG(op);
SLJIT_UNUSED_ARG(src_reg);
SLJIT_UNUSED_ARG(mem_reg);
SLJIT_UNUSED_ARG(temp_reg);
sljit_ins ins = SC_W;
sljit_ins chk = INST(ADD, op) | RD(EQUAL_FLAG) | RJ(TMP_REG2) | RK(TMP_ZERO);

CHECK_ERROR();
CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

return SLJIT_ERR_UNSUPPORTED;
switch (GET_OPCODE(op)) {
case SLJIT_MOV_P:
case SLJIT_MOV:
ins = SC_D;
FAIL_IF(push_inst(compiler, ORI | RD(TMP_REG2) | RJ(src_reg) | RK(TMP_ZERO)));
break;
case SLJIT_MOV_U32:
case SLJIT_MOV32:
FAIL_IF(push_inst(compiler, BSTRINS_D | RD(TMP_REG2) | RJ(src_reg) | (31 << 16)));
break;
case SLJIT_MOV_U16:
FAIL_IF(push_inst(compiler, BSTRINS_W | RD(TMP_REG2) | RJ(src_reg) | (15 << 16)));
break;
case SLJIT_MOV_U8:
FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src_reg) | IMM_I12(0xff)));
break;
}

FAIL_IF(push_inst(compiler, ins | RD(TMP_REG2) | RJ(mem_reg)));

return chk ? push_inst(compiler, chk) : SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
Expand Down
68 changes: 55 additions & 13 deletions test_src/sljitTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -11537,22 +11537,30 @@ static void test92(void)
{
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
|| (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
/* Test atomic load and store. */
executable_code code;
struct sljit_compiler *compiler = sljit_create_compiler(NULL, NULL);
struct sljit_label *label;
struct sljit_jump *jump;
sljit_sw buf[38];
sljit_sw buf[40];
sljit_s32 i;
#if (defined SLJIT_ATOMIC_MIN_WIDTH && SLJIT_ATOMIC_MIN_WIDTH > 0)
#define PADBYTE 0x00
sljit_sw padding = WCONST(0x5555555500000000, 0);
#else
#define PADBYTE 0x55
sljit_sw padding = WCONST(0x5555555555555555, 0x55555555);
#endif /* SLJIT_ATOMIC_MIN_WIDTH > 0 */

if (verbose)
printf("Run test92\n");

FAILED(!compiler, "cannot create compiler\n");

for (i = 0; i < 36; i++)
buf[i] = WCONST(0x5555555555555555, 0x55555555);
buf[i] = padding;

buf[0] = 4678;
*(sljit_u8*)(buf + 2) = 78;
Expand All @@ -11569,6 +11577,7 @@ static void test92(void)
((sljit_s32*)(buf + 33))[1] = -1;
#endif /* SLJIT_64BIT_ARCHITECTURE */
buf[37] = WCONST(0x1122334444332211, 0x11222211);
buf[38] = SLJIT_FUNC_ADDR(test92);

sljit_emit_enter(compiler, 0, SLJIT_ARGS1(VOID, P), 5, 5, 0, 0, 2 * sizeof(sljit_sw));

Expand Down Expand Up @@ -11674,6 +11683,19 @@ static void test92(void)
/* buf[19] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 19 * sizeof(sljit_sw), SLJIT_R0, 0);

/* case: SLJIT_MOV_P */
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 38 * sizeof(sljit_sw));
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_P, SLJIT_R0, SLJIT_R2);
sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_S1, 0, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_R1, 0, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_R0, 0, SLJIT_R2, 0);
/* buf[38] */
sljit_emit_atomic_store(compiler, SLJIT_MOV_P | SLJIT_SET_ATOMIC_STORED, SLJIT_R0, SLJIT_R2, SLJIT_R1);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
/* buf[39] */
sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_MEM1(SLJIT_S0), 39 * sizeof(sljit_sw), SLJIT_S1, 0);

sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 20 * sizeof(sljit_sw));
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R3, SLJIT_R1);
Expand Down Expand Up @@ -11704,6 +11726,8 @@ static void test92(void)
/* buf[25] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 25 * sizeof(sljit_sw), SLJIT_R0, 0);

#if (!defined SLJIT_ATOMIC_MIN_WIDTH || SLJIT_ATOMIC_MIN_WIDTH <= 8)
/* case: byte aligned lower offset */
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 26 * sizeof(sljit_sw) + 1);
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1);
Expand All @@ -11717,6 +11741,7 @@ static void test92(void)
/* buf[27] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 27 * sizeof(sljit_sw), SLJIT_S2, 0);

/* case: byte aligned higher offset */
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 28 * sizeof(sljit_sw) + 2);
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U8, SLJIT_R0, SLJIT_R1);
Expand All @@ -11730,7 +11755,10 @@ static void test92(void)
sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_ATOMIC_NOT_STORED);
/* buf[30] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 30 * sizeof(sljit_sw), SLJIT_R0, 0);
#endif /* SLJIT_ATOMIC_MIN_WIDTH <= 8 */

#if (!defined SLJIT_ATOMIC_MIN_WIDTH || SLJIT_ATOMIC_MIN_WIDTH <= 16)
/* case: half aligned lower offset */
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 31 * sizeof(sljit_sw) + 2);
label = sljit_emit_label(compiler);
sljit_emit_atomic_load(compiler, SLJIT_MOV_U16, SLJIT_R0, SLJIT_R1);
Expand All @@ -11741,6 +11769,7 @@ static void test92(void)
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), label);
/* buf[32] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 32 * sizeof(sljit_sw), SLJIT_S1, 0);
#endif /* SLJIT_ATOMIC_MIN_WIDTH <= 16 */

#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 33 * sizeof(sljit_sw) + sizeof(sljit_u32));
Expand All @@ -11759,6 +11788,7 @@ static void test92(void)
/* buf[35] */
sljit_emit_op_flags(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 35 * sizeof(sljit_sw), SLJIT_ATOMIC_STORED);

#if (!defined SLJIT_ATOMIC_MIN_WIDTH || SLJIT_ATOMIC_MIN_WIDTH <= 8)
/* case50: abandoned atomic load is safe */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 0);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 37 * sizeof(sljit_sw));
Expand All @@ -11770,6 +11800,7 @@ static void test92(void)
sljit_emit_atomic_store(compiler, SLJIT_MOV_U8, SLJIT_R2, SLJIT_R1, SLJIT_R0);
/* buf[36] */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 36 * sizeof(sljit_sw), SLJIT_R2, 0);
#endif /* SLJIT_ATOMIC_MIN_WIDTH <= 8 */

sljit_emit_return_void(compiler);

Expand All @@ -11782,20 +11813,20 @@ static void test92(void)
FAILED(buf[0] != -9856, "test92 case 1 failed\n");
FAILED(buf[1] != 4678, "test92 case 2 failed\n");
FAILED(*(sljit_u8*)(buf + 2) != 203, "test92 case 3 failed\n");
FAILED(((sljit_u8*)(buf + 2))[1] != 0x55, "test92 case 4 failed\n");
FAILED(((sljit_u8*)(buf + 2))[1] != PADBYTE, "test92 case 4 failed\n");
FAILED(buf[3] != 78, "test92 case 5 failed\n");
FAILED(buf[4] != 203, "test92 case 6 failed\n");
FAILED(*(sljit_u8*)(buf + 5) != 97, "test92 case 7 failed\n");
FAILED(((sljit_u8*)(buf + 5))[1] != 0x55, "test92 case 8 failed\n");
FAILED(((sljit_u8*)(buf + 5))[1] != PADBYTE, "test92 case 8 failed\n");
FAILED(*(sljit_u32*)(buf + 6) != 211, "test92 case 9 failed\n");
FAILED(buf[7] != (sljit_sw)(buf + 5), "test92 case 10 failed\n");
FAILED(buf[8] != 97, "test92 case 11 failed\n");
FAILED(*(sljit_u16*)(buf + 9) != (sljit_u16)(sljit_sw)(buf + 9), "test92 case 12 failed\n");
FAILED(((sljit_u8*)(buf + 9))[2] != 0x55, "test92 case 13 failed\n");
FAILED(((sljit_u8*)(buf + 9))[2] != PADBYTE, "test92 case 13 failed\n");
FAILED(buf[10] != 17897, "test92 case 14 failed\n");
FAILED(buf[11] != (sljit_sw)(buf + 9), "test92 case 15 failed\n");
FAILED(*(sljit_u16*)(buf + 12) != 41306, "test92 case 16 failed\n");
FAILED(((sljit_u8*)(buf + 12))[2] != 0x55, "test92 case 17 failed\n");
FAILED(((sljit_u8*)(buf + 12))[2] != PADBYTE, "test92 case 17 failed\n");
FAILED(*(sljit_u32*)(buf + 13) != 57812, "test92 case 18 failed\n");
FAILED(buf[14] != 41306, "test92 case 19 failed\n");
FAILED(*(sljit_u32*)(buf + 15) != 987654321, "test92 case 20 failed\n");
Expand All @@ -11813,34 +11844,45 @@ static void test92(void)
#endif /* SLJIT_64BIT_ARCHITECTURE */
FAILED(buf[19] != -573621, "test92 case 26 failed\n");
FAILED(*(sljit_u8*)(buf + 20) != 240, "test92 case 27 failed\n");
FAILED(((sljit_u8*)(buf + 20))[1] != 0x55, "test92 case 28 failed\n");
FAILED(((sljit_u8*)(buf + 20))[1] != PADBYTE, "test92 case 28 failed\n");
FAILED(buf[21] != 192, "test92 case 29 failed\n");
FAILED(buf[22] != -5893, "test92 case 30 failed\n");
FAILED(buf[23] != 4059, "test92 case 31 failed\n");
FAILED(buf[24] != 6359, "test92 case 32 failed\n");
FAILED(buf[25] != (sljit_sw)(buf + 23), "test92 case 33 failed\n");
FAILED(((sljit_u8*)(buf + 26))[0] != 0x55, "test92 case 34 failed\n");
#if (!defined SLJIT_ATOMIC_MIN_WIDTH || SLJIT_ATOMIC_MIN_WIDTH <= 8)
FAILED(((sljit_u8*)(buf + 26))[0] != PADBYTE, "test92 case 34 failed\n");
FAILED(((sljit_u8*)(buf + 26))[1] != 204, "test92 case 35 failed\n");
FAILED(((sljit_u8*)(buf + 26))[2] != 0x55, "test92 case 36 failed\n");
FAILED(((sljit_u8*)(buf + 26))[2] != PADBYTE, "test92 case 36 failed\n");
FAILED(buf[27] != 105, "test92 case 37 failed\n");
FAILED(((sljit_u8*)(buf + 28))[1] != 0x55, "test92 case 38 failed\n");
FAILED(((sljit_u8*)(buf + 28))[1] != PADBYTE, "test92 case 38 failed\n");
FAILED(((sljit_u8*)(buf + 28))[2] != 240, "test92 case 39 failed\n");
FAILED(((sljit_u8*)(buf + 28))[3] != 0x55, "test92 case 40 failed\n");
FAILED(((sljit_u8*)(buf + 28))[3] != PADBYTE, "test92 case 40 failed\n");
FAILED(buf[29] != 13, "test92 case 41 failed\n");
FAILED(buf[30] != 0, "test92 case 42 failed\n");
#endif /* SLJIT_ATOMIC_MIN_WIDTH <= 8 */
#if (!defined SLJIT_ATOMIC_MIN_WIDTH || SLJIT_ATOMIC_MIN_WIDTH <= 16)
FAILED(((sljit_u16*)(buf + 31))[0] != 0x5555, "test92 case 43 failed\n");
FAILED(((sljit_u16*)(buf + 31))[1] != 51403, "test92 case 44 failed\n");
FAILED(buf[32] != 14876, "test92 case 45 failed\n");
#endif /* SLJIT_ATOMIC_MIN_WIDTH <= 16 */
#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE)
#if PADBYTE != 0
FAILED(((sljit_u32*)(buf + 33))[0] != 0x55555555, "test92 case 46 failed\n");
#endif
FAILED(((sljit_u32*)(buf + 33))[1] != 0xdeadbeef, "test92 case 47 failed\n");
FAILED(buf[34] != 0xffffffff, "test92 case 48 failed\n");
FAILED(((sljit_u32*)(buf + 34))[0] != 0xffffffff, "test92 case 48 failed\n");
#endif /* SLJIT_64BIT_ARCHITECTURE */
FAILED(buf[35] != 1, "test92 case 49 failed\n");
#if (!defined SLJIT_ATOMIC_MIN_WIDTH || SLJIT_ATOMIC_MIN_WIDTH <= 8)
FAILED(buf[36] != 0x11, "test92 case 50 (load) failed\n");
FAILED(((sljit_u8*)(buf + 37))[1] != buf[36], "test92 case 50 (store) failed\n");
#endif /* SLJIT_ATOMIC_MIN_WIDTH <= 8 */
FAILED(buf[38] != (sljit_sw)&buf[38], "test92 case 51 (store) failed \n");
FAILED(buf[39] != SLJIT_FUNC_ADDR(test92), "test92 case 51 (load) failed \n");

sljit_free_code(code.code, NULL);
#undef PADBYTE
#endif
successful_tests++;
}
Expand Down

0 comments on commit 073f7de

Please sign in to comment.