Skip to content

Commit

Permalink
[GC] Fix & optimize arrayEquals on AArch64 (dragonwell-project#743)
Browse files Browse the repository at this point in the history
Summary: fix tail comparison and simplify flow control for arrayEquals

Testing: jdk/javadoc/tool/parser/7091528/T7091528.java

Reviewers: mmyxym, linade

Issue: dragonwell-project#743
  • Loading branch information
weixlu committed Apr 19, 2024
1 parent 6c9ea3e commit 6732396
Showing 1 changed file with 101 additions and 0 deletions.
101 changes: 101 additions & 0 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5532,6 +5532,107 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
cbnzw(tmp5, DONE);
}
}
} else if (UseCompactObjectHeaders) {
// Array-equality code emitted when UseCompactObjectHeaders is set.
//
// Shape of the emitted code:
//   1. null / length checks,
//   2. arrays shorter than one word: compare 4, 2 and (byte arrays only) 1
//      trailing bytes, selected by the low bits of the element count,
//   3. otherwise: optional 64-byte SIMD loop, then a word-at-a-time loop,
//      then one overlapping word load that covers whatever is left.
//
// NOTE(review): DONE and SAME are bound outside this excerpt. Going by the
// comments below, DONE is the "return result as is" exit (result is still
// false on every branch to it here) and SAME is the "arrays are equal" exit
// -- confirm against the rest of arrays_equals.
// NOTE(review): elem_per_word is presumably wordSize / elem_size and
// log_elem_size presumably log2(elem_size); both are defined outside this
// excerpt -- confirm.
Label NEXT_64BYTE, NEXT_16BYTE, TAIL, TAIL03, TAIL01;
// Preset the answer to "not equal"; nothing in this branch writes result
// again, so every mismatch path can simply branch to DONE.
mov(result, false);
// if (a1 == null || a2 == null)
// return false;
cbz(a1, DONE);
// a1 is known non-null here, so its length load is issued before the a2
// null check.
ldrw(cnt1, Address(a1, length_offset));
cbz(a2, DONE);
// if (a1.length != a2.length)
// return false;
ldrw(cnt2, Address(a2, length_offset));
cmpw(cnt2, cnt1);
br(NE, DONE);
// if (a1.length == 0)
// return true;
cbz(cnt1, SAME);
// Step both pointers past the header (base_offset) so that they address the
// array data from here on. cnt1 holds the number of elements still to compare.
lea(a1, Address(a1, base_offset));
lea(a2, Address(a2, base_offset));

// At least one full word of data: use the loops below. Otherwise fall
// through to the bit-tested short path.
cmpw(cnt1, elem_per_word);
if (UseSIMDForArrayEquals) {
br(GE, NEXT_64BYTE);
} else {
br(GE, NEXT_16BYTE);
}

// Short path: fewer than elem_per_word elements in total. Each tbz tests the
// bit of cnt1 worth 4, 2 or 1 bytes of elements and skips that compare when
// the bit is clear. The loads post-increment a1/a2, so the pieces are read
// back to back.
tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left.
{
ldrw(tmp1, Address(post(a1, 4)));
ldrw(tmp2, Address(post(a2, 4)));
eorw(tmp5, tmp1, tmp2);
cbnzw(tmp5, DONE);
}
bind(TAIL03);
tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left.
{
ldrh(tmp3, Address(post(a1, 2)));
ldrh(tmp4, Address(post(a2, 2)));
eorw(tmp5, tmp3, tmp4);
cbnzw(tmp5, DONE);
}
bind(TAIL01);
if (elem_size == 1) { // Only needed when comparing byte arrays.
tbz(cnt1, 0, SAME); // 0-1 bytes left.
{
ldrb(tmp1, a1);
ldrb(tmp2, a2);
eorw(tmp5, tmp1, tmp2);
cbnzw(tmp5, DONE);
}
}
b(SAME);

// 64 byte comparison loop(vector)
// Runs while at least 8 words' worth of elements remain; hands the rest to
// the word loop.
bind(NEXT_64BYTE);
cmpw(cnt1, elem_per_word * 8);
br(LT, NEXT_16BYTE);
// Load four 16-byte vectors from each array, post-incrementing both pointers.
ld1(v0, v1, v2, v3, T16B, Address(post(a1, wordSize * 8)));
ld1(v4, v5, v6, v7, T16B, Address(post(a2, wordSize * 8)));
sub(cnt1, cnt1, 8 * elem_per_word);
// XOR corresponding vectors, then OR the four differences into v0: v0 is
// all-zero iff the two 64-byte chunks are identical.
eor(v0, T16B, v0, v4);
eor(v1, T16B, v1, v5);
eor(v2, T16B, v2, v6);
eor(v3, T16B, v3, v7);
orr(v0, T16B, v0, v1);
orr(v0, T16B, v0, v2);
orr(v0, T16B, v0, v3);
// Test both 64-bit lanes of the accumulated difference.
umov(tmp1, v0, D, 0);
cbnz(tmp1, DONE);
umov(tmp1, v0, D, 1);
cbnz(tmp1, DONE);
b(NEXT_64BYTE);

// Word comparison loop, unrolled twice: each step compares one word
// (wordSize bytes) from each array and can exit to TAIL on its own, so one
// trip through the loop covers at most two words.
bind(NEXT_16BYTE);
cmpw(cnt1, elem_per_word);
br(LT, TAIL);
ldr(tmp1, Address(post(a1, wordSize)));
ldr(tmp3, Address(post(a2, wordSize)));
sub(cnt1, cnt1, elem_per_word);
eor(tmp1, tmp1, tmp3);
cbnz(tmp1, DONE);
cmpw(cnt1, elem_per_word);
br(LT, TAIL);
ldr(tmp2, Address(post(a1, wordSize)));
ldr(tmp4, Address(post(a2, wordSize)));
sub(cnt1, cnt1, elem_per_word);
eor(tmp2, tmp2, tmp4);
cbnz(tmp2, DONE);
b(NEXT_16BYTE);

// Fewer than elem_per_word elements remain (possibly none).
bind(TAIL);
cbz(cnt1, SAME);
// Turn the remaining count into a negative byte offset,
// (cnt1 - elem_per_word) << log_elem_size, and compare one last full word
// loaded at that offset from a1/a2. Assuming elem_per_word elements occupy
// exactly one word, that word ends at the last element and overlaps bytes
// that were already compared. TAIL is only reached from the word loop, and
// the loops are only entered when the array held at least elem_per_word
// elements, so the load starts within data already visited.
sub(cnt1, cnt1, elem_per_word);
if (log_elem_size > 0)
lsl(cnt1, cnt1, log_elem_size);
ldr(tmp1, Address(a1, cnt1));
ldr(tmp2, Address(a2, cnt1));
eor(tmp5, tmp1, tmp2);
cbnz(tmp5, DONE);
b(SAME);
} else {
Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
CSET_EQ, LAST_CHECK;
Expand Down

0 comments on commit 6732396

Please sign in to comment.