Skip to content

Commit

Permalink
8319690: [AArch64] C2 compilation hits offset_ok_for_immed: assert "c…
Browse files Browse the repository at this point in the history
…2 compiler bug"

On LP64 systems, if the heap can be placed in low virtual
address space (below 4 GB) and the heap size is smaller than the
4 GB threshold, we can use the unscaled decoding pattern for
narrow oop decoding. This means that a generic field
reference can be decoded by:
```
cast<64> (32-bit compressed reference) + field_offset
```

When the `field_offset` is an immediate, on the AArch64 platform, the
unscaled decoding pattern matches perfectly with a direct
addressing mode, i.e., `base_plus_offset`, supported by LDR/STR
instructions. But for a given data width, not all immediates can
be encoded in the instruction field of LDR/STR[1]. The encodable
ranges differ as the data width varies.

For example, when we try to load a value of long type at offset of
`1030`, the address expression is `(AddP (DecodeN base) 1030)`.
Before the patch, the expression was matching with
`operand indOffIN()`. But, for 64-bit LDR/STR, signed immediate
byte offset must be in the range -256 to 255 or positive immediate
byte offset must be a multiple of 8 in the range 0 to 32760[2].
`1030` can't be encoded in the instruction field. So, after
matching, when we do checking for instruction encoding, the
assertion would fail.

In this patch, we're going to filter out invalid immediates
when deciding if current addressing mode can be matched as
`base_plus_offset`. We introduce `indOffIN4/indOffLN4` and
`indOffIN8/indOffLN8` for 32-bit data type and 64-bit data
type separately in the patch. E.g., for `memory4`, we remove
the generic `indOffIN/indOffLN`, which matches wrong unscaled
immediate range, and replace them with `indOffIN4/indOffLN4`
instead.

Since 8-bit and 16-bit LDR/STR instructions also support the
unscaled decoding pattern, we add the addressing mode in the
lists of `memory1` and `memory2` by introducing
`indOffIN1/indOffLN1` and `indOffIN2/indOffLN2`.

We also remove the unused operands `indOffI/indOffL/indOffIN/indOffLN`
to avoid misuse.

Tier 1-3 passed on aarch64.

[1] https://github.com/openjdk/jdk/blob/8db7bad992a0f31de9c7e00c2657c18670539102/src/hotspot/cpu/aarch64/assembler_aarch64.inline.hpp#L33
[2] https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions/LDR--immediate---Load-Register--immediate--?lang=en
  • Loading branch information
Faye Gao authored and Fei Gao committed Dec 6, 2023
1 parent 1cf7ef5 commit 1895cf3
Show file tree
Hide file tree
Showing 2 changed files with 269 additions and 35 deletions.
132 changes: 97 additions & 35 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -5666,20 +5666,6 @@ operand indIndex(iRegP reg, iRegL lreg)
%}
%}

// Memory operand: pointer register base plus an immIOffset immediate
// displacement, matched from (AddP reg off), with scale 0.
// index(0xffffffff) is presumably the "no index register" sentinel — confirm
// against the ADLC conventions.
// NOTE(review): the accompanying commit message describes this generic
// operand as unused and removes it to avoid misuse; the width-specific
// indOffI1/indOffI2/indOffI4/indOffI8 operands are the ones listed in the
// memoryN opclasses.
operand indOffI(iRegP reg, immIOffset off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP reg off);
op_cost(0);
format %{ "[$reg, $off]" %}
interface(MEMORY_INTER) %{
base($reg);
index(0xffffffff);
scale(0x0);
disp($off);
%}
%}

operand indOffI1(iRegP reg, immIOffset1 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
Expand Down Expand Up @@ -5750,20 +5736,6 @@ operand indOffI16(iRegP reg, immIOffset16 off)
%}
%}

// Memory operand: pointer register base plus an immLoffset immediate
// displacement, matched from (AddP reg off), with scale 0.
// index(0xffffffff) is presumably the "no index register" sentinel — confirm
// against the ADLC conventions.
// NOTE(review): the accompanying commit message describes this generic
// operand as unused and removes it to avoid misuse; the width-specific
// indOffL1/indOffL2/indOffL4/indOffL8 operands are the ones listed in the
// memoryN opclasses.
operand indOffL(iRegP reg, immLoffset off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP reg off);
op_cost(0);
format %{ "[$reg, $off]" %}
interface(MEMORY_INTER) %{
base($reg);
index(0xffffffff);
scale(0x0);
disp($off);
%}
%}

operand indOffL1(iRegP reg, immLoffset1 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
Expand Down Expand Up @@ -5909,7 +5881,7 @@ operand indIndexN(iRegN reg, iRegL lreg)
%}
%}

operand indOffIN(iRegN reg, immIOffset off)
operand indOffIN1(iRegN reg, immIOffset1 off)
%{
predicate(CompressedOops::shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
Expand All @@ -5924,7 +5896,7 @@ operand indOffIN(iRegN reg, immIOffset off)
%}
%}

operand indOffLN(iRegN reg, immLoffset off)
operand indOffLN1(iRegN reg, immLoffset1 off)
%{
predicate(CompressedOops::shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
Expand All @@ -5939,6 +5911,95 @@ operand indOffLN(iRegN reg, immLoffset off)
%}
%}

// Unscaled compressed-oop addressing for the memory2 opclass (16-bit
// accesses): matches (AddP (DecodeN reg) off) and uses the narrow register
// itself as the base. The predicate restricts this to
// CompressedOops::shift() == 0, i.e. when decoding needs no shift.
// The displacement is limited to immIOffset2 so that only offsets encodable
// for this access width are matched (immIOffset2 is defined elsewhere in the
// file — confirm its exact range there).
operand indOffIN2(iRegN reg, immIOffset2 off)
%{
predicate(CompressedOops::shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) off);
op_cost(0);
format %{ "[$reg, $off]\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index(0xffffffff);
scale(0x0);
disp($off);
%}
%}

// Unscaled compressed-oop addressing for the memory2 opclass (16-bit
// accesses), immLoffset2 flavour: matches (AddP (DecodeN reg) off) and uses
// the narrow register itself as the base. The predicate restricts this to
// CompressedOops::shift() == 0, i.e. when decoding needs no shift.
// The displacement is limited to immLoffset2 so that only offsets encodable
// for this access width are matched (immLoffset2 is defined elsewhere in the
// file — confirm its exact range there).
operand indOffLN2(iRegN reg, immLoffset2 off)
%{
predicate(CompressedOops::shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) off);
op_cost(0);
format %{ "[$reg, $off]\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index(0xffffffff);
scale(0x0);
disp($off);
%}
%}

// Unscaled compressed-oop addressing for the memory4 opclass (32-bit
// accesses): matches (AddP (DecodeN reg) off) and uses the narrow register
// itself as the base. The predicate restricts this to
// CompressedOops::shift() == 0, i.e. when decoding needs no shift.
// Replaces the generic indOffIN in memory4, which accepted offsets that a
// 32-bit LDR/STR cannot encode. The displacement is limited to immIOffset4
// (defined elsewhere in the file — confirm its exact range there).
operand indOffIN4(iRegN reg, immIOffset4 off)
%{
predicate(CompressedOops::shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) off);
op_cost(0);
format %{ "[$reg, $off]\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index(0xffffffff);
scale(0x0);
disp($off);
%}
%}

// Unscaled compressed-oop addressing for the memory4 opclass (32-bit
// accesses), immLoffset4 flavour: matches (AddP (DecodeN reg) off) and uses
// the narrow register itself as the base. The predicate restricts this to
// CompressedOops::shift() == 0, i.e. when decoding needs no shift.
// Replaces the generic indOffLN in memory4, which accepted offsets that a
// 32-bit LDR/STR cannot encode. The displacement is limited to immLoffset4
// (defined elsewhere in the file — confirm its exact range there).
operand indOffLN4(iRegN reg, immLoffset4 off)
%{
predicate(CompressedOops::shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) off);
op_cost(0);
format %{ "[$reg, $off]\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index(0xffffffff);
scale(0x0);
disp($off);
%}
%}

// Unscaled compressed-oop addressing for the memory8 opclass (64-bit
// accesses): matches (AddP (DecodeN reg) off) and uses the narrow register
// itself as the base. The predicate restricts this to
// CompressedOops::shift() == 0, i.e. when decoding needs no shift.
// Replaces the generic indOffIN in memory8, which accepted offsets that a
// 64-bit LDR/STR cannot encode (e.g. 1030). The displacement is limited to
// immIOffset8 (defined elsewhere in the file — confirm its exact range there).
operand indOffIN8(iRegN reg, immIOffset8 off)
%{
predicate(CompressedOops::shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) off);
op_cost(0);
format %{ "[$reg, $off]\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index(0xffffffff);
scale(0x0);
disp($off);
%}
%}

// Unscaled compressed-oop addressing for the memory8 opclass (64-bit
// accesses), immLoffset8 flavour: matches (AddP (DecodeN reg) off) and uses
// the narrow register itself as the base. The predicate restricts this to
// CompressedOops::shift() == 0, i.e. when decoding needs no shift.
// Replaces the generic indOffLN in memory8, which accepted offsets that a
// 64-bit LDR/STR cannot encode (e.g. 1030). The displacement is limited to
// immLoffset8 (defined elsewhere in the file — confirm its exact range there).
operand indOffLN8(iRegN reg, immLoffset8 off)
%{
predicate(CompressedOops::shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) off);
op_cost(0);
format %{ "[$reg, $off]\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index(0xffffffff);
scale(0x0);
disp($off);
%}
%}


// AArch64 opto stubs need to write to the pc slot in the thread anchor
Expand Down Expand Up @@ -6189,21 +6250,22 @@ opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
// instruction defs. we can turn a memory op into an Address

opclass memory1(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN1, indOffLN1);

opclass memory2(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI2, indOffL2,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN2, indOffLN2);

opclass memory4(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI4, indOffL4,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN4, indOffLN4);

opclass memory8(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI8, indOffL8,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN8, indOffLN8);

// All of the memory operands. For the pipeline description.
opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex,
indOffI1, indOffL1, indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN,
indOffIN1, indOffLN1, indOffIN2, indOffLN2, indOffIN4, indOffLN4, indOffIN8, indOffLN8);


// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/*
* Copyright (c) 2023, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package compiler.c2.aarch64;

import jdk.internal.misc.Unsafe;
import jdk.test.lib.Asserts;

/**
* @test TestUnalignedAccessCompressedOops
* @summary AArch64: C2 compilation hits offset_ok_for_immed: assert "c2 compiler bug".
* @bug 8319690
* @library /test/lib
* @modules java.base/jdk.internal.misc
* @requires os.arch=="aarch64" & vm.compiler2.enabled
* @run main/othervm compiler.c2.aarch64.TestUnalignedAccessCompressedOops
* @run main/othervm -Xcomp -XX:-TieredCompilation -Xmx1g
* -XX:CompileCommand=compileonly,compiler.c2.aarch64.TestUnalignedAccessCompressedOops*::<clinit>
* compiler.c2.aarch64.TestUnalignedAccessCompressedOops
*/

public class TestUnalignedAccessCompressedOops {
// Regression test for JDK-8319690. Each nested Test* class performs Unsafe
// stores and loads on a byte[] at constant offsets from its static
// initializer (the jtreg command line restricts compilation to <clinit>).
// The offsets are chosen so that, for each access width, at least one of
// the AArch64 LDR/STR immediate forms is exercised and, for the widths
// wider than one byte, one offset cannot be encoded as an immediate at all.

// Size of every backing byte[]; all offsets used below plus the access
// width stay below this length.
public static final int LEN = 2040;

static final Unsafe UNSAFE = Unsafe.getUnsafe();
// Empty helper called first in every static initializer.
// NOTE(review): its purpose is not evident from this file — confirm.
static void sink(int x) {}

// Seeds, and the running results that the static initializers accumulate
// into. Each *res starts out equal to its seed.
public static long lseed = 1;
public static int iseed = 2;
public static short sseed = 3;
public static byte bseed = 4;
public static long lres = lseed;
public static int ires = iseed;
public static short sres = sseed;
public static byte bres = bseed;

// 64-bit accesses.
public static class TestLong {

private static final byte[] BYTES = new byte[LEN];
private static final long rawdata = 0xbeef;
// Ends up as (rawdata + lseed) * 2: the first round adds rawdata to lres,
// the second round doubles lres, the third round stores and reloads it.
private static final long data;

static {
sink(2);
// Signed immediate byte offset: range -256 to 255
// Positive immediate byte offset: a multiple of 8 in the range 0 to 32760
// Other immediate byte offsets can't be encoded in the instruction field.

// 1030 can't be encoded as "base + offset" mode into the instruction field.
UNSAFE.putLongUnaligned(BYTES, 1030, rawdata);
lres += UNSAFE.getLongUnaligned(BYTES, 1030);
// 127 can be encoded into simm9 field.
UNSAFE.putLongUnaligned(BYTES, 127, lres);
lres += UNSAFE.getLongUnaligned(BYTES, 127);
// 1096 can be encoded into uimm12 field.
UNSAFE.putLongUnaligned(BYTES, 1096, lres);
data = UNSAFE.getLongUnaligned(BYTES, 1096);
}

}

// 32-bit accesses.
public static class TestInt {

private static final byte[] BYTES = new byte[LEN];
private static final int rawdata = 0xbeef;
// Ends up as (rawdata + iseed) * 2, following the same three rounds as
// TestLong.
private static final int data;
static {
sink(2);
// Signed immediate byte offset: range -256 to 255
// Positive immediate byte offset, a multiple of 4 in the range 0 to 16380
// Other immediate byte offsets can't be encoded in the instruction field.

// 274 can't be encoded as "base + offset" mode into the instruction field.
UNSAFE.putIntUnaligned(BYTES, 274, rawdata);
ires += UNSAFE.getIntUnaligned(BYTES, 274);
// 255 can be encoded into simm9 field.
UNSAFE.putIntUnaligned(BYTES, 255, ires);
ires += UNSAFE.getIntUnaligned(BYTES, 255);
// 528 can be encoded into uimm12 field.
UNSAFE.putIntUnaligned(BYTES, 528, ires);
data = UNSAFE.getIntUnaligned(BYTES, 528);
}

}

// 16-bit accesses.
public static class TestShort {

private static final byte[] BYTES = new byte[LEN];
private static final short rawdata = (short)0xbeef;
// Ends up as (short) ((short) (rawdata + sseed) * 2); the explicit casts
// below truncate the int arithmetic back to short after each round.
private static final short data;
static {
sink(2);
// Signed immediate byte offset: range -256 to 255
// Positive immediate byte offset: a multiple of 2 in the range 0 to 8190
// Other immediate byte offsets can't be encoded in the instruction field.

// 257 can't be encoded as "base + offset" mode into the instruction field.
UNSAFE.putShortUnaligned(BYTES, 257, rawdata);
sres = (short) (sres + UNSAFE.getShortUnaligned(BYTES, 257));
// 253 can be encoded into simm9 field.
UNSAFE.putShortUnaligned(BYTES, 253, sres);
sres = (short) (sres + UNSAFE.getShortUnaligned(BYTES, 253));
// 272 can be encoded into uimm12 field.
UNSAFE.putShortUnaligned(BYTES, 272, sres);
data = UNSAFE.getShortUnaligned(BYTES, 272);
}

}

// 8-bit accesses.
public static class TestByte {

private static final byte[] BYTES = new byte[LEN];
private static final byte rawdata = (byte)0x3f;
// Ends up as (byte) ((byte) (rawdata + bseed) * 2); the explicit casts
// below truncate the int arithmetic back to byte after each round.
private static final byte data;
static {
sink(2);
// Signed immediate byte offset: range -256 to 255
// Positive immediate byte offset: range 0 to 4095
// Other immediate byte offsets can't be encoded in the instruction field.
// Every offset used here lies within 0 to 4095, so unlike the wider
// types there is no unencodable case for byte accesses in this test.

// 272 is outside the simm9 range but can be encoded into uimm12 field.
UNSAFE.putByte(BYTES, 272, rawdata);
bres = (byte) (bres + UNSAFE.getByte(BYTES, 272));
// 53 can be encoded into simm9 field.
UNSAFE.putByte(BYTES, 53, bres);
bres = (byte) (bres + UNSAFE.getByte(BYTES, 53));
// 1027 can be encoded into uimm12 field.
UNSAFE.putByte(BYTES, 1027, bres);
data = UNSAFE.getByte(BYTES, 1027);
}

}

// Instantiates each nested class, which runs its static initializer, and
// checks that the value read back equals (rawdata + seed) * 2, truncated to
// the element type for short and byte.
static void test() {
TestLong ta = new TestLong();
Asserts.assertEquals(ta.data, (ta.rawdata + lseed) * 2, "putUnaligned long failed!");

TestInt tb = new TestInt();
Asserts.assertEquals(tb.data, (tb.rawdata + iseed) * 2, "putUnaligned int failed!");

TestShort tc = new TestShort();
Asserts.assertEquals(tc.data, (short) (((short) (tc.rawdata + sseed)) * 2), "putUnaligned short failed!");

TestByte td = new TestByte();
Asserts.assertEquals(td.data, (byte) (((byte) (td.rawdata + bseed)) * 2), "put byte failed!");
}

public static void main(String[] strArr) {
test();
}
}

0 comments on commit 1895cf3

Please sign in to comment.