Skip to content

Commit

Permalink
Merge pull request #161 from RossBrunton/memcpyfix
Browse files Browse the repository at this point in the history
[vecz] Use correct alignment for memcpy source

The inlining for memcpy was incorrectly using the destination
parameter's alignment rather than the source parameter's when setting
the alignment of the load from the source, resulting in miscompiles.

As well as fixing this issue, a test has been added.
  • Loading branch information
RossBrunton authored Oct 13, 2023
2 parents a943416 + 8e51f61 commit 4cfaa18
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ static Value *emitBuiltinMemCpy(Function *F, IRBuilder<> &B,
byte == 0 ? DestAlignment : std::min(Align(8u), DestAlignment);
MC->setAlignment(StoreAlign);
Align LoadAlign =
byte == 0 ? DestAlignment : std::min(Align(8u), SourceAlignment);
byte == 0 ? SourceAlignment : std::min(Align(8u), SourceAlignment);
LoadValue->setAlignment(LoadAlign);
}
// ...and then we fill in the remaining with 8bit stores.
Expand Down
37 changes: 37 additions & 0 deletions modules/compiler/vecz/test/lit/llvm/builtin_inlining_memcpy.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
; Copyright (C) Codeplay Software Limited
;
; Licensed under the Apache License, Version 2.0 (the "License") with LLVM
; Exceptions; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
; https://github.com/codeplaysoftware/oneapi-construction-kit/blob/main/LICENSE.txt
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
; WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
; License for the specific language governing permissions and limitations
; under the License.
;
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

; RUN: veczc -k memcpy_align -vecz-passes=builtin-inlining -vecz-simd-width=4 -S < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "spir64-unknown-unknown"

; Test kernel for memcpy builtin inlining: a 16-byte llvm.memcpy from %in
; (align 8) to %out (align 16). The FileCheck patterns below expect it to be
; expanded into two i64 load/store pairs at byte offsets 0 and 8. The load at
; offset 0 must carry the source alignment (8), while the store at offset 0
; carries the destination alignment (16); both accesses at offset 8 are
; expected at align 8.
define spir_kernel void @memcpy_align(ptr align(16) %out, ptr align(8) %in) {
entry:
; First 8-byte chunk (offset 0): load uses %in's alignment, store uses %out's.
; CHECK: %[[A:.*]] = getelementptr inbounds i8, ptr %in, i64 0
; CHECK: %[[B:.*]] = getelementptr inbounds i8, ptr %out, i64 0
; CHECK: %[[C:.*]] = load i64, ptr %[[A]], align 8
; CHECK: store i64 %[[C]], ptr %[[B]], align 16

; Second 8-byte chunk (offset 8): both the load and the store are align 8.
; CHECK: %[[D:.*]] = getelementptr inbounds i8, ptr %in, i64 8
; CHECK: %[[E:.*]] = getelementptr inbounds i8, ptr %out, i64 8
; CHECK: %[[F:.*]] = load i64, ptr %[[D]], align 8
; CHECK: store i64 %[[F]], ptr %[[E]], align 8
; The call under test: copy 16 bytes from %in to %out.
  call void @llvm.memcpy.p0.p0.i32(ptr noundef align(16) %out, ptr noundef align(8) %in, i32 16, i1 false)
  ret void
}

; Declaration of the memcpy intrinsic called by @memcpy_align: destination
; pointer, source pointer, an i32 length, and an immediate i1 flag.
declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)

0 comments on commit 4cfaa18

Please sign in to comment.