This repository has been archived by the owner on Dec 1, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 110
Took forever to generate the goasm #30
Comments
I can reproduce it by using this code: int TesByteSlice(char *t, size_t sz)
{
/* Variant 1: fill t[0..sz-1] with each index narrowed to char; writes nothing when sz == 0. */
/* NOTE(review): i is int while sz is size_t, so the comparison mixes signed and unsigned
   operands and i would overflow for sz > INT_MAX -- confirm this is acceptable for a repro. */
for (int i = 0; i < sz; i++)
{
*t++ = (char)i; /* store the index narrowed to char, then advance the output pointer */
}
return 1; /* the return value does not depend on the input */
} or int TesByteSlice(char *t, size_t sz)
{
/* Variant 2: the same fill written with goto. The store happens before the bound is
   tested, so unlike the for-loop variant this writes one byte even when sz == 0. */
int i = 0;
loop:
*t++ = i++; /* implicit int -> char narrowing of the current index */
if (i < sz)
goto loop;
return 1;
} Generated CLang ASM.text
.intel_syntax noprefix
.file "tes.c"
.globl TesSum # -- Begin function TesSum
.p2align 4, 0x90
.type TesSum, @function
# TesSum: returns the low 32 bits of (rdi + rsi) in eax.
# The "kill" notes below indicate the inputs are the 32-bit values edi and esi.
# Only rax is written besides the rbp/rsp frame bookkeeping; no memory is touched
# apart from the push/pop of rbp.
TesSum:
# @TesSum
# %bb.0:
push rbp
mov rbp, rsp # rbp remembers the incoming rsp so the epilogue can restore it
and rsp, -8 # round rsp down to a multiple of 8 (presumably from -mstackrealign in the build command)
# kill: def $esi killed $esi def $rsi
# kill: def $edi killed $edi def $rdi
lea eax, [rdi + rsi] # eax = edi + esi; lea adds without modifying flags
mov rsp, rbp # undo the alignment adjustment
pop rbp
ret
.Lfunc_end0:
.size TesSum, .Lfunc_end0-TesSum
# -- End function
# Constant pool for TesByteSlice: nine 16-byte vectors, each aligned to 16 bytes.
.section .rodata.cst16, "aM", @progbits, 16
.p2align 4 # -- Begin function TesByteSlice
# .LCPI1_0: the bytes 0..15, i.e. the values stored into the first 16 output bytes.
.LCPI1_0:
.byte 0 # 0x0
.byte 1 # 0x1
.byte 2 # 0x2
.byte 3 # 0x3
.byte 4 # 0x4
.byte 5 # 0x5
.byte 6 # 0x6
.byte 7 # 0x7
.byte 8 # 0x8
.byte 9 # 0x9
.byte 10 # 0xa
.byte 11 # 0xb
.byte 12 # 0xc
.byte 13 # 0xd
.byte 14 # 0xe
.byte 15 # 0xf
# .LCPI1_1 .. .LCPI1_8: 16 copies each of 16, 32, ..., 128 (see the load comments below).
# They are the per-lane offsets of vectors 1..7 inside a 128-byte chunk, plus the
# value 128 used to step the lane values from one chunk to the next.
.LCPI1_1:
.zero 16, 16
.LCPI1_2:
.zero 16, 32
.LCPI1_3:
.zero 16, 48
.LCPI1_4:
.zero 16, 64
.LCPI1_5:
.zero 16, 80
.LCPI1_6:
.zero 16, 96
.LCPI1_7:
.zero 16, 112
.LCPI1_8:
.zero 16, 128
.text
.globl TesByteSlice
.p2align 4, 0x90
.type TesByteSlice, @function
# TesByteSlice: rdi = destination pointer, rsi = byte count.
# Stores the low byte of i at rdi[i] for every i in [0, rsi) and returns 1 in eax.
# Register roles in the vectorized path:
#   rax  = count rounded down to a multiple of 16 (bytes written by vector stores);
#          in the scalar tail it is the running index whose low byte (al) is stored
#   rcx  = number of 16-byte vectors, later the countdown of the 8x unrolled loop
#   r8   = vector count modulo 8 (vectors left over after the unrolled loop)
#   rdx  = byte offset into the destination, later an absolute store pointer
#   xmm2 = the 16 byte values to store at the current offset
TesByteSlice:
# @TesByteSlice
# %bb.0:
push rbp
mov rbp, rsp
and rsp, -8 # round rsp down to a multiple of 8; undone by "mov rsp, rbp" in the epilogue
test rsi, rsi
je .LBB1_13 # count == 0: nothing to store, just return 1
# %bb.1:
cmp rsi, 15
ja .LBB1_3 # 16 or more bytes: take the vectorized path
# %bb.2:
xor eax, eax # fewer than 16 bytes: scalar loop starts at index 0
jmp .LBB1_12
.LBB1_3:
mov rax, rsi
and rax, -16 # rax = count rounded down to a multiple of 16
lea rdx, [rax - 16]
mov rcx, rdx
shr rcx, 4
add rcx, 1 # rcx = (rax - 16) / 16 + 1 = number of 16-byte vectors
mov r8d, ecx
and r8d, 7 # r8 = vector count mod 8
cmp rdx, 112
jae .LBB1_5 # rax - 16 >= 112, i.e. at least 8 vectors: run the unrolled loop
# %bb.4:
# Fewer than 8 vectors: skip the unrolled loop, start at offset 0 with lanes 0..15.
movdqa xmm2, xmmword ptr [rip + .LCPI1_0] # xmm2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
xor edx, edx
jmp .LBB1_7
.LBB1_5:
sub rcx, r8 # rcx = vector count rounded down to a multiple of 8 (non-zero on this path)
movdqa xmm2, xmmword ptr [rip + .LCPI1_0] # xmm2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
xor edx, edx
movdqa xmm8, xmmword ptr [rip + .LCPI1_1] # xmm8 = [16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]
movdqa xmm9, xmmword ptr [rip + .LCPI1_2] # xmm9 = [32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32]
movdqa xmm3, xmmword ptr [rip + .LCPI1_3] # xmm3 = [48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48]
movdqa xmm4, xmmword ptr [rip + .LCPI1_4] # xmm4 = [64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64]
movdqa xmm5, xmmword ptr [rip + .LCPI1_5] # xmm5 = [80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80]
movdqa xmm6, xmmword ptr [rip + .LCPI1_6] # xmm6 = [96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96]
movdqa xmm7, xmmword ptr [rip + .LCPI1_7] # xmm7 = [112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112, 112]
movdqa xmm1, xmmword ptr [rip + .LCPI1_8] # xmm1 = [128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128]
.p2align 4, 0x90
# Unrolled loop: each iteration stores eight vectors (128 bytes) at rdi + rdx.
# Vector k of the chunk is xmm2 plus 16*k per lane, built in the scratch register xmm0.
# movdqu is used for the stores, which does not require rdi to be 16-byte aligned.
.LBB1_6: # =>This Inner Loop Header: Depth=1
movdqu xmmword ptr [rdi + rdx], xmm2
movdqa xmm0, xmm2
paddb xmm0, xmm8
movdqu xmmword ptr [rdi + rdx + 16], xmm0
movdqa xmm0, xmm2
paddb xmm0, xmm9
movdqu xmmword ptr [rdi + rdx + 32], xmm0
movdqa xmm0, xmm2
paddb xmm0, xmm3
movdqu xmmword ptr [rdi + rdx + 48], xmm0
movdqa xmm0, xmm2
paddb xmm0, xmm4
movdqu xmmword ptr [rdi + rdx + 64], xmm0
movdqa xmm0, xmm2
paddb xmm0, xmm5
movdqu xmmword ptr [rdi + rdx + 80], xmm0
movdqa xmm0, xmm2
paddb xmm0, xmm6
movdqu xmmword ptr [rdi + rdx + 96], xmm0
movdqa xmm0, xmm2
paddb xmm0, xmm7
movdqu xmmword ptr [rdi + rdx + 112], xmm0
sub rdx, -128 # rdx += 128 (presumably written as a subtraction because -128 fits an 8-bit immediate)
pxor xmm2, xmm1 # flip bit 7 of every lane, which equals adding 128 modulo 256
add rcx, -8 # eight vectors consumed
jne .LBB1_6
.LBB1_7:
test r8, r8
je .LBB1_10 # no leftover vectors
# %bb.8:
add rdx, rdi # rdx becomes an absolute store pointer
neg r8 # count upward from -leftover to 0
movdqa xmm1, xmmword ptr [rip + .LCPI1_1] # xmm1 = [16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]
.p2align 4, 0x90
# Leftover loop: one 16-byte store per iteration, lanes advance by 16 each time.
.LBB1_9: # =>This Inner Loop Header: Depth=1
movdqu xmmword ptr [rdx], xmm2
paddb xmm2, xmm1
add rdx, 16
inc r8
jne .LBB1_9
.LBB1_10:
cmp rax, rsi
je .LBB1_13 # count was a multiple of 16: the vector stores covered everything
# %bb.11:
add rdi, rax # point rdi at the first byte not yet written
.p2align 4, 0x90
# Scalar loop: handles the 1..15 remaining bytes, or the whole buffer when count < 16.
.LBB1_12: # =>This Inner Loop Header: Depth=1
mov byte ptr [rdi], al # store the low byte of the running index
add rdi, 1
add rax, 1
cmp rsi, rax
jne .LBB1_12 # continue until the index reaches the count
.LBB1_13:
mov eax, 1 # return value is always 1
mov rsp, rbp
pop rbp
ret
.Lfunc_end1:
.size TesByteSlice, .Lfunc_end1-TesByteSlice
# -- End function
.ident "clang version 10.0.0-4ubuntu1 "
.section ".note.GNU-stack", "", @progbits
.addrsig |
Recursive function works, but let's ignore it i guess edit: tried with byte slices, doesn't work :/ int TesByteSlice(char *t, size_t sz)
{
/* Recursive variant: writes the remaining count at each position, so the buffer receives
   sz, sz-1, ..., 1 (each narrowed to char) rather than ascending indices. */
if (sz == 0)
return 1; /* base case: nothing left to write */
*t++ = sz--; /* store the current count, then advance the pointer and shrink the count */
return TesByteSlice(t, sz); /* recurse on the rest of the buffer */
} |
Sign up for free
to subscribe to this conversation on GitHub.
Already have an account?
Sign in.
Hi, I am testing c2goasm. It works for a simple function, but for a bigger one it seems to take forever. I wonder whether there is a problem in my config or whether this is a bug?
build command:
clang -S -O3 -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti $1
Generated CLang ASM
C code
The text was updated successfully, but these errors were encountered: