1
1
; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
2
- ; 2021-02-23 : Igor Pavlov : Public domain
2
+ ; 2024-06-18 : Igor Pavlov : Public domain
3
3
;
4
4
; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
5
5
; function for check at link time.
@@ -17,11 +17,41 @@ include 7zAsm.asm
17
17
18
18
MY_ASM_START
19
19
20
- _TEXT $ LZMADECOPT SEGMENT ALIGN( 64 ) 'CODE'
20
+ ; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use additional SEGMENT with 64-byte alignment.
21
+ ; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use default SEGMENT (where default 16-byte alignment of segment is expected).
22
+ ; The performance is almost identical in our tests.
23
+ ; But the performance can depend on the position of the lzmadec code inside the instruction cache
24
+ ; or micro-op cache line (depending on the low address bits in 32-byte/64-byte cache lines).
25
+ ; And 64-byte alignment provides a more consistent speed regardless
26
+ ; of the code's position in the executable.
27
+ ; But also it's possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be
28
+ ; slightly faster than 64-byte aligned code in some cases, if the offset of the lzmadec
29
+ ; code within a 64-byte block after compilation provides better speed for some reason.
30
+ ; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file.
31
+ ; If you don't want to get that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.
32
+
33
; Provide a default when the build has not already defined Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.
; Both the (IS_LINUX gt 0) branch and the else branch currently set the symbol to 1,
; so after this block the symbol is always defined.
; The tests that follow use ifdef (definition, not value), so a platform can opt out
; only by having its `equ` line removed here, not by changing the value to 0.
; IS_LINUX is not defined in this block (presumably it comes from 7zAsm.asm -- confirm).
+ ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
34
+ if (IS_LINUX gt 0 )
35
+ Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
36
+ else
37
+ Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
38
+ endif
39
+ endif
21
40
41
; MY_ALIGN num -- alignment helper whose definition depends on Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.
;   defined:     a separate code segment declared with ALIGN(64) is opened here,
;                and MY_ALIGN emits `align num` with the requested value.
;   not defined: no segment is opened here, and MY_ALIGN ignores num
;                and always emits `align 16`.
+ ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
42
+ _TEXT $ LZMADECOPT SEGMENT ALIGN( 64 ) 'CODE'
22
43
MY_ALIGN macro num:req
23
44
align num
45
+ ; align 16
24
46
endm
47
+ else
48
+ MY_ALIGN macro num:req
49
+ ; We expect that ".text" is aligned to 16 bytes.
50
+ ; So we don't need larger alignment inside our function.
51
+ align 16
52
+ endm
53
+ endif
54
+
25
55
26
56
MY_ALIGN_16 macro
27
57
MY_ALIGN 16
@@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0
610
640
PARAM_limit equ REG_ABI_PARAM_1
611
641
PARAM_bufLimit equ REG_ABI_PARAM_2
612
642
643
; Alignment applied immediately before the LzmaDec_DecodeReal_3 entry that follows.
;   With Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT defined: nothing is emitted here
;   (the MY_ALIGN_64 invocation is commented out).
;   Without it: MY_ALIGN_16 is invoked.
+ ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
613
644
; MY_ALIGN_64
645
+ else
646
+ MY_ALIGN_16
647
+ endif
614
648
MY_PROC LzmaDec_DecodeReal_3 , 3
615
649
MY_PUSH_PRESERVED_ABI_REGS
616
650
@@ -1298,6 +1332,8 @@ fin:
1298
1332
MY_POP_PRESERVED_ABI_REGS
1299
1333
MY_ENDP
1300
1334
1335
; Close the LZMADECOPT code segment. This is guarded by the same symbol that guards
; the matching SEGMENT directive near the top of the file, so ENDS is emitted only
; when the segment was actually opened.
+ ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
1301
1336
_TEXT $ LZMADECOPT ENDS
1337
+ endif
1302
1338
1303
1339
end
0 commit comments