Skip to content

Commit a7a1d4a

Browse files
committed
24.07
1 parent 89a73b9 commit a7a1d4a

32 files changed

+400
-161
lines changed

Asm/x86/LzFindOpt.asm

+29-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
2-
; 2021-07-21: Igor Pavlov : Public domain
2+
; 2024-06-18: Igor Pavlov : Public domain
33
;
44

55
ifndef x64
@@ -11,10 +11,31 @@ include 7zAsm.asm
1111

1212
MY_ASM_START
1313

14-
_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
14+
ifndef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
15+
if (IS_LINUX gt 0)
16+
Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
17+
else
18+
Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
19+
endif
20+
endif
1521

22+
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
23+
_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
1624
MY_ALIGN macro num:req
1725
align num
26+
; align 16
27+
endm
28+
else
29+
MY_ALIGN macro num:req
30+
; We expect that ".text" is aligned for 16-bytes.
31+
; So we don't need large alignment inside our function.
32+
align 16
33+
endm
34+
endif
35+
36+
37+
MY_ALIGN_16 macro
38+
MY_ALIGN 16
1839
endm
1940

2041
MY_ALIGN_32 macro
@@ -136,7 +157,11 @@ COPY_VAR_64 macro dest_var, src_var
136157
endm
137158

138159

160+
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
139161
; MY_ALIGN_64
162+
else
163+
MY_ALIGN_16
164+
endif
140165
MY_PROC GetMatchesSpecN_2, 13
141166
MY_PUSH_PRESERVED_ABI_REGS
142167
mov r0, RSP
@@ -508,6 +533,8 @@ fin:
508533
MY_POP_PRESERVED_ABI_REGS
509534
MY_ENDP
510535

536+
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
511537
_TEXT$LZFINDOPT ENDS
538+
endif
512539

513540
end

Asm/x86/LzmaDecOpt.asm

+38-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
2-
; 2021-02-23: Igor Pavlov : Public domain
2+
; 2024-06-18: Igor Pavlov : Public domain
33
;
44
; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
55
; function for check at link time.
@@ -17,11 +17,41 @@ include 7zAsm.asm
1717

1818
MY_ASM_START
1919

20-
_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
20+
; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use additional SEGMENT with 64-byte alignment.
21+
; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use default SEGMENT (where default 16-byte alignment of segment is expected).
22+
; The performance is almost identical in our tests.
23+
; But the performance can depend on the position of lzmadec code inside instruction cache
24+
; or micro-op cache line (depending on low address bits in 32-byte/64-byte cache lines).
25+
; And 64-byte alignment provides a more consistent speed regardless
26+
; of the code's position in the executable.
27+
; But also it's possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be
28+
; slightly faster than 64-byte aligned code in some cases, if the offset of lzmadec
29+
; code in 64-byte block after compilation provides better speed for some reason.
30+
; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file.
31+
; If you don't want to get that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.
32+
33+
ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
34+
if (IS_LINUX gt 0)
35+
Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
36+
else
37+
Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
38+
endif
39+
endif
2140

41+
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
42+
_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
2243
MY_ALIGN macro num:req
2344
align num
45+
; align 16
2446
endm
47+
else
48+
MY_ALIGN macro num:req
49+
; We expect that ".text" is aligned for 16-bytes.
50+
; So we don't need large alignment inside our function.
51+
align 16
52+
endm
53+
endif
54+
2555

2656
MY_ALIGN_16 macro
2757
MY_ALIGN 16
@@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0
610640
PARAM_limit equ REG_ABI_PARAM_1
611641
PARAM_bufLimit equ REG_ABI_PARAM_2
612642

643+
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
613644
; MY_ALIGN_64
645+
else
646+
MY_ALIGN_16
647+
endif
614648
MY_PROC LzmaDec_DecodeReal_3, 3
615649
MY_PUSH_PRESERVED_ABI_REGS
616650

@@ -1298,6 +1332,8 @@ fin:
12981332
MY_POP_PRESERVED_ABI_REGS
12991333
MY_ENDP
13001334

1335+
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
13011336
_TEXT$LZMADECOPT ENDS
1337+
endif
13021338

13031339
end

Asm/x86/Sha1Opt.asm

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
2-
; 2021-03-10 : Igor Pavlov : Public domain
2+
; 2024-06-16 : Igor Pavlov : Public domain
33

44
include 7zAsm.asm
55

@@ -20,7 +20,7 @@ MY_ASM_START
2020

2121

2222

23-
CONST SEGMENT
23+
CONST SEGMENT READONLY
2424

2525
align 16
2626
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0

Asm/x86/Sha256Opt.asm

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
2-
; 2022-04-17 : Igor Pavlov : Public domain
2+
; 2024-06-16 : Igor Pavlov : Public domain
33

44
include 7zAsm.asm
55

@@ -20,7 +20,7 @@ endif
2020
EXTRN K_CONST:xmmword
2121
@
2222

23-
CONST SEGMENT
23+
CONST SEGMENT READONLY
2424

2525
align 16
2626
Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12

C/7zVersion.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#define MY_VER_MAJOR 24
2-
#define MY_VER_MINOR 06
2+
#define MY_VER_MINOR 07
33
#define MY_VER_BUILD 0
4-
#define MY_VERSION_NUMBERS "24.06"
4+
#define MY_VERSION_NUMBERS "24.07"
55
#define MY_VERSION MY_VERSION_NUMBERS
66

77
#ifdef MY_CPU_NAME
@@ -10,7 +10,7 @@
1010
#define MY_VERSION_CPU MY_VERSION
1111
#endif
1212

13-
#define MY_DATE "2024-05-26"
13+
#define MY_DATE "2024-06-19"
1414
#undef MY_COPYRIGHT
1515
#undef MY_VERSION_COPYRIGHT_DATE
1616
#define MY_AUTHOR_NAME "Igor Pavlov"

C/CpuArch.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* CpuArch.h -- CPU specific code
2-
2024-05-18 : Igor Pavlov : Public domain */
2+
2024-06-17 : Igor Pavlov : Public domain */
33

44
#ifndef ZIP7_INC_CPU_ARCH_H
55
#define ZIP7_INC_CPU_ARCH_H
@@ -564,13 +564,15 @@ problem-4 : performace:
564564
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
565565
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
566566

567+
#define GetUi64a(p) GetUi64(p)
567568
#define GetUi32a(p) GetUi32(p)
568569
#define GetUi16a(p) GetUi16(p)
569570
#define SetUi32a(p, v) SetUi32(p, v)
570571
#define SetUi16a(p, v) SetUi16(p, v)
571572

572573
#elif defined(MY_CPU_LE)
573574

575+
#define GetUi64a(p) (*(const UInt64 *)(const void *)(p))
574576
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
575577
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
576578
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }

C/ZstdDec.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* ZstdDec.c -- Zstd Decoder
2-
2024-05-26 : the code was developed by Igor Pavlov, using Zstandard format
2+
2024-06-18 : the code was developed by Igor Pavlov, using Zstandard format
33
specification and original zstd decoder code as reference code.
44
original zstd decoder code: Copyright (c) Facebook, Inc. All rights reserved.
55
This source code is licensed under BSD 3-Clause License.
@@ -1308,8 +1308,10 @@ FSE_Decode_SeqTable(CFseRecord * const table,
13081308
in->len--;
13091309
{
13101310
const Byte *ptr = in->ptr;
1311-
const Byte sym = ptr[0];
1311+
const unsigned sym = ptr[0];
13121312
in->ptr = ptr + 1;
1313+
if (sym >= numSymbolsMax)
1314+
return SZ_ERROR_DATA;
13131315
table[0] = (FastInt32)sym
13141316
#if defined(Z7_ZSTD_DEC_USE_ML_PLUS3)
13151317
+ (numSymbolsMax == NUM_ML_SYMBOLS ? MATCH_LEN_MIN : 0)

CPP/7zip/7zip_gcc.mak

+3
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,9 @@ endif
220220

221221
all: $(O) $(PROGPATH) $(STATIC_TARGET)
222222

223+
# we need $(O) as order-only-prerequisites:
224+
$(OBJS): | $(O)
225+
223226
$(O):
224227
$(MY_MKDIR) $(O)
225228

CPP/7zip/Archive/7z/7zUpdate.cpp

+20
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,14 @@ static int Parse_EXE(const Byte *buf, size_t size, CFilterMode *filterMode)
219219
}
220220

221221

222+
/*
223+
Filters don't improve the compression ratio for relocatable object files (".o").
224+
But we can get compression ratio gain, if we compress object
225+
files and executables in the same solid block.
226+
So we use filters for relocatable object files (".o"):
227+
*/
228+
// #define Z7_7Z_CREATE_ARC_DISABLE_FILTER_FOR_OBJ
229+
222230
/* ---------- ELF ---------- */
223231

224232
#define ELF_SIG 0x464C457F
@@ -258,6 +266,12 @@ static int Parse_ELF(const Byte *buf, size_t size, CFilterMode *filterMode)
258266
default: return 0;
259267
}
260268

269+
#ifdef Z7_7Z_CREATE_ARC_DISABLE_FILTER_FOR_OBJ
270+
#define ELF_ET_REL 1
271+
if (Get16(buf + 0x10, be) == ELF_ET_REL)
272+
return 0;
273+
#endif
274+
261275
switch (Get16(buf + 0x12, be))
262276
{
263277
case 3:
@@ -318,6 +332,12 @@ static unsigned Parse_MACH(const Byte *buf, size_t size, CFilterMode *filterMode
318332
default: return 0;
319333
}
320334

335+
#ifdef Z7_7Z_CREATE_ARC_DISABLE_FILTER_FOR_OBJ
336+
#define MACH_TYPE_OBJECT 1
337+
if (Get32(buf + 0xC, be) == MACH_TYPE_OBJECT)
338+
return 0;
339+
#endif
340+
321341
switch (Get32(buf + 4, be))
322342
{
323343
case MACH_MACHINE_386:

0 commit comments

Comments
 (0)