Skip to content

Commit a9a11d2

Browse files
authored
Merge pull request #538 from libtom/small-stacksize
add `MP_SMALL_STACK_SIZE` option
2 parents 4929f09 + 3570e12 commit a9a11d2

29 files changed

+443
-75
lines changed

.github/workflows/main.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ jobs:
7070
# RSA superclass with tests (no sanitizer, but debug info)
7171
- { BUILDOPTIONS: '--with-cc=gcc --with-m64 --cflags=-DLTM_NOTHING --cflags=-DSC_RSA_1_WITH_TESTS --limit-valgrind', SANITIZER: '', COMPILE_DEBUG: '1', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: '' }
7272

73+
# Build with small stack-size
74+
- { BUILDOPTIONS: '--with-cc=gcc --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE', SANITIZER: '', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'gcc-multilib' }
75+
- { BUILDOPTIONS: '--with-cc=clang-10 --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --limit-valgrind', SANITIZER: '1', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'clang-10 llvm-10 libc6-dev-i386 gcc-multilib' }
76+
- { BUILDOPTIONS: '--with-cc=gcc --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --multithread --limit-valgrind', SANITIZER: '', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'libc6-dev-i386 gcc-multilib' }
77+
- { BUILDOPTIONS: '--with-cc=clang-10 --with-m32 --with-m64 --cflags=-DMP_SMALL_STACK_SIZE --multithread', SANITIZER: '1', COMPILE_DEBUG: '0', COMPILE_LTO: '0', CONV_WARNINGS: '', OTHERDEPS: 'clang-10 llvm-10 gcc-multilib' }
78+
7379
# Test "autotuning", the automatic evaluation and setting of the Toom-Cook cut-offs.
7480
#- env: SANITIZER=1 BUILDOPTIONS='--with-cc=gcc-5 --cflags=-DMP_16BIT --limit-valgrind --make-option=tune'
7581
#- env: SANITIZER=1 BUILDOPTIONS='--with-cc=gcc-5 --cflags=-DMP_32BIT --limit-valgrind --make-option=tune'

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ if(COMPILE_LTO)
138138
if(COMPILER_SUPPORTS_LTO)
139139
set_property(TARGET ${PROJECT_NAME} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
140140
else()
141-
message(SEND_ERROR "This compiler does not support LTO. Reconfigure ${PROJECT_NAME} with -DCOMPILE_LTO=OFF.")
141+
message(FATAL_ERROR "This compiler does not support LTO. Reconfigure ${PROJECT_NAME} with -DCOMPILE_LTO=OFF.")
142142
endif()
143143
endif()
144144

appveyor.yml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,20 @@ image:
1111
- Visual Studio 2019
1212
- Visual Studio 2017
1313
- Visual Studio 2015
14+
environment:
15+
matrix:
16+
- CFLAGS_VAR: ""
17+
CFLAGS_VAR_DLL: "CFLAGS=\"/Ox /MD /DLTM_TEST_DYNAMIC\""
1418
build_script:
1519
- cmd: >-
1620
if "Visual Studio 2022"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\Build\vcvars64.bat"
1721
if "Visual Studio 2019"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
1822
if "Visual Studio 2017"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
1923
if "Visual Studio 2015"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64
2024
if "Visual Studio 2015"=="%APPVEYOR_BUILD_WORKER_IMAGE%" call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86_amd64
21-
nmake -f makefile.msvc test.exe
25+
nmake -f makefile.msvc test.exe %CFLAGS_VAR%
2226
nmake -f makefile.msvc clean-obj
23-
nmake -f makefile.msvc test_dll.exe CFLAGS="/Ox /MD /DLTM_TEST_DYNAMIC"
27+
nmake -f makefile.msvc test_dll.exe %CFLAGS_VAR_DLL%
2428
test_script:
2529
- cmd: test.exe
2630
- cmd: test_dll.exe

demo/test.c

Lines changed: 129 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2455,12 +2455,101 @@ static int test_mp_pack_unpack(void)
24552455
#define ONLY_PUBLIC_API_C
24562456
#endif
24572457

2458+
/*
 * Select the threading backend used by the test driver and define the
 * matching thread_id_t:
 *   - LTM_TEST_MULTITHREAD not defined : SINGLE_THREADED_C, thread_id_t is uintptr_t
 *   - LTM_TEST_MULTITHREAD, not _WIN32 : MULTI_THREADED_C + MULTI_THREADED_PTHREAD_C,
 *                                        thread_id_t is pthread_t
 *   - LTM_TEST_MULTITHREAD, _WIN32     : MULTI_THREADED_C + MULTI_THREADED_MSVC_C,
 *                                        thread_id_t is HANDLE
 * NOTE(review): the *_C symbols are presumably what MP_HAS(SINGLE_THREADED),
 * MP_HAS(MULTI_THREADED_PTHREAD) etc. test for further down - confirm against
 * the MP_HAS definition.
 */
#if !defined(LTM_TEST_MULTITHREAD)
2459+
#define SINGLE_THREADED_C
2460+
typedef uintptr_t thread_id_t;
2461+
#else
2462+
#define MULTI_THREADED_C
2463+
#if !defined(_WIN32)
2464+
#define MULTI_THREADED_PTHREAD_C
2465+
#include <pthread.h>
2466+
typedef pthread_t thread_id_t;
2467+
#else
2468+
#define MULTI_THREADED_MSVC_C
2469+
2470+
/* Provide 0x0501 as the default for _WIN32_WINNT/WINVER unless already set. */
#ifndef _WIN32_WINNT
2471+
#define _WIN32_WINNT 0x0501
2472+
#endif
2473+
#ifndef WINVER
2474+
#define WINVER 0x0501
2475+
#endif
2476+
2477+
#define WIN32_LEAN_AND_MEAN
2478+
#include <windows.h>
2479+
typedef HANDLE thread_id_t;
2480+
#endif
2481+
#endif
2482+
2483+
/*
 * thread_start() and thread_join() reference both the pthread calls and the
 * Win32 calls behind ordinary `if (MP_HAS(...))` tests, so both sets of names
 * have to be declared in every configuration. The declarations below stand in
 * for whichever API's header was not included above.
 * NOTE(review): these are declarations only; presumably the branch that is
 * not taken is eliminated so no definition is needed at link time - confirm.
 */
#if !defined(MULTI_THREADED_PTHREAD_C)
2484+
extern int pthread_create(thread_id_t *, const void *, void *(*)(void *), void *);
2485+
extern int pthread_join(thread_id_t, void **);
2486+
#endif
2487+
2488+
#if !defined(MULTI_THREADED_MSVC_C)
2489+
extern thread_id_t CreateThread(void *, size_t, unsigned long (*)(void *), void *, unsigned long, void *);
2490+
extern unsigned long WaitForSingleObject(thread_id_t hHandle, unsigned long dwMilliseconds);
2491+
#define INFINITE ((unsigned long)-1)
2492+
#endif
2493+
2494+
/* One entry of the unit-test table: a printable name plus the function to run. */
struct test_fn {
2495+
const char *name; /* printed in the "TEST"/"NOP" lines and matched against argv */
2496+
/* NULL when the T1/T2 table macros find a required option disabled;
   unit_tests() counts such an entry as NOP. */
int (*fn)(void);
2497+
};
2498+
2499+
/* Per-test bookkeeping used when the unit tests are run in threads. */
struct thread_info {
2500+
thread_id_t thread_id; /* filled in by thread_start(), consumed by thread_join() */
2501+
const struct test_fn *t; /* the table entry this thread runs */
2502+
/* written by run(): the test function's return value, or EXIT_FAILURE
   when mp_warray_free() returned -2 afterwards */
int ret;
2503+
};
2504+
2505+
static void run(struct thread_info *tinfo)
2506+
{
2507+
tinfo->ret = tinfo->t->fn();
2508+
2509+
if (mp_warray_free() == -2)
2510+
tinfo->ret = EXIT_FAILURE;
2511+
}
2512+
2513+
/*
 * pthread start routine. `arg` is the struct thread_info of the test to run;
 * the same pointer is handed back as the thread's exit value so that
 * thread_join() can retrieve it.
 */
static void *run_pthread(void *arg)
{
   struct thread_info *tinfo = arg;

   run(tinfo);
   return tinfo;
}
2519+
2520+
/*
 * Thread start routine passed to CreateThread(). `arg` is the
 * struct thread_info of the test to run; the thread always exits with 0,
 * the test result is reported through the struct instead.
 */
static unsigned long run_msvc(void *arg)
{
   struct thread_info *tinfo = arg;

   run(tinfo);
   return 0uL;
}
2526+
2527+
static int thread_start(struct thread_info *info)
2528+
{
2529+
if (MP_HAS(MULTI_THREADED_PTHREAD))
2530+
return pthread_create(&info->thread_id, NULL, run_pthread, info);
2531+
if (MP_HAS(MULTI_THREADED_MSVC)) {
2532+
info->thread_id = CreateThread(NULL, 0, run_msvc, info, 0, NULL);
2533+
return info->thread_id == (thread_id_t)NULL ? -1 : 0;
2534+
}
2535+
return -1;
2536+
}
2537+
2538+
static int thread_join(struct thread_info *info, struct thread_info **res)
2539+
{
2540+
if (MP_HAS(MULTI_THREADED_PTHREAD))
2541+
return pthread_join(info->thread_id, (void **)res);
2542+
if (MP_HAS(MULTI_THREADED_MSVC)) {
2543+
WaitForSingleObject(info->thread_id, INFINITE);
2544+
*res = info;
2545+
return 0;
2546+
}
2547+
return -1;
2548+
}
2549+
24582550
static int unit_tests(int argc, char **argv)
24592551
{
2460-
static const struct {
2461-
const char *name;
2462-
int (*fn)(void);
2463-
} test[] = {
2552+
static const struct test_fn test[] = {
24642553
#define T0(n) { #n, test_##n }
24652554
#define T1(n, o) { #n, MP_HAS(o) ? test_##n : NULL }
24662555
#define T2(n, o1, o2) { #n, (MP_HAS(o1) && MP_HAS(o2)) ? test_##n : NULL }
@@ -2522,31 +2611,54 @@ static int unit_tests(int argc, char **argv)
25222611
#undef T2
25232612
#undef T1
25242613
};
2614+
struct thread_info test_threads[sizeof(test)/sizeof(test[0])], *res;
25252615
unsigned long i, ok, fail, nop;
25262616
uint64_t t;
25272617
int j;
2528-
25292618
ok = fail = nop = 0;
25302619

25312620
t = (uint64_t)time(NULL);
25322621
printf("SEED: 0x%" PRIx64 "\n\n", t);
25332622
s_mp_rand_jenkins_init(t);
25342623
mp_rand_source(s_mp_rand_jenkins);
25352624

2625+
if (MP_HAS(MP_SMALL_STACK_SIZE)) {
2626+
printf("Small-stack enabled\n\n");
2627+
}
2628+
2629+
if (MP_HAS(MULTI_THREADED)) {
2630+
printf("Multi-threading enabled\n\n");
2631+
/* we ignore the fact that jenkins is not thread safe */
2632+
for (i = 0; i < (sizeof(test) / sizeof(test[0])); ++i) {
2633+
test_threads[i].t = &test[i];
2634+
EXPECT(thread_start(&test_threads[i]) == 0);
2635+
}
2636+
}
2637+
25362638
for (i = 0; i < (sizeof(test) / sizeof(test[0])); ++i) {
2537-
if (argc > 1) {
2538-
for (j = 1; j < argc; ++j) {
2539-
if (strstr(test[i].name, argv[j]) != NULL) {
2540-
break;
2639+
j = -1;
2640+
if (MP_HAS(SINGLE_THREADED)) {
2641+
if (argc > 1) {
2642+
for (j = 1; j < argc; ++j) {
2643+
if (strstr(test[i].name, argv[j]) != NULL) {
2644+
break;
2645+
}
25412646
}
2647+
if (j == argc) continue;
25422648
}
2543-
if (j == argc) continue;
2649+
2650+
if (test[i].fn)
2651+
j = test[i].fn();
2652+
} else if (MP_HAS(MULTI_THREADED)) {
2653+
EXPECT(thread_join(&test_threads[i], &res) == 0);
2654+
j = res->ret;
25442655
}
25452656
printf("TEST %s\n", test[i].name);
2657+
25462658
if (test[i].fn == NULL) {
25472659
nop++;
25482660
printf("NOP %s\n\n", test[i].name);
2549-
} else if (test[i].fn() == EXIT_SUCCESS) {
2661+
} else if (j == EXIT_SUCCESS) {
25502662
ok++;
25512663
printf("\n");
25522664
} else {
@@ -2556,8 +2668,12 @@ static int unit_tests(int argc, char **argv)
25562668
}
25572669
fprintf(fail?stderr:stdout, "Tests OK/NOP/FAIL: %lu/%lu/%lu\n", ok, nop, fail);
25582670

2559-
if (fail != 0) return EXIT_FAILURE;
2560-
else return EXIT_SUCCESS;
2671+
EXPECT(mp_warray_free() != -2);
2672+
2673+
if (fail == 0)
2674+
return EXIT_SUCCESS;
2675+
LBL_ERR:
2676+
return EXIT_FAILURE;
25612677
}
25622678

25632679
int main(int argc, char **argv)

doc/bn.tex

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,16 @@ \subsubsection{Operand Size Related}
352352
\end{center}
353353
\end{small}
354354

355+
\subsection{Small-Stack option}
356+
\label{ch:SMALL_STACK_INTRO}
357+
The library can be compiled with the symbol \texttt{MP\_SMALL\_STACK\_SIZE} defined, which results in
358+
the temporary \texttt{MP\_WARRAY}-sized stack buffers being put on the heap.
359+
This comes with one drawback: the thread-safety that the default configuration promises is no longer given automatically.
360+
Therefore, if the Small-Stack option is enabled in a multi-threaded program, one shall always initialize
361+
the library by calling \texttt{mp\_warray\_init()} once with the correct number of threads.
362+
363+
See \ref{ch:SMALL_STACK_API} for the API description and further details.
364+
355365
\section{Purpose of LibTomMath}
356366
Unlike GNU MP (GMP) Library, LIP, OpenSSL or various other commercial kits (Miracl), LibTomMath
357367
was not written with bleeding edge performance in mind. First and foremost LibTomMath was written
@@ -428,7 +438,11 @@ \chapter{Getting Started with LibTomMath}
428438
\section{Building Programs}
429439
In order to use LibTomMath you must include ``tommath.h'' and link against the appropriate library
430440
file (typically
431-
libtommath.a). There is no library initialization required and the entire library is thread safe.
441+
libtommath.a). There is no library initialization required and the entire library is thread safe
442+
if it is used in its default configuration. The small-stack option makes use of atomic operations
443+
to maintain its internal state and therefore does not require locking, but it MUST be initialized
444+
if used from multiple threads. For further information see \ref{ch:SMALL_STACK_INTRO} and
445+
\ref{ch:SMALL_STACK_API}.
432446

433447
\section{Return Codes}
434448
There are five possible return codes a function may return.
@@ -813,6 +827,37 @@ \subsection{Adding additional digits}
813827
\end{alltt}
814828
\end{small}
815829

830+
\section{Small-Stack option}
831+
\label{ch:SMALL_STACK_API}
832+
833+
In case the \texttt{MP\_SMALL\_STACK\_SIZE} symbol is defined the following functions
834+
can be useful.
835+
836+
To initialize the internal structure the following function shall be called.
837+
838+
\index{mp\_warray\_init}
839+
\begin{alltt}
840+
mp_err mp_warray_init(size_t n_alloc, bool preallocate);
841+
\end{alltt}
842+
843+
The flag \texttt{preallocate} controls whether the internal buffers --
844+
\texttt{n\_alloc} buffers of size \texttt{MP\_WARRAY} -- will be allocated when
845+
\texttt{mp\_warray\_init()} is called, or whether they will be allocated when required.
846+
847+
To free the internally allocated memory the following function shall be called.
848+
849+
\index{mp\_warray\_free}
850+
\begin{alltt}
851+
int mp_warray_free(void);
852+
\end{alltt}
853+
854+
855+
Those two API functions are always available, even if the \texttt{MP\_SMALL\_STACK\_SIZE} option
856+
has been disabled at compile time.
857+
In that case \texttt{mp\_warray\_init()} will return \texttt{MP\_ERR} and \texttt{mp\_warray\_free()}
858+
will return $-1$.
859+
860+
816861
\chapter{Basic Operations}
817862
\section{Copying}
818863

helper.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ sub update_dep
394394
foreach my $filename (glob '*mp_*.c') {
395395
my $content;
396396
my $cc = $ENV{'CC'} || 'gcc';
397-
$content = `$cc -E -x c -DLTM_ALL $filename`;
397+
$content = `$cc -E -x c -DLTM_ALL -DMP_SMALL_STACK_SIZE $filename`;
398398
$content =~ s/^# 1 "$filename".*?^# 2 "$filename"//ms;
399399

400400
# convert filename to upper case so we can use it as a define

libtommath_VS2008.vcproj

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,10 @@
792792
RelativePath="mp_unpack.c"
793793
>
794794
</File>
795+
<File
796+
RelativePath="mp_warray_free.c"
797+
>
798+
</File>
795799
<File
796800
RelativePath="mp_xor.c"
797801
>
@@ -928,6 +932,18 @@
928932
RelativePath="s_mp_sub.c"
929933
>
930934
</File>
935+
<File
936+
RelativePath="s_mp_warray.c"
937+
>
938+
</File>
939+
<File
940+
RelativePath="s_mp_warray_get.c"
941+
>
942+
</File>
943+
<File
944+
RelativePath="s_mp_warray_put.c"
945+
>
946+
</File>
931947
<File
932948
RelativePath="s_mp_zero_buf.c"
933949
>

makefile

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,14 @@ mp_reduce_2k_l.o mp_reduce_2k_setup.o mp_reduce_2k_setup_l.o mp_reduce_is_2k.o m
4343
mp_reduce_setup.o mp_root_n.o mp_rshd.o mp_sbin_size.o mp_set.o mp_set_double.o mp_set_i32.o mp_set_i64.o \
4444
mp_set_l.o mp_set_u32.o mp_set_u64.o mp_set_ul.o mp_shrink.o mp_signed_rsh.o mp_sqrmod.o mp_sqrt.o \
4545
mp_sqrtmod_prime.o mp_sub.o mp_sub_d.o mp_submod.o mp_to_radix.o mp_to_sbin.o mp_to_ubin.o mp_ubin_size.o \
46-
mp_unpack.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o s_mp_div_recursive.o \
47-
s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o s_mp_fp_log_d.o \
48-
s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o s_mp_montgomery_reduce_comba.o s_mp_mul.o \
49-
s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o s_mp_mul_high_comba.o s_mp_mul_karatsuba.o \
50-
s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o s_mp_radix_map.o \
51-
s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o s_mp_sqr_karatsuba.o \
52-
s_mp_sqr_toom.o s_mp_sub.o s_mp_zero_buf.o s_mp_zero_digs.o
46+
mp_unpack.o mp_warray_free.o mp_xor.o mp_zero.o s_mp_add.o s_mp_copy_digs.o s_mp_div_3.o \
47+
s_mp_div_recursive.o s_mp_div_school.o s_mp_div_small.o s_mp_exptmod.o s_mp_exptmod_fast.o s_mp_fp_log.o \
48+
s_mp_fp_log_d.o s_mp_get_bit.o s_mp_invmod.o s_mp_invmod_odd.o s_mp_log_2expt.o \
49+
s_mp_montgomery_reduce_comba.o s_mp_mul.o s_mp_mul_balance.o s_mp_mul_comba.o s_mp_mul_high.o \
50+
s_mp_mul_high_comba.o s_mp_mul_karatsuba.o s_mp_mul_toom.o s_mp_prime_is_divisible.o s_mp_prime_tab.o \
51+
s_mp_radix_map.o s_mp_radix_size_overestimate.o s_mp_rand_platform.o s_mp_sqr.o s_mp_sqr_comba.o \
52+
s_mp_sqr_karatsuba.o s_mp_sqr_toom.o s_mp_sub.o s_mp_warray.o s_mp_warray_get.o s_mp_warray_put.o \
53+
s_mp_zero_buf.o s_mp_zero_digs.o
5354

5455
#END_INS
5556

@@ -172,9 +173,10 @@ c89:
172173
-e 's/UINT32_MAX/0xFFFFFFFFu/g' \
173174
-e 's/UINT64_MAX/(mp_u64)-1/g' \
174175
-e 's/INT32_MAX/0x7FFFFFFF/g' \
175-
-e 's/INT32_MIN/(-2147483647-1)/g' \
176+
-e 's/INT32_MIN/(-2147483647-1)/g' \
176177
-e 's/INT64_MAX/(mp_i64)(((mp_u64)1<<63)-1)/g' \
177178
-e 's/INT64_MIN/(mp_i64)((mp_u64)1<<63)/g' \
179+
-e 's/uintptr_t/mp_uintptr/g' \
178180
-e 's/SIZE_MAX/((size_t)-1)/g' \
179181
-e 's/\(PRI[ioux]64\)/MP_\1/g' \
180182
-e 's/uint\([0-9][0-9]*\)_t/mp_u\1/g' \
@@ -195,10 +197,11 @@ c99:
195197
-e 's/false_/MP_NO_/g' \
196198
-e 's/0xFFFFFFFFu/UINT32_MAX/g' \
197199
-e 's/(mp_u64)-1/UINT64_MAX/g' \
198-
-e 's/(-2147483647-1)/INT32_MIN/g' \
200+
-e 's/(-2147483647-1)/INT32_MIN/g' \
199201
-e 's/0x7FFFFFFF/INT32_MAX/g' \
200202
-e 's/(mp_i64)((mp_u64)1<<63)/INT64_MIN/g' \
201203
-e 's/(mp_i64)(((mp_u64)1<<63)-1)/INT64_MAX/g' \
204+
-e 's/mp_uintptr/uintptr_t/g' \
202205
-e 's/((size_t)-1)/SIZE_MAX/g' \
203206
-e 's/MP_\(PRI[ioux]64\)/\1/g' \
204207
-e 's/mp_u\([0-9][0-9]*\)/uint\1_t/g' \

0 commit comments

Comments
 (0)