Skip to content

Commit

Permalink
Merge pull request wolfSSL#8540 from douzzer/20250307-misc-xorbuf-opt…
Browse files Browse the repository at this point in the history
…imizer

20250307-misc-xorbuf-optimizer
  • Loading branch information
dgarske authored Mar 8, 2025
2 parents c3f2456 + a84831c commit ad8eb76
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 63 deletions.
File renamed without changes.
7 changes: 7 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -10064,6 +10064,13 @@ if test "x$ENABLED_LINUXKM" = "xyes"; then
fi
fi
AS_IF([test "$ENABLED_ASM" = "no" && (test "$ENABLED_INTELASM" != "no" || \
test "$ENABLED_AESNI" != "no" || \
test "$ENABLED_ARMASM" != "no" || \
test "$ENABLED_RISCV_ASM" != "no" || \
test "$ENABLED_SP_ASM" != "no")],
[AC_MSG_WARN([Conflicting asm settings.])])
# The following AM_CONDITIONAL statements set flags for use in the Makefiles.
# Some of these affect build targets and objects, some trigger different
# test scripts for make check.
Expand Down
172 changes: 109 additions & 63 deletions wolfcrypt/src/misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -407,14 +407,18 @@ WC_MISC_STATIC WC_INLINE void ByteReverseWords64(word64* out, const word64* in,
#endif /* WORD64_AVAILABLE && !WOLFSSL_NO_WORD64_OPS */

#ifndef WOLFSSL_NO_XOR_OPS

/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */
wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0);

/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
of wolfssl_words, placing the result in <*r>. */
WC_MISC_STATIC WC_INLINE void XorWordsOut(wolfssl_word** r,
const wolfssl_word** a, const wolfssl_word** b, word32 n)
{
word32 i;
const wolfssl_word *e = *a + n;

for (i = 0; i < n; i++)
while (*a < e)
*((*r)++) = *((*a)++) ^ *((*b)++);
}

Expand All @@ -424,58 +428,78 @@ counts, placing the result in <*buf>. */
WC_MISC_STATIC WC_INLINE void xorbufout(void* out, const void* buf,
const void* mask, word32 count)
{
word32 i;
byte* o;
const byte* b;
const byte* m;

o = (byte*)out;
b = (const byte*)buf;
m = (const byte*)mask;


if (((wc_ptr_t)o) % WOLFSSL_WORD_SIZE ==
((wc_ptr_t)b) % WOLFSSL_WORD_SIZE &&
((wc_ptr_t)b) % WOLFSSL_WORD_SIZE ==
((wc_ptr_t)m) % WOLFSSL_WORD_SIZE) {
/* type-punning helpers */
union {
byte* bp;
wolfssl_word* wp;
} tpo;
union {
const byte* bp;
const wolfssl_word* wp;
} tpb, tpm;
/* Alignment checks out. Possible to XOR words. */
/* Move alignment so that it lines up with a
* WOLFSSL_WORD_SIZE boundary */
while (((wc_ptr_t)b) % WOLFSSL_WORD_SIZE != 0 && count > 0) {
*(o++) = (byte)(*(b++) ^ *(m++));
byte* o = (byte*)out;
const byte* b = (const byte*)buf;
const byte* m = (const byte*)mask;

/* type-punning helpers */
union {
byte* bp;
wolfssl_word* wp;
} tpo;
union {
const byte* bp;
const wolfssl_word* wp;
} tpb, tpm;

if (((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
((((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)) == 0))
{
/* All buffers are already aligned. Possible to XOR by words without
* fixup.
*/

tpo.bp = o;
tpb.bp = b;
tpm.bp = m;
XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
o = tpo.bp;
b = tpb.bp;
m = tpm.bp;
count &= (WOLFSSL_WORD_SIZE - 1);
}
else if ((((wc_ptr_t)o) & (WOLFSSL_WORD_SIZE - 1)) ==
(((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) &&
(((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) ==
(((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)))
{
/* Alignment can be fixed up to allow XOR by words. */

/* Perform bytewise xor until pointers are aligned to
* WOLFSSL_WORD_SIZE.
*/
while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0))
{
*o++ = (byte)(*b++ ^ *m++);
count--;
}

tpo.bp = o;
tpb.bp = b;
tpm.bp = m;
XorWordsOut( &tpo.wp, &tpb.wp, &tpm.wp, count / WOLFSSL_WORD_SIZE);
XorWordsOut(&tpo.wp, &tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
o = tpo.bp;
b = tpb.bp;
m = tpm.bp;
count %= WOLFSSL_WORD_SIZE;
count &= (WOLFSSL_WORD_SIZE - 1);
}

while (count > 0) {
*o++ = (byte)(*b++ ^ *m++);
count--;
}

for (i = 0; i < count; i++)
o[i] = (byte)(b[i] ^ m[i]);
}

/* This routine performs a bitwise XOR operation of <*r> and <*a> for <n> number
of wolfssl_words, placing the result in <*r>. */
WC_MISC_STATIC WC_INLINE void XorWords(wolfssl_word** r, const wolfssl_word** a,
word32 n)
{
word32 i;
const wolfssl_word *e = *a + n;

for (i = 0; i < n; i++)
while (*a < e)
*((*r)++) ^= *((*a)++);
}

Expand All @@ -484,36 +508,55 @@ counts, placing the result in <*buf>. */

WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
{
/* Leave no doubt that WOLFSSL_WORD_SIZE is a power of 2. */
wc_static_assert((WOLFSSL_WORD_SIZE & (WOLFSSL_WORD_SIZE - 1)) == 0);

word32 i;
byte* b;
const byte* m;

b = (byte*)buf;
m = (const byte*)mask;
byte* b = (byte*)buf;
const byte* m = (const byte*)mask;

/* type-punning helpers */
union {
byte* bp;
wolfssl_word* wp;
} tpb;
union {
const byte* bp;
const wolfssl_word* wp;
} tpm;

if ((((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) == 0) &&
(((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)) == 0))
{
/* Both buffers are already aligned. Possible to XOR by words without
* fixup.
*/

if ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) ==
(((wc_ptr_t)m) & (WOLFSSL_WORD_SIZE - 1)))
tpb.bp = b;
tpm.bp = m;
/* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
#if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE)
PRAGMA_GCC_DIAG_PUSH;
PRAGMA_GCC("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
#endif
XorWords(&tpb.wp, &tpm.wp, count >> WOLFSSL_WORD_SIZE_LOG2);
#if defined(WOLFSSL_LINUXKM) && defined(CONFIG_FORTIFY_SOURCE)
PRAGMA_GCC_DIAG_POP;
#endif
b = tpb.bp;
m = tpm.bp;
count &= (WOLFSSL_WORD_SIZE - 1);
}
else if (((wc_ptr_t)buf & (WOLFSSL_WORD_SIZE - 1)) ==
((wc_ptr_t)mask & (WOLFSSL_WORD_SIZE - 1)))
{
/* type-punning helpers */
union {
byte* bp;
wolfssl_word* wp;
} tpb;
union {
const byte* bp;
const wolfssl_word* wp;
} tpm;
/* Alignment checks out. Possible to XOR words. */
/* Move alignment so that it lines up with a
* WOLFSSL_WORD_SIZE boundary */
while ((((wc_ptr_t)b) & (WOLFSSL_WORD_SIZE - 1)) != 0 && count > 0)
/* Alignment can be fixed up to allow XOR by words. */

/* Perform bytewise xor until pointers are aligned to
* WOLFSSL_WORD_SIZE.
*/
while ((((wc_ptr_t)b & (WOLFSSL_WORD_SIZE - 1)) != 0) && (count > 0))
{
*(b++) ^= *(m++);
count--;
}

tpb.bp = b;
tpm.bp = m;
/* Work around false positives from linuxkm CONFIG_FORTIFY_SOURCE. */
Expand All @@ -530,10 +573,13 @@ WC_MISC_STATIC WC_INLINE void xorbuf(void* buf, const void* mask, word32 count)
count &= (WOLFSSL_WORD_SIZE - 1);
}

for (i = 0; i < count; i++)
b[i] ^= m[i];
while (count > 0) {
*b++ ^= *m++;
count--;
}
}
#endif

#endif /* !WOLFSSL_NO_XOR_OPS */

#ifndef WOLFSSL_NO_FORCE_ZERO
/* This routine fills the first len bytes of the memory area pointed by mem
Expand Down

0 comments on commit ad8eb76

Please sign in to comment.