Skip to content

Commit

Permalink
lib: remove the "packed struct" approach to unaligned memory access
Browse files Browse the repository at this point in the history
gcc 10 is miscompiling libdeflate on x86_64 at -O3 due to a regression
in how packed structs are handled
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).

Work around this by just always using memcpy() for unaligned accesses.
It's unclear that the "packed struct" approach is worthwhile to maintain
anymore.  Currently I'm only aware that it's useful with old gcc's on
arm32.  Hopefully, compilers are good enough now that we can simply use
memcpy() everywhere.

Update #64
  • Loading branch information
ebiggers committed May 9, 2020
1 parent 14be043 commit 0f5238f
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 57 deletions.
33 changes: 0 additions & 33 deletions common/common_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,39 +244,6 @@ static forceinline u64 bswap64(u64 n)
# define UNALIGNED_ACCESS_IS_FAST 0
#endif

/*
* DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
* defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
* which load and store variables of type 'type' from/to unaligned memory
* addresses. If not defined, a fallback is used.
*/
#ifndef DEFINE_UNALIGNED_TYPE

/*
* Although memcpy() may seem inefficient, it *usually* gets optimized
* appropriately by modern compilers. It's portable and may be the best we can
* do for a fallback...
*/
#include <string.h>

#define DEFINE_UNALIGNED_TYPE(type) \
\
static forceinline type \
load_##type##_unaligned(const void *p) \
{ \
type v; \
memcpy(&v, p, sizeof(v)); \
return v; \
} \
\
static forceinline void \
store_##type##_unaligned(type v, void *p) \
{ \
memcpy(p, &v, sizeof(v)); \
}

#endif /* !DEFINE_UNALIGNED_TYPE */

/* ========================================================================== */
/* Bit scan functions */
/* ========================================================================== */
Expand Down
19 changes: 0 additions & 19 deletions common/compiler_gcc.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,25 +134,6 @@
# define UNALIGNED_ACCESS_IS_FAST 1
#endif

/* With gcc, we can access unaligned memory through 'packed' structures. */
#define DEFINE_UNALIGNED_TYPE(type) \
\
struct type##unaligned { \
type v; \
} __attribute__((packed)); \
\
static forceinline type \
load_##type##_unaligned(const void *p) \
{ \
return ((const struct type##unaligned *)p)->v; \
} \
\
static forceinline void \
store_##type##_unaligned(type v, void *p) \
{ \
((struct type##unaligned *)p)->v = v; \
}

#define bsr32(n) (31 - __builtin_clz(n))
#define bsr64(n) (63 - __builtin_clzll(n))
#define bsf32(n) __builtin_ctz(n)
Expand Down
36 changes: 31 additions & 5 deletions lib/unaligned.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,39 @@

#include "lib_common.h"

/***** Unaligned loads and stores without endianness conversion *****/

/*
* Naming note:
* memcpy() is portable, and it usually gets optimized appropriately by modern
* compilers. I.e., each memcpy() of 1, 2, 4, or WORDBYTES bytes gets compiled
* to a load or store instruction, not to an actual function call.
*
* We no longer use the "packed struct" approach, as that is nonstandard, has
* unclear semantics, and doesn't receive enough testing
* (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94994).
*
* {load,store}_*_unaligned() deal with raw bytes without endianness conversion.
* {get,put}_unaligned_*() deal with a specific endianness.
* arm32 with __ARM_FEATURE_UNALIGNED in gcc 5 and earlier is a known exception
* where memcpy() generates inefficient code
* (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67366). However, we no longer
* consider that one case important enough to maintain different code for.
* If you run into it, please just use a newer version of gcc (or use clang).
*/

#define DEFINE_UNALIGNED_TYPE(type) \
static forceinline type \
load_##type##_unaligned(const void *p) \
{ \
type v; \
memcpy(&v, p, sizeof(v)); \
return v; \
} \
\
static forceinline void \
store_##type##_unaligned(type v, void *p) \
{ \
memcpy(p, &v, sizeof(v)); \
}

DEFINE_UNALIGNED_TYPE(u16)
DEFINE_UNALIGNED_TYPE(u32)
DEFINE_UNALIGNED_TYPE(u64)
Expand All @@ -22,7 +48,7 @@ DEFINE_UNALIGNED_TYPE(machine_word_t)
#define load_word_unaligned load_machine_word_t_unaligned
#define store_word_unaligned store_machine_word_t_unaligned

/***** Unaligned loads *****/
/***** Unaligned loads with endianness conversion *****/

static forceinline u16
get_unaligned_le16(const u8 *p)
Expand Down Expand Up @@ -84,7 +110,7 @@ get_unaligned_leword(const u8 *p)
return get_unaligned_le64(p);
}

/***** Unaligned stores *****/
/***** Unaligned stores with endianness conversion *****/

static forceinline void
put_unaligned_le16(u16 v, u8 *p)
Expand Down

0 comments on commit 0f5238f

Please sign in to comment.