Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dynamic: x86_64: Support runtime dynamic patching #1746

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
131 changes: 131 additions & 0 deletions arch/x86_64/mcount-dynamic.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
#include <errno.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#if HAVE_MEMBARRIER
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We usually use #ifdef.

#include <linux/membarrier.h>
#else
#include <cpuid.h>
#include <semaphore.h>
#endif

/* This should be defined before #include "utils.h" */
#define PR_FMT "dynamic"
#define PR_DOMAIN DBG_DYNAMIC
Expand Down Expand Up @@ -121,6 +129,120 @@ static int configure_sigtrap_handler(void)
return 0;
}

#if HAVE_MEMBARRIER

/**
 * setup_synchronization_mechanism - announce that this process will issue
 * 'private expedited sync core' membarrier commands, which are later used to
 * synchronize instruction pipelines and caches across cores for safe
 * cross-modification.
 * @return - negative on error, 0 on success
 */
static int setup_synchronization_mechanism(void)
{
	const int rc = syscall(__NR_membarrier,
			       MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0);

	if (rc >= 0)
		return rc;

	/* registration was refused: report why and hand the error to the caller */
	pr_dbg2("failed to register membarrier intent: %s\n", strerror(errno));
	return rc;
}

/**
 * synchronize_all_cores - issue the 'private expedited sync core' membarrier
 * command so that cores executing cross-modified code synchronize their
 * caches and pipelines
 * @return - negative on error, 0 on success
 */
static int synchronize_all_cores(void)
{
	const int rc = syscall(__NR_membarrier, MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0);

	if (rc >= 0)
		return rc;

	/* the barrier was not performed: report why and hand the error back */
	pr_dbg2("failed to use membarrier: %s\n", strerror(errno));
	return rc;
}

#else /* HAVE_MEMBARRIER */

/* signal used to perform cache and pipeline synchronization across cores */
static int sig_sync_cores;

/* counter for the threads that have performed serialization when a signal is
issued */
static sem_t sem_sync_cores;

/**
 * serialize_instruction_execution - execute a core serializing instruction
 * @signum - number of the delivered signal (unused)
 * @info   - signal information (unused)
 * @arg    - signal context (unused)
 *
 * Handler for the core synchronization signal.  According to the Intel
 * manual, CPUID is a serializing instruction.  Once it has been executed, the
 * handler posts sem_sync_cores so that the thread waiting on that semaphore
 * can count this thread as synchronized.
 */
static void serialize_instruction_execution(int signum, siginfo_t *info, void *arg)
{
	/* the interrupted code may be about to inspect errno: keep it intact */
	int saved_errno = errno;
	unsigned int eax, ebx, ecx, edx;

	/*
	 * Query leaf 0 explicitly instead of passing an uninitialized value
	 * as the leaf.  Only the serializing side effect matters, the
	 * returned register values are discarded.
	 */
	__cpuid(0, eax, ebx, ecx, edx);
	(void)eax;
	(void)ebx;
	(void)ecx;
	(void)edx;

	sem_post(&sem_sync_cores);

	errno = saved_errno;
}

/**
* setup_synchronization_mechanism - setup real-time signal and its handler to
* perform core synchronization across all threads
* @return - 0 on success, -1 on failure
*/
static int setup_synchronization_mechanism(void)
{
struct sigaction act;

if (sig_sync_cores > 0)
return 0;

sig_sync_cores = find_unused_sigrt();
if (sig_sync_cores == -1)
return -1;

sem_init(&sem_sync_cores, 0, 0);

act.sa_sigaction = serialize_instruction_execution;
act.sa_flags = 0;

if (sigaction(sig_sync_cores, &act, NULL) < 0) {
pr_dbg("failed to configure core synchronization signal handler\n");
return -1;
}

pr_dbg("configured core synchronization signal (SIGRT%d) handler\n", sig_sync_cores);
return 0;
}

/**
 * synchronize_all_cores - send RT signals to perform cache and pipeline
 * synchronization across cores executing cross-modified code.
 *
 * Broadcasts sig_sync_cores and waits, for at most one second in total, until
 * every signalled thread has posted sem_sync_cores from its handler.
 *
 * @return - 0 when every signalled thread acknowledged, -1 on error or when
 *           the deadline expired before all of them did
 */
static int synchronize_all_cores(void)
{
	int signal_count;
	int sync_count = 0;
	struct timespec ts;

	ASSERT(sig_sync_cores >= SIGRTMIN);

	/* compute the deadline first, so that failing here leaves no signal in flight */
	if (clock_gettime(CLOCK_REALTIME, &ts) == -1)
		return -1;
	ts.tv_sec += 1;

	/*
	 * Discard posts left over from a previous round that timed out,
	 * otherwise they would be counted as acknowledgements of this round.
	 */
	while (sem_trywait(&sem_sync_cores) == 0 || errno == EINTR)
		continue;

	signal_count = thread_broadcast_signal(sig_sync_cores);

	while (sync_count < signal_count) {
		if (sem_timedwait(&sem_sync_cores, &ts) == 0) {
			sync_count++;
			continue;
		}

		/* interrupted by a signal handler: wait again for the same thread */
		if (errno == EINTR)
			continue;

		/* deadline expired (or unexpected failure): stop waiting */
		break;
	}
	pr_dbg3("synced core in %d/%d thread(s)\n", sync_count, signal_count);

	if (sync_count < signal_count) {
		pr_dbg("core synchronization incomplete\n");
		return -1;
	}

	return 0;
}

#endif /* HAVE_MEMBARRIER */
/**
* mcount_arch_dynamic_init - initialize arch-specific data structures to
* perform runtime dynamic instrumentation
Expand All @@ -137,8 +259,12 @@ int mcount_arch_dynamic_init(void)
if (configure_sigtrap_handler() < 0)
return -1;

if (setup_synchronization_mechanism() < 0)
return -1;

return 0;
}
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A blank line, please.


int mcount_setup_trampoline(struct mcount_dynamic_info *mdi)
{
unsigned char trampoline[] = { 0x3e, 0xff, 0x25, 0x01, 0x00, 0x00, 0x00, 0xcc };
Expand Down Expand Up @@ -549,12 +675,17 @@ static int patch_code(struct mcount_dynamic_info *mdi, struct mcount_disasm_info
* 0xb: <other instructions>
*
* The trap will emulate a call to the trampoline while in place.
*
* We ensure that every core sees the trap before patching the critical
* zone, by synchronizing them.
*/

if (register_trap(origin_code_addr, (void *)mdi->trampoline) == -1)
return INSTRUMENT_FAILED;
((uint8_t *)origin_code_addr)[0] = 0xcc;

synchronize_all_cores();
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm.. are you calling it for each function? If so, I'm afraid it'd slow down the whole process.


/*
* We fill the remaining part of the patching region with nops.
*
Expand Down