diff --git a/GNUmakefile.os4 b/GNUmakefile.os4 index b5686f1b..1239a0a7 100644 --- a/GNUmakefile.os4 +++ b/GNUmakefile.os4 @@ -63,7 +63,7 @@ WARNINGS := \ -Wundef -Wmissing-declarations -Wunused -Wwrite-strings -Wno-unused-value -Wno-comment -Wno-missing-braces \ -Wno-deprecated-declarations -Wno-sign-compare -Wno-unused-variable -Wno-parentheses -Wno-missing-prototypes \ -Wstrict-aliasing -Wno-shadow -Wno-discarded-qualifiers -Wno-unused-function -Wno-unused-parameter -Wno-strict-aliasing \ - -Wno-type-limits -Wno-cast-function-type -Werror # -Wbad-function-cast -Wconversion -Wformat + -Wno-type-limits -Wno-cast-function-type -Wno-frame-address -Werror # -Wbad-function-cast -Wconversion -Wformat PIC := -fPIC -DPIC INCLUDES := -I$(LIB_DIR)/include \ @@ -149,7 +149,6 @@ include libc.gmk include libm.gmk include libamiga.gmk include libdebug.gmk -include libprofile.gmk include libpthread.gmk include libcrypt.gmk include librt.gmk @@ -200,13 +199,11 @@ clean: # Update the version numbers bound to the individual libraries version: $(COPY) c.lib_rev.rev amiga/amiga.lib_rev.rev - $(COPY) c.lib_rev.rev profile/profile.lib_rev.rev $(COPY) c.lib_rev.rev math/m.lib_rev.rev $(COPY) c.lib_rev.rev math/crypt.lib_rev.rev $(COPY) c.lib_rev.rev math/pthread.lib_rev.rev bumprev amiga.lib bumprev c.lib - bumprev profile.lib bumprev m.lib bumprev crypt.lib bumprev pthread.lib diff --git a/README.md b/README.md index 44d72768..81e522dd 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ - C runtime library for AmigaOS4 + runtime library for AmigaOS4 [![Build Status](https://travis-ci.com/afxgroup/clib2.svg?branch=master)](https://travis-ci.org/afxgroup/clib2) [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) diff --git a/libc.gmk b/libc.gmk index 7f58b1d2..f9dad9da 100755 --- a/libc.gmk +++ b/libc.gmk @@ -954,6 +954,10 @@ C_LIBRARY := \ C_LIB := \ c.lib_rev.o \ + profile/_mcount.o \ + profile/profil.o \ + profile/gmon.o \ + profile/mcount.o \ shared_library/stubs.o \ unistd/getopt.o \ unistd/getopt_long.o @@ -986,6 +990,11 @@ $(OUT_SHARED)/%.o : $(LIB_DIR)/%.c $(VERBOSE)$(COMPILE_SHARED) endif +$(OUT_STATIC)/profile/%.o : $(LIB_DIR)/profile/%.S + $(VERBOSE)$(COMPILE_REG) +$(OUT_SHARED)/profile/%.o : $(LIB_DIR)/profile/%.S + $(VERBOSE)$(COMPILE_REG) + $(OUTPUT_LIB)/libc.a : $(SOURCES_STATIC) $(VERBOSE)@$(MAKELIB) $(OUTPUT_LIB)/libc.so : $(SOURCES_SHARED) diff --git a/libprofile.gmk b/libprofile.gmk deleted file mode 100755 index 53bf0cb8..00000000 --- a/libprofile.gmk +++ /dev/null @@ -1,57 +0,0 @@ -# -# $Id: libprofile.gmk,v 1.1 2006-09-17 17:37:27 clib4devs Exp $ -# - -OUT_STATIC := $(BUILD_DIR)/obj/libprofile -OUT_SHARED := $(BUILD_DIR)/obj.shared/libprofile - -ifeq ($(SHARED),yes) - LIBS += $(OUTPUT_LIB)/libprofile.so -endif -ifeq ($(STATIC),yes) - LIBS += $(OUTPUT_LIB)/libprofile.a -endif - -PROFILE_LIB = \ - profile/_mcount.o \ - profile/profil.o \ - profile/gmon.o \ - profile/mcount.o - -SOURCES_SHARED = $(addprefix $(OUT_SHARED)/, $(PROFILE_LIB)) -SOURCES_STATIC = $(addprefix $(OUT_STATIC)/, $(PROFILE_LIB)) - -# Dependencies to rebuild if the library version changes - -$(OUT_STATIC)/profile.lib_rev.o : $(LIB_DIR)/profile/profile.lib_rev.c $(LIB_DIR)/profile/profile.lib_rev.h -$(OUT_SHARED)/profile.lib_rev.o : $(LIB_DIR)/profile/profile.lib_rev.c $(LIB_DIR)/profile/profile.lib_rev.h - -$(OUT_STATIC)/%.o : AFLAGS += $(LARGEDATA) -$(OUT_SHARED)/%.o : AFLAGS += $(PIC) $(LARGEDATA) - -$(OUT_STATIC)/%.o : $(LIB_DIR)/%.S - $(VERBOSE)$(ASSEMBLE) - -$(OUT_SHARED)/%.o : $(LIB_DIR)/%.S - $(VERBOSE)$(ASSEMBLE) - -$(OUT_STATIC)/%.o : CFLAGS += $(LARGEDATA) -$(OUT_SHARED)/%.o : CFLAGS += $(PIC) $(LARGEDATA) - -ifdef SPE -$(OUT_STATIC)/%.o : $(LIB_DIR)/%.c - $(VERBOSE)$(COMPILE_SPE) -$(OUT_SHARED)/%.o : $(LIB_DIR)/%.c - $(VERBOSE)$(COMPILE_SHARED_SPE) -else -$(OUT_STATIC)/%.o : $(LIB_DIR)/%.c - $(VERBOSE)$(COMPILE) -$(OUT_SHARED)/%.o : $(LIB_DIR)/%.c - $(VERBOSE)$(COMPILE_SHARED) -endif - -$(OUTPUT_LIB)/libprofile.a : $(SOURCES_STATIC) - $(VERBOSE)$(MAKELIB) - -$(OUTPUT_LIB)/libprofile.so : $(OUTPUT_LIB)/libc.so $(SOURCES_SHARED) - $(VERBOSE)$(MAKESHARED) diff --git a/library/cpu/generic/bcopy.S b/library/cpu/generic/bcopy.S index 98f215c2..c427ffd9 100644 --- a/library/cpu/generic/bcopy.S +++ b/library/cpu/generic/bcopy.S @@ -66,7 +66,7 @@ // Main entry points. -.align 5 + .align 5 .global bcopy_g3 bcopy_g3: // void bcopy(const void *src, void *dst, size_t len) diff --git a/library/profile/_mcount.c b/library/profile/_mcount.c index 5237bfec..1ff88c57 100644 --- a/library/profile/_mcount.c +++ b/library/profile/_mcount.c @@ -1,22 +1,23 @@ /* - * $Id: profile__mcount.c,v 1.0 2022-08-06 10:36:26 clib4devs Exp $ + * $Id: profile__mcount.c,v 1.1 2023-10-20 10:36:26 clib4devs Exp $ */ -#include "profile_gmon.h" #include #include #include +#include +#include "gmon.h" void __mcount(uint32 frompc, uint32 selfpc); void __mcount(uint32 frompc, uint32 selfpc) { - uint16 *frompcindex; - struct tostruct *top, *prevtop; - struct gmonparam *p; - - int32 toindex; + register ARCINDEX *frompcindex; + register struct tostruct *top, *prevtop; + register struct gmonparam *p; + register ARCINDEX toindex; + int i; p = &_gmonparam; @@ -34,43 +35,59 @@ __mcount(uint32 frompc, uint32 selfpc) { if (frompc > p->textsize) goto done; -#if (HASHFRACTION & (HASHFRACTION - 1)) == 0 - if (p->hashfraction == HASHFRACTION) { - frompcindex = &p->froms[(size_t)(frompc / (HASHFRACTION * - sizeof(*p->froms)))]; - } else -#endif - { - frompcindex = &p->froms[(size_t)(frompc / (p->hashfraction * - sizeof(*p->froms)))]; + /* The following test used to be + if (p->log_hashfraction >= 0) + But we can simplify this if we assume the profiling data + is always initialized by the functions in gmon.c. But + then it is possible to avoid a runtime check and use the + same `if' as in gmon.c. So keep these tests in sync. + */ + if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) { + /* avoid integer divide if possible: */ + i = frompc >> p->log_hashfraction; + } else { + i = frompc / (p->hashfraction * sizeof(*p->froms)); } + frompcindex = &p->froms[i]; toindex = *frompcindex; - if (toindex == 0) { - /* first time down this arc */ + /* + * first time traversing this arc + */ toindex = ++p->tos[0].link; if (toindex >= p->tolimit) - /* Ouch! Overflow */ + /* halt further profiling */ goto overflow; - *frompcindex = (uint16) toindex; + *frompcindex = toindex; top = &p->tos[toindex]; top->selfpc = selfpc; top->count = 1; top->link = 0; goto done; } - top = &p->tos[toindex]; + if (top->selfpc == selfpc) { - /* arc at front of chain */ + /* arc at front of chain; usual case. */ top->count++; goto done; } - - for (;;) { + /* + * have to go looking down chain for it. + * top points to what we are looking at, + * prevtop points to previous top. + * we know it is not at the head of the chain. + */ + for (;; ) { if (top->link == 0) { + /* + * top is end of the chain and none of the chain + * had top->selfpc == selfpc. + * so we allocate a new tostruct + * and link it to the head of the chain. + */ toindex = ++p->tos[0].link; if (toindex >= p->tolimit) goto overflow; @@ -79,17 +96,25 @@ __mcount(uint32 frompc, uint32 selfpc) { top->selfpc = selfpc; top->count = 1; top->link = *frompcindex; - *frompcindex = (uint16) toindex; + *frompcindex = toindex; goto done; } + /* + * otherwise, check the next arc on the chain. + */ prevtop = top; top = &p->tos[top->link]; if (top->selfpc == selfpc) { + /* + * there it is. + * increment its count + * move it to the head of the chain. + */ top->count++; toindex = prevtop->link; prevtop->link = top->link; top->link = *frompcindex; - *frompcindex = (uint16) toindex; + *frompcindex = toindex; goto done; } } diff --git a/library/profile/gmon.c b/library/profile/gmon.c index 9f3b8f8c..84b2d1ac 100644 --- a/library/profile/gmon.c +++ b/library/profile/gmon.c @@ -1,5 +1,5 @@ /* - * $Id: profile_gmon.c,v 1.0 2021-01-18 12:04:26 clib4devs Exp $ + * $Id: profile_gmon.c,v 1.1 2023-10-20 12:04:26 clib4devs Exp $ */ #include @@ -9,45 +9,168 @@ #include #include #include +#include +#include +#include #define SCALE_1_TO_1 0x10000L +#define MIN_OS_VERSION 52 -#include "profile_gmon.h" +#include "gmon.h" +#include "gmon_out.h" -#undef DebugPrintF -#define dprintf(format, args...) ((struct ExecIFace *)((*(struct ExecBase **)4)->MainInterface))->DebugPrintF("[%s] " format, __PRETTY_FUNCTION__, ##args) +/* Head of basic-block list or NULL. */ +struct __bb *__bb_head __attribute__ ((visibility ("hidden"))); struct gmonparam _gmonparam = { state : kGmonProfOn }; -/* Use __executable_start as the lowest address to keep profiling records - if it provided by the linker. */ -extern const char __executable_start[] __attribute__ ((visibility ("hidden"))); - static unsigned int s_scale; -void moncontrol(int); -void monstartup(uint32, uint32); -void moncleanup(void); -void mongetpcs(uint32 *lowpc, uint32 *highpc); -extern int profil(uint16 *buffer, uint32 bufSize, uint32 offset, uint32 scale); +void +write_hist(int fd) { + u_char tag = GMON_TAG_TIME_HIST; + + if (_gmonparam.kcountsize > 0) { + struct iovec iov[3] = { + { &tag, sizeof(tag) }, + { &thdr, sizeof(struct gmon_hist_hdr) }, + { _gmonparam.kcount, _gmonparam.kcountsize } + }; + + if ( + sizeof(thdr) != sizeof(struct gmon_hist_hdr) || + (offsetof(struct real_gmon_hist_hdr, low_pc) != offsetof(struct gmon_hist_hdr, low_pc)) || + (offsetof(struct real_gmon_hist_hdr, high_pc) != offsetof(struct gmon_hist_hdr, high_pc)) || + (offsetof(struct real_gmon_hist_hdr, hist_size) != offsetof(struct gmon_hist_hdr, hist_size)) || + (offsetof(struct real_gmon_hist_hdr, prof_rate) != offsetof(struct gmon_hist_hdr, prof_rate)) || + (offsetof(struct real_gmon_hist_hdr, dimen) != offsetof(struct gmon_hist_hdr, dimen)) || + (offsetof(struct real_gmon_hist_hdr, dimen_abbrev) != offsetof(struct gmon_hist_hdr, dimen_abbrev)) + ) + return; + + thdr.low_pc = (char *) _gmonparam.text_start; + thdr.high_pc = (char *) _gmonparam.text_start + _gmonparam.highpc - _gmonparam.lowpc; + thdr.hist_size = _gmonparam.kcountsize / sizeof(HISTCOUNTER); + dprintf("thdr.low_pc = %x - thdr.high_pc = %x\n", thdr.low_pc, thdr.high_pc); + thdr.prof_rate = 100; + strncpy(thdr.dimen, "seconds", sizeof(thdr.dimen)); + thdr.dimen_abbrev = 's'; + + writev(fd, iov, 3); + } +} + +void +write_call_graph(int fd) { + u_char tag = GMON_TAG_CG_ARC; + struct gmon_cg_arc_record raw_arc[NARCS_PER_WRITEV] __attribute__((aligned(__alignof__(char *)))); + ARCINDEX from_index, to_index; + u_long from_len; + u_long frompc; + struct iovec iov[2 * NARCS_PER_WRITEV]; + int nfilled; + + for (nfilled = 0; nfilled < NARCS_PER_WRITEV; ++nfilled) { + iov[2 * nfilled].iov_base = &tag; + iov[2 * nfilled].iov_len = sizeof(tag); + + iov[2 * nfilled + 1].iov_base = &raw_arc[nfilled]; + iov[2 * nfilled + 1].iov_len = sizeof(struct gmon_cg_arc_record); + } + + nfilled = 0; + from_len = _gmonparam.fromssize / sizeof(*_gmonparam.froms); + for (from_index = 0; from_index < from_len; ++from_index) { + if (_gmonparam.froms[from_index] == 0) + continue; + + frompc = _gmonparam.text_start; + frompc += from_index * _gmonparam.hashfraction * sizeof(*_gmonparam.froms); + for (to_index = _gmonparam.froms[from_index]; + to_index != 0; + to_index = _gmonparam.tos[to_index].link) { + struct arc { + char *frompc; + char *selfpc; + int32_t count; + } arc; + + arc.frompc = (char *) frompc; + arc.selfpc = (char *) _gmonparam.text_start + _gmonparam.tos[to_index].selfpc; + arc.count = _gmonparam.tos[to_index].count; + dprintf("arc.frompc = %p - arc.selfpc = %p\n", arc.frompc, arc.selfpc); + memcpy(raw_arc + nfilled, &arc, sizeof(raw_arc[0])); + + if (++nfilled == NARCS_PER_WRITEV) { + writev(fd, iov, 2 * nfilled); + nfilled = 0; + } + } + } + if (nfilled > 0) + writev(fd, iov, 2 * nfilled); +} + +void +write_bb_counts(int fd) { + struct __bb *grp; + u_char tag = GMON_TAG_BB_COUNT; + size_t ncounts; + size_t i; + + struct iovec bbhead[2] = { + {&tag, sizeof(tag)}, + {&ncounts, sizeof(ncounts)} + }; + struct iovec bbbody[8]; + size_t nfilled; + + for (i = 0; i < (sizeof(bbbody) / sizeof(bbbody[0])); i += 2) { + bbbody[i].iov_len = sizeof(grp->addresses[0]); + bbbody[i + 1].iov_len = sizeof(grp->counts[0]); + } + + /* Write each group of basic-block info (all basic-blocks in a + compilation unit form a single group). */ + + for (grp = __bb_head; grp; grp = grp->next) { + ncounts = grp->ncounts; + writev(fd, bbhead, 2); + for (nfilled = i = 0; i < ncounts; ++i) { + if (nfilled > (sizeof(bbbody) / sizeof(bbbody[0])) - 2) { + writev(fd, bbbody, nfilled); + nfilled = 0; + } + + bbbody[nfilled++].iov_base = (char *)&grp->addresses[i]; + bbbody[nfilled++].iov_base = &grp->counts[i]; + } + if (nfilled > 0) + writev(fd, bbbody, nfilled); + } +} void monstartup(uint32 low_pc, uint32 high_pc) { uint8 *cp; - uint32 lowpc, highpc; + uint32 lowpc, highpc, text_start; struct gmonparam *p = &_gmonparam; - dprintf("in monstartup)\n"); + + dprintf("in monstartup\n"); + /* * If we don't get proper lowpc and highpc, then * we'll try to get them from the elf handle. */ if (low_pc == 0 && high_pc == 0) { - mongetpcs(&lowpc, &highpc); + mongetpcs(&lowpc, &highpc, &text_start); } else { + p->text_start = 0x01000074; // Default to our default text segment start lowpc = low_pc; highpc = high_pc; } + p->text_start = text_start; /* * Round lowpc and highpc to multiples of the density @@ -65,12 +188,20 @@ void monstartup(uint32 low_pc, uint32 high_pc) { * every instruction is exactly one word wide and always aligned. */ p->kcountsize = p->textsize / HISTFRACTION; + p->log_hashfraction = -1; /* * The hash table size */ p->hashfraction = HASHFRACTION; - p->fromssize = p->textsize / p->hashfraction; + + /* The following test must be kept in sync with the corresponding test in _mcount.c. */ + if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) { + /* if HASHFRACTION is a power of two, mcount can use shifting + instead of integer division. Precompute shift amount. */ + p->log_hashfraction = ffs(p->hashfraction * sizeof(*p->froms)) - 1; + } + p->fromssize = p->textsize / HASHFRACTION; p->tolimit = p->textsize * ARCDENSITY / 100; if (p->tolimit < MINARCS) @@ -80,7 +211,9 @@ void monstartup(uint32 low_pc, uint32 high_pc) { p->tossize = p->tolimit * sizeof(struct tostruct); - dprintf("lowpc = %p, highpc = %p\n", lowpc, highpc); + dprintf("lowpc = %p\n", lowpc); + dprintf("highpc = %p\n", highpc); + dprintf("text_start = %p\n", p->text_start); dprintf("textsize = %d\n", p->textsize); dprintf("kcountsize = %d\n", p->kcountsize); dprintf("fromssize = %d\n", p->fromssize); @@ -92,32 +225,36 @@ void monstartup(uint32 low_pc, uint32 high_pc) { return; } - p->memory = cp; p->tos = (struct tostruct *) cp; cp += p->tossize; - p->kcount = (uint16 *) cp; + p->kcount = (HISTCOUNTER *) cp; cp += p->kcountsize; - p->froms = (uint16 *) cp; + p->froms = (ARCINDEX *) cp; p->tos[0].link = 0; /* Verify granularity for sampling */ - if (p->kcountsize < p->textsize) + if (p->kcountsize < p->textsize) { /* FIXME Avoid floating point */ s_scale = ((float) p->kcountsize / p->textsize) * SCALE_1_TO_1; + } else s_scale = SCALE_1_TO_1; s_scale >>= 1; - dprintf("Enabling monitor\n"); + dprintf("Enabling monitor: Scale = %d\n", s_scale); moncontrol(1); } void moncontrol(int mode) { struct gmonparam *p = &_gmonparam; + /* Don't change the state if we ran into an error. */ + if (p->state == kGmonProfError) + return; + if (mode) { /* Start profiling. */ profil((uint16 *) p->kcount, (size_t) p->kcountsize, p->lowpc, s_scale); @@ -130,76 +267,62 @@ void moncontrol(int mode) { } void moncleanup(void) { - BPTR fd; - int fromindex; - int endfrom; - uint32 frompc; - int toindex; - struct rawarc rawarc; + int fd; struct gmonparam *p = &_gmonparam; - struct gmonhdr gmonhdr, *hdr; -#ifdef DEBUG - FILE *log; -#endif moncontrol(0); if (p->state == kGmonProfError) { fprintf(stderr, "WARNING: Overflow during profiling\n"); + goto out; } - fd = Open("gmon.out", MODE_NEWFILE); - if (!fd) { - fprintf(stderr, "ERROR: could not open gmon.out\n"); - return; - } - - hdr = (struct gmonhdr *) &gmonhdr; + if (_gmonparam.kcountsize > 0) { + fd = open("gmon.out", O_CREAT | O_TRUNC | O_WRONLY); + if (!fd) { + fprintf(stderr, "ERROR: could not open gmon.out\n"); + goto out; + } - hdr->lpc = 0; //p->lowpc; - hdr->hpc = p->highpc - p->lowpc; - hdr->ncnt = (int) p->kcountsize + sizeof(gmonhdr); - hdr->version = GMONVERSION; - hdr->profrate = 100; //FIXME:!! + /* write gmon.out header: */ + struct real_gmon_hdr { + char cookie[4]; + int32_t version; + char spare[3 * 4]; + } ghdr; + + if ( + sizeof(ghdr) != sizeof(struct gmon_hdr) || + (offsetof(struct real_gmon_hdr, cookie) != offsetof(struct gmon_hdr, cookie)) || + (offsetof(struct real_gmon_hdr, version) != offsetof(struct gmon_hdr, version))) { + goto out; + } - Write(fd, hdr, sizeof(*hdr)); - Write(fd, p->kcount, p->kcountsize); + memcpy(&ghdr.cookie[0], GMON_MAGIC, sizeof(ghdr.cookie)); + ghdr.version = GMON_VERSION; + memset(ghdr.spare, '\0', sizeof(ghdr.spare)); + write(fd, &ghdr, sizeof(struct gmon_hdr)); - endfrom = p->fromssize / sizeof(*p->froms); + /* write PC histogram: */ + write_hist(fd); -#ifdef DEBUG - log = fopen("gmon.log", "w"); -#endif + /* write call-graph: */ + write_call_graph(fd); - for (fromindex = 0; fromindex < endfrom; fromindex++) { - if (p->froms[fromindex] == 0) - continue; + /* write basic-block execution counts: */ + write_bb_counts(fd); - frompc = 0; /* FIXME: was p->lowpc; needs to be 0 and assumes -Ttext=0 on compile. Better idea? */ - frompc += fromindex * p->hashfraction * sizeof(*p->froms); - for (toindex = p->froms[fromindex]; toindex != 0; - toindex = p->tos[toindex].link) { -#ifdef DEBUG - if (log) - fprintf(log, "%p called from %p: %d times\n", frompc, - p->tos[toindex].selfpc, - p->tos[toindex].count); -#endif - rawarc.raw_frompc = frompc; - rawarc.raw_selfpc = p->tos[toindex].selfpc; - rawarc.raw_count = p->tos[toindex].count; - Write(fd, &rawarc, sizeof(rawarc)); - } + close(fd); + } +out: + if (p->tos) { + FreeVec(p->tos); + p->tos = NULL; } -#ifdef DEBUG - if (log) - fclose(log); -#endif - Close(fd); } -void mongetpcs(uint32 *lowpc, uint32 *highpc) { +void mongetpcs(uint32 *lowpc, uint32 *highpc, uint32 *text_start) { struct Library *ElfBase = NULL; struct ElfIFace *IElf = NULL; struct Process *self; @@ -224,7 +347,7 @@ void mongetpcs(uint32 *lowpc, uint32 *highpc) { seglist = GetProcSegList(self, GPSLF_CLI | GPSLF_SEG); GetSegListInfoTags(seglist, GSLI_ElfHandle, &elfHandle, TAG_DONE); - elfHandle = OpenElfTags(OET_ElfHandle, elfHandle, TAG_DONE); + elfHandle = OpenElfTags(OET_ElfHandle, elfHandle, OET_ReadOnlyCopy, TRUE, TAG_DONE); if (!elfHandle) goto out; @@ -237,6 +360,7 @@ void mongetpcs(uint32 *lowpc, uint32 *highpc) { uint32 base = (uint32) GetSectionTags(elfHandle, GST_SectionIndex, i, TAG_DONE); *lowpc = base; *highpc = base + shdr->sh_size; + *text_start = shdr->sh_addr; break; } } diff --git a/library/profile/gmon.h b/library/profile/gmon.h new file mode 100644 index 00000000..f06c048d --- /dev/null +++ b/library/profile/gmon.h @@ -0,0 +1,132 @@ +/* +* $Id: profile_gmon.h,v 1.1 2023-10-20 12:04:26 clib2devs Exp $ +*/ + +#ifndef _GMON_H +#define _GMON_H + +#include +#include "../shared_library/math.h" + +#undef DebugPrintF +#define dprintf(format, args...) ((struct ExecIFace *)((*(struct ExecBase **)4)->MainInterface))->DebugPrintF("[%s] " format, __PRETTY_FUNCTION__, ##args) + +void moncontrol(int); +void monstartup(uint32, uint32); +void moncleanup(void); +void mongetpcs(uint32 *lowpc, uint32 *highpc, uint32 *text_start); + +struct gmonhdr { + uint32 lpc; + uint32 hpc; + int ncnt; + int version; + int profrate; + int reserved[3]; +}; + +#define HISTCOUNTER uint16 + +// I am sure we can make these bigger +#define HISTFRACTION 2 +#define HASHFRACTION 4 + +/* + * Percent of text space to allocate for tostructs. + * This is a heuristic; we will fail with a warning when profiling programs + * with a very large number of very small functions, but that's + * normally OK. + * 2 is probably still a good value for normal programs. + * Profiling a test case with 64000 small functions will work if + * you raise this value to 3 and link statically (which bloats the + * text size, thus raising the number of arcs expected by the heuristic). + */ +#define ARCDENSITY 2 + +/* + * Always allocate at least this many tostructs. This + * hides the inadequacy of the ARCDENSITY heuristic, at least + * for small programs. + */ +#define MINARCS 50 + +/* + * Maximum number of arcs we want to allow. + * Used to be max representable value of ARCINDEX minus 2, but now + * that ARCINDEX is a long, that's too large; we don't really want + * to allow a 48 gigabyte table. + * The old value of 1<<16 wasn't high enough in practice for large C++ + * programs; will 1<<20 be adequate for long? FIXME + */ +#define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER)))-2) + +/* + * The type used to represent indices into gmonparam.tos[]. + */ +#define ARCINDEX uint16 + +/* structure emitted by "gcc -a". This must match struct bb in + gcc/libgcc2.c. It is OK for gcc to declare a longer structure as + long as the members below are present. */ +struct __bb { + long zero_word; + const char *filename; + long *counts; + long ncounts; + struct __bb *next; + const unsigned long *addresses; +}; + +extern struct __bb *__bb_head; + +struct tostruct { + uint32 selfpc; + int32 count; + ARCINDEX link; + ARCINDEX pad; +}; + +struct rawarc { + uint32 raw_frompc; + uint32 raw_selfpc; + int32 raw_count; +}; + +#define ROUNDDOWN(x, y) (((x)/(y))*(y)) +#define ROUNDUP(x, y) ((((x)+(y)-1)/(y))*(y)) + +struct gmonparam { + int state; + uint16 *kcount; + uint32 kcountsize; + ARCINDEX *froms; + uint32 fromssize; + struct tostruct *tos; + uint32 tossize; + int32 tolimit; + uint32 lowpc; + uint32 highpc; + uint32 textsize; + uint32 hashfraction; + long log_hashfraction; + uint32_t text_start; +}; + +extern struct gmonparam _gmonparam; + +enum { + kGmonProfOn = 0, + kGmonProfBusy = 1, + kGmonProfError = 2, + kGmonProfOff = 3 +}; + +enum { + kGprofState = 0, + kGprofCount = 1, + kGprofFroms = 2, + kGprofTos = 3, + kGprofGmonParam = 4 +}; + +#endif diff --git a/library/profile/gmon_out.h b/library/profile/gmon_out.h new file mode 100644 index 00000000..5967c6fc --- /dev/null +++ b/library/profile/gmon_out.h @@ -0,0 +1,75 @@ +/* +* $Id: profile_gmon_out.h,v 1.1 2023-10-20 12:04:26 clib2devs Exp $ +*/ + +/* This file specifies the format of gmon.out files. It should have + as few external dependencies as possible as it is going to be included + in many different programs. That is, minimize the number of #include's. + + A gmon.out file consists of a header (defined by gmon_hdr) followed by + a sequence of records. Each record starts with a one-byte tag + identifying the type of records, followed by records specific data. */ + +#ifndef _SYS_GMON_OUT_H +#define _SYS_GMON_OUT_H 1 + +#include + +#define GMON_MAGIC "gmon" /* magic cookie */ +#define GMON_VERSION 1 /* version number */ + +/* For profiling shared object we need a new format. */ +#define GMON_SHOBJ_VERSION 0x1ffff +#define NARCS_PER_WRITEV 32 + +__BEGIN_DECLS + +/* + * Raw header as it appears on file (without padding). This header + * always comes first in gmon.out and is then followed by a series + * records defined below. + */ +struct gmon_hdr { + char cookie[4]; + char version[4]; + char spare[3 * 4]; +}; + +/* types of records in this file: */ +typedef enum { + GMON_TAG_TIME_HIST = 0, + GMON_TAG_CG_ARC = 1, + GMON_TAG_BB_COUNT = 2 +} GMON_Record_Tag; + +struct gmon_hist_hdr { + char low_pc[sizeof(char *)]; /* base pc address of sample buffer */ + char high_pc[sizeof(char *)]; /* max pc address of sampled buffer */ + char hist_size[4]; /* size of sample buffer */ + char prof_rate[4]; /* profiling clock rate */ + char dimen[15]; /* phys. dim., usually "seconds" */ + char dimen_abbrev; /* usually 's' for "seconds" */ +}; + +struct gmon_cg_arc_record { + char from_pc[sizeof(char *)]; /* address within caller's body */ + char self_pc[sizeof(char *)]; /* address within callee's body */ + char count[4]; /* number of arc traversals */ +}; + +struct real_gmon_hist_hdr { + char *low_pc; + char *high_pc; + int32_t hist_size; + int32_t prof_rate; + char dimen[15]; + char dimen_abbrev; +} thdr; + +void write_hist(int fd); +void write_call_graph(int fd); +void write_bb_counts(int fd); + +__END_DECLS + +#endif /* sys/gmon_out.h */ diff --git a/library/profile/mcount.S b/library/profile/mcount.S index 1ca26e7c..eeb3a700 100644 --- a/library/profile/mcount.S +++ b/library/profile/mcount.S @@ -1,40 +1,70 @@ -// -// $Id: profile_mcount.S,v 1.0 2021-01-18 12:04:26 clib4devs Exp $ -// +/* +/ $Id: profile_mcount.S,v 1.1 2023-10-20 12:04:26 clib4devs Exp $ +/* - .globl _mcount - .type _mcount,@function -_mcount: - stwu r1,-64(r1) - stw r3,16(r1) - stw r4,20(r1) - stw r5,24(r1) - stw r6,28(r1) - stw r7,32(r1) - stw r8,36(r1) - stw r9,40(r1) - stw r10,44(r1) +/* We do profiling as described in the SYSV ELF ABI, except that glibc + _mcount manages its own counters. The caller has put the address the + caller will return to in the usual place on the stack, 4(r1). _mcount + is responsible for ensuring that when it returns no argument-passing + registers are disturbed, and that the LR is set back to (what the + caller sees as) 4(r1). - mflr r4 - stw r4,48(r1) - lwz r3,68(r1) + This is intended so that the following code can be inserted at the + front of any routine without changing the routine: + + .data + mflr r0 + stw r0,4(r1) + bl _mcount +*/ - bl __mcount - lwz r3,68(r1) - mtlr r3 - lwz r4,48(r1) - mtctr r4 +#ifdef PIC +# define JUMPTARGET(name) name##@plt +#else +# define JUMPTARGET(name) name +#endif - lwz r3,16(r1) - lwz r4,20(r1) - lwz r5,24(r1) - lwz r6,28(r1) - lwz r7,32(r1) - lwz r8,36(r1) - lwz r9,40(r1) - lwz r10,44(r1) - addi r1,r1,64 + .text + .global _mcount + .type _mcount, @function +_mcount: + .cfi_startproc + stwu r1,-48(r1) + .cfi_adjust_cfa_offset 48 +/* We need to save the parameter-passing registers. */ + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r4 + lwz r3, 52(r1) + mfcr r5 + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + stw r4, 44(r1) + .cfi_offset lr, -4 + stw r5, 8(r1) + bl JUMPTARGET(__mcount) + /* Restore the registers... */ + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) + mtctr r0 + lwz r4, 16(r1) + mtcrf 0xff,r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, 52(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + /* ...unwind the stack frame, and return to your usual programming. */ + addi r1,r1,48 bctr -_mcount_end: - .size _mcount,_mcount_end-_mcount + .cfi_endproc + .size _mcount,.-_mcount diff --git a/library/profile/profil.c b/library/profile/profil.c index 149ae734..1c3fbad2 100644 --- a/library/profile/profil.c +++ b/library/profile/profil.c @@ -1,5 +1,5 @@ /* - * $Id: profile_profil.c,v 1.0 2021-01-21 10:08:32 clib4devs Exp $ + * $Id: profile_profil.c,v 1.1 2023-10-20 10:08:32 clib4devs Exp $ */ #include @@ -8,6 +8,10 @@ #include #include +#include "gmon.h" + +#define COUNTER 1 + static struct Interrupt CounterInt; static struct PerformanceMonitorIFace *IPM; @@ -25,25 +29,20 @@ uint32 CounterIntFn(struct ExceptionContext *, struct ExecBase *, struct IntData uint32 GetCounterStart(void) { - uint64 fsb; + uint64 tb; double bit0time; uint32 count; - GetCPUInfoTags( - GCIT_FrontsideSpeed, &fsb, - TAG_DONE); + GetCPUInfoTags(GCIT_TimeBaseSpeed, &tb, TAG_DONE); - /* Timebase ticks at 1/4 of FSB */ - bit0time = (double) 8.0 / (double) fsb; - count = (uint32)((double) 0.01 / bit0time); + count = (uint32) (tb / (2 * 100 + 1)); - return 0x80000000 - count; + return (uint32) (-count); } uint32 CounterIntFn(struct ExceptionContext *ctx, struct ExecBase *ExecBase, struct IntData *profileData) { - APTR sampledAddress = profileData->IPM->GetSampledAddress(); - uint32 sia = (uint32) sampledAddress; + uint32 sia = (uint32) ctx->ip; /* Silence compiler */ (void) ExecBase; @@ -71,15 +70,17 @@ profil(unsigned short *buffer, size_t bufSize, size_t offset, unsigned int scale * A pointer to PerformanceMonitorIFace is never obtained, and the call to IPM->EventControlTags() when buffer == 0 attempts to dereference a NULL pointer * https://sourceforge.net/p/clib2/bugs/54/ */ - if (!IPM) + if (!IPM) { + dprintf("Cannot obtain Performance Monitor interface \n"); return 0; + } Stack = SuperState(); IPM->EventControlTags( PMECT_Disable, PMEC_MasterInterrupt, TAG_DONE); - IPM->SetInterruptVector(1, 0); + IPM->SetInterruptVector(COUNTER, 0); IPM->Unmark(0); IPM->Release(); @@ -111,10 +112,13 @@ profil(unsigned short *buffer, size_t bufSize, size_t offset, unsigned int scale /* Prepare Performance Monitor */ IPM->MonitorControlTags( - PMMCT_FreezeCounters, PMMC_Unmarked, - PMMCT_RTCBitSelect, PMMC_BIT0, + PMMCT_FreezeCounters, PMMC_Unmarked, + PMMCT_RTCBitSelect, PMMC_BIT0, TAG_DONE); - IPM->CounterControl(1, ProfileData.CounterStart, PMCI_Transition); + + if (!IPM->CounterControl(COUNTER, ProfileData.CounterStart, PMCI_Transition)) { + dprintf("Cannot set CounterControl\n"); + } IPM->EventControlTags( PMECT_Enable, 1, @@ -122,9 +126,10 @@ profil(unsigned short *buffer, size_t bufSize, size_t offset, unsigned int scale TAG_DONE); IPM->Mark(0); + IPM->Release(); if (Stack) UserState(Stack); return 0; -} +} \ No newline at end of file diff --git a/library/profile/profile.lib_rev.c b/library/profile/profile.lib_rev.c deleted file mode 100644 index 3e8b5ef9..00000000 --- a/library/profile/profile.lib_rev.c +++ /dev/null @@ -1,7 +0,0 @@ -/* - * $Id: profile.lib_rev.c,v 1.0 2022-07-18 11:59:23 clib4devs Exp $ -*/ - -#include "profile.lib_rev.h" - -char __profile_lib_version[] = VERSTAG; diff --git a/library/profile/profile.lib_rev.h b/library/profile/profile.lib_rev.h deleted file mode 100755 index 643e570e..00000000 --- a/library/profile/profile.lib_rev.h +++ /dev/null @@ -1,9 +0,0 @@ -#define VERSION 0 -#define REVISION 1 -#define SUBREVISION 0 - -#define DATE "08.02.2021" -#define VERS "profile.lib 0.1" -#define VSTRING "profile.lib 0.1 (08.02.2021)\r\n" -#define VERSTAG "\0$VER: profile.lib 0.1 (08.02.2021)" - diff --git a/library/profile/profile.lib_rev.rev b/library/profile/profile.lib_rev.rev deleted file mode 100755 index 573541ac..00000000 --- a/library/profile/profile.lib_rev.rev +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/library/profile/profile_gmon.h b/library/profile/profile_gmon.h deleted file mode 100755 index 471d1630..00000000 --- a/library/profile/profile_gmon.h +++ /dev/null @@ -1,79 +0,0 @@ -/* -* $Id: profile_gmon.h,v 1.0 2021-01-18 12:04:26 clib4devs Exp $ -*/ - -#ifndef _GMON_H -#define _GMON_H - -#include - -struct gmonhdr { - uint32 lpc; - uint32 hpc; - int ncnt; - int version; - int profrate; - int reserved[3]; -}; - -#define GMONVERSION 0x00051879 - -#define HISTCOUNTER uint16 -// I am sure we can make these bigger -#define HISTFRACTION 2 -#define HASHFRACTION 4 - -#define ARCDENSITY 2 -#define MINARCS 50 -#define MAXARCS ((1 << (8 * sizeof(HISTCOUNTER)))-2) - -struct tostruct { - uint32 selfpc; - int32 count; - uint16 link; - uint16 pad; -}; - -struct rawarc { - uint32 raw_frompc; - uint32 raw_selfpc; - int32 raw_count; -}; - -#define ROUNDDOWN(x, y) (((x)/(y))*(y)) -#define ROUNDUP(x, y) ((((x)+(y)-1)/(y))*(y)) - -struct gmonparam { - int state; - uint16 *kcount; - uint32 kcountsize; - uint16 *froms; - uint32 fromssize; - struct tostruct *tos; - uint32 tossize; - int32 tolimit; - uint32 lowpc; - uint32 highpc; - uint32 textsize; - uint32 hashfraction; - uint8 *memory; -}; - -extern struct gmonparam _gmonparam; - -enum { - kGmonProfOn = 0, - kGmonProfBusy = 1, - kGmonProfError = 2, - kGmonProfOff = 3 -}; - -enum { - kGprofState = 0, - kGprofCount = 1, - kGprofFroms = 2, - kGprofTos = 3, - kGprofGmonParam = 4 -}; - -#endif diff --git a/library/shared_library/clib4_vectors.h b/library/shared_library/clib4_vectors.h index e3219f64..b52e6ac2 100644 --- a/library/shared_library/clib4_vectors.h +++ b/library/shared_library/clib4_vectors.h @@ -1149,5 +1149,4 @@ static void *clib4Vectors[] = { (void *) (fts_open), /* 4316 */ (void *) (fts_read), /* 4320 */ (void *) (fts_set), /* 4324 */ - }; \ No newline at end of file diff --git a/library/shared_library/math.c b/library/shared_library/math.c index e897199c..1322409e 100644 --- a/library/shared_library/math.c +++ b/library/shared_library/math.c @@ -392,7 +392,55 @@ __unorddf2 (double a, double b) { return isnan(a) || isnan(b); } -#ifndef __SOFT_FP__ +#ifndef __SOFTFP__ +long double +__floatunditf(uint64_t a) { + /* Begins with an exact copy of the code from __floatundidf */ + + static const double twop52 = 0x1.0p52; + static const double twop84 = 0x1.0p84; + static const double twop84_plus_twop52 = 0x1.00000001p84; + + doublebits high = { .d = twop84 }; + doublebits low = { .d = twop52 }; + + high.x |= a >> 32; /* 0x1.0p84 + high 32 bits of a */ + low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a */ + + const double high_addend = high.d - twop84_plus_twop52; + + /* At this point, we have two double precision numbers + * high_addend and low.d, and we wish to return their sum + * as a canonicalized long double: + */ + /* This implementation sets the inexact flag spuriously. */ + /* This could be avoided, but at some substantial cost. */ + + DD result; + + result.s.hi = high_addend + low.d; + result.s.lo = (high_addend - result.s.hi) + low.d; + + return result.ld; +} + +double +__floatundidf(du_int a) { + static const double twop52 = 4503599627370496.0; // 0x1.0p52 + static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84 + static const double twop84_plus_twop52 = + 19342813118337666422669312.0; // 0x1.00000001p84 + + doublebits high = {.d = twop84}; + doublebits low = {.d = twop52}; + + high.x |= a >> 32; + low.x |= a & UINT64_C(0x00000000ffffffff); + + const double result = (high.d - twop84_plus_twop52) + low.d; + return result; +} + /* Support for systems that have hardware floating-point; we'll set the inexact flag * as a side-effect of this computation. */ @@ -401,10 +449,7 @@ __floatdidf(di_int a) { static const double twop52 = 0x1.0p52; static const double twop32 = 0x1.0p32; - union { - int64_t x; - double d; - } low = {.d = twop52}; + doublebits low = {.d = twop52}; const double high = (int32_t)(a >> 32) * twop32; low.x |= a & INT64_C(0x00000000ffffffff); @@ -468,4 +513,50 @@ __floatdidf(di_int a) { fb.u.low = (su_int)a; /* mantissa-low */ return fb.f; } + +double +__floatundidf(du_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int sd = N - __builtin_clzll(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > DBL_MANT_DIG) { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit DBL_MANT_DIG-1 bits to the right of 1 + // Q = bit DBL_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (DBL_MANT_DIG + 2))) | + ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits + if (a & ((du_int)1 << DBL_MANT_DIG)) { + a >>= 1; + ++e; + } + // a is now rounded to DBL_MANT_DIG bits + } else { + a <<= (DBL_MANT_DIG - sd); + // a is now rounded to DBL_MANT_DIG bits + } + double_bits fb; + fb.u.s.high = ((su_int)(e + 1023) << 20) | // exponent + ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high + fb.u.s.low = (su_int)a; // mantissa-low + return fb.f; +} #endif \ No newline at end of file diff --git a/library/shared_library/math.h b/library/shared_library/math.h index 1b64b623..4247082b 100644 --- a/library/shared_library/math.h +++ b/library/shared_library/math.h @@ -1,6 +1,8 @@ #ifndef _MATH_H__ #define _MATH_H__ +#include + typedef int si_int; typedef unsigned su_int; typedef long long di_int; @@ -25,11 +27,25 @@ typedef union { su_int u; float f; } float_bits; + typedef union { udwords u; double f; } double_bits; +typedef union { + long double ld; + struct { + double hi; + double lo; + }s; +} DD; + +typedef union { + int64_t x; + double d; +} doublebits; + typedef struct { udwords high; udwords low; @@ -51,5 +67,7 @@ di_int __moddi3(di_int a, di_int b); di_int __divdi3(di_int a, di_int b); double __floatdidf(di_int a); di_int __unorddf2 (double a, double b); +long double __floatunditf(uint64_t a); +double __floatundidf(du_int a); #endif /* _MATH_H__ */ \ No newline at end of file diff --git a/misc/amigaos.h b/misc/amigaos.h index 54d6bfde..6dd29bce 100644 --- a/misc/amigaos.h +++ b/misc/amigaos.h @@ -448,3 +448,6 @@ do \ /* This target uses the amigaos.opt file. */ #define TARGET_USES_AMIGAOS_OPT 1 + +#undef NO_PROFILE_COUNTERS +#define NO_PROFILE_COUNTERS 0 \ No newline at end of file diff --git a/shared.gmk b/shared.gmk index 391900b4..248f3693 100644 --- a/shared.gmk +++ b/shared.gmk @@ -51,9 +51,6 @@ $(OUT_SHARED_LIB)/crypt/%.o : $(LIB_DIR)/crypt/%.c $(VERBOSE)$(COMPILE) endif -$(OUT_SHARED_LIB)/profile/%.o : $(LIB_DIR)/profile/%.S - $(VERBOSE)$(ASSEMBLE) - $(OUT_SHARED_LIB)/%.o : $(LIB_DIR)/%.sx $(VERBOSE)$(COMPILE_ALTIVEC_REG)