/* Copyright (C) 2021 Free Software Foundation, Inc. Contributed by Oracle. This file is part of GNU Binutils. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ /* * memory map tracking * incorporating former "loadobjects" into more general "map" * (including code and data segments and dynamic functions) */ #include "config.h" #include #include #include #include #include #include #include #include #include "gp-defs.h" #include "collector.h" #include "gp-experiment.h" #include "memmgr.h" /* * These are obsolete and unreliable. * They are included here only for historical compatibility. */ #define MA_SHARED 0x08 /* changes are shared by mapped object */ #define MA_ANON 0x40 /* anonymous memory (e.g. /dev/zero) */ #define MA_ISM 0x80 /* intimate shared mem (shared MMU resources) */ #define MA_BREAK 0x10 /* grown by brk(2) */ #define MA_STACK 0x20 /* grown automatically on stack faults */ typedef struct prmap_t { unsigned long pr_vaddr; /* virtual address of mapping */ unsigned long pr_size; /* size of mapping in bytes */ char *pr_mapname; /* name in /proc//object */ int pr_mflags; /* protection and attribute flags (see below) */ unsigned long pr_offset; /* offset into mapped object, if any */ unsigned long pr_dev; unsigned long pr_ino; int pr_pagesize; /* pagesize (bytes) for this mapping */ } prmap_t; /* TprintfT(,...) definitions. Adjust per module as needed */ #define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings #define DBG_LT1 1 // for configuration details, warnings #define DBG_LT2 2 #define DBG_LT3 3 #define DBG_LT4 4 #define SYS_MMAP_NAME "mmap" #define SYS_MMAP64_NAME "mmap64" #define SYS_MUNMAP_NAME "munmap" #define SYS_DLOPEN_NAME "dlopen" #define SYS_DLCLOSE_NAME "dlclose" typedef struct MapInfo { struct MapInfo *next; unsigned long vaddr; unsigned long size; char *mapname; /* name in /proc//object */ char *filename; unsigned long offset; int mflags; int pagesize; } MapInfo; typedef struct NameInfo { struct NameInfo *next; char *mapname; char filename[1]; /* dynamic length file name */ } NameInfo; static NameInfo *namemaps = NULL; static MapInfo mmaps; /* current memory maps */ static struct DataHandle *map_hndl = NULL; static char dyntext_fname[MAXPATHLEN]; static void *mapcache = NULL; static char *maptext = NULL; static size_t maptext_sz = 4096; /* initial buffer size */ static int mmap_mode = 0; static int mmap_initted = 0; static collector_mutex_t map_lock = COLLECTOR_MUTEX_INITIALIZER; static collector_mutex_t dyntext_lock = COLLECTOR_MUTEX_INITIALIZER; /* a reentrance guard for the interposition functions ensures that updates to the map cache/file are sequential, with the first doing the final update */ static int reentrance = 0; #define CHCK_REENTRANCE (reentrance || mmap_mode <= 0) #define CURR_REENTRANCE reentrance #define PUSH_REENTRANCE reentrance++ #define POP_REENTRANCE reentrance-- #define CALL_REAL(x) (__real_##x) #define NULL_PTR(x) (__real_##x == NULL) /* interposition function handles */ static void *(*__real_mmap)(void* start, size_t length, int prot, int flags, int fd, off_t offset) = NULL; static void *(*__real_mmap64)(void* start, size_t length, int prot, int flags, int fd, off64_t offset) = NULL; static int (*__real_munmap)(void* start, size_t length) = NULL; static void *(*__real_dlopen)(const char* pathname, int mode) = NULL; #if (ARCH(Intel) && WSIZE(32)) || ARCH(SPARC) static void *(*__real_dlopen_2_1)(const char* pathname, int mode) = NULL; static void *(*__real_dlopen_2_0)(const char* pathname, int mode) = NULL; #endif static int (*__real_dlclose)(void* handle) = NULL; static void (*collector_heap_record)(int, size_t, void*) = NULL; /* internal function prototypes */ static int init_mmap_intf (); static int init_mmap_files (); static void append_segment_record (char *format, ...); static void update_map_segments (hrtime_t hrt, int resolve); static void resolve_mapname (MapInfo *map, char *name); static void record_segment_map (hrtime_t timestamp, uint64_t loadaddr, unsigned long msize, int pagesize, int modeflags, long long offset, unsigned check, char *name); static void record_segment_unmap (hrtime_t timestamp, uint64_t loadaddr); /* Linux needs handling of the vsyscall page to get its data into the map.xml file */ static void process_vsyscall_page (); #define MAXVSYSFUNCS 10 static int nvsysfuncs = 0; static char *sysfuncname[MAXVSYSFUNCS]; static uint64_t sysfuncvaddr[MAXVSYSFUNCS]; static unsigned long sysfuncsize[MAXVSYSFUNCS]; #define MAXDYN 20 static int ndyn = 0; static char *dynname [MAXDYN]; static void *dynvaddr [MAXDYN]; static unsigned dynsize [MAXDYN]; static char *dynfuncname[MAXDYN]; /*===================================================================*/ /* * void __collector_mmap_init_mutex_locks() * Iinitialize mmap mutex locks. */ void __collector_mmap_init_mutex_locks () { __collector_mutex_init (&map_lock); __collector_mutex_init (&dyntext_lock); } /* __collector_ext_update_map_segments called by the audit agent * Is is also called by dbx/collector when a (possible) map update * is intimated, such as after dlopen/dlclose. * Required when libcollector.so is not preloaded and interpositions inactive. */ int __collector_ext_update_map_segments (void) { if (!mmap_initted) return 0; TprintfT (0, "__collector_ext_update_map_segments(%d)\n", CURR_REENTRANCE); if (CHCK_REENTRANCE) return 0; PUSH_REENTRANCE; update_map_segments (GETRELTIME (), 1); POP_REENTRANCE; return 0; } /* * int __collector_ext_mmap_install() * Install and initialise mmap tracing. */ int __collector_ext_mmap_install (int record) { TprintfT (0, "__collector_ext_mmap_install(mmap_mode=%d)\n", mmap_mode); if (NULL_PTR (mmap)) { if (init_mmap_intf ()) { TprintfT (0, "ERROR: collector mmap tracing initialization failed.\n"); return COL_ERROR_EXPOPEN; } } else TprintfT (DBG_LT2, "collector mmap tracing: mmap pointer not null\n"); /* Initialize side door interface with the heap tracing module */ collector_heap_record = (void(*)(int, size_t, void*))dlsym (RTLD_DEFAULT, "__collector_heap_record"); if (record) { map_hndl = __collector_create_handle (SP_MAP_FILE); if (map_hndl == NULL) return COL_ERROR_MAPOPEN; if (init_mmap_files ()) { TprintfT (0, "ERROR: collector init_mmap_files() failed.\n"); return COL_ERROR_EXPOPEN; } } mmaps.next = NULL; mapcache = NULL; PUSH_REENTRANCE; update_map_segments (GETRELTIME (), 1); // initial map POP_REENTRANCE; mmap_mode = 1; mmap_initted = 1; process_vsyscall_page (); return COL_ERROR_NONE; } /* * int __collector_ext_mmap_deinstall() * Optionally update final map and stop tracing mmap events. */ int __collector_ext_mmap_deinstall (int update) { if (!mmap_initted) return COL_ERROR_NONE; mmap_mode = 0; if (update) { /* Final map */ PUSH_REENTRANCE; update_map_segments (GETRELTIME (), 1); POP_REENTRANCE; } TprintfT (0, "__collector_ext_mmap_deinstall(%d)\n", update); if (map_hndl != NULL) { __collector_delete_handle (map_hndl); map_hndl = NULL; } __collector_mutex_lock (&map_lock); // get lock before resetting /* Free all memory maps */ MapInfo *mp; for (mp = mmaps.next; mp;) { MapInfo *next = mp->next; __collector_freeCSize (__collector_heap, mp, sizeof (*mp)); mp = next; } mmaps.next = NULL; /* Free all name maps */ NameInfo *np; for (np = namemaps; np;) { NameInfo *next = np->next; __collector_freeCSize (__collector_heap, np, sizeof (*np) + __collector_strlen (np->filename)); np = next; } namemaps = NULL; mapcache = __collector_reallocVSize (__collector_heap, mapcache, 0); mmaps.next = NULL; mapcache = NULL; __collector_mutex_unlock (&map_lock); TprintfT (0, "__collector_ext_mmap_deinstall done\n"); return 0; } /* * void __collector_mmap_fork_child_cleanup() * Perform all necessary cleanup steps in child process after fork(). */ void __collector_mmap_fork_child_cleanup () { /* Initialize all mmap "mutex" locks */ __collector_mmap_init_mutex_locks (); if (!mmap_initted) return; mmap_mode = 0; __collector_delete_handle (map_hndl); __collector_mutex_lock (&map_lock); // get lock before resetting /* Free all memory maps */ MapInfo *mp; for (mp = mmaps.next; mp;) { MapInfo *next = mp->next; __collector_freeCSize (__collector_heap, mp, sizeof (*mp)); mp = next; } mmaps.next = NULL; /* Free all name maps */ NameInfo *np; for (np = namemaps; np;) { NameInfo *next = np->next; __collector_freeCSize (__collector_heap, np, sizeof (*np) + __collector_strlen (np->filename)); np = next; } namemaps = NULL; mapcache = __collector_reallocVSize (__collector_heap, mapcache, 0); mmap_initted = 0; reentrance = 0; __collector_mutex_unlock (&map_lock); } static int init_mmap_files () { TprintfT (DBG_LT2, "init_mmap_files\n"); /* also create the headerless dyntext file (if required) */ CALL_UTIL (snprintf)(dyntext_fname, sizeof (dyntext_fname), "%s/%s", __collector_exp_dir_name, SP_DYNTEXT_FILE); if (CALL_UTIL (access)(dyntext_fname, F_OK) != 0) { int fd = CALL_UTIL (open)(dyntext_fname, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); if (fd == -1) { char errmsg[256]; TprintfT (0, "ERROR: init_mmap_files: open(%s) failed\n", dyntext_fname); __collector_log_write ("%s: %s\n", SP_JCMD_CERROR, COL_ERROR_DYNOPEN, errno, dyntext_fname, errmsg); return COL_ERROR_DYNOPEN; } else CALL_UTIL (close)(fd); } return COL_ERROR_NONE; } static void append_segment_record (char *format, ...) { char buf[1024]; char *bufptr = buf; va_list va; va_start (va, format); int sz = __collector_xml_vsnprintf (bufptr, sizeof (buf), format, va); va_end (va); if (__collector_expstate != EXP_OPEN && __collector_expstate != EXP_PAUSED) { TprintfT (0, "append_segment_record: expt neither open nor paused (%d); " "not writing to map.xml\n\t%s", __collector_expstate, buf); return; } if (sz >= sizeof (buf)) { /* Allocate a new buffer */ sz += 1; /* add the terminating null byte */ bufptr = (char*) alloca (sz); va_start (va, format); sz = __collector_xml_vsnprintf (bufptr, sz, format, va); va_end (va); } int rc = __collector_write_string (map_hndl, bufptr, sz); if (rc != 0) (void) __collector_log_write ("\n", SP_JCMD_CERROR, COL_ERROR_MAPWRITE); } static void record_segment_map (hrtime_t timestamp, uint64_t loadaddr, unsigned long msize, int pagesize, int modeflags, long long offset, unsigned check, char *name) { TprintfT (DBG_LT2, "record_segment_map(%s @ 0x%llx)\n", name, (long long) loadaddr); append_segment_record ("\n", (unsigned) (timestamp / NANOSEC), (unsigned) (timestamp % NANOSEC), loadaddr, msize, pagesize, offset < 0 ? '-' : '+', offset < 0 ? -offset : offset, modeflags, check, name); } static void record_segment_unmap (hrtime_t timestamp, uint64_t loadaddr) { TprintfT (DBG_LT2, "record_segment_unmap(@ 0x%llx)\n", (long long) loadaddr); append_segment_record ("\n", (unsigned) (timestamp / NANOSEC), (unsigned) (timestamp % NANOSEC), loadaddr); } #if WSIZE(64) #define ELF_EHDR Elf64_Ehdr #define ELF_PHDR Elf64_Phdr #define ELF_SHDR Elf64_Shdr #define ELF_DYN Elf64_Dyn #define ELF_AUX Elf64_auxv_t #define ELF_SYM Elf64_Sym #define ELF_ST_BIND ELF64_ST_BIND #define ELF_ST_TYPE ELF64_ST_TYPE #elif WSIZE(32) #define ELF_EHDR Elf32_Ehdr #define ELF_PHDR Elf32_Phdr #define ELF_SHDR Elf32_Shdr #define ELF_DYN Elf32_Dyn #define ELF_AUX Elf32_auxv_t #define ELF_SYM Elf32_Sym #define ELF_ST_BIND ELF32_ST_BIND #define ELF_ST_TYPE ELF32_ST_TYPE #endif static unsigned checksum_mapname (MapInfo* map) { unsigned checksum = 0; /* only checksum code segments */ if ((map->mflags & (PROT_EXEC | PROT_READ)) == 0 || (map->mflags & PROT_WRITE) != 0) return 0; checksum = (unsigned) - 1; TprintfT (DBG_LT2, "checksum_mapname checksum = 0x%0X\n", checksum); return checksum; } #if (ARCH(Intel) && WSIZE(32)) || ARCH(SPARC) static void* dlopen_searchpath_symver (void*(real_dlopen) (), void* caller_addr, const char* basename, int mode) #else static void* dlopen_searchpath (void* caller_addr, const char* basename, int mode) #endif { TprintfT (DBG_LT2, "dlopen_searchpath(%p, %s, %d)\n", caller_addr, basename, mode); Dl_info dl_info; if (dladdr (caller_addr, &dl_info) == 0) { TprintfT (0, "ERROR: dladdr(%p): %s\n", caller_addr, dlerror ()); return 0; } TprintfT (DBG_LT2, "dladdr(%p): %p fname=%s\n", caller_addr, dl_info.dli_fbase, dl_info.dli_fname); int noload = RTLD_BINDING_MASK | RTLD_NOLOAD; //XXXX why RTLD_BINDING_MASK? #define WORKAROUND_RTLD_BUG 1 #ifdef WORKAROUND_RTLD_BUG // A dynamic linker dlopen bug can result in corruption/closure of open streams // XXXX workaround should be removed once linker patches are all available #if WSIZE(64) #define MAINBASE 0x400000 #elif WSIZE(32) #define MAINBASE 0x08048000 #endif const char* tmp_path = (dl_info.dli_fbase == (void*) MAINBASE) ? NULL : dl_info.dli_fname; void* caller_hndl = NULL; #if ((ARCH(Intel) && WSIZE(32)) || ARCH(SPARC)) caller_hndl = (real_dlopen) (tmp_path, noload); #else caller_hndl = CALL_REAL (dlopen)(tmp_path, noload); #endif #else //XXXX workaround should be removed once linker patches are all available void* caller_hndl = NULL; #if (ARCH(Intel) && WSIZE(32) || ARCH(SPARC) caller_hndl = (real_dlopen) (dl_info.dli_fname, noload); #else caller_hndl = CALL_REAL (dlopen)(dl_info.dli_fname, noload); #endif #endif //XXXX workaround should be removed once linker patches are all available if (!caller_hndl) { TprintfT (0, "ERROR: dlopen(%s,NOLOAD): %s\n", dl_info.dli_fname, dlerror ()); return 0; } Dl_serinfo _info, *info = &_info; Dl_serpath *path; /* determine search path count and required buffer size */ dlinfo (caller_hndl, RTLD_DI_SERINFOSIZE, (void *) info); /* allocate new buffer and initialize */ /* CR# 7191331 There is a bug in Linux that causes the first call to dlinfo() to return a small value for the dls_size. The first call to dlinfo() determines the search path count and the required buffer size. The second call to dlinfo() tries to obtain the search path information. However, the size of the buffer that is returned by the first call to the dlinfo() is incorrect (too small). The second call to dlinfo() uses the incorrect size to allocate memory on the stack and internally uses the memcpy() function to copy the search paths to the allocated memory space. The length of the search path is much larger than the buffer that is allocated on the stack. The memcpy() overwrites some of the information that are saved on the stack, specifically, it overwrites the "basename" parameter. collect crashes right after the second call to dlinfo(). The search paths are used to locate the shared libraries. dlinfo() creates the search paths based on the paths that are assigned to LD_LIBRARY_PATH environment variable and the standard library paths. The standard library paths consists of the /lib and the /usr/lib paths. The standard library paths are always included to the search paths by dlinfo() even if the LD_LIBRARY_PATH environment variable is not defined. Therefore, at the very least the dls_cnt is assigned to 2 (/lib and /usr/lib) and dlinfo() will never assign dls_cnt to zero. The dls_cnt is the count of the potential paths for searching the shared libraries. So we need to increase the buffer size before the second call to dlinfo(). There are number of ways to increase the buffer size. However, none of them can calculate the buffer size precisely. Some users on the web have suggested to multiply the MAXPATHLEN by dls_cnt for the buffer size. The MAXPATHLEN is assigned to 1024 bytes. In my opinion this is too much. So I have decided to multiply dls_size by dls_cnt for the buffer size since the dls_size is much smaller than 1024 bytes. I have already confirmed with our user that the workaround is working with his real application. Additionally, the dlopen_searchpath() function is called only by the libcorrector init() function when the experiment is started. Therefore, allocating some extra bytes on the stack which is local to this routine is harmless. */ info = alloca (_info.dls_size * _info.dls_cnt); info->dls_size = _info.dls_size; info->dls_cnt = _info.dls_cnt; /* obtain search path information */ dlinfo (caller_hndl, RTLD_DI_SERINFO, (void *) info); path = &info->dls_serpath[0]; char pathname[MAXPATHLEN]; for (unsigned int cnt = 1; cnt <= info->dls_cnt; cnt++, path++) { __collector_strlcpy (pathname, path->dls_name, sizeof (pathname)); __collector_strlcat (pathname, "/", sizeof (pathname)); __collector_strlcat (pathname, basename, sizeof (pathname)); void* ret = NULL; #if (ARCH(Intel) && WSIZE(32)) || ARCH(SPARC) ret = (real_dlopen) (pathname, mode); #else ret = CALL_REAL (dlopen)(pathname, mode); #endif TprintfT (DBG_LT2, "try %d/%d: %s = %p\n", cnt, info->dls_cnt, pathname, ret); if (ret) return ret; // success! } return 0; } static void resolve_mapname (MapInfo *map, char *name) { map->filename = ""; map->mapname = ""; if (name == NULL || *name == '\0') { if (map->mflags & MA_STACK) map->filename = "<" SP_MAP_STACK ">"; else if (map->mflags & MA_BREAK) map->filename = "<" SP_MAP_HEAP ">"; else if (map->mflags & MA_ISM) map->filename = "<" SP_MAP_SHMEM ">"; return; } NameInfo *np; for (np = namemaps; np; np = np->next) if (__collector_strcmp (np->mapname, name) == 0) break; if (np == NULL) { const char *fname; fname = name; /* Create and link a new name map */ size_t fnamelen = __collector_strlen (fname) + 1; np = (NameInfo*) __collector_allocCSize (__collector_heap, sizeof (NameInfo) + fnamelen, 1); if (np == NULL) // We could not get memory return; np->mapname = np->filename; __collector_strlcpy (np->filename, fname, fnamelen); np->next = namemaps; namemaps = np; } map->mapname = np->mapname; map->filename = np->filename; if (map->filename[0] == (char) 0) map->filename = map->mapname; TprintfT (DBG_LT2, "resolve_mapname: %s resolved to %s\n", map->mapname, map->filename); } static unsigned long str2ulong (char **ss) { char *s = *ss; unsigned long val = 0UL; const int base = 16; for (;;) { char c = *s++; if (c >= '0' && c <= '9') val = val * base + (c - '0'); else if (c >= 'a' && c <= 'f') val = val * base + (c - 'a') + 10; else if (c >= 'A' && c <= 'F') val = val * base + (c - 'A') + 10; else break; } *ss = s - 1; return val; } static void update_map_segments (hrtime_t hrt, int resolve) { size_t filesz; if (__collector_mutex_trylock (&map_lock)) { TprintfT (0, "WARNING: update_map_segments(resolve=%d) BUSY\n", resolve); return; } TprintfT (DBG_LT2, "\n"); TprintfT (DBG_LT2, "begin update_map_segments(hrt, %d)\n", resolve); // Note: there is similar code to read /proc/$PID/map[s] in // perfan/er_kernel/src/KSubExp.cc KSubExp::write_subexpt_map() const char* proc_map = "/proc/self/maps"; size_t bufsz = maptext_sz; int done = 0; filesz = 0; int map_fd = CALL_UTIL (open)(proc_map, O_RDONLY); while (!done) { bufsz *= 2; maptext = __collector_reallocVSize (__collector_heap, maptext, bufsz); TprintfT (DBG_LT2, " update_map_segments: Loop for bufsize=%ld\n", (long) bufsz); for (;;) { int n = CALL_UTIL (read)(map_fd, maptext + filesz, bufsz - filesz); TprintfT (DBG_LT2, " update_map_segments: __collector_read(bufp=%p nbyte=%ld)=%d\n", maptext + filesz, (long) ( bufsz - filesz), n); if (n < 0) { TprintfT (0, "ERROR: update_map_segments: read(maps): errno=%d\n", errno); (void) __collector_log_write ("%s\n", SP_JCMD_CERROR, COL_ERROR_MAPREAD, errno, proc_map); CALL_UTIL (close)(map_fd); __collector_mutex_unlock (&map_lock); return; } else if (n == 0) { done = 1; break; } filesz += n; if (filesz >= bufsz) /* Buffer too small */ break; } } CALL_UTIL (close)(map_fd); maptext_sz = filesz; int mapcache_entries = 0; char *str, *str1; for (str = maptext;; str = str1) { for (str1 = str; str1 - maptext < filesz; str1++) { if (*str1 == '\n') { *str1 = (char) 0; break; } } if (str1 - maptext >= filesz) break; str1++; mapcache_entries++; mapcache = __collector_reallocVSize (__collector_heap, mapcache, sizeof (prmap_t) * mapcache_entries); prmap_t *map = ((prmap_t *) mapcache) + (mapcache_entries - 1); map->pr_vaddr = str2ulong (&str); str++; unsigned long eaddr = str2ulong (&str); str++; map->pr_size = eaddr - map->pr_vaddr; map->pr_mflags = 0; map->pr_mflags += (*str++ == 'r' ? PROT_READ : 0); map->pr_mflags += (*str++ == 'w' ? PROT_WRITE : 0); map->pr_mflags += (*str++ == 'x' ? PROT_EXEC : 0); map->pr_mflags += (*str++ == 's' ? MA_SHARED : 0); str++; map->pr_offset = str2ulong (&str); str++; map->pr_dev = str2ulong (&str) * 0x100; str++; map->pr_dev += str2ulong (&str); str++; map->pr_ino = str2ulong (&str); if (map->pr_dev == 0) map->pr_mflags |= MA_ANON; while (*str == ' ') str++; map->pr_mapname = str; map->pr_pagesize = 4096; } /* Compare two maps and record all differences */ unsigned nidx = 0; MapInfo *prev = &mmaps; MapInfo *oldp = mmaps.next; for (;;) { prmap_t *newp = nidx < mapcache_entries ? (prmap_t*) mapcache + nidx : NULL; if (oldp == NULL && newp == NULL) break; /* If two maps are equal proceed to the next pair */ if (oldp && newp && oldp->vaddr == newp->pr_vaddr && oldp->size == newp->pr_size && __collector_strcmp (oldp->mapname, newp->pr_mapname) == 0) { prev = oldp; oldp = oldp->next; nidx++; continue; } /* Check if we need to unload the old map first */ if (newp == NULL || (oldp && oldp->vaddr <= newp->pr_vaddr)) { if (oldp != NULL) { /* Don't record MA_ANON maps except MA_STACK and MA_BREAK */ if ((!(oldp->mflags & MA_ANON) || (oldp->mflags & (MA_STACK | MA_BREAK)))) record_segment_unmap (hrt, oldp->vaddr); /* Remove and free map */ prev->next = oldp->next; MapInfo *tmp = oldp; oldp = oldp->next; __collector_freeCSize (__collector_heap, tmp, sizeof (*tmp)); } } else { MapInfo *map = (MapInfo*) __collector_allocCSize (__collector_heap, sizeof (MapInfo), 1); if (map == NULL) { __collector_mutex_unlock (&map_lock); return; } map->vaddr = newp->pr_vaddr; map->size = newp->pr_size; map->offset = newp->pr_offset; map->mflags = newp->pr_mflags; map->pagesize = newp->pr_pagesize; resolve_mapname (map, newp->pr_mapname); /* Insert new map */ map->next = prev->next; prev->next = map; prev = map; /* Don't record MA_ANON maps except MA_STACK and MA_BREAK */ if (!(newp->pr_mflags & MA_ANON) || (newp->pr_mflags & (MA_STACK | MA_BREAK))) { unsigned checksum = checksum_mapname (map); record_segment_map (hrt, map->vaddr, map->size, map->pagesize, map->mflags, map->offset, checksum, map->filename); } nidx++; } } TprintfT (DBG_LT2, "update_map_segments: done\n\n"); __collector_mutex_unlock (&map_lock); } /* update_map_segments */ /* * Map addr to a segment. Cope with split segments. */ int __collector_check_segment_internal (unsigned long addr, unsigned long *base, unsigned long *end, int maxnretries, int MA_FLAGS) { int number_of_tries = 0; retry: ; unsigned long curbase = 0; unsigned long curfoff = 0; unsigned long cursize = 0; MapInfo *mp; for (mp = mmaps.next; mp; mp = mp->next) { if (curbase + cursize == mp->vaddr && curfoff + cursize == mp->offset && ((mp->mflags & MA_FLAGS) == MA_FLAGS || __collector_strncmp (mp->mapname, "[vdso]", 6) == 0 || __collector_strncmp (mp->mapname, "[vsyscall]", 10) == 0 )) cursize = mp->vaddr + mp->size - curbase; else if (addr < mp->vaddr) break; else if ((mp->mflags & MA_FLAGS) != MA_FLAGS && __collector_strncmp (mp->mapname, "[vdso]", 6) && __collector_strncmp (mp->mapname, "[vsyscall]", 10)) { curbase = 0; curfoff = 0; cursize = 0; } else { curbase = mp->vaddr; curfoff = mp->offset; cursize = mp->size; } } if (addr >= curbase && addr < curbase + cursize) { *base = curbase; *end = curbase + cursize; return 1; } /* * 21275311 Unwind failure in native stack for java application running on jdk8 on x86 * * On JDK8, we've observed cases where Java-compiled methods end up * in virtual address segments that were "dead zones" (mflags&PROT_READ==0) at * the time of the last update_map_segments() but are now "live". So if we * fail to find a segment, let's call update_map_segments and then retry * before giving up. */ if (number_of_tries < maxnretries) { number_of_tries++; __collector_ext_update_map_segments (); goto retry; } *base = 0; *end = 0; return 0; } /** * Check if address belongs to a readable and executable segment * @param addr * @param base * @param end * @param maxnretries * @return 1 - yes, 0 - no */ int __collector_check_segment (unsigned long addr, unsigned long *base, unsigned long *end, int maxnretries) { int MA_FLAGS = PROT_READ | PROT_EXEC; int res = __collector_check_segment_internal (addr, base, end, maxnretries, MA_FLAGS); return res; } /** * Check if address belongs to a readable segment * @param addr * @param base * @param end * @param maxnretries * @return 1 - yes, 0 - no */ int __collector_check_readable_segment( unsigned long addr, unsigned long *base, unsigned long *end, int maxnretries ) { int MA_FLAGS = PROT_READ; int res = __collector_check_segment_internal(addr, base, end, maxnretries, MA_FLAGS); return res; } static ELF_AUX *auxv = NULL; static void process_vsyscall_page () { TprintfT (DBG_LT2, "process_vsyscall_page()\n"); if (ndyn != 0) { /* We've done this one in this process, and cached the results */ /* use the cached results */ for (int i = 0; i < ndyn; i++) { append_segment_record ("\n", dynname[i], dynvaddr[i], dynsize[i], dynfuncname[i]); TprintfT (DBG_LT2, "process_vsyscall_page: append_segment_record map dynfunc='%s' vaddr=0x%016lX size=%ld funcname='%s' -- from cache\n", dynname[i], (unsigned long) dynvaddr[i], (long) dynsize[i], dynfuncname[i]); } } if (nvsysfuncs != 0) { /* We've done this one in this process, and cached the results */ /* use the cached results */ hrtime_t hrt = GETRELTIME (); for (int i = 0; i < nvsysfuncs; i++) { append_segment_record ("\n", (unsigned) (hrt / NANOSEC), (unsigned) (hrt % NANOSEC), (unsigned long) sysfuncvaddr[i], (unsigned) sysfuncsize[i], sysfuncname[i]); TprintfT (DBG_LT2, "process_vsyscall_page: append_segment_record map function='%s' vaddr=0x%016lX size=%ld -- from cache\n", sysfuncname[i], (unsigned long) sysfuncvaddr[i], (long) sysfuncsize[i]); } } if (ndyn + nvsysfuncs != 0) return; /* After fork we can't rely on environ as it might have * been moved by putenv(). Use the pointer saved by the parent. */ if (auxv == NULL) { char **envp = (char**) environ; if (envp == NULL) return; while (*envp++ != NULL); auxv = (ELF_AUX*) envp; } TprintfT (DBG_LT2, "process_vsyscall_page, auxv = ox%p\n", auxv); ELF_AUX *ap; #ifdef DEBUG for (ap = auxv; ap->a_type != AT_NULL; ap++) TprintfT (DBG_LT2, "process_vsyscall_page: ELF_AUX: " " a_type = 0x%016llx %10lld " " a_un.a_val = 0x%016llx %10lld\n", (long long) ap->a_type, (long long) ap->a_type, (long long) ap->a_un.a_val, (long long) ap->a_un.a_val); #endif // find the first ELF_AUX of type AT_SYSINFO_EHDR ELF_EHDR *ehdr = NULL; for (ap = auxv; ap->a_type != AT_NULL; ap++) { if (ap->a_type == AT_SYSINFO_EHDR) { // newer Linuxes do not have a_ptr field, they just have a_val ehdr = (ELF_EHDR*)(intptr_t) ap->a_un.a_val; if (ehdr != NULL) break; } } // If one is found if (ehdr != NULL) { char *mapName = "SYSINFO_EHDR"; MapInfo *mp; for (mp = mmaps.next; mp; mp = mp->next) { if ((unsigned long) ehdr == mp->vaddr) { mp->mflags |= PROT_EXEC; if (mp->mapname && mp->mapname[0]) mapName = mp->mapname; break; } } // Find the dynsym section and record all symbols char *base = (char*) ehdr; ELF_SHDR *shdr = (ELF_SHDR*) (base + ehdr->e_shoff); int i; #if 0 TprintfT (DBG_LT2, "process_vsyscall_page: ehdr: EI_CLASS=%lld EI_DATA=%lld EI_OSABI=%lld e_type=%lld e_machine=%lld e_version=%lld\n" " e_entry =0x%016llx %10lld e_phoff =0x%016llx %10lld\n" " e_shoff =0x%016llx %10lld e_flags =0x%016llx %10lld\n" " e_ehsize =0x%016llx %10lld e_phentsize =0x%016llx %10lld\n" " e_phnum =0x%016llx %10lld e_shentsize =0x%016llx %10lld\n" " e_shnum =0x%016llx %10lld e_shstrndx =0x%016llx %10lld\n", (long long) ehdr->e_ident[EI_CLASS], (long long) ehdr->e_ident[EI_DATA], (long long) ehdr->e_ident[EI_OSABI], (long long) ehdr->e_type, (long long) ehdr->e_machine, (long long) ehdr->e_version, (long long) ehdr->e_entry, (long long) ehdr->e_entry, (long long) ehdr->e_phoff, (long long) ehdr->e_phoff, (long long) ehdr->e_shoff, (long long) ehdr->e_shoff, (long long) ehdr->e_flags, (long long) ehdr->e_flags, (long long) ehdr->e_ehsize, (long long) ehdr->e_ehsize, (long long) ehdr->e_phentsize, (long long) ehdr->e_phentsize, (long long) ehdr->e_phnum, (long long) ehdr->e_phnum, (long long) ehdr->e_shentsize, (long long) ehdr->e_shentsize, (long long) ehdr->e_shnum, (long long) ehdr->e_shnum, (long long) ehdr->e_shstrndx, (long long) ehdr->e_shstrndx); for (i = 1; i < ehdr->e_shnum; i++) { TprintfT (DBG_LT2, "process_vsyscall_page: SECTION=%d sh_name=%lld '%s'\n" " sh_type =0x%016llx %10lld\n" " sh_flags =0x%016llx %10lld\n" " sh_addr =0x%016llx %10lld\n" " sh_offset =0x%016llx %10lld\n" " sh_size =0x%016llx %10lld\n" " sh_link =0x%016llx %10lld\n" " sh_info =0x%016llx %10lld\n" " sh_addralign =0x%016llx %10lld\n" " sh_entsize =0x%016llx %10lld\n", i, (long long) shdr[i].sh_name, base + shdr[ehdr->e_shstrndx].sh_offset + shdr[i].sh_name, (long long) shdr[i].sh_type, (long long) shdr[i].sh_type, (long long) shdr[i].sh_flags, (long long) shdr[i].sh_flags, (long long) shdr[i].sh_addr, (long long) shdr[i].sh_addr, (long long) shdr[i].sh_offset, (long long) shdr[i].sh_offset, (long long) shdr[i].sh_size, (long long) shdr[i].sh_size, (long long) shdr[i].sh_link, (long long) shdr[i].sh_link, (long long) shdr[i].sh_info, (long long) shdr[i].sh_info, (long long) shdr[i].sh_addralign, (long long) shdr[i].sh_addralign, (long long) shdr[i].sh_entsize, (long long) shdr[i].sh_entsize); } #endif int dynSec = -1; for (i = 1; i < ehdr->e_shnum; i++) if (shdr[i].sh_type == SHT_DYNSYM) { dynSec = i; break; } if (dynSec != -1) { char *symbase = base + shdr[shdr[dynSec].sh_link].sh_offset; ELF_SYM *symbols = (ELF_SYM*) (base + shdr[dynSec].sh_offset); int nextSec = 0; int n = shdr[dynSec].sh_size / shdr[dynSec].sh_entsize; for (i = 0; i < n; i++) { ELF_SYM *sym = symbols + i; TprintfT (DBG_LT2, "process_vsyscall_page: symbol=%d st_name=%lld '%s'\n" " st_size = 0x%016llx %10lld\n" " st_value = 0x%016llx %10lld\n" " st_shndx = 0x%016llx %10lld\n" " st_info = 0x%016llx %10lld\n", i, (long long) sym->st_name, symbase + sym->st_name, (long long) sym->st_size, (long long) sym->st_size, (long long) sym->st_value, (long long) sym->st_value, (long long) sym->st_shndx, (long long) sym->st_shndx, (long long) sym->st_info, (long long) sym->st_info); if (sym->st_shndx <= 0 || sym->st_size <= 0 || ELF_ST_BIND (sym->st_info) != STB_GLOBAL || ELF_ST_TYPE (sym->st_info) != STT_FUNC) continue; if (nextSec == 0) nextSec = sym->st_shndx; else if (nextSec > sym->st_shndx) nextSec = sym->st_shndx; } if (nextSec == 0) ehdr = NULL; while (nextSec != 0) { int curSec = nextSec; char *bgn = base + shdr[curSec].sh_offset; char *end = bgn + shdr[curSec].sh_size; for (i = 0; i < n; i++) { ELF_SYM *sym = symbols + i; if (sym->st_shndx <= 0 || sym->st_size <= 0 || ELF_ST_BIND (sym->st_info) != STB_GLOBAL || ELF_ST_TYPE (sym->st_info) != STT_FUNC) continue; if (sym->st_shndx > curSec) { if (nextSec == curSec) nextSec = sym->st_shndx; else if (nextSec > sym->st_shndx) nextSec = sym->st_shndx; nextSec = sym->st_shndx; continue; } if (sym->st_shndx != curSec) continue; long long st_delta = (sym->st_value >= shdr[sym->st_shndx].sh_addr) ? (sym->st_value - shdr[sym->st_shndx].sh_addr) : -1; char *st_value = bgn + st_delta; if (st_delta >= 0 && st_value + sym->st_size <= end) { append_segment_record ("\n", mapName, (void*) st_value, sym->st_size, symbase + sym->st_name); TprintfT (DBG_LT2, "process_vsyscall_page: append_segment_record map dynfunc='%s' vaddr=%016lX size=%ld funcname='%s'\n", mapName, (unsigned long) st_value, (long) sym->st_size, symbase + sym->st_name); /* now cache this for a subsequent experiment */ if (ndyn >= MAXDYN) __collector_log_write ("MAXDYN=%d\n", SP_JCMD_CERROR, COL_ERROR_MAPCACHE, MAXDYN); else { dynname [ndyn] = CALL_UTIL (libc_strdup)(mapName); dynvaddr [ndyn] = (void *) st_value; dynsize [ndyn] = (unsigned) sym->st_size; dynfuncname[ndyn] = CALL_UTIL (libc_strdup)(symbase + sym->st_name); TprintfT (DBG_LT2, "process_vsyscall_page: cached entry %d map function='%s' vaddr=0x%016lX size=%ld '%s'\n", ndyn, dynname[ndyn], (unsigned long) dynvaddr[ndyn], (long) dynsize[ndyn], dynfuncname[ndyn]); ndyn++; } } } __collector_int_func_load (DFUNC_KERNEL, mapName, NULL, (void*) (base + shdr[curSec].sh_offset), shdr[curSec].sh_size, 0, NULL); /* now cache this function for a subsequent experiment */ if (nvsysfuncs >= MAXVSYSFUNCS) __collector_log_write ("MAXVSYSFUNCS=%d\n", SP_JCMD_CERROR, COL_ERROR_MAPCACHE, MAXVSYSFUNCS); else { sysfuncname[nvsysfuncs] = CALL_UTIL (libc_strdup)(mapName); sysfuncvaddr[nvsysfuncs] = (unsigned long) (base + shdr[curSec].sh_offset); sysfuncsize[nvsysfuncs] = (unsigned long) (shdr[curSec].sh_size); TprintfT (DBG_LT2, "process_vsyscall_page: cached entry %d map function='%s' vaddr=0x%016lX size=%ld\n", nvsysfuncs, sysfuncname[nvsysfuncs], (unsigned long) sysfuncvaddr[nvsysfuncs], (long) sysfuncsize[nvsysfuncs]); nvsysfuncs++; } TprintfT (DBG_LT2, "process_vsyscall_page: collector_int_func_load='%s' vaddr=0x%016lX size=%ld\n", mapName, (unsigned long) (base + shdr[curSec].sh_offset), (long) shdr[curSec].sh_size); if (curSec == nextSec) break; } } } #if WSIZE(32) unsigned long vsysaddr = (unsigned long) 0xffffe000; #elif WSIZE(64) unsigned long vsysaddr = (unsigned long) 0xffffffffff600000; #endif // Make sure the vsyscall map has PROT_EXEC MapInfo *mp; for (mp = mmaps.next; mp; mp = mp->next) { TprintfT (DBG_LT2, "MapInfo: vaddr=0x%016llx [size=%lld] mflags=0x%llx offset=%lld pagesize=%lld\n" " mapname='%s' filename='%s'\n", (unsigned long long) mp->vaddr, (long long) mp->size, (long long) mp->mflags, (long long) mp->offset, (long long) mp->pagesize, mp->mapname ? mp->mapname : "NULL", mp->filename ? mp->filename : "NULL"); if (vsysaddr == mp->vaddr) mp->mflags |= PROT_EXEC; if ((unsigned long) ehdr == (unsigned long) mp->vaddr) continue; if (__collector_strncmp (mp->mapname, "[vdso]", 6) == 0 || __collector_strncmp (mp->mapname, "[vsyscall]", 10) == 0) { /* * On rubbia ( 2.6.9-5.ELsmp #1 SMP 32-bit ) access to ehdr causes SEGV. * There doesn't seem to be a way to reliably determine the actual presence * of the page: even when /proc reports it's there it can't be accessed. * We will have to put up with on some Linuxes until this is resolved. __collector_int_func_load(DFUNC_KERNEL, mp->mapname, NULL, (void*) mp->vaddr, mp->size, 0, NULL); */ hrtime_t hrt = GETRELTIME (); append_segment_record ( "\n", (unsigned) (hrt / NANOSEC), (unsigned) (hrt % NANOSEC), (unsigned long) mp->vaddr, (unsigned) mp->size, mp->mapname); TprintfT (DBG_LT2, "process_vsyscall_page: append_segment_record map function = %s, vaddr = 0x%016lX, size = %u\n", mp->mapname, (unsigned long) mp->vaddr, (unsigned) mp->size); /* now cache this function for a subsequent experiment */ if (nvsysfuncs >= MAXVSYSFUNCS) __collector_log_write ("MAXVSYSFUNCS=%d\n", SP_JCMD_CERROR, COL_ERROR_MAPCACHE, MAXVSYSFUNCS); else { sysfuncname[nvsysfuncs] = CALL_UTIL (libc_strdup)(mp->mapname); sysfuncvaddr[nvsysfuncs] = mp->vaddr; sysfuncsize[nvsysfuncs] = (unsigned long) mp->size; TprintfT (DBG_LT2, "process_vsyscall_page: cached entry %d map function='%s' vaddr=0x%016lX size=%ld\n", nvsysfuncs, sysfuncname[nvsysfuncs], (unsigned long) sysfuncvaddr[nvsysfuncs], (long) sysfuncsize[nvsysfuncs]); nvsysfuncs++; } } } } /* * collector API for dynamic functions */ void collector_func_load () __attribute__ ((weak, alias ("__collector_func_load"))); void __collector_func_load (char *name, char *alias, char *sourcename, void *vaddr, int size, int lntsize, DT_lineno *lntable) { __collector_int_func_load (DFUNC_API, name, sourcename, vaddr, size, lntsize, lntable); } void collector_func_unload () __attribute__ ((weak, alias ("__collector_func_unload"))); void __collector_func_unload (void *vaddr) { __collector_int_func_unload (DFUNC_API, vaddr); } /* routines for handling dynamic functions */ static void rwrite (int fd, void *buf, size_t nbyte) { size_t left = nbyte; size_t res; char *ptr = (char*) buf; while (left > 0) { res = CALL_UTIL (write)(fd, ptr, left); if (res == -1) { TprintfT (0, "ERROR: rwrite(%s) failed: errno=%d\n", dyntext_fname, errno); (void) __collector_log_write ("%s\n", SP_JCMD_CERROR, COL_ERROR_DYNWRITE, errno, dyntext_fname); return; } left -= res; ptr += res; } } void __collector_int_func_load (dfunc_mode_t mode, char *name, char *sourcename, void *vaddr, int size, int lntsize, DT_lineno *lntable) { char name_buf[32]; int slen; static char pad[16]; int padn; if (!mmap_initted) return; hrtime_t hrt = GETRELTIME (); if (name == NULL) { /* generate a name based on vaddr */ CALL_UTIL (snprintf)(name_buf, sizeof (name_buf), "0x%lx", (unsigned long) vaddr); name = name_buf; } switch (mode) { case DFUNC_API: case DFUNC_KERNEL: append_segment_record ("\n", (unsigned) (hrt / NANOSEC), (unsigned) (hrt % NANOSEC), (unsigned long) vaddr, (unsigned) size, name); break; case DFUNC_JAVA: append_segment_record ("\n", (unsigned) (hrt / NANOSEC), (unsigned) (hrt % NANOSEC), (unsigned long) vaddr, (unsigned) size, name); break; default: return; } /* 21275311 Unwind failure in native stack for java application running on jdk8 on x86 * Check: * - function starts in a known segment (base1 != 0) * - function ends in the same segment (base1==base2 && end1==end2) * If not, then call update_map_segments(). */ unsigned long base1, end1, base2, end2; __collector_check_segment ((unsigned long) vaddr, &base1, &end1, 0); if (base1) __collector_check_segment (((unsigned long) vaddr)+((unsigned long) size), &base2, &end2, 0); if (base1 == 0 || base1 != base2 || end1 != end2) __collector_ext_update_map_segments (); /* Write a copy of actual code to the "dyntext" file */ DT_header dt_hdr; dt_hdr.type = DT_HEADER; dt_hdr.size = sizeof (dt_hdr); dt_hdr.time = hrt; unsigned long t = (unsigned long) vaddr; /* to suppress a warning from gcc */ dt_hdr.vaddr = (uint64_t) t; DT_code dt_code; dt_code.type = DT_CODE; void *code = vaddr; if (vaddr != NULL && size > 0) { dt_code.size = sizeof (dt_code) + ((size + 0xf) & ~0xf); if (mode == DFUNC_KERNEL) { /* Some Linuxes don't accept vaddrs from the vsyscall * page in write(). Make a copy. */ code = alloca (size); __collector_memcpy (code, vaddr, size); } } else dt_code.size = 0; DT_srcfile dt_src; dt_src.type = DT_SRCFILE; if (sourcename) { slen = CALL_UTIL (strlen)(sourcename) + 1; dt_src.size = slen ? sizeof (dt_src) + ((slen + 0xf) & ~0xf) : 0; } else { slen = 0; dt_src.size = 0; } DT_ltable dt_ltbl; dt_ltbl.type = DT_LTABLE; if (lntable != NULL && lntsize > 0) dt_ltbl.size = sizeof (dt_ltbl) + lntsize * sizeof (DT_lineno); else dt_ltbl.size = 0; int fd = CALL_UTIL (open)(dyntext_fname, O_RDWR | O_APPEND); if (fd == -1) { TprintfT (0, "ERROR: __collector_int_func_load: open(%s) failed: errno=%d\n", dyntext_fname, errno); (void) __collector_log_write ("%s\n", SP_JCMD_CERROR, COL_ERROR_DYNOPEN, errno, dyntext_fname); return; } /* Lock the whole file */ __collector_mutex_lock (&dyntext_lock); rwrite (fd, &dt_hdr, sizeof (dt_hdr)); if (dt_code.size) { padn = dt_code.size - sizeof (dt_code) - size; rwrite (fd, &dt_code, sizeof (dt_code)); rwrite (fd, code, size); rwrite (fd, &pad, padn); } if (dt_src.size) { padn = dt_src.size - sizeof (dt_src) - slen; rwrite (fd, &dt_src, sizeof (dt_src)); rwrite (fd, sourcename, slen); rwrite (fd, &pad, padn); } if (dt_ltbl.size) { rwrite (fd, &dt_ltbl, sizeof (dt_ltbl)); rwrite (fd, lntable, dt_ltbl.size - sizeof (dt_ltbl)); } /* Unlock the file */ __collector_mutex_unlock( &dyntext_lock ); CALL_UTIL(close( fd ) ); } void __collector_int_func_unload (dfunc_mode_t mode, void *vaddr) { if (!mmap_initted) return; hrtime_t hrt = GETRELTIME (); if (mode == DFUNC_API) append_segment_record ("\n", (unsigned) (hrt / NANOSEC), (unsigned) (hrt % NANOSEC), (unsigned long) vaddr); else if (mode == DFUNC_JAVA) /* note that the "vaddr" is really a method id, not an address */ append_segment_record ("\n", (unsigned) (hrt / NANOSEC), (unsigned) (hrt % NANOSEC), (unsigned long) vaddr); else return; } /* * int init_mmap_intf() * Set up interposition (if not already done). */ static int init_mmap_intf () { if (__collector_dlsym_guard) return 1; void *dlflag; __real_mmap = (void*(*)(void* addr, size_t len, int prot, int flags, int fildes, off_t off))dlsym (RTLD_NEXT, SYS_MMAP_NAME); if (__real_mmap == NULL) { /* We are probably dlopened after libthread/libc, * try to search in the previously loaded objects */ __real_mmap = (void*(*)(void* addr, size_t len, int prot, int flags, int fildes, off_t off))dlsym (RTLD_DEFAULT, SYS_MMAP_NAME); if (__real_mmap == NULL) { TprintfT (0, "ERROR: collector real mmap not found\n"); return 1; } TprintfT (DBG_LT2, "collector real mmap found with RTLD_DEFAULT\n"); dlflag = RTLD_DEFAULT; } else { TprintfT (DBG_LT2, "collector real mmap found with RTLD_NEXT\n"); dlflag = RTLD_NEXT; } TprintfT (DBG_LT2, "init_mmap_intf() @%p __real_mmap\n", __real_mmap); __real_mmap64 = (void*(*)(void *, size_t, int, int, int, off64_t)) dlsym (dlflag, SYS_MMAP64_NAME); TprintfT (DBG_LT2, "init_mmap_intf() @%p __real_mmap64\n", __real_mmap64); __real_munmap = (int(*)(void *, size_t)) dlsym (dlflag, SYS_MUNMAP_NAME); TprintfT (DBG_LT2, "init_mmap_intf() @%p __real_munmap\n", __real_munmap); // dlopen/dlmopen/dlclose are in libdl.so __real_dlopen = (void*(*)(const char *, int)) dlvsym (dlflag, SYS_DLOPEN_NAME, SYS_DLOPEN_VERSION); TprintfT (DBG_LT2, "init_mmap_intf() [%s] @%p __real_dlopen\n", SYS_DLOPEN_VERSION, __real_dlopen); #if (ARCH(Intel) && WSIZE(32)) || ARCH(SPARC) __real_dlopen_2_1 = __real_dlopen; __real_dlopen_2_0 = (void*(*)(const char *, int)) dlvsym (dlflag, SYS_DLOPEN_NAME, "GLIBC_2.0"); #endif __real_dlclose = (int(*)(void* handle))dlsym (dlflag, SYS_DLCLOSE_NAME); TprintfT (DBG_LT2, "init_mmap_intf() @%p __real_dlclose\n", __real_dlclose); TprintfT (DBG_LT2, "init_mmap_intf() done\n"); return 0; } /*------------------------------------------------------------- mmap */ void * mmap (void *start, size_t length, int prot, int flags, int fd, off_t offset) { int err = 0; if (NULL_PTR (mmap)) err = init_mmap_intf (); if (err) return MAP_FAILED; /* hrtime_t hrt = GETRELTIME(); */ void *ret = CALL_REAL (mmap)(start, length, prot, flags, fd, offset); if (!CHCK_REENTRANCE && (ret != MAP_FAILED) && collector_heap_record != NULL) { PUSH_REENTRANCE; /* write a separate record for mmap tracing */ collector_heap_record (MMAP_TRACE, length, ret); POP_REENTRANCE; } TprintfT (DBG_LT2, "libcollector.mmap(%p, %ld, %d, %d, %d, 0x%lld) = %p\n", start, (long) length, prot, flags, fd, (long long) offset, ret); return ret; } /*------------------------------------------------------------- mmap64 */ #if WSIZE(32) /* mmap64 only defined for non-64-bit */ void * mmap64 (void *start, size_t length, int prot, int flags, int fd, off64_t offset) { if (NULL_PTR (mmap64)) init_mmap_intf (); /* hrtime_t hrt = GETRELTIME(); */ void *ret = CALL_REAL (mmap64)(start, length, prot, flags, fd, offset); if (!CHCK_REENTRANCE && (ret != MAP_FAILED) && collector_heap_record != NULL) { PUSH_REENTRANCE; /* write a separate record for mmap tracing */ collector_heap_record (MMAP_TRACE, length, ret); POP_REENTRANCE; } TprintfT (DBG_LT2, "libcollector.mmap64(%p, %ld, %d, %d, %d, 0x%lld) = %p\n", start, (long) length, prot, flags, fd, (long long) offset, ret); return ret; } #endif /* WSIZE(32) */ /*------------------------------------------------------------- munmap */ int munmap (void *start, size_t length) { if (NULL_PTR (munmap)) init_mmap_intf (); /* hrtime_t hrt = GETRELTIME(); */ int rc = CALL_REAL (munmap)(start, length); if (!CHCK_REENTRANCE && (rc == 0) && collector_heap_record != NULL) { PUSH_REENTRANCE; /* write a separate record for mmap tracing */ collector_heap_record (MUNMAP_TRACE, length, start); POP_REENTRANCE; } TprintfT (DBG_LT2, "libcollector.munmap(%p, %ld) = %d\n", start, (long) length, rc); return rc; } /*------------------------------------------------------------- dlopen */ // map interposed symbol versions #if (ARCH(Intel) && WSIZE(32)) || ARCH(SPARC) static void * __collector_dlopen_symver (void*(real_dlopen) (), void *caller, const char *pathname, int mode); SYMVER_ATTRIBUTE (__collector_dlopen_2_1, dlopen@@GLIBC_2.1) void * __collector_dlopen_2_1 (const char *pathname, int mode) { if (NULL_PTR (dlopen)) init_mmap_intf (); void *caller = __builtin_return_address (0); // must be called inside dlopen first layer interpostion return __collector_dlopen_symver (CALL_REAL (dlopen_2_1), caller, pathname, mode); } SYMVER_ATTRIBUTE (__collector_dlopen_2_0, dlopen@GLIBC_2.0) void * __collector_dlopen_2_0 (const char *pathname, int mode) { if (NULL_PTR (dlopen)) init_mmap_intf (); void* caller = __builtin_return_address (0); // must be called inside dlopen first layer interpostion return __collector_dlopen_symver (CALL_REAL (dlopen_2_0), caller, pathname, mode); } #endif #if (ARCH(Intel) && WSIZE(32)) || ARCH(SPARC) static void * __collector_dlopen_symver (void*(real_dlopen) (), void *caller, const char *pathname, int mode) #else void * dlopen (const char *pathname, int mode) #endif { const char * real_pathname = pathname; char new_pathname[MAXPATHLEN]; int origin_offset = 0; TprintfT (DBG_LT2, "dlopen: pathname=%s, mode=%d\n", pathname ? pathname : "NULL", mode); if (pathname && __collector_strStartWith (pathname, "$ORIGIN/") == 0) origin_offset = 8; else if (pathname && __collector_strStartWith (pathname, "${ORIGIN}/") == 0) origin_offset = 10; if (origin_offset) { #if ! ((ARCH(Intel) && WSIZE(32)) || ARCH(SPARC)) // 'caller' is not passed as an argument void * caller = __builtin_return_address (0); // must be called inside dlopen first layer interpostion #endif Dl_info dl_info; if (caller && dladdr (caller, &dl_info) != 0) { TprintfT (DBG_LT2, "dladdr(%p): %p fname=%s\n", caller, dl_info.dli_fbase, dl_info.dli_fname); new_pathname[0] = '\0'; const char *p = __collector_strrchr (dl_info.dli_fname, '/'); if (p) __collector_strlcpy (new_pathname, dl_info.dli_fname, (p - dl_info.dli_fname + 2) < MAXPATHLEN ? (p - dl_info.dli_fname + 2) : MAXPATHLEN); __collector_strlcat (new_pathname, pathname + origin_offset, MAXPATHLEN - CALL_UTIL (strlen)(new_pathname)); real_pathname = new_pathname; } else TprintfT (0, "ERROR: dladdr(%p): %s\n", caller, dlerror ()); } if (NULL_PTR (dlopen)) init_mmap_intf (); TprintfT (DBG_LT2, "libcollector.dlopen(%s,%d) interposing\n", pathname ? pathname : "", mode); void* ret = NULL; // set guard for duration of handling dlopen, since want to ensure // new mappings are resolved after the actual dlopen has occurred PUSH_REENTRANCE; hrtime_t hrt = GETRELTIME (); if (real_pathname && !__collector_strchr (real_pathname, '/')) { // got an unqualified name // get caller and use its searchpath #if ! ((ARCH(Intel) && WSIZE(32)) || ARCH(SPARC)) void* caller = __builtin_return_address (0); // must be called inside dlopen #endif if (caller) { #if (ARCH(Intel) && WSIZE(32)) || ARCH(SPARC) ret = dlopen_searchpath_symver (real_dlopen, caller, real_pathname, mode); #else ret = dlopen_searchpath (caller, real_pathname, mode); #endif } } if (!ret) { #if (ARCH(Intel) && WSIZE(32)) || ARCH(SPARC) ret = (real_dlopen) (real_pathname, mode); #else ret = CALL_REAL (dlopen)(real_pathname, mode); #endif } TprintfT (DBG_LT2, "libcollector -- dlopen(%s) returning %p\n", pathname, ret); /* Don't call update if dlopen failed: preserve dlerror() */ if (ret && (mmap_mode > 0) && !(mode & RTLD_NOLOAD)) update_map_segments (hrt, 1); TprintfT (DBG_LT2, "libcollector -- dlopen(%s) returning %p\n", pathname, ret); POP_REENTRANCE; return ret; } /*------------------------------------------------------------- dlclose */ int dlclose (void *handle) { if (NULL_PTR (dlclose)) init_mmap_intf (); TprintfT (DBG_LT2, "__collector_dlclose(%p) entered\n", handle); hrtime_t hrt = GETRELTIME (); if (!CHCK_REENTRANCE) { PUSH_REENTRANCE; update_map_segments (hrt, 1); POP_REENTRANCE; hrt = GETRELTIME (); } int ret = CALL_REAL (dlclose)(handle); /* Don't call update if dlclose failed: preserve dlerror() */ if (!ret && !CHCK_REENTRANCE) { PUSH_REENTRANCE; update_map_segments (hrt, 1); POP_REENTRANCE; } TprintfT (DBG_LT2, "__collector_dlclose(%p) returning %d\n", handle, ret); return ret; }