Feature #4089: Add addr2line for C level backtrace
http://redmine.ruby-lang.org/issues/show/4089

起票者: Yui NARUSE
ステータス: Open, 優先度: Normal

??????? Ruby ??? SEGV ??? BUG ?????? Ruby level backtrace ????????????????
???????????????? C level backtrace ???????????????????????????

?????????????C level backtrace ????????????????????????????????????????????????????????
?????????????????????????????????????????????????????

???????????????????????????????????????????????????????????????????????込?????????????
glibc ???? (????? Linux) ??????libexecinfo ????????????? FreeBSD ??? NetBSD ???? (???ELF???????????) ??????????????

diff --git a/addr2line.c b/addr2line.c
new file mode 100644
index 0000000..da85f4d
--- /dev/null
+++ b/addr2line.c
@@ -0,0 +1,534 @@
+/**********************************************************************
+
+  addr2line.c -
+
+  $Author$
+
+  Copyright (C) 2010 Shinichiro Hamaji
+
+**********************************************************************/
+
+#include "addr2line.h"
+
+#include <stdio.h>
+
+#ifdef __ELF__
+
+#include <elf.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#ifdef HAVE_DL_ITERATE_PHDR
+# ifndef _GNU_SOURCE
+#  define _GNU_SOURCE
+# endif
+# include <link.h>
+#endif
+
+#define DW_LNS_copy                     0x01 /* DW_LNS_*: line number standard opcodes */
+#define DW_LNS_advance_pc               0x02
+#define DW_LNS_advance_line             0x03
+#define DW_LNS_set_file                 0x04
+#define DW_LNS_set_column               0x05
+#define DW_LNS_negate_stmt              0x06
+#define DW_LNS_set_basic_block          0x07
+#define DW_LNS_const_add_pc             0x08
+#define DW_LNS_fixed_advance_pc         0x09
+#define DW_LNS_set_prologue_end         0x0a /* DWARF3 */
+#define DW_LNS_set_epilogue_begin       0x0b /* DWARF3 */
+#define DW_LNS_set_isa                  0x0c /* DWARF3 */
+
+/* Line number extended opcode name (the byte following a 0 opcode and its length). */
+#define DW_LNE_end_sequence             0x01
+#define DW_LNE_set_address              0x02
+#define DW_LNE_define_file              0x03
+#define DW_LNE_set_discriminator        0x04  /* DWARF4 */
+
+# if SIZEOF_VOIDP == 8 /* NOTE(review): no header included above visibly defines SIZEOF_VOIDP -- confirm */
+#  define ElfW(x) Elf64##_##x
+# else /* also taken when SIZEOF_VOIDP is undefined, since it then evaluates to 0 */
+#  define ElfW(x) Elf32##_##x
+# endif
+
+typedef struct {
+    const char *dirname;     /* directory of the source file; may be "" */
+    const char *filename;    /* source file name; NULL until resolved */
+    int line;                /* >0 resolved line, -1 binary already seen, 0 untouched */
+    /* Mapping of the binary that was opened on behalf of this entry, if any. */
+    int fd;
+    void *mapped;
+    size_t mapped_size;
+    unsigned long base_addr; /* subtracted from the trace address before lookup */
+} line_info_t;
+
+/* Path of the binary being parsed; kept off the stack as this module may be used from signal handler */
+static char binary_filename[PATH_MAX]; /* also named in the diagnostics written to stderr */
+
+/* Decode the unsigned LEB128 value at *p and advance *p past its last byte. */
+static unsigned long
+uleb128(char **p) {
+    unsigned long r = 0;
+    unsigned int s = 0;
+    for (;;) {
+	unsigned char b = *(unsigned char *)(*p)++;
+	/* Widen before shifting; bits beyond unsigned long are dropped. */
+	if (s < sizeof(r) * CHAR_BIT)
+	    r |= (unsigned long)(b & 0x7f) << s;
+	if (b < 0x80)
+	    return r;
+	s += 7;
+    }
+}
+
+/* Decode the signed LEB128 value at *p and advance *p past its last byte. */
+static long
+sleb128(char **p) {
+    unsigned long r = 0;
+    unsigned int s = 0;
+    const unsigned int width = sizeof(r) * CHAR_BIT;
+    for (;;) {
+	unsigned char b = *(unsigned char *)(*p)++;
+	/* Accumulate in unsigned long: an int shift overflows past 31 bits. */
+	if (s < width)
+	    r |= (unsigned long)(b & 0x7f) << s;
+	s += 7;
+	if (b < 0x80) {
+	    /* Bit 6 of the last byte is the sign; extend it upwards. */
+	    if ((b & 0x40) && s < width)
+		r |= ~0UL << s;
+	    return (long)r;
+	}
+    }
+}
+
+/* Return entry number dir (1-based) of the NUL-separated list at p, or ""
+   for dir 0.  One entry is stepped over per remaining count so that a
+   dir > 1 stops at the requested entry; a list that ends early is reported. */
+static const char *
+get_nth_dirname(int dir, char *p)
+{
+    int skip;
+    if (dir <= 0) return "";
+    for (skip = dir - 1; skip > 0; skip--) {
+	p += strlen(p) + 1;
+	if (*p) continue;
+	fprintf(stderr, "Unexpected directory number %d in %s\n",
+		dir, binary_filename);
+	return "";
+    }
+    return p;
+}
+
+/* Walk the file_names table to entry number `file` (1-based) and store its
+   name and directory in *line.  If the table ends first, report it on
+   stderr and leave *line untouched. */
+static void
+fill_filename(int file, char *include_directories, char *filenames,
+	      line_info_t *line)
+{
+    char *cursor = filenames;
+    int remaining;
+
+    for (remaining = file; remaining > 0; remaining--) {
+	char *entry_name = cursor;
+	unsigned long dir_index;
+
+	if (*cursor == '\0') {
+	    /* the table ended before the requested entry */
+	    fprintf(stderr, "Unexpected file number %d in %s\n",
+		    file, binary_filename);
+	    return;
+	}
+	cursor += strlen(cursor) + 1;
+	dir_index = uleb128(&cursor);
+	uleb128(&cursor); /* skip: last modified. */
+	uleb128(&cursor); /* skip: size of the file. */
+
+	if (remaining != 1) continue;
+	line->filename = entry_name;
+	line->dirname = get_nth_dirname(dir_index, include_directories);
+    }
+}
+
+/* Locate the object-file path inside a backtrace symbol string.
+   On success store its start in *p, its length in *len, and return 1. */
+static int
+get_path_from_symbol(const char *symbol, const char **p, size_t *len)
+{
+    const char *paren;
+    if (*symbol == '0') {
+	/* libexecinfo: the path runs from the first '/' to the end */
+	*p = strchr(symbol, '/');
+	if (!*p) return 0;
+	*len = strlen(*p);
+	return 1;
+    }
+    /* glibc: the path is everything before the first '(' */
+    *p = symbol;
+    paren = strchr(symbol, '(');
+    if (paren) *len = (size_t)(paren - symbol);
+    return paren != NULL;
+}
+
+/* Tag each trace within (addr, addr + 100), object-relative, with file:line. */
+static void
+fill_line(int num_traces, void **traces,
+	  unsigned long addr, int file, int line,
+	  char *include_directories, char *filenames, line_info_t *lines)
+{
+    int idx = 0;
+    for (; idx < num_traces; idx++) {
+	line_info_t *entry = &lines[idx];
+	unsigned long rel = (unsigned long)traces[idx] - entry->base_addr;
+	/* heuristic: one source line is assumed to emit < 100 bytes of code */
+	if (rel <= addr || rel >= addr + 100) continue;
+	fill_filename(file, include_directories, filenames, entry);
+	entry->line = line;
+    }
+}
+
+/* Run the line number program of the unit at *debug_line and record
+   file/line for every trace matched by a row it emits (see fill_line).
+   On return *debug_line points just past the bytes consumed.  Fields are
+   read in native byte order, like the other raw reads in this function. */
+static void
+parse_debug_line_cu(int num_traces, void **traces,
+		    char **debug_line, line_info_t *lines)
+{
+    char *p, *cu_end, *cu_start, *include_directories, *filenames;
+    unsigned long unit_length;
+    int default_is_stmt, line_base;
+    unsigned int header_length, minimum_instruction_length, line_range,
+		 opcode_base;
+    unsigned char *standard_opcode_lengths;
+
+    /* The registers. */
+    unsigned long addr = 0;
+    unsigned int file = 1;
+    unsigned int line = 1;
+    unsigned int column = 0;
+    int is_stmt; /* seeded from default_is_stmt once the header is read */
+    int basic_block = 0;
+    int end_sequence = 0;
+    int prologue_end = 0;
+    int epilogue_begin = 0;
+    unsigned int isa = 0;
+
+    p = *debug_line;
+    unit_length = *(unsigned int *)p;
+    p += sizeof(unsigned int);
+    if (unit_length == 0xffffffff) {
+	unit_length = *(unsigned long *)p;
+	p += sizeof(unsigned long);
+    }
+    cu_end = p + unit_length;
+    /*dwarf_version = *(unsigned short *)p;*/
+    p += 2;
+    header_length = *(unsigned int *)p;
+    p += sizeof(unsigned int);
+    cu_start = p + header_length;
+
+    minimum_instruction_length = *(unsigned char *)p++;
+    default_is_stmt = *(unsigned char *)p++;
+    is_stmt = default_is_stmt;
+    /* line_base is a signed byte; plain char may be unsigned on some ABIs */
+    line_base = *(signed char *)p++;
+    line_range = *(unsigned char *)p++;
+    opcode_base = *(unsigned char *)p++;
+    standard_opcode_lengths = (unsigned char *)p - 1;
+    p += opcode_base - 1;
+    include_directories = p;
+    /* skip include directories */
+    while (*p) {
+	while (*p) p++;
+	p++;
+    }
+    p++;
+    filenames = p;
+    p = cu_start;
+
+#define FILL_LINE()						    \
+    do {							    \
+	fill_line(num_traces, traces, addr, file, line,		    \
+		  include_directories, filenames, lines);	    \
+	basic_block = prologue_end = epilogue_begin = 0;	    \
+    } while (0)
+
+    while (p < cu_end) {
+	unsigned long a;
+	unsigned char op = *p++;
+	switch (op) {
+	case DW_LNS_copy:
+	    FILL_LINE();
+	    break;
+	case DW_LNS_advance_pc:
+	    a = uleb128(&p);
+	    addr += a;
+	    break;
+	case DW_LNS_advance_line: {
+	    long a = sleb128(&p);
+	    line += a;
+	    break;
+	}
+	case DW_LNS_set_file:
+	    file = uleb128(&p);
+	    break;
+	case DW_LNS_set_column:
+	    column = uleb128(&p);
+	    break;
+	case DW_LNS_negate_stmt:
+	    is_stmt = !is_stmt;
+	    break;
+	case DW_LNS_set_basic_block:
+	    basic_block = 1;
+	    break;
+	case DW_LNS_const_add_pc:
+	    a = ((255 - opcode_base) / line_range) *
+		minimum_instruction_length;
+	    addr += a;
+	    break;
+	case DW_LNS_fixed_advance_pc: {
+	    /* the operand is an unencoded 2-byte uhalf, not a single byte */
+	    unsigned short uhalf;
+	    memcpy(&uhalf, p, sizeof(uhalf));
+	    p += sizeof(uhalf);
+	    addr += uhalf;
+	    break;
+	}
+	case DW_LNS_set_prologue_end:
+	    prologue_end = 1;
+	    break;
+	case DW_LNS_set_epilogue_begin:
+	    epilogue_begin = 1;
+	    break;
+	case DW_LNS_set_isa:
+	    isa = uleb128(&p);
+	    break;
+	case 0: {
+	    /* extended opcode: ULEB128 length, then the opcode and operands */
+	    char *ext_end;
+	    a = uleb128(&p);
+	    ext_end = p + a;
+	    op = *p++;
+	    switch (op) {
+	    case DW_LNE_end_sequence:
+		end_sequence = 1;
+		FILL_LINE();
+		addr = 0;
+		file = 1;
+		line = 1;
+		column = 0;
+		is_stmt = default_is_stmt;
+		end_sequence = 0;
+		isa = 0;
+		break;
+	    case DW_LNE_set_address:
+		addr = *(unsigned long *)p;
+		p += sizeof(unsigned long);
+		break;
+	    case DW_LNE_set_discriminator:
+		/* carries no file/line data; its operand is skipped below */
+		break;
+	    case DW_LNE_define_file:
+		fprintf(stderr, "Unsupported operation in %s\n",
+			binary_filename);
+		break;
+	    default:
+		fprintf(stderr, "Unknown extended opcode: %d in %s\n",
+			op, binary_filename);
+	    }
+	    /* resume after the declared length so unread operands are skipped */
+	    if (p < ext_end) p = ext_end;
+	    break;
+	}
+	default: {
+	    unsigned int addr_incr;
+	    int line_incr;
+	    a = op - opcode_base;
+	    addr_incr = (a / line_range) * minimum_instruction_length;
+	    line_incr = line_base + (a % line_range);
+	    addr += addr_incr;
+	    line += line_incr;
+	    FILL_LINE();
+	}
+	}
+    }
+    *debug_line = p;
+}
+
+/* Parse every unit in a .debug_line section of `size` bytes, then warn on
+   stderr if parsing did not stop exactly at the end of the section. */
+static void
+parse_debug_line(int num_traces, void **traces,
+		 char *debug_line, unsigned long size, line_info_t *lines)
+{
+    char *const section_end = debug_line + size;
+    char *cursor;
+    for (cursor = debug_line; cursor < section_end; )
+	parse_debug_line_cu(num_traces, traces, &cursor, lines);
+    if (cursor == section_end) return;
+    fprintf(stderr, "Unexpected size of .debug_line in %s\n", binary_filename);
+}
+
+/* `file` is the mapped image of binary_filename.  Flag every trace that
+   belongs to that binary (line = -1), then locate the image's .debug_line
+   section and, when present, parse it to resolve file/line for traces. */
+static void
+fill_lines(int num_traces, void **traces, char **syms,
+	   char *file, line_info_t *lines)
+{
+    ElfW(Ehdr) *elf_header;
+    ElfW(Shdr) *sections;
+    ElfW(Shdr) *found = NULL;
+    char *names;
+    int n;
+
+    /* mark the traces whose symbol path matches the binary being read */
+    for (n = 0; n < num_traces; n++) {
+	const char *path;
+	size_t len;
+	if (!get_path_from_symbol(syms[n], &path, &len)) continue;
+	if (strncmp(path, binary_filename, len) != 0) continue;
+	lines[n].line = -1;
+    }
+
+    elf_header = (ElfW(Ehdr) *)file;
+    sections = (ElfW(Shdr) *)(file + elf_header->e_shoff);
+
+    /* section names are offsets into the section numbered e_shstrndx */
+    names = file + sections[elf_header->e_shstrndx].sh_offset;
+
+    /* pick the first section called ".debug_line" */
+    for (n = 0; n < elf_header->e_shnum; n++) {
+	if (strcmp(names + sections[n].sh_name, ".debug_line") == 0) {
+	    found = &sections[n];
+	    break;
+	}
+    }
+
+    /* this file doesn't have .debug_line section */
+    if (found == NULL) return;
+
+    parse_debug_line(num_traces, traces,
+		     file + found->sh_offset,
+		     found->sh_size,
+		     lines);
+}
+
+#ifdef HAVE_DL_ITERATE_PHDR
+
+typedef struct { /* context handed to fill_base_addr through dl_iterate_phdr */
+    int num_traces;     /* number of entries in syms and lines */
+    char **syms;        /* symbol string of each trace */
+    line_info_t *lines; /* per-trace records whose base_addr gets filled in */
+} fill_base_addr_state_t;
+
+/* dl_iterate_phdr callback: for each trace whose symbol path equals the object
+   name, or ends with '/' plus that name, store the object's dlpi_addr.  Returns 0. */
+static int
+fill_base_addr(struct dl_phdr_info *info, size_t size, void *data)
+{
+    fill_base_addr_state_t *state = data;
+    const size_t name_len = strlen(info->dlpi_name);
+    int idx;
+    for (idx = 0; idx < state->num_traces; idx++) {
+	const char *path;
+	size_t len;
+	if (!get_path_from_symbol(state->syms[idx], &path, &len)) continue;
+	if (len < name_len || (len > name_len && path[len - name_len - 1] != '/')) continue;
+	if (strncmp(path + len - name_len, info->dlpi_name, name_len) != 0) continue;
+	state->lines[idx].base_addr = info->dlpi_addr;
+    }
+    return 0;
+}
+
+#endif /* HAVE_DL_ITERATE_PHDR */
+
+/* Print the backtrace in syms to stderr, adding "dir/file:line" where .debug_line
+   resolves it; a binary that cannot be opened or mapped leaves the bare symbol. */
+void
+rb_dump_backtrace_with_lines(int num_traces, void **trace, char **syms)
+{
+    int i;
+    int fd;
+    /* async-signal unsafe */
+    line_info_t *lines = (line_info_t *)calloc(num_traces,
+					       sizeof(line_info_t));
+    off_t filesize;
+    char *file;
+#ifdef HAVE_DL_ITERATE_PHDR
+    fill_base_addr_state_t fill_base_addr_state;
+#endif /* HAVE_DL_ITERATE_PHDR */
+
+    if (!lines) {
+	/* no memory for line info: still print the plain backtrace */
+	for (i = 0; i < num_traces; i++) fprintf(stderr, "%s\n", syms[i]);
+	return;
+    }
+
+    /* Note that line info of shared objects might not be shown
+       if we don't have dl_iterate_phdr */
+#ifdef HAVE_DL_ITERATE_PHDR
+    fill_base_addr_state.num_traces = num_traces;
+    fill_base_addr_state.syms = syms;
+    fill_base_addr_state.lines = lines;
+    /* maybe async-signal unsafe */
+    dl_iterate_phdr(fill_base_addr, &fill_base_addr_state);
+#endif /* HAVE_DL_ITERATE_PHDR */
+
+    for (i = 0; i < num_traces; i++) {
+	const char *path;
+	size_t len;
+	if (lines[i].line) continue;
+	if (!get_path_from_symbol(syms[i], &path, &len)) continue;
+	if (len >= PATH_MAX) continue; /* would not fit binary_filename */
+	memcpy(binary_filename, path, len);
+	binary_filename[len] = '\0';
+	fd = open(binary_filename, O_RDONLY);
+	if (fd < 0) continue;
+	filesize = lseek(fd, 0, SEEK_END);
+	lseek(fd, 0, SEEK_SET);
+	/* async-signal unsafe; a failed lseek (-1) or empty file is not mapped */
+	file = filesize > 0 ? (char *)mmap(NULL, filesize, PROT_READ,
+					   MAP_SHARED, fd, 0)
+			    : (char *)MAP_FAILED;
+	if (file == (char *)MAP_FAILED) {
+	    close(fd);
+	    continue;
+	}
+	lines[i].fd = fd;
+	lines[i].mapped = file;
+	lines[i].mapped_size = filesize;
+	fill_lines(num_traces, trace, syms, file, lines);
+    }
+
+    /* fprintf may not be async-signal safe */
+    for (i = 0; i < num_traces; i++) {
+	line_info_t *line = &lines[i];
+	const char *dir = line->dirname ? line->dirname : "";
+	if (line->line <= 0) {
+	    fprintf(stderr, "%s\n", syms[i]);
+	    continue;
+	}
+	fprintf(stderr, "%s ", syms[i]);
+	if (!line->filename) fprintf(stderr, "???");
+	else fprintf(stderr, "%s%s%s", dir, dir[0] ? "/" : "", line->filename);
+	fprintf(stderr, ":%d\n", line->line);
+    }
+
+    for (i = 0; i < num_traces; i++) {
+	if (!lines[i].mapped) continue; /* only mapped entries hold resources */
+	munmap(lines[i].mapped, lines[i].mapped_size);
+	close(lines[i].fd);
+    }
+    free(lines);
+}
+
+#endif /* defined(__ELF__) */
diff --git a/addr2line.h b/addr2line.h
new file mode 100644
index 0000000..cbb18e5
--- /dev/null
+++ b/addr2line.h
@@ -0,0 +1,21 @@
+/**********************************************************************
+
+  addr2line.h -
+
+  $Author$
+
+  Copyright (C) 2010 Shinichiro Hamaji
+
+**********************************************************************/
+
+#ifndef RUBY_ADDR2LINE_H
+#define RUBY_ADDR2LINE_H
+
+#ifdef __ELF__
+/* Print the backtrace to stderr, adding file:line where it can be resolved. */
+void
+rb_dump_backtrace_with_lines(int num_traces, void **traces, char **syms);
+
+#endif /* __ELF__ */
+
+#endif /* RUBY_ADDR2LINE_H */
diff --git a/common.mk b/common.mk
index c45c3e1..cf66e42 100644
--- a/common.mk
+++ b/common.mk
@@ -91,6 +91,7 @@ COMMONOBJS    = array.$(OBJEXT) \
 		vm_dump.$(OBJEXT) \
 		thread.$(OBJEXT) \
 		cont.$(OBJEXT) \
+		addr2line.$(OBJEXT) \
 		$(BUILTIN_ENCOBJS) \
 		$(BUILTIN_TRANSOBJS) \
 		$(MISSING)
diff --git a/configure.in b/configure.in
index 36a58d4..ef2ee2d 100644
--- a/configure.in
+++ b/configure.in
@@ -1300,7 +1300,7 @@ AC_CHECK_FUNCS(fmod killpg wait4 waitpid fork spawnv syscall chroot getcwd eacce
 	      setsid telldir seekdir fchmod cosh sinh tanh log2 round\
 	      setuid setgid daemon select_large_fdset setenv unsetenv\
               mktime timegm gmtime_r clock_gettime gettimeofday\
-              pread sendfile shutdown sigaltstack)
+              pread sendfile shutdown sigaltstack dl_iterate_phdr)
 
 AC_CACHE_CHECK(for unsetenv returns a value, rb_cv_unsetenv_return_value,
   [AC_TRY_COMPILE([
diff --git a/vm_dump.c b/vm_dump.c
index 2975001..b22c041 100644
--- a/vm_dump.c
+++ b/vm_dump.c
@@ -10,6 +10,7 @@
 
 
 #include "ruby/ruby.h"
+#include "addr2line.h"
 #include "vm_core.h"
 
 #define MAX_POSBUF 128
@@ -785,9 +786,13 @@ rb_vm_bugreport(void)
 	int i;
 
 	if (syms) {
+#ifdef __ELF__
+	    rb_dump_backtrace_with_lines(n, trace, syms);
+#else
 	    for (i=0; i<n; i++) {
 		fprintf(stderr, "%s\n", syms[i]);
 	    }
+#endif
 	    free(syms);
 	}
 #elif defined(_WIN32)


----------------------------------------
http://redmine.ruby-lang.org