]> git.neil.brown.name Git - history.git/commitdiff
[PATCH] i386 vsyscall DSO implementation
author Roland McGrath <roland@redhat.com>
Thu, 1 May 2003 16:18:50 +0000 (09:18 -0700)
committer Linus Torvalds <torvalds@home.transmeta.com>
Thu, 1 May 2003 16:18:50 +0000 (09:18 -0700)
This creates the full debugging input for the vsyscall page as a DSO,
allowing for debuggers and exception handlers to correctly handle the
processor state during the vsyscall.

There was a small bug in the core dump changes in the original patch I
posted.  I fixed that, and the rest of the patch is unchanged.

arch/i386/kernel/Makefile
arch/i386/kernel/entry.S
arch/i386/kernel/signal.c
arch/i386/kernel/sysenter.c
arch/i386/kernel/vsyscall-int80.S [new file with mode: 0644]
arch/i386/kernel/vsyscall-sigreturn.S [new file with mode: 0644]
arch/i386/kernel/vsyscall-sysenter.S [new file with mode: 0644]
arch/i386/kernel/vsyscall.lds [new file with mode: 0644]
fs/binfmt_elf.c
include/asm-i386/elf.h
include/linux/elf.h

index f1ea1c5d2d99589a07ff98c7e3811a2583e3107a..08472d6b50e074e68d60b4bf4a4511c67464d274 100644 (file)
@@ -27,9 +27,29 @@ obj-$(CONFIG_SOFTWARE_SUSPEND)       += suspend.o suspend_asm.o
 obj-$(CONFIG_X86_NUMAQ)                += numaq.o
 obj-$(CONFIG_EDD)              += edd.o
 obj-$(CONFIG_MODULES)          += module.o
-obj-y                          += sysenter.o
+obj-y                          += sysenter.o vsyscall.o
 obj-$(CONFIG_ACPI_SRAT)        += srat.o
 
 EXTRA_AFLAGS   := -traditional
 
 obj-$(CONFIG_SCx200)           += scx200.o
+
+# vsyscall.o contains the vsyscall DSO images as __initdata.
+# We must build both images before we can assemble it.
+$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
+extra-y += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
+
+# The DSO images are built using a special linker script.
+$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o
+       $(CC) -nostdlib -shared -s -Wl,-soname=linux-vsyscall.so.1 \
+             -o $@ -Wl,-T,$^
+
+# We also create a special relocatable object that should mirror the symbol
+# table and layout of the linked DSO.  With ld -R we can then refer to
+# these symbols in the kernel code rather than hand-coded addresses.
+extra-y += vsyscall-syms.o
+$(obj)/built-in.o: $(obj)/vsyscall-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
+$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o
+       $(CC) -nostdlib -r -o $@ -Wl,-T,$^
index 6d745224359e6948020f63bd8584061b63bbbbe2..f13ce73f86a87826139c5eaa8b0b4617af6af3e6 100644 (file)
@@ -230,8 +230,8 @@ need_resched:
        jmp need_resched
 #endif
 
-/* Points to after the "sysenter" instruction in the vsyscall page */
-#define SYSENTER_RETURN 0xffffe010
+/* SYSENTER_RETURN points to after the "sysenter" instruction in
+   the vsyscall page.  See vsyscall-sysenter.S, which defines the symbol.  */
 
        # sysenter call handler stub
 ENTRY(sysenter_entry)
index 3624e3323f240f6a6bf4ad25f03b9117b9fb5dac..06dc97dd454935b87468625b3b18ff173535afe9 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/stddef.h>
 #include <linux/personality.h>
 #include <linux/suspend.h>
+#include <linux/elf.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
@@ -347,6 +348,10 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
        return (void __user *)((esp - frame_size) & -8ul);
 }
 
+/* These symbols are defined with the addresses in the vsyscall page.
+   See vsyscall-sigreturn.S.  */
+extern void __kernel_sigreturn, __kernel_rt_sigreturn;
+
 static void setup_frame(int sig, struct k_sigaction *ka,
                        sigset_t *set, struct pt_regs * regs)
 {
@@ -379,7 +384,7 @@ static void setup_frame(int sig, struct k_sigaction *ka,
        if (err)
                goto give_sigsegv;
 
-       restorer = (void *) (fix_to_virt(FIX_VSYSCALL) + 32);
+       restorer = &__kernel_sigreturn;
        if (ka->sa.sa_flags & SA_RESTORER)
                restorer = ka->sa.sa_restorer;
 
@@ -462,7 +467,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
                goto give_sigsegv;
 
        /* Set up to return from userspace.  */
-       restorer = (void *) (fix_to_virt(FIX_VSYSCALL) + 64);
+       restorer = &__kernel_rt_sigreturn;
        if (ka->sa.sa_flags & SA_RESTORER)
                restorer = ka->sa.sa_restorer;
        err |= __put_user(restorer, &frame->pretcode);
index 35f27670059659e88fe22248700dd6fb5567f492..f01fbf6e575106af501e9602c21b89278319bca2 100644 (file)
@@ -51,151 +51,30 @@ void enable_sep_cpu(void *info)
        put_cpu();      
 }
 
+/*
+ * These symbols are defined by vsyscall.o to mark the bounds
+ * of the ELF DSO images included therein.
+ */
+extern const char vsyscall_int80_start, vsyscall_int80_end;
+extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
+
 static int __init sysenter_setup(void)
 {
-       static const char __initdata int80[] = {
-               0xcd, 0x80,             /* int $0x80 */
-               0xc3                    /* ret */
-       };
-       /* Unwind information for the int80 code.  Keep track of
-          where the return address is stored.  */
-       static const char __initdata int80_eh_frame[] = {
-       /* First the Common Information Entry (CIE):  */
-               0x14, 0x00, 0x00, 0x00, /* Length of the CIE */
-               0x00, 0x00, 0x00, 0x00, /* CIE Identifier Tag */
-               0x01,                   /* CIE Version */
-               'z', 'R', 0x00,         /* CIE Augmentation */
-               0x01,                   /* CIE Code Alignment Factor */
-               0x7c,                   /* CIE Data Alignment Factor */
-               0x08,                   /* CIE RA Column */
-               0x01,                   /* Augmentation size */
-               0x1b,                   /* FDE Encoding (pcrel sdata4) */
-               0x0c,                   /* DW_CFA_def_cfa */
-               0x04,
-               0x04,
-               0x88,                   /* DW_CFA_offset, column 0x8 */
-               0x01,
-               0x00,                   /* padding */
-               0x00,
-       /* Now the FDE which contains the instructions for the frame.  */
-               0x0a, 0x00, 0x00, 0x00, /* FDE Length */
-               0x1c, 0x00, 0x00, 0x00, /* FDE CIE offset */
-       /* The PC-relative offset to the beginning of the code this
-          FDE covers.  The computation below assumes that the offset
-          can be represented in one byte.  Change if this is not true
-          anymore.  The offset from the beginning of the .eh_frame
-          is represented by EH_FRAME_OFFSET.  The word with the offset
-          starts at byte 0x20 of the .eh_frame.  */
-               0x100 - (EH_FRAME_OFFSET + 0x20),
-               0xff, 0xff, 0xff,       /* FDE initial location */
-               3,                      /* FDE address range */
-               0x00                    /* Augmentation size */
-       /* The code does not change the stack pointer.  We need not
-          record any operations.  */
-       };
-       static const char __initdata sysent[] = {
-               0x51,                   /* push %ecx */
-               0x52,                   /* push %edx */
-               0x55,                   /* push %ebp */
-       /* 3: backjump target */
-               0x89, 0xe5,             /* movl %esp,%ebp */
-               0x0f, 0x34,             /* sysenter */
-
-       /* 7: align return point with nop's to make disassembly easier */
-               0x90, 0x90, 0x90, 0x90,
-               0x90, 0x90, 0x90,
-
-       /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
-               0xeb, 0xf3,             /* jmp to "movl %esp,%ebp" */
-       /* 16: System call normal return point is here! (SYSENTER_RETURN in entry.S) */
-               0x5d,                   /* pop %ebp */
-               0x5a,                   /* pop %edx */
-               0x59,                   /* pop %ecx */
-               0xc3                    /* ret */
-       };
-       /* Unwind information for the sysenter code.  Keep track of
-          where the return address is stored.  */
-       static const char __initdata sysent_eh_frame[] = {
-       /* First the Common Information Entry (CIE):  */
-               0x14, 0x00, 0x00, 0x00, /* Length of the CIE */
-               0x00, 0x00, 0x00, 0x00, /* CIE Identifier Tag */
-               0x01,                   /* CIE Version */
-               'z', 'R', 0x00,         /* CIE Augmentation */
-               0x01,                   /* CIE Code Alignment Factor */
-               0x7c,                   /* CIE Data Alignment Factor */
-               0x08,                   /* CIE RA Column */
-               0x01,                   /* Augmentation size */
-               0x1b,                   /* FDE Encoding (pcrel sdata4) */
-               0x0c,                   /* DW_CFA_def_cfa */
-               0x04,
-               0x04,
-               0x88,                   /* DW_CFA_offset, column 0x8 */
-               0x01,
-               0x00,                   /* padding */
-               0x00,
-       /* Now the FDE which contains the instructions for the frame.  */
-               0x22, 0x00, 0x00, 0x00, /* FDE Length */
-               0x1c, 0x00, 0x00, 0x00, /* FDE CIE offset */
-       /* The PC-relative offset to the beginning of the code this
-          FDE covers.  The computation below assumes that the offset
-          can be represented in one byte.  Change if this is not true
-          anymore.  The offset from the beginning of the .eh_frame
-          is represented by EH_FRAME_OFFSET.  The word with the offset
-          starts at byte 0x20 of the .eh_frame.  */
-               0x100 - (EH_FRAME_OFFSET + 0x20),
-               0xff, 0xff, 0xff,       /* FDE initial location */
-               0x14, 0x00, 0x00, 0x00, /* FDE address range */
-               0x00,                   /* Augmentation size */
-       /* What follows are the instructions for the table generation.
-          We have to record all changes of the stack pointer and
-          callee-saved registers.  */
-               0x41,                   /* DW_CFA_advance_loc+1, push %ecx */
-               0x0e,                   /* DW_CFA_def_cfa_offset */
-               0x08,                   /* RA at offset 8 now */
-               0x41,                   /* DW_CFA_advance_loc+1, push %edx */
-               0x0e,                   /* DW_CFA_def_cfa_offset */
-               0x0c,                   /* RA at offset 12 now */
-               0x41,                   /* DW_CFA_advance_loc+1, push %ebp */
-               0x0e,                   /* DW_CFA_def_cfa_offset */
-               0x10,                   /* RA at offset 16 now */
-               0x85, 0x04,             /* DW_CFA_offset %ebp -16 */
-       /* Finally the epilogue.  */
-               0x4e,                   /* DW_CFA_advance_loc+14, pop %ebx */
-               0x0e,                   /* DW_CFA_def_cfa_offset */
-               0x12,                   /* RA at offset 12 now */
-               0xc5,                   /* DW_CFA_restore %ebp */
-               0x41,                   /* DW_CFA_advance_loc+1, pop %edx */
-               0x0e,                   /* DW_CFA_def_cfa_offset */
-               0x08,                   /* RA at offset 8 now */
-               0x41,                   /* DW_CFA_advance_loc+1, pop %ecx */
-               0x0e,                   /* DW_CFA_def_cfa_offset */
-               0x04                    /* RA at offset 4 now */
-       };
-       static const char __initdata sigreturn[] = {
-       /* 32: sigreturn point */
-               0x58,                           /* popl %eax */
-               0xb8, __NR_sigreturn, 0, 0, 0,  /* movl $__NR_sigreturn, %eax */
-               0xcd, 0x80,                     /* int $0x80 */
-       };
-       static const char __initdata rt_sigreturn[] = {
-       /* 64: rt_sigreturn point */
-               0xb8, __NR_rt_sigreturn, 0, 0, 0,       /* movl $__NR_rt_sigreturn, %eax */
-               0xcd, 0x80,                     /* int $0x80 */
-       };
        unsigned long page = get_zeroed_page(GFP_ATOMIC);
 
        __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY);
-       memcpy((void *) page, int80, sizeof(int80));
-       memcpy((void *)(page + 32), sigreturn, sizeof(sigreturn));
-       memcpy((void *)(page + 64), rt_sigreturn, sizeof(rt_sigreturn));
-       memcpy((void *)(page + EH_FRAME_OFFSET), int80_eh_frame,
-              sizeof(int80_eh_frame));
-       if (!boot_cpu_has(X86_FEATURE_SEP))
+
+       if (!boot_cpu_has(X86_FEATURE_SEP)) {
+               memcpy((void *) page,
+                      &vsyscall_int80_start,
+                      &vsyscall_int80_end - &vsyscall_int80_start);
                return 0;
+       }
+
+       memcpy((void *) page,
+              &vsyscall_sysenter_start,
+              &vsyscall_sysenter_end - &vsyscall_sysenter_start);
 
-       memcpy((void *) page, sysent, sizeof(sysent));
-       memcpy((void *)(page + EH_FRAME_OFFSET), sysent_eh_frame,
-              sizeof(sysent_eh_frame));
        on_each_cpu(enable_sep_cpu, NULL, 1, 1);
        return 0;
 }
diff --git a/arch/i386/kernel/vsyscall-int80.S b/arch/i386/kernel/vsyscall-int80.S
new file mode 100644 (file)
index 0000000..924526a
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Code for the vsyscall page.  This version uses the old int $0x80 method.
+ */
+
+       .text
+       .globl __kernel_vsyscall
+       .type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+       int $0x80
+       ret
+.LEND_vsyscall:
+       .size __kernel_vsyscall,.-.LSTART_vsyscall
+       .previous
+
+       .section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI:
+       .long .LENDCIEDLSI-.LSTARTCIEDLSI
+.LSTARTCIEDLSI:
+       .long 0                 /* CIE ID */
+       .byte 1                 /* Version number */
+       .string "zR"            /* NUL-terminated augmentation string */
+       .uleb128 1              /* Code alignment factor */
+       .sleb128 -4             /* Data alignment factor */
+       .byte 8                 /* Return address register column */
+       .uleb128 1              /* Augmentation value length */
+       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+       .byte 0x0c              /* DW_CFA_def_cfa */
+       .uleb128 4
+       .uleb128 4
+       .byte 0x88              /* DW_CFA_offset, column 0x8 */
+       .uleb128 1
+       .align 4
+.LENDCIEDLSI:
+       .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
+.LSTARTFDEDLSI:
+       .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
+       .long .LSTART_vsyscall-.        /* PC-relative start address */
+       .long .LEND_vsyscall-.LSTART_vsyscall
+       .uleb128 0
+       .align 4
+.LENDFDEDLSI:
+       .previous
+
+/*
+ * Get the common code for the sigreturn entry points.
+ */
+#include "vsyscall-sigreturn.S"
diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S
new file mode 100644 (file)
index 0000000..58347a9
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+ * Common code for the sigreturn entry points on the vsyscall page.
+ * So far this code is the same for both int80 and sysenter versions.
+ * This file is #include'd by vsyscall-*.S to define them after the
+ * vsyscall entry point.  The addresses we get for these entry points
+ * by doing ".balign 32" must match in both versions of the page.
+ */
+
+#include <asm/unistd.h>
+
+
+/* XXX
+   Should these be named "_sigtramp" or something?
+*/
+
+       .text
+       .balign 32              /* entry address must match in both page versions */
+       .globl __kernel_sigreturn
+       .type __kernel_sigreturn,@function
+__kernel_sigreturn:
+.LSTART_sigreturn:             /* start label; the .size expression below uses it */
+       popl %eax               /* XXX does this mean it needs unwind info? */
+       movl $__NR_sigreturn, %eax
+       int $0x80
+.LEND_sigreturn:
+       .size __kernel_sigreturn,.-.LSTART_sigreturn
+
+       .text
+       .balign 32              /* entry address must match in both page versions */
+       .globl __kernel_rt_sigreturn
+       .type __kernel_rt_sigreturn,@function
+__kernel_rt_sigreturn:
+.LSTART_rt_sigreturn:          /* start label; the .size expression below uses it */
+       movl $__NR_rt_sigreturn, %eax
+       int $0x80
+.LEND_rt_sigreturn:
+       .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
+       .previous
diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S
new file mode 100644 (file)
index 0000000..e1dca09
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Code for the vsyscall page.  This version uses the sysenter instruction.
+ */
+
+       .text
+       .globl __kernel_vsyscall
+       .type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+       push %ecx
+.Lpush_ecx:
+       push %edx
+.Lpush_edx:
+       push %ebp
+.Lenter_kernel:
+       movl %esp,%ebp
+       sysenter
+
+       /* 7: align return point with nop's to make disassembly easier */
+       .space 7,0x90
+
+       /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
+       jmp .Lenter_kernel
+       /* 16: System call normal return point is here! */
+       .globl SYSENTER_RETURN  /* Symbol used by entry.S.  */
+SYSENTER_RETURN:
+       pop %ebp
+.Lpop_ebp:
+       pop %edx
+.Lpop_edx:
+       pop %ecx
+.Lpop_ecx:
+       ret
+.LEND_vsyscall:
+       .size __kernel_vsyscall,.-.LSTART_vsyscall
+       .previous
+
+       .section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI:
+       .long .LENDCIEDLSI-.LSTARTCIEDLSI
+.LSTARTCIEDLSI:
+       .long 0                 /* CIE ID */
+       .byte 1                 /* Version number */
+       .string "zR"            /* NUL-terminated augmentation string */
+       .uleb128 1              /* Code alignment factor */
+       .sleb128 -4             /* Data alignment factor */
+       .byte 8                 /* Return address register column */
+       .uleb128 1              /* Augmentation value length */
+       .byte 0x1b              /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+       .byte 0x0c              /* DW_CFA_def_cfa */
+       .uleb128 4
+       .uleb128 4
+       .byte 0x88              /* DW_CFA_offset, column 0x8 */
+       .uleb128 1
+       .align 4
+.LENDCIEDLSI:
+       .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
+.LSTARTFDEDLSI:
+       .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
+       .long .LSTART_vsyscall-.        /* PC-relative start address */
+       .long .LEND_vsyscall-.LSTART_vsyscall /* Length of the code covered */
+       .uleb128 0              /* Augmentation size */
+       /* What follows are the instructions for the table generation.
+          We have to record all changes of the stack pointer.  */
+       .byte 0x04              /* DW_CFA_advance_loc4 */
+       .long .Lpush_ecx-.LSTART_vsyscall
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x08              /* RA at offset 8 now */
+       .byte 0x04              /* DW_CFA_advance_loc4 */
+       .long .Lpush_edx-.Lpush_ecx
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x0c              /* RA at offset 12 now */
+       .byte 0x04              /* DW_CFA_advance_loc4 */
+       .long .Lenter_kernel-.Lpush_edx
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x10              /* RA at offset 16 now */
+       /* Finally the epilogue.  */
+       .byte 0x04              /* DW_CFA_advance_loc4 */
+       .long .Lpop_ebp-.Lenter_kernel
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x0c              /* RA at offset 12 (0x0c) now, after pop %ebp */
+       .byte 0x04              /* DW_CFA_advance_loc4 */
+       .long .Lpop_edx-.Lpop_ebp
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x08              /* RA at offset 8 now */
+       .byte 0x04              /* DW_CFA_advance_loc4 */
+       .long .Lpop_ecx-.Lpop_edx
+       .byte 0x0e              /* DW_CFA_def_cfa_offset */
+       .byte 0x04              /* RA at offset 4 now */
+       .align 4
+.LENDFDEDLSI:
+       .previous
+
+/*
+ * Get the common code for the sigreturn entry points.
+ */
+#include "vsyscall-sigreturn.S"
diff --git a/arch/i386/kernel/vsyscall.lds b/arch/i386/kernel/vsyscall.lds
new file mode 100644 (file)
index 0000000..7ff7f8b
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Linker script for vsyscall DSO.  The vsyscall page is an ELF shared
+ * object prelinked to its virtual address, and with only one read-only
+ * segment (that fits in one page).  This script controls its layout.
+ */
+
+/* This must match <asm/fixmap.h>.  */
+VSYSCALL_BASE = 0xffffe000;
+
+SECTIONS
+{
+  . = VSYSCALL_BASE + SIZEOF_HEADERS;
+
+  .hash           : { *(.hash) }               :text
+  .dynsym         : { *(.dynsym) }
+  .dynstr         : { *(.dynstr) }
+  .gnu.version    : { *(.gnu.version) }
+  .gnu.version_d  : { *(.gnu.version_d) }
+  .gnu.version_r  : { *(.gnu.version_r) }
+
+  /* This linker script is used both with -r and with -shared.
+     For the layouts to match, we need to skip more than enough
+     space for the dynamic symbol table et al.  If this amount
+     is insufficient, ld -shared will barf.  Just increase it here.  */
+  . = VSYSCALL_BASE + 0x400;
+
+  .text           : { *(.text) }               :text =0x90909090
+
+  .eh_frame_hdr   : { *(.eh_frame_hdr) }       :text :eh_frame_hdr
+  .eh_frame       : { KEEP (*(.eh_frame)) }    :text
+  .dynamic        : { *(.dynamic) }            :text :dynamic
+  .useless        : {
+       *(.got.plt) *(.got)
+       *(.data .data.* .gnu.linkonce.d.*)
+       *(.dynbss)
+       *(.bss .bss.* .gnu.linkonce.b.*)
+  }                                            :text
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+  eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+  LINUX_2.5 {
+    global:
+       __kernel_vsyscall;
+       __kernel_sigreturn;
+       __kernel_rt_sigreturn;
+
+    local: *;
+  };
+}
+
+/* The ELF entry point can be used to set the AT_SYSINFO value.  */
+ENTRY(__kernel_vsyscall);
index 5189b1a352a75cea5df7b698adffd4497a7d729e..69de3b87ae9917e07cf40ea405ccc45f45887d57 100644 (file)
@@ -1260,6 +1260,9 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
        elf_core_copy_regs(&prstatus->pr_reg, regs);
        
        segs = current->mm->map_count;
+#ifdef ELF_CORE_EXTRA_PHDRS
+       segs += ELF_CORE_EXTRA_PHDRS;
+#endif
 
        /* Set up header */
        fill_elf_header(elf, segs+1);   /* including notes section */
@@ -1340,6 +1343,10 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
                DUMP_WRITE(&phdr, sizeof(phdr));
        }
 
+#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
+       ELF_CORE_WRITE_EXTRA_PHDRS;
+#endif
+
        /* write out the notes section */
        for (i = 0; i < numnote; i++)
                if (!writenote(notes + i, file))
@@ -1385,6 +1392,10 @@ static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file)
                }
        }
 
+#ifdef ELF_CORE_WRITE_EXTRA_DATA
+       ELF_CORE_WRITE_EXTRA_DATA;
+#endif
+
        if ((off_t) file->f_pos != offset) {
                /* Sanity check */
                printk("elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
index 637838d7f85a87266560e05baa6b84bb2224ba79..a87a45df75e2417e9e39e3627e0832cb370ca782 100644 (file)
@@ -101,7 +101,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
  * for more of them, start the x86-specific ones at 32.
  */
 #define AT_SYSINFO             32
-#define AT_SYSINFO_EH_FRAME    33
+#define AT_SYSINFO_EHDR                33
 
 #ifdef __KERNEL__
 #define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX)
@@ -119,15 +119,56 @@ extern void dump_smp_unlazy_fpu(void);
 #define ELF_CORE_SYNC dump_smp_unlazy_fpu
 #endif
 
-/* Offset from the beginning of the page where the .eh_frame information
-   for the code in the vsyscall page starts.  */
-#define EH_FRAME_OFFSET 96
+#define VSYSCALL_BASE  (__fix_to_virt(FIX_VSYSCALL))
+#define VSYSCALL_EHDR  ((const struct elfhdr *) VSYSCALL_BASE)
+#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall)
+extern void __kernel_vsyscall;
 
 #define ARCH_DLINFO                                            \
 do {                                                           \
-               NEW_AUX_ENT(AT_SYSINFO, 0xffffe000);            \
-               NEW_AUX_ENT(AT_SYSINFO_EH_FRAME,                \
-                           0xffffe000 + EH_FRAME_OFFSET);      \
+               NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY);        \
+               NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE);    \
+} while (0)
+
+/*
+ * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
+ * extra segments containing the vsyscall DSO contents.  Dumping its
+ * contents makes post-mortem fully interpretable later without matching up
+ * the same kernel and hardware config to see what PC values meant.
+ * Dumping its extra ELF program headers includes all the other information
+ * a debugger needs to easily find how the vsyscall DSO was being used.
+ */
+#define ELF_CORE_EXTRA_PHDRS           (VSYSCALL_EHDR->e_phnum)
+#define ELF_CORE_WRITE_EXTRA_PHDRS                                           \
+do {                                                                         \
+       const struct elf_phdr *const vsyscall_phdrs =                         \
+               (const struct elf_phdr *) (VSYSCALL_BASE                      \
+                                          + VSYSCALL_EHDR->e_phoff);         \
+       int i;                                                                \
+       Elf32_Off ofs = 0;                                                    \
+       for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) {                        \
+               struct elf_phdr phdr = vsyscall_phdrs[i];                     \
+               if (phdr.p_type == PT_LOAD) {                                 \
+                       ofs = phdr.p_offset = offset;                         \
+                       offset += phdr.p_filesz;                              \
+               }                                                             \
+               else                                                          \
+                       phdr.p_offset += ofs;                                 \
+               phdr.p_paddr = 0; /* match other core phdrs */                \
+               DUMP_WRITE(&phdr, sizeof(phdr));                              \
+       }                                                                     \
+} while (0)
+#define ELF_CORE_WRITE_EXTRA_DATA                                            \
+do {                                                                         \
+       const struct elf_phdr *const vsyscall_phdrs =                         \
+               (const struct elf_phdr *) (VSYSCALL_BASE                      \
+                                          + VSYSCALL_EHDR->e_phoff);         \
+       int i;                                                                \
+       for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) {                        \
+               if (vsyscall_phdrs[i].p_type == PT_LOAD)                      \
+                       DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr,        \
+                                  vsyscall_phdrs[i].p_filesz);               \
+       }                                                                     \
 } while (0)
 
 #endif
index 0a7a35e2722b1f06239bb787b546cbc0cba7bd75..fb03869f90a83e788ef610a79dd30fc68b9d07e8 100644 (file)
@@ -29,8 +29,11 @@ typedef __s64        Elf64_Sxword;
 #define PT_NOTE    4
 #define PT_SHLIB   5
 #define PT_PHDR    6
+#define PT_LOOS           0x60000000
+#define PT_HIOS           0x6fffffff
 #define PT_LOPROC  0x70000000
 #define PT_HIPROC  0x7fffffff
+#define PT_GNU_EH_FRAME                0x6474e550
 #define PT_MIPS_REGINFO                0x70000000
 
 /* Flags in the e_flags field of the header */