From: Thomas Gleixner Date: Thu, 11 Oct 2007 09:17:10 +0000 (+0200) Subject: x86_64: move vdso X-Git-Tag: v2.6.24-rc1~1481 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=7648b1330c335601b7c09c25f77a03cda128fcab;p=~shefty%2Frdma-dev.git x86_64: move vdso Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore new file mode 100644 index 00000000000..f8b69d84238 --- /dev/null +++ b/arch/x86/vdso/.gitignore @@ -0,0 +1 @@ +vdso.lds diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile new file mode 100644 index 00000000000..8d03de029d9 --- /dev/null +++ b/arch/x86/vdso/Makefile @@ -0,0 +1,49 @@ +# +# x86-64 vDSO. +# + +# files to link into the vdso +# vdso-start.o has to be first +vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o + +# files to link into kernel +obj-y := vma.o vdso.o vdso-syms.o + +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) + +$(obj)/vdso.o: $(obj)/vdso.so + +targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o + +# The DSO images are built using a special linker script. +quiet_cmd_syscall = SYSCALL $@ + cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \ + -Wl,-T,$(filter-out FORCE,$^) -o $@ + +export CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) \ + -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +SYSCFLAGS_vdso.so = $(vdso-flags) + +$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so + +$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE + $(call if_changed,syscall) + +CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64 + +$(obj)/vclock_gettime.o: CFLAGS = $(CFL) +$(obj)/vgetcpu.o: CFLAGS = $(CFL) + +# We also create a special relocatable object that should mirror the symbol +# table and layout of the linked DSO. With ld -R we can then refer to +# these symbols in the kernel code rather than hand-coded addresses. +extra-y += vdso-syms.o +$(obj)/built-in.o: $(obj)/vdso-syms.o +$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o + +SYSCFLAGS_vdso-syms.o = -r -d +$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE + $(call if_changed,syscall) diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c new file mode 100644 index 00000000000..5b54cdfb2b0 --- /dev/null +++ b/arch/x86/vdso/vclock_gettime.c @@ -0,0 +1,121 @@ +/* + * Copyright 2006 Andi Kleen, SUSE Labs. + * Subject to the GNU Public License, v.2 + * + * Fast user context implementation of clock_gettime and gettimeofday. + * + * The code should have no internal unresolved relocations. + * Check with readelf after changing. + * Also alternative() doesn't work. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vextern.h" + +#define gtod vdso_vsyscall_gtod_data + +static long vdso_fallback_gettime(long clock, struct timespec *ts) +{ + long ret; + asm("syscall" : "=a" (ret) : + "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory"); + return ret; +} + +static inline long vgetns(void) +{ + long v; + cycles_t (*vread)(void); + vread = gtod->clock.vread; + v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask; + return (v * gtod->clock.mult) >> gtod->clock.shift; +} + +static noinline int do_realtime(struct timespec *ts) +{ + unsigned long seq, ns; + do { + seq = read_seqbegin(>od->lock); + ts->tv_sec = gtod->wall_time_sec; + ts->tv_nsec = gtod->wall_time_nsec; + ns = vgetns(); + } while (unlikely(read_seqretry(>od->lock, seq))); + timespec_add_ns(ts, ns); + return 0; +} + +/* Copy of the version in kernel/time.c which we cannot directly access */ +static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) +{ + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static noinline int do_monotonic(struct timespec *ts) +{ + unsigned long seq, ns, secs; + do { + seq = read_seqbegin(>od->lock); + secs = gtod->wall_time_sec; + ns = gtod->wall_time_nsec + vgetns(); + secs += gtod->wall_to_monotonic.tv_sec; + ns += gtod->wall_to_monotonic.tv_nsec; + } while (unlikely(read_seqretry(>od->lock, seq))); + vset_normalized_timespec(ts, secs, ns); + return 0; +} + +int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +{ + if (likely(gtod->sysctl_enabled && gtod->clock.vread)) + switch (clock) { + case CLOCK_REALTIME: + return do_realtime(ts); + case CLOCK_MONOTONIC: + return do_monotonic(ts); + } + return vdso_fallback_gettime(clock, ts); +} +int clock_gettime(clockid_t, struct timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); + +int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + long ret; + if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { + BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != + offsetof(struct timespec, tv_nsec) || + sizeof(*tv) != sizeof(struct timespec)); + do_realtime((struct timespec *)tv); + tv->tv_usec /= 1000; + if (unlikely(tz != NULL)) { + /* This relies on gcc inlining the memcpy. We'll notice + if it ever fails to do so. */ + memcpy(tz, >od->sys_tz, sizeof(struct timezone)); + } + return 0; + } + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + return ret; +} +int gettimeofday(struct timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); diff --git a/arch/x86/vdso/vdso-note.S b/arch/x86/vdso/vdso-note.S new file mode 100644 index 00000000000..79a071e4357 --- /dev/null +++ b/arch/x86/vdso/vdso-note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include +#include +#include + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/x86/vdso/vdso-start.S b/arch/x86/vdso/vdso-start.S new file mode 100644 index 00000000000..2dc2cdb84d6 --- /dev/null +++ b/arch/x86/vdso/vdso-start.S @@ -0,0 +1,2 @@ + .globl vdso_kernel_start +vdso_kernel_start: diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S new file mode 100644 index 00000000000..4b1620a1529 --- /dev/null +++ b/arch/x86/vdso/vdso.S @@ -0,0 +1,2 @@ + .section ".vdso","a" + .incbin "arch/x86/vdso/vdso.so" diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S new file mode 100644 index 00000000000..b9a60e665d0 --- /dev/null +++ b/arch/x86/vdso/vdso.lds.S @@ -0,0 +1,77 @@ +/* + * Linker script for vsyscall DSO. The vsyscall page is an ELF shared + * object prelinked to its virtual address, and with only one read-only + * segment (that fits in one page). This script controls its layout. + */ +#include +#include "voffset.h" + +#define VDSO_PRELINK 0xffffffffff700000 + +SECTIONS +{ + . = VDSO_PRELINK + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + /* This linker script is used both with -r and with -shared. + For the layouts to match, we need to skip more than enough + space for the dynamic symbol table et al. If this amount + is insufficient, ld -shared will barf. Just increase it here. */ + . = VDSO_PRELINK + VDSO_TEXT_OFFSET; + + .text : { *(.text) } :text + .text.ptr : { *(.text.ptr) } :text + . = VDSO_PRELINK + 0x900; + .data : { *(.data) } :text + .bss : { *(.bss) } :text + + .altinstructions : { *(.altinstructions) } :text + .altinstr_replacement : { *(.altinstr_replacement) } :text + + .note : { *(.note.*) } :text :note + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .dynamic : { *(.dynamic) } :text :dynamic + .useless : { + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.dynbss) + *(.gnu.linkonce.b.*) + } :text +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + getcpu; + __vdso_getcpu; + local: *; + }; +} diff --git a/arch/x86/vdso/vextern.h b/arch/x86/vdso/vextern.h new file mode 100644 index 00000000000..1683ba2ae3e --- /dev/null +++ b/arch/x86/vdso/vextern.h @@ -0,0 +1,16 @@ +#ifndef VEXTERN +#include +#define VEXTERN(x) \ + extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden"))); +#endif + +#define VMAGIC 0xfeedbabeabcdefabUL + +/* Any kernel variables used in the vDSO must be exported in the main + kernel's vmlinux.lds.S/vsyscall.h/proper __section and + put into vextern.h and be referenced as a pointer with vdso prefix. + The main kernel later fills in the values. */ + +VEXTERN(jiffies) +VEXTERN(vgetcpu_mode) +VEXTERN(vsyscall_gtod_data) diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c new file mode 100644 index 00000000000..91f6e85d0fc --- /dev/null +++ b/arch/x86/vdso/vgetcpu.c @@ -0,0 +1,50 @@ +/* + * Copyright 2006 Andi Kleen, SUSE Labs. + * Subject to the GNU Public License, v.2 + * + * Fast user context implementation of getcpu() + */ + +#include +#include +#include +#include +#include +#include +#include "vextern.h" + +long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) +{ + unsigned int dummy, p; + unsigned long j = 0; + + /* Fast cache - only recompute value once per jiffies and avoid + relatively costly rdtscp/cpuid otherwise. + This works because the scheduler usually keeps the process + on the same CPU and this syscall doesn't guarantee its + results anyways. + We do this here because otherwise user space would do it on + its own in a likely inferior way (no access to jiffies). + If you don't like it pass NULL. */ + if (tcache && tcache->blob[0] == (j = *vdso_jiffies)) { + p = tcache->blob[1]; + } else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) { + /* Load per CPU data from RDTSCP */ + rdtscp(dummy, dummy, p); + } else { + /* Load per CPU data from GDT */ + asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + } + if (tcache) { + tcache->blob[0] = j; + tcache->blob[1] = p; + } + if (cpu) + *cpu = p & 0xfff; + if (node) + *node = p >> 12; + return 0; +} + +long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) + __attribute__((weak, alias("__vdso_getcpu"))); diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c new file mode 100644 index 00000000000..ff9333e5fb0 --- /dev/null +++ b/arch/x86/vdso/vma.c @@ -0,0 +1,140 @@ +/* + * Set up the VMAs to tell the VM about the vDSO. + * Copyright 2007 Andi Kleen, SUSE Labs. + * Subject to the GPL, v.2 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "voffset.h" + +int vdso_enabled = 1; + +#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x; +#include "vextern.h" +#undef VEXTERN + +extern char vdso_kernel_start[], vdso_start[], vdso_end[]; +extern unsigned short vdso_sync_cpuid; + +struct page **vdso_pages; + +static inline void *var_ref(void *vbase, char *var, char *name) +{ + unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET; + void *p = vbase + offset; + if (*(void **)p != (void *)VMAGIC) { + printk("VDSO: variable %s broken\n", name); + vdso_enabled = 0; + } + return p; +} + +static int __init init_vdso_vars(void) +{ + int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; + int i; + char *vbase; + + vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); + if (!vdso_pages) + goto oom; + for (i = 0; i < npages; i++) { + struct page *p; + p = alloc_page(GFP_KERNEL); + if (!p) + goto oom; + vdso_pages[i] = p; + copy_page(page_address(p), vdso_start + i*PAGE_SIZE); + } + + vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL); + if (!vbase) + goto oom; + + if (memcmp(vbase, "\177ELF", 4)) { + printk("VDSO: I'm broken; not ELF\n"); + vdso_enabled = 0; + } + +#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x) +#define VEXTERN(x) \ + V(vdso_ ## x) = &__ ## x; +#include "vextern.h" +#undef VEXTERN + return 0; + + oom: + printk("Cannot allocate vdso\n"); + vdso_enabled = 0; + return -ENOMEM; +} +__initcall(init_vdso_vars); + +struct linux_binprm; + +/* Put the vdso above the (randomized) stack with another randomized offset. + This way there is no hole in the middle of address space. + To save memory make sure it is still in the same PTE as the stack top. + This doesn't give that many random bits */ +static unsigned long vdso_addr(unsigned long start, unsigned len) +{ + unsigned long addr, end; + unsigned offset; + end = (start + PMD_SIZE - 1) & PMD_MASK; + if (end >= TASK_SIZE64) + end = TASK_SIZE64; + end -= len; + /* This loses some more bits than a modulo, but is cheaper */ + offset = get_random_int() & (PTRS_PER_PTE - 1); + addr = start + (offset << PAGE_SHIFT); + if (addr >= end) + addr = end; + return addr; +} + +/* Setup a VMA at program startup for the vsyscall page. + Not called for compat tasks */ +int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) +{ + struct mm_struct *mm = current->mm; + unsigned long addr; + int ret; + unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE); + + if (!vdso_enabled) + return 0; + + down_write(&mm->mmap_sem); + addr = vdso_addr(mm->start_stack, len); + addr = get_unmapped_area(NULL, addr, len, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + ret = install_special_mapping(mm, addr, len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + vdso_pages); + if (ret) + goto up_fail; + + current->mm->context.vdso = (void *)addr; +up_fail: + up_write(&mm->mmap_sem); + return ret; +} + +static __init int vdso_setup(char *s) +{ + vdso_enabled = simple_strtoul(s, NULL, 0); + return 0; +} +__setup("vdso=", vdso_setup); diff --git a/arch/x86/vdso/voffset.h b/arch/x86/vdso/voffset.h new file mode 100644 index 00000000000..4af67c79085 --- /dev/null +++ b/arch/x86/vdso/voffset.h @@ -0,0 +1 @@ +#define VDSO_TEXT_OFFSET 0x600 diff --git a/arch/x86/vdso/vvar.c b/arch/x86/vdso/vvar.c new file mode 100644 index 00000000000..6fc22219a47 --- /dev/null +++ b/arch/x86/vdso/vvar.c @@ -0,0 +1,12 @@ +/* Define pointer to external vDSO variables. + These are part of the vDSO. The kernel fills in the real addresses + at boot time. This is done because when the vdso is linked the + kernel isn't yet and we don't know the final addresses. */ +#include +#include +#include +#include +#include + +#define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC; +#include "vextern.h" diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index ae48f179d71..d322592e38a 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -77,7 +77,7 @@ libs-y += arch/x86/lib/ core-y += arch/x86_64/kernel/ \ arch/x86_64/mm/ \ arch/x86/crypto/ \ - arch/x86_64/vdso/ + arch/x86/vdso/ core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/ drivers-$(CONFIG_PCI) += arch/x86_64/pci/ drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/ diff --git a/arch/x86_64/vdso/.gitignore b/arch/x86_64/vdso/.gitignore deleted file mode 100644 index f8b69d84238..00000000000 --- a/arch/x86_64/vdso/.gitignore +++ /dev/null @@ -1 +0,0 @@ -vdso.lds diff --git a/arch/x86_64/vdso/Makefile b/arch/x86_64/vdso/Makefile deleted file mode 100644 index 8d03de029d9..00000000000 --- a/arch/x86_64/vdso/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -# -# x86-64 vDSO. -# - -# files to link into the vdso -# vdso-start.o has to be first -vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o - -# files to link into kernel -obj-y := vma.o vdso.o vdso-syms.o - -vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) - -$(obj)/vdso.o: $(obj)/vdso.so - -targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o - -# The DSO images are built using a special linker script. -quiet_cmd_syscall = SYSCALL $@ - cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \ - -Wl,-T,$(filter-out FORCE,$^) -o $@ - -export CPPFLAGS_vdso.lds += -P -C -U$(ARCH) - -vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \ - $(call ld-option, -Wl$(comma)--hash-style=sysv) \ - -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 -SYSCFLAGS_vdso.so = $(vdso-flags) - -$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so - -$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE - $(call if_changed,syscall) - -CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64 - -$(obj)/vclock_gettime.o: CFLAGS = $(CFL) -$(obj)/vgetcpu.o: CFLAGS = $(CFL) - -# We also create a special relocatable object that should mirror the symbol -# table and layout of the linked DSO. With ld -R we can then refer to -# these symbols in the kernel code rather than hand-coded addresses. -extra-y += vdso-syms.o -$(obj)/built-in.o: $(obj)/vdso-syms.o -$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o - -SYSCFLAGS_vdso-syms.o = -r -d -$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE - $(call if_changed,syscall) diff --git a/arch/x86_64/vdso/vclock_gettime.c b/arch/x86_64/vdso/vclock_gettime.c deleted file mode 100644 index 5b54cdfb2b0..00000000000 --- a/arch/x86_64/vdso/vclock_gettime.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright 2006 Andi Kleen, SUSE Labs. - * Subject to the GNU Public License, v.2 - * - * Fast user context implementation of clock_gettime and gettimeofday. - * - * The code should have no internal unresolved relocations. - * Check with readelf after changing. - * Also alternative() doesn't work. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "vextern.h" - -#define gtod vdso_vsyscall_gtod_data - -static long vdso_fallback_gettime(long clock, struct timespec *ts) -{ - long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory"); - return ret; -} - -static inline long vgetns(void) -{ - long v; - cycles_t (*vread)(void); - vread = gtod->clock.vread; - v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask; - return (v * gtod->clock.mult) >> gtod->clock.shift; -} - -static noinline int do_realtime(struct timespec *ts) -{ - unsigned long seq, ns; - do { - seq = read_seqbegin(>od->lock); - ts->tv_sec = gtod->wall_time_sec; - ts->tv_nsec = gtod->wall_time_nsec; - ns = vgetns(); - } while (unlikely(read_seqretry(>od->lock, seq))); - timespec_add_ns(ts, ns); - return 0; -} - -/* Copy of the version in kernel/time.c which we cannot directly access */ -static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) -{ - while (nsec >= NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - ++sec; - } - while (nsec < 0) { - nsec += NSEC_PER_SEC; - --sec; - } - ts->tv_sec = sec; - ts->tv_nsec = nsec; -} - -static noinline int do_monotonic(struct timespec *ts) -{ - unsigned long seq, ns, secs; - do { - seq = read_seqbegin(>od->lock); - secs = gtod->wall_time_sec; - ns = gtod->wall_time_nsec + vgetns(); - secs += gtod->wall_to_monotonic.tv_sec; - ns += gtod->wall_to_monotonic.tv_nsec; - } while (unlikely(read_seqretry(>od->lock, seq))); - vset_normalized_timespec(ts, secs, ns); - return 0; -} - -int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) -{ - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) - switch (clock) { - case CLOCK_REALTIME: - return do_realtime(ts); - case CLOCK_MONOTONIC: - return do_monotonic(ts); - } - return vdso_fallback_gettime(clock, ts); -} -int clock_gettime(clockid_t, struct timespec *) - __attribute__((weak, alias("__vdso_clock_gettime"))); - -int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - long ret; - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { - BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != - offsetof(struct timespec, tv_nsec) || - sizeof(*tv) != sizeof(struct timespec)); - do_realtime((struct timespec *)tv); - tv->tv_usec /= 1000; - if (unlikely(tz != NULL)) { - /* This relies on gcc inlining the memcpy. We'll notice - if it ever fails to do so. */ - memcpy(tz, >od->sys_tz, sizeof(struct timezone)); - } - return 0; - } - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); - return ret; -} -int gettimeofday(struct timeval *, struct timezone *) - __attribute__((weak, alias("__vdso_gettimeofday"))); diff --git a/arch/x86_64/vdso/vdso-note.S b/arch/x86_64/vdso/vdso-note.S deleted file mode 100644 index 79a071e4357..00000000000 --- a/arch/x86_64/vdso/vdso-note.S +++ /dev/null @@ -1,12 +0,0 @@ -/* - * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. - * Here we can supply some information useful to userland. - */ - -#include -#include -#include - -ELFNOTE_START(Linux, 0, "a") - .long LINUX_VERSION_CODE -ELFNOTE_END diff --git a/arch/x86_64/vdso/vdso-start.S b/arch/x86_64/vdso/vdso-start.S deleted file mode 100644 index 2dc2cdb84d6..00000000000 --- a/arch/x86_64/vdso/vdso-start.S +++ /dev/null @@ -1,2 +0,0 @@ - .globl vdso_kernel_start -vdso_kernel_start: diff --git a/arch/x86_64/vdso/vdso.S b/arch/x86_64/vdso/vdso.S deleted file mode 100644 index 92e80c1972a..00000000000 --- a/arch/x86_64/vdso/vdso.S +++ /dev/null @@ -1,2 +0,0 @@ - .section ".vdso","a" - .incbin "arch/x86_64/vdso/vdso.so" diff --git a/arch/x86_64/vdso/vdso.lds.S b/arch/x86_64/vdso/vdso.lds.S deleted file mode 100644 index b9a60e665d0..00000000000 --- a/arch/x86_64/vdso/vdso.lds.S +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Linker script for vsyscall DSO. The vsyscall page is an ELF shared - * object prelinked to its virtual address, and with only one read-only - * segment (that fits in one page). This script controls its layout. - */ -#include -#include "voffset.h" - -#define VDSO_PRELINK 0xffffffffff700000 - -SECTIONS -{ - . = VDSO_PRELINK + SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - /* This linker script is used both with -r and with -shared. - For the layouts to match, we need to skip more than enough - space for the dynamic symbol table et al. If this amount - is insufficient, ld -shared will barf. Just increase it here. */ - . = VDSO_PRELINK + VDSO_TEXT_OFFSET; - - .text : { *(.text) } :text - .text.ptr : { *(.text.ptr) } :text - . = VDSO_PRELINK + 0x900; - .data : { *(.data) } :text - .bss : { *(.bss) } :text - - .altinstructions : { *(.altinstructions) } :text - .altinstr_replacement : { *(.altinstr_replacement) } :text - - .note : { *(.note.*) } :text :note - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .dynamic : { *(.dynamic) } :text :dynamic - .useless : { - *(.got.plt) *(.got) - *(.gnu.linkonce.d.*) - *(.dynbss) - *(.gnu.linkonce.b.*) - } :text -} - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - LINUX_2.6 { - global: - clock_gettime; - __vdso_clock_gettime; - gettimeofday; - __vdso_gettimeofday; - getcpu; - __vdso_getcpu; - local: *; - }; -} diff --git a/arch/x86_64/vdso/vextern.h b/arch/x86_64/vdso/vextern.h deleted file mode 100644 index 1683ba2ae3e..00000000000 --- a/arch/x86_64/vdso/vextern.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef VEXTERN -#include -#define VEXTERN(x) \ - extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden"))); -#endif - -#define VMAGIC 0xfeedbabeabcdefabUL - -/* Any kernel variables used in the vDSO must be exported in the main - kernel's vmlinux.lds.S/vsyscall.h/proper __section and - put into vextern.h and be referenced as a pointer with vdso prefix. - The main kernel later fills in the values. */ - -VEXTERN(jiffies) -VEXTERN(vgetcpu_mode) -VEXTERN(vsyscall_gtod_data) diff --git a/arch/x86_64/vdso/vgetcpu.c b/arch/x86_64/vdso/vgetcpu.c deleted file mode 100644 index 91f6e85d0fc..00000000000 --- a/arch/x86_64/vdso/vgetcpu.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2006 Andi Kleen, SUSE Labs. - * Subject to the GNU Public License, v.2 - * - * Fast user context implementation of getcpu() - */ - -#include -#include -#include -#include -#include -#include -#include "vextern.h" - -long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) -{ - unsigned int dummy, p; - unsigned long j = 0; - - /* Fast cache - only recompute value once per jiffies and avoid - relatively costly rdtscp/cpuid otherwise. - This works because the scheduler usually keeps the process - on the same CPU and this syscall doesn't guarantee its - results anyways. - We do this here because otherwise user space would do it on - its own in a likely inferior way (no access to jiffies). - If you don't like it pass NULL. */ - if (tcache && tcache->blob[0] == (j = *vdso_jiffies)) { - p = tcache->blob[1]; - } else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) { - /* Load per CPU data from RDTSCP */ - rdtscp(dummy, dummy, p); - } else { - /* Load per CPU data from GDT */ - asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); - } - if (tcache) { - tcache->blob[0] = j; - tcache->blob[1] = p; - } - if (cpu) - *cpu = p & 0xfff; - if (node) - *node = p >> 12; - return 0; -} - -long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) - __attribute__((weak, alias("__vdso_getcpu"))); diff --git a/arch/x86_64/vdso/vma.c b/arch/x86_64/vdso/vma.c deleted file mode 100644 index ff9333e5fb0..00000000000 --- a/arch/x86_64/vdso/vma.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Set up the VMAs to tell the VM about the vDSO. - * Copyright 2007 Andi Kleen, SUSE Labs. - * Subject to the GPL, v.2 - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "voffset.h" - -int vdso_enabled = 1; - -#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x; -#include "vextern.h" -#undef VEXTERN - -extern char vdso_kernel_start[], vdso_start[], vdso_end[]; -extern unsigned short vdso_sync_cpuid; - -struct page **vdso_pages; - -static inline void *var_ref(void *vbase, char *var, char *name) -{ - unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET; - void *p = vbase + offset; - if (*(void **)p != (void *)VMAGIC) { - printk("VDSO: variable %s broken\n", name); - vdso_enabled = 0; - } - return p; -} - -static int __init init_vdso_vars(void) -{ - int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; - int i; - char *vbase; - - vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); - if (!vdso_pages) - goto oom; - for (i = 0; i < npages; i++) { - struct page *p; - p = alloc_page(GFP_KERNEL); - if (!p) - goto oom; - vdso_pages[i] = p; - copy_page(page_address(p), vdso_start + i*PAGE_SIZE); - } - - vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL); - if (!vbase) - goto oom; - - if (memcmp(vbase, "\177ELF", 4)) { - printk("VDSO: I'm broken; not ELF\n"); - vdso_enabled = 0; - } - -#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x) -#define VEXTERN(x) \ - V(vdso_ ## x) = &__ ## x; -#include "vextern.h" -#undef VEXTERN - return 0; - - oom: - printk("Cannot allocate vdso\n"); - vdso_enabled = 0; - return -ENOMEM; -} -__initcall(init_vdso_vars); - -struct linux_binprm; - -/* Put the vdso above the (randomized) stack with another randomized offset. - This way there is no hole in the middle of address space. - To save memory make sure it is still in the same PTE as the stack top. - This doesn't give that many random bits */ -static unsigned long vdso_addr(unsigned long start, unsigned len) -{ - unsigned long addr, end; - unsigned offset; - end = (start + PMD_SIZE - 1) & PMD_MASK; - if (end >= TASK_SIZE64) - end = TASK_SIZE64; - end -= len; - /* This loses some more bits than a modulo, but is cheaper */ - offset = get_random_int() & (PTRS_PER_PTE - 1); - addr = start + (offset << PAGE_SHIFT); - if (addr >= end) - addr = end; - return addr; -} - -/* Setup a VMA at program startup for the vsyscall page. - Not called for compat tasks */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) -{ - struct mm_struct *mm = current->mm; - unsigned long addr; - int ret; - unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE); - - if (!vdso_enabled) - return 0; - - down_write(&mm->mmap_sem); - addr = vdso_addr(mm->start_stack, len); - addr = get_unmapped_area(NULL, addr, len, 0, 0); - if (IS_ERR_VALUE(addr)) { - ret = addr; - goto up_fail; - } - - ret = install_special_mapping(mm, addr, len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, - vdso_pages); - if (ret) - goto up_fail; - - current->mm->context.vdso = (void *)addr; -up_fail: - up_write(&mm->mmap_sem); - return ret; -} - -static __init int vdso_setup(char *s) -{ - vdso_enabled = simple_strtoul(s, NULL, 0); - return 0; -} -__setup("vdso=", vdso_setup); diff --git a/arch/x86_64/vdso/voffset.h b/arch/x86_64/vdso/voffset.h deleted file mode 100644 index 4af67c79085..00000000000 --- a/arch/x86_64/vdso/voffset.h +++ /dev/null @@ -1 +0,0 @@ -#define VDSO_TEXT_OFFSET 0x600 diff --git a/arch/x86_64/vdso/vvar.c b/arch/x86_64/vdso/vvar.c deleted file mode 100644 index 6fc22219a47..00000000000 --- a/arch/x86_64/vdso/vvar.c +++ /dev/null @@ -1,12 +0,0 @@ -/* Define pointer to external vDSO variables. - These are part of the vDSO. The kernel fills in the real addresses - at boot time. This is done because when the vdso is linked the - kernel isn't yet and we don't know the final addresses. */ -#include -#include -#include -#include -#include - -#define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC; -#include "vextern.h"