From 77725b26090c8e0b604116d2caad5456753b0824 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:26:23 -0500 Subject: [PATCH] Import 2.3.12pre3 --- Documentation/IO-APIC.txt | 85 +++--- arch/alpha/kernel/entry.S | 8 + arch/arm/kernel/calls.S | 10 +- arch/i386/kernel/entry.S | 10 +- arch/i386/kernel/io_apic.c | 246 +++++++--------- arch/i386/kernel/irq.h | 8 +- arch/i386/kernel/process.c | 17 +- arch/i386/kernel/smp.c | 42 +-- arch/i386/mm/fault.c | 4 +- arch/m68k/kernel/entry.S | 8 + arch/mips/kernel/syscalls.h | 8 + arch/ppc/kernel/misc.S | 10 +- arch/sparc/kernel/systbls.S | 3 +- arch/sparc64/kernel/systbls.S | 6 +- fs/exec.c | 27 +- include/asm-alpha/siginfo.h | 2 + include/asm-alpha/unistd.h | 8 + include/asm-arm/siginfo.h | 2 + include/asm-arm/unistd.h | 8 + include/asm-i386/{i82489.h => apic.h} | 18 +- include/asm-i386/fixmap.h | 4 +- include/asm-i386/mmu_context.h | 33 +-- include/asm-i386/processor.h | 7 +- include/asm-i386/siginfo.h | 5 +- include/asm-i386/smp.h | 12 +- include/asm-i386/unistd.h | 8 + include/asm-m68k/siginfo.h | 2 + include/asm-m68k/unistd.h | 8 + include/asm-mips/siginfo.h | 2 + include/asm-mips/unistd.h | 8 + include/asm-ppc/siginfo.h | 4 +- include/asm-ppc/unistd.h | 8 + include/asm-sparc/siginfo.h | 4 +- include/asm-sparc/unistd.h | 8 + include/asm-sparc64/siginfo.h | 4 +- include/asm-sparc64/unistd.h | 8 + include/linux/limits.h | 2 + include/linux/sched.h | 27 +- include/linux/time.h | 38 ++- kernel/exit.c | 29 ++ kernel/fork.c | 35 ++- kernel/itimer.c | 391 +++++++++++++++++++++++++- kernel/sched.c | 16 +- kernel/signal.c | 78 ++--- kernel/time.c | 54 ++++ 45 files changed, 966 insertions(+), 359 deletions(-) rename include/asm-i386/{i82489.h => apic.h} (87%) diff --git a/Documentation/IO-APIC.txt b/Documentation/IO-APIC.txt index 76b939e8a317..1a7dbd410429 100644 --- a/Documentation/IO-APIC.txt +++ b/Documentation/IO-APIC.txt @@ -1,51 +1,41 @@ -Most (all) Intel SMP boards have the so-called 'IO-APIC', which is 
-an enhanced interrupt controller, able to route hardware interrupts -to multiple CPUs, or to CPU groups. +Most (all) Intel-MP compliant SMP boards have the so-called 'IO-APIC', +which is an enhanced interrupt controller, it enables us to route +hardware interrupts to multiple CPUs, or to CPU groups. -Linux supports the IO-APIC, but unfortunately there are broken boards -out there which make it unsafe to enable the IO-APIC unconditionally. -The Linux policy thus is to enable the IO-APIC only if it's 100% safe, ie.: +Linux supports all variants of compliant SMP boards, including ones with +multiple IO-APICs. (multiple IO-APICs are used in high-end servers to +distribute IRQ load further). - - the board is on the 'whitelist' +There are (a few) known breakages in certain older boards, which bugs are +usually worked around by the kernel. If your MP-compliant SMP board does +not boot Linux, then consult the linux-smp mailing list archives first. - or - the board does not have PCI pins connected to the IO-APIC - - or - the user has overridden blacklisted settings with the - pirq= boot option line. - -Kernel messages tell you whether the board is 'safe'. If your box -boots with enabled IO-APIC IRQs, then you have nothing else to do. Your +If your box boots fine with enabled IO-APIC IRQs, then your /proc/interrupts will look like this one: ----------------------------> - hell:~> cat /proc/interrupts - CPU0 CPU1 - 0: 90782 0 XT PIC timer - 1: 4135 2375 IO-APIC keyboard - 2: 0 0 XT PIC cascade - 3: 851 807 IO-APIC serial - 9: 6 22 IO-APIC ncr53c8xx - 11: 307 154 IO-APIC NE2000 - 13: 4 0 XT PIC fpu - 14: 56000 30610 IO-APIC ide0 - NMI: 0 - IPI: 0 - <---------------------------- - -some interrupts will still be 'XT PIC', but this is not a problem, none -of those IRQ sources is 'heavy'. 
- -If one of your boot messages says 'unlisted/blacklisted board, DISABLING -IO-APIC IRQs', then you should do this to get multi-CPU IO-APIC IRQs -running: - - A) if your board is unlisted, then mail to linux-smp to get - it into either the white or the blacklist - B) if your board is blacklisted, then figure out the appropriate - pirq= option to get your system to boot - - -pirq= lines look like the following in /etc/lilo.conf: + hell:~> cat /proc/interrupts + CPU0 + 0: 1360293 IO-APIC-edge timer + 1: 4 IO-APIC-edge keyboard + 2: 0 XT-PIC cascade + 13: 1 XT-PIC fpu + 14: 1448 IO-APIC-edge ide0 + 16: 28232 IO-APIC-level Intel EtherExpress Pro 10/100 Ethernet + 17: 51304 IO-APIC-level eth0 + NMI: 0 + ERR: 0 + hell:~> + <---------------------------- + +some interrupts are still listed as 'XT PIC', but this is not a problem, +none of those IRQ sources is performance-critical. + + +in the unlikely case that your board does not create a working mp-table, +you can use the pirq= boot parameter to 'hand-construct' IRQ entries. This +is nontrivial though and cannot be automated. One sample /etc/lilo.conf +entry: append="pirq=15,11,10" @@ -111,8 +101,7 @@ permute all IRQ numbers properly ... it will take some time though. An won't function properly (if it's inserted as eg. a module). If you have 2 PCI buses, then you can use up to 8 pirq values. Although such -boards tend to have a good configuration and will be included in the -whitelist. +boards tend to have a good configuration. Be prepared that it might happen that you need some strange pirq line: @@ -120,14 +109,6 @@ Be prepared that it might happen that you need some strange pirq line: use smart try-and-err techniques to find out the correct pirq line ... 
- -the following pirq line can be used to force a board into the whitelist: - - append="pirq=0" - -[if your system works with no problems after this, then it should be added -to the official whitelist, contact us] - good luck and mail to linux-smp@vger.rutgers.edu or linux-kernel@vger.rutgers.edu if you have any problems that are not covered by this document. diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index b4e71bf56cd6..79ca929137fa 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -1143,3 +1143,11 @@ sys_call_table: .quad sys_capget .quad sys_capset .quad sys_sendfile /* 370 */ + .quad sys_timer_create + .quad sys_timer_settime + .quad sys_timer_gettime + .quad sys_timer_setoverrun + .quad sys_timer_delete /* 375 */ + .quad sys_clock_gettime + .quad sys_clock_settime + .quad sys_clock_getres /* 378 */ diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index adad3a551c32..568a0aaf74a7 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -200,8 +200,16 @@ .long SYMBOL_NAME(sys_ni_syscall) .long SYMBOL_NAME(sys_ni_syscall) /* 190 */ .long SYMBOL_NAME(sys_vfork_wrapper) + .long SYMBOL_NAME(sys_timer_create) + .long SYMBOL_NAME(sys_timer_settime) + .long SYMBOL_NAME(sys_timer_gettime) + .long SYMBOL_NAME(sys_timer_getoverrun) +/* 195 */ .long SYMBOL_NAME(sys_timer_delete) + .long SYMBOL_NAME(sys_clock_gettime) + .long SYMBOL_NAME(sys_clock_settime) + .long SYMBOL_NAME(sys_clock_getres) - .rept NR_syscalls-186 + .rept NR_syscalls-198 .long SYMBOL_NAME(sys_ni_syscall) .endr #endif diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 8c04667fdfc9..cfdac84b63bc 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -560,6 +560,14 @@ ENTRY(sys_call_table) .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ .long SYMBOL_NAME(sys_vfork) /* 190 */ + .long SYMBOL_NAME(sys_timer_create) + .long 
SYMBOL_NAME(sys_timer_settime) + .long SYMBOL_NAME(sys_timer_gettime) + .long SYMBOL_NAME(sys_timer_getoverrun) + .long SYMBOL_NAME(sys_timer_delete) /* 195 */ + .long SYMBOL_NAME(sys_clock_gettime) + .long SYMBOL_NAME(sys_clock_settime) + .long SYMBOL_NAME(sys_clock_getres) /* 198 */ /* * NOTE!! This doesn't have to be exact - we just have @@ -567,6 +575,6 @@ ENTRY(sys_call_table) * entries. Don't panic if you notice that this hasn't * been shrunk every time we add a new system call. */ - .rept NR_syscalls-190 + .rept NR_syscalls-198 .long SYMBOL_NAME(sys_ni_syscall) .endr diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 42ebd964376f..cae25555adc0 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -5,6 +5,12 @@ * * Many thanks to Stig Venaas for trying out countless experimental * patches and reporting/debugging problems patiently! + * + * (c) 1999, Multiple IO-APIC support, developed by + * Ken-ichi Yaku and + * Hidemi Kishimoto , + * further tested and cleaned up by Zach Brown + * and Ingo Molnar */ #include @@ -19,7 +25,7 @@ * volatile is justified in this case, IO-APIC register contents * might change spontaneously, GCC should not cache it */ -#define IO_APIC_BASE ((volatile int *)fix_to_virt(FIX_IO_APIC_BASE)) +#define IO_APIC_BASE(idx) ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx)) /* * The structure of the IO-APIC: @@ -45,9 +51,10 @@ struct IO_APIC_reg_02 { } __attribute__ ((packed)); /* - * # of IRQ routing registers + * # of IO-APICs and # of IRQ routing registers */ -int nr_ioapic_registers = 0; +int nr_ioapics = 0; +int nr_ioapic_registers[MAX_IO_APICS]; enum ioapic_irq_destination_types { dest_Fixed = 0, @@ -94,6 +101,7 @@ enum mp_irq_source_types { mp_ExtINT = 3 }; +struct mpc_config_ioapic mp_apics[MAX_IO_APICS];/* I/O APIC entries */ int mp_irq_entries = 0; /* # of MP IRQ source entries */ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* MP IRQ source entries */ @@ -108,34 +116,34 @@ 
int mpc_default_type = 0; /* non-0 if default (table-less) * between pins and IRQs. */ -static inline unsigned int io_apic_read(unsigned int reg) +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { - *IO_APIC_BASE = reg; - return *(IO_APIC_BASE+4); + *IO_APIC_BASE(apic) = reg; + return *(IO_APIC_BASE(apic)+4); } -static inline void io_apic_write(unsigned int reg, unsigned int value) +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) { - *IO_APIC_BASE = reg; - *(IO_APIC_BASE+4) = value; + *IO_APIC_BASE(apic) = reg; + *(IO_APIC_BASE(apic)+4) = value; } /* * Re-write a value: to be used for read-modify-write * cycles where the read already set up the index register. */ -static inline void io_apic_modify(unsigned int value) +static inline void io_apic_modify(unsigned int apic, unsigned int value) { - *(IO_APIC_BASE+4) = value; + *(IO_APIC_BASE(apic)+4) = value; } /* * Synchronize the IO-APIC and the CPU by doing * a dummy read from the IO-APIC */ -static inline void io_apic_sync(void) +static inline void io_apic_sync(unsigned int apic) { - (void) *(IO_APIC_BASE+4); + (void) *(IO_APIC_BASE(apic)+4); } /* @@ -146,7 +154,7 @@ static inline void io_apic_sync(void) #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) static struct irq_pin_list { - int pin, next; + int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; /* @@ -154,7 +162,7 @@ static struct irq_pin_list { * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. 
*/ -static void add_pin_to_irq(unsigned int irq, int pin) +static void add_pin_to_irq(unsigned int irq, int apic, int pin) { static int first_free_entry = NR_IRQS; struct irq_pin_list *entry = irq_2_pin + irq; @@ -168,6 +176,7 @@ static void add_pin_to_irq(unsigned int irq, int pin) if (++first_free_entry >= PIN_MAP_SIZE) panic("io_apic.c: whoops"); } + entry->apic = apic; entry->pin = pin; } @@ -183,9 +192,9 @@ static void name##_IO_APIC_irq(unsigned int irq) \ pin = entry->pin; \ if (pin == -1) \ break; \ - reg = io_apic_read(0x10 + R + pin*2); \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ reg ACTION; \ - io_apic_modify(reg); \ + io_apic_modify(entry->apic, reg); \ if (!entry->next) \ break; \ entry = irq_2_pin + entry->next; \ @@ -197,12 +206,12 @@ static void name##_IO_APIC_irq(unsigned int irq) \ * We disable IO-APIC IRQs by setting their 'destination CPU mask' to * zero. Trick by Ramesh Nalluri. */ -DO_ACTION( disable, 1, &= 0x00ffffff, io_apic_sync()) /* destination = 0x00 */ +DO_ACTION( disable, 1, &= 0x00ffffff, io_apic_sync(entry->apic))/* destination = 0x00 */ DO_ACTION( enable, 1, |= 0xff000000, ) /* destination = 0xff */ -DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync()) /* mask = 1 */ +DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync(entry->apic))/* mask = 1 */ DO_ACTION( unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ -static void clear_IO_APIC_pin(unsigned int pin) +static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@ -211,16 +220,17 @@ static void clear_IO_APIC_pin(unsigned int pin) */ memset(&entry, 0, sizeof(entry)); entry.mask = 1; - io_apic_write(0x10 + 2 * pin, *(((int *)&entry) + 0)); - io_apic_write(0x11 + 2 * pin, *(((int *)&entry) + 1)); + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); } static void clear_IO_APIC (void) { - int pin; + int apic, pin; - for (pin = 0; pin < nr_ioapic_registers; pin++) - 
clear_IO_APIC_pin(pin); + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + clear_IO_APIC_pin(apic, pin); } /* @@ -270,12 +280,13 @@ void __init ioapic_pirq_setup(char *str, int *ints) /* * Find the IRQ entry number of a certain pin. */ -static int __init find_irq_entry(int pin, int type) +static int __init find_irq_entry(int apic, int pin, int type) { int i; for (i = 0; i < mp_irq_entries; i++) if ( (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_dstapic == mp_apics[apic].mpc_apicid) && (mp_irqs[i].mpc_dstirq == pin)) return i; @@ -307,21 +318,26 @@ static int __init find_timer_pin(int type) * Find a specific PCI IRQ entry. * Not an initfunc, possibly needed by modules */ +static int __init pin_2_irq(int idx, int apic, int pin); int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pci_pin) { - int i; + int apic, i; for (i = 0; i < mp_irq_entries; i++) { int lbus = mp_irqs[i].mpc_srcbus; - if (IO_APIC_IRQ(mp_irqs[i].mpc_dstirq) && + for (apic = 0; apic < nr_ioapics; apic++) + if (mp_apics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + break; + + if ((apic || IO_APIC_IRQ(mp_irqs[i].mpc_dstirq)) && (mp_bus_id_to_type[lbus] == MP_BUS_PCI) && !mp_irqs[i].mpc_irqtype && (bus == mp_bus_id_to_pci_bus[mp_irqs[i].mpc_srcbus]) && (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f)) && (pci_pin == (mp_irqs[i].mpc_srcbusirq & 3))) - return mp_irqs[i].mpc_dstirq; + return pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq); } return -1; } @@ -491,9 +507,9 @@ static inline int irq_trigger(int idx) return MPBIOS_trigger(idx); } -static int __init pin_2_irq(int idx, int pin) +static int __init pin_2_irq(int idx, int apic, int pin) { - int irq; + int irq, i; int bus = mp_irqs[idx].mpc_srcbus; /* @@ -513,9 +529,12 @@ static int __init pin_2_irq(int idx, int pin) case MP_BUS_PCI: /* PCI pin */ { /* - * PCI IRQs are 'directly mapped' + * PCI IRQs are mapped in order */ - irq = pin; + i = irq = 0; + while (i < apic) + irq += 
nr_ioapic_registers[i++]; + irq += pin; break; } default: @@ -545,12 +564,14 @@ static int __init pin_2_irq(int idx, int pin) static inline int IO_APIC_irq_trigger(int irq) { - int idx, pin; + int apic, idx, pin; - for (pin = 0; pin < nr_ioapic_registers; pin++) { - idx = find_irq_entry(pin,mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx,pin))) - return irq_trigger(idx); + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic,pin,mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin))) + return irq_trigger(idx); + } } /* * nonexistent IRQs are edge default @@ -582,11 +603,12 @@ static int __init assign_irq_vector(int irq) void __init setup_IO_APIC_irqs(void) { struct IO_APIC_route_entry entry; - int pin, idx, bus, irq, first_notcon = 1; + int apic, pin, idx, irq, first_notcon = 1; printk("init IO_APIC IRQs\n"); - for (pin = 0; pin < nr_ioapic_registers; pin++) { + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { /* * add it to the IO-APIC irq-routing table: @@ -598,13 +620,13 @@ void __init setup_IO_APIC_irqs(void) entry.mask = 0; /* enable IRQ */ entry.dest.logical.logical_dest = 0; /* but no route */ - idx = find_irq_entry(pin,mp_INT); + idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { if (first_notcon) { - printk(" IO-APIC pin %d", pin); + printk(" IO-APIC (apicid-pin) %d-%d", mp_apics[apic].mpc_apicid, pin); first_notcon = 0; } else - printk(", %d", pin); + printk(", %d-%d", mp_apics[apic].mpc_apicid, pin); continue; } @@ -617,18 +639,17 @@ void __init setup_IO_APIC_irqs(void) entry.dest.logical.logical_dest = 0xff; } - irq = pin_2_irq(idx,pin); - add_pin_to_irq(irq, pin); + irq = pin_2_irq(idx,apic,pin); + add_pin_to_irq(irq, apic, pin); - if (!IO_APIC_IRQ(irq)) + if (!apic && !IO_APIC_IRQ(irq)) continue; entry.vector = assign_irq_vector(irq); - bus = mp_irqs[idx].mpc_srcbus; - - io_apic_write(0x11+2*pin, *(((int 
*)&entry)+1)); - io_apic_write(0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + } } if (!first_notcon) @@ -638,7 +659,7 @@ void __init setup_IO_APIC_irqs(void) /* * Set up a certain pin as ExtINT delivered interrupt */ -void __init setup_ExtINT_pin(unsigned int pin, int irq) +void __init setup_ExtINT_pin(unsigned int apic, unsigned int pin, int irq) { struct IO_APIC_route_entry entry; @@ -662,8 +683,8 @@ void __init setup_ExtINT_pin(unsigned int pin, int irq) entry.polarity = 0; entry.trigger = 0; - io_apic_write(0x10+2*pin, *(((int *)&entry)+0)); - io_apic_write(0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); } void __init UNEXPECTED_IO_APIC(void) @@ -674,17 +695,14 @@ void __init UNEXPECTED_IO_APIC(void) void __init print_IO_APIC(void) { - int i; + int apic, i; struct IO_APIC_reg_00 reg_00; struct IO_APIC_reg_01 reg_01; struct IO_APIC_reg_02 reg_02; printk("number of MP IRQ sources: %d.\n", mp_irq_entries); - printk("number of IO-APIC registers: %d.\n", nr_ioapic_registers); - - *(int *)&reg_00 = io_apic_read(0); - *(int *)&reg_01 = io_apic_read(1); - *(int *)&reg_02 = io_apic_read(2); + for (i = 0; i < nr_ioapics; i++) + printk("number of IO-APIC #%d registers: %d.\n", mp_apics[i].mpc_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. We have to @@ -692,6 +710,12 @@ void __init print_IO_APIC(void) */ printk("testing the IO APIC.......................\n"); + for (apic = 0; apic < nr_ioapics; apic++) { + + *(int *)&reg_00 = io_apic_read(apic, 0); + *(int *)&reg_01 = io_apic_read(apic, 1); + *(int *)&reg_02 = io_apic_read(apic, 2); + printk("\nIO APIC #%d......\n", mp_apics[apic].mpc_apicid); printk(".... register #00: %08X\n", *(int *)&reg_00); printk("....... 
: physical APIC id: %02X\n", reg_00.ID); if (reg_00.__reserved_1 || reg_00.__reserved_2) @@ -706,8 +730,6 @@ void __init print_IO_APIC(void) (reg_01.entries != 0x3F) /* bigger Xeon boards */ ) UNEXPECTED_IO_APIC(); - if (reg_01.entries == 0x0f) - printk("....... [IO-APIC cannot route PCI PIRQ 0-3]\n"); printk("....... : IO APIC version: %04X\n", reg_01.version); if ( (reg_01.version != 0x10) && /* oldest IO-APICs */ @@ -731,8 +753,8 @@ void __init print_IO_APIC(void) for (i = 0; i <= reg_01.entries; i++) { struct IO_APIC_route_entry entry; - *(((int *)&entry)+0) = io_apic_read(0x10+i*2); - *(((int *)&entry)+1) = io_apic_read(0x11+i*2); + *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); + *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); printk(" %02x %03X %02X ", i, @@ -751,7 +773,7 @@ void __init print_IO_APIC(void) entry.vector ); } - + } printk(KERN_DEBUG "IRQ to pin mappings:\n"); for (i = 0; i < NR_IRQS; i++) { struct irq_pin_list *entry = irq_2_pin + i; @@ -796,9 +818,12 @@ static void __init init_sym_mode(void) */ { struct IO_APIC_reg_01 reg_01; + int i; - *(int *)&reg_01 = io_apic_read(1); - nr_ioapic_registers = reg_01.entries+1; + for (i = 0; i < nr_ioapics; i++) { + *(int *)&reg_01 = io_apic_read(i, 1); + nr_ioapic_registers[i] = reg_01.entries+1; + } } /* @@ -827,55 +852,6 @@ void init_pic_mode(void) printk("...done.\n"); } -char ioapic_OEM_ID [16]; -char ioapic_Product_ID [16]; - -struct ioapic_list_entry { - char * oem_id; - char * product_id; -}; - -struct ioapic_list_entry __initdata ioapic_whitelist [] = { - - { "INTEL " , "PR440FX " }, - { "INTEL " , "82440FX " }, - { "AIR " , "KDI " }, - { 0 , 0 } -}; - -struct ioapic_list_entry __initdata ioapic_blacklist [] = { - - { "OEM00000" , "PROD00000000" }, - { 0 , 0 } -}; - -static int __init in_ioapic_list(struct ioapic_list_entry * table) -{ - for ( ; table->oem_id ; table++) - if ((!strcmp(table->oem_id,ioapic_OEM_ID)) && - (!strcmp(table->product_id,ioapic_Product_ID))) - return 1; - return 0; -} - 
-static int __init ioapic_whitelisted(void) -{ -/* - * Right now, whitelist everything to see whether the new parsing - * routines really do work for everybody. - */ -#if 1 - return 1; -#else - return in_ioapic_list(ioapic_whitelist); -#endif -} - -static int __init ioapic_blacklisted(void) -{ - return in_ioapic_list(ioapic_blacklist); -} - static void __init setup_ioapic_id(void) { struct IO_APIC_reg_00 reg_00; @@ -897,15 +873,15 @@ static void __init setup_ioapic_id(void) /* * Set the ID */ - *(int *)&reg_00 = io_apic_read(0); + *(int *)&reg_00 = io_apic_read(0, 0); printk("...changing IO-APIC physical APIC ID to 2...\n"); reg_00.ID = 0x2; - io_apic_write(0, *(int *)&reg_00); + io_apic_write(0, 0, *(int *)&reg_00); /* * Sanity check */ - *(int *)&reg_00 = io_apic_read(0); + *(int *)&reg_00 = io_apic_read(0, 0); if (reg_00.ID != 0x2) panic("could not set ID"); } @@ -1227,7 +1203,10 @@ static inline void check_timer(void) if (pin2 != -1) { printk(".. (found pin %d) ...", pin2); - setup_ExtINT_pin(pin2, 0); + /* + * legacy devices should be connected to IO APIC #0 + */ + setup_ExtINT_pin(0, pin2, 0); make_8259A_irq(0); } @@ -1238,9 +1217,9 @@ static inline void check_timer(void) * Just in case ... */ if (pin1 != -1) - clear_IO_APIC_pin(pin1); + clear_IO_APIC_pin(0, pin1); if (pin2 != -1) - clear_IO_APIC_pin(pin2); + clear_IO_APIC_pin(0, pin2); make_8259A_irq(0); @@ -1273,29 +1252,8 @@ void __init setup_IO_APIC(void) { init_sym_mode(); - /* - * Determine the range of IRQs handled by the IO-APIC. 
The - * following boards can be fully enabled: - * - * - whitelisted ones - * - those which have no PCI pins connected - * - those for which the user has specified a pirq= parameter - */ - if ( ioapic_whitelisted() || - (nr_ioapic_registers == 16) || - pirqs_enabled) - { - printk("ENABLING IO-APIC IRQs\n"); - io_apic_irqs = ~PIC_IRQS; - } else { - if (ioapic_blacklisted()) - printk(" blacklisted board, DISABLING IO-APIC IRQs\n"); - else - printk(" unlisted board, DISABLING IO-APIC IRQs\n"); - - printk(" see Documentation/IO-APIC.txt to enable them\n"); - io_apic_irqs = 0; - } + printk("ENABLING IO-APIC IRQs\n"); + io_apic_irqs = ~PIC_IRQS; /* * If there are no explicit MP IRQ entries, it's either one of the diff --git a/arch/i386/kernel/irq.h b/arch/i386/kernel/irq.h index 1023cd4da31e..a6ee1948de73 100644 --- a/arch/i386/kernel/irq.h +++ b/arch/i386/kernel/irq.h @@ -57,9 +57,9 @@ typedef struct { /* * Special IRQ vectors used by the SMP architecture: * - * (some of the following vectors are 'rare', they might be merged - * into a single vector to save vector space. TLB, reschedule and - * local APIC vectors are performance-critical.) + * (some of the following vectors are 'rare', they are merged + * into a single vector (FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical.) 
*/ #define RESCHEDULE_VECTOR 0x30 #define INVALIDATE_TLB_VECTOR 0x31 @@ -120,8 +120,6 @@ enum mp_bustype { }; extern int mp_bus_id_to_type [MAX_MP_BUSSES]; extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES]; -extern char ioapic_OEM_ID [16]; -extern char ioapic_Product_ID [16]; extern spinlock_t irq_controller_lock; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 952f5b93d789..d98600fddfb6 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -327,8 +327,8 @@ void machine_restart(char * __unused) * the way we set root page dir in the future, then we wont break a * seldom used feature ;) */ - current->mm->pgd = swapper_pg_dir; + current->active_mm->pgd = swapper_pg_dir; activate_context(); /* Write 0x1234 to absolute memory location 0x472. The BIOS reads @@ -757,21 +757,6 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p) asm volatile("movl %%fs,%0":"=m" (*(int *)&prev->fs)); asm volatile("movl %%gs,%0":"=m" (*(int *)&prev->gs)); - /* - * Re-load LDT if necessary - */ - if (prev_p->active_mm->segments != next_p->active_mm->segments) - load_LDT(next_p->mm); - - /* Re-load page tables */ - { - unsigned long new_cr3 = next->cr3; - - tss->cr3 = new_cr3; - if (new_cr3 != prev->cr3) - asm volatile("movl %0,%%cr3": :"r" (new_cr3)); - } - /* * Restore %fs and %gs. 
*/ diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index a62937b8631e..ffeea7e1b481 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -128,6 +128,8 @@ volatile unsigned long ipi_count; /* Number of IPIs delivered */ const char lk_lockmsg[] = "lock from interrupt context at %p\n"; int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, }; +extern int nr_ioapics; +extern struct mpc_config_ioapic mp_apics [MAX_IO_APICS]; extern int mp_irq_entries; extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; extern int mpc_default_type; @@ -258,12 +260,10 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) } memcpy(str,mpc->mpc_oem,8); str[8]=0; - memcpy(ioapic_OEM_ID,str,9); printk("OEM ID: %s ",str); memcpy(str,mpc->mpc_productid,12); str[12]=0; - memcpy(ioapic_Product_ID,str,13); printk("Product ID: %s ",str); printk("APIC at: 0x%lX\n",mpc->mpc_lapic); @@ -368,11 +368,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) printk("I/O APIC #%d Version %d at 0x%lX.\n", m->mpc_apicid,m->mpc_apicver, m->mpc_apicaddr); - /* - * we use the first one only currently - */ - if (ioapics == 1) - mp_ioapic_addr = m->mpc_apicaddr; + mp_apics [nr_ioapics] = *m; + if (++nr_ioapics > MAX_IO_APICS) + --nr_ioapics; } mpt+=sizeof(*m); count+=sizeof(*m); @@ -404,9 +402,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) } } } - if (ioapics > 1) + if (ioapics > MAX_IO_APICS) { - printk("Warning: Multiple IO-APICs not yet supported.\n"); + printk("Warning: Max I/O APICs exceeded (max %d, found %d).\n", MAX_IO_APICS, ioapics); printk("Warning: switching to non APIC mode.\n"); skip_ioapic_setup=1; } @@ -774,18 +772,22 @@ unsigned long __init init_smp_mappings(unsigned long memory_start) #ifdef CONFIG_X86_IO_APIC { - unsigned long ioapic_phys; - - if (smp_found_config) { - ioapic_phys = mp_ioapic_addr; - } else { - ioapic_phys = __pa(memory_start); - memset((void *)memory_start, 0, PAGE_SIZE); - memory_start += PAGE_SIZE; + unsigned long 
ioapic_phys, idx = FIX_IO_APIC_BASE_0; + int i; + + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { + ioapic_phys = mp_apics[i].mpc_apicaddr; + } else { + ioapic_phys = __pa(memory_start); + memset((void *)memory_start, 0, PAGE_SIZE); + memory_start += PAGE_SIZE; + } + set_fixmap(idx,ioapic_phys); + printk("mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); + idx++; } - set_fixmap(FIX_IO_APIC_BASE,ioapic_phys); - printk("mapped IOAPIC to %08lx (%08lx)\n", - fix_to_virt(FIX_IO_APIC_BASE), ioapic_phys); } #endif diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 2d1f349b3107..5b160178e258 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -246,8 +246,8 @@ no_context: printk(" printing eip:\n"); printk("%08lx\n", regs->eip); __asm__("movl %%cr3,%0" : "=r" (page)); - printk(KERN_ALERT "current->thread.cr3 = %08lx, %%cr3 = %08lx\n", - tsk->thread.cr3, page); + printk(KERN_ALERT "current->active_mm.pgd = %p, %%cr3 = %08lx\n", + tsk->active_mm->pgd, page); page = ((unsigned long *) __va(page))[address >> 22]; printk(KERN_ALERT "*pde = %08lx\n", page); if (page & 1) { diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 61482c3a8654..efba7bafa8d8 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -609,6 +609,14 @@ SYMBOL_NAME_LABEL(sys_call_table) .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ .long SYMBOL_NAME(sys_vfork) /* 190 */ + .long SYMBOL_NAME(sys_timer_create) + .long SYMBOL_NAME(sys_timer_settime) + .long SYMBOL_NAME(sys_timer_gettime) + .long SYMBOL_NAME(sys_timer_getoverrun) + .long SYMBOL_NAME(sys_timer_delete) /* 195 */ + .long SYMBOL_NAME(sys_clock_gettime) + .long SYMBOL_NAME(sys_clock_settime) + .long SYMBOL_NAME(sys_clock_getres) /* 198 */ .rept NR_syscalls-(.-SYMBOL_NAME(sys_call_table))/4 .long SYMBOL_NAME(sys_ni_syscall) diff --git a/arch/mips/kernel/syscalls.h b/arch/mips/kernel/syscalls.h index 
bd9fe3feb399..4c5830679c6d 100644 --- a/arch/mips/kernel/syscalls.h +++ b/arch/mips/kernel/syscalls.h @@ -225,3 +225,11 @@ SYS(sys_sigaltstack, 2) SYS(sys_sendfile, 3) SYS(sys_ni_syscall, 0) SYS(sys_ni_syscall, 0) +SYS(sys_timer_create, 3) /* 4210 */ +SYS(sys_timer_settime, 4) +SYS(sys_timer_gettime, 2) +SYS(sys_timer_getoverrun, 1) +SYS(sys_timer_delete, 1) +SYS(sys_clock_gettime, 2) /* 4215 */ +SYS(sys_clock_settime, 2) +SYS(sys_clock_getres, 2) diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index 0caf06a3b415..138cbce95aa1 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -894,4 +894,12 @@ sys_call_table: .long sys_ni_syscall /* streams1 */ .long sys_ni_syscall /* streams2 */ .long sys_vfork - .space (NR_syscalls-183)*4 + .long sys_timer_create /* 190 */ + .long sys_timer_settime + .long sys_timer_gettime + .long sys_timer_getoverrun + .long sys_timer_delete + .long sys_clock_gettime /* 195 */ + .long sys_clock_settime + .long sys_clock_getres /* 197 */ + .space (NR_syscalls-197)*4 diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S index 4d0dfff5fc49..fbc1776de2db 100644 --- a/arch/sparc/kernel/systbls.S +++ b/arch/sparc/kernel/systbls.S @@ -68,7 +68,8 @@ sys_call_table: /*240*/ .long sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler /*245*/ .long sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep /*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl -/*255*/ .long sys_aplib, sys_nis_syscall +/*255*/ .long sys_aplib, sys_timer_create, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun +/*260*/ .long sys_timer_delete, sys_clock_gettime, sys_clock_settime, sys_clock_getres /* Now the SunOS syscall table. 
*/ diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index e99ae05325dc..fa411982eda2 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -68,7 +68,8 @@ sys_call_table32: /*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys32_sched_rr_get_interval, sys32_nanosleep /*250*/ .word sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys32_nfsservctl - .word sys_aplib +/*255*/ .word sys_aplib, sys_timer_create, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun +/*260*/ .word sys_timer_delete, sys_clock_gettime, sys_clock_settime, sys_clock_getres /* Now the 64-bit native Linux syscall table. */ @@ -127,7 +128,8 @@ sys_call_table: /*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep /*250*/ .word sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl - .word sys_aplib +/*255*/ .word sys_aplib, sys_timer_create, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun +/*260*/ .word sys_timer_delete, sys_clock_gettime, sys_clock_settime, sys_clock_getres /* Now the 32-bit SunOS syscall table. 
*/ diff --git a/fs/exec.c b/fs/exec.c index d8b62ab55792..5e614d8cdef7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -379,25 +379,20 @@ static int exec_mmap(void) mm = mm_alloc(); if (mm) { + struct mm_struct *active_mm = current->active_mm; + mm->cpu_vm_mask = (1UL << smp_processor_id()); - mm->total_vm = 0; - mm->rss = 0; - mm->pgd = pgd_alloc(); - if (mm->pgd) { - struct mm_struct *active_mm = current->active_mm; - - current->mm = mm; - current->active_mm = mm; - activate_context(); - mm_release(); - if (old_mm) { - mmput(old_mm); - return 0; - } - mmdrop(active_mm); + current->mm = mm; + current->active_mm = mm; + activate_context(); + mm_release(); + if (old_mm) { + if (active_mm != old_mm) BUG(); + mmput(old_mm); return 0; } - kmem_cache_free(mm_cachep, mm); + mmdrop(active_mm); + return 0; } return -ENOMEM; } diff --git a/include/asm-alpha/siginfo.h b/include/asm-alpha/siginfo.h index a8bedd8e0a21..8e2275dd0b0e 100644 --- a/include/asm-alpha/siginfo.h +++ b/include/asm-alpha/siginfo.h @@ -67,6 +67,8 @@ typedef struct siginfo { */ #define si_pid _sifields._kill._pid #define si_uid _sifields._kill._uid +#define si_timer1 _sifields._timer._timer1 +#define si_timer2 _sifields._timer._timer2 #define si_status _sifields._sigchld._status #define si_utime _sifields._sigchld._utime #define si_stime _sifields._sigchld._stime diff --git a/include/asm-alpha/unistd.h b/include/asm-alpha/unistd.h index df20edc7f2e2..dab8c012e68e 100644 --- a/include/asm-alpha/unistd.h +++ b/include/asm-alpha/unistd.h @@ -308,6 +308,14 @@ #define __NR_capget 368 #define __NR_capset 369 #define __NR_sendfile 370 +#define __NR_timer_create 371 +#define __NR_timer_settime 372 +#define __NR_timer_gettime 373 +#define __NR_timer_getoverrun 374 +#define __NR_timer_delete 375 +#define __NR_clock_gettime 376 +#define __NR_clock_settime 377 +#define __NR_clock_getres 378 #if defined(__LIBRARY__) && defined(__GNUC__) diff --git a/include/asm-arm/siginfo.h b/include/asm-arm/siginfo.h index 
c08847d3252d..ac998192a189 100644 --- a/include/asm-arm/siginfo.h +++ b/include/asm-arm/siginfo.h @@ -67,6 +67,8 @@ typedef struct siginfo { */ #define si_pid _sifields._kill._pid #define si_uid _sifields._kill._uid +#define si_timer1 _sifields._timer._timer1 +#define si_timer2 _sifields._timer._timer2 #define si_status _sifields._sigchld._status #define si_utime _sifields._sigchld._utime #define si_stime _sifields._sigchld._stime diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index f4a7ed33af05..9c49c8b21671 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -198,6 +198,14 @@ /* 188 reserved */ /* 189 reserved */ #define __NR_vfork (__NR_SYSCALL_BASE+190) +#define __NR_timer_create (__NR_SYSCALL_BASE+191) +#define __NR_timer_settime (__NR_SYSCALL_BASE+192) +#define __NR_timer_gettime (__NR_SYSCALL_BASE+193) +#define __NR_timer_getoverrun (__NR_SYSCALL_BASE+194) +#define __NR_timer_delete (__NR_SYSCALL_BASE+195) +#define __NR_clock_gettime (__NR_SYSCALL_BASE+196) +#define __NR_clock_settime (__NR_SYSCALL_BASE+197) +#define __NR_clock_getres (__NR_SYSCALL_BASE+198) #define __sys2(x) #x #define __sys1(x) __sys2(x) diff --git a/include/asm-i386/i82489.h b/include/asm-i386/apic.h similarity index 87% rename from include/asm-i386/i82489.h rename to include/asm-i386/apic.h index 76f580bde2ee..7b035d9a3c9b 100644 --- a/include/asm-i386/i82489.h +++ b/include/asm-i386/apic.h @@ -1,10 +1,10 @@ -#ifndef __ASM_I82489_H -#define __ASM_I82489_H +#ifndef __ASM_APIC_H +#define __ASM_APIC_H /* - * Offsets for programming the 82489 and Pentium integrated APIC + * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) * - * Alan Cox , 1995. + * Alan Cox , 1995. 
*/ #define APIC_PHYS_BASE 0xfee00000 /* IA s/w dev Vol 3, Section 7.4 */ @@ -90,14 +90,6 @@ #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) -extern __inline void apic_write(unsigned long reg, unsigned long v) -{ - *((volatile unsigned long *)(APIC_BASE+reg))=v; -} - -extern __inline unsigned long apic_read(unsigned long reg) -{ - return *((volatile unsigned long *)(APIC_BASE+reg)); -} +#define MAX_IO_APICS 8 #endif diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index c259a45ee837..7f25d6410e93 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -13,6 +13,7 @@ #include #include +#include #include /* @@ -45,7 +46,8 @@ enum fixed_addresses { FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ #endif #ifdef CONFIG_X86_IO_APIC - FIX_IO_APIC_BASE, + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, #endif #ifdef CONFIG_X86_VISWS_APIC FIX_CO_CPU, /* Cobalt timer */ diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index 3e007f13b8b8..769fad6d4f44 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -3,36 +3,31 @@ #include -/* - * get a new mmu context.. x86's don't know much about contexts, - * but we have to reload the new LDT in exec(). - * - * We implement lazy MMU context-switching on x86 to optimize context - * switches done to/from kernel threads. Kernel threads 'inherit' the - * previous MM, so Linux doesnt have to flush the TLB. In most cases - * we switch back to the same process so we preserve the TLB cache. - * This all means that kernel threads have about as much overhead as - * a function call ... - */ -#define get_mmu_context(next) do { } while (0) -#define set_mmu_context(prev,next) do { next->thread.cr3 = prev->thread.cr3; } while(0) - /* * possibly do the LDT unload here? 
*/ #define destroy_context(mm) do { } while(0) -#define init_new_context(tsk,mm) do { (tsk)->thread.cr3 = __pa((mm)->pgd); } while (0) +#define init_new_context(tsk,mm) do { } while (0) static inline void activate_context(void) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - unsigned long cr3; load_LDT(mm); - cr3 = __pa(mm->pgd); - tsk->thread.cr3 = cr3; - __asm__ __volatile__("movl %0,%%cr3": :"r" (cr3)); + __asm__ __volatile__("movl %0,%%cr3": :"r" (__pa(mm->pgd))); +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next) +{ + /* + * Re-load LDT if necessary + */ + if (prev->segments != next->segments) + load_LDT(next); + + /* Re-load page tables */ + asm volatile("movl %0,%%cr3": :"r" (__pa(next->pgd))); } #endif diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 0bc1afc9150c..99b291d405f5 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -267,7 +267,7 @@ struct tss_struct { unsigned short ss1,__ss1h; unsigned long esp2; unsigned short ss2,__ss2h; - unsigned long cr3; + unsigned long __cr3; unsigned long eip; unsigned long eflags; unsigned long eax,ecx,edx,ebx; @@ -292,7 +292,6 @@ struct tss_struct { struct thread_struct { unsigned long esp0; - unsigned long cr3; unsigned long eip; unsigned long esp; unsigned long fs; @@ -313,7 +312,7 @@ struct thread_struct { }; #define INIT_THREAD { \ - 0,(long) &swapper_pg_dir - PAGE_OFFSET, \ + 0, \ 0, 0, 0, 0, \ { [0 ... 
7] = 0 }, /* debugging registers */ \ 0, 0, 0, \ @@ -330,7 +329,7 @@ struct thread_struct { sizeof(init_stack) + (long) &init_stack, /* esp0 */ \ __KERNEL_DS, 0, /* ss0 */ \ 0,0,0,0,0,0, /* stack1, stack2 */ \ - (long) &swapper_pg_dir - PAGE_OFFSET, /* cr3 */ \ + 0, /* cr3 */ \ 0,0, /* eip,eflags */ \ 0,0,0,0, /* eax,ecx,edx,ebx */ \ 0,0,0,0, /* esp,ebp,esi,edi */ \ diff --git a/include/asm-i386/siginfo.h b/include/asm-i386/siginfo.h index 7c805525c9e5..9b0a0204d182 100644 --- a/include/asm-i386/siginfo.h +++ b/include/asm-i386/siginfo.h @@ -31,13 +31,14 @@ typedef struct siginfo { struct { unsigned int _timer1; unsigned int _timer2; + sigval_t _sigval2; /* FIXME: must map to _sigval below because it is the same */ } _timer; /* POSIX.1b signals */ struct { pid_t _pid; /* sender's pid */ uid_t _uid; /* sender's uid */ - sigval_t _sigval; + sigval_t _sigval; /* FIXME: move out of union together with _sigval2 */ } _rt; /* SIGCHLD */ @@ -67,6 +68,8 @@ typedef struct siginfo { */ #define si_pid _sifields._kill._pid #define si_uid _sifields._kill._uid +#define si_timer1 _sifields._timer._timer1 +#define si_timer2 _sifields._timer._timer2 #define si_status _sifields._sigchld._status #define si_utime _sifields._sigchld._utime #define si_stime _sifields._sigchld._stime diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index b8a150aa7575..91199de7f1cb 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -8,7 +8,7 @@ #ifdef CONFIG_X86_LOCAL_APIC #ifndef ASSEMBLY #include -#include +#include #include #endif #endif @@ -186,6 +186,16 @@ extern inline int cpu_logical_map(int cpu) return __cpu_logical_map[cpu]; } +extern __inline void apic_write(unsigned long reg, unsigned long v) +{ + *((volatile unsigned long *)(APIC_BASE+reg))=v; +} + +extern __inline unsigned long apic_read(unsigned long reg) +{ + return *((volatile unsigned long *)(APIC_BASE+reg)); +} + /* * General functions that each host system must provide. 
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index c961fdd2e801..18d814b9662c 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -195,6 +195,14 @@ #define __NR_getpmsg 188 /* some people actually want streams */ #define __NR_putpmsg 189 /* some people actually want streams */ #define __NR_vfork 190 +#define __NR_timer_create 191 +#define __NR_timer_settime 192 +#define __NR_timer_gettime 193 +#define __NR_timer_getoverrun 194 +#define __NR_timer_delete 195 +#define __NR_clock_gettime 196 +#define __NR_clock_settime 197 +#define __NR_clock_getres 198 /* user-visible error numbers are in the range -1 - -122: see */ diff --git a/include/asm-m68k/siginfo.h b/include/asm-m68k/siginfo.h index 4061e6f72946..27166cd860f6 100644 --- a/include/asm-m68k/siginfo.h +++ b/include/asm-m68k/siginfo.h @@ -67,6 +67,8 @@ typedef struct siginfo { */ #define si_pid _sifields._kill._pid #define si_uid _sifields._kill._uid +#define si_timer1 _sifields._timer._timer1 +#define si_timer2 _sifields._timer._timer2 #define si_status _sifields._sigchld._status #define si_utime _sifields._sigchld._utime #define si_stime _sifields._sigchld._stime diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h index 4180f3df7c84..f74f60ff718b 100644 --- a/include/asm-m68k/unistd.h +++ b/include/asm-m68k/unistd.h @@ -194,6 +194,14 @@ #define __NR_getpmsg 188 /* some people actually want streams */ #define __NR_putpmsg 189 /* some people actually want streams */ #define __NR_vfork 190 +#define __NR_timer_create 191 +#define __NR_timer_settime 192 +#define __NR_timer_gettime 193 +#define __NR_timer_getoverrun 194 +#define __NR_timer_delete 195 +#define __NR_clock_gettime 196 +#define __NR_clock_settime 197 +#define __NR_clock_getres 198 /* user-visible error numbers are in the range -1 - -122: see */ diff --git a/include/asm-mips/siginfo.h b/include/asm-mips/siginfo.h index 3e19a3a55b1b..7152c88c6c25 100644 --- a/include/asm-mips/siginfo.h +++ 
b/include/asm-mips/siginfo.h @@ -75,6 +75,8 @@ typedef struct siginfo { */ #define si_pid _sifields._kill._pid #define si_uid _sifields._kill._uid +#define si_timer1 _sifields._timer._timer1 +#define si_timer2 _sifields._timer._timer2 #define si_status _sifields._sigchld._status #define si_utime _sifields._sigchld._utime #define si_stime _sifields._sigchld._stime diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h index 755a4a2c66e3..540da7a2ca79 100644 --- a/include/asm-mips/unistd.h +++ b/include/asm-mips/unistd.h @@ -1196,6 +1196,14 @@ #define __NR_sendfile (__NR_Linux + 207) #define __NR_getpmsg (__NR_Linux + 208) #define __NR_putpmsg (__NR_Linux + 209) +#define __NR_timer_create (__NR_Linux + 210) +#define __NR_timer_settime (__NR_Linux + 211) +#define __NR_timer_gettime (__NR_Linux + 212) +#define __NR_timer_getoverrun (__NR_Linux + 213) +#define __NR_timer_delete (__NR_Linux + 214) +#define __NR_clock_gettime (__NR_Linux + 215) +#define __NR_clock_settime (__NR_Linux + 216) +#define __NR_clock_getres (__NR_Linux + 217) /* * Offset of the last Linux flavoured syscall diff --git a/include/asm-ppc/siginfo.h b/include/asm-ppc/siginfo.h index f838fcc82adb..3d4660d8ecae 100644 --- a/include/asm-ppc/siginfo.h +++ b/include/asm-ppc/siginfo.h @@ -67,6 +67,8 @@ typedef struct siginfo { */ #define si_pid _sifields._kill._pid #define si_uid _sifields._kill._uid +#define si_timer1 _sifields._timer._timer1 +#define si_timer2 _sifields._timer._timer2 #define si_status _sifields._sigchld._status #define si_utime _sifields._sigchld._utime #define si_stime _sifields._sigchld._stime @@ -122,7 +124,7 @@ typedef struct siginfo { * SIGSEGV si_codes */ #define SEGV_MAPERR 1 /* address not mapped to object */ -#define SRGV_ACCERR 2 /* invalid permissions for mapped object */ +#define SEGV_ACCERR 2 /* invalid permissions for mapped object */ #define NSIGSEGV 2 /* diff --git a/include/asm-ppc/unistd.h b/include/asm-ppc/unistd.h index 38372df08fae..299ad0a56e93 100644 
--- a/include/asm-ppc/unistd.h +++ b/include/asm-ppc/unistd.h @@ -194,6 +194,14 @@ #define __NR_getpmsg 187 /* some people actually want streams */ #define __NR_putpmsg 188 /* some people actually want streams */ #define __NR_vfork 189 +#define __NR_timer_create 190 +#define __NR_timer_settime 191 +#define __NR_timer_gettime 192 +#define __NR_timer_getoverrun 193 +#define __NR_timer_delete 194 +#define __NR_clock_gettime 195 +#define __NR_clock_settime 196 +#define __NR_clock_getres 197 #define __NR(n) #n diff --git a/include/asm-sparc/siginfo.h b/include/asm-sparc/siginfo.h index 0a838c3a9747..aab2d32a063a 100644 --- a/include/asm-sparc/siginfo.h +++ b/include/asm-sparc/siginfo.h @@ -70,6 +70,8 @@ typedef struct siginfo { */ #define si_pid _sifields._kill._pid #define si_uid _sifields._kill._uid +#define si_timer1 _sifields._timer._timer1 +#define si_timer2 _sifields._timer._timer2 #define si_status _sifields._sigchld._status #define si_utime _sifields._sigchld._utime #define si_stime _sifields._sigchld._stime @@ -126,7 +128,7 @@ typedef struct siginfo { * SIGSEGV si_codes */ #define SEGV_MAPERR 1 /* address not mapped to object */ -#define SRGV_ACCERR 2 /* invalid permissions for mapped object */ +#define SEGV_ACCERR 2 /* invalid permissions for mapped object */ #define NSIGSEGV 2 /* diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h index 139dc32b5429..50b2b77a28a8 100644 --- a/include/asm-sparc/unistd.h +++ b/include/asm-sparc/unistd.h @@ -271,6 +271,14 @@ #define __NR_fdatasync 253 #define __NR_nfsservctl 254 #define __NR_aplib 255 +#define __NR_timer_create 256 +#define __NR_timer_settime 257 +#define __NR_timer_gettime 258 +#define __NR_timer_getoverrun 259 +#define __NR_timer_delete 260 +#define __NR_clock_gettime 261 +#define __NR_clock_settime 262 +#define __NR_clock_getres 263 #define _syscall0(type,name) \ type name(void) \ diff --git a/include/asm-sparc64/siginfo.h b/include/asm-sparc64/siginfo.h index a23240e6623b..454e56a9dac4 
100644 --- a/include/asm-sparc64/siginfo.h +++ b/include/asm-sparc64/siginfo.h @@ -129,6 +129,8 @@ typedef struct siginfo32 { */ #define si_pid _sifields._kill._pid #define si_uid _sifields._kill._uid +#define si_timer1 _sifields._timer._timer1 +#define si_timer2 _sifields._timer._timer2 #define si_status _sifields._sigchld._status #define si_utime _sifields._sigchld._utime #define si_stime _sifields._sigchld._stime @@ -185,7 +187,7 @@ typedef struct siginfo32 { * SIGSEGV si_codes */ #define SEGV_MAPERR 1 /* address not mapped to object */ -#define SRGV_ACCERR 2 /* invalid permissions for mapped object */ +#define SEGV_ACCERR 2 /* invalid permissions for mapped object */ #define NSIGSEGV 2 /* diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h index 87a6833c5594..466317e0e829 100644 --- a/include/asm-sparc64/unistd.h +++ b/include/asm-sparc64/unistd.h @@ -271,6 +271,14 @@ #define __NR_fdatasync 253 #define __NR_nfsservctl 254 #define __NR_aplib 255 +#define __NR_timer_create 256 +#define __NR_timer_settime 257 +#define __NR_timer_gettime 258 +#define __NR_timer_getoverrun 259 +#define __NR_timer_delete 260 +#define __NR_clock_gettime 261 +#define __NR_clock_settime 262 +#define __NR_clock_getres 263 #define _syscall0(type,name) \ type name(void) \ diff --git a/include/linux/limits.h b/include/linux/limits.h index 5848688e786f..6ca0ae68b5a8 100644 --- a/include/linux/limits.h +++ b/include/linux/limits.h @@ -14,6 +14,8 @@ #define PATH_MAX 4095 /* # chars in a path name */ #define PIPE_BUF 4096 /* # bytes in atomic write to a pipe */ +#define MAX_ITIMERS 32 /* # POSIX.1b itimers per process */ + #define RTSIG_MAX 32 #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index c708aa628e89..c1c85c98deb7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -35,6 +35,7 @@ extern unsigned long event; #define CLONE_PID 0x00001000 /* set if pid shared */ #define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue 
on the child too */ #define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ +#define CLONE_ITIMERS 0x00008000 /* set if POSIX.1b itimers are shared */ /* * These are the constant used to fake the fixed-point load-average @@ -223,6 +224,26 @@ struct signal_struct { */ struct user_struct; +/* POSIX.1b interval timer structure. */ +struct k_itimer { + spinlock_t it_lock; + clockid_t it_clock; /* which timer type */ + timer_t it_id; /* timer id */ + int it_overrun; /* number of signals overrun */ + struct sigevent it_signal; /* signal to be delivered */ + struct timespec it_interval; /* interval (rounded to jiffies) */ + int it_incr; /* interval specified in jiffies */ + struct task_struct *it_process; /* process to send signal to */ + struct timer_list it_timer; +}; + +/* Structure to maintain the dynamically created POSIX.1b interval timers. */ +struct itimer_struct { + atomic_t count; + spinlock_t its_lock; + struct k_itimer *itimer[MAX_ITIMERS]; +}; + struct task_struct { /* these are hardcoded - don't touch */ volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ @@ -278,6 +299,7 @@ struct task_struct { unsigned long it_real_value, it_prof_value, it_virt_value; unsigned long it_real_incr, it_prof_incr, it_virt_incr; struct timer_list real_timer; + struct itimer_struct *posix_timers; /* POSIX.1b Interval Timers */ struct tms times; unsigned long start_time; long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS]; @@ -315,6 +337,7 @@ struct task_struct { spinlock_t sigmask_lock; /* Protects signal and blocked */ struct signal_struct *sig; sigset_t signal, blocked; + siginfo_t nrt_info[SIGRTMIN]; /* siginfo for non RT signals */ struct signal_queue *sigqueue, **sigqueue_tail; unsigned long sas_ss_sp; size_t sas_ss_size; @@ -364,6 +387,7 @@ struct task_struct { /* chld wait */ __WAIT_QUEUE_HEAD_INITIALIZER(name.wait_chldexit), NULL, \ /* timeout */ SCHED_OTHER,0,0,0,0,0,0,0, \ /* timer */ { NULL, NULL, 0, 0, it_real_fn 
}, \ +/* POSIX.1b timer */ NULL, \ /* utime */ {0,0,0,0},0, \ /* per CPU times */ {0, }, {0, }, \ /* flt */ 0,0,0,0,0,0, \ @@ -382,7 +406,7 @@ struct task_struct { /* fs */ &init_fs, \ /* files */ &init_files, \ /* mm */ NULL, &init_mm, \ -/* signals */ SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \ +/* signals */ SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, {{0,},}, NULL, &init_task.sigqueue, 0, 0, \ } #ifndef INIT_TASK_SIZE @@ -614,6 +638,7 @@ extern void exit_mm(struct task_struct *); extern void exit_fs(struct task_struct *); extern void exit_files(struct task_struct *); extern void exit_sighand(struct task_struct *); +extern void exit_itimers(struct task_struct *); extern int do_execve(char *, char **, char **, struct pt_regs *); extern int do_fork(unsigned long, unsigned long, struct pt_regs *); diff --git a/include/linux/time.h b/include/linux/time.h index 53a125a08a14..8b30d460884b 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -26,6 +26,19 @@ struct timespec { */ #define MAX_JIFFY_OFFSET ((~0UL >> 1)-1) +/* Parameters used to convert the timespec values */ +#ifndef USEC_PER_SEC +#define USEC_PER_SEC (1000000L) +#endif + +#ifndef NSEC_PER_SEC +#define NSEC_PER_SEC (1000000000L) +#endif + +#ifndef NSEC_PER_USEC +#define NSEC_PER_USEC (1000L) +#endif + static __inline__ unsigned long timespec_to_jiffies(struct timespec *value) { @@ -34,15 +47,15 @@ timespec_to_jiffies(struct timespec *value) if (sec >= (MAX_JIFFY_OFFSET / HZ)) return MAX_JIFFY_OFFSET; - nsec += 1000000000L / HZ - 1; - nsec /= 1000000000L / HZ; + nsec += NSEC_PER_SEC / HZ - 1; + nsec /= NSEC_PER_SEC / HZ; return HZ * sec + nsec; } static __inline__ void jiffies_to_timespec(unsigned long jiffies, struct timespec *value) { - value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ); + value->tv_nsec = (jiffies % HZ) * (NSEC_PER_SEC / HZ); value->tv_sec = jiffies / HZ; } @@ -89,4 +102,23 @@ struct itimerval { struct timeval it_value; /* current 
value */ }; + +/* + * Data types for POSIX.1b interval timers. + */ +typedef int clockid_t; +typedef int timer_t; + +/* + * The IDs of the various system clocks (for POSIX.1b interval timers). + */ +#define CLOCK_REALTIME 0 + +/* + * The various flags for setting POSIX.1b interval timers. + */ + +#define TIMER_ABSTIME 0x01 + + #endif diff --git a/kernel/exit.c b/kernel/exit.c index c8021906fc15..d95a32f65a76 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -229,6 +229,34 @@ void exit_sighand(struct task_struct *tsk) __exit_sighand(tsk); } +static inline void __exit_itimers(struct task_struct *tsk) +{ + struct itimer_struct *timers = tsk->posix_timers; + struct k_itimer *timr; + int i; + + if (timers == NULL) return; + + if (atomic_dec_and_test(&timers->count)) { + tsk->posix_timers = NULL; + for (i = 0; i < MAX_ITIMERS; i++) { + timr = timers->itimer[i]; + if (timr) { + start_bh_atomic(); + del_timer(&timr->it_timer); + end_bh_atomic(); + kfree(timr); + } + } + kfree(timers); + } +} + +void exit_itimers(struct task_struct *tsk) +{ + __exit_itimers(tsk); +} + /* * Turn us into a lazy TLB process if we * aren't already.. @@ -352,6 +380,7 @@ fake_volatile: __exit_files(tsk); __exit_fs(tsk); __exit_sighand(tsk); + __exit_itimers(tsk); exit_thread(); tsk->state = TASK_ZOMBIE; tsk->exit_code = code; diff --git a/kernel/fork.c b/kernel/fork.c index b11112ceb817..e6f1417dcf6b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -304,8 +304,12 @@ struct mm_struct * mm_alloc(void) atomic_set(&mm->mm_count, 1); init_MUTEX(&mm->mmap_sem); mm->page_table_lock = SPIN_LOCK_UNLOCKED; + mm->pgd = pgd_alloc(); + if (mm->pgd) + return mm; + kmem_cache_free(mm_cachep, mm); } - return mm; + return NULL; } /* Please note the differences between mmput and mm_release. 
@@ -390,10 +394,6 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk) tsk->mm = mm; tsk->active_mm = mm; - mm->pgd = pgd_alloc(); - if (!mm->pgd) - goto free_mm; - /* * child gets a private LDT (if there was an LDT in the parent) */ @@ -411,9 +411,6 @@ good_mm: init_new_context(tsk,mm); return 0; -free_mm: - kmem_cache_free(mm_cachep, mm); - return retval; free_pt: mmput(mm); fail_nomem: @@ -550,6 +547,24 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) p->flags = new_flags; } + +static inline int copy_itimers(unsigned long clone_flags, struct task_struct * tsk) +{ + if (clone_flags & CLONE_ITIMERS) { + atomic_inc(&tsk->posix_timers->count); + return 0; + } + + tsk->posix_timers = kmalloc(sizeof(*tsk->posix_timers), GFP_KERNEL); + if (tsk->posix_timers == NULL) return -1; + spin_lock_init(&tsk->posix_timers->its_lock); + atomic_set(&tsk->posix_timers->count, 1); + memset(tsk->posix_timers->itimer, 0, sizeof(tsk->posix_timers->itimer)); + + return 0; +} + + /* * Ok, this is the main fork-routine. It copies the system process * information (task[nr]) and sets up the necessary registers. 
It @@ -648,6 +663,8 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) goto bad_fork_cleanup_files; if (copy_sighand(clone_flags, p)) goto bad_fork_cleanup_fs; + if (copy_itimers(clone_flags, p)) + goto bad_fork_cleanup_itimers; if (copy_mm(clone_flags, p)) goto bad_fork_cleanup_sighand; retval = copy_thread(0, clone_flags, usp, p, regs); @@ -692,6 +709,8 @@ fork_out: down(&sem); return retval; +bad_fork_cleanup_itimers: + exit_itimers(p); bad_fork_cleanup_sighand: exit_sighand(p); bad_fork_cleanup_fs: diff --git a/kernel/itimer.c b/kernel/itimer.c index 1b4661c397de..bf6d1c003f14 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -9,14 +9,16 @@ #include #include #include +#include +#include #include /* - * change timeval to jiffies, trying to avoid the + * change timeval to jiffies, trying to avoid the * most obvious overflows.. * - * The tv_*sec values are signed, but nothing seems to + * The tv_*sec values are signed, but nothing seems to * indicate whether we really should use them as signed values * when doing itimers. POSIX doesn't mention this (but if * alarm() uses itimers without checking, we have to use unsigned @@ -168,6 +170,389 @@ asmlinkage int sys_setitimer(int which, struct itimerval *value, return error; if (copy_to_user(ovalue, &get_buffer, sizeof(get_buffer))) - return -EFAULT; + return -EFAULT; + return 0; +} + +/* PRECONDITION: + * timr->it_lock must be locked + */ +static void timer_notify_task(struct k_itimer *timr) +{ + struct siginfo info; + int ret; + + if (timr->it_signal.sigev_notify == SIGEV_SIGNAL) { + + /* Send signal to the process that owns this timer. */ + info.si_signo = timr->it_signal.sigev_signo; + info.si_errno = 0; + info.si_code = SI_TIMER; + /* TODO: if someone has better ideas what to put in + * the next two fields... + * si_timer1 is currently used in signal.c to check + * whether a signal from this timer is already in the signal + * queue. 
+ */ + info.si_timer1 = timr->it_id; + info.si_timer2 = 0; + info.si_value = timr->it_signal.sigev_value; + ret = send_sig_info(info.si_signo, &info, timr->it_process); + switch (ret) { + case 0: /* all's well */ + timr->it_overrun = 0; + break; + case 1: /* signal from this timer was already in the queue */ + timr->it_overrun++; + break; + default: + printk(KERN_WARNING "sending signal failed: %d\n", ret); + break; + } + } +} + +/* This function gets called when a POSIX.1b interval timer expires. */ +static void posix_timer_fn(unsigned long __data) +{ + struct k_itimer *timr = (struct k_itimer *)__data; + unsigned long interval; + + spin_lock(&timr->it_lock); + + timer_notify_task(timr); + + /* Set up the timer for the next interval (if there is one) */ + if ((interval = timr->it_incr) == 0) goto out; + + if (interval > (unsigned long) LONG_MAX) + interval = LONG_MAX; + timr->it_timer.expires = jiffies + interval; + add_timer(&timr->it_timer); +out: + spin_unlock(&timr->it_lock); +} + +/* Find the first available slot for the new timer. */ +static int timer_find_slot(struct itimer_struct *timers) +{ + int i; + + for (i = 0; i < MAX_ITIMERS; i++) { + if (timers->itimer[i] == NULL) return i; + } + return -1; +} + +static int good_sigevent(const struct sigevent *sigev) +{ + switch (sigev->sigev_notify) { + case SIGEV_NONE: + break; + case SIGEV_SIGNAL: + if ((sigev->sigev_signo <= 0) || + (sigev->sigev_signo > SIGRTMAX)) + return 0; + break; + default: + return 0; + } + return 1; +} + +/* Create a POSIX.1b interval timer. */ + +asmlinkage int sys_timer_create(clockid_t which_clock, + struct sigevent *timer_event_spec, + timer_t *created_timer_id) +{ + int error = 0; + struct k_itimer *new_timer = NULL; + struct itimer_struct *timers = current->posix_timers; + int new_timer_id; + + /* Right now, we only support CLOCK_REALTIME for timers. 
*/ + if (which_clock != CLOCK_REALTIME) return -EINVAL; + + new_timer = (struct k_itimer *)kmalloc(sizeof(*new_timer), GFP_KERNEL); + if (new_timer == NULL) return -EAGAIN; + + spin_lock_init(&new_timer->it_lock); + new_timer->it_clock = which_clock; + new_timer->it_incr = 0; + new_timer->it_overrun = 0; + + if (timer_event_spec) { + if (copy_from_user(&new_timer->it_signal, timer_event_spec, + sizeof(new_timer->it_signal))) { + error = -EFAULT; + goto out; + } + if (!good_sigevent(&new_timer->it_signal)) { + error = -EINVAL; + goto out; + } + } + else { + new_timer->it_signal.sigev_notify = SIGEV_SIGNAL; + new_timer->it_signal.sigev_signo = SIGALRM; + } + + new_timer->it_interval.tv_sec = 0; + new_timer->it_interval.tv_nsec = 0; + new_timer->it_process = current; + new_timer->it_timer.next = NULL; + new_timer->it_timer.prev = NULL; + new_timer->it_timer.expires = 0; + new_timer->it_timer.data = (unsigned long)new_timer; + new_timer->it_timer.function = posix_timer_fn; + + spin_lock(&timers->its_lock); + + new_timer_id = timer_find_slot(timers); + if (new_timer_id == -1) { + error = -EAGAIN; + goto out; + } + new_timer->it_id = new_timer_id; + timers->itimer[new_timer_id] = new_timer; + if (timer_event_spec == NULL) { + new_timer->it_signal.sigev_value.sival_int = new_timer_id; + } + + if (copy_to_user(created_timer_id, &new_timer_id, sizeof(new_timer_id))) { + error = -EFAULT; + timers->itimer[new_timer_id] = NULL; + } + + spin_unlock(&timers->its_lock); +out: + if (error) { + kfree(new_timer); + } + return error; +} + + +/* good_timespec + * + * This function checks the elements of a timespec structure. + * + * Arguments: + * ts : Pointer to the timespec structure to check + * + * Return value: + * If a NULL pointer was passed in, or the tv_nsec field was less than 0 or + * greater than NSEC_PER_SEC, or the tv_sec field was less than 0, this + * function returns 0. Otherwise it returns 1. 
+ */ + +static int good_timespec(const struct timespec *ts) +{ + if (ts == NULL) return 0; + if (ts->tv_sec < 0) return 0; + if ((ts->tv_nsec < 0) || (ts->tv_nsec >= NSEC_PER_SEC)) return 0; + return 1; +} + +static inline struct k_itimer* lock_timer(struct task_struct *tsk, timer_t timer_id) +{ + struct k_itimer *timr; + + if ((timer_id < 0) || (timer_id >= MAX_ITIMERS)) return NULL; + spin_lock(&tsk->posix_timers->its_lock); + timr = tsk->posix_timers->itimer[timer_id]; + if (timr) spin_lock(&timr->it_lock); + spin_unlock(&tsk->posix_timers->its_lock); + return timr; +} + +static inline void unlock_timer(struct k_itimer *timr) +{ + spin_unlock(&timr->it_lock); +} + +/* Get the time remaining on a POSIX.1b interval timer. */ +static void do_timer_gettime(struct k_itimer *timr, + struct itimerspec *cur_setting) +{ + unsigned long expires = timr->it_timer.expires; + + if (expires) expires -= jiffies; + + jiffies_to_timespec(expires, &cur_setting->it_value); + cur_setting->it_interval = timr->it_interval; +} + +/* Get the time remaining on a POSIX.1b interval timer. 
*/ +asmlinkage int sys_timer_gettime(timer_t timer_id, struct itimerspec *setting) +{ + struct k_itimer *timr; + struct itimerspec cur_setting; + + timr = lock_timer(current, timer_id); + if (!timr) return -EINVAL; + + do_timer_gettime(timr, &cur_setting); + + unlock_timer(timr); + + copy_to_user_ret(setting, &cur_setting, sizeof(cur_setting), -EFAULT); + + return 0; +} + +/* Get the number of overruns of a POSIX.1b interval timer */ +asmlinkage int sys_timer_getoverrun(timer_t timer_id) +{ + struct k_itimer *timr; + int overrun; + + timr = lock_timer(current, timer_id); + if (!timr) return -EINVAL; + + overrun = timr->it_overrun; + + unlock_timer(timr); + + return overrun; +} + +static void timer_value_abs_to_rel(struct timespec *val) +{ + struct timeval tv; + struct timespec ts; + + do_gettimeofday(&tv); + ts.tv_sec = tv.tv_sec; + ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC; + + /* check whether the time lies in the past */ + if ((val->tv_sec < ts.tv_sec) || + ((val->tv_sec == ts.tv_sec) && + (val->tv_nsec <= ts.tv_nsec))) { + /* expire immediately */ + val->tv_sec = 0; + val->tv_nsec = 0; + } + else { + val->tv_sec -= ts.tv_sec; + val->tv_nsec -= ts.tv_nsec; + if (val->tv_nsec < 0) { + val->tv_nsec += NSEC_PER_SEC; + val->tv_sec--; + } + } +} + +/* Set a POSIX.1b interval timer. 
*/
+/*
+ * Stops the timer, optionally reports the previous setting through
+ * old_setting, then re-arms it from new_setting (or leaves it disarmed
+ * when it_value is zero).  The visible caller holds timr->it_lock.
+ * With TIMER_ABSTIME, new_setting->it_value is rewritten in place to a
+ * relative value.
+ */
+static void do_timer_settime(struct k_itimer *timr, int flags,
+			     struct itimerspec *new_setting,
+			     struct itimerspec *old_setting)
+{
+	/* disable the timer */
+	start_bh_atomic();
+	del_timer(&timr->it_timer);
+	end_bh_atomic();
+
+	if (old_setting) {
+		do_timer_gettime(timr, old_setting);
+	}
+
+	/* switch off the timer when it_value is zero */
+	if ((new_setting->it_value.tv_sec == 0) &&
+	    (new_setting->it_value.tv_nsec == 0)) {
+		timr->it_incr = 0;
+		timr->it_timer.expires = 0;
+		timr->it_interval.tv_sec = 0;
+		timr->it_interval.tv_nsec = 0;
+		return;
+	}
+
+	timr->it_incr = timespec_to_jiffies(&new_setting->it_interval);
+	/* save the interval rounded to jiffies */
+	jiffies_to_timespec(timr->it_incr, &timr->it_interval);
+
+	if (flags & TIMER_ABSTIME) {
+		timer_value_abs_to_rel(&new_setting->it_value);
+	}
+
+	timr->it_timer.expires = timespec_to_jiffies(&new_setting->it_value) + jiffies;
+
+	/*
+	 * For some reason the timer does not fire immediately if expires is
+	 * equal to jiffies, so the timer callback function is called directly.
+	 */
+	if (timr->it_timer.expires == jiffies) {
+		posix_timer_fn((unsigned long)timr);
+	}
+	else {
+		add_timer(&timr->it_timer);
+	}
+}
+
+
+/*
+ * Set a POSIX.1b interval timer.
+ *
+ * Returns 0 on success, -EINVAL for a NULL or malformed new_setting or an
+ * unknown timer_id, and -EFAULT if a user pointer cannot be accessed.
+ * User memory is only touched while the timer spinlock is NOT held,
+ * because copy_from_user()/copy_to_user() may fault and sleep.
+ */
+asmlinkage int sys_timer_settime(timer_t timer_id, int flags,
+				 const struct itimerspec *new_setting,
+				 struct itimerspec *old_setting)
+{
+	struct k_itimer *timr;
+	struct itimerspec new_spec, old_spec;
+
+	if (new_setting == NULL) return -EINVAL;
+
+	/* fetch and validate the request before taking any lock */
+	if (copy_from_user(&new_spec, new_setting, sizeof(new_spec)))
+		return -EFAULT;
+
+	if ((!good_timespec(&new_spec.it_interval)) ||
+	    (!good_timespec(&new_spec.it_value)))
+		return -EINVAL;
+
+	timr = lock_timer(current, timer_id);
+	if (!timr) return -EINVAL;
+
+	do_timer_settime(timr, flags, &new_spec,
+			 old_setting ? &old_spec : NULL);
+
+	unlock_timer(timr);
+
+	/* copy the previous setting out only after dropping the lock */
+	if (old_setting) {
+		if (copy_to_user(old_setting, &old_spec, sizeof(old_spec)))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Delete a POSIX.1b interval timer.
+ *
+ * The timer is unhooked from the table under its_lock, and it_lock is
+ * taken in the same its_lock -> it_lock order that lock_timer() uses, so
+ * the two paths cannot deadlock against each other.  The timer lock is
+ * released before the memory is freed.
+ * Returns 0 on success, -EINVAL if timer_id does not name a timer.
+ */
+asmlinkage int sys_timer_delete(timer_t timer_id)
+{
+	struct k_itimer *timr;
+
+	if ((timer_id < 0) || (timer_id >= MAX_ITIMERS)) return -EINVAL;
+
+	spin_lock(&current->posix_timers->its_lock);
+	timr = current->posix_timers->itimer[timer_id];
+	if (timr) {
+		/* no later lookup can find the timer once the slot is empty */
+		current->posix_timers->itimer[timer_id] = NULL;
+		/* wait for whoever currently holds the timer */
+		spin_lock(&timr->it_lock);
+	}
+	spin_unlock(&current->posix_timers->its_lock);
+
+	if (!timr) return -EINVAL;
+
+	start_bh_atomic();
+	del_timer(&timr->it_timer);
+	end_bh_atomic();
+
+	spin_unlock(&timr->it_lock);
+	kfree(timr);
+
+	return 0;
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index aed41e981afc..d5da20cb2e7d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -790,16 +790,22 @@ still_running_back:
 	 */
 	{
 		struct mm_struct *mm = next->mm;
+		struct mm_struct *oldmm = prev->active_mm;
 		if (!mm) {
-			mm = prev->active_mm;
-			set_mmu_context(prev,next);
 			if (next->active_mm) BUG();
-			next->active_mm = mm;
-			atomic_inc(&mm->mm_count);
+			next->active_mm = oldmm;
+			atomic_inc(&oldmm->mm_count);
+		} else {
+			if (next->active_mm != mm) BUG();
+			if (mm != oldmm)
+				switch_mm(oldmm, mm);
 		}
 	}
 
-	get_mmu_context(next);
+	/*
+	 * This just switches the register state and the
+	 * stack.
+	 */
 	switch_to(prev, next, prev);
 	__schedule_tail(prev);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 53ee45120823..4cd2f6f54c57 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -132,17 +132,7 @@ printk("SIG dequeue (%s:%d): %d ", current->comm, current->pid,
 
 		/* Collect the siginfo appropriate to this signal.  */
 		if (sig < SIGRTMIN) {
-			/* XXX: As an extension, support queueing exactly
-			   one non-rt signal if SA_SIGINFO is set, so that
-			   we can get more detailed information about the
-			   cause of the signal.  */
-			/* Deciding not to init these couple of fields is
-			   more expensive that just initializing them. 
*/
-			info->si_signo = sig;
-			info->si_errno = 0;
-			info->si_code = 0;
-			info->si_pid = 0;
-			info->si_uid = 0;
+			*info = current->nrt_info[sig];
 		} else {
 			struct signal_queue *q, **pp;
 			pp = &current->sigqueue;
@@ -185,8 +175,6 @@ printk("SIG dequeue (%s:%d): %d ", current->comm, current->pid,
 			sigdelset(&current->signal, sig);
 		recalc_sigpending(current);
 
-		/* XXX: Once POSIX.1b timers are in, if si_code == SI_TIMER,
-		   we need to xchg out the timer overrun values.  */
 	} else {
 		/* XXX: Once CLONE_PID is in to join those "threads" that are
 		   part of the same "process", look for signals sent to the
@@ -247,6 +235,29 @@ static int ignored_signal(int sig, struct task_struct *t)
 	return 1;
 }
 
+static void set_siginfo(siginfo_t *dst, const siginfo_t *src, int sig)
+{
+	switch ((unsigned long)src) {	/* src is a real pointer or a 0/1 sentinel */
+	case 0:				/* sentinel 0: sent by the current task */
+		dst->si_signo = sig;
+		dst->si_errno = 0;
+		dst->si_code = SI_USER;
+		dst->si_pid = current->pid;
+		dst->si_uid = current->uid;
+		break;
+	case 1:				/* sentinel 1: generated by the kernel */
+		dst->si_signo = sig;
+		dst->si_errno = 0;
+		dst->si_code = SI_KERNEL;
+		dst->si_pid = 0;
+		dst->si_uid = 0;
+		break;
+	default:			/* real siginfo: copy it verbatim */
+		*dst = *src;
+		break;
+	}
+}
+
 int
 send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 {
@@ -306,12 +317,10 @@ printk("SIG queue (%s:%d): %d ", t->comm, t->pid, sig);
 
 	if (sig < SIGRTMIN) {
 		/* Non-real-time signals are not queued.  */
-		/* XXX: As an extension, support queueing exactly one
-		   non-rt signal if SA_SIGINFO is set, so that we can
-		   get more detailed information about the cause of
-		   the signal.  */
 		if (sigismember(&t->signal, sig))
 			goto out;
+		set_siginfo(&t->nrt_info[sig], info, sig);
+
 	} else {
 		/* Real-time signals must be queued if sent by sigqueue, or
 		   some other real-time mechanism. 
It is implementation
@@ -323,6 +332,21 @@ printk("SIG queue (%s:%d): %d ", t->comm, t->pid, sig);
 
 		struct signal_queue *q = 0;
 
+		/* Queue at most one signal per POSIX timer.  info may be the 0/1
+		   sentinel (see set_siginfo()), so only dereference real pointers */
+		if (((unsigned long)info > 1) && (info->si_code == SI_TIMER)) {
+			for (q = t->sigqueue; q; q = q->next) {
+				if ((q->info.si_code == SI_TIMER) &&
+				    (q->info.si_timer1 == info->si_timer1)) {
+					/* this special value (1) is recognized
+					   only by posix_timer_fn() in
+					   itimer.c */
+					ret = 1;
+					goto out;
+				}
+			}
+		}
+
 		if (atomic_read(&nr_queued_signals) < max_queued_signals) {
 			q = (struct signal_queue *)
 				kmem_cache_alloc(signal_queue_cachep, GFP_ATOMIC);
@@ -333,25 +357,7 @@ printk("SIG queue (%s:%d): %d ", t->comm, t->pid, sig);
 			q->next = NULL;
 			*t->sigqueue_tail = q;
 			t->sigqueue_tail = &q->next;
-			switch ((unsigned long) info) {
-			case 0:
-				q->info.si_signo = sig;
-				q->info.si_errno = 0;
-				q->info.si_code = SI_USER;
-				q->info.si_pid = current->pid;
-				q->info.si_uid = current->uid;
-				break;
-			case 1:
-				q->info.si_signo = sig;
-				q->info.si_errno = 0;
-				q->info.si_code = SI_KERNEL;
-				q->info.si_pid = 0;
-				q->info.si_uid = 0;
-				break;
-			default:
-				q->info = *info;
-				break;
-			}
+			set_siginfo(&q->info, info, sig);
 		} else {
 			/* If this was sent by a rt mechanism, try again.  */
 			if (info->si_code < 0) {
diff --git a/kernel/time.c b/kernel/time.c
index 911442dad460..9f778fd4cdb7 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -414,3 +414,57 @@ asmlinkage int sys_adjtimex(struct timex *txc_p)
 	ret = do_adjtimex(&txc);
 	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? 
-EFAULT : ret;
 }
+
+/* POSIX.1b clock functions: only CLOCK_REALTIME is implemented. */
+
+/* Copy the current time of day to *ts; a NULL ts is a successful no-op. */
+asmlinkage int sys_clock_gettime(clockid_t clock_id, struct timespec *ts)
+{
+	struct timespec kts;
+	struct timeval ktv;
+
+	if (clock_id != CLOCK_REALTIME) return -EINVAL;
+	if (ts == NULL) return 0;
+
+	do_gettimeofday(&ktv);
+	kts.tv_sec = ktv.tv_sec;
+	kts.tv_nsec = ktv.tv_usec * NSEC_PER_USEC;
+	if (copy_to_user(ts, &kts, sizeof(kts))) return -EFAULT;
+
+	return 0;
+}
+
+/* Set the time of day from *ts, truncated to microseconds. */
+asmlinkage int sys_clock_settime(clockid_t clock_id,
+				 const struct timespec *ts)
+{
+	struct timespec new_ts;
+	struct timeval tv;
+
+	if (clock_id != CLOCK_REALTIME) return -EINVAL;
+	if (ts == NULL) return 0;
+
+	if (copy_from_user(&new_ts, ts, sizeof(new_ts))) return -EFAULT;
+	/* POSIX: a nanosecond field outside [0, NSEC_PER_SEC) is invalid */
+	if ((new_ts.tv_nsec < 0) || (new_ts.tv_nsec >= NSEC_PER_SEC))
+		return -EINVAL;
+	tv.tv_sec = new_ts.tv_sec;
+	tv.tv_usec = new_ts.tv_nsec / NSEC_PER_USEC;
+	return do_sys_settimeofday(&tv, NULL);
+}
+
+/* Report the clock resolution: one timer tick, NSEC_PER_SEC / HZ ns. */
+asmlinkage int sys_clock_getres(clockid_t clock_id,
+				struct timespec *res)
+{
+	struct timespec kres;
+
+	if (clock_id != CLOCK_REALTIME) return -EINVAL;
+	if (res == NULL) return 0;
+
+	kres.tv_sec = 0;
+	kres.tv_nsec = NSEC_PER_SEC / HZ;
+	if (copy_to_user(res, &kres, sizeof(kres))) return -EFAULT;
+
+	return 0;
+}
-- 
2.39.5