From 1b6d23c1c7f9952a599be0601f73d84d384dc5da Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:15:17 -0500 Subject: [PATCH] Linux 2.1.99pre1 a cleanup of my previous patches wrt irq handling, and also fixes a real bug (we used to ACK the io-apic outside the irq-controller lock, which meant that the ack's we did and "ipi_pending[]" might have gotten out of sync - which could certainly have resulted in bad behaviour). This also re-enables the code that replays interrupts in enable_irq(), because it should be ok now that the rest of the code is cleaned up. People that had the earlier problem with locking up with floppies, please test: if this re-introduces the lockup, please just #if 0 out all the code inside trigger_pending_irqs(), and send me a note telling me that that code still doesn't work. Linus --- Documentation/kmod.txt | 14 +-- Documentation/sysctl/kernel.txt | 1 - Makefile | 2 +- arch/alpha/lib/copy_user.S | 53 ++++++--- arch/i386/kernel/irq.c | 154 +++++++++++++------------- arch/i386/mm/init.c | 1 - drivers/block/floppy.c | 2 - drivers/net/arcnet.c | 24 +++-- drivers/scsi/BusLogic.c | 12 ++- drivers/scsi/BusLogic.h | 2 - drivers/scsi/scsi_ioctl.c | 5 +- drivers/scsi/sg.c | 7 ++ fs/binfmt_elf.c | 7 +- fs/dcache.c | 42 ++------ fs/namei.c | 2 - include/linux/sysctl.h | 1 - include/scsi/scsi_ioctl.h | 8 +- init/main.c | 7 -- kernel/kmod.c | 185 ++++++++++++-------------------- kernel/sysctl.c | 3 - mm/page_alloc.c | 17 +-- mm/vmscan.c | 5 +- 22 files changed, 256 insertions(+), 298 deletions(-) diff --git a/Documentation/kmod.txt b/Documentation/kmod.txt index 3deeeaed878c..87a03429f9b3 100644 --- a/Documentation/kmod.txt +++ b/Documentation/kmod.txt @@ -4,19 +4,19 @@ Kirk Petersen Kmod is a simple replacement for kerneld. It consists of a request_module() replacement and a kernel thread called kmod. When the kernel requests a module, the kmod wakes up and execve()s modprobe, -passing it the name that was requested. After a configurable period of -time, kmod will have delete_module() remove any unused modules. +passing it the name that was requested. -Kmod is configurable through two entries in /proc/sys/kernel. You can -set the path of modprobe (where the kernel looks for it) by doing: +If you have the /proc filesystem mounted, you can set the path of +modprobe (where the kernel looks for it) by doing: echo "/sbin/modprobe" > /proc/sys/kernel/modprobe -To tell kmod when to unload unused modules, do something like: +To periodically unload unused modules, put something like the following +in root's crontab entry: - echo "120" > /proc/sys/kernel/kmod_unload_delay + 0-59/5 * * * * /sbin/rmmod -a -Kmod only loads and unloads modules. Kerneld could do more (although +Kmod only loads modules. Kerneld could do more (although nothing in the standard kernel used its other features). If you require features such as request_route, we suggest that you take a similar approach. A simple request_route function could be called, diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index c33df7e7bf89..15615f36481a 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -25,7 +25,6 @@ Currently, these files are in /proc/sys/kernel: - inode-max - inode-nr - inode-state -- kmod_unload_delay ==> Documentation/kmod.txt - modprobe ==> Documentation/kmod.txt - osrelease - ostype diff --git a/Makefile b/Makefile index bcacd9c2185c..1586e862d0f0 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 1 -SUBLEVEL = 98 +SUBLEVEL = 99 ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/) diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S index da57fd6d1ac7..aa309b9f5245 100644 --- a/arch/alpha/lib/copy_user.S +++ b/arch/alpha/lib/copy_user.S @@ -27,11 +27,18 @@ */ /* Allow an exception for an insn; exit if we get one. */ -#define EX(x,y...) \ +#define EXI(x,y...) \ 99: x,##y; \ .section __ex_table,"a"; \ .gprel32 99b; \ - lda $31, $exit-99b($31); \ + lda $31, $exitin-99b($31); \ + .previous + +#define EXO(x,y...) \ + 99: x,##y; \ + .section __ex_table,"a"; \ + .gprel32 99b; \ + lda $31, $exitout-99b($31); \ .previous .set noat @@ -45,14 +52,14 @@ __copy_user: subq $3,8,$3 .align 5 $37: - EX( ldq_u $1,0($7) ) - EX( ldq_u $2,0($6) ) + EXI( ldq_u $1,0($7) ) + EXO( ldq_u $2,0($6) ) extbl $1,$7,$1 mskbl $2,$6,$2 insbl $1,$6,$1 addq $3,1,$3 bis $1,$2,$1 - EX( stq_u $1,0($6) ) + EXO( stq_u $1,0($6) ) subq $0,1,$0 addq $6,1,$6 addq $7,1,$7 @@ -63,10 +70,10 @@ $36: bic $0,7,$4 beq $1,$43 beq $4,$48 - EX( ldq_u $3,0($7) ) + EXI( ldq_u $3,0($7) ) .align 5 $50: - EX( ldq_u $2,8($7) ) + EXI( ldq_u $2,8($7) ) subq $4,8,$4 extql $3,$7,$3 extqh $2,$7,$1 @@ -81,13 +88,13 @@ $48: beq $0,$41 .align 5 $57: - EX( ldq_u $1,0($7) ) - EX( ldq_u $2,0($6) ) + EXI( ldq_u $1,0($7) ) + EXO( ldq_u $2,0($6) ) extbl $1,$7,$1 mskbl $2,$6,$2 insbl $1,$6,$1 bis $1,$2,$1 - EX( stq_u $1,0($6) ) + EXO( stq_u $1,0($6) ) subq $0,1,$0 addq $6,1,$6 addq $7,1,$7 @@ -98,7 +105,7 @@ $43: beq $4,$65 .align 5 $66: - EX( ldq $1,0($7) ) + EXI( ldq $1,0($7) ) subq $4,8,$4 stq $1,0($6) addq $7,8,$7 @@ -107,15 +114,31 @@ $66: bne $4,$66 $65: beq $0,$41 - EX( ldq $2,0($7) ) - EX( ldq $1,0($6) ) + EXI( ldq $2,0($7) ) + EXO( ldq $1,0($6) ) mskql $2,$0,$2 mskqh $1,$0,$1 bis $2,$1,$2 - EX( stq $2,0($6) ) + EXO( stq $2,0($6) ) bis $31,$31,$0 $41: $35: -$exit: +$exitout: ret $31,($28),1 + +$exitin: + /* A stupid byte-by-byte zeroing of the rest of the output + buffer. This cures security holes by never leaving + random kernel data around to be copied elsewhere. */ + + mov $0,$1 +$101: + EXO ( ldq_u $2,0($6) ) + subq $1,1,$1 + mskbl $2,$6,$2 + EXO ( stq_u $2,0($6) ) + addq $6,1,$6 + bgt $1,$101 + ret $31,($28),1 + .end __copy_user diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 40af4f37ff85..75527666f2cc 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -70,9 +70,6 @@ spinlock_t irq_controller_lock; static unsigned int irq_events [NR_IRQS] = { -1, }; static int disabled_irq [NR_IRQS] = { 0, }; -#ifdef __SMP__ -static int ipi_pending [NR_IRQS] = { 0, }; -#endif /* * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) @@ -651,18 +648,6 @@ static int handle_IRQ_event(unsigned int irq, struct pt_regs * regs) return status; } - -void disable_irq(unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - irq_handles[irq]->disable(irq); - spin_unlock_irqrestore(&irq_controller_lock, flags); - - synchronize_irq(); -} - /* * disable/enable_irq() wait for all irq contexts to finish * executing. Also it's recursive. @@ -673,55 +658,10 @@ static void disable_8259A_irq(unsigned int irq) set_8259A_irq_mask(irq); } -#ifdef __SMP__ -static void disable_ioapic_irq(unsigned int irq) -{ - disabled_irq[irq] = 1; - /* - * We do not disable IO-APIC irqs in hardware ... - */ -} -#endif - void enable_8259A_irq (unsigned int irq) { - unsigned long flags; - spin_lock_irqsave(&irq_controller_lock, flags); cached_irq_mask &= ~(1 << irq); set_8259A_irq_mask(irq); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} - -#ifdef __SMP__ -void enable_ioapic_irq (unsigned int irq) -{ - unsigned long flags, should_handle_irq; - int cpu = smp_processor_id(); - - spin_lock_irqsave(&irq_controller_lock, flags); - disabled_irq[irq] = 0; - - /* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we dont have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. - */ - if (irq_events[irq]) { - if (!ipi_pending[irq]) { - ipi_pending[irq] = 1; - --irq_events[irq]; - send_IPI(cpu,IO_APIC_VECTOR(irq)); - } - } - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -#endif - -void enable_irq(unsigned int irq) -{ - irq_handles[irq]->enable(irq); } void make_8259A_irq (unsigned int irq) @@ -771,41 +711,107 @@ static void do_8259A_IRQ(unsigned int irq, int cpu, struct pt_regs * regs) } #ifdef __SMP__ + +static int ipi_pending [NR_IRQS] = { 0, }; + +/* + * In the SMP+IOAPIC case it might happen that there are an unspecified + * number of pending IRQ events unhandled. These cases are very rare, + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much + * better to do it this way as thus we dont have to be aware of + * 'pending' interrupts in the IRQ path, except at this point. + */ +static inline void trigger_pending_irqs(unsigned int irq) +{ + if (irq_events[irq] && !ipi_pending[irq]) { + ipi_pending[irq] = 1; + send_IPI(smp_processor_id(), IO_APIC_VECTOR(irq)); + } +} + +void enable_ioapic_irq (unsigned int irq) +{ + disabled_irq[irq] = 0; + trigger_pending_irqs(irq); +} + +/* + * We do not actually disable IO-APIC irqs in hardware ... + */ +static void disable_ioapic_irq(unsigned int irq) +{ + disabled_irq[irq] = 1; +} + static void do_ioapic_IRQ(unsigned int irq, int cpu, struct pt_regs * regs) { - int should_handle_irq = 0; + spin_lock(&irq_controller_lock); + /* Ack the irq inside the lock! */ ack_APIC_irq(); + ipi_pending[irq] = 0; - spin_lock(&irq_controller_lock); - if (ipi_pending[irq]) - ipi_pending[irq] = 0; + /* If the irq is disabled, just set a flag and return */ + if (disabled_irq[irq]) { + irq_events[irq] = 1; + spin_unlock(&irq_controller_lock); + return; + } - if (!irq_events[irq]++ && !disabled_irq[irq]) - should_handle_irq = 1; + disabled_irq[irq] = 1; + irq_events[irq] = 0; hardirq_enter(cpu); spin_unlock(&irq_controller_lock); - if (should_handle_irq) { - while (test_bit(0,&global_irq_lock)) mb(); -again: + while (test_bit(0,&global_irq_lock)) barrier(); + + for (;;) { + int pending; + handle_IRQ_event(irq, regs); spin_lock(&irq_controller_lock); - should_handle_irq=0; - if (--irq_events[irq] && !disabled_irq[irq]) - should_handle_irq=1; + pending = irq_events[irq]; + irq_events[irq] = 0; + disabled_irq[irq] = pending; spin_unlock(&irq_controller_lock); - if (should_handle_irq) - goto again; + if (!pending) + break; } hardirq_exit(cpu); release_irqlock(cpu); } + #endif + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version after having + * gotten the irq controller lock. + */ +void disable_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_controller_lock, flags); + irq_handles[irq]->disable(irq); + spin_unlock_irqrestore(&irq_controller_lock, flags); + + synchronize_irq(); +} + +void enable_irq(unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&irq_controller_lock, flags); + irq_handles[irq]->enable(irq); + spin_unlock_irqrestore(&irq_controller_lock, flags); +} + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index b96a4bc2f195..caa366e3a354 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -90,7 +90,6 @@ void show_mem(void) shared += atomic_read(&mem_map[i].count) - 1; } printk("%d pages of RAM\n",total); - printk("%d free pages\n",free); printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 1aa24b66ef55..989da1497835 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4211,7 +4211,6 @@ static int floppy_grab_irq_and_dma(void) if (FDCS->address != -1) fd_outb(FDCS->dor, FD_DOR); fdc = 0; - fd_enable_irq(); return 0; } @@ -4235,7 +4234,6 @@ static void floppy_release_irq_and_dma(void) INT_ON; fd_disable_dma(); fd_free_dma(); - fd_disable_irq(); fd_free_irq(); set_dor(0, ~0, 8); diff --git a/drivers/net/arcnet.c b/drivers/net/arcnet.c index 8a731a6e35aa..16795257ec1b 100644 --- a/drivers/net/arcnet.c +++ b/drivers/net/arcnet.c @@ -18,6 +18,10 @@ ********************** + v3.01 (98/04/17) + - Interrupt handler now also checks dev->[se]dev are non-NULL + to avoid crashes in interrupts during card init. [dw] + v3.00 (97/11/09) - Minor cleanup of debugging messages. [mj] @@ -41,10 +45,10 @@ v2.80 ALPHA (97/08/01) - Split source into multiple files; generic arcnet support and - individual chipset drivers. + individual chipset drivers. - v2.61 ALPHA (97/07/30) by David Woodhouse (dwmw2@cam.ac.uk) for - Nortel (Northern Telecom). + v2.61 ALPHA (97/07/30) by David Woodhouse (Dave@imladris.demon.co.uk) + for Nortel (Northern Telecom). - Added support for IO-mapped modes and for SMC COM20020 chipset. - Fixed (avoided) race condition in send_packet routines which was discovered when the buffer copy routines got slow (?). @@ -170,7 +174,7 @@ */ static const char *version = - "arcnet.c: v3.00 97/11/09 Avery Pennarun et al.\n"; + "arcnet.c: v3.01 98/04/24 Avery Pennarun et al.\n"; #include #include @@ -956,20 +960,24 @@ arcnet_interrupt(int irq,void *dev_id,struct pt_regs *regs) return; /* don't even try. */ } #ifdef CONFIG_ARCNET_1051 - lp->sdev->interrupt=1; + if (lp->sdev) + lp->sdev->interrupt=1; #endif #ifdef CONFIG_ARCNET_ETH - lp->edev->interrupt=1; + if (lp->edev) + lp->edev->interrupt=1; #endif /* Call the "real" interrupt handler. */ (*lp->inthandler)(dev); #ifdef CONFIG_ARCNET_ETH - lp->edev->interrupt=0; + if (lp->edev) + lp->edev->interrupt=0; #endif #ifdef CONFIG_ARCNET_1051 - lp->sdev->interrupt=0; + if (lp->sdev) + lp->sdev->interrupt=0; #endif if (!test_and_clear_bit(0, (int *)&dev->interrupt)) BUGMSG(D_NORMAL, "Someone cleared our dev->interrupt flag!\n"); diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index 77292e1b69c9..28f18d7ddae9 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -1204,10 +1205,13 @@ static boolean BusLogic_Failure(BusLogic_HostAdapter_T *HostAdapter, { BusLogic_AnnounceDriver(HostAdapter); if (HostAdapter->HostAdapterBusType == BusLogic_PCI_Bus) - BusLogic_Error("While configuring BusLogic PCI Host Adapter at\n" - "Bus %d Device %d I/O Address 0x%X PCI Address 0x%X:\n", - HostAdapter, HostAdapter->Bus, HostAdapter->Device, - HostAdapter->IO_Address, HostAdapter->PCI_Address); + { + BusLogic_Error("While configuring BusLogic PCI Host Adapter at\n", + HostAdapter); + BusLogic_Error("Bus %d Device %d I/O Address 0x%X PCI Address 0x%X:\n", + HostAdapter, HostAdapter->Bus, HostAdapter->Device, + HostAdapter->IO_Address, HostAdapter->PCI_Address); + } else BusLogic_Error("While configuring BusLogic Host Adapter at " "I/O Address 0x%X:\n", HostAdapter, HostAdapter->IO_Address); diff --git a/drivers/scsi/BusLogic.h b/drivers/scsi/BusLogic.h index 820f6cb81a62..654550aedae3 100644 --- a/drivers/scsi/BusLogic.h +++ b/drivers/scsi/BusLogic.h @@ -1532,7 +1532,6 @@ static inline void BusLogic_AcquireHostAdapterLockIH(BusLogic_HostAdapter_T *HostAdapter, ProcessorFlags_T *ProcessorFlags) { - extern spinlock_t io_request_lock; spin_lock_irqsave(&io_request_lock, *ProcessorFlags); } @@ -1546,7 +1545,6 @@ static inline void BusLogic_ReleaseHostAdapterLockIH(BusLogic_HostAdapter_T *HostAdapter, ProcessorFlags_T *ProcessorFlags) { - extern spinlock_t io_request_lock; spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); } diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index 8f6aff9850d1..a052caadfe9b 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -167,7 +167,7 @@ static int ioctl_internal_command(Scsi_Device *dev, char * cmd) * interface instead, as this is a more flexible approach to performing * generic SCSI commands on a device. */ -static int ioctl_command(Scsi_Device *dev, Scsi_Ioctl_Command *sic) +int scsi_ioctl_send_command(Scsi_Device *dev, Scsi_Ioctl_Command *sic) { unsigned long flags; char * buf; @@ -387,7 +387,8 @@ int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg) return ioctl_probe(dev->host, arg); case SCSI_IOCTL_SEND_COMMAND: if(!suser()) return -EACCES; - return ioctl_command((Scsi_Device *) dev, (Scsi_Ioctl_Command *) arg); + return scsi_ioctl_send_command((Scsi_Device *) dev, + (Scsi_Ioctl_Command *) arg); case SCSI_IOCTL_DOORLOCK: if (!dev->removable || !dev->lockable) return 0; scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index f50cc96c2b90..4232a56a5845 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -97,6 +97,13 @@ static int sg_ioctl(struct inode * inode,struct file * file, return scsi_generics[dev].timeout; case SG_EMULATED_HOST: return put_user(scsi_generics[dev].device->host->hostt->emulated, (int *) arg); + case SCSI_IOCTL_SEND_COMMAND: + /* + Allow SCSI_IOCTL_SEND_COMMAND without checking suser() since the + user already has read/write access to the generic device and so + can execute arbitrary SCSI commands. + */ + return scsi_ioctl_send_command(scsi_generics[dev].device, (void *) arg); default: return scsi_ioctl(scsi_generics[dev].device, cmd_in, (void *) arg); } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8324b4a2d9b3..342eb1ab5785 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -517,9 +517,10 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) retval = PTR_ERR(interpreter_dentry); if (IS_ERR(interpreter_dentry)) goto out_free_interp; - - retval = read_exec(interpreter_dentry, 0, bprm->buf, - 128, 1); + retval = permission(interpreter_dentry->d_inode, MAY_EXEC); + if (retval < 0) + goto out_free_dentry; + retval = read_exec(interpreter_dentry, 0, bprm->buf, 128, 1); if (retval < 0) goto out_free_dentry; diff --git a/fs/dcache.c b/fs/dcache.c index 728c8ed1c602..58c6479c91b0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -426,42 +426,16 @@ void shrink_dcache_parent(struct dentry * parent) } /* - * This is called from do_try_to_free_page() to indicate - * that we should reduce the dcache and inode cache memory. + * This is called from kswapd when we think we need some + * more memory, but aren't really sure how much. So we + * carefully try to free a _bit_ of our dcache, but not + * too much. */ -void shrink_dcache_memory() +void shrink_dcache_memory(void) { - dentry_stat.want_pages++; -} - -/* - * This carries out the request received by the above routine. - */ -void check_dcache_memory() -{ - if (dentry_stat.want_pages) { - unsigned int count, goal = 0; - /* - * Set the page goal. We don't necessarily need to trim - * the dcache just because the system needs memory ... - */ - if (page_cache_size > (num_physpages >> 1)) - goal = (dentry_stat.want_pages * page_cache_size) - / num_physpages; - dentry_stat.want_pages = 0; - if (goal) { - if (goal > 50) - goal = 50; - count = select_dcache(32, goal); -#ifdef DCACHE_DEBUG -printk(KERN_DEBUG "check_dcache_memory: goal=%d, count=%d\n", goal, count); -#endif - if (count) { - prune_dcache(count); - free_inode_memory(count); - } - } - } + int count = select_dcache(32, 8); + if (count) + prune_dcache(count); } #define NAME_ALLOC_LEN(len) ((len+16) & ~15) diff --git a/fs/namei.c b/fs/namei.c index 519a8ceac00c..a6de99eadfa3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -454,7 +454,6 @@ struct dentry * __namei(const char *pathname, int follow_link) char *name; struct dentry *dentry; - check_dcache_memory(); name = getname(pathname); dentry = (struct dentry *) name; if (!IS_ERR(name)) { @@ -528,7 +527,6 @@ struct dentry * open_namei(const char * pathname, int flag, int mode) struct inode *inode; struct dentry *dentry; - check_dcache_memory(); mode &= S_IALLUGO & ~current->fs->umask; mode |= S_IFREG; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 2df16d607bac..11552a24d417 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -72,7 +72,6 @@ enum KERN_STATINODE, KERN_DENTRY, /* dentry statistics */ KERN_MODPROBE, - KERN_KMOD_UNLOAD_DELAY }; diff --git a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h index 57f439dfbc3c..97e20a98a5ed 100644 --- a/include/scsi/scsi_ioctl.h +++ b/include/scsi/scsi_ioctl.h @@ -15,9 +15,6 @@ #ifdef __KERNEL__ -extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg); -extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg); - /* * Structures used for scsi_ioctl et al. */ @@ -33,6 +30,11 @@ typedef struct scsi_idlun { __u32 host_unique_id; } Scsi_Idlun; +extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg); +extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg); +extern int scsi_ioctl_send_command(Scsi_Device *dev, + Scsi_Ioctl_Command *arg); + #endif #endif diff --git a/init/main.c b/init/main.c index de8dae6df10b..2230ea2d031d 100644 --- a/init/main.c +++ b/init/main.c @@ -1162,13 +1162,6 @@ static int init(void * unused) smp_begin(); #endif -#ifdef CONFIG_KMOD - { - extern int kmod_init(void); - kmod_init(); - } -#endif - #ifdef CONFIG_UMSDOS_FS { /* diff --git a/kernel/kmod.c b/kernel/kmod.c index c53c09f5bc28..7c57b1af51db 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -1,6 +1,12 @@ /* kmod, the new module loader (replaces kerneld) Kirk Petersen + + Reorganized not to be a daemon by Adam Richter, with guidance + from Greg Zornetzer. + + Modified to avoid chroot and file sharing problems. + Mikael Pettersson */ #define __KERNEL_SYSCALLS__ @@ -8,147 +14,88 @@ #include #include #include +#include +#include /* - kmod_unload_delay and modprobe_path are set via /proc/sys. + modprobe_path is set via /proc/sys. */ -int kmod_unload_delay = 60; char modprobe_path[256] = "/sbin/modprobe"; -static char module_name[64] = ""; -static char * argv[] = { modprobe_path, "-s", "-k", module_name, NULL }; static char * envp[] = { "HOME=/", "TERM=linux", "PATH=/usr/bin:/bin", NULL }; /* - kmod_queue synchronizes the kmod thread and the rest of the system - kmod_unload_timer is what we use to unload modules - after kmod_unload_delay seconds + exec_modprobe is spawned from a kernel-mode user process, + then changes its state to behave _as_if_ it was spawned + from the kernel's init process + (ppid and {e,}gid are not adjusted, but that shouldn't + be a problem since we trust modprobe) */ -static struct wait_queue * kmod_queue = NULL; -static struct timer_list kmod_unload_timer; - -/* - It is not easy to implement a full fork in kernel-space on some - systems (Alpha), and it is not necessary for us here. This is - a new thread that does the exec. -*/ -static int kmod_exec_modprobe(void * data) -{ - sigemptyset(¤t->blocked); - execve(modprobe_path, argv, envp); - printk(KERN_ERR "kmod: failed to load module %s\n", module_name); - return 0; -} +#define task_init task[smp_num_cpus] -/* - kmod_thread is the thread that does most of the work. kmod_unload and - request_module tell it to wake up and do work. -*/ -static int kmod_thread(void * data) -{ - int pid; +static inline void +use_init_file_context(void) { + lock_kernel(); - /* - Initialize basic thread information - */ - current->session = 1; - current->pgrp = 1; - sprintf(current->comm, "kmod"); - sigfillset(¤t->blocked); - - /* - This is the main kmod_thread loop. It first sleeps, then - handles requests from request_module or kmod_unload. - */ + /* don't use the user's root, use init's root instead */ + exit_fs(current); /* current->fs->count--; */ + current->fs = task_init->fs; + current->fs->count++; - while (1) { - interruptible_sleep_on(&kmod_queue); - - /* - If request_module woke us up, we should try to - load module_name. If not, kmod_unload woke us up, - do call delete_module. - (if somehow both want us to do something, ignore the - delete_module request) - */ - if (module_name[0] == '\0') { - delete_module(NULL); - } else { - pid = kernel_thread(kmod_exec_modprobe, NULL, SIGCHLD); - if (pid > 0) { - waitpid(pid, NULL, 0); - module_name[0] = '\0'; - wake_up(&kmod_queue); - } else { - printk(KERN_ERR "kmod: fork failed, errno %d\n", -pid); - } - } - } + /* don't use the user's files, use init's files instead */ + exit_files(current); /* current->files->count--; */ + current->files = task_init->files; + current->files->count++; - return 0; /* Never reached. */ + unlock_kernel(); } -/* - kmod_unload is the function that the kernel calls when - the kmod_unload_timer expires -*/ -void kmod_unload(unsigned long x) +static int exec_modprobe(void * module_name) { - /* - wake up the kmod thread, which does the work - (we can't call delete_module, as it locks the kernel and - we are in the bottom half of the kernel (right?)) - once it is awake, reset the timer - */ - wake_up(&kmod_queue); - kmod_unload_timer.expires = jiffies + (kmod_unload_delay * HZ); - add_timer(&kmod_unload_timer); -} + char *argv[] = { modprobe_path, "-s", "-k", (char*)module_name, NULL}; -int kmod_init(void) -{ - printk("Starting kmod\n"); - - /* - * CLONE_FS means that our "cwd" will follow that of init. - * CLONE_FILES just saves some space (we don't need any - * new file descriptors). Ditto for CLONE_SIGHAND. - */ - kernel_thread(kmod_thread, NULL, CLONE_FILES | CLONE_FS | CLONE_SIGHAND); - - kmod_unload_timer.next = NULL; - kmod_unload_timer.prev = NULL; - kmod_unload_timer.expires = jiffies + (5 * 60 * HZ); - kmod_unload_timer.data = 0L; - kmod_unload_timer.function = kmod_unload; - add_timer(&kmod_unload_timer); + use_init_file_context(); + /* Prevent parent user process from sending signals to child. + Otherwise, if the modprobe program does not exist, it might + be possible to get a user defined signal handler to execute + as the super user right after the execve fails if you time + the signal just right. + */ + spin_lock_irq(¤t->sigmask_lock); + flush_signals(current); + flush_signal_handlers(current); + spin_unlock_irq(¤t->sigmask_lock); + + set_fs(KERNEL_DS); /* Allow execve args to be in kernel space. */ + current->uid = current->euid = 0; + if (execve(modprobe_path, argv, envp) < 0) { + printk(KERN_ERR + "kmod: failed to exec %s -s -k %s, errno = %d\n", + modprobe_path, (char*) module_name, errno); + return -errno; + } return 0; } /* - request_module, the function that everyone calls when they need a - module to be loaded + request_module: the function that everyone calls when they need + a module. */ -int request_module(const char * name) +int request_module(const char * module_name) { - /* first, copy the name of the module into module_name */ - /* then wake_up() the kmod daemon */ - /* wait for the kmod daemon to finish (it will wake us up) */ - - /* - kmod_thread is sleeping, so start by copying the name of - the module into module_name. Once that is done, wake up - kmod_thread. - */ - strncpy(module_name, name, sizeof(module_name)); - module_name[sizeof(module_name)-1] = '\0'; - wake_up(&kmod_queue); - - /* - Now that we have told kmod_thread what to do, we want to - go to sleep and let it do its work. It will wake us up, - at which point we will be done (the module will be loaded). - */ - interruptible_sleep_on(&kmod_queue); + int pid; + int waitpid_result; + + pid = kernel_thread(exec_modprobe, (void*) module_name, + CLONE_FS | CLONE_FILES | SIGCHLD); + if (pid < 0) { + printk(KERN_ERR "kmod: fork failed, errno %d\n", -pid); + return pid; + } + waitpid_result = waitpid(pid, NULL, 0); + if (waitpid_result != pid) { + printk (KERN_ERR "kmod: waitpid(%d,NULL,0) failed, returning %d.\n", + pid, waitpid_result); + } return 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ed2d8fd6987c..27616bfee282 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -43,7 +43,6 @@ extern char binfmt_java_interpreter[], binfmt_java_appletviewer[]; extern int sysctl_overcommit_memory; #ifdef CONFIG_KMOD extern char modprobe_path[]; -extern int kmod_unload_delay; #endif #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; @@ -180,8 +179,6 @@ static ctl_table kern_table[] = { #ifdef CONFIG_KMOD {KERN_MODPROBE, "modprobe", &modprobe_path, 256, 0644, NULL, &proc_dostring, &sysctl_string }, - {KERN_KMOD_UNLOAD_DELAY, "kmod_unload_delay", &kmod_unload_delay, - sizeof(int), 0644, NULL, &proc_dointvec}, #endif #ifdef CONFIG_CHR_DEV_SG {KERN_NRFILE, "sg-big-buff", &sg_big_buff, sizeof (int), diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 80b916b13bee..2423d0d8bde4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -286,16 +286,17 @@ unsigned long __get_free_pages(int gfp_mask, unsigned long order) } } -repeat: - spin_lock_irqsave(&page_alloc_lock, flags); - RMQUEUE(order, maxorder, (gfp_mask & GFP_DMA)); - spin_unlock_irqrestore(&page_alloc_lock, flags); - if (gfp_mask & __GFP_WAIT) { - int freed = try_to_free_pages(gfp_mask,SWAP_CLUSTER_MAX); + for (;;) { + spin_lock_irqsave(&page_alloc_lock, flags); + RMQUEUE(order, maxorder, (gfp_mask & GFP_DMA)); + spin_unlock_irqrestore(&page_alloc_lock, flags); + if (!(gfp_mask & __GFP_WAIT)) + break; + shrink_dcache(); + if (!try_to_free_pages(gfp_mask, SWAP_CLUSTER_MAX)) + break; gfp_mask &= ~__GFP_WAIT; /* go through this only once */ maxorder = NR_MEM_LISTS; /* Allow anything this time */ - if (freed) - goto repeat; } nopage: return 0; diff --git a/mm/vmscan.c b/mm/vmscan.c index 0ad129a6b714..0b970c06ae6b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -442,7 +442,7 @@ static inline int do_try_to_free_page(int gfp_mask) int stop; /* Let the dcache know we're looking for memory ... */ - shrink_dcache_memory(); + shrink_dcache(); /* Always trim SLAB caches when memory gets low. */ kmem_cache_reap(gfp_mask); @@ -547,6 +547,9 @@ int kswapd(void *unused) run_task_queue(&tq_disk); schedule(); swapstats.wakeups++; + + /* This will gently shrink the dcache.. */ + shrink_dcache_memory(); /* * Do the background pageout: be -- 2.39.5