From 1b6d23c1c7f9952a599be0601f73d84d384dc5da Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linuxfoundation.org>
Date: Fri, 23 Nov 2007 15:15:17 -0500
Subject: [PATCH] Linux 2.1.99pre1

a cleanup of my previous patches wrt irq handling, and also fixes
a real bug (we used to ACK the io-apic outside the irq-controller lock,
which meant that the ack's we did and "ipi_pending[]" might have gotten
out of sync - which could certainly have resulted in bad behaviour).
This also re-enables the code that replays interrupts in enable_irq(),
because it should be ok now that the rest of the code is cleaned up.
People that had the earlier problem with locking up with floppies, please
test: if this re-introduces the lockup, please just #if 0 out all the code
inside trigger_pending_irqs(), and send me a note telling me that that
code still doesn't work.

                Linus
---
 Documentation/kmod.txt          |  14 +--
 Documentation/sysctl/kernel.txt |   1 -
 Makefile                        |   2 +-
 arch/alpha/lib/copy_user.S      |  53 ++++++---
 arch/i386/kernel/irq.c          | 154 +++++++++++++-------------
 arch/i386/mm/init.c             |   1 -
 drivers/block/floppy.c          |   2 -
 drivers/net/arcnet.c            |  24 +++--
 drivers/scsi/BusLogic.c         |  12 ++-
 drivers/scsi/BusLogic.h         |   2 -
 drivers/scsi/scsi_ioctl.c       |   5 +-
 drivers/scsi/sg.c               |   7 ++
 fs/binfmt_elf.c                 |   7 +-
 fs/dcache.c                     |  42 ++------
 fs/namei.c                      |   2 -
 include/linux/sysctl.h          |   1 -
 include/scsi/scsi_ioctl.h       |   8 +-
 init/main.c                     |   7 --
 kernel/kmod.c                   | 185 ++++++++++++--------------------
 kernel/sysctl.c                 |   3 -
 mm/page_alloc.c                 |  17 +--
 mm/vmscan.c                     |   5 +-
 22 files changed, 256 insertions(+), 298 deletions(-)

diff --git a/Documentation/kmod.txt b/Documentation/kmod.txt
index 3deeeaed878c..87a03429f9b3 100644
--- a/Documentation/kmod.txt
+++ b/Documentation/kmod.txt
@@ -4,19 +4,19 @@ Kirk Petersen
 Kmod is a simple replacement for kerneld.  It consists of a 
 request_module() replacement and a kernel thread called kmod.  When the
 kernel requests a module, the kmod wakes up and execve()s modprobe,
-passing it the name that was requested.  After a configurable period of
-time, kmod will have delete_module() remove any unused modules.
+passing it the name that was requested.
 
-Kmod is configurable through two entries in /proc/sys/kernel.  You can
-set the path of modprobe (where the kernel looks for it) by doing:
+If you have the /proc filesystem mounted, you can set the path of
+modprobe (where the kernel looks for it) by doing:
 
 	echo "/sbin/modprobe" > /proc/sys/kernel/modprobe
 
-To tell kmod when to unload unused modules, do something like:
+To periodically unload unused modules, put something like the following
+in root's crontab entry:
 
-	echo "120" > /proc/sys/kernel/kmod_unload_delay
+	0-59/5 * * * * /sbin/rmmod -a
 
-Kmod only loads and unloads modules.  Kerneld could do more (although
+Kmod only loads modules.  Kerneld could do more (although
 nothing in the standard kernel used its other features).  If you
 require features such as request_route, we suggest that you take
 a similar approach.  A simple request_route function could be called,
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index c33df7e7bf89..15615f36481a 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -25,7 +25,6 @@ Currently, these files are in /proc/sys/kernel:
 - inode-max
 - inode-nr
 - inode-state
-- kmod_unload_delay           ==> Documentation/kmod.txt
 - modprobe                    ==> Documentation/kmod.txt
 - osrelease
 - ostype
diff --git a/Makefile b/Makefile
index bcacd9c2185c..1586e862d0f0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 2
 PATCHLEVEL = 1
-SUBLEVEL = 98
+SUBLEVEL = 99
 
 ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/)
 
diff --git a/arch/alpha/lib/copy_user.S b/arch/alpha/lib/copy_user.S
index da57fd6d1ac7..aa309b9f5245 100644
--- a/arch/alpha/lib/copy_user.S
+++ b/arch/alpha/lib/copy_user.S
@@ -27,11 +27,18 @@
  */
 
 /* Allow an exception for an insn; exit if we get one.  */
-#define EX(x,y...)			\
+#define EXI(x,y...)			\
 	99: x,##y;			\
 	.section __ex_table,"a";	\
 	.gprel32 99b;			\
-	lda $31, $exit-99b($31);	\
+	lda $31, $exitin-99b($31);	\
+	.previous
+
+#define EXO(x,y...)			\
+	99: x,##y;			\
+	.section __ex_table,"a";	\
+	.gprel32 99b;			\
+	lda $31, $exitout-99b($31);	\
 	.previous
 
 	.set noat
@@ -45,14 +52,14 @@ __copy_user:
 	subq $3,8,$3
 	.align 5
 $37:
-	EX( ldq_u $1,0($7) )
-	EX( ldq_u $2,0($6) )
+	EXI( ldq_u $1,0($7) )
+	EXO( ldq_u $2,0($6) )
 	extbl $1,$7,$1
 	mskbl $2,$6,$2
 	insbl $1,$6,$1
 	addq $3,1,$3
 	bis $1,$2,$1
-	EX( stq_u $1,0($6) )
+	EXO( stq_u $1,0($6) )
 	subq $0,1,$0
 	addq $6,1,$6
 	addq $7,1,$7
@@ -63,10 +70,10 @@ $36:
 	bic $0,7,$4
 	beq $1,$43
 	beq $4,$48
-	EX( ldq_u $3,0($7) )
+	EXI( ldq_u $3,0($7) )
 	.align 5
 $50:
-	EX( ldq_u $2,8($7) )
+	EXI( ldq_u $2,8($7) )
 	subq $4,8,$4
 	extql $3,$7,$3
 	extqh $2,$7,$1
@@ -81,13 +88,13 @@ $48:
 	beq $0,$41
 	.align 5
 $57:
-	EX( ldq_u $1,0($7) )
-	EX( ldq_u $2,0($6) )
+	EXI( ldq_u $1,0($7) )
+	EXO( ldq_u $2,0($6) )
 	extbl $1,$7,$1
 	mskbl $2,$6,$2
 	insbl $1,$6,$1
 	bis $1,$2,$1
-	EX( stq_u $1,0($6) )
+	EXO( stq_u $1,0($6) )
 	subq $0,1,$0
 	addq $6,1,$6
 	addq $7,1,$7
@@ -98,7 +105,7 @@ $43:
 	beq $4,$65
 	.align 5
 $66:
-	EX( ldq $1,0($7) )
+	EXI( ldq $1,0($7) )
 	subq $4,8,$4
 	stq $1,0($6)
 	addq $7,8,$7
@@ -107,15 +114,31 @@ $66:
 	bne $4,$66
 $65:
 	beq $0,$41
-	EX( ldq $2,0($7) )
-	EX( ldq $1,0($6) )
+	EXI( ldq $2,0($7) )
+	EXO( ldq $1,0($6) )
 	mskql $2,$0,$2
 	mskqh $1,$0,$1
 	bis $2,$1,$2
-	EX( stq $2,0($6) )
+	EXO( stq $2,0($6) )
 	bis $31,$31,$0
 $41:
 $35:
-$exit:
+$exitout:
 	ret $31,($28),1
+
+$exitin:
+	/* A stupid byte-by-byte zeroing of the rest of the output
+	   buffer.  This cures security holes by never leaving 
+	   random kernel data around to be copied elsewhere.  */
+
+	mov $0,$1
+$101:
+	EXO ( ldq_u $2,0($6) )
+	subq $1,1,$1
+	mskbl $2,$6,$2
+	EXO ( stq_u $2,0($6) )
+	addq $6,1,$6
+	bgt $1,$101
+	ret $31,($28),1
+
 	.end __copy_user
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 40af4f37ff85..75527666f2cc 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -70,9 +70,6 @@ spinlock_t irq_controller_lock;
 
 static unsigned int irq_events [NR_IRQS] = { -1, };
 static int disabled_irq [NR_IRQS] = { 0, };
-#ifdef __SMP__
-static int ipi_pending [NR_IRQS] = { 0, };
-#endif
 
 /*
  * Not all IRQs can be routed through the IO-APIC, eg. on certain (older)
@@ -651,18 +648,6 @@ static int handle_IRQ_event(unsigned int irq, struct pt_regs * regs)
 	return status;
 }
 
-
-void disable_irq(unsigned int irq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&irq_controller_lock, flags);
-	irq_handles[irq]->disable(irq);
-	spin_unlock_irqrestore(&irq_controller_lock, flags);
-
-	synchronize_irq();
-}
-
 /*
  * disable/enable_irq() wait for all irq contexts to finish
  * executing. Also it's recursive.
@@ -673,55 +658,10 @@ static void disable_8259A_irq(unsigned int irq)
 	set_8259A_irq_mask(irq);
 }
 
-#ifdef __SMP__
-static void disable_ioapic_irq(unsigned int irq)
-{
-	disabled_irq[irq] = 1;
-	/*
-	 * We do not disable IO-APIC irqs in hardware ...
-	 */
-}
-#endif
-
 void enable_8259A_irq (unsigned int irq)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&irq_controller_lock, flags);
 	cached_irq_mask &= ~(1 << irq);
 	set_8259A_irq_mask(irq);
-	spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-
-#ifdef __SMP__
-void enable_ioapic_irq (unsigned int irq)
-{
-	unsigned long flags, should_handle_irq;
-	int cpu = smp_processor_id();
-
-	spin_lock_irqsave(&irq_controller_lock, flags);
-	disabled_irq[irq] = 0;
-
-	/*
-	 * In the SMP+IOAPIC case it might happen that there are an unspecified
-	 * number of pending IRQ events unhandled. These cases are very rare,
-	 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
-	 * better to do it this way as thus we dont have to be aware of
-	 * 'pending' interrupts in the IRQ path, except at this point.
-	 */
-	if (irq_events[irq]) {
-		if (!ipi_pending[irq]) {
-			ipi_pending[irq] = 1;
-			--irq_events[irq];
-			send_IPI(cpu,IO_APIC_VECTOR(irq));
-		}
-	}
-	spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-#endif
-
-void enable_irq(unsigned int irq)
-{
-	irq_handles[irq]->enable(irq);
 }
 
 void make_8259A_irq (unsigned int irq)
@@ -771,41 +711,107 @@ static void do_8259A_IRQ(unsigned int irq, int cpu, struct pt_regs * regs)
 }
 
 #ifdef __SMP__
+
+static int ipi_pending [NR_IRQS] = { 0, };
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we dont have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+static inline void trigger_pending_irqs(unsigned int irq)
+{
+	if (irq_events[irq] && !ipi_pending[irq]) {
+		ipi_pending[irq] = 1;
+		send_IPI(smp_processor_id(), IO_APIC_VECTOR(irq));
+	}
+}
+
+void enable_ioapic_irq (unsigned int irq)
+{
+	disabled_irq[irq] = 0;
+	trigger_pending_irqs(irq);
+}
+
+/*
+ * We do not actually disable IO-APIC irqs in hardware ...
+ */
+static void disable_ioapic_irq(unsigned int irq)
+{
+	disabled_irq[irq] = 1;
+}
+
 static void do_ioapic_IRQ(unsigned int irq, int cpu, struct pt_regs * regs)
 {
-	int should_handle_irq = 0;
+	spin_lock(&irq_controller_lock);
 
+	/* Ack the irq inside the lock! */
 	ack_APIC_irq();
+	ipi_pending[irq] = 0;
 
-	spin_lock(&irq_controller_lock);
-	if (ipi_pending[irq])
-		ipi_pending[irq] = 0;
+	/* If the irq is disabled, just set a flag and return */
+	if (disabled_irq[irq]) {
+		irq_events[irq] = 1;
+		spin_unlock(&irq_controller_lock);
+		return;
+	}
 
-	if (!irq_events[irq]++ && !disabled_irq[irq])
-		should_handle_irq = 1;
+	disabled_irq[irq] = 1;
+	irq_events[irq] = 0;
 	hardirq_enter(cpu);
 	spin_unlock(&irq_controller_lock);
 
-	if (should_handle_irq) {
-		while (test_bit(0,&global_irq_lock)) mb();
-again:
+	while (test_bit(0,&global_irq_lock)) barrier();
+
+	for (;;) {
+		int pending;
+
 		handle_IRQ_event(irq, regs);
 
 		spin_lock(&irq_controller_lock);
-		should_handle_irq=0;
-		if (--irq_events[irq] && !disabled_irq[irq])
-			should_handle_irq=1;
+		pending = irq_events[irq];
+		irq_events[irq] = 0;
+		disabled_irq[irq] = pending;
 		spin_unlock(&irq_controller_lock);
 
-		if (should_handle_irq)
-			goto again;
+		if (!pending)
+			break;
 	}
 
 	hardirq_exit(cpu);
 	release_irqlock(cpu);
 }
+
 #endif
 
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version after having
+ * gotten the irq controller lock.
+ */
+void disable_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq_controller_lock, flags);
+	irq_handles[irq]->disable(irq);
+	spin_unlock_irqrestore(&irq_controller_lock, flags);
+
+	synchronize_irq();
+}
+
+void enable_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&irq_controller_lock, flags);
+	irq_handles[irq]->enable(irq);
+	spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+
 /*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index b96a4bc2f195..caa366e3a354 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -90,7 +90,6 @@ void show_mem(void)
 			shared += atomic_read(&mem_map[i].count) - 1;
 	}
 	printk("%d pages of RAM\n",total);
-	printk("%d free pages\n",free);
 	printk("%d reserved pages\n",reserved);
 	printk("%d pages shared\n",shared);
 	printk("%d pages swap cached\n",cached);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 1aa24b66ef55..989da1497835 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4211,7 +4211,6 @@ static int floppy_grab_irq_and_dma(void)
 		if (FDCS->address != -1)
 			fd_outb(FDCS->dor, FD_DOR);
 	fdc = 0;
-	fd_enable_irq();
 	return 0;
 }
 
@@ -4235,7 +4234,6 @@ static void floppy_release_irq_and_dma(void)
 	INT_ON;
 	fd_disable_dma();
 	fd_free_dma();
-	fd_disable_irq();
 	fd_free_irq();
 
 	set_dor(0, ~0, 8);
diff --git a/drivers/net/arcnet.c b/drivers/net/arcnet.c
index 8a731a6e35aa..16795257ec1b 100644
--- a/drivers/net/arcnet.c
+++ b/drivers/net/arcnet.c
@@ -18,6 +18,10 @@
 
 	**********************
 
+	v3.01 (98/04/17)
+	  - Interrupt handler now also checks dev->[se]dev are non-NULL
+	    to avoid crashes in interrupts during card init. [dw]
+
 	v3.00 (97/11/09)
 	  - Minor cleanup of debugging messages. [mj]
 
@@ -41,10 +45,10 @@
 
 	v2.80 ALPHA (97/08/01)
 	  - Split source into multiple files; generic arcnet support and
-	    individual chipset drivers. <dwmw2@cam.ac.uk>
+	    individual chipset drivers. <Dave@imladris.demon.co.uk>
 
-	v2.61 ALPHA (97/07/30)    by David Woodhouse (dwmw2@cam.ac.uk) for
-	                            Nortel (Northern Telecom).
+	v2.61 ALPHA (97/07/30)  by David Woodhouse (Dave@imladris.demon.co.uk)
+	                            for Nortel (Northern Telecom).
           - Added support for IO-mapped modes and for SMC COM20020 chipset.
 	  - Fixed (avoided) race condition in send_packet routines which was
             discovered when the buffer copy routines got slow (?).
@@ -170,7 +174,7 @@
 */
 
 static const char *version =
- "arcnet.c: v3.00 97/11/09 Avery Pennarun <apenwarr@bond.net> et al.\n";
+ "arcnet.c: v3.01 98/04/24 Avery Pennarun <apenwarr@bond.net> et al.\n";
 
 #include <linux/module.h>
 #include <linux/config.h>
@@ -956,20 +960,24 @@ arcnet_interrupt(int irq,void *dev_id,struct pt_regs *regs)
 	    return;	/* don't even try. */
 	  }
 #ifdef CONFIG_ARCNET_1051
-	lp->sdev->interrupt=1;
+	if (lp->sdev)
+	  lp->sdev->interrupt=1;
 #endif
 #ifdef CONFIG_ARCNET_ETH
-	lp->edev->interrupt=1;
+	if (lp->edev)
+	  lp->edev->interrupt=1;
 #endif
 
 	/* Call the "real" interrupt handler. */
 	(*lp->inthandler)(dev);
 
 #ifdef CONFIG_ARCNET_ETH
-	lp->edev->interrupt=0;
+	if (lp->edev)
+	  lp->edev->interrupt=0;
 #endif
 #ifdef CONFIG_ARCNET_1051
-	lp->sdev->interrupt=0;
+	if (lp->sdev)
+	  lp->sdev->interrupt=0;
 #endif
 	if (!test_and_clear_bit(0, (int *)&dev->interrupt))
 	  BUGMSG(D_NORMAL, "Someone cleared our dev->interrupt flag!\n");
diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 77292e1b69c9..28f18d7ddae9 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/config.h>
 #include <linux/types.h>
+#include <linux/blk.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/ioport.h>
@@ -1204,10 +1205,13 @@ static boolean BusLogic_Failure(BusLogic_HostAdapter_T *HostAdapter,
 {
   BusLogic_AnnounceDriver(HostAdapter);
   if (HostAdapter->HostAdapterBusType == BusLogic_PCI_Bus)
-    BusLogic_Error("While configuring BusLogic PCI Host Adapter at\n"
-		   "Bus %d Device %d I/O Address 0x%X PCI Address 0x%X:\n",
-		   HostAdapter, HostAdapter->Bus, HostAdapter->Device,
-		   HostAdapter->IO_Address, HostAdapter->PCI_Address);
+    {
+      BusLogic_Error("While configuring BusLogic PCI Host Adapter at\n",
+		     HostAdapter);
+      BusLogic_Error("Bus %d Device %d I/O Address 0x%X PCI Address 0x%X:\n",
+		     HostAdapter, HostAdapter->Bus, HostAdapter->Device,
+		     HostAdapter->IO_Address, HostAdapter->PCI_Address);
+    }
   else BusLogic_Error("While configuring BusLogic Host Adapter at "
 		      "I/O Address 0x%X:\n", HostAdapter,
 		      HostAdapter->IO_Address);
diff --git a/drivers/scsi/BusLogic.h b/drivers/scsi/BusLogic.h
index 820f6cb81a62..654550aedae3 100644
--- a/drivers/scsi/BusLogic.h
+++ b/drivers/scsi/BusLogic.h
@@ -1532,7 +1532,6 @@ static inline
 void BusLogic_AcquireHostAdapterLockIH(BusLogic_HostAdapter_T *HostAdapter,
 				       ProcessorFlags_T *ProcessorFlags)
 {
-  extern spinlock_t io_request_lock;
   spin_lock_irqsave(&io_request_lock, *ProcessorFlags);
 }
 
@@ -1546,7 +1545,6 @@ static inline
 void BusLogic_ReleaseHostAdapterLockIH(BusLogic_HostAdapter_T *HostAdapter,
 				       ProcessorFlags_T *ProcessorFlags)
 {
-  extern spinlock_t io_request_lock;
   spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags);
 }
 
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index 8f6aff9850d1..a052caadfe9b 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -167,7 +167,7 @@ static int ioctl_internal_command(Scsi_Device *dev, char * cmd)
  * interface instead, as this is a more flexible approach to performing
  * generic SCSI commands on a device.
  */
-static int ioctl_command(Scsi_Device *dev, Scsi_Ioctl_Command *sic)
+int scsi_ioctl_send_command(Scsi_Device *dev, Scsi_Ioctl_Command *sic)
 {
     unsigned long flags;
     char * buf;
@@ -387,7 +387,8 @@ int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg)
 	return ioctl_probe(dev->host, arg);
     case SCSI_IOCTL_SEND_COMMAND:
 	if(!suser())  return -EACCES;
-	return ioctl_command((Scsi_Device *) dev, (Scsi_Ioctl_Command *) arg);
+	return scsi_ioctl_send_command((Scsi_Device *) dev,
+				       (Scsi_Ioctl_Command *) arg);
     case SCSI_IOCTL_DOORLOCK:
 	if (!dev->removable || !dev->lockable) return 0;
 	scsi_cmd[0] = ALLOW_MEDIUM_REMOVAL;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index f50cc96c2b90..4232a56a5845 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -97,6 +97,13 @@ static int sg_ioctl(struct inode * inode,struct file * file,
 	return scsi_generics[dev].timeout;
     case SG_EMULATED_HOST:
     	return put_user(scsi_generics[dev].device->host->hostt->emulated, (int *) arg);
+    case SCSI_IOCTL_SEND_COMMAND:
+	/*
+	  Allow SCSI_IOCTL_SEND_COMMAND without checking suser() since the
+	  user already has read/write access to the generic device and so
+	  can execute arbitrary SCSI commands.
+	*/
+	return scsi_ioctl_send_command(scsi_generics[dev].device, (void *) arg);
     default:
 	return scsi_ioctl(scsi_generics[dev].device, cmd_in, (void *) arg);
     }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 8324b4a2d9b3..342eb1ab5785 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -517,9 +517,10 @@ do_load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 			retval = PTR_ERR(interpreter_dentry);
 			if (IS_ERR(interpreter_dentry))
 				goto out_free_interp;
-
-			retval = read_exec(interpreter_dentry, 0, bprm->buf, 
-						128, 1);
+			retval = permission(interpreter_dentry->d_inode, MAY_EXEC);
+			if (retval < 0)
+				goto out_free_dentry;
+			retval = read_exec(interpreter_dentry, 0, bprm->buf, 128, 1);
 			if (retval < 0)
 				goto out_free_dentry;
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 728c8ed1c602..58c6479c91b0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -426,42 +426,16 @@ void shrink_dcache_parent(struct dentry * parent)
 }
 
 /*
- * This is called from do_try_to_free_page() to indicate
- * that we should reduce the dcache and inode cache memory.
+ * This is called from kswapd when we think we need some
+ * more memory, but aren't really sure how much. So we
+ * carefully try to free a _bit_ of our dcache, but not
+ * too much.
  */
-void shrink_dcache_memory()
+void shrink_dcache_memory(void)
 {
-	dentry_stat.want_pages++;
-}
-
-/*
- * This carries out the request received by the above routine.
- */
-void check_dcache_memory()
-{
-	if (dentry_stat.want_pages) {
-		unsigned int count, goal = 0;
-		/*
-		 * Set the page goal.  We don't necessarily need to trim
-		 * the dcache just because the system needs memory ...
-		 */
-		if (page_cache_size > (num_physpages >> 1))
-			goal = (dentry_stat.want_pages * page_cache_size)
-				/ num_physpages;
-		dentry_stat.want_pages = 0;
-		if (goal) {
-			if (goal > 50)
-				goal = 50;
-			count = select_dcache(32, goal);
-#ifdef DCACHE_DEBUG
-printk(KERN_DEBUG "check_dcache_memory: goal=%d, count=%d\n", goal, count);
-#endif
-			if (count) {
-				prune_dcache(count);
-				free_inode_memory(count);
-			}
-		}
-	}
+	int count = select_dcache(32, 8);
+	if (count)
+		prune_dcache(count);
 }
 
 #define NAME_ALLOC_LEN(len)	((len+16) & ~15)
diff --git a/fs/namei.c b/fs/namei.c
index 519a8ceac00c..a6de99eadfa3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -454,7 +454,6 @@ struct dentry * __namei(const char *pathname, int follow_link)
 	char *name;
 	struct dentry *dentry;
 
-	check_dcache_memory();
 	name = getname(pathname);
 	dentry = (struct dentry *) name;
 	if (!IS_ERR(name)) {
@@ -528,7 +527,6 @@ struct dentry * open_namei(const char * pathname, int flag, int mode)
 	struct inode *inode;
 	struct dentry *dentry;
 
-	check_dcache_memory();
 	mode &= S_IALLUGO & ~current->fs->umask;
 	mode |= S_IFREG;
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 2df16d607bac..11552a24d417 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -72,7 +72,6 @@ enum
 	KERN_STATINODE,
 	KERN_DENTRY,		/* dentry statistics */
 	KERN_MODPROBE,
-	KERN_KMOD_UNLOAD_DELAY
 };
 
 
diff --git a/include/scsi/scsi_ioctl.h b/include/scsi/scsi_ioctl.h
index 57f439dfbc3c..97e20a98a5ed 100644
--- a/include/scsi/scsi_ioctl.h
+++ b/include/scsi/scsi_ioctl.h
@@ -15,9 +15,6 @@
 
 #ifdef __KERNEL__
 
-extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
-extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
-
 /*
  * Structures used for scsi_ioctl et al.
  */
@@ -33,6 +30,11 @@ typedef struct scsi_idlun {
 	__u32 host_unique_id;
 } Scsi_Idlun;
 
+extern int scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
+extern int kernel_scsi_ioctl (Scsi_Device *dev, int cmd, void *arg);
+extern int scsi_ioctl_send_command(Scsi_Device *dev,
+				   Scsi_Ioctl_Command *arg);
+
 #endif
 
 #endif
diff --git a/init/main.c b/init/main.c
index de8dae6df10b..2230ea2d031d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1162,13 +1162,6 @@ static int init(void * unused)
 	smp_begin();
 #endif	
 
-#ifdef CONFIG_KMOD
-	{
-		extern int kmod_init(void);
-		kmod_init();
-	}
-#endif
-
 #ifdef CONFIG_UMSDOS_FS
 	{
 		/*
diff --git a/kernel/kmod.c b/kernel/kmod.c
index c53c09f5bc28..7c57b1af51db 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -1,6 +1,12 @@
 /*
 	kmod, the new module loader (replaces kerneld)
 	Kirk Petersen
+
+	Reorganized not to be a daemon by Adam Richter, with guidance
+	from Greg Zornetzer.
+
+	Modified to avoid chroot and file sharing problems.
+	Mikael Pettersson
 */
 
 #define __KERNEL_SYSCALLS__
@@ -8,147 +14,88 @@
 #include <linux/sched.h>
 #include <linux/types.h>
 #include <linux/unistd.h>
+#include <asm/smp_lock.h>
+#include <asm/uaccess.h>
 
 /*
-	kmod_unload_delay and modprobe_path are set via /proc/sys.
+	modprobe_path is set via /proc/sys.
 */
-int kmod_unload_delay = 60;
 char modprobe_path[256] = "/sbin/modprobe";
-static char module_name[64] = "";
-static char * argv[] = { modprobe_path, "-s", "-k", module_name, NULL };
 static char * envp[] = { "HOME=/", "TERM=linux", "PATH=/usr/bin:/bin", NULL };
 
 /*
-	kmod_queue synchronizes the kmod thread and the rest of the system
-	kmod_unload_timer is what we use to unload modules
-	after kmod_unload_delay seconds
+	exec_modprobe is spawned from a kernel-mode user process,
+	then changes its state to behave _as_if_ it was spawned
+	from the kernel's init process
+	(ppid and {e,}gid are not adjusted, but that shouldn't
+	be a problem since we trust modprobe)
 */
-static struct wait_queue * kmod_queue = NULL;
-static struct timer_list kmod_unload_timer;
-
-/*
-	It is not easy to implement a full fork in kernel-space on some
-	systems (Alpha), and it is not necessary for us here.  This is 
-	a new thread that does the exec.
-*/
-static int kmod_exec_modprobe(void * data)
-{
-	sigemptyset(&current->blocked);
-	execve(modprobe_path, argv, envp);
-	printk(KERN_ERR "kmod: failed to load module %s\n", module_name);
-	return 0;
-}
+#define task_init task[smp_num_cpus]
 
-/*
-	kmod_thread is the thread that does most of the work.  kmod_unload and
-	request_module tell it to wake up and do work.
-*/
-static int kmod_thread(void * data)
-{
-	int pid;
+static inline void
+use_init_file_context(void) {
+	lock_kernel();
 
-	/*
-		Initialize basic thread information
-	*/
-	current->session = 1;
-	current->pgrp = 1;
-	sprintf(current->comm, "kmod");
-	sigfillset(&current->blocked);
-
-	/*
-		This is the main kmod_thread loop.  It first sleeps, then
-		handles requests from request_module or kmod_unload.
-	*/
+	/* don't use the user's root, use init's root instead */
+	exit_fs(current);	/* current->fs->count--; */
+	current->fs = task_init->fs;
+	current->fs->count++;
 
-	while (1) {
-		interruptible_sleep_on(&kmod_queue);
-
-		/*
-			If request_module woke us up, we should try to
-			load module_name.  If not, kmod_unload woke us up,
-			do call delete_module.
-			(if somehow both want us to do something, ignore the
-			 delete_module request)
-		*/
-		if (module_name[0] == '\0') {
-			delete_module(NULL);
-		} else {
-			pid = kernel_thread(kmod_exec_modprobe, NULL, SIGCHLD);
-			if (pid > 0) {
-				waitpid(pid, NULL, 0);
-				module_name[0] = '\0';
-				wake_up(&kmod_queue);
-			} else {
-				printk(KERN_ERR "kmod: fork failed, errno %d\n", -pid);
-			}
-		}
-	}
+	/* don't use the user's files, use init's files instead */
+	exit_files(current);	/* current->files->count--; */
+	current->files = task_init->files;
+	current->files->count++;
 
-	return 0;	/* Never reached. */
+	unlock_kernel();
 }
 
-/*
-	kmod_unload is the function that the kernel calls when
-	the kmod_unload_timer expires
-*/
-void kmod_unload(unsigned long x)
+static int exec_modprobe(void * module_name)
 {
-	/*
-		wake up the kmod thread, which does the work
-		(we can't call delete_module, as it locks the kernel and
-		 we are in the bottom half of the kernel (right?))
-		once it is awake, reset the timer
-	*/
-	wake_up(&kmod_queue);
-	kmod_unload_timer.expires = jiffies + (kmod_unload_delay * HZ);
-	add_timer(&kmod_unload_timer);
-}
+	char *argv[] = { modprobe_path, "-s", "-k", (char*)module_name, NULL};
 
-int kmod_init(void)
-{
-	printk("Starting kmod\n");
-
-	/*
-	 * CLONE_FS means that our "cwd" will follow that of init.
-	 * CLONE_FILES just saves some space (we don't need any
-	 * new file descriptors). Ditto for CLONE_SIGHAND.
-	 */
-	kernel_thread(kmod_thread, NULL, CLONE_FILES | CLONE_FS | CLONE_SIGHAND);
-
-	kmod_unload_timer.next = NULL;
-	kmod_unload_timer.prev = NULL;
-	kmod_unload_timer.expires = jiffies + (5 * 60 * HZ);
-	kmod_unload_timer.data = 0L;
-	kmod_unload_timer.function = kmod_unload;
-	add_timer(&kmod_unload_timer);
+	use_init_file_context();
 
+	/* Prevent parent user process from sending signals to child.
+	   Otherwise, if the modprobe program does not exist, it might
+	   be possible to get a user defined signal handler to execute
+	   as the super user right after the execve fails if you time
+	   the signal just right.
+	*/
+	spin_lock_irq(&current->sigmask_lock);
+	flush_signals(current);
+	flush_signal_handlers(current);
+	spin_unlock_irq(&current->sigmask_lock);
+
+	set_fs(KERNEL_DS);	/* Allow execve args to be in kernel space. */
+	current->uid = current->euid = 0;
+	if (execve(modprobe_path, argv, envp) < 0) {
+		printk(KERN_ERR
+		       "kmod: failed to exec %s -s -k %s, errno = %d\n",
+		       modprobe_path, (char*) module_name, errno);
+		return -errno;
+	}
 	return 0;
 }
 
 /*
-	request_module, the function that everyone calls when they need a
-	module to be loaded
+	request_module: the function that everyone calls when they need
+	a module.
 */
-int request_module(const char * name)
+int request_module(const char * module_name)
 {
-	/* first, copy the name of the module into module_name */
-	/* then wake_up() the kmod daemon */
-	/* wait for the kmod daemon to finish (it will wake us up) */
-
-	/*
-		kmod_thread is sleeping, so start by copying the name of
-		the module into module_name.  Once that is done, wake up
-		kmod_thread.
-	*/
-	strncpy(module_name, name, sizeof(module_name));
-	module_name[sizeof(module_name)-1] = '\0';
-	wake_up(&kmod_queue);
-
-	/*
-		Now that we have told kmod_thread what to do, we want to
-		go to sleep and let it do its work.  It will wake us up,
-		at which point we will be done (the module will be loaded).
-	*/
-	interruptible_sleep_on(&kmod_queue);
+	int pid;
+	int waitpid_result;
+
+	pid = kernel_thread(exec_modprobe, (void*) module_name,
+			    CLONE_FS | CLONE_FILES | SIGCHLD);
+	if (pid < 0) {
+		printk(KERN_ERR "kmod: fork failed, errno %d\n", -pid);
+		return pid;
+	}
+	waitpid_result = waitpid(pid, NULL, 0);
+	if (waitpid_result != pid) {
+		printk (KERN_ERR "kmod: waitpid(%d,NULL,0) failed, returning %d.\n",
+			pid, waitpid_result);
+	}
 	return 0;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ed2d8fd6987c..27616bfee282 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -43,7 +43,6 @@ extern char binfmt_java_interpreter[], binfmt_java_appletviewer[];
 extern int sysctl_overcommit_memory;
 #ifdef CONFIG_KMOD
 extern char modprobe_path[];
-extern int kmod_unload_delay;
 #endif
 #ifdef CONFIG_CHR_DEV_SG
 extern int sg_big_buff;
@@ -180,8 +179,6 @@ static ctl_table kern_table[] = {
 #ifdef CONFIG_KMOD
 	{KERN_MODPROBE, "modprobe", &modprobe_path, 256,
 	 0644, NULL, &proc_dostring, &sysctl_string },
-	{KERN_KMOD_UNLOAD_DELAY, "kmod_unload_delay", &kmod_unload_delay,
-	sizeof(int), 0644, NULL, &proc_dointvec},
 #endif
 #ifdef CONFIG_CHR_DEV_SG
 	{KERN_NRFILE, "sg-big-buff", &sg_big_buff, sizeof (int),
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 80b916b13bee..2423d0d8bde4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -286,16 +286,17 @@ unsigned long __get_free_pages(int gfp_mask, unsigned long order)
 		}
 	}
 
-repeat:
-	spin_lock_irqsave(&page_alloc_lock, flags);
-	RMQUEUE(order, maxorder, (gfp_mask & GFP_DMA));
-	spin_unlock_irqrestore(&page_alloc_lock, flags);
-	if (gfp_mask & __GFP_WAIT) {
-		int freed = try_to_free_pages(gfp_mask,SWAP_CLUSTER_MAX);
+	for (;;) {
+		spin_lock_irqsave(&page_alloc_lock, flags);
+		RMQUEUE(order, maxorder, (gfp_mask & GFP_DMA));
+		spin_unlock_irqrestore(&page_alloc_lock, flags);
+		if (!(gfp_mask & __GFP_WAIT))
+			break;
+		shrink_dcache();
+		if (!try_to_free_pages(gfp_mask, SWAP_CLUSTER_MAX))
+			break;
 		gfp_mask &= ~__GFP_WAIT;	/* go through this only once */
 		maxorder = NR_MEM_LISTS;	/* Allow anything this time */
-		if (freed)
-			goto repeat;
 	}
 nopage:
 	return 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0ad129a6b714..0b970c06ae6b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -442,7 +442,7 @@ static inline int do_try_to_free_page(int gfp_mask)
 	int stop;
 
 	/* Let the dcache know we're looking for memory ... */
-	shrink_dcache_memory();
+	shrink_dcache();
 
 	/* Always trim SLAB caches when memory gets low. */
 	kmem_cache_reap(gfp_mask);
@@ -547,6 +547,9 @@ int kswapd(void *unused)
 		run_task_queue(&tq_disk);
 		schedule();
 		swapstats.wakeups++;
+
+		/* This will gently shrink the dcache.. */
+		shrink_dcache_memory();
 	
 		/*
 		 * Do the background pageout: be
-- 
2.39.5