From: Linus Torvalds <torvalds@linuxfoundation.org>
Date: Fri, 23 Nov 2007 20:25:43 +0000 (-0500)
Subject: Import 2.3.8pre3
X-Git-Tag: 2.3.8
X-Git-Url: http://git.neil.brown.name/?a=commitdiff_plain;h=afbf60cdcf5b8bb7e375f75177bcc8d7ef3b9817;p=history.git

Import 2.3.8pre3
---

diff --git a/arch/alpha/config.in b/arch/alpha/config.in
index d632cdbb9919..4a0da0cbcb30 100644
--- a/arch/alpha/config.in
+++ b/arch/alpha/config.in
@@ -142,6 +142,7 @@ fi
 
 if [ "$CONFIG_ALPHA_CABRIOLET" = "y" -o "$CONFIG_ALPHA_AVANTI" = "y" \
 	-o "$CONFIG_ALPHA_EB64P" = "y" -o "$CONFIG_ALPHA_JENSEN" = "y" \
+	-o "$CONFIG_ALPHA_TAKARA" = "y" -o "$CONFIG_ALPHA_EB164" = "y" \
 	-o "$CONFIG_ALPHA_MIKASA" = "y" -o "$CONFIG_ALPHA_ALCOR" = "y" \
 	-o "$CONFIG_ALPHA_SABLE" = "y" -o "$CONFIG_ALPHA_MIATA" = "y" \
 	-o "$CONFIG_ALPHA_NORITAKE" = "y" -o "$CONFIG_ALPHA_PC164" = "y" \
@@ -166,7 +167,11 @@ then
 	define_bool CONFIG_ALPHA_AVANTI y
 fi
 
-bool 'Symmetric multi-processing support' CONFIG_SMP
+if [ "$CONFIG_ALPHA_SABLE" = "y" -o "$CONFIG_ALPHA_RAWHIDE" = "y" \
+	-o "$CONFIG_ALPHA_DP264" = "y" -o "$CONFIG_ALPHA_GENERIC" = "y" ]
+then
+	bool 'Symmetric multi-processing support' CONFIG_SMP
+fi
 
 if [ "$CONFIG_PCI" = "y" ]; then
   bool 'PCI quirks' CONFIG_PCI_QUIRKS
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c
index ce949d722d0c..cba493d09b1f 100644
--- a/arch/alpha/kernel/alpha_ksyms.c
+++ b/arch/alpha/kernel/alpha_ksyms.c
@@ -171,8 +171,8 @@ EXPORT_SYMBOL(__global_save_flags);
 EXPORT_SYMBOL(__global_restore_flags);
 #if DEBUG_SPINLOCK
 EXPORT_SYMBOL(spin_unlock);
-EXPORT_SYMBOL(spin_lock);
-EXPORT_SYMBOL(spin_trylock);
+EXPORT_SYMBOL(debug_spin_lock);
+EXPORT_SYMBOL(debug_spin_trylock);
 #endif
 #if DEBUG_RWLOCK
 EXPORT_SYMBOL(write_lock);
diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c
index e0f52cb8eb21..eef31582638c 100644
--- a/arch/alpha/kernel/core_cia.c
+++ b/arch/alpha/kernel/core_cia.c
@@ -598,7 +598,7 @@ cia_pci_clr_err(void)
 {
 	CIA_jd = *(vuip)CIA_IOC_CIA_ERR;
 	DBGM(("CIA_pci_clr_err: CIA ERR after read 0x%x\n", CIA_jd));
-	*(vuip)CIA_IOC_CIA_ERR = 0x0180;
+	*(vuip)CIA_IOC_CIA_ERR = CIA_jd;
 	mb();
 	return 0;
 }
@@ -698,6 +698,10 @@ cia_machine_check(unsigned long vector, unsigned long la_ptr,
 		reason = buf;
 		break;
 	}
+	mb();
+	mb();  /* magic */
+	draina();
+	cia_pci_clr_err();
 	wrmces(rdmces());	/* reset machine check pending flag */
 	mb();
 
diff --git a/arch/alpha/kernel/core_mcpcia.c b/arch/alpha/kernel/core_mcpcia.c
index 72d2a62c9f2a..6e24dca07e83 100644
--- a/arch/alpha/kernel/core_mcpcia.c
+++ b/arch/alpha/kernel/core_mcpcia.c
@@ -18,7 +18,6 @@
 #include <asm/system.h>
 #include <asm/pci.h>
 #include <asm/hwrpb.h>
-#include <asm/mmu_context.h>
 
 #define __EXTERN_INLINE inline
 #include <asm/io.h>
diff --git a/arch/alpha/kernel/head.S b/arch/alpha/kernel/head.S
index 3fcbdbcda71e..3b004b7f615e 100644
--- a/arch/alpha/kernel/head.S
+++ b/arch/alpha/kernel/head.S
@@ -54,87 +54,6 @@ __smp_callin:
 	.end __smp_callin
 #endif /* __SMP__ */
 
-	.align 3
-	.globl	wrent
-	.ent	wrent
-wrent:
-	.prologue 0
-	call_pal PAL_wrent
-	ret	($26)
-	.end wrent
-
-	.align 3
-	.globl	wrkgp
-	.ent	wrkgp
-wrkgp:
-	.prologue 0
-	call_pal PAL_wrkgp
-	ret	($26)
-	.end wrkgp
-
-	.align 3
-	.globl	wrusp
-	.ent	wrusp
-wrusp:
-	.prologue 0
-	call_pal PAL_wrusp
-	ret	($26)
-	.end wrusp
-
-	.align 3
-	.globl	rdusp
-	.ent	rdusp
-rdusp:
-	.prologue 0
-	call_pal PAL_rdusp
-	ret	($26)
-	.end rdusp
-
-	.align 3
-	.globl	rdmces
-	.ent	rdmces
-rdmces:
-	.prologue 0
-	call_pal PAL_rdmces
-	ret	($26)
-	.end rdmces
-
-	.align 3
-	.globl	wrmces
-	.ent	wrmces
-wrmces:
-	.prologue 0
-	call_pal PAL_wrmces
-	ret	($26)
-	.end wrmces
-
-	.align 3
-	.globl  whami
-	.ent    whami
-whami:
-	.prologue 0
-	call_pal PAL_whami
-	ret     ($26)
-	.end whami
- 
-	.align 3
-	.globl  wripir
-	.ent    wripir
-wripir:
-	.prologue 0
-	call_pal PAL_wripir
-	ret     ($26)
-	.end wripir
-
-	.align 3
-	.globl wrvptptr
-	.ent wrvptptr
-wrvptptr:
-	.prologue 0
-	call_pal PAL_wrvptptr
-	ret	($26)
-	.end wrvptptr
-
 	#
 	# The following two functions are needed for supporting SRM PALcode
 	# on the PC164 (at least), since that PALcode manages the interrupt
diff --git a/arch/alpha/kernel/machvec.h b/arch/alpha/kernel/machvec.h
index 8420aaf9cc7f..1e11c046e3f1 100644
--- a/arch/alpha/kernel/machvec.h
+++ b/arch/alpha/kernel/machvec.h
@@ -36,7 +36,6 @@
 
 #define DO_EV4_MMU							\
 	max_asn:			EV4_MAX_ASN,			\
-	mmu_context_mask:		~0UL,				\
 	mv_get_mmu_context:		ev4_get_mmu_context,		\
 	mv_flush_tlb_current:		ev4_flush_tlb_current,		\
 	mv_flush_tlb_other:		ev4_flush_tlb_other,		\
@@ -44,7 +43,6 @@
 
 #define DO_EV5_MMU							\
 	max_asn:			EV5_MAX_ASN,			\
-	mmu_context_mask:		~0UL,				\
 	mv_get_mmu_context:		ev5_get_mmu_context,		\
 	mv_flush_tlb_current:		ev5_flush_tlb_current,		\
 	mv_flush_tlb_other:		ev5_flush_tlb_other,		\
@@ -52,7 +50,6 @@
 
 #define DO_EV6_MMU							\
 	max_asn:			EV6_MAX_ASN,			\
-	mmu_context_mask:		0xfffffffffful,			\
 	mv_get_mmu_context:		ev5_get_mmu_context,		\
 	mv_flush_tlb_current:		ev5_flush_tlb_current,		\
 	mv_flush_tlb_other:		ev5_flush_tlb_other,		\
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 1993ed3b41eb..ecd4387aec2f 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -329,7 +329,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
 	p->tss.ksp = (unsigned long) childstack;
 	p->tss.pal_flags = 1;	/* set FEN, clear everything else */
 	p->tss.flags = current->tss.flags;
-	p->mm->context = 0;
+	p->tss.mm_context = p->tss.asn = 0;
 
 	return 0;
 }
diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h
index f7e54a982c56..8fa1fd7ea766 100644
--- a/arch/alpha/kernel/proto.h
+++ b/arch/alpha/kernel/proto.h
@@ -180,7 +180,7 @@ extern unsigned long alpha_read_fp_reg (unsigned long reg);
 extern void wrmces(unsigned long mces);
 extern void cserve_ena(unsigned long);
 extern void cserve_dis(unsigned long);
-extern void __smp_callin(void);
+extern void __smp_callin(unsigned long);
 
 /* entry.S */
 extern void entArith(void);
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 80b4454e1820..a808a8149a12 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -106,6 +106,7 @@ WEAK(alcor_mv);
 WEAK(alphabook1_mv);
 WEAK(avanti_mv);
 WEAK(cabriolet_mv);
+WEAK(clipper_mv);
 WEAK(dp264_mv);
 WEAK(eb164_mv);
 WEAK(eb64p_mv);
@@ -330,6 +331,10 @@ find_end_memory(void)
 
 	/* Round it up to an even number of pages. */
 	high = (high + PAGE_SIZE) & (PAGE_MASK*2);
+
+	/* Enforce maximum of 2GB even if there is more.  Blah.  */
+	if (high > 0x80000000UL)
+		high = 0x80000000UL;
 	return PAGE_OFFSET + high;
 }
 
@@ -448,11 +453,11 @@ get_sysvec(long type, long variation, long cpu)
 	static struct alpha_machine_vector *tsunami_vecs[]  __initlocaldata =
 	{
 		NULL,
-		&dp264_mv,		/* dp164 */
+		&dp264_mv,		/* dp264 */
 		&dp264_mv,		/* warhol */
 		&dp264_mv,		/* windjammer */
 		&monet_mv,		/* monet */
-		&dp264_mv,		/* clipper */
+		&clipper_mv,		/* clipper */
 		&dp264_mv,		/* goldrush */
 		&webbrick_mv,		/* webbrick */
 		&dp264_mv,		/* catamaran */
@@ -537,6 +542,7 @@ get_sysvec_byname(const char *name)
 		&alphabook1_mv,
 		&avanti_mv,
 		&cabriolet_mv,
+		&clipper_mv,
 		&dp264_mv,
 		&eb164_mv,
 		&eb64p_mv,
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index f01c0e55d632..e97021869ac7 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -95,6 +95,8 @@ static inline void __init
 smp_store_cpu_info(int cpuid)
 {
 	cpu_data[cpuid].loops_per_sec = loops_per_sec;
+	cpu_data[cpuid].last_asn
+	  = (cpuid << WIDTH_HARDWARE_ASN) + ASN_FIRST_VERSION;
 }
 
 /*
@@ -151,8 +153,8 @@ smp_callin(void)
 	while (!smp_threads_ready)
 		barrier();
 
-	printk(KERN_INFO "SMP: commencing CPU %d current %p\n",
-	       cpuid, current);
+	DBGS(("smp_callin: commencing CPU %d current %p\n",
+	      cpuid, current));
 
 	/* Do nothing.  */
 	cpu_idle(NULL);
@@ -293,9 +295,9 @@ recv_secondary_console_msg(void)
 		   + hwrpb->processor_offset
 		   + i * hwrpb->processor_size);
 
- 		printk(KERN_INFO "recv_secondary_console_msg: on %d from %d"
-		       " HALT_REASON 0x%lx FLAGS 0x%lx\n",
-		       mycpu, i, cpu->halt_reason, cpu->flags);
+ 		DBGS(("recv_secondary_console_msg: on %d from %d"
+		      " HALT_REASON 0x%lx FLAGS 0x%lx\n",
+		      mycpu, i, cpu->halt_reason, cpu->flags));
 
 		cnt = cpu->ipc_buffer[0] >> 32;
 		if (cnt <= 0 || cnt >= 80)
@@ -790,6 +792,11 @@ handle_ipi(struct pt_regs *regs)
 void
 smp_send_reschedule(int cpu)
 {
+#if DEBUG_IPI_MSG
+	if (cpu == hard_smp_processor_id())
+		printk(KERN_WARNING
+		       "smp_send_reschedule: Sending IPI to self.\n");
+#endif
 	send_ipi_message(1L << cpu, IPI_RESCHEDULE);
 }
 
@@ -797,6 +804,10 @@ void
 smp_send_stop(void)
 {
 	unsigned long to_whom = cpu_present_mask ^ (1L << smp_processor_id());
+#if DEBUG_IPI_MSG
+	if (hard_smp_processor_id() != boot_cpu_id)
+		printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n");
+#endif
 	send_ipi_message(to_whom, IPI_CPU_STOP);
 }
 
@@ -862,13 +873,13 @@ ipi_flush_tlb_all(void *ignored)
 void
 flush_tlb_all(void)
 {
-	tbia();
-
 	/* Although we don't have any data to pass, we do want to
 	   synchronize with the other processors.  */
 	if (smp_call_function(ipi_flush_tlb_all, NULL, 1, 1)) {
 		printk(KERN_CRIT "flush_tlb_all: timed out\n");
 	}
+
+	tbia();
 }
 
 static void
@@ -948,43 +959,21 @@ smp_info(char *buffer)
 
 
 #if DEBUG_SPINLOCK
-
-#ifdef MANAGE_SPINLOCK_IPL
-
-static inline long 
-spinlock_raise_ipl(spinlock_t * lock)
-{
- 	long min_ipl = lock->target_ipl;
-	long last_ipl = swpipl(7);
-	if (last_ipl < 7 && min_ipl < 7)
-		setipl(min_ipl < last_ipl ? last_ipl : min_ipl);
-	return last_ipl;
-}
-
-static inline void
-spinlock_restore_ipl(long prev)
-{
-	setipl(prev);
-}
-
-#else
-
-#define spinlock_raise_ipl(LOCK)	((void)(LOCK), 0)
-#define spinlock_restore_ipl(PREV)	((void)(PREV))
-
-#endif /* MANAGE_SPINLOCK_IPL */
-
 void
 spin_unlock(spinlock_t * lock)
 {
-	long old_ipl = lock->saved_ipl;
 	mb();
 	lock->lock = 0;
-	spinlock_restore_ipl(old_ipl);
+
+	lock->on_cpu = -1;
+	lock->previous = NULL;
+	lock->task = NULL;
+	lock->base_file = "none";
+	lock->line_no = 0;
 }
 
 void
-spin_lock(spinlock_t * lock)
+debug_spin_lock(spinlock_t * lock, const char *base_file, int line_no)
 {
 	long tmp;
 	long stuck;
@@ -992,7 +981,6 @@ spin_lock(spinlock_t * lock)
 	unsigned long started = jiffies;
 	int printed = 0;
 	int cpu = smp_processor_id();
-	long old_ipl = spinlock_raise_ipl(lock);
 
 	stuck = 1L << 28;
  try_again:
@@ -1020,39 +1008,43 @@ spin_lock(spinlock_t * lock)
 
 	if (stuck < 0) {
 		printk(KERN_WARNING
-		       "spinlock stuck at %p(%d) owner %s at %p(%d) st %ld\n",
-		       inline_pc, cpu, lock->task->comm, lock->previous,
-		       lock->task->processor, lock->task->state);
+		       "%s:%d spinlock stuck in %s at %p(%d)"
+		       " owner %s at %p(%d) %s:%d\n",
+		       base_file, line_no,
+		       current->comm, inline_pc, cpu,
+		       lock->task->comm, lock->previous,
+		       lock->on_cpu, lock->base_file, lock->line_no);
 		stuck = 1L << 36;
 		printed = 1;
 		goto try_again;
 	}
 
 	/* Exiting.  Got the lock.  */
-	lock->saved_ipl = old_ipl;
 	lock->on_cpu = cpu;
 	lock->previous = inline_pc;
 	lock->task = current;
+	lock->base_file = base_file;
+	lock->line_no = line_no;
 
 	if (printed) {
-		printk(KERN_WARNING "spinlock grabbed at %p(%d) %ld ticks\n",
-		       inline_pc, cpu, jiffies - started);
+		printk(KERN_WARNING
+		       "%s:%d spinlock grabbed in %s at %p(%d) %ld ticks\n",
+		       base_file, line_no, current->comm, inline_pc,
+		       cpu, jiffies - started);
 	}
 }
 
 int
-spin_trylock(spinlock_t * lock)
+debug_spin_trylock(spinlock_t * lock, const char *base_file, int line_no)
 {
-	long old_ipl = spinlock_raise_ipl(lock);
 	int ret;
 	if ((ret = !test_and_set_bit(0, lock))) {
-		mb();
-		lock->saved_ipl = old_ipl;
 		lock->on_cpu = smp_processor_id();
 		lock->previous = __builtin_return_address(0);
 		lock->task = current;
-	} else {
-		spinlock_restore_ipl(old_ipl);
+		/* Record the winner's call site; a failed try must not clobber it. */
+		lock->base_file = base_file;
+		lock->line_no = line_no;
 	}
 	return ret;
 }
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index c464c37ec68c..f465b3b4ef22 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -2,8 +2,8 @@
  *	linux/arch/alpha/kernel/sys_dp264.c
  *
  *	Copyright (C) 1995 David A Rusling
- *	Copyright (C) 1996 Jay A Estabrook
- *	Copyright (C) 1998 Richard Henderson
+ *	Copyright (C) 1996, 1999 Jay A Estabrook
+ *	Copyright (C) 1998, 1999 Richard Henderson
  *
  * Code supporting the DP264 (EV6+TSUNAMI).
  */
@@ -35,7 +35,7 @@
 #define dev2hose(d) (bus2hose[(d)->bus->number]->pci_hose_index)
 
 /*
- * HACK ALERT! only CPU#0 is used currently
+ * HACK ALERT! only the boot cpu is used for interrupts.
  */
 
 static void
@@ -65,35 +65,61 @@ dp264_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p)
 		outb(mask, 0x21);	/* ISA PIC1 */
 }
 
+static void
+clipper_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p)
+{	/* Write MASK to the TSUNAMI DIM (irq >= 16) or to the ISA PICs. */
+	if (irq >= 16) {
+		volatile unsigned long *csr;
+		/* Select dim0..dim3 according to TSUNAMI_bootcpu (0..3). */
+		if (TSUNAMI_bootcpu < 2)
+			if (!TSUNAMI_bootcpu)
+				csr = &TSUNAMI_cchip->dim0.csr;
+			else
+				csr = &TSUNAMI_cchip->dim1.csr;
+		else
+			if (TSUNAMI_bootcpu == 2)
+				csr = &TSUNAMI_cchip->dim2.csr;
+			else
+				csr = &TSUNAMI_cchip->dim3.csr;
+		/* The mask is inverted and shifted down by 16; bit 55 is always set. */
+		*csr = (~mask >> 16) | (1UL << 55); /* master ISA enable */
+		mb();
+		*csr;	/* read back; presumably forces the write out -- confirm */
+	}
+	else if (irq >= 8)
+		outb(mask >> 8, 0xA1);	/* ISA PIC2 */
+	else
+		outb(mask, 0x21);	/* ISA PIC1 */
+}
+
 static void
 dp264_device_interrupt(unsigned long vector, struct pt_regs * regs)
 {
 #if 1
 	printk("dp264_device_interrupt: NOT IMPLEMENTED YET!! \n");
 #else
-        unsigned long pld;
-        unsigned int i;
-
-        /* Read the interrupt summary register of TSUNAMI */
-        pld = TSUNAMI_cchip->dir0.csr;
-
-        /*
-         * Now for every possible bit set, work through them and call
-         * the appropriate interrupt handler.
-         */
-        while (pld) {
-                i = ffz(~pld);
-                pld &= pld - 1; /* clear least bit set */
-                if (i == 55) {
-                        isa_device_interrupt(vector, regs);
-		} else { /* if not timer int */
-                        handle_irq(16 + i, 16 + i, regs);
-                }
+	unsigned long pld;
+	unsigned int i;
+
+	/* Read the interrupt summary register of TSUNAMI */
+	pld = TSUNAMI_cchip->dir0.csr;
+
+	/*
+	 * Now for every possible bit set, work through them and call
+	 * the appropriate interrupt handler.
+	 */
+	while (pld) {
+		i = ffz(~pld);
+		pld &= pld - 1; /* clear least bit set */
+		if (i == 55)
+			isa_device_interrupt(vector, regs);
+		else
+			handle_irq(16 + i, 16 + i, regs);
 #if 0
 		TSUNAMI_cchip->dir0.csr = 1UL << i; mb();
 		tmp = TSUNAMI_cchip->dir0.csr;
 #endif
-        }
+	}
 #endif
 }
 
@@ -104,24 +130,48 @@ dp264_srm_device_interrupt(unsigned long vector, struct pt_regs * regs)
 
 	ack = irq = (vector - 0x800) >> 4;
 
-        /*
-         * The DP264 SRM console reports PCI interrupts with a vector
-	 * 0x100 *higher* than one might expect, as PCI IRQ 0 (ie bit 0)
-	 * shows up as IRQ 16, etc, etc. We adjust it down by 16 to have
-	 * it line up with the actual bit numbers from the DIM registers,
-	 * which is how we manage the interrupts/mask. Sigh...
-         */
-        if (irq >= 32)
-                ack = irq = irq - 16;
+	/*
+	 * The SRM console reports PCI interrupts with a vector calculated by:
+	 *
+	 *	0x900 + (0x10 * DRIR-bit)
+	 *
+	 * So bit 16 shows up as IRQ 32, etc.
+	 * 
+	 * On DP264/BRICK/MONET, we adjust it down by 16 because at least
+	 * that many of the low order bits of the DRIR are not used, and
+	 * so we don't count them.
+	 */
+	if (irq >= 32)
+		ack = irq = irq - 16;
+
+	handle_irq(irq, ack, regs);
+}
+
+static void 
+clipper_srm_device_interrupt(unsigned long vector, struct pt_regs * regs)
+{
+	int irq, ack;
+
+	ack = irq = (vector - 0x800) >> 4;
 
+	/*
+	 * The SRM console reports PCI interrupts with a vector calculated by:
+	 *
+	 *	0x900 + (0x10 * DRIR-bit)
+	 *
+	 * So bit 16 shows up as IRQ 32, etc.
+	 * 
+	 * CLIPPER uses bits 8-47 for PCI interrupts, so we do not need
+	 * to scale down the vector reported, we just use it.
+	 *
+	 * Eg IRQ 24 is DRIR bit 8, etc, etc
+	 */
 	handle_irq(irq, ack, regs);
 }
 
 static void __init
 dp264_init_irq(void)
 {
-	volatile unsigned long *csr;
-
 	outb(0, DMA1_RESET_REG);
 	outb(0, DMA2_RESET_REG);
 	outb(DMA_MODE_CASCADE, DMA2_MODE_REG);
@@ -130,23 +180,26 @@ dp264_init_irq(void)
 	if (alpha_using_srm)
 		alpha_mv.device_interrupt = dp264_srm_device_interrupt;
 
-	if (TSUNAMI_bootcpu < 2)
-		if (!TSUNAMI_bootcpu)
-			csr = &TSUNAMI_cchip->dim0.csr;
-		else
-			csr = &TSUNAMI_cchip->dim1.csr;
-	else
-		if (TSUNAMI_bootcpu == 2)
-			csr = &TSUNAMI_cchip->dim2.csr;
-		else
-			csr = &TSUNAMI_cchip->dim3.csr;
-		
-	/* Note invert on MASK bits.  */
-        *csr = ~(alpha_irq_mask);
-	mb();
-        *csr;
+	dp264_update_irq_hw(16, alpha_irq_mask, 0);
 
-        enable_irq(55);     /* Enable CYPRESS interrupt controller (ISA).  */
+        enable_irq(55);     /* Enable ISA interrupt controller.  */
+	enable_irq(2);
+}
+
+static void __init
+clipper_init_irq(void)
+{
+	outb(0, DMA1_RESET_REG);
+	outb(0, DMA2_RESET_REG);
+	outb(DMA_MODE_CASCADE, DMA2_MODE_REG);
+	outb(0, DMA2_MASK_REG);
+
+	if (alpha_using_srm)
+		alpha_mv.device_interrupt = clipper_srm_device_interrupt;
+
+	clipper_update_irq_hw(16, alpha_irq_mask, 0);
+
+        enable_irq(55);     /* Enable ISA interrupt controller.  */
 	enable_irq(2);
 }
 
@@ -221,7 +274,7 @@ dp264_map_irq(struct pci_dev *dev, int slot, int pin)
 	const long min_idsel = 5, max_idsel = 10, irqs_per_slot = 5;
 	int irq = COMMON_TABLE_LOOKUP;
 
-	if (irq >= 0)
+	if (irq > 0)
 		irq += 16 * dev2hose(dev);
 
 	return irq;
@@ -250,42 +303,38 @@ monet_map_irq(struct pci_dev *dev, int slot, int pin)
 		{    32,    32,    33,    34,    35}, /* IdSel 13 slot 3 PCI0*/
 		{    28,    28,    29,    30,    31}, /* IdSel 14 slot 4 PCI2*/
 		{    24,    24,    25,    26,    27}  /* IdSel 15 slot 5 PCI2*/
-};
+	};
 	const long min_idsel = 3, max_idsel = 15, irqs_per_slot = 5;
-	int irq = COMMON_TABLE_LOOKUP;
-
-	return irq;
+	return COMMON_TABLE_LOOKUP;
 }
 
 static int __init
 monet_swizzle(struct pci_dev *dev, int *pinp)
 {
-        int slot, pin = *pinp;
-
-        /* Check first for the built-in bridge on hose 1. */
-        if (dev2hose(dev) == 1 && PCI_SLOT(dev->bus->self->devfn) == 8) {
-	  slot = PCI_SLOT(dev->devfn);
-        }
-        else
-        {
-                /* Must be a card-based bridge.  */
-                do {
+	int slot, pin = *pinp;
+
+	/* Check first for the built-in bridge on hose 1. */
+	if (dev2hose(dev) == 1 && PCI_SLOT(dev->bus->self->devfn) == 8) {
+		slot = PCI_SLOT(dev->devfn);
+	} else {
+		/* Must be a card-based bridge.  */
+		do {
 			/* Check for built-in bridge on hose 1. */
-                        if (dev2hose(dev) == 1 &&
+			if (dev2hose(dev) == 1 &&
 			    PCI_SLOT(dev->bus->self->devfn) == 8) {
 				slot = PCI_SLOT(dev->devfn);
 				break;
-                        }
-                        pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn)) ;
-
-                        /* Move up the chain of bridges.  */
-                        dev = dev->bus->self;
-                        /* Slot of the next bridge.  */
-                        slot = PCI_SLOT(dev->devfn);
-                } while (dev->bus->self);
-        }
-        *pinp = pin;
-        return slot;
+			}
+			pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn)) ;
+
+			/* Move up the chain of bridges.  */
+			dev = dev->bus->self;
+			/* Slot of the next bridge.  */
+			slot = PCI_SLOT(dev->devfn);
+		} while (dev->bus->self);
+	}
+	*pinp = pin;
+	return slot;
 }
 
 static int __init
@@ -300,14 +349,34 @@ webbrick_map_irq(struct pci_dev *dev, int slot, int pin)
 		{    30,    30,    30,    30,    30}, /* IdSel 11 21143 #2 */
 		{    -1,    -1,    -1,    -1,    -1}, /* IdSel 12 unused */
 		{    -1,    -1,    -1,    -1,    -1}, /* IdSel 13 unused */
-		{    47,    47,    46,    45,    44}, /* IdSel 14 slot 0 */
+		{    35,    35,    34,    33,    32}, /* IdSel 14 slot 0 */
 		{    39,    39,    38,    37,    36}, /* IdSel 15 slot 1 */
 		{    43,    43,    42,    41,    40}, /* IdSel 16 slot 2 */
-		{    35,    35,    34,    33,    32}, /* IdSel 17 slot 3 */
-};
+		{    47,    47,    46,    45,    44}, /* IdSel 17 slot 3 */
+	};
 	const long min_idsel = 7, max_idsel = 17, irqs_per_slot = 5;
+	return COMMON_TABLE_LOOKUP;
+}
+
+static int __init
+clipper_map_irq(struct pci_dev *dev, int slot, int pin)
+{
+	static char irq_tab[7][5] __initlocaldata = {
+		/*INT    INTA   INTB   INTC   INTD */
+		{ 16+ 8, 16+ 8, 16+ 9, 16+10, 16+11}, /* IdSel 1 slot 1 */
+		{ 16+12, 16+12, 16+13, 16+14, 16+15}, /* IdSel 2 slot 2 */
+		{ 16+16, 16+16, 16+17, 16+18, 16+19}, /* IdSel 3 slot 3 */
+		{ 16+20, 16+20, 16+21, 16+22, 16+23}, /* IdSel 4 slot 4 */
+		{ 16+24, 16+24, 16+25, 16+26, 16+27}, /* IdSel 5 slot 5 */
+		{ 16+28, 16+28, 16+29, 16+30, 16+31}, /* IdSel 6 slot 6 */
+		{    -1,    -1,    -1,    -1,    -1}  /* IdSel 7 ISA Bridge */
+	};
+	const long min_idsel = 1, max_idsel = 7, irqs_per_slot = 5;
 	int irq = COMMON_TABLE_LOOKUP;
 
+	if (irq > 0)
+		irq += 16 * dev2hose(dev);
+
 	return irq;
 }
 
@@ -336,6 +405,13 @@ webbrick_pci_fixup(void)
 	SMC669_Init(0);
 }
 
+static void __init
+clipper_pci_fixup(void)
+{
+	layout_all_busses(DEFAULT_IO_BASE, DEFAULT_MEM_BASE);
+	common_pci_fixup(clipper_map_irq, common_swizzle);
+}
+
 
 /*
  * The System Vectors
@@ -407,5 +483,28 @@ struct alpha_machine_vector webbrick_mv __initmv = {
 	pci_fixup:		webbrick_pci_fixup,
 	kill_arch:		generic_kill_arch,
 };
-/* No alpha_mv alias for webbrick, since we compile it in unconditionally
-   with DP264; setup_arch knows how to cope.  */
+
+struct alpha_machine_vector clipper_mv __initmv = {
+	vector_name:		"Clipper",
+	DO_EV6_MMU,
+	DO_DEFAULT_RTC,
+	DO_TSUNAMI_IO,
+	DO_TSUNAMI_BUS,
+	machine_check:		tsunami_machine_check,
+	max_dma_address:	ALPHA_MAX_DMA_ADDRESS,
+
+	nr_irqs:		64,
+	irq_probe_mask:		_PROBE_MASK(64),
+	update_irq_hw:		clipper_update_irq_hw,
+	ack_irq:		generic_ack_irq,
+	device_interrupt:	dp264_device_interrupt,
+
+	init_arch:		tsunami_init_arch,
+	init_irq:		clipper_init_irq,
+	init_pit:		generic_init_pit,
+	pci_fixup:		clipper_pci_fixup,
+	kill_arch:		generic_kill_arch,
+};
+
+/* No alpha_mv alias for webbrick/monet/clipper, since we compile them
+   in unconditionally with DP264; setup_arch knows how to cope.  */
diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c
index 95adc3b39db2..c554a9fa1a07 100644
--- a/arch/alpha/kernel/sys_takara.c
+++ b/arch/alpha/kernel/sys_takara.c
@@ -3,7 +3,7 @@
  *
  *	Copyright (C) 1995 David A Rusling
  *	Copyright (C) 1996 Jay A Estabrook
- *	Copyright (C) 1998 Richard Henderson
+ *	Copyright (C) 1998, 1999 Richard Henderson
  *
  * Code supporting the TAKARA.
  */
@@ -30,11 +30,21 @@
 #include "machvec.h"
 
 
-/*
- * WARNING WARNING WARNING
- *
- * This port is missing an update_irq_hw implementation.
- */
+static void 
+takara_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p)
+{
+	unsigned int regaddr;
+	/* IRQs 0-15 go to the ISA PICs, 16-31 to the registers from 0x510. */
+	if (irq <= 15) {
+		if (irq <= 7)
+			outb(mask, 0x21);	/* ISA PIC1 */
+		else
+			outb(mask >> 8, 0xA1);	/* ISA PIC2 */
+	} else if (irq <= 31) {	/* irq > 31 writes nothing; unmask_p is unused */
+		regaddr = 0x510 + ((irq - 16) & 0x0c);	/* 0x510, 0x514, 0x518 or 0x51c */
+		outl((mask >> ((irq - 16) & 0x0c)) & 0xf0000Ul, regaddr); /* keeps bits 16-19 after the shift */
+	}
+}
 
 static void
 takara_device_interrupt(unsigned long vector, struct pt_regs *regs)
@@ -68,28 +78,45 @@ takara_device_interrupt(unsigned long vector, struct pt_regs *regs)
 		if (intstatus & 4) handle_irq(16+2, 16+2, regs);
 		if (intstatus & 2) handle_irq(16+1, 16+1, regs);
 		if (intstatus & 1) handle_irq(16+0, 16+0, regs);
-	} else
+	} else {
 		isa_device_interrupt (vector, regs);
+	}
+}
+
+static void 
+takara_srm_device_interrupt(unsigned long vector, struct pt_regs * regs)
+{
+	int irq = (vector - 0x800) >> 4;	/* 0x10 of vector space per IRQ */
+	/* Above IRQ 15 the step is 0x40 per IRQ: 0x900 -> 16, 0x940 -> 17. */
+	if (irq > 15)
+		irq = ((vector - 0x800) >> 6) + 12;
+	/* The IRQ number is passed as both the irq and the ack argument. */
+	handle_irq(irq, irq, regs);
+}
 
 static void __init
 takara_init_irq(void)
 {
-	unsigned int ctlreg;
-
 	STANDARD_INIT_IRQ_PROLOG;
 
-	ctlreg = inl(0x500);
-	ctlreg &= ~0x8000;     /* return to non-accelerated mode */
-	outw(ctlreg >> 16, 0x502);
-	outw(ctlreg & 0xFFFF, 0x500);
-	ctlreg = 0x05107c00;   /* enable the PCI interrupt register */
-	outw(ctlreg >> 16, 0x502);
-	outw(ctlreg & 0xFFFF, 0x500);
+	if (alpha_using_srm)
+		alpha_mv.device_interrupt = takara_srm_device_interrupt;
+
+	if (!alpha_using_srm) {
+		unsigned int ctlreg = inl(0x500);
+
+		/* Return to non-accelerated mode.  */
+		ctlreg &= ~0x8000;
+		outl(ctlreg, 0x500);
+
+		/* Enable the PCI interrupt register.  */
+		ctlreg = 0x05107c00;
+		outl(ctlreg, 0x500);
+	}
+
 	enable_irq(2);
 }
 
-
 /*
  * The Takara has PCI devices 1, 2, and 3 configured to slots 20,
  * 19, and 18 respectively, in the default configuration. They can
@@ -123,12 +150,35 @@ takara_map_irq(struct pci_dev *dev, int slot, int pin)
 	return COMMON_TABLE_LOOKUP;
 }
 
+static int __init
+takara_swizzle(struct pci_dev *dev, int *pinp)
+{
+	int slot = PCI_SLOT(dev->devfn);
+	int pin = *pinp;
+	unsigned int ctlreg = inl(0x500);
+	unsigned int busslot = PCI_SLOT(dev->bus->self->devfn);
+
+	/* Check first for built-in bridges.  */
+	if (busslot > 16 && ((1<<(36-busslot)) & ctlreg)) {
+		if (pin == 1)
+			pin += (20 - busslot);
+		else {
+			/* Must be a card-based bridge.  */
+			printk(KERN_WARNING "takara_swizzle: cannot handle "
+			       "card-bridge behind builtin bridge yet.\n");
+		}
+	}
+
+	*pinp = pin;
+	return slot;
+}
+
 static void __init
 takara_pci_fixup(void)
 {
 	layout_all_busses(DEFAULT_IO_BASE, DEFAULT_MEM_BASE);
-	common_pci_fixup(takara_map_irq, common_swizzle);
-	enable_ide(0x26e);
+	common_pci_fixup(takara_map_irq, takara_swizzle);
+	/* enable_ide(0x26e); */
 }
 
 
@@ -147,7 +197,7 @@ struct alpha_machine_vector takara_mv __initmv = {
 
 	nr_irqs:		20,
 	irq_probe_mask:		_PROBE_MASK(20),
-	update_irq_hw:		NULL,
+	update_irq_hw:		takara_update_irq_hw,
 	ack_irq:		generic_ack_irq,
 	device_interrupt:	takara_device_interrupt,
 
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 927ca201f503..e7f32dc9a7a5 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -1,7 +1,7 @@
 /*
  *  linux/arch/alpha/kernel/time.c
  *
- *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *  Copyright (C) 1991, 1992, 1995, 1999  Linus Torvalds
  *
  * This file contains the PC-specific time handling details:
  * reading the RTC at bootup, etc..
@@ -43,7 +43,7 @@
 #include "irq.h"
 
 extern rwlock_t xtime_lock;
-extern volatile unsigned long lost_ticks;	/*kernel/sched.c*/
+extern volatile unsigned long lost_ticks;	/* kernel/sched.c */
 
 static int set_rtc_mmss(unsigned long);
 
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index e991fdf833a9..5cf6e4ab0670 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <asm/io.h>
 
 #define __EXTERN_INLINE inline
 #include <asm/mmu_context.h>
@@ -28,66 +29,23 @@
 extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);
 
 
-#ifdef __SMP__
-unsigned long last_asn[NR_CPUS] = { /* gag */
-  ASN_FIRST_VERSION +  (0 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION +  (1 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION +  (2 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION +  (3 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION +  (4 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION +  (5 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION +  (6 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION +  (7 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION +  (8 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION +  (9 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (10 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (11 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (12 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (13 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (14 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (15 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (16 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (17 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (18 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (19 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (20 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (21 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (22 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (23 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (24 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (25 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (26 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (27 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (28 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (29 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (30 << WIDTH_HARDWARE_ASN),
-  ASN_FIRST_VERSION + (31 << WIDTH_HARDWARE_ASN)
-};
-#else
-unsigned long asn_cache = ASN_FIRST_VERSION;
-#endif /* __SMP__ */
-
 /*
- * Select a new ASN for a task.
+ * Force a new ASN for a task.
  */
 
+#ifndef __SMP__
+unsigned long last_asn = ASN_FIRST_VERSION;
+#endif
+
 void
 get_new_mmu_context(struct task_struct *p, struct mm_struct *mm)
 {
-	unsigned long asn = asn_cache;
-
-	if ((asn & HARDWARE_ASN_MASK) < MAX_ASN)
-		++asn;
-	else {
-		tbiap();
-		imb();
-		asn = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION;
-	}
-	asn_cache = asn;
-	mm->context = asn;			/* full version + asn */
-	p->tss.asn = asn & HARDWARE_ASN_MASK;	/* just asn */
+	unsigned long new = __get_new_mmu_context(p, mm);
+	p->tss.mm_context = new;
+	p->tss.asn = new & HARDWARE_ASN_MASK;
 }
 
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to handle_mm_fault().
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index fc5a964bbbbe..582cf78f7e1f 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -174,7 +174,7 @@ show_mem(void)
 
 extern unsigned long free_area_init(unsigned long, unsigned long);
 
-static struct thread_struct *
+static inline struct thread_struct *
 load_PCB(struct thread_struct * pcb)
 {
 	register unsigned long sp __asm__("$30");
diff --git a/fs/buffer.c b/fs/buffer.c
index 68a66b999d52..9ffb8556a28c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -744,20 +744,6 @@ out:
 	return bh;
 }
 
-void set_writetime(struct buffer_head * buf, int flag)
-{
-	int newtime;
-
-	if (buffer_dirty(buf)) {
-		/* Move buffer to dirty list if jiffies is clear. */
-		newtime = jiffies + (flag ? bdf_prm.b_un.age_super : bdf_prm.b_un.age_buffer);
-		if (!buf->b_flushtime || buf->b_flushtime > newtime)
-			 buf->b_flushtime = newtime;
-	} else {
-		buf->b_flushtime = 0;
-	}
-}
-
 /*
  * Put a buffer into the appropriate list, without side-effects.
  */
@@ -796,7 +782,7 @@ void balance_dirty(kdev_t dev)
 
 static inline void __mark_dirty(struct buffer_head *bh, int flag)
 {
-	set_writetime(bh, flag);
+	bh->b_flushtime = jiffies + (flag ? bdf_prm.b_un.age_super : bdf_prm.b_un.age_buffer);
 	refile_buffer(bh);
 }
 
@@ -840,9 +826,6 @@ void refile_buffer(struct buffer_head * buf)
  */
 void __brelse(struct buffer_head * buf)
 {
-	/* If dirty, mark the time this buffer should be written back. */
-	set_writetime(buf, 0);
-	refile_buffer(buf);
 	touch_buffer(buf);
 
 	if (buf->b_count) {
@@ -1556,7 +1539,7 @@ int block_write_partial_page (struct file *file, struct page *page, unsigned lon
 		 * lots of dirty pages.
 		 */
 		if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
-			__atomic_mark_buffer_dirty(bh,0);
+			__atomic_mark_buffer_dirty(bh, bdf_prm.b_un.age_buffer);
 			if (too_many_dirty_buffers)
 				balance_dirty(bh->b_dev);
 		}
diff --git a/fs/ext2/truncate.c b/fs/ext2/truncate.c
index 75ffaa534690..b6f57efb0fac 100644
--- a/fs/ext2/truncate.c
+++ b/fs/ext2/truncate.c
@@ -158,7 +158,7 @@ out:
 }
 
 #define DATA_BUFFER_USED(bh) \
-	((bh->b_count > 1) || buffer_locked(bh))
+	(bh->b_count || buffer_locked(bh))
 
 static int trunc_direct (struct inode * inode)
 {
@@ -177,12 +177,11 @@ static int trunc_direct (struct inode * inode)
 
 		bh = find_buffer(inode->i_dev, tmp, inode->i_sb->s_blocksize);
 		if (bh) {
-			bh->b_count++;
 			if (DATA_BUFFER_USED(bh)) {
-				brelse(bh);
 				retry = 1;
 				continue;
 			}
+			bh->b_count++;
 		}
 
 		*p = 0;
@@ -254,12 +253,11 @@ static int trunc_indirect (struct inode * inode, int offset, u32 * p,
 		 */
 		bh = find_buffer(inode->i_dev, tmp, inode->i_sb->s_blocksize);
 		if (bh) {
-			bh->b_count++;
 			if (DATA_BUFFER_USED(bh)) {
-				brelse(bh);
 				retry = 1;
 				continue;
 			}
+			bh->b_count++;
 		}
 
 		*ind = 0;
diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h
index 2dccf35217ec..67b74d3568d2 100644
--- a/include/asm-alpha/atomic.h
+++ b/include/asm-alpha/atomic.h
@@ -75,6 +75,7 @@ extern __inline__ long atomic_add_return(int i, atomic_t * v)
 	"	mov %0,%2\n"
 	"	stl_c %0,%1\n"
 	"	beq %0,2f\n"
+	"	mb\n"
 	".section .text2,\"ax\"\n"
 	"2:	br 1b\n"
 	".previous"
@@ -92,6 +93,7 @@ extern __inline__ long atomic_sub_return(int i, atomic_t * v)
 	"	mov %0,%2\n"
 	"	stl_c %0,%1\n"
 	"	beq %0,2f\n"
+	"	mb\n"
 	".section .text2,\"ax\"\n"
 	"2:	br 1b\n"
 	".previous"
diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index adaf2fac2658..c9e7e7aee9e8 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -90,6 +90,7 @@ extern __inline__ unsigned long test_and_set_bit(unsigned long nr,
 	"	xor %0,%3,%0\n"
 	"	stl_c %0,%1\n"
 	"	beq %0,3f\n"
+	"	mb\n"
 	"2:\n"
 	".section .text2,\"ax\"\n"
 	"3:	br 1b\n"
@@ -114,6 +115,7 @@ extern __inline__ unsigned long test_and_clear_bit(unsigned long nr,
 	"	xor %0,%3,%0\n"
 	"	stl_c %0,%1\n"
 	"	beq %0,3f\n"
+	"	mb\n"
 	"2:\n"
 	".section .text2,\"ax\"\n"
 	"3:	br 1b\n"
@@ -137,6 +139,7 @@ extern __inline__ unsigned long test_and_change_bit(unsigned long nr,
 	"	xor %0,%3,%0\n"
 	"	stl_c %0,%1\n"
 	"	beq %0,3f\n"
+	"	mb\n"
 	".section .text2,\"ax\"\n"
 	"3:	br 1b\n"
 	".previous"
diff --git a/include/asm-alpha/init.h b/include/asm-alpha/init.h
index a85501cbbbbb..f343aecd3266 100644
--- a/include/asm-alpha/init.h
+++ b/include/asm-alpha/init.h
@@ -1,6 +1,7 @@
 #ifndef _ALPHA_INIT_H
 #define _ALPHA_INIT_H
 
+#ifndef MODULE
 #define __init __attribute__ ((__section__ (".text.init")))
 #define __initdata __attribute__ ((__section__ (".data.init")))
 #define __initfunc(__arginit) \
@@ -11,6 +12,7 @@
 #define __INIT		.section	.text.init,"ax"
 #define __FINIT		.previous
 #define __INITDATA	.section	.data.init,"a"
+#endif
 
 #define __cacheline_aligned __attribute__((__aligned__(32)))
 
diff --git a/include/asm-alpha/io.h b/include/asm-alpha/io.h
index f908f74640ad..5ba356f61438 100644
--- a/include/asm-alpha/io.h
+++ b/include/asm-alpha/io.h
@@ -29,15 +29,16 @@
  */
 static inline void __set_hae(unsigned long new_hae)
 {
-	unsigned long ipl = swpipl(7);
+	unsigned long flags;
+	__save_and_cli(flags);
 
 	alpha_mv.hae_cache = new_hae;
 	*alpha_mv.hae_register = new_hae;
 	mb();
-
 	/* Re-read to make sure it was written.  */
 	new_hae = *alpha_mv.hae_register;
-	setipl(ipl);
+
+	__restore_flags(flags);
 }
 
 static inline void set_hae(unsigned long new_hae)
diff --git a/include/asm-alpha/machvec.h b/include/asm-alpha/machvec.h
index 035ffa4e2a94..587fa8a3e556 100644
--- a/include/asm-alpha/machvec.h
+++ b/include/asm-alpha/machvec.h
@@ -32,7 +32,6 @@ struct alpha_machine_vector
 	int rtc_port;
 	int max_asn;
 	unsigned long max_dma_address;
-	unsigned long mmu_context_mask;
 	unsigned long irq_probe_mask;
 	unsigned long iack_sc;
 
diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h
index 03aa3c6d2861..7b800d156329 100644
--- a/include/asm-alpha/mmu_context.h
+++ b/include/asm-alpha/mmu_context.h
@@ -49,31 +49,24 @@
 # endif
 #endif
 
-#ifdef __SMP__
-#define WIDTH_THIS_PROCESSOR	5
 /*
- * last_asn[processor]:
+ * cpu_last_asn(processor):
  * 63                                            0
  * +-------------+----------------+--------------+
  * | asn version | this processor | hardware asn |
  * +-------------+----------------+--------------+
  */
-extern unsigned long last_asn[];
-#define asn_cache last_asn[p->processor]
 
+#ifdef __SMP__
+#include <asm/smp.h>
+#define cpu_last_asn(cpuid)	(cpu_data[cpuid].last_asn)
 #else
-#define WIDTH_THIS_PROCESSOR	0
-/*
- * asn_cache:
- * 63                                            0
- * +------------------------------+--------------+
- * |         asn version          | hardware asn |
- * +------------------------------+--------------+
- */
-extern unsigned long asn_cache;
+extern unsigned long last_asn;
+#define cpu_last_asn(cpuid)	last_asn
 #endif /* __SMP__ */
 
 #define WIDTH_HARDWARE_ASN	8
+#define WIDTH_THIS_PROCESSOR	5
 #define ASN_FIRST_VERSION (1UL << (WIDTH_THIS_PROCESSOR + WIDTH_HARDWARE_ASN))
 #define HARDWARE_ASN_MASK ((1UL << WIDTH_HARDWARE_ASN) - 1)
 
@@ -96,20 +89,46 @@ extern unsigned long asn_cache;
 
 extern void get_new_mmu_context(struct task_struct *p, struct mm_struct *mm);
 
-__EXTERN_INLINE void ev4_get_mmu_context(struct task_struct *p)
+static inline unsigned long
+__get_new_mmu_context(struct task_struct *p, struct mm_struct *mm)
 {
-	/* As described, ASN's are broken.  */
+	unsigned long asn = cpu_last_asn(smp_processor_id());
+	unsigned long next = asn + 1;
+
+	if ((next ^ asn) & ~MAX_ASN) {
+		tbiap();
+		next = (asn & ~HARDWARE_ASN_MASK) + ASN_FIRST_VERSION;
+	}
+	cpu_last_asn(smp_processor_id()) = next;
+	mm->context = next;                      /* full version + asn */
+	return next;
 }
 
-__EXTERN_INLINE void ev5_get_mmu_context(struct task_struct *p)
+__EXTERN_INLINE void
+ev4_get_mmu_context(struct task_struct *p)
 {
-	struct mm_struct * mm = p->mm;
+	/* As described, ASNs are broken.  But we can optimize for
+	   switching between threads -- if the mm is unchanged from
+	   current we needn't flush.  */
+	if (current->mm != p->mm)
+		tbiap();
+}
 
-	if (mm) {
-		unsigned long asn = asn_cache;
-		/* Check if our ASN is of an older version and thus invalid */
-		if ((mm->context ^ asn) & ~HARDWARE_ASN_MASK)
-			get_new_mmu_context(p, mm);
+__EXTERN_INLINE void
+ev5_get_mmu_context(struct task_struct *p)
+{
+	/* Check if our ASN is of an older version, or on a different CPU,
+	   and thus invalid.  */
+
+	long asn = cpu_last_asn(smp_processor_id());
+	struct mm_struct *mm = p->mm;
+	long mmc = mm->context;
+	
+	if ((p->tss.mm_context ^ asn) & ~HARDWARE_ASN_MASK) {
+		if ((mmc ^ asn) & ~HARDWARE_ASN_MASK)
+			mmc = __get_new_mmu_context(p, mm);
+		p->tss.mm_context = mmc;
+		p->tss.asn = mmc & HARDWARE_ASN_MASK;
 	}
 }
 
@@ -123,40 +142,40 @@ __EXTERN_INLINE void ev5_get_mmu_context(struct task_struct *p)
 # endif
 #endif
 
-extern inline void init_new_context(struct mm_struct *mm)
+extern inline void
+init_new_context(struct mm_struct *mm)
 {
 	mm->context = 0;
 }
 
-extern inline void destroy_context(struct mm_struct *mm)
+extern inline void
+destroy_context(struct mm_struct *mm)
 {
 	/* Nothing to do.  */
 }
 
+#ifdef __MMU_EXTERN_INLINE
+#undef __EXTERN_INLINE
+#undef __MMU_EXTERN_INLINE
+#endif
 
 /*
  * Force a context reload. This is needed when we change the page
  * table pointer or when we update the ASN of the current process.
  */
 
-#if defined(CONFIG_ALPHA_GENERIC)
-#define MASK_CONTEXT(tss) \
- ((struct thread_struct *)((unsigned long)(tss) & alpha_mv.mmu_context_mask))
-#elif defined(CONFIG_ALPHA_DP264)
-#define MASK_CONTEXT(tss) \
- ((struct thread_struct *)((unsigned long)(tss) & 0xfffffffffful))
-#else
-#define MASK_CONTEXT(tss)  (tss)
+/* Don't get into trouble with dueling __EXTERN_INLINEs.  */
+#ifndef __EXTERN_INLINE
+#include <asm/io.h>
 #endif
 
-__EXTERN_INLINE struct thread_struct *
+extern inline unsigned long
 __reload_tss(struct thread_struct *tss)
 {
-	register struct thread_struct *a0 __asm__("$16");
-	register struct thread_struct *v0 __asm__("$0");
-
-	a0 = MASK_CONTEXT(tss);
+	register unsigned long a0 __asm__("$16");
+	register unsigned long v0 __asm__("$0");
 
+	a0 = virt_to_phys(tss);
 	__asm__ __volatile__(
 		"call_pal %2 #__reload_tss"
 		: "=r"(v0), "=r"(a0)
@@ -166,27 +185,22 @@ __reload_tss(struct thread_struct *tss)
 	return v0;
 }
 
-__EXTERN_INLINE void
+extern inline void
 reload_context(struct task_struct *task)
 {
 	__reload_tss(&task->tss);
 }
 
 /*
- * After we have set current->mm to a new value, this activates the
- * context for the new mm so we see the new mappings.
+ * After setting current->mm to a new value, activate the context for the
+ * new mm so we see the new mappings.
  */
 
-__EXTERN_INLINE void
+extern inline void
 activate_context(struct task_struct *task)
 {
-	get_mmu_context(task);
+	get_new_mmu_context(task, task->mm);
 	reload_context(task);
 }
 
-#ifdef __MMU_EXTERN_INLINE
-#undef __EXTERN_INLINE
-#undef __MMU_EXTERN_INLINE
-#endif
-
 #endif /* __ALPHA_MMU_CONTEXT_H */
diff --git a/include/asm-alpha/page.h b/include/asm-alpha/page.h
index c2d27951e54e..816219ce9f65 100644
--- a/include/asm-alpha/page.h
+++ b/include/asm-alpha/page.h
@@ -105,6 +105,15 @@ typedef unsigned long pgprot_t;
 #define __pgprot(x)	(x)
 
 #endif /* STRICT_MM_TYPECHECKS */
+
+#define BUG()							\
+do {								\
+	printk("Kernel BUG at %s:%d!\n", __FILE__, __LINE__);	\
+	__asm__ __volatile__("call_pal 129 # bugchk");		\
+} while (1)
+
+#define PAGE_BUG(page) BUG()
+
 #endif /* !ASSEMBLY */
 
 /* to align the pointer to the (next) page boundary */
diff --git a/include/asm-alpha/processor.h b/include/asm-alpha/processor.h
index 754a4e8f835f..a399ac93eb92 100644
--- a/include/asm-alpha/processor.h
+++ b/include/asm-alpha/processor.h
@@ -8,10 +8,10 @@
 #define __ASM_ALPHA_PROCESSOR_H
 
 /*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
+ * Returns current instruction pointer ("program counter").
  */
-#define current_text_addr() ({ __label__ _l; _l: &&_l;})
+#define current_text_addr() \
+  ({ void *__pc; __asm__ ("br %0,.+4" : "=r"(__pc)); __pc; })
 
 /*
  * We have a 42-bit user address space: 4TB user VM...
@@ -61,6 +61,15 @@ struct thread_struct {
 	 */
 	unsigned long flags;
 
+	/* The full version of the ASN including serial number.
+
+	   Two threads running on two different processors must of necessity
+	   have different serial numbers.  Having this duplicated from
+	   mm->context allows them to be slightly out of sync, preventing
+	   the ASN from incrementing each and every time the two threads
+	   are scheduled.  */
+	unsigned long mm_context;
+
 	/* Perform syscall argument validation (get/set_fs). */
 	mm_segment_t fs;
 
@@ -77,7 +86,7 @@ struct thread_struct {
 	0, 0, 0, \
 	0, 0, 0, \
 	0, 0, 0, \
-	0, \
+	0, 0, \
 	KERNEL_DS \
 }
 
diff --git a/include/asm-alpha/smp.h b/include/asm-alpha/smp.h
index d53142bb11fd..785194f81859 100644
--- a/include/asm-alpha/smp.h
+++ b/include/asm-alpha/smp.h
@@ -9,6 +9,7 @@
 
 struct cpuinfo_alpha {
 	unsigned long loops_per_sec;
+	unsigned long last_asn;
 	unsigned long *pgd_cache;
 	unsigned long *pte_cache;
 	unsigned long pgtable_cache_sz;
diff --git a/include/asm-alpha/softirq.h b/include/asm-alpha/softirq.h
index cb89c53285ac..dad9c49055e0 100644
--- a/include/asm-alpha/softirq.h
+++ b/include/asm-alpha/softirq.h
@@ -5,18 +5,33 @@
 #include <asm/atomic.h>
 #include <asm/hardirq.h>
 
-/*
- * This works but is wrong - on SMP it should disable only on the
- * current CPU and shouldn't synchronize like the heavy global
- * disable does. Oh, well.
- *
- * See the x86 version for an example.
- */
-#define local_bh_enable()	start_bh_atomic()
-#define local_bh_disable()	end_bh_atomic()
-
 extern unsigned int local_bh_count[NR_CPUS];
 
+extern inline void cpu_bh_disable(int cpu)
+{
+	local_bh_count[cpu]++;
+	mb();
+}
+
+extern inline void cpu_bh_enable(int cpu)
+{
+	mb();
+	local_bh_count[cpu]--;
+}
+
+extern inline int cpu_bh_trylock(int cpu)
+{
+	return local_bh_count[cpu] ? 0 : (local_bh_count[cpu] = 1);
+}
+
+extern inline void cpu_bh_endlock(int cpu)
+{
+	local_bh_count[cpu] = 0;
+}
+
+#define local_bh_enable()	cpu_bh_enable(smp_processor_id())
+#define local_bh_disable()	cpu_bh_disable(smp_processor_id())
+
 #define get_active_bhs()	(bh_mask & bh_active)
 
 static inline void clear_active_bhs(unsigned long x)
@@ -43,8 +58,9 @@ extern inline void init_bh(int nr, void (*routine)(void))
 
 extern inline void remove_bh(int nr)
 {
-	bh_base[nr] = NULL;
 	bh_mask &= ~(1 << nr);
+	wmb();
+	bh_base[nr] = NULL;
 }
 
 extern inline void mark_bh(int nr)
@@ -78,44 +94,39 @@ static inline void end_bh_atomic(void)
 /* These are for the irq's testing the lock */
 static inline int softirq_trylock(int cpu)
 {
-	if (!test_and_set_bit(0,&global_bh_count)) {
-		if (atomic_read(&global_bh_lock) == 0) {
-			++local_bh_count[cpu];
-			return 1;
+	if (cpu_bh_trylock(cpu)) {
+		if (!test_and_set_bit(0, &global_bh_count)) {
+			if (atomic_read(&global_bh_lock) == 0)
+				return 1;
+			clear_bit(0, &global_bh_count);
 		}
-		clear_bit(0,&global_bh_count);
+		cpu_bh_endlock(cpu);
 	}
 	return 0;
 }
 
 static inline void softirq_endlock(int cpu)
 {
-	local_bh_count[cpu]--;
-	clear_bit(0,&global_bh_count);
+	cpu_bh_enable(cpu);
+	clear_bit(0, &global_bh_count);
 }
 
 #else
 
 extern inline void start_bh_atomic(void)
 {
-	local_bh_count[smp_processor_id()]++;
-	barrier();
+	local_bh_disable();
 }
 
 extern inline void end_bh_atomic(void)
 {
-	barrier();
-	local_bh_count[smp_processor_id()]--;
+	local_bh_enable();
 }
 
 /* These are for the irq's testing the lock */
-#define softirq_trylock(cpu) \
-  (local_bh_count[cpu] ? 0 : (local_bh_count[cpu] = 1))
-
-#define softirq_endlock(cpu) \
-  (local_bh_count[cpu] = 0)
-
-#define synchronize_bh()	do { } while (0)
+#define softirq_trylock(cpu)	cpu_bh_trylock(cpu)
+#define softirq_endlock(cpu)	cpu_bh_endlock(cpu)
+#define synchronize_bh()	barrier()
 
 #endif	/* SMP */
 
diff --git a/include/asm-alpha/spinlock.h b/include/asm-alpha/spinlock.h
index bbc8de52bb0f..454a56582388 100644
--- a/include/asm-alpha/spinlock.h
+++ b/include/asm-alpha/spinlock.h
@@ -8,29 +8,47 @@
  * and read-write locks.. We should actually do a
  * <linux/spinlock.h> with all of this. Oh, well.
  */
-#define spin_lock_irqsave(lock, flags)		do { local_irq_save(flags);       spin_lock(lock); } while (0)
-#define spin_lock_irq(lock)			do { local_irq_disable();         spin_lock(lock); } while (0)
-#define spin_lock_bh(lock)			do { local_bh_disable();          spin_lock(lock); } while (0)
-
-#define read_lock_irqsave(lock, flags)		do { local_irq_save(flags);       read_lock(lock); } while (0)
-#define read_lock_irq(lock)			do { local_irq_disable();         read_lock(lock); } while (0)
-#define read_lock_bh(lock)			do { local_bh_disable();          read_lock(lock); } while (0)
-
-#define write_lock_irqsave(lock, flags)		do { local_irq_save(flags);      write_lock(lock); } while (0)
-#define write_lock_irq(lock)			do { local_irq_disable();        write_lock(lock); } while (0)
-#define write_lock_bh(lock)			do { local_bh_disable();         write_lock(lock); } while (0)
-
-#define spin_unlock_irqrestore(lock, flags)	do { spin_unlock(lock);  local_irq_restore(flags); } while (0)
-#define spin_unlock_irq(lock)			do { spin_unlock(lock);  local_irq_enable();       } while (0)
-#define spin_unlock_bh(lock)			do { spin_unlock(lock);  local_bh_enable();        } while (0)
-
-#define read_unlock_irqrestore(lock, flags)	do { read_unlock(lock);  local_irq_restore(flags); } while (0)
-#define read_unlock_irq(lock)			do { read_unlock(lock);  local_irq_enable();       } while (0)
-#define read_unlock_bh(lock)			do { read_unlock(lock);  local_bh_enable();        } while (0)
-
-#define write_unlock_irqrestore(lock, flags)	do { write_unlock(lock); local_irq_restore(flags); } while (0)
-#define write_unlock_irq(lock)			do { write_unlock(lock); local_irq_enable();       } while (0)
-#define write_unlock_bh(lock)			do { write_unlock(lock); local_bh_enable();        } while (0)
+#define spin_lock_irqsave(lock, flags) \
+  do { local_irq_save(flags); spin_lock(lock); } while (0)
+#define spin_lock_irq(lock) \
+  do { local_irq_disable(); spin_lock(lock); } while (0)
+#define spin_lock_bh(lock) \
+  do { local_bh_disable(); spin_lock(lock); } while (0)
+
+#define read_lock_irqsave(lock, flags) \
+  do { local_irq_save(flags); read_lock(lock); } while (0)
+#define read_lock_irq(lock) \
+  do { local_irq_disable(); read_lock(lock); } while (0)
+#define read_lock_bh(lock) \
+  do { local_bh_disable(); read_lock(lock); } while (0)
+
+#define write_lock_irqsave(lock, flags) \
+  do { local_irq_save(flags); write_lock(lock); } while (0)
+#define write_lock_irq(lock) \
+  do { local_irq_disable(); write_lock(lock); } while (0)
+#define write_lock_bh(lock) \
+  do { local_bh_disable(); write_lock(lock); } while (0)
+
+#define spin_unlock_irqrestore(lock, flags) \
+  do { spin_unlock(lock); local_irq_restore(flags); } while (0)
+#define spin_unlock_irq(lock) \
+  do { spin_unlock(lock); local_irq_enable(); } while (0)
+#define spin_unlock_bh(lock) \
+  do { spin_unlock(lock); local_bh_enable(); } while (0)
+
+#define read_unlock_irqrestore(lock, flags) \
+  do { read_unlock(lock); local_irq_restore(flags); } while (0)
+#define read_unlock_irq(lock) \
+  do { read_unlock(lock); local_irq_enable(); } while (0)
+#define read_unlock_bh(lock) \
+  do { read_unlock(lock); local_bh_enable(); } while (0)
+
+#define write_unlock_irqrestore(lock, flags) \
+  do { write_unlock(lock); local_irq_restore(flags); } while (0)
+#define write_unlock_irq(lock) \
+  do { write_unlock(lock); local_irq_enable(); } while (0)
+#define write_unlock_bh(lock) \
+  do { write_unlock(lock); local_bh_enable(); } while (0)
 
 #ifndef __SMP__
 
@@ -49,7 +67,7 @@
 
 #define spin_lock_init(lock)			((void) 0)
 #define spin_lock(lock)				((void) 0)
-#define spin_trylock(lock)			((void) 0)
+#define spin_trylock(lock)			(1)
 #define spin_unlock_wait(lock)			((void) 0)
 #define spin_unlock(lock)			((void) 0)
 
@@ -94,19 +112,20 @@
  */
 
 typedef struct {
-	volatile unsigned int lock;
+	volatile unsigned int lock /*__attribute__((aligned(32))) */;
 #if DEBUG_SPINLOCK
-	char debug_state, target_ipl, saved_ipl, on_cpu;
+	int on_cpu;
+	int line_no;
 	void *previous;
 	struct task_struct * task;
+	const char *base_file;
 #endif
 } spinlock_t;
 
 #if DEBUG_SPINLOCK
-#define SPIN_LOCK_UNLOCKED (spinlock_t) {0, 1, 0, 0, 0, 0}
+#define SPIN_LOCK_UNLOCKED (spinlock_t) {0, -1, 0, 0, 0, 0}
 #define spin_lock_init(x)						\
-	((x)->lock = 0, (x)->target_ipl = 0, (x)->debug_state = 1,	\
-	 (x)->previous = 0, (x)->task = 0)
+	((x)->lock = 0, (x)->on_cpu = -1, (x)->previous = 0, (x)->task = 0)
 #else
 #define SPIN_LOCK_UNLOCKED	(spinlock_t) { 0 }
 #define spin_lock_init(x)	((x)->lock = 0)
@@ -120,8 +139,11 @@ typedef struct { unsigned long a[100]; } __dummy_lock_t;
 
 #if DEBUG_SPINLOCK
 extern void spin_unlock(spinlock_t * lock);
-extern void spin_lock(spinlock_t * lock);
-extern int spin_trylock(spinlock_t * lock);
+extern void debug_spin_lock(spinlock_t * lock, const char *, int);
+extern int debug_spin_trylock(spinlock_t * lock, const char *, int);
+
+#define spin_lock(LOCK) debug_spin_lock(LOCK, __BASE_FILE__, __LINE__)
+#define spin_trylock(LOCK) debug_spin_trylock(LOCK, __BASE_FILE__, __LINE__)
 
 #define spin_lock_own(LOCK, LOCATION)					\
 do {									\
@@ -167,7 +189,9 @@ static inline void spin_lock(spinlock_t * lock)
 
 /***********************************************************/
 
-typedef struct { volatile int write_lock:1, read_counter:31; } rwlock_t;
+typedef struct {
+	volatile int write_lock:1, read_counter:31;
+} /*__attribute__((aligned(32)))*/ rwlock_t;
 
 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
 
diff --git a/include/asm-alpha/system.h b/include/asm-alpha/system.h
index 2be0ced69715..6939d37abac5 100644
--- a/include/asm-alpha/system.h
+++ b/include/asm-alpha/system.h
@@ -86,16 +86,6 @@ struct el_common_EV5_uncorrectable_mcheck {
         unsigned long   ld_lock;          /* Contents of EV5 LD_LOCK register*/
 };
 
-
-extern void wrent(void *, unsigned long);
-extern void wrkgp(unsigned long);
-extern void wrusp(unsigned long);
-extern unsigned long rdusp(void);
-extern unsigned long rdmces (void);
-extern void wrmces (unsigned long);
-extern unsigned long whami(void);
-extern void wripir(unsigned long);
-
 extern void halt(void) __attribute__((noreturn));
 
 #define switch_to(prev,next,last)			\
@@ -159,73 +149,86 @@ enum amask_enum {
    __asm__ ("amask %1,%0" : "=r"(__amask) : "rI"(__input));	\
    __amask; })
 
-static inline unsigned long 
-wrperfmon(unsigned long perf_fun, unsigned long arg)
-{
-          register unsigned long __r0 __asm__("$0");
-	  register unsigned long __r16 __asm__("$16");
-	  register unsigned long __r17 __asm__("$17");
-	  __r16 = perf_fun;
-	  __r17 = arg;
-	  __asm__ __volatile__(
-		  "call_pal %1"
-		  : "=r"(__r0)
-		  : "i"(PAL_wrperfmon), "r"(__r16), "r"(__r17)
-		  : "$1", "$22", "$23", "$24", "$25", "$26");
-	  return __r0;
+#define __CALL_PAL_R0(NAME, TYPE)				\
+static inline TYPE NAME(void)					\
+{								\
+	register TYPE __r0 __asm__("$0");			\
+	__asm__ __volatile__(					\
+		"call_pal %1 # " #NAME				\
+		:"=r" (__r0)					\
+		:"i" (PAL_ ## NAME)				\
+		:"$1", "$16", "$22", "$23", "$24", "$25");	\
+	return __r0;						\
 }
 
+#define __CALL_PAL_W1(NAME, TYPE0)				\
+static inline void NAME(TYPE0 arg0)				\
+{								\
+	register TYPE0 __r16 __asm__("$16") = arg0;		\
+	__asm__ __volatile__(					\
+		"call_pal %1 # "#NAME				\
+		: "=r"(__r16)					\
+		: "i"(PAL_ ## NAME), "0"(__r16)			\
+		: "$1", "$22", "$23", "$24", "$25");		\
+}
 
-#define call_pal1(palno,arg)						\
-({									\
-	register unsigned long __r0 __asm__("$0");			\
-	register unsigned long __r16 __asm__("$16"); __r16 = arg;	\
-	__asm__ __volatile__(						\
-		"call_pal %3 #call_pal1"				\
-		:"=r" (__r0),"=r" (__r16)				\
-		:"1" (__r16),"i" (palno)				\
-		:"$1", "$22", "$23", "$24", "$25", "memory");		\
-	__r0;								\
-})
-
-#define getipl()							\
-({									\
-	register unsigned long r0 __asm__("$0");			\
-	__asm__ __volatile__(						\
-		"call_pal %1 #getipl"					\
-		:"=r" (r0)						\
-		:"i" (PAL_rdps)						\
-		:"$1", "$16", "$22", "$23", "$24", "$25", "memory");	\
-	r0;								\
-})
+#define __CALL_PAL_W2(NAME, TYPE0, TYPE1)			\
+static inline void NAME(TYPE0 arg0, TYPE1 arg1)			\
+{								\
+	register TYPE0 __r16 __asm__("$16") = arg0;		\
+	register TYPE1 __r17 __asm__("$17") = arg1;		\
+	__asm__ __volatile__(					\
+		"call_pal %2 # "#NAME				\
+		: "=r"(__r16), "=r"(__r17)			\
+		: "i"(PAL_ ## NAME), "0"(__r16), "1"(__r17)	\
+		: "$1", "$22", "$23", "$24", "$25");		\
+}
 
-#define setipl(ipl)							\
-({									\
-	register unsigned long __r16 __asm__("$16"); __r16 = (ipl);	\
-	__asm__ __volatile__(						\
-		"call_pal %2 #setipl"					\
-		:"=r" (__r16)						\
-		:"0" (__r16),"i" (PAL_swpipl)				\
-		:"$0", "$1", "$22", "$23", "$24", "$25", "memory");	\
-})
+#define __CALL_PAL_RW1(NAME, RTYPE, TYPE0)			\
+static inline RTYPE NAME(TYPE0 arg0)				\
+{								\
+	register RTYPE __r0 __asm__("$0");			\
+	register TYPE0 __r16 __asm__("$16") = arg0;		\
+	__asm__ __volatile__(					\
+		"call_pal %2 # "#NAME				\
+		: "=r"(__r16), "=r"(__r0)			\
+		: "i"(PAL_ ## NAME), "0"(__r16)			\
+		: "$1", "$22", "$23", "$24", "$25");		\
+	return __r0;						\
+}
 
-#define swpipl(ipl)						\
-({								\
-	register unsigned long __r0 __asm__("$0");		\
-	register unsigned long __r16 __asm__("$16") = (ipl);	\
+#define __CALL_PAL_RW2(NAME, RTYPE, TYPE0, TYPE1)		\
+static inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1)		\
+{								\
+	register RTYPE __r0 __asm__("$0");			\
+	register TYPE0 __r16 __asm__("$16") = arg0;		\
+	register TYPE1 __r17 __asm__("$17") = arg1;		\
 	__asm__ __volatile__(					\
-		"call_pal %3 #swpipl"				\
-		:"=r" (__r0),"=r" (__r16)			\
-		:"1" (__r16),"i" (PAL_swpipl)			\
-		:"$1", "$22", "$23", "$24", "$25", "memory");	\
-	__r0;							\
-})
+		"call_pal %3 # "#NAME				\
+		: "=r"(__r16), "=r"(__r17), "=r"(__r0)		\
+		: "i"(PAL_ ## NAME), "0"(__r16), "1"(__r17)	\
+		: "$1", "$22", "$23", "$24", "$25");		\
+	return __r0;						\
+}
 
-#define __cli()			setipl(7)
-#define __sti()			setipl(0)
-#define __save_flags(flags)	((flags) = getipl())
+__CALL_PAL_R0(rdmces, unsigned long);
+__CALL_PAL_R0(rdps, unsigned long);
+__CALL_PAL_R0(rdusp, unsigned long);
+__CALL_PAL_RW1(swpipl, unsigned long, unsigned long);
+__CALL_PAL_R0(whami, unsigned long);
+__CALL_PAL_W2(wrent, void*, unsigned long);
+__CALL_PAL_W1(wripir, unsigned long);
+__CALL_PAL_W1(wrkgp, unsigned long);
+__CALL_PAL_W1(wrmces, unsigned long);
+__CALL_PAL_RW2(wrperfmon, unsigned long, unsigned long, unsigned long);
+__CALL_PAL_W1(wrusp, unsigned long);
+__CALL_PAL_W1(wrvptptr, unsigned long);
+
+#define __cli()			((void) swpipl(7))
+#define __sti()			((void) swpipl(0))
+#define __save_flags(flags)	((flags) = rdps())
 #define __save_and_cli(flags)	((flags) = swpipl(7))
-#define __restore_flags(flags)	setipl(flags)
+#define __restore_flags(flags)	((void) swpipl(flags))
 
 #define local_irq_save(flags)		__save_and_cli(flags)
 #define local_irq_restore(flags)	__restore_flags(flags)
@@ -294,6 +297,7 @@ extern __inline__ unsigned long xchg_u32(volatile int *m, unsigned long val)
 	"	bis $31,%3,%1\n"
 	"	stl_c %1,%2\n"
 	"	beq %1,2f\n"
+	"	mb\n"
 	".section .text2,\"ax\"\n"
 	"2:	br 1b\n"
 	".previous"
@@ -312,6 +316,7 @@ extern __inline__ unsigned long xchg_u64(volatile long * m, unsigned long val)
 	"	bis $31,%3,%1\n"
 	"	stq_c %1,%2\n"
 	"	beq %1,2f\n"
+	"	mb\n"
 	".section .text2,\"ax\"\n"
 	"2:	br 1b\n"
 	".previous"
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fd67e059ac86..90595473b637 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -738,7 +738,6 @@ extern int fs_may_mount(kdev_t);
 
 extern struct file *inuse_filps;
 
-extern void set_writetime(struct buffer_head *, int);
 extern int try_to_free_buffers(struct page *);
 extern void refile_buffer(struct buffer_head * buf);
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c5a8af7c7f73..3b7272caafe5 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -76,6 +76,7 @@ extern void lock_page(struct page *page);
 
 extern void __add_page_to_hash_queue(struct page * page, struct page **p);
 
+extern void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset);
 extern int add_to_page_cache_unique(struct page * page, struct inode * inode, unsigned long offset, struct page **hash);
 
 static inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index df558c90c626..e95a3881a6bc 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -52,7 +52,6 @@ struct swap_info_struct {
 	kdev_t swap_device;
 	struct dentry * swap_file;
 	unsigned short * swap_map;
-	unsigned char * swap_lockmap;
 	unsigned int lowest_bit;
 	unsigned int highest_bit;
 	unsigned int cluster_next;
@@ -97,7 +96,7 @@ extern void swap_in(struct task_struct *, struct vm_area_struct *,
 
 /* linux/mm/swap_state.c */
 extern void show_swap_cache_info(void);
-extern int add_to_swap_cache(struct page *, unsigned long);
+extern void add_to_swap_cache(struct page *, unsigned long);
 extern int swap_duplicate(unsigned long);
 extern int swap_check_entry(unsigned long);
 struct page * lookup_swap_cache(unsigned long);
diff --git a/kernel/fork.c b/kernel/fork.c
index e85429ba4f51..bf44bd04cc9c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -613,7 +613,7 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
 	{
 		int i;
 		p->has_cpu = 0;
-		p->processor = NO_PROC_ID;
+		p->processor = current->processor;
 		/* ?? should we just memset this ?? */
 		for(i = 0; i < smp_num_cpus; i++)
 			p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index a57d67d8ba7b..0d4e2bee2fb7 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -355,7 +355,6 @@ EXPORT_SYMBOL(read_exec);
 EXPORT_SYMBOL(si_meminfo);
 
 /* Added to make file system as module */
-EXPORT_SYMBOL(set_writetime);
 EXPORT_SYMBOL(sys_tz);
 EXPORT_SYMBOL(__wait_on_super);
 EXPORT_SYMBOL(file_fsync);
diff --git a/mm/filemap.c b/mm/filemap.c
index 85eaa298072b..8936d35d1e82 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -40,7 +40,8 @@ struct page * page_hash_table[PAGE_HASH_SIZE];
 spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
 
 
-void __add_page_to_hash_queue(struct page * page, struct page **p){
+void __add_page_to_hash_queue(struct page * page, struct page **p)
+{
 	atomic_inc(&page_cache_size);
 	if((page->next_hash = *p) != NULL)
 		(*p)->pprev_hash = &page->next_hash;
@@ -461,6 +462,13 @@ static inline void __add_to_page_cache(struct page * page,
 	__add_page_to_hash_queue(page, hash);
 }
 
+void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset)
+{
+	spin_lock(&pagecache_lock);
+	__add_to_page_cache(page, inode, offset, page_hash(inode, offset));
+	spin_unlock(&pagecache_lock);
+}
+
 int add_to_page_cache_unique(struct page * page,
 	struct inode * inode, unsigned long offset,
 	struct page **hash)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3fcec62c1db8..3f30a049e350 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -374,8 +374,6 @@ void swapin_readahead(unsigned long entry)
 			break;
 		if (swapdev->swap_map[offset] == SWAP_MAP_BAD)
 			break;
-		if (test_bit(offset, swapdev->swap_lockmap))
-			break;
 
 		/* Ok, do the async read-ahead now */
 		new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0);
diff --git a/mm/page_io.c b/mm/page_io.c
index ef1027fcca2c..75b7195fb174 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -84,13 +84,6 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
 		return;
 	}
 
-	if (dolock) {
-		/* Make sure we are the only process doing I/O with this swap page. */
-		while (test_and_set_bit(offset,p->swap_lockmap)) {
-			run_task_queue(&tq_disk);
-			sleep_on(&lock_queue);
-		}
-	}
 	if (rw == READ) {
 		ClearPageUptodate(page);
 		kstat.pswpin++;
@@ -146,14 +139,6 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
 		}
 	} else {
 		printk(KERN_ERR "rw_swap_page: no swap file or device\n");
-		/* Do some cleaning up so if this ever happens we can hopefully
-		 * trigger controlled shutdown.
-		 */
-		if (dolock) {
-			if (!test_and_clear_bit(offset,p->swap_lockmap))
-				printk("rw_swap_page_base: lock already cleared\n");
-			wake_up(&lock_queue);
-		}
 		put_page(page);
 		return;
 	}
@@ -164,6 +149,7 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
  	if (dolock) {
  		/* only lock/unlock swap cache pages! */
  		set_bit(PG_swap_unlock_after, &page->flags);
+		p->swap_map[offset]++;
  	}
  	set_bit(PG_free_after, &page->flags);
 
@@ -190,32 +176,13 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in
 #endif
 }
 
-/* Note: We could remove this totally asynchronous function,
- * and improve swap performance, and remove the need for the swap lock map,
- * by not removing pages from the swap cache until after I/O has been
- * processed and letting remove_from_page_cache decrement the swap count
- * just before it removes the page from the page cache.
+/*
+ * This is run when asynchronous page I/O has completed.
+ * It decrements the swap_map use count of the entry (via swap_free()).
  */
-/* This is run when asynchronous page I/O has completed. */
 void swap_after_unlock_page(unsigned long entry)
 {
-	unsigned long type, offset;
-	struct swap_info_struct * p;
-
-	type = SWP_TYPE(entry);
-	if (type >= nr_swapfiles) {
-		printk("swap_after_unlock_page: bad swap-device\n");
-		return;
-	}
-	p = &swap_info[type];
-	offset = SWP_OFFSET(entry);
-	if (offset >= p->max) {
-		printk("swap_after_unlock_page: weirdness\n");
-		return;
-	}
-	if (!test_and_clear_bit(offset,p->swap_lockmap))
-		printk("swap_after_unlock_page: lock already cleared\n");
-	wake_up(&lock_queue);
+	swap_free(entry);	/* NOTE(review): presumably pairs with the swap_map[offset]++ done in rw_swap_page_base() when dolock is set -- confirm */
 }
 
 /*
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5ddc0eb014b5..8ee2699f09b3 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -66,7 +66,7 @@ void show_swap_cache_info(void)
 }
 #endif
 
-int add_to_swap_cache(struct page *page, unsigned long entry)
+void add_to_swap_cache(struct page *page, unsigned long entry)
 {
 #ifdef SWAP_CACHE_INFO
 	swap_cache_add_total++;
@@ -79,19 +79,12 @@ int add_to_swap_cache(struct page *page, unsigned long entry)
 		printk(KERN_ERR "swap_cache: replacing non-empty entry %08lx "
 			   "on page %08lx\n",
 			   page->offset, page_address(page));
-		return 0;
 	}
 	if (page->inode) {
 		printk(KERN_ERR "swap_cache: replacing page-cached entry "
 			   "on page %08lx\n", page_address(page));
-		return 0;
 	}
-	get_page(page);
-	page->inode = &swapper_inode;
-	page->offset = entry;
-	add_page_to_hash_queue(page, &swapper_inode, entry);
-	add_page_to_inode_queue(&swapper_inode, page);
-	return 1;
+	add_to_page_cache(page, &swapper_inode, entry);
 }
 
 /*
@@ -363,10 +356,7 @@ struct page * read_swap_cache_async(unsigned long entry, int wait)
 	/* 
 	 * Add it to the swap cache and read its contents.
 	 */
-	if (!add_to_swap_cache(new_page, entry))
-		goto out_free_page;
-
-	LockPage(new_page);
+	add_to_swap_cache(new_page, entry);
 	rw_swap_page(READ, new_page, wait);
 #ifdef DEBUG_SWAP
 	printk("DebugVM: read_swap_cache_async created "
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 794e39aff0fb..a4a523ef25cb 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -42,8 +42,6 @@ static inline int scan_swap_map(struct swap_info_struct *si)
 			offset = si->cluster_next++;
 			if (si->swap_map[offset])
 				continue;
-			if (test_bit(offset, si->swap_lockmap))
-				continue;
 			si->cluster_nr--;
 			goto got_page;
 		}
@@ -52,8 +50,6 @@ static inline int scan_swap_map(struct swap_info_struct *si)
 	for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
 		if (si->swap_map[offset])
 			continue;
-		if (test_bit(offset, si->swap_lockmap))
-			continue;
 		si->lowest_bit = offset;
 got_page:
 		si->swap_map[offset] = 1;
@@ -424,8 +420,6 @@ asmlinkage int sys_swapoff(const char * specialfile)
 	p->swap_device = 0;
 	vfree(p->swap_map);
 	p->swap_map = NULL;
-	vfree(p->swap_lockmap);
-	p->swap_lockmap = NULL;
 	p->flags = 0;
 	err = 0;
 
@@ -505,7 +499,6 @@ asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
 	int lock_map_size = PAGE_SIZE;
 	int nr_good_pages = 0;
 	unsigned long maxpages;
-	unsigned long tmp_lock_map = 0;
 	int swapfilesize;
 	
 	lock_kernel();
@@ -524,7 +517,6 @@ asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
 	p->swap_file = NULL;
 	p->swap_device = 0;
 	p->swap_map = NULL;
-	p->swap_lockmap = NULL;
 	p->lowest_bit = 0;
 	p->highest_bit = 0;
 	p->cluster_nr = 0;
@@ -590,9 +582,8 @@ asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
 		goto bad_swap;
 	}
 
-	p->swap_lockmap = (char *) &tmp_lock_map;
-	rw_swap_page_nocache(READ, SWP_ENTRY(type,0), (char *) swap_header);
-	p->swap_lockmap = NULL;
+	lock_page(mem_map + MAP_NR(swap_header));
+	rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header, 1);
 
 	if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
 		swap_header_version = 1;
@@ -689,11 +680,6 @@ asmlinkage int sys_swapon(const char * specialfile, int swap_flags)
 		goto bad_swap;
 	}
 	p->swap_map[0] = SWAP_MAP_BAD;
-	if (!(p->swap_lockmap = vmalloc (lock_map_size))) {
-		error = -ENOMEM;
-		goto bad_swap;
-	}
-	memset(p->swap_lockmap,0,lock_map_size);
 	p->flags = SWP_WRITEOK;
 	p->pages = nr_good_pages;
 	nr_swap_pages += nr_good_pages;
@@ -720,15 +706,12 @@ bad_swap:
 	if(filp.f_op && filp.f_op->release)
 		filp.f_op->release(filp.f_dentry->d_inode,&filp);
 bad_swap_2:
-	if (p->swap_lockmap)
-		vfree(p->swap_lockmap);
 	if (p->swap_map)
 		vfree(p->swap_map);
 	dput(p->swap_file);
 	p->swap_device = 0;
 	p->swap_file = NULL;
 	p->swap_map = NULL;
-	p->swap_lockmap = NULL;
 	p->flags = 0;
 	if (!(swap_flags & SWAP_FLAG_PREFER))
 		++least_priority;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e3854d7df1cd..4cccaf1717bc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -157,10 +157,9 @@ drop_pte:
 	set_pte(page_table, __pte(entry));
 	flush_tlb_page(vma, address);
 	swap_duplicate(entry);	/* One for the process, one for the swap cache */
+
+	/* This will also lock the page */
 	add_to_swap_cache(page, entry);
-	/* We checked we were unlocked way up above, and we
-	   have been careful not to stall until here */
-	LockPage(page);
 
 	/* OK, do a physical asynchronous write to swap.  */
 	rw_swap_page(WRITE, page, 0);