From: Linus Torvalds Date: Fri, 23 Nov 2007 20:38:00 +0000 (-0500) Subject: Linux 2.4.0-test7pre3 X-Git-Tag: 2.4.0-test7pre3 X-Git-Url: http://git.neil.brown.name/?a=commitdiff_plain;h=cbe131c1f1fbdccdc1014437a696659ae791e1d4;p=history.git Linux 2.4.0-test7pre3 - nfs_commit_rpcsetup() signed comparison bugfix and cleanup - sparc updates and TLB invalidation fix - networking updates (less verbose on the new reordering messages) - network driver Makefile cleanup - Fix segment copy on fork. - tsk->files race fixes: close-on-exec etc. - sound #define cleanups - fs/proc/array.c task_lock cleanup --- diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 39e5f8813c49..855282be62b4 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -81,6 +81,11 @@ kernel-api.sgml: kernel-api.tmpl $(APISOURCES) $(TOPDIR)/scripts/docgen $(APISOURCES) \ kernel-api.sgml +kernel-api-man: $(APISOURCES) + @rm -rf $(TOPDIR)/Documentation/man + $(TOPDIR)/scripts/kernel-doc -man $^ | \ + $(PERL) $(TOPDIR)/scripts/split-man $(TOPDIR)/Documentation/man + parportbook: $(JPG-parportbook) parportbook.ps: $(EPS-parportbook) parportbook.sgml: parportbook.tmpl $(TOPDIR)/drivers/parport/init.c diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index eb28764f21b8..bb7b0e2998ca 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -207,7 +207,7 @@ tcp_ecn - BOOLEN Enable Explicit Congestion Notification in TCP. tcp_reordering - INTEGER - Maxmimal reordering of packets in a TCP stream. + Maximal reordering of packets in a TCP stream. 
Default: 3 tcp_retrans_collapse - BOOLEAN diff --git a/Makefile b/Makefile index c3d16e0b06cc..84616206d9f1 100644 --- a/Makefile +++ b/Makefile @@ -136,10 +136,10 @@ DRIVERS-$(CONFIG_AGP) += drivers/char/agp/agp.o DRIVERS-$(CONFIG_DRM) += drivers/char/drm/drm.o DRIVERS-$(CONFIG_NUBUS) += drivers/nubus/nubus.a DRIVERS-$(CONFIG_ISDN) += drivers/isdn/isdn.a -DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.a +DRIVERS-$(CONFIG_NET_FC) += drivers/net/fc/fc.o DRIVERS-$(CONFIG_APPLETALK) += drivers/net/appletalk/appletalk.a DRIVERS-$(CONFIG_TR) += drivers/net/tokenring/tr.a -DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.a +DRIVERS-$(CONFIG_WAN) += drivers/net/wan/wan.o DRIVERS-$(CONFIG_ARCNET) += drivers/net/arcnet/arcnet.a DRIVERS-$(CONFIG_ATM) += drivers/atm/atm.o DRIVERS-$(CONFIG_IDE) += drivers/ide/idedriver.o @@ -170,7 +170,7 @@ DRIVERS-$(CONFIG_HAMRADIO) += drivers/net/hamradio/hamradio.o DRIVERS-$(CONFIG_TC) += drivers/tc/tc.a DRIVERS-$(CONFIG_USB) += drivers/usb/usbdrv.o DRIVERS-$(CONFIG_I2O) += drivers/i2o/i2o.o -DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda_drivers.a +DRIVERS-$(CONFIG_IRDA) += drivers/net/irda/irda.o DRIVERS-$(CONFIG_I2C) += drivers/i2c/i2c.o DRIVERS-$(CONFIG_PHONE) += drivers/telephony/telephony.a DRIVERS-$(CONFIG_ACPI_INTERPRETER) += drivers/acpi/acpi.o diff --git a/arch/ia64/config.in b/arch/ia64/config.in index 8b8dd761c0ea..74c3d52f27d3 100644 --- a/arch/ia64/config.in +++ b/arch/ia64/config.in @@ -18,15 +18,16 @@ mainmenu_option next_comment comment 'General setup' define_bool CONFIG_IA64 y +define_bool CONFIG_SWIOTLB y # for now... 
define_bool CONFIG_ISA n define_bool CONFIG_SBUS n choice 'IA-64 system type' \ - "Generic CONFIG_IA64_GENERIC \ + "generic CONFIG_IA64_GENERIC \ + DIG-compliant CONFIG_IA64_DIG \ HP-simulator CONFIG_IA64_HP_SIM \ - SN1-simulator CONFIG_IA64_SGI_SN1_SIM \ - DIG-compliant CONFIG_IA64_DIG" Generic + SN1-simulator CONFIG_IA64_SGI_SN1_SIM" generic choice 'Kernel page size' \ "4KB CONFIG_IA64_PAGE_SIZE_4KB \ @@ -38,16 +39,18 @@ if [ "$CONFIG_IA64_DIG" = "y" ]; then define_bool CONFIG_ITANIUM y define_bool CONFIG_IA64_BRL_EMU y bool ' Enable Itanium A-step specific code' CONFIG_ITANIUM_ASTEP_SPECIFIC - bool ' Enable Itanium A1-step specific code' CONFIG_ITANIUM_A1_SPECIFIC + if [ "$CONFIG_ITANIUM_ASTEP_SPECIFIC" = "y" ]; then + bool ' Enable Itanium A1-step specific code' CONFIG_ITANIUM_A1_SPECIFIC + fi + bool ' Enable Itanium B-step specific code' CONFIG_ITANIUM_BSTEP_SPECIFIC + if [ "$CONFIG_ITANIUM_BSTEP_SPECIFIC" = "y" ]; then + bool ' Enable Itanium B0-step specific code' CONFIG_ITANIUM_B0_SPECIFIC + fi + bool ' Force interrupt redirection' CONFIG_IA64_HAVE_IRQREDIR bool ' Enable use of global TLB purge instruction (ptc.g)' CONFIG_ITANIUM_PTCG bool ' Enable SoftSDV hacks' CONFIG_IA64_SOFTSDV_HACKS bool ' Enable AzusA hacks' CONFIG_IA64_AZUSA_HACKS - bool ' Emulate PAL/SAL/EFI firmware' CONFIG_IA64_FW_EMU - bool ' Enable IA64 Machine Check Abort' CONFIG_IA64_MCA -fi - -if [ "$CONFIG_IA64_GENERIC" = "y" ]; then - define_bool CONFIG_IA64_SOFTSDV_HACKS y + bool ' Enable IA-64 Machine Check Abort' CONFIG_IA64_MCA fi if [ "$CONFIG_IA64_SGI_SN1_SIM" = "y" ]; then @@ -59,7 +62,7 @@ define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore. 
bool 'SMP support' CONFIG_SMP bool 'Performance monitor support' CONFIG_PERFMON -bool '/proc/palinfo support' CONFIG_IA64_PALINFO +bool '/proc/pal support' CONFIG_IA64_PALINFO bool 'Networking support' CONFIG_NET bool 'System V IPC' CONFIG_SYSVIPC @@ -163,8 +166,6 @@ source drivers/char/Config.in source fs/Config.in -source fs/nls/Config.in - if [ "$CONFIG_VT" = "y" ]; then mainmenu_option next_comment comment 'Console drivers' diff --git a/arch/ia64/dig/iosapic.c b/arch/ia64/dig/iosapic.c index 2426a0193647..7d9a084fdaff 100644 --- a/arch/ia64/dig/iosapic.c +++ b/arch/ia64/dig/iosapic.c @@ -22,12 +22,14 @@ #include #include +#include +#include #include #include +#include +#include #include #include -#include -#include #undef DEBUG_IRQ_ROUTING @@ -315,10 +317,6 @@ dig_irq_init (void) */ outb(0xff, 0xA1); outb(0xff, 0x21); - -#ifndef CONFIG_IA64_DIG - iosapic_init(IO_SAPIC_DEFAULT_ADDR); -#endif } void @@ -337,15 +335,23 @@ dig_pci_fixup (void) if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ struct pci_dev * bridge = dev->bus->self; - /* do the bridge swizzle... */ - pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = iosapic_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin); + /* allow for multiple bridges on an adapter */ + do { + /* do the bridge swizzle... */ + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + irq = iosapic_get_PCI_irq_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), pin); + } while (irq < 0 && (bridge = bridge->bus->self)); if (irq >= 0) printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %02x\n", bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq); + else + printk(KERN_WARNING + "PCI: Couldn't map irq for B%d,I%d,P%d\n", + bridge->bus->number, PCI_SLOT(bridge->devfn), + pin); } if (irq >= 0) { printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %02x\n", @@ -361,3 +367,34 @@ dig_pci_fixup (void) dev->irq = 15; /* Spurious interrupts */ } } + +/* + * Register an IOSAPIC discovered via ACPI. 
+ */ +void __init +dig_register_iosapic (acpi_entry_iosapic_t *iosapic) +{ + unsigned int ver, v; + int l, max_pin; + + ver = iosapic_version(iosapic->address); + max_pin = (ver >> 16) & 0xff; + + printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n", + (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address, + iosapic->irq_base, iosapic->irq_base + max_pin); + + for (l = 0; l <= max_pin; l++) { + v = iosapic->irq_base + l; + if (v < 16) + v = isa_irq_to_vector(v); + if (v > IA64_MAX_VECTORED_IRQ) { + printk(" !!! bad IOSAPIC interrupt vector: %u\n", v); + continue; + } + /* XXX Check for IOSAPIC collisions */ + iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0); + iosapic_baseirq(v) = iosapic->irq_base; + } + iosapic_init(iosapic->address, iosapic->irq_base); +} diff --git a/arch/ia64/dig/machvec.c b/arch/ia64/dig/machvec.c index 640412d7e927..4d2452745006 100644 --- a/arch/ia64/dig/machvec.c +++ b/arch/ia64/dig/machvec.c @@ -1,4 +1,2 @@ +#define MACHVEC_PLATFORM_NAME dig #include -#include - -MACHVEC_DEFINE(dig) diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c index 45c1f96f5441..fa48254cc22a 100644 --- a/arch/ia64/dig/setup.c +++ b/arch/ia64/dig/setup.c @@ -24,10 +24,6 @@ #include #include -#ifdef CONFIG_IA64_FW_EMU -# include "../../kernel/fw-emu.c" -#endif - /* * This is here so we can use the CMOS detection in ide-probe.c to * determine what drives are present. 
In theory, we don't need this diff --git a/arch/ia64/hp/hpsim_machvec.c b/arch/ia64/hp/hpsim_machvec.c index 7d78f4961b40..76af3b4e217a 100644 --- a/arch/ia64/hp/hpsim_machvec.c +++ b/arch/ia64/hp/hpsim_machvec.c @@ -1,4 +1,2 @@ +#define MACHVEC_PLATFORM_NAME hpsim #include -#include - -MACHVEC_DEFINE(hpsim) diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index a9dc378cf413..ad963b92ff99 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -73,7 +73,7 @@ GLOBAL_ENTRY(ia32_trace_syscall) END(ia32_trace_syscall) GLOBAL_ENTRY(sys32_vfork) - alloc r16=ar.pfs,2,2,3,0;; + alloc r16=ar.pfs,2,2,4,0;; mov out0=IA64_CLONE_VFORK|IA64_CLONE_VM|SIGCHLD // out0 = clone_flags br.cond.sptk.few .fork1 // do the work END(sys32_vfork) @@ -105,7 +105,7 @@ END(sys32_fork) .align 8 .globl ia32_syscall_table ia32_syscall_table: - data8 sys_ni_syscall /* 0 - old "setup(" system call*/ + data8 sys32_ni_syscall /* 0 - old "setup(" system call*/ data8 sys_exit data8 sys32_fork data8 sys_read @@ -122,25 +122,25 @@ ia32_syscall_table: data8 sys_mknod data8 sys_chmod /* 15 */ data8 sys_lchown - data8 sys_ni_syscall /* old break syscall holder */ - data8 sys_ni_syscall + data8 sys32_ni_syscall /* old break syscall holder */ + data8 sys32_ni_syscall data8 sys_lseek data8 sys_getpid /* 20 */ data8 sys_mount data8 sys_oldumount data8 sys_setuid data8 sys_getuid - data8 sys_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */ + data8 sys32_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */ data8 sys32_ptrace data8 sys32_alarm - data8 sys_ni_syscall - data8 sys_ni_syscall + data8 sys32_ni_syscall + data8 sys32_ni_syscall data8 ia32_utime /* 30 */ - data8 sys_ni_syscall /* old stty syscall holder */ - data8 sys_ni_syscall /* old gtty syscall holder */ + data8 sys32_ni_syscall /* old stty syscall holder */ + data8 sys32_ni_syscall /* old gtty syscall holder */ data8 sys_access data8 sys_nice - data8 sys_ni_syscall /* 35 */ /* old ftime 
syscall holder */ + data8 sys32_ni_syscall /* 35 */ /* old ftime syscall holder */ data8 sys_sync data8 sys_kill data8 sys_rename @@ -149,22 +149,22 @@ ia32_syscall_table: data8 sys_dup data8 sys32_pipe data8 sys32_times - data8 sys_ni_syscall /* old prof syscall holder */ + data8 sys32_ni_syscall /* old prof syscall holder */ data8 sys_brk /* 45 */ data8 sys_setgid data8 sys_getgid - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_geteuid data8 sys_getegid /* 50 */ data8 sys_acct data8 sys_umount /* recycled never used phys( */ - data8 sys_ni_syscall /* old lock syscall holder */ + data8 sys32_ni_syscall /* old lock syscall holder */ data8 ia32_ioctl - data8 sys_fcntl /* 55 */ - data8 sys_ni_syscall /* old mpx syscall holder */ + data8 sys32_fcntl /* 55 */ + data8 sys32_ni_syscall /* old mpx syscall holder */ data8 sys_setpgid - data8 sys_ni_syscall /* old ulimit syscall holder */ - data8 sys_ni_syscall + data8 sys32_ni_syscall /* old ulimit syscall holder */ + data8 sys32_ni_syscall data8 sys_umask /* 60 */ data8 sys_chroot data8 sys_ustat @@ -172,12 +172,12 @@ ia32_syscall_table: data8 sys_getppid data8 sys_getpgrp /* 65 */ data8 sys_setsid - data8 sys_ni_syscall - data8 sys_ni_syscall - data8 sys_ni_syscall + data8 sys32_sigaction + data8 sys32_ni_syscall + data8 sys32_ni_syscall data8 sys_setreuid /* 70 */ data8 sys_setregid - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_sigpending data8 sys_sethostname data8 sys32_setrlimit /* 75 */ @@ -189,7 +189,7 @@ ia32_syscall_table: data8 sys_setgroups data8 old_select data8 sys_symlink - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_readlink /* 85 */ data8 sys_uselib data8 sys_swapon @@ -203,7 +203,7 @@ ia32_syscall_table: data8 sys_fchown /* 95 */ data8 sys_getpriority data8 sys_setpriority - data8 sys_ni_syscall /* old profil syscall holder */ + data8 sys32_ni_syscall /* old profil syscall holder */ data8 sys32_statfs data8 sys32_fstatfs /* 100 */ data8 sys_ioperm @@ -214,11 +214,11 @@ 
ia32_syscall_table: data8 sys32_newstat data8 sys32_newlstat data8 sys32_newfstat - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_iopl /* 110 */ data8 sys_vhangup - data8 sys_ni_syscall // used to be sys_idle - data8 sys_ni_syscall + data8 sys32_ni_syscall // used to be sys_idle + data8 sys32_ni_syscall data8 sys32_wait4 data8 sys_swapoff /* 115 */ data8 sys_sysinfo @@ -242,7 +242,7 @@ ia32_syscall_table: data8 sys_bdflush data8 sys_sysfs /* 135 */ data8 sys_personality - data8 sys_ni_syscall /* for afs_syscall */ + data8 sys32_ni_syscall /* for afs_syscall */ data8 sys_setfsuid data8 sys_setfsgid data8 sys_llseek /* 140 */ @@ -293,8 +293,8 @@ ia32_syscall_table: data8 sys_capset /* 185 */ data8 sys_sigaltstack data8 sys_sendfile - data8 sys_ni_syscall /* streams1 */ - data8 sys_ni_syscall /* streams2 */ + data8 sys32_ni_syscall /* streams1 */ + data8 sys32_ni_syscall /* streams2 */ data8 sys32_vfork /* 190 */ /* * CAUTION: If any system calls are added beyond this point diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index c051082e92a1..a6bf4a8d8b1a 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -74,10 +74,14 @@ nargs(unsigned int arg, char **ap) n = 0; do { - if ((err = get_user(addr, (int *)A(arg))) != 0) - return(err); - if (ap) - *ap++ = (char *)A(addr); + err = get_user(addr, (int *)A(arg)); + if (IS_ERR(err)) + return err; + if (ap) { /* no access_ok needed, we allocated */ + err = __put_user((char *)A(addr), ap++); + if (IS_ERR(err)) + return err; + } arg += sizeof(unsigned int); n++; } while (addr); @@ -101,7 +105,11 @@ int stack) int na, ne, r, len; na = nargs(argv, NULL); + if (IS_ERR(na)) + return(na); ne = nargs(envp, NULL); + if (IS_ERR(ne)) + return(ne); len = (na + ne + 2) * sizeof(*av); /* * kmalloc won't work because the `sys_exec' code will attempt @@ -121,12 +129,21 @@ int stack) if (IS_ERR(av)) return (long)av; ae = av + na + 1; - av[na] = (char *)0; - ae[ne] = (char *)0; - 
(void)nargs(argv, av); - (void)nargs(envp, ae); + r = __put_user(0, (av + na)); + if (IS_ERR(r)) + goto out; + r = __put_user(0, (ae + ne)); + if (IS_ERR(r)) + goto out; + r = nargs(argv, av); + if (IS_ERR(r)) + goto out; + r = nargs(envp, ae); + if (IS_ERR(r)) + goto out; r = sys_execve(filename, av, ae, regs); if (IS_ERR(r)) +out: sys_munmap((unsigned long) av, len); return(r); } @@ -960,150 +977,85 @@ sys32_nanosleep(struct timespec32 *rqtp, struct timespec32 *rmtp) } struct iovec32 { unsigned int iov_base; int iov_len; }; +asmlinkage ssize_t sys_readv(unsigned long,const struct iovec *,unsigned long); +asmlinkage ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long); -typedef ssize_t (*IO_fn_t)(struct file *, char *, size_t, loff_t *); - -static long -do_readv_writev32(int type, struct file *file, const struct iovec32 *vector, - u32 count) +static struct iovec * +get_iovec32(struct iovec32 *iov32, struct iovec *iov_buf, u32 count, int type) { - unsigned long tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *ivp; - struct inode *inode; - long retval, i; - IO_fn_t fn; + int i; + u32 buf, len; + struct iovec *ivp, *iov; + + /* Get the "struct iovec" from user memory */ - /* First get the "struct iovec" from user memory and - * verify all the pointers - */ if (!count) return 0; - if(verify_area(VERIFY_READ, vector, sizeof(struct iovec32)*count)) - return -EFAULT; + if(verify_area(VERIFY_READ, iov32, sizeof(struct iovec32)*count)) + return(struct iovec *)0; if (count > UIO_MAXIOV) - return -EINVAL; + return(struct iovec *)0; if (count > UIO_FASTIOV) { iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); if (!iov) - return -ENOMEM; - } + return((struct iovec *)0); + } else + iov = iov_buf; - tot_len = 0; - i = count; ivp = iov; - while(i > 0) { - u32 len; - u32 buf; - - __get_user(len, &vector->iov_len); - __get_user(buf, &vector->iov_base); - tot_len += len; + for (i = 0; i < count; i++) { + if (__get_user(len, 
&iov32->iov_len) || + __get_user(buf, &iov32->iov_base)) { + if (iov != iov_buf) + kfree(iov); + return((struct iovec *)0); + } + if (verify_area(type, (void *)A(buf), len)) { + if (iov != iov_buf) + kfree(iov); + return((struct iovec *)0); + } ivp->iov_base = (void *)A(buf); - ivp->iov_len = (__kernel_size_t) len; - vector++; - ivp++; - i--; - } - - inode = file->f_dentry->d_inode; - /* VERIFY_WRITE actually means a read, as we write to user space */ - retval = locks_verify_area((type == VERIFY_WRITE - ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), - inode, file, file->f_pos, tot_len); - if (retval) { - if (iov != iovstack) - kfree(iov); - return retval; - } - - /* Then do the actual IO. Note that sockets need to be handled - * specially as they have atomicity guarantees and can handle - * iovec's natively - */ - if (inode->i_sock) { - int err; - err = sock_readv_writev(type, inode, file, iov, count, tot_len); - if (iov != iovstack) - kfree(iov); - return err; - } - - if (!file->f_op) { - if (iov != iovstack) - kfree(iov); - return -EINVAL; - } - /* VERIFY_WRITE actually means a read, as we write to user space */ - fn = file->f_op->read; - if (type == VERIFY_READ) - fn = (IO_fn_t) file->f_op->write; - ivp = iov; - while (count > 0) { - void * base; - int len, nr; - - base = ivp->iov_base; - len = ivp->iov_len; + ivp->iov_len = (__kernel_size_t)len; + iov32++; ivp++; - count--; - nr = fn(file, base, len, &file->f_pos); - if (nr < 0) { - if (retval) - break; - retval = nr; - break; - } - retval += nr; - if (nr != len) - break; } - if (iov != iovstack) - kfree(iov); - return retval; + return(iov); } asmlinkage long sys32_readv(int fd, struct iovec32 *vector, u32 count) { - struct file *file; - long ret = -EBADF; - - file = fget(fd); - if(!file) - goto bad_file; - - if(!(file->f_mode & 1)) - goto out; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov; + int ret; + mm_segment_t old_fs = get_fs(); - ret = do_readv_writev32(VERIFY_WRITE, file, - vector, count); -out: 
- fput(file); -bad_file: + if ((iov = get_iovec32(vector, iovstack, count, VERIFY_WRITE)) == (struct iovec *)0) + return -EFAULT; + set_fs(KERNEL_DS); + ret = sys_readv(fd, iov, count); + set_fs(old_fs); + if (iov != iovstack) + kfree(iov); return ret; } asmlinkage long sys32_writev(int fd, struct iovec32 *vector, u32 count) { - struct file *file; - int ret = -EBADF; - - file = fget(fd); - if(!file) - goto bad_file; - - if(!(file->f_mode & 2)) - goto out; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov; + int ret; + mm_segment_t old_fs = get_fs(); - down(&file->f_dentry->d_inode->i_sem); - ret = do_readv_writev32(VERIFY_READ, file, - vector, count); - up(&file->f_dentry->d_inode->i_sem); -out: - fput(file); -bad_file: + if ((iov = get_iovec32(vector, iovstack, count, VERIFY_READ)) == (struct iovec *)0) + return -EFAULT; + set_fs(KERNEL_DS); + ret = sys_writev(fd, iov, count); + set_fs(old_fs); + if (iov != iovstack) + kfree(iov); return ret; } @@ -1174,21 +1126,22 @@ struct msghdr32 { static inline int shape_msg(struct msghdr *mp, struct msghdr32 *mp32) { + int ret; unsigned int i; if (!access_ok(VERIFY_READ, mp32, sizeof(*mp32))) return(-EFAULT); - __get_user(i, &mp32->msg_name); + ret = __get_user(i, &mp32->msg_name); mp->msg_name = (void *)A(i); - __get_user(mp->msg_namelen, &mp32->msg_namelen); - __get_user(i, &mp32->msg_iov); + ret |= __get_user(mp->msg_namelen, &mp32->msg_namelen); + ret |= __get_user(i, &mp32->msg_iov); mp->msg_iov = (struct iovec *)A(i); - __get_user(mp->msg_iovlen, &mp32->msg_iovlen); - __get_user(i, &mp32->msg_control); + ret |= __get_user(mp->msg_iovlen, &mp32->msg_iovlen); + ret |= __get_user(i, &mp32->msg_control); mp->msg_control = (void *)A(i); - __get_user(mp->msg_controllen, &mp32->msg_controllen); - __get_user(mp->msg_flags, &mp32->msg_flags); - return(0); + ret |= __get_user(mp->msg_controllen, &mp32->msg_controllen); + ret |= __get_user(mp->msg_flags, &mp32->msg_flags); + return(ret ? 
-EFAULT : 0); } /* @@ -2342,17 +2295,17 @@ restore_ia32_fpstate(struct task_struct *tsk, struct _fpstate_ia32 *save) { struct switch_stack *swp; struct pt_regs *ptp; - int i, tos; + int i, tos, ret; int fsrlo, fsrhi; if (!access_ok(VERIFY_READ, save, sizeof(*save))) return(-EIO); - __get_user(tsk->thread.fcr, (unsigned int *)&save->cw); - __get_user(fsrlo, (unsigned int *)&save->sw); - __get_user(fsrhi, (unsigned int *)&save->tag); + ret = __get_user(tsk->thread.fcr, (unsigned int *)&save->cw); + ret |= __get_user(fsrlo, (unsigned int *)&save->sw); + ret |= __get_user(fsrhi, (unsigned int *)&save->tag); tsk->thread.fsr = ((long)fsrhi << 32) | (long)fsrlo; - __get_user(tsk->thread.fir, (unsigned int *)&save->ipoff); - __get_user(tsk->thread.fdr, (unsigned int *)&save->dataoff); + ret |= __get_user(tsk->thread.fir, (unsigned int *)&save->ipoff); + ret |= __get_user(tsk->thread.fdr, (unsigned int *)&save->dataoff); /* * Stack frames start with 16-bytes of temp space */ @@ -2361,7 +2314,7 @@ restore_ia32_fpstate(struct task_struct *tsk, struct _fpstate_ia32 *save) tos = (tsk->thread.fsr >> 11) & 3; for (i = 0; i < 8; i++) get_fpreg(i, &save->_st[i], ptp, swp, tos); - return(0); + return(ret ? 
-EFAULT : 0); } asmlinkage long sys_ptrace(long, pid_t, unsigned long, unsigned long, long, long, long, long, long); @@ -2493,6 +2446,105 @@ sys32_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data, return ret; } +static inline int +get_flock32(struct flock *kfl, struct flock32 *ufl) +{ + int err; + + err = get_user(kfl->l_type, &ufl->l_type); + err |= __get_user(kfl->l_whence, &ufl->l_whence); + err |= __get_user(kfl->l_start, &ufl->l_start); + err |= __get_user(kfl->l_len, &ufl->l_len); + err |= __get_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +static inline int +put_flock32(struct flock *kfl, struct flock32 *ufl) +{ + int err; + + err = __put_user(kfl->l_type, &ufl->l_type); + err |= __put_user(kfl->l_whence, &ufl->l_whence); + err |= __put_user(kfl->l_start, &ufl->l_start); + err |= __put_user(kfl->l_len, &ufl->l_len); + err |= __put_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, + unsigned long arg); + +asmlinkage long +sys32_fcntl(unsigned int fd, unsigned int cmd, int arg) +{ + struct flock f; + mm_segment_t old_fs; + long ret; + + switch (cmd) { + case F_GETLK: + case F_SETLK: + case F_SETLKW: + if(cmd != F_GETLK && get_flock32(&f, (struct flock32 *)((long)arg))) + return -EFAULT; + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_fcntl(fd, cmd, (unsigned long)&f); + set_fs(old_fs); + if(cmd == F_GETLK && put_flock32(&f, (struct flock32 *)((long)arg))) + return -EFAULT; + return ret; + default: + /* + * `sys_fcntl' lies about arg, for the F_SETOWN + * sub-function arg can have a negative value. 
+ */ + return sys_fcntl(fd, cmd, (unsigned long)((long)arg)); + } +} + +asmlinkage long +sys32_sigaction (int sig, struct old_sigaction32 *act, struct old_sigaction32 *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset32_t mask; + + ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler); + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= __get_user(mask, &act->sa_mask); + if (ret) + return ret; + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +asmlinkage long sys_ni_syscall(void); + +asmlinkage long +sys32_ni_syscall(int dummy0, int dummy1, int dummy2, int dummy3, + int dummy4, int dummy5, int dummy6, int dummy7, int stack) +{ + struct pt_regs *regs = (struct pt_regs *)&stack; + + printk("IA32 syscall #%d issued, maybe we should implement it\n", + (int)regs->r1); + return(sys_ni_syscall()); +} + #ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */ /* In order to reduce some races, while at the same time doing additional @@ -2546,61 +2598,6 @@ sys32_ioperm(u32 from, u32 num, int on) return sys_ioperm((unsigned long)from, (unsigned long)num, on); } -static inline int -get_flock(struct flock *kfl, struct flock32 *ufl) -{ - int err; - - err = get_user(kfl->l_type, &ufl->l_type); - err |= __get_user(kfl->l_whence, &ufl->l_whence); - err |= __get_user(kfl->l_start, &ufl->l_start); - err |= __get_user(kfl->l_len, &ufl->l_len); - err |= __get_user(kfl->l_pid, &ufl->l_pid); - return err; -} - -static inline int -put_flock(struct flock *kfl, struct flock32 *ufl) -{ - int err; - - err = __put_user(kfl->l_type, &ufl->l_type); - err |= __put_user(kfl->l_whence, &ufl->l_whence); - err |= __put_user(kfl->l_start, 
&ufl->l_start); - err |= __put_user(kfl->l_len, &ufl->l_len); - err |= __put_user(kfl->l_pid, &ufl->l_pid); - return err; -} - -extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, - unsigned long arg); - -asmlinkage long -sys32_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case F_GETLK: - case F_SETLK: - case F_SETLKW: - { - struct flock f; - mm_segment_t old_fs; - long ret; - - if(get_flock(&f, (struct flock32 *)arg)) - return -EFAULT; - old_fs = get_fs(); set_fs (KERNEL_DS); - ret = sys_fcntl(fd, cmd, (unsigned long)&f); - set_fs (old_fs); - if(put_flock(&f, (struct flock32 *)arg)) - return -EFAULT; - return ret; - } - default: - return sys_fcntl(fd, cmd, (unsigned long)arg); - } -} - struct dqblk32 { __u32 dqb_bhardlimit; __u32 dqb_bsoftlimit; @@ -3863,40 +3860,6 @@ out: extern void check_pending(int signum); -asmlinkage long -sys32_sigaction (int sig, struct old_sigaction32 *act, - struct old_sigaction32 *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if(sig < 0) { - current->tss.new_signal = 1; - sig = -sig; - } - - if (act) { - old_sigset_t32 mask; - - ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler); - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); - ret |= __get_user(mask, &act->sa_mask); - if (ret) - return ret; - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); - - if (!ret && oact) { - ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } - - return ret; -} - #ifdef CONFIG_MODULES extern asmlinkage unsigned long sys_create_module(const char *name_user, diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index dfba2529a853..563c308ea986 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -9,8 +9,8 @@ all: kernel.o head.o init_task.o -obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \ - pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ +obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \ + machvec.o pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o obj-$(CONFIG_IA64_GENERIC) += machvec.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 20521da3693d..4bba56e1d27e 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -19,10 +19,11 @@ #include #include -#include #include #include #include +#include +#include #undef ACPI_DEBUG /* Guess what this does? */ @@ -74,47 +75,6 @@ acpi_lsapic(char *p) total_cpus++; } -/* - * Find all IOSAPICs and tag the iosapic_vector structure with the appropriate - * base addresses. - */ -static void __init -acpi_iosapic(char *p) -{ - /* - * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet - * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be - * a means for platform_setup() to register ACPI handlers? 
- */ -#ifdef CONFIG_IA64_DIG - acpi_entry_iosapic_t *iosapic = (acpi_entry_iosapic_t *) p; - unsigned int ver, v; - int l, max_pin; - - ver = iosapic_version(iosapic->address); - max_pin = (ver >> 16) & 0xff; - - printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n", - (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address, - iosapic->irq_base, iosapic->irq_base + max_pin); - - for (l = 0; l <= max_pin; l++) { - v = iosapic->irq_base + l; - if (v < 16) - v = isa_irq_to_vector(v); - if (v > IA64_MAX_VECTORED_IRQ) { - printk(" !!! bad IOSAPIC interrupt vector: %u\n", v); - continue; - } - /* XXX Check for IOSAPIC collisions */ - iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0); - iosapic_baseirq(v) = iosapic->irq_base; - } - iosapic_init(iosapic->address, iosapic->irq_base); -#endif -} - - /* * Configure legacy IRQ information in iosapic_vector */ @@ -227,7 +187,7 @@ acpi_parse_msapic(acpi_sapic_t *msapic) break; case ACPI_ENTRY_IO_SAPIC: - acpi_iosapic(p); + platform_register_iosapic((acpi_entry_iosapic_t *) p); break; case ACPI_ENTRY_INT_SRC_OVERRIDE: diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index c4383b97fe71..d55835df6ed4 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -33,9 +33,10 @@ extern efi_status_t efi_call_phys (void *, ...); struct efi efi; - static efi_runtime_services_t *runtime; +static unsigned long mem_limit = ~0UL; + static efi_status_t phys_get_time (efi_time_t *tm, efi_time_cap_t *tc) { @@ -169,15 +170,13 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg) case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: - if (md->phys_addr > 1024*1024*1024UL) { - printk("Warning: ignoring %luMB of memory above 1GB!\n", - md->num_pages >> 8); - md->type = EFI_UNUSABLE_MEMORY; - continue; - } - if (!(md->attribute & EFI_MEMORY_WB)) continue; + if (md->phys_addr + (md->num_pages << 12) > mem_limit) { + if (md->phys_addr > mem_limit) + continue; + 
md->num_pages = (mem_limit - md->phys_addr) >> 12; + } if (md->num_pages == 0) { printk("efi_memmap_walk: ignoring empty region at 0x%lx", md->phys_addr); @@ -224,8 +223,8 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg) * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor * Abstraction Layer chapter 11 in ADAG */ -static void -map_pal_code (void) +void +efi_map_pal_code (void) { void *efi_map_start, *efi_map_end, *p; efi_memory_desc_t *md; @@ -240,13 +239,14 @@ map_pal_code (void) for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { md = p; - if (md->type != EFI_PAL_CODE) continue; + if (md->type != EFI_PAL_CODE) + continue; if (++pal_code_count > 1) { printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n", md->phys_addr); continue; - } + } mask = ~((1 << _PAGE_SIZE_4M)-1); /* XXX should be dynamic? */ vaddr = PAGE_OFFSET + md->phys_addr; @@ -281,9 +281,28 @@ efi_init (void) efi_config_table_t *config_tables; efi_char16_t *c16; u64 efi_desc_size; - char vendor[100] = "unknown"; + char *cp, *end, vendor[100] = "unknown"; + extern char saved_command_line[]; int i; + /* it's too early to be able to use the standard kernel command line support... 
*/ + for (cp = saved_command_line; *cp; ) { + if (memcmp(cp, "mem=", 4) == 0) { + cp += 4; + mem_limit = memparse(cp, &end) - 1; + if (end != cp) + break; + cp = end; + } else { + while (*cp != ' ' && *cp) + ++cp; + while (*cp == ' ') + ++cp; + } + } + if (mem_limit != ~0UL) + printk("Ignoring memory above %luMB\n", mem_limit >> 20); + efi.systab = __va(ia64_boot_param.efi_systab); /* @@ -359,7 +378,7 @@ efi_init (void) } #endif - map_pal_code(); + efi_map_pal_code(); } void diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 43c99ca062bb..e37bd0df8040 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -106,29 +106,19 @@ GLOBAL_ENTRY(ia64_switch_to) alloc r16=ar.pfs,1,0,0,0 DO_SAVE_SWITCH_STACK UNW(.body) - // disable interrupts to ensure atomicity for next few instructions: - mov r17=psr // M-unit - ;; - rsm psr.i // M-unit - dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff - ;; - srlz.d - ;; + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 ;; st8 [r22]=sp // save kernel stack pointer of old task ld8 sp=[r21] // load kernel stack pointer of new task and r20=in0,r18 // physical address of "current" ;; + mov ar.k6=r20 // copy "current" into ar.k6 mov r8=r13 // return pointer to previously running task mov r13=in0 // set "current" pointer - mov ar.k6=r20 // copy "current" into ar.k6 ;; - // restore interrupts - mov psr.l=r17 - ;; - srlz.d DO_LOAD_SWITCH_STACK( ) br.ret.sptk.few rp END(ia64_switch_to) diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index b2abc48a4a69..bea14236dbcb 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -181,7 +181,9 @@ END(ia64_save_debug_regs) GLOBAL_ENTRY(ia64_load_debug_regs) alloc r16=ar.pfs,1,0,0,0 +#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) lfetch.nta [in0] +#endif mov r20=ar.lc // preserve ar.lc add 
r19=IA64_NUM_DBG_REGS*8,in0 mov ar.lc=IA64_NUM_DBG_REGS-1 @@ -702,3 +704,74 @@ SET_REG(b4); SET_REG(b5); #endif /* CONFIG_IA64_BRL_EMU */ + +#ifdef CONFIG_SMP + + /* + * This routine handles spinlock contention. It uses a simple exponential backoff + * algorithm to reduce unnecessary bus traffic. The initial delay is selected from + * the low-order bits of the cycle counter (a cheap "randomizer"). I'm sure this + * could use additional tuning, especially on systems with a large number of CPUs. + * Also, I think the maximum delay should be made a function of the number of CPUs in + * the system. --davidm 00/08/05 + * + * WARNING: This is not a normal procedure. It gets called from C code without + * the compiler knowing about it. Thus, we must not use any scratch registers + * beyond those that were declared "clobbered" at the call-site (see spin_lock() + * macro). We may not even use the stacked registers, because that could overwrite + * output registers. Similarly, we can't use the scratch stack area as it may be + * in use, too. + * + * Inputs: + * ar.ccv = 0 (and available for use) + * r28 = available for use + * r29 = available for use + * r30 = non-zero (and available for use) + * r31 = address of lock we're trying to acquire + * p15 = available for use + */ + +# define delay r28 +# define timeout r29 +# define tmp r30 + +GLOBAL_ENTRY(ia64_spinlock_contention) + mov tmp=ar.itc + ;; + and delay=0x3f,tmp + ;; + +.retry: add timeout=tmp,delay + shl delay=delay,1 + ;; + dep delay=delay,r0,0,13 // limit delay to 8192 cycles + ;; + // delay a little... 
+.wait: sub tmp=tmp,timeout + or delay=0xf,delay // make sure delay is non-zero (otherwise we get stuck with 0) + ;; + cmp.lt p15,p0=tmp,r0 + mov tmp=ar.itc +(p15) br.cond.sptk .wait + ;; + ld1 tmp=[r31] + ;; + cmp.ne p15,p0=tmp,r0 + mov tmp=ar.itc +(p15) br.cond.sptk.few .retry // lock is still busy + ;; + // try acquiring lock (we know ar.ccv is still zero!): + mov tmp=1 + ;; + IA64_SEMFIX_INSN + cmpxchg1.acq tmp=[r31],tmp,ar.ccv + ;; + cmp.eq p15,p0=tmp,r0 + + mov tmp=ar.itc +(p15) br.ret.sptk.many b7 // got lock -> return + br .retry // still no luck, retry + +END(ia64_spinlock_contention) + +#endif diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 2e4ffe403e3b..62e7926126e0 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -18,6 +18,7 @@ EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(strncat); EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(strtok); #include @@ -37,6 +38,7 @@ EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(kernel_thread); #ifdef CONFIG_SMP +#include EXPORT_SYMBOL(synchronize_irq); #include diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 1ee2974b5698..fe686db0ef44 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -117,6 +117,13 @@ ia64_handle_irq (unsigned long vector, struct pt_regs *regs) { unsigned long bsp, sp; + /* + * Note: if the interrupt happened while executing in + * the context switch routine (ia64_switch_to), we may + * get a spurious stack overflow here. This is + * because the register and the memory stack are not + * switched atomically. 
+ */ asm ("mov %0=ar.bsp" : "=r"(bsp)); asm ("mov %0=sp" : "=r"(sp)); diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 9f611c5b0350..d1b599f77f5c 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -170,33 +170,27 @@ ia64_ivt: * The ITLB basically does the same as the VHPT handler except * that we always insert exactly one instruction TLB entry. */ -#if 1 /* * Attempt to lookup PTE through virtual linear page table. * The speculative access will fail if there is no TLB entry * for the L3 page table page we're trying to access. */ - mov r31=pr // save predicates - ;; - thash r17=r16 // compute virtual address of L3 PTE + mov r16=cr.iha // get virtual address of L3 PTE ;; - ld8.s r18=[r17] // try to read L3 PTE + ld8.s r16=[r16] // try to read L3 PTE + mov r31=pr // save predicates ;; - tnat.nz p6,p0=r18 // did read succeed? + tnat.nz p6,p0=r16 // did read succeed? (p6) br.cond.spnt.many 1f ;; - itc.i r18 + itc.i r16 ;; mov pr=r31,-1 rfi -1: rsm psr.dt // use physical addressing for data -#else - mov r16=cr.ifa // get address that caused the TLB miss +1: mov r16=cr.ifa // get address that caused the TLB miss ;; rsm psr.dt // use physical addressing for data -#endif - mov r31=pr // save the predicate registers mov r19=ar.k7 // get page table base address shl r21=r16,3 // shift bit 60 into sign bit shr.u r17=r16,61 // get the region number into r17 @@ -244,33 +238,27 @@ ia64_ivt: * The DTLB basically does the same as the VHPT handler except * that we always insert exactly one data TLB entry. */ - mov r16=cr.ifa // get address that caused the TLB miss -#if 1 /* * Attempt to lookup PTE through virtual linear page table. * The speculative access will fail if there is no TLB entry * for the L3 page table page we're trying to access. 
*/ - mov r31=pr // save predicates + mov r16=cr.iha // get virtual address of L3 PTE ;; - thash r17=r16 // compute virtual address of L3 PTE - ;; - ld8.s r18=[r17] // try to read L3 PTE + ld8.s r16=[r16] // try to read L3 PTE + mov r31=pr // save predicates ;; - tnat.nz p6,p0=r18 // did read succeed? + tnat.nz p6,p0=r16 // did read succeed? (p6) br.cond.spnt.many 1f ;; - itc.d r18 + itc.d r16 ;; mov pr=r31,-1 rfi -1: rsm psr.dt // use physical addressing for data -#else - rsm psr.dt // use physical addressing for data - mov r31=pr // save the predicate registers +1: mov r16=cr.ifa // get address that caused the TLB miss ;; -#endif + rsm psr.dt // use physical addressing for data mov r19=ar.k7 // get page table base address shl r21=r16,3 // shift bit 60 into sign bit shr.u r17=r16,61 // get the region number into r17 @@ -504,7 +492,24 @@ page_fault: mov r29=b0 // save b0 in case of nested fault) ;; 1: ld8 r18=[r17] - ;; // avoid raw on r18 +#if defined(CONFIG_IA32_SUPPORT) && \ + (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) + // + // Erratum 85 (Access bit fault could be reported before page not present fault) + // If the PTE is indicates the page is not present, then just turn this into a + // page fault. + // + mov r31=pr // save predicates + ;; + tbit.nz p6,p0=r18,0 // page present bit set? +(p6) br.cond.sptk 1f + ;; // avoid WAW on p6 + mov pr=r31,-1 + br.cond.sptk page_fault // page wasn't present +1: mov pr=r31,-1 +#else + ;; // avoid RAW on r18 +#endif or r18=_PAGE_A,r18 // set the accessed bit mov b0=r29 // restore b0 ;; @@ -541,14 +546,6 @@ page_fault: ;; srlz.d // ensure everyone knows psr.dt is off... cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so) -#if 1 - // Allow syscalls via the old system call number for the time being. This is - // so we can transition to the new syscall number in a relatively smooth - // fashion. 
- mov r17=0x80000 - ;; -(p7) cmp.eq.or.andcm p0,p7=r16,r17 // is this the old syscall number? -#endif (p7) br.cond.spnt.many non_syscall SAVE_MIN // uses r31; defines r2: diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index 153fb5684347..68227302c7a8 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -3,12 +3,9 @@ #include #include -struct ia64_machine_vector ia64_mv; +#ifdef CONFIG_IA64_GENERIC -void -machvec_noop (void) -{ -} +struct ia64_machine_vector ia64_mv; /* * Most platforms use this routine for mapping page frame addresses @@ -46,3 +43,10 @@ machvec_init (const char *name) ia64_mv = *mv; printk("booting generic kernel on platform %s\n", name); } + +#endif /* CONFIG_IA64_GENERIC */ + +void +machvec_noop (void) +{ +} diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S index 147e2b8fe90f..0b07163dcec9 100644 --- a/arch/ia64/kernel/pal.S +++ b/arch/ia64/kernel/pal.S @@ -191,3 +191,57 @@ GLOBAL_ENTRY(ia64_pal_call_phys_static) srlz.d // seralize restoration of psr.l br.ret.sptk.few b0 END(ia64_pal_call_phys_static) + +/* + * Make a PAL call using the stacked registers in physical mode. 
+ * + * Inputs: + * in0 Index of PAL service + * in2 - in3 Remaning PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_phys_stacked) + UNW(.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)) + alloc loc1 = ar.pfs,5,5,86,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 // copy procedure index + mov loc0 = rp // save rp + } + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov out0 = in0 // first argument + mov out1 = in1 // copy arg2 + mov out2 = in2 // copy arg3 + mov out3 = in3 // copy arg3 + ;; + mov loc3 = psr // save psr + ;; + mov loc4=ar.rsc // save RSE configuration + dep.z loc2=loc2,0,61 // convert pal entry point to physical + ;; + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + movl r16=PAL_PSR_BITS_TO_CLEAR + movl r17=PAL_PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 // add in psr the bits to set + mov b7 = loc2 // install target to branch reg + ;; + andcm r16=loc3,r16 // removes bits to clear from psr + br.call.sptk.few rp=ia64_switch_mode +.ret6: + br.call.sptk.many rp=b7 // now make the call +.ret7: + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // r16= original psr + br.call.sptk.few rp=ia64_switch_mode // return to virtual mode + +.ret8: mov psr.l = loc3 // restore init PSR + mov ar.pfs = loc1 + mov rp = loc0 + ;; + mov ar.rsc=loc4 // restore RSE configuration + srlz.d // seralize restoration of psr.l + br.ret.sptk.few b0 +END(ia64_pal_call_phys_stacked) + diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index ad40e911e2af..0e33a4e0bc0d 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -21,19 +21,35 @@ #include #include #include +#include +#if defined(MODVERSIONS) +#include +#endif #include #include #include #include #include +#ifdef CONFIG_SMP +#include +#endif + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("/proc interface to IA-64 PAL"); /* - * Hope to get rid of these in a near future + * Hope to get rid of this one in a near future */ #define 
IA64_PAL_VERSION_BUG 1 -#define PALINFO_VERSION "0.1" +#define PALINFO_VERSION "0.3" + +#ifdef CONFIG_SMP +#define cpu_is_online(i) (cpu_online_map & (1UL << i)) +#else +#define cpu_is_online(i) 1 +#endif typedef int (*palinfo_func_t)(char*); @@ -43,7 +59,6 @@ typedef struct { struct proc_dir_entry *entry; /* registered entry (removal) */ } palinfo_entry_t; -static struct proc_dir_entry *palinfo_dir; /* * A bunch of string array to get pretty printing @@ -95,7 +110,7 @@ static const char *rse_hints[]={ #define RSE_HINTS_COUNT (sizeof(rse_hints)/sizeof(const char *)) /* - * The current resvision of the Volume 2 of + * The current revision of the Volume 2 of * IA-64 Architecture Software Developer's Manual is wrong. * Table 4-10 has invalid information concerning the ma field: * Correct table is: @@ -121,64 +136,31 @@ static const char *mem_attrib[]={ /* * Allocate a buffer suitable for calling PAL code in Virtual mode * - * The documentation (PAL2.6) requires thius buffer to have a pinned - * translation to avoid any DTLB faults. For this reason we allocate - * a page (large enough to hold any possible reply) and use a DTC - * to hold the translation during the call. A call the free_palbuffer() - * is required to release ALL resources (page + translation). - * - * The size of the page allocated is based on the PAGE_SIZE defined - * at compile time for the kernel, i.e. >= 4Kb. + * The documentation (PAL2.6) allows DTLB misses on the buffer. So + * using the TC is enough, no need to pin the entry. * - * Return: a pointer to the newly allocated page (virtual address) + * We allocate a kernel-sized page (at least 4KB). This is enough to + * hold any possible reply. 
*/ -static void * +static inline void * get_palcall_buffer(void) { void *tmp; tmp = (void *)__get_free_page(GFP_KERNEL); if (tmp == 0) { - printk(KERN_ERR "%s: can't get a buffer page\n", __FUNCTION__); - } else if ( ((u64)tmp - PAGE_OFFSET) > (1<<_PAGE_SIZE_256M) ) { /* XXX: temporary hack */ - unsigned long flags; - - /* PSR.ic must be zero to insert new DTR */ - ia64_clear_ic(flags); - - /* - * we only insert of DTR - * - * XXX: we need to figure out a way to "allocate" TR(s) to avoid - * conflicts. Maybe something in an include file like pgtable.h - * page.h or processor.h - * - * ITR0/DTR0: used for kernel code/data - * ITR1/DTR1: used by HP simulator - * ITR2/DTR2: used to map PAL code - */ - ia64_itr(0x2, 3, (u64)tmp, - pte_val(mk_pte_phys(__pa(tmp), __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW))), PAGE_SHIFT); - - ia64_srlz_d (); - - __restore_flags(flags); - } - + printk(KERN_ERR __FUNCTION__" : can't get a buffer page\n"); + } return tmp; } /* * Free a palcall buffer allocated with the previous call - * - * The translation is also purged. 
*/ -static void +static inline void free_palcall_buffer(void *addr) { __free_page(addr); - ia64_ptr(0x2, (u64)addr, PAGE_SHIFT); - ia64_srlz_d (); } /* @@ -564,7 +546,6 @@ processor_info(char *page) int i; s64 ret; - /* must be in physical mode */ if ((ret=ia64_pal_proc_get_features(&avail, &status, &control)) != 0) return 0; for(i=0; i < 64; i++, v++,avail >>=1, status >>=1, control >>=1) { @@ -577,6 +558,57 @@ processor_info(char *page) return p - page; } +static const char *bus_features[]={ + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL, + "Request Bus Parking", + "Bus Lock Mask", + "Enable Half Transfer", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, + "Disable Transaction Queuing", + "Disable Reponse Error Checking", + "Disable Bus Error Checking", + "Disable Bus Requester Internal Error Signalling", + "Disable Bus Requester Error Signalling", + "Disable Bus Initialization Event Checking", + "Disable Bus Initialization Event Signalling", + "Disable Bus Address Error Checking", + "Disable Bus Address Error Signalling", + "Disable Bus Data Error Checking" +}; + + +static int +bus_info(char *page) +{ + char *p = page; + const char **v = bus_features; + pal_bus_features_u_t av, st, ct; + u64 avail, status, control; + int i; + s64 ret; + + if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0; + + avail = av.pal_bus_features_val; + status = st.pal_bus_features_val; + control = ct.pal_bus_features_val; + + for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) { + if ( ! *v ) continue; + p += sprintf(p, "%-48s : %s%s %s\n", *v, + avail & 0x1 ? "" : "NotImpl", + avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", + avail & 0x1 ? (control & 0x1 ? 
"Ctrl" : "NoCtrl"): ""); + } + return p - page; +} + + /* * physical mode call for PAL_VERSION is working fine. * This function is meant to go away once PAL get fixed. @@ -613,21 +645,25 @@ version_info(char *page) #endif if (status != 0) return 0; - p += sprintf(p, "PAL_vendor : 0x%x (min=0x%x)\n" \ - "PAL_A revision : 0x%x (min=0x%x)\n" \ - "PAL_A model : 0x%x (min=0x%x)\n" \ - "PAL_B mode : 0x%x (min=0x%x)\n" \ - "PAL_B revision : 0x%x (min=0x%x)\n", + p += sprintf(p, "PAL_vendor : 0x%02x (min=0x%02x)\n" \ + "PAL_A : %x.%x.%x (min=%x.%x.%x)\n" \ + "PAL_B : %x.%x.%x (min=%x.%x.%x)\n", cur_ver.pal_version_s.pv_pal_vendor, min_ver.pal_version_s.pv_pal_vendor, + + cur_ver.pal_version_s.pv_pal_a_model>>4, + cur_ver.pal_version_s.pv_pal_a_model&0xf, cur_ver.pal_version_s.pv_pal_a_rev, - cur_ver.pal_version_s.pv_pal_a_rev, - cur_ver.pal_version_s.pv_pal_a_model, - min_ver.pal_version_s.pv_pal_a_model, + min_ver.pal_version_s.pv_pal_a_model>>4, + min_ver.pal_version_s.pv_pal_a_model&0xf, + min_ver.pal_version_s.pv_pal_a_rev, + + cur_ver.pal_version_s.pv_pal_b_model>>4, + cur_ver.pal_version_s.pv_pal_b_model&0xf, cur_ver.pal_version_s.pv_pal_b_rev, - min_ver.pal_version_s.pv_pal_b_rev, - cur_ver.pal_version_s.pv_pal_b_model, - min_ver.pal_version_s.pv_pal_b_model); + min_ver.pal_version_s.pv_pal_b_model>>4, + min_ver.pal_version_s.pv_pal_b_model&0xf, + min_ver.pal_version_s.pv_pal_b_rev); return p - page; } @@ -648,6 +684,9 @@ perfmon_info(char *page) } #ifdef IA64_PAL_PERF_MON_INFO_BUG + /* + * This bug has been fixed in PAL 2.2.9 and higher + */ pm_buffer[5]=0x3; pm_info.pal_perf_mon_info_s.cycles = 0x12; pm_info.pal_perf_mon_info_s.retired = 0x08; @@ -708,30 +747,111 @@ frequency_info(char *page) return p - page; } - -/* - * Entry point routine: all calls go trhough this function - */ static int -palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) +tr_info(char *page) { - palinfo_func_t info = (palinfo_func_t)data; - int len = 
info(page); + char *p = page; + s64 status; + pal_tr_valid_u_t tr_valid; + u64 tr_buffer[4]; + pal_vm_info_1_u_t vm_info_1; + pal_vm_info_2_u_t vm_info_2; + int i, j; + u64 max[3], pgm; + struct ifa_reg { + u64 valid:1; + u64 ig:11; + u64 vpn:52; + } *ifa_reg; + struct itir_reg { + u64 rv1:2; + u64 ps:6; + u64 key:24; + u64 rv2:32; + } *itir_reg; + struct gr_reg { + u64 p:1; + u64 rv1:1; + u64 ma:3; + u64 a:1; + u64 d:1; + u64 pl:2; + u64 ar:3; + u64 ppn:38; + u64 rv2:2; + u64 ed:1; + u64 ig:11; + } *gr_reg; + struct rid_reg { + u64 ig1:1; + u64 rv1:1; + u64 ig2:6; + u64 rid:24; + u64 rv2:32; + } *rid_reg; - if (len <= off+count) *eof = 1; + if ((status=ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { + printk("ia64_pal_vm_summary=%ld\n", status); + return 0; + } + max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; + max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; - *start = page + off; - len -= off; + for (i=0; i < 2; i++ ) { + for (j=0; j < max[i]; j++) { - if (len>count) len = count; - if (len<0) len = 0; + status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid); + if (status != 0) { + printk(__FUNCTION__ " pal call failed on tr[%d:%d]=%ld\n", i, j, status); + continue; + } - return len; + ifa_reg = (struct ifa_reg *)&tr_buffer[2]; + + if (ifa_reg->valid == 0) continue; + + gr_reg = (struct gr_reg *)tr_buffer; + itir_reg = (struct itir_reg *)&tr_buffer[1]; + rid_reg = (struct rid_reg *)&tr_buffer[3]; + + pgm = -1 << (itir_reg->ps - 12); + p += sprintf(p, "%cTR%d: av=%d pv=%d dv=%d mv=%d\n" \ + "\tppn : 0x%lx\n" \ + "\tvpn : 0x%lx\n" \ + "\tps : ", + + "ID"[i], + j, + tr_valid.pal_tr_valid_s.access_rights_valid, + tr_valid.pal_tr_valid_s.priv_level_valid, + tr_valid.pal_tr_valid_s.dirty_bit_valid, + tr_valid.pal_tr_valid_s.mem_attr_valid, + (gr_reg->ppn & pgm)<< 12, + (ifa_reg->vpn & pgm)<< 12); + + p = bitvector_process(p, 1<< itir_reg->ps); + + p += sprintf(p, "\n\tpl : %d\n" \ + "\tar : %d\n" \ + "\trid : %x\n" \ + "\tp : %d\n" \ + "\tma : %d\n" \ + "\td 
: %d\n", + gr_reg->pl, + gr_reg->ar, + rid_reg->rid, + gr_reg->p, + gr_reg->ma, + gr_reg->d); + } + } + return p - page; } + + /* - * List names,function pairs for every entry in /proc/palinfo - * Must be terminated with the NULL,NULL entry. + * List {name,function} pairs for every entry in /proc/palinfo/cpu* */ static palinfo_entry_t palinfo_entries[]={ { "version_info", version_info, }, @@ -742,38 +862,190 @@ static palinfo_entry_t palinfo_entries[]={ { "processor_info", processor_info, }, { "perfmon_info", perfmon_info, }, { "frequency_info", frequency_info, }, - { NULL, NULL,} + { "bus_info", bus_info }, + { "tr_info", tr_info, } }; +#define NR_PALINFO_ENTRIES (sizeof(palinfo_entries)/sizeof(palinfo_entry_t)) + +/* + * this array is used to keep track of the proc entries we create. This is + * required in the module mode when we need to remove all entries. The procfs code + * does not do recursion of deletion + * + * Notes: + * - first +1 accounts for the cpuN entry + * - second +1 account for toplevel palinfo + * + */ +#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1)+1) + +static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES]; + +/* + * This data structure is used to pass which cpu,function is being requested + * It must fit in a 64bit quantity to be passed to the proc callback routine + * + * In SMP mode, when we get a request for another CPU, we must call that + * other CPU using IPI and wait for the result before returning. 
+ */ +typedef union { + u64 value; + struct { + unsigned req_cpu: 32; /* for which CPU this info is */ + unsigned func_id: 32; /* which function is requested */ + } pal_func_cpu; +} pal_func_cpu_u_t; + +#define req_cpu pal_func_cpu.req_cpu +#define func_id pal_func_cpu.func_id + +#ifdef CONFIG_SMP + +/* + * used to hold information about final function to call + */ +typedef struct { + palinfo_func_t func; /* pointer to function to call */ + char *page; /* buffer to store results */ + int ret; /* return value from call */ +} palinfo_smp_data_t; + + +/* + * this function does the actual final call and he called + * from the smp code, i.e., this is the palinfo callback routine + */ +static void +palinfo_smp_call(void *info) +{ + palinfo_smp_data_t *data = (palinfo_smp_data_t *)info; + /* printk(__FUNCTION__" called on CPU %d\n", smp_processor_id());*/ + if (data == NULL) { + printk(KERN_ERR __FUNCTION__" data pointer is NULL\n"); + data->ret = 0; /* no output */ + return; + } + /* does this actual call */ + data->ret = (*data->func)(data->page); +} + +/* + * function called to trigger the IPI, we need to access a remote CPU + * Return: + * 0 : error or nothing to output + * otherwise how many bytes in the "page" buffer were written + */ +static +int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) +{ + palinfo_smp_data_t ptr; + int ret; + + ptr.func = palinfo_entries[f->func_id].proc_read; + ptr.page = page; + ptr.ret = 0; /* just in case */ + + /*printk(__FUNCTION__" calling CPU %d from CPU %d for function %d\n", f->req_cpu,smp_processor_id(), f->func_id);*/ + + /* will send IPI to other CPU and wait for completion of remote call */ + if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) { + printk(__FUNCTION__" remote CPU call from %d to %d on function %d: error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret); + return 0; + } + return ptr.ret; +} +#else /* ! 
CONFIG_SMP */ +static +int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) +{ + printk(__FUNCTION__" should not be called with non SMP kernel\n"); + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * Entry point routine: all calls go through this function + */ +static int +palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int len=0; + pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data; + + MOD_INC_USE_COUNT; + /* + * in SMP mode, we may need to call another CPU to get correct + * information. PAL, by definition, is processor specific + */ + if (f->req_cpu == smp_processor_id()) + len = (*palinfo_entries[f->func_id].proc_read)(page); + else + len = palinfo_handle_smp(f, page); + + if (len <= off+count) *eof = 1; + + *start = page + off; + len -= off; + + if (len>count) len = count; + if (len<0) len = 0; + + MOD_DEC_USE_COUNT; + + return len; +} static int __init palinfo_init(void) { - palinfo_entry_t *p; +# define CPUSTR "cpu%d" + + pal_func_cpu_u_t f; + struct proc_dir_entry **pdir = palinfo_proc_entries; + struct proc_dir_entry *palinfo_dir, *cpu_dir; + int i, j; + char cpustr[sizeof(CPUSTR)]; printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); - palinfo_dir = create_proc_entry("palinfo", S_IFDIR | S_IRUGO | S_IXUGO, NULL); + palinfo_dir = proc_mkdir("pal", NULL); + + /* + * we keep track of created entries in a depth-first order for + * cleanup purposes. 
Each entry is stored into palinfo_proc_entries + */ + for (i=0; i < NR_CPUS; i++) { + + if (!cpu_is_online(i)) continue; + + sprintf(cpustr,CPUSTR, i); + + cpu_dir = proc_mkdir(cpustr, palinfo_dir); - for (p = palinfo_entries; p->name ; p++){ - p->entry = create_proc_read_entry (p->name, 0, palinfo_dir, - palinfo_read_entry, p->proc_read); + f.req_cpu = i; + + for (j=0; j < NR_PALINFO_ENTRIES; j++) { + f.func_id = j; + *pdir++ = create_proc_read_entry (palinfo_entries[j].name, 0, cpu_dir, + palinfo_read_entry, (void *)f.value); + } + *pdir++ = cpu_dir; } + *pdir = palinfo_dir; return 0; } -static int __exit +static void __exit palinfo_exit(void) { - palinfo_entry_t *p; + int i = 0; - for (p = palinfo_entries; p->name ; p++){ - remove_proc_entry (p->name, palinfo_dir); + /* remove all nodes: depth first pass */ + for (i=0; i< NR_PALINFO_PROC_ENTRIES ; i++) { + remove_proc_entry (palinfo_proc_entries[i]->name, NULL); } - remove_proc_entry ("palinfo", 0); - - return 0; } module_init(palinfo_init); diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index ab86e69b3d5d..80509c6a1e61 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -3,34 +3,509 @@ * * This implementation is for IA-64 platforms that do not support * I/O TLBs (aka DMA address translation hardware). - * - * XXX This doesn't do the right thing yet. It appears we would have - * to add additional zones so we can implement the various address - * mask constraints that we might encounter. A zone for memory < 32 - * bits is obviously necessary... + * Copyright (C) 2000 Asit Mallick + * Copyright (C) 2000 Goutham Rao */ -#include +#include + #include -#include #include +#include +#include +#include #include +#include +#include + +#ifdef CONFIG_SWIOTLB + +#include +#include + +#define ALIGN(val, align) ((unsigned long) (((unsigned long) (val) + ((align) - 1)) & ~((align) - 1))) + +/* + * log of the size of each IO TLB slab. 
The number of slabs is command line + * controllable. + */ +#define IO_TLB_SHIFT 11 + +/* + * Used to do a quick range check in pci_unmap_single and pci_sync_single, to see if the + * memory was in fact allocated by this API. + */ +static char *io_tlb_start, *io_tlb_end; + +/* + * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and io_tlb_end. + * This is command line adjustable via setup_io_tlb_npages. + */ +unsigned long io_tlb_nslabs = 1024; + +/* + * This is a free list describing the number of free entries available from each index + */ +static unsigned int *io_tlb_list; +static unsigned int io_tlb_index; + +/* + * We need to save away the original address corresponding to a mapped entry for the sync + * operations. + */ +static unsigned char **io_tlb_orig_addr; + +/* + * Protect the above data structures in the map and unmap calls + */ +spinlock_t io_tlb_lock = SPIN_LOCK_UNLOCKED; + +static int __init +setup_io_tlb_npages (char *str) +{ + io_tlb_nslabs = simple_strtoul(str, NULL, 0) << (PAGE_SHIFT - IO_TLB_SHIFT); + return 1; +} +__setup("swiotlb=", setup_io_tlb_npages); + +/* + * Statically reserve bounce buffer space and initialize bounce buffer + * data structures for the software IO TLB used to implement the PCI DMA API + */ +void +setup_swiotlb (void) +{ + int i; + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); + if (!io_tlb_start) + BUG(); + io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size 2^IO_TLB_SHIFT between + * io_tlb_start and io_tlb_end. 
+ */ + io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); + for (i = 0; i < io_tlb_nslabs; i++) + io_tlb_list[i] = io_tlb_nslabs - i; + io_tlb_index = 0; + io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); + + printk("Placing software IO TLB between 0x%p - 0x%p\n", io_tlb_start, io_tlb_end); +} + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ +static void * +__pci_map_single (struct pci_dev *hwdev, char *buffer, size_t size, int direction) +{ + unsigned long flags; + char *dma_addr; + unsigned int i, nslots, stride, index, wrap; + + /* + * For mappings greater than a page size, we limit the stride (and hence alignment) + * to a page size. + */ + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + if (size > (1 << PAGE_SHIFT)) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else + stride = nslots; + + if (!nslots) + BUG(); + + /* + * Find suitable number of IO TLB entries size that will fit this request and allocate a buffer + * from that IO TLB pool. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + wrap = index = ALIGN(io_tlb_index, stride); + do { + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot and mark the + * entries as '0' indicating unavailable. + */ + if (io_tlb_list[index] >= nslots) { + for (i = index; i < index + nslots; i++) + io_tlb_list[i] = 0; + dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in the next round. + */ + io_tlb_index = (index + nslots) < io_tlb_nslabs ? (index + nslots) : 0; + + goto found; + } + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } while (index != wrap); + + /* + * XXX What is a suitable recovery mechanism here? We cannot + * sleep because we are called from with in interrupts! 
+ */ + panic("__pci_map_single: could not allocate software IO TLB (%ld bytes)", size); +found: + } + spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Save away the mapping from the original address to the DMA address. This is needed + * when we sync the memory. Then we sync the buffer if needed. + */ + io_tlb_orig_addr[index] = buffer; + if (direction == PCI_DMA_TODEVICE || direction == PCI_DMA_BIDIRECTIONAL) + memcpy(dma_addr, buffer, size); + + return dma_addr; +} + +/* + * dma_addr is the kernel virtual address of the bounce buffer to unmap. + */ +static void +__pci_unmap_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) +{ + unsigned long flags; + int i, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + char *buffer = io_tlb_orig_addr[index]; + + /* + * First, sync the memory before unmapping the entry + */ + if ((direction == PCI_DMA_FROMDEVICE) || (direction == PCI_DMA_BIDIRECTIONAL)) + /* + * bounce... copy the data back into the original buffer + * and delete the bounce buffer. + */ + memcpy(buffer, dma_addr, size); + + /* + * Return the buffer to the free list by setting the corresponding entries to indicate + * the number of contigous entries available. + * While returning the entries to the free list, we merge the entries with slots below + * and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + int count = ((index + nslots) < io_tlb_nslabs ? 
io_tlb_list[index + nslots] : 0); + /* + * Step 1: return the slots to the free list, merging the slots with superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) + io_tlb_list[i] = ++count; + /* + * Step 2: merge the returned slots with the preceeding slots, if available (non zero) + */ + for (i = index - 1; (i >= 0) && io_tlb_list[i]; i--) + io_tlb_list[i] += io_tlb_list[index]; + } + spin_unlock_irqrestore(&io_tlb_lock, flags); +} + +static void +__pci_sync_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) +{ + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + char *buffer = io_tlb_orig_addr[index]; + + /* + * bounce... copy the data back into/from the original buffer + * XXX How do you handle PCI_DMA_BIDIRECTIONAL here ? + */ + if (direction == PCI_DMA_FROMDEVICE) + memcpy(buffer, dma_addr, size); + else if (direction == PCI_DMA_TODEVICE) + memcpy(dma_addr, buffer, size); + else + BUG(); +} + +/* + * Map a single buffer of the indicated size for DMA in streaming mode. + * The PCI address to use is returned. + * + * Once the device is given the dma address, the device owns this memory + * until either pci_unmap_single or pci_dma_sync_single is performed. + */ +dma_addr_t +pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) +{ + unsigned long pci_addr = virt_to_phys(ptr); + + if (direction == PCI_DMA_NONE) + BUG(); + /* + * Check if the PCI device can DMA to ptr... if so, just return ptr + */ + if ((pci_addr & ~hwdev->dma_mask) == 0) + /* + * Device is bit capable of DMA'ing to the + * buffer... 
just return the PCI address of ptr + */ + return pci_addr; + + /* + * get a bounce buffer: + */ + pci_addr = virt_to_phys(__pci_map_single(hwdev, ptr, size, direction)); + + /* + * Ensure that the address returned is DMA'ble: + */ + if ((pci_addr & ~hwdev->dma_mask) != 0) + panic("__pci_map_single: bounce buffer is not DMA'ble"); + + return pci_addr; +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size + * must match what was provided for in a previous pci_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. + */ +void +pci_unmap_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) +{ + char *dma_addr = phys_to_virt(pci_addr); + + if (direction == PCI_DMA_NONE) + BUG(); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + __pci_unmap_single(hwdev, dma_addr, size, direction); +} + +/* + * Make physical memory consistent for a single + * streaming mode DMA translation after a transfer. + * + * If you perform a pci_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the PCI dma + * mapping, you must call this function before doing so. At the + * next point you give the PCI dma address back to the card, the + * device again owns the buffer. + */ +void +pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) +{ + char *dma_addr = phys_to_virt(pci_addr); + + if (direction == PCI_DMA_NONE) + BUG(); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + __pci_sync_single(hwdev, dma_addr, size, direction); +} + +/* + * Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scatter-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. 
They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +int +pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) { + sg->orig_address = sg->address; + if ((virt_to_phys(sg->address) & ~hwdev->dma_mask) != 0) { + sg->address = __pci_map_single(hwdev, sg->address, sg->length, direction); + } + } + return nelems; +} + +/* + * Unmap a set of streaming mode DMA translations. + * Again, cpu read rules concerning calls here are the same as for + * pci_unmap_single() above. + */ +void +pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) + if (sg->orig_address != sg->address) { + __pci_unmap_single(hwdev, sg->address, sg->length, direction); + sg->address = sg->orig_address; + } +} + +/* + * Make physical memory consistent for a set of streaming mode DMA + * translations after a transfer. + * + * The same as pci_dma_sync_single but for a scatter-gather list, + * same rules and usage. + */ +void +pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) + if (sg->orig_address != sg->address) + __pci_sync_single(hwdev, sg->address, sg->length, direction); +} + +#else +/* + * Map a single buffer of the indicated size for DMA in streaming mode. + * The 32-bit bus address to use is returned. 
 + * + * Once the device is given the dma address, the device owns this memory + * until either pci_unmap_single or pci_dma_sync_single is performed. + */ +extern inline dma_addr_t +pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + return virt_to_bus(ptr); +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size + * must match what was provided for in a previous pci_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. + */ +extern inline void +pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} +/* + * Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scatter-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +extern inline int +pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + return nents; +} + +/* + * Unmap a set of streaming mode DMA translations. + * Again, cpu read rules concerning calls here are the same as for + * pci_unmap_single() above. 
+ */ +extern inline void +pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} +/* + * Make physical memory consistent for a single + * streaming mode DMA translation after a transfer. + * + * If you perform a pci_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the PCI dma + * mapping, you must call this function before doing so. At the + * next point you give the PCI dma address back to the card, the + * device again owns the buffer. + */ +extern inline void +pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +/* + * Make physical memory consistent for a set of streaming mode DMA + * translations after a transfer. + * + * The same as pci_dma_sync_single but for a scatter-gather list, + * same rules and usage. + */ +extern inline void +pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +#endif /* CONFIG_SWIOTLB */ void * pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) { - void *ret; + unsigned long pci_addr; int gfp = GFP_ATOMIC; + void *ret; - if (!hwdev || hwdev->dma_mask == 0xffffffff) - gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */ + if (!hwdev || hwdev->dma_mask <= 0xffffffff) + gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */ ret = (void *)__get_free_pages(gfp, get_order(size)); + if (!ret) + return NULL; - if (ret) { - memset(ret, 0, size); - *dma_handle = virt_to_bus(ret); - } + memset(ret, 0, size); + pci_addr = virt_to_phys(ret); + if ((pci_addr & ~hwdev->dma_mask) != 0) + panic("pci_alloc_consistent: allocated memory is out of range for PCI device"); + *dma_handle = pci_addr; return 
ret; } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 29291e1f928d..752b2a9a18a9 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -55,24 +56,23 @@ #define WRITE_PMCS 0xa1 #define READ_PMDS 0xa2 #define STOP_PMCS 0xa3 -#define IA64_COUNTER_MASK 0xffffffffffffff6f -#define PERF_OVFL_VAL 0xffffffff +#define IA64_COUNTER_MASK 0xffffffffffffff6fL +#define PERF_OVFL_VAL 0xffffffffL + +volatile int used_by_system; struct perfmon_counter { unsigned long data; unsigned long counter_num; }; -unsigned long pmds[MAX_PERF_COUNTER]; -struct task_struct *perf_owner=NULL; +unsigned long pmds[NR_CPUS][MAX_PERF_COUNTER]; asmlinkage unsigned long sys_perfmonctl (int cmd1, int cmd2, void *ptr) { struct perfmon_counter tmp, *cptr = ptr; - unsigned long pmd, cnum, dcr, flags; - struct task_struct *p; - struct pt_regs *regs; + unsigned long cnum, dcr, flags; struct perf_counter; int i; @@ -80,22 +80,24 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr) case WRITE_PMCS: /* Writes to PMC's and clears PMDs */ case WRITE_PMCS_AND_START: /* Also starts counting */ - if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2)) - return -EFAULT; + if (cmd2 <= 0 || cmd2 > MAX_PERF_COUNTER - used_by_system) + return -EINVAL; - if (cmd2 > MAX_PERF_COUNTER) + if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2)) return -EFAULT; - if (perf_owner && perf_owner != current) - return -EBUSY; - perf_owner = current; + current->thread.flags |= IA64_THREAD_PM_VALID; for (i = 0; i < cmd2; i++, cptr++) { copy_from_user(&tmp, cptr, sizeof(tmp)); /* XXX need to check validity of counter_num and perhaps data!! 
*/ + if (tmp.counter_num < 4 + || tmp.counter_num >= 4 + MAX_PERF_COUNTER - used_by_system) + return -EFAULT; + ia64_set_pmc(tmp.counter_num, tmp.data); ia64_set_pmd(tmp.counter_num, 0); - pmds[tmp.counter_num - 4] = 0; + pmds[smp_processor_id()][tmp.counter_num - 4] = 0; } if (cmd1 == WRITE_PMCS_AND_START) { @@ -104,26 +106,13 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr) dcr |= IA64_DCR_PP; ia64_set_dcr(dcr); local_irq_restore(flags); - - /* - * This is a no can do. It obviously wouldn't - * work on SMP where another process may not - * be blocked at all. We need to put in a perfmon - * IPI to take care of MP systems. See blurb above. - */ - lock_kernel(); - for_each_task(p) { - regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) -1 ; - ia64_psr(regs)->pp = 1; - } - unlock_kernel(); ia64_set_pmc(0, 0); } break; case READ_PMDS: - if (cmd2 > MAX_PERF_COUNTER) - return -EFAULT; + if (cmd2 <= 0 || cmd2 > MAX_PERF_COUNTER - used_by_system) + return -EINVAL; if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perf_counter)*cmd2)) return -EFAULT; @@ -153,9 +142,13 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr) * when we re-enabled interrupts. When I muck with dcr, * is the irq_save/restore needed? 
*/ - for (i = 0, cnum = 4;i < MAX_PERF_COUNTER; i++, cnum++, cptr++){ - pmd = pmds[i] + (ia64_get_pmd(cnum) & PERF_OVFL_VAL); - put_user(pmd, &cptr->data); + for (i = 0, cnum = 4;i < cmd2; i++, cnum++, cptr++) { + tmp.data = (pmds[smp_processor_id()][i] + + (ia64_get_pmd(cnum) & PERF_OVFL_VAL)); + tmp.counter_num = cnum; + if (copy_to_user(cptr, &tmp, sizeof(tmp))) + return -EFAULT; + //put_user(pmd, &cptr->data); } local_irq_save(flags); __asm__ __volatile__("ssm psr.pp"); @@ -167,30 +160,22 @@ sys_perfmonctl (int cmd1, int cmd2, void *ptr) case STOP_PMCS: ia64_set_pmc(0, 1); - for (i = 0; i < MAX_PERF_COUNTER; ++i) - ia64_set_pmc(i, 0); + ia64_srlz_d(); + for (i = 0; i < MAX_PERF_COUNTER - used_by_system; ++i) + ia64_set_pmc(4+i, 0); - local_irq_save(flags); - dcr = ia64_get_dcr(); - dcr &= ~IA64_DCR_PP; - ia64_set_dcr(dcr); - local_irq_restore(flags); - /* - * This is a no can do. It obviously wouldn't - * work on SMP where another process may not - * be blocked at all. We need to put in a perfmon - * IPI to take care of MP systems. See blurb above. - */ - lock_kernel(); - for_each_task(p) { - regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1; - ia64_psr(regs)->pp = 0; + if (!used_by_system) { + local_irq_save(flags); + dcr = ia64_get_dcr(); + dcr &= ~IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); } - unlock_kernel(); - perf_owner = NULL; + current->thread.flags &= ~(IA64_THREAD_PM_VALID); break; default: + return -EINVAL; break; } return 0; @@ -202,13 +187,13 @@ update_counters (void) unsigned long mask, i, cnum, val; mask = ia64_get_pmc(0) >> 4; - for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; cnum++, i++, mask >>= 1) { + for (i = 0, cnum = 4; i < MAX_PERF_COUNTER - used_by_system; cnum++, i++, mask >>= 1) { + val = 0; if (mask & 0x1) - val = PERF_OVFL_VAL; - else + val += PERF_OVFL_VAL + 1; /* since we got an interrupt, might as well clear every pmd. 
*/ - val = ia64_get_pmd(cnum) & PERF_OVFL_VAL; - pmds[i] += val; + val += ia64_get_pmd(cnum) & PERF_OVFL_VAL; + pmds[smp_processor_id()][i] += val; ia64_set_pmd(cnum, 0); } } @@ -221,20 +206,61 @@ perfmon_interrupt (int irq, void *arg, struct pt_regs *regs) ia64_srlz_d(); } +static struct irqaction perfmon_irqaction = { + handler: perfmon_interrupt, + flags: SA_INTERRUPT, + name: "perfmon" +}; + void perfmon_init (void) { - if (request_irq(PERFMON_IRQ, perfmon_interrupt, 0, "perfmon", NULL)) { - printk("perfmon_init: could not allocate performance monitor vector %u\n", - PERFMON_IRQ); - return; - } + irq_desc[PERFMON_IRQ].status |= IRQ_PER_CPU; + irq_desc[PERFMON_IRQ].handler = &irq_type_ia64_sapic; + setup_irq(PERFMON_IRQ, &perfmon_irqaction); + ia64_set_pmv(PERFMON_IRQ); ia64_srlz_d(); printk("Initialized perfmon vector to %u\n",PERFMON_IRQ); } +void +perfmon_init_percpu (void) +{ + ia64_set_pmv(PERFMON_IRQ); + ia64_srlz_d(); +} + +void +ia64_save_pm_regs (struct thread_struct *t) +{ + int i; + + ia64_set_pmc(0, 1); + ia64_srlz_d(); + for (i=0; i< IA64_NUM_PM_REGS - used_by_system ; i++) { + t->pmd[i] = ia64_get_pmd(4+i); + t->pmod[i] = pmds[smp_processor_id()][i]; + t->pmc[i] = ia64_get_pmc(4+i); + } +} + +void +ia64_load_pm_regs (struct thread_struct *t) +{ + int i; + + for (i=0; i< IA64_NUM_PM_REGS - used_by_system ; i++) { + ia64_set_pmd(4+i, t->pmd[i]); + pmds[smp_processor_id()][i] = t->pmod[i]; + ia64_set_pmc(4+i, t->pmc[i]); + } + ia64_set_pmc(0, 0); + ia64_srlz_d(); +} + #else /* !CONFIG_PERFMON */ + asmlinkage unsigned long sys_perfmonctl (int cmd1, int cmd2, void *ptr) { diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 3759e52f86b7..e586a4074dd6 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -27,6 +27,8 @@ #include #include +#ifdef CONFIG_IA64_NEW_UNWIND + static void do_show_stack (struct unw_frame_info *info, void *arg) { @@ -44,6 +46,8 @@ do_show_stack (struct unw_frame_info *info, void *arg) } 
while (unw_unwind(info) >= 0); } +#endif + void show_stack (struct task_struct *task) { @@ -118,15 +122,14 @@ cpu_idle (void *unused) current->nice = 20; current->counter = -100; -#ifdef CONFIG_SMP - if (!current->need_resched) - min_xtp(); -#endif while (1) { - while (!current->need_resched) { +#ifdef CONFIG_SMP + if (!current->need_resched) + min_xtp(); +#endif + while (!current->need_resched) continue; - } #ifdef CONFIG_SMP normal_xtp(); #endif @@ -157,11 +160,12 @@ cpu_idle (void *unused) void ia64_save_extra (struct task_struct *task) { - extern void ia64_save_debug_regs (unsigned long *save_area); - extern void ia32_save_state (struct thread_struct *thread); - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) ia64_save_debug_regs(&task->thread.dbr[0]); +#ifdef CONFIG_PERFMON + if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) + ia64_save_pm_regs(&task->thread); +#endif if (IS_IA32_PROCESS(ia64_task_regs(task))) ia32_save_state(&task->thread); } @@ -169,11 +173,12 @@ ia64_save_extra (struct task_struct *task) void ia64_load_extra (struct task_struct *task) { - extern void ia64_load_debug_regs (unsigned long *save_area); - extern void ia32_load_state (struct thread_struct *thread); - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) ia64_load_debug_regs(&task->thread.dbr[0]); +#ifdef CONFIG_PERFMON + if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) + ia64_load_pm_regs(&task->thread); +#endif if (IS_IA32_PROCESS(ia64_task_regs(task))) ia32_load_state(&task->thread); } @@ -532,17 +537,6 @@ exit_thread (void) } } -/* - * Free remaining state associated with DEAD_TASK. This is called - * after the parent of DEAD_TASK has collected the exist status of the - * task via wait(). 
- */ -void -release_thread (struct task_struct *dead_task) -{ - /* nothing to do */ -} - unsigned long get_wchan (struct task_struct *p) { diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index ae8991c51111..10868ce41240 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -549,6 +549,7 @@ void ia64_sync_fph (struct task_struct *child) { if (ia64_psr(ia64_task_regs(child))->mfh && ia64_get_fpu_owner() == child) { + ia64_psr(ia64_task_regs(child))->mfh = 0; ia64_set_fpu_owner(0); ia64_save_fpu(&child->thread.fph[0]); child->thread.flags |= IA64_THREAD_FPH_VALID; diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index f4b8ce9ddb4d..f73cd89686d9 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -156,6 +156,14 @@ ia64_sal_init (struct ia64_sal_systab *systab) struct ia64_sal_desc_platform_feature *pf = (void *) p; printk("SAL: Platform features "); +#ifdef CONFIG_IA64_HAVE_IRQREDIR + /* + * Early versions of SAL say we don't have + * IRQ redirection, even though we do... 
+ */ + pf->feature_mask |= (1 << 1); +#endif + if (pf->feature_mask & (1 << 0)) printk("BusLock "); diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c index bc55670bff67..1bbe4feab974 100644 --- a/arch/ia64/kernel/semaphore.c +++ b/arch/ia64/kernel/semaphore.c @@ -222,9 +222,6 @@ down_read_failed (struct rw_semaphore *sem) void __down_read_failed (struct rw_semaphore *sem, long count) { - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - while (1) { if (count == -1) { down_read_failed_biased(sem); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index fcb4e61903f6..62e3e19eab2c 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -122,6 +122,10 @@ setup_arch (char **cmdline_p) */ memcpy(&ia64_boot_param, (void *) ZERO_PAGE_ADDR, sizeof(ia64_boot_param)); + *cmdline_p = __va(ia64_boot_param.command_line); + strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ + efi_init(); max_pfn = 0; @@ -133,19 +137,65 @@ setup_arch (char **cmdline_p) */ bootmap_start = PAGE_ALIGN(__pa(&_end)); if (ia64_boot_param.initrd_size) - bootmap_start = PAGE_ALIGN(bootmap_start + ia64_boot_param.initrd_size); + bootmap_start = PAGE_ALIGN(bootmap_start + + ia64_boot_param.initrd_size); bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); efi_memmap_walk(free_available_memory, 0); reserve_bootmem(bootmap_start, bootmap_size); + #ifdef CONFIG_BLK_DEV_INITRD initrd_start = ia64_boot_param.initrd_start; + if (initrd_start) { + u64 start, size; +# define is_same_page(a,b) (((a)&PAGE_MASK) == ((b)&PAGE_MASK)) + +#if 1 + /* XXX for now some backwards compatibility... 
 */ + if (initrd_start >= PAGE_OFFSET) + printk("Warning: boot loader passed virtual address " + "for initrd, please upgrade the loader\n"); + } else +#endif + /* + * The loader ONLY passes physical addresses + */ + initrd_start = (unsigned long)__va(initrd_start); initrd_end = initrd_start+ia64_boot_param.initrd_size; + start = initrd_start; + size = ia64_boot_param.initrd_size; + printk("Initial ramdisk at: 0x%p (%lu bytes)\n", (void *) initrd_start, ia64_boot_param.initrd_size); - reserve_bootmem(virt_to_phys(initrd_start), ia64_boot_param.initrd_size); + + /* + * The kernel end and the beginning of initrd can be + * on the same page. This would cause the page to be + * reserved twice. While not harmful, it does lead to + * a warning message which can cause confusion. Thus, + * we make sure that in this case we only reserve new + * pages, i.e., initrd only pages. We need to: + * + * - align up start + * - adjust size of reserved section accordingly + * + * It should be noted that this operation is only + * valid for the reserve_bootmem() call and does not + * affect the integrity of the initrd itself. + * + * reserve_bootmem() considers partial pages as reserved. 
+ */ + if (is_same_page(initrd_start, (unsigned long)&_end)) { + start = PAGE_ALIGN(start); + size -= start-initrd_start; + + printk("Initial ramdisk & kernel on the same page: " + "reserving start=%lx size=%ld bytes\n", + start, size); + } + reserve_bootmem(__pa(start), size); } #endif #if 0 @@ -164,27 +214,21 @@ setup_arch (char **cmdline_p) /* process SAL system table: */ ia64_sal_init(efi.sal_systab); - *cmdline_p = __va(ia64_boot_param.command_line); - strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ - - printk("args to kernel: %s\n", *cmdline_p); - #ifdef CONFIG_SMP bootstrap_processor = hard_smp_processor_id(); current->processor = bootstrap_processor; #endif cpu_init(); /* initialize the bootstrap CPU */ +#ifdef CONFIG_IA64_GENERIC + machvec_init(acpi_get_sysname()); +#endif + if (efi.acpi) { /* Parse the ACPI tables */ acpi_parse(efi.acpi); } -#ifdef CONFIG_IA64_GENERIC - machvec_init(acpi_get_sysname()); -#endif - #ifdef CONFIG_VT # if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; @@ -197,8 +241,16 @@ setup_arch (char **cmdline_p) /* enable IA-64 Machine Check Abort Handling */ ia64_mca_init(); #endif + paging_init(); platform_setup(cmdline_p); + +#ifdef CONFIG_SWIOTLB + { + extern void setup_swiotlb (void); + setup_swiotlb(); + } +#endif } /* diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index b5153433f56e..d64305cf3eb4 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -319,6 +319,58 @@ smp_send_flush_tlb(void) } #endif /* !CONFIG_ITANIUM_PTCG */ +/* + * Run a function on another CPU + * The function to run. This must be fast and non-blocking. + * An arbitrary pointer to pass to the function. + * If true, keep retrying until ready. + * If true, wait until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. 
+ * + * Does not return until the remote CPU is nearly ready to execute + * or is or has executed. + */ + +int +smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int retry, int wait) +{ + struct smp_call_struct data; + long timeout; + int cpus = 1; + + if (cpuid == smp_processor_id()) { + printk(__FUNCTION__" trying to call self\n"); + return -EBUSY; + } + + data.func = func; + data.info = info; + data.wait = wait; + atomic_set(&data.unstarted_count, cpus); + atomic_set(&data.unfinished_count, cpus); + + if (pointer_lock(&smp_call_function_data, &data, retry)) + return -EBUSY; + + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_single(cpuid, IPI_CALL_FUNC); + + /* Wait for response */ + timeout = jiffies + HZ; + while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout)) + barrier(); + if (atomic_read(&data.unstarted_count) > 0) { + smp_call_function_data = NULL; + return -ETIMEDOUT; + } + if (wait) + while (atomic_read(&data.unfinished_count) > 0) + barrier(); + /* unlock pointer */ + smp_call_function_data = NULL; + return 0; +} + /* * Run a function on all other CPUs. * The function to run. This must be fast and non-blocking. @@ -396,13 +448,19 @@ void smp_do_timer(struct pt_regs *regs) { int cpu = smp_processor_id(); + int user = user_mode(regs); struct cpuinfo_ia64 *data = &cpu_data[cpu]; - if (!--data->prof_counter) { - irq_enter(cpu, TIMER_IRQ); - update_process_times(user_mode(regs)); + if (--data->prof_counter <= 0) { data->prof_counter = data->prof_multiplier; - irq_exit(cpu, TIMER_IRQ); + /* + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. 
+ */ + irq_enter(cpu, 0); + update_process_times(user); + irq_exit(cpu, 0); } } @@ -473,6 +531,11 @@ smp_callin(void) extern void ia64_rid_init(void); extern void ia64_init_itm(void); extern void ia64_cpu_local_tick(void); +#ifdef CONFIG_PERFMON + extern void perfmon_init_percpu(void); +#endif + + efi_map_pal_code(); cpu_init(); @@ -481,6 +544,10 @@ smp_callin(void) /* setup the CPU local timer tick */ ia64_init_itm(); +#ifdef CONFIG_PERFMON + perfmon_init_percpu(); +#endif + /* Disable all local interrupts */ ia64_set_lrr0(0, 1); ia64_set_lrr1(0, 1); diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 95b2b3fc3006..96ff76c019e0 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -150,11 +150,13 @@ do_gettimeofday (struct timeval *tv) static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - static unsigned long last_time; - static unsigned char count; int cpu = smp_processor_id(); unsigned long new_itm; +#if 0 + static unsigned long last_time; + static unsigned char count; int printed = 0; +#endif /* * Here we are in the timer irq handler. We have irqs locally @@ -192,7 +194,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) if (time_after(new_itm, ia64_get_itc())) break; -#if !(defined(CONFIG_IA64_SOFTSDV_HACKS) && defined(CONFIG_SMP)) +#if 0 /* * SoftSDV in SMP mode is _slow_, so we do "lose" ticks, * but it's really OK... 
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 4003b20f126e..bf1abd839ea6 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -204,11 +204,13 @@ disabled_fph_fault (struct pt_regs *regs) { struct task_struct *fpu_owner = ia64_get_fpu_owner(); + /* first, clear psr.dfh and psr.mfh: */ regs->cr_ipsr &= ~(IA64_PSR_DFH | IA64_PSR_MFH); if (fpu_owner != current) { ia64_set_fpu_owner(current); if (fpu_owner && ia64_psr(ia64_task_regs(fpu_owner))->mfh) { + ia64_psr(ia64_task_regs(fpu_owner))->mfh = 0; fpu_owner->thread.flags |= IA64_THREAD_FPH_VALID; __ia64_save_fpu(fpu_owner->thread.fph); } @@ -216,6 +218,11 @@ disabled_fph_fault (struct pt_regs *regs) __ia64_load_fpu(current->thread.fph); } else { __ia64_init_fpu(); + /* + * Set mfh because the state in thread.fph does not match + * the state in the fph partition. + */ + ia64_psr(regs)->mfh = 1; } } } diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 739007a9603e..5d0049f32688 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -62,7 +62,7 @@ #define UNW_LOG_HASH_SIZE (UNW_LOG_CACHE_SIZE + 1) #define UNW_HASH_SIZE (1 << UNW_LOG_HASH_SIZE) -#define UNW_DEBUG 1 +#define UNW_DEBUG 0 #define UNW_STATS 0 /* WARNING: this disabled interrupts for long time-spans!! 
*/ #if UNW_DEBUG diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S index 3b16916d039a..350e66256f27 100644 --- a/arch/ia64/lib/memcpy.S +++ b/arch/ia64/lib/memcpy.S @@ -1,3 +1,20 @@ +/* + * + * Optimized version of the standard memcpy() function + * + * Inputs: + * in0: destination address + * in1: source address + * in2: number of bytes to copy + * Output: + * no return value + * + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 Stephane Eranian + * Copyright (C) 2000 David Mosberger-Tang + */ +#include + #include GLOBAL_ENTRY(bcopy) @@ -10,77 +27,254 @@ END(bcopy) // FALL THROUGH GLOBAL_ENTRY(memcpy) -# define MEM_LAT 4 - -# define N MEM_LAT-1 -# define Nrot ((MEM_LAT + 7) & ~7) +# define MEM_LAT 2 /* latency to L1 cache */ # define dst r2 # define src r3 -# define len r9 -# define saved_pfs r10 -# define saved_lc r11 -# define saved_pr r16 -# define t0 r17 -# define cnt r18 +# define retval r8 +# define saved_pfs r9 +# define saved_lc r10 +# define saved_pr r11 +# define cnt r16 +# define src2 r17 +# define t0 r18 +# define t1 r19 +# define t2 r20 +# define t3 r21 +# define t4 r22 +# define src_end r23 +# define N (MEM_LAT + 4) +# define Nrot ((N + 7) & ~7) + + /* + * First, check if everything (src, dst, len) is a multiple of eight. If + * so, we handle everything with no taken branches (other than the loop + * itself) and a small icache footprint. Otherwise, we jump off to + * the more general copy routine handling arbitrary + * sizes/alignment etc. + */ UNW(.prologue) UNW(.save ar.pfs, saved_pfs) alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot +#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) lfetch [in1] +#else + nop.m 0 +#endif + or t0=in0,in1 + ;; - .rotr val[MEM_LAT] - .rotp p[MEM_LAT] - + or t0=t0,in2 UNW(.save ar.lc, saved_lc) mov saved_lc=ar.lc - - or t0=in0,in1 UNW(.save pr, saved_pr) mov saved_pr=pr - UNW(.body) - - mov ar.ec=MEM_LAT + cmp.eq p6,p0=in2,r0 // zero length? 
+ mov retval=in0 // return dst +(p6) br.ret.spnt.many rp // zero length, return immediately + ;; - mov r8=in0 // return dst - shr cnt=in2,3 // number of 8-byte words to copy + mov dst=in0 // copy because of rotation + shr.u cnt=in2,3 // number of 8-byte words to copy mov pr.rot=1<<16 ;; - cmp.eq p6,p0=in2,r0 // zero length? - or t0=t0,in2 -(p6) br.ret.spnt.many rp // yes, return immediately - mov dst=in0 // copy because of rotation - mov src=in1 // copy because of rotation adds cnt=-1,cnt // br.ctop is repeat/until + cmp.gtu p7,p0=16,in2 // copying less than 16 bytes? + UNW(.body) + mov ar.ec=N ;; + and t0=0x7,t0 mov ar.lc=cnt ;; cmp.ne p6,p0=t0,r0 -(p6) br.cond.spnt.few slow_memcpy + mov src=in1 // copy because of rotation +(p7) br.cond.spnt.few memcpy_short +(p6) br.cond.spnt.few memcpy_long + ;; + .rotr val[N] + .rotp p[N] 1: (p[0]) ld8 val[0]=[src],8 -(p[N]) st8 [dst]=val[N],8 - br.ctop.sptk.few 1b +(p[N-1])st8 [dst]=val[N-1],8 + br.ctop.dptk.few 1b ;; -.exit: mov ar.lc=saved_lc - mov pr=saved_pr,0xffffffffffff0000 + mov pr=saved_pr,-1 mov ar.pfs=saved_pfs br.ret.sptk.many rp -slow_memcpy: - adds cnt=-1,in2 + /* + * Small (<16 bytes) unaligned copying is done via a simple byte-at-the-time + * copy loop. This performs relatively poorly on Itanium, but it doesn't + * get used very often (gcc inlines small copies) and due to atomicity + * issues, we want to avoid read-modify-write of entire words. + */ + .align 32 +memcpy_short: + adds cnt=-1,in2 // br.ctop is repeat/until + mov ar.ec=MEM_LAT ;; mov ar.lc=cnt ;; + /* + * It is faster to put a stop bit in the loop here because it makes + * the pipeline shorter (and latency is what matters on short copies). 
+ */ 1: (p[0]) ld1 val[0]=[src],1 -(p[N]) st1 [dst]=val[N],1 - br.ctop.sptk.few 1b - br.sptk.few .exit + ;; +(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1 + br.ctop.dptk.few 1b + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + /* + * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't + * an overriding concern here, but throughput is. We first do + * sub-word copying until the destination is aligned, then we check + * if the source is also aligned. If so, we do a simple load/store-loop + * until there are less than 8 bytes left over and then we do the tail, + * by storing the last few bytes using sub-word copying. If the source + * is not aligned, we branch off to the non-congruent loop. + * + * stage: op: + * 0 ld + * : + * MEM_LAT+3 shrp + * MEM_LAT+4 st + * + * On Itanium, the pipeline itself runs without stalls. However, br.ctop + * seems to introduce an unavoidable bubble in the pipeline so the overall + * latency is 2 cycles/iteration. This gives us a _copy_ throughput + * of 4 byte/cycle. Still not bad. 
+ */ +# undef N +# undef Nrot +# define N (MEM_LAT + 5) /* number of stages */ +# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */ + +#define LOG_LOOP_SIZE 6 + +memcpy_long: + alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame + and t0=-8,src // t0 = src & ~7 + and t2=7,src // t2 = src & 7 + ;; + ld8 t0=[t0] // t0 = 1st source word + adds src2=7,src // src2 = (src + 7) + sub t4=r0,dst // t4 = -dst + ;; + and src2=-8,src2 // src2 = (src + 7) & ~7 + shl t2=t2,3 // t2 = 8*(src & 7) + shl t4=t4,3 // t4 = 8*(dst & 7) + ;; + ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise + sub t3=64,t2 // t3 = 64-8*(src & 7) + shr.u t0=t0,t2 + ;; + add src_end=src,in2 + shl t1=t1,t3 + mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7) + ;; + or t0=t0,t1 + mov cnt=r0 + adds src_end=-1,src_end + ;; +(p3) st1 [dst]=t0,1 +(p3) shr.u t0=t0,8 +(p3) adds cnt=1,cnt + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 +(p4) adds cnt=2,cnt + ;; +(p5) st4 [dst]=t0,4 +(p5) adds cnt=4,cnt + and src_end=-8,src_end // src_end = last word of source buffer + ;; + + // At this point, dst is aligned to 8 bytes and there at least 16-7=9 bytes left to copy: + +1:{ add src=cnt,src // make src point to remainder of source buffer + sub cnt=in2,cnt // cnt = number of bytes left to copy + mov t4=ip + } ;; + and src2=-8,src // align source pointer + adds t4=memcpy_loops-1b,t4 + mov ar.ec=N + + and t0=7,src // t0 = src & 7 + shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy + shl cnt=cnt,3 // move bits 0-2 to 3-5 + ;; + + .rotr val[N+1], w[2] + .rotp p[N] + + cmp.ne p6,p0=t0,r0 // is src aligned, too? + shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7) + adds t2=-1,t2 // br.ctop is repeat/until + ;; + add t4=t0,t4 + mov pr=cnt,0x38 // set (p5,p4,p3) to # of bytes last-word bytes to copy + mov ar.lc=t2 + ;; +(p6) ld8 val[1]=[src2],8 // prime the pump... 
+ mov b6=t4 + br.sptk.few b6 + ;; + +memcpy_tail: + // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is + // less than 8) and t0 contains the last few bytes of the src buffer: +(p5) st4 [dst]=t0,4 +(p5) shr.u t0=t0,32 + mov ar.lc=saved_lc + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 + mov ar.pfs=saved_pfs + ;; +(p3) st1 [dst]=t0 + mov pr=saved_pr,-1 + br.ret.sptk.many rp + +/////////////////////////////////////////////////////// + .align 64 + +#define COPY(shift,index) \ + 1: \ + { .mfi \ + (p[0]) ld8 val[0]=[src2],8; \ + nop.f 0; \ + (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \ + }; \ + { .mbb \ + (p[MEM_LAT+4]) st8 [dst]=w[1],8; \ + nop.b 0; \ + br.ctop.dptk.few 1b; \ + }; \ + ;; \ + ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \ + ;; \ + shrp t0=val[N-1],val[N-index],shift; \ + br memcpy_tail +memcpy_loops: + COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */ + COPY(8, 0) + COPY(16, 0) + COPY(24, 0) + COPY(32, 0) + COPY(40, 0) + COPY(48, 0) + COPY(56, 0) END(memcpy) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 8ddda7e1192f..3652cfc80429 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -185,8 +185,42 @@ free_initmem (void) void free_initrd_mem(unsigned long start, unsigned long end) { + /* + * EFI uses 4KB pages while the kernel can use 4KB or bigger. + * Thus EFI and the kernel may have different page sizes. It is + * therefore possible to have the initrd share the same page as + * the end of the kernel (given current setup). 
+ * + * To avoid freeing/using the wrong page (kernel sized) we: + * - align up the beginning of initrd + * - keep the end untouched + * + * | | + * |=============| a000 + * | | + * | | + * | | 9000 + * |/////////////| + * |/////////////| + * |=============| 8000 + * |///INITRD////| + * |/////////////| + * |/////////////| 7000 + * | | + * |KKKKKKKKKKKKK| + * |=============| 6000 + * |KKKKKKKKKKKKK| + * |KKKKKKKKKKKKK| + * K=kernel using 8KB pages + * + * In this example, we must free page 8000 ONLY. So we must align up + * initrd_start and keep initrd_end as is. + */ + start = PAGE_ALIGN(start); + if (start < end) printk ("Freeing initrd memory: %ldkB freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { clear_bit(PG_reserved, &virt_to_page(start)->flags); set_page_count(virt_to_page(start), 1); @@ -423,5 +457,4 @@ mem_init (void) #ifdef CONFIG_IA32_SUPPORT ia32_gdt_init(); #endif - return; } diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 026f88998968..875ce446ce14 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -1,8 +1,11 @@ /* * TLB support routines. * - * Copyright (C) 1998, 1999 Hewlett-Packard Co - * Copyright (C) 1998, 1999 David Mosberger-Tang + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang + * + * 08/02/00 A. 
Mallick + * Modified RID allocation for SMP */ #include #include @@ -27,9 +30,11 @@ 1 << _PAGE_SIZE_8K | \ 1 << _PAGE_SIZE_4K ) -static void wrap_context (struct mm_struct *mm); - -unsigned long ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1; +struct ia64_ctx ia64_ctx = { + lock: SPIN_LOCK_UNLOCKED, + next: 1, + limit: (1UL << IA64_HW_CONTEXT_BITS) +}; /* * Put everything in a struct so we avoid the global offset table whenever @@ -106,49 +111,43 @@ flush_tlb_no_ptcg (unsigned long start, unsigned long end, unsigned long nbits) #endif /* CONFIG_SMP && !CONFIG_ITANIUM_PTCG */ -void -get_new_mmu_context (struct mm_struct *mm) -{ - if ((ia64_next_context & IA64_HW_CONTEXT_MASK) == 0) { - wrap_context(mm); - } - mm->context = ia64_next_context++; -} - /* - * This is where we handle the case where (ia64_next_context & - * IA64_HW_CONTEXT_MASK) == 0. Whenever this happens, we need to - * flush the entire TLB and skip over region id number 0, which is - * used by the kernel. + * Acquire the ia64_ctx.lock before calling this function! */ -static void -wrap_context (struct mm_struct *mm) +void +wrap_mmu_context (struct mm_struct *mm) { - struct task_struct *task; + struct task_struct *tsk; + unsigned long tsk_context; + + if (ia64_ctx.next >= (1UL << IA64_HW_CONTEXT_BITS)) + ia64_ctx.next = 300; /* skip daemons */ + ia64_ctx.limit = (1UL << IA64_HW_CONTEXT_BITS); /* - * We wrapped back to the first region id so we nuke the TLB - * so we can switch to the next generation of region ids. + * Scan all the task's mm->context and set proper safe range */ - __flush_tlb_all(); - if (ia64_next_context++ == 0) { - /* - * Oops, we've used up all 64 bits of the context - * space---walk through task table to ensure we don't - * get tricked into using an old context. If this - * happens, the machine has been running for a long, - * long time! 
- */ - ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1; - read_lock(&tasklist_lock); - for_each_task (task) { - if (task->mm == mm) - continue; - flush_tlb_mm(mm); + read_lock(&tasklist_lock); + repeat: + for_each_task(tsk) { + if (!tsk->mm) + continue; + tsk_context = tsk->mm->context; + if (tsk_context == ia64_ctx.next) { + if (++ia64_ctx.next >= ia64_ctx.limit) { + /* empty range: reset the range limit and start over */ + if (ia64_ctx.next >= (1UL << IA64_HW_CONTEXT_BITS)) + ia64_ctx.next = 300; + ia64_ctx.limit = (1UL << IA64_HW_CONTEXT_BITS); + goto repeat; + } } - read_unlock(&tasklist_lock); + if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit)) + ia64_ctx.limit = tsk_context; } + read_unlock(&tasklist_lock); + flush_tlb_all(); } void diff --git a/arch/ia64/sn/sn1/irq.c b/arch/ia64/sn/sn1/irq.c index df8e56943af6..a8270fd2a407 100644 --- a/arch/ia64/sn/sn1/irq.c +++ b/arch/ia64/sn/sn1/irq.c @@ -1,9 +1,10 @@ #include +#include +#include -#include #include -static int +static unsigned int sn1_startup_irq(unsigned int irq) { return(0); @@ -24,23 +25,16 @@ sn1_enable_irq(unsigned int irq) { } -static int -sn1_handle_irq(unsigned int irq, struct pt_regs *regs) -{ - return(0); -} - struct hw_interrupt_type irq_type_sn1 = { "sn1_irq", sn1_startup_irq, sn1_shutdown_irq, - sn1_handle_irq, sn1_enable_irq, sn1_disable_irq }; void -sn1_irq_init (struct irq_desc desc[NR_IRQS]) +sn1_irq_init (void) { int i; diff --git a/arch/ia64/sn/sn1/machvec.c b/arch/ia64/sn/sn1/machvec.c index 2e36b2e082b4..409d9a2eae56 100644 --- a/arch/ia64/sn/sn1/machvec.c +++ b/arch/ia64/sn/sn1/machvec.c @@ -1,4 +1,2 @@ +#define MACHVEC_PLATFORM_NAME sn1 #include -#include - -MACHVEC_DEFINE(sn1) diff --git a/arch/ia64/sn/sn1/setup.c b/arch/ia64/sn/sn1/setup.c index 45242fc26bff..7b397bb6b012 100644 --- a/arch/ia64/sn/sn1/setup.c +++ b/arch/ia64/sn/sn1/setup.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/ia64/vmlinux.lds.S 
b/arch/ia64/vmlinux.lds.S index 5c199bc1a825..c8c13363c26b 100644 --- a/arch/ia64/vmlinux.lds.S +++ b/arch/ia64/vmlinux.lds.S @@ -46,6 +46,15 @@ SECTIONS { *(__ex_table) } __stop___ex_table = .; +#if defined(CONFIG_IA64_GENERIC) + /* Machine Vector */ + . = ALIGN(16); + machvec_start = .; + .machvec : AT(ADDR(.machvec) - PAGE_OFFSET) + { *(.machvec) } + machvec_end = .; +#endif + __start___ksymtab = .; /* Kernel symbol table */ __ksymtab : AT(ADDR(__ksymtab) - PAGE_OFFSET) { *(__ksymtab) } diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c index 82c95177f64e..d9d73470a692 100644 --- a/arch/mips/kernel/sysirix.c +++ b/arch/mips/kernel/sysirix.c @@ -1622,7 +1622,7 @@ asmlinkage int irix_mmap64(struct pt_regs *regs) pgoff = (off1 << (32 - PAGE_SHIFT)) | (off2 >> PAGE_SHIFT); if (!(flags & MAP_ANONYMOUS)) { - if (!(file = fcheck(fd))) { + if (!(file = fget(fd))) { error = -EBADF; goto out; } diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 6154453645e4..c8c79591cbb2 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -1,4 +1,4 @@ -# $Id: Makefile,v 1.59 2000/07/16 18:21:24 ecd Exp $ +# $Id: Makefile,v 1.60 2000/08/12 08:35:53 ecd Exp $ # Makefile for the linux kernel. # # Note! Dependencies are done automagically by 'make dep', which also @@ -7,6 +7,8 @@ # # Note 2! The CFLAGS definitions are now in the main makefile... +SH = $(CONFIG_SHELL) + .S.s: $(CPP) $(AFLAGS) -ansi $< -o $*.s diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c index dde73f4cfcfa..ac252f55a735 100644 --- a/arch/sparc/kernel/sys_sunos.c +++ b/arch/sparc/kernel/sys_sunos.c @@ -1,4 +1,4 @@ -/* $Id: sys_sunos.c,v 1.129 2000/07/10 20:57:35 davem Exp $ +/* $Id: sys_sunos.c,v 1.130 2000/08/12 13:25:41 davem Exp $ * sys_sunos.c: SunOS specific syscall compatibility support. * * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S index 81ade3f9824e..d243122a149a 100644 --- a/arch/sparc/kernel/systbls.S +++ b/arch/sparc/kernel/systbls.S @@ -1,4 +1,4 @@ -/* $Id: systbls.S,v 1.97 2000/04/13 00:55:49 davem Exp $ +/* $Id: systbls.S,v 1.98 2000/08/12 13:25:41 davem Exp $ * systbls.S: System call entry point tables for OS compatibility. * The native Linux system call table lives here also. * diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile index c93f2efc8aeb..9bbc2e1931cf 100644 --- a/arch/sparc64/kernel/Makefile +++ b/arch/sparc64/kernel/Makefile @@ -1,4 +1,4 @@ -# $Id: Makefile,v 1.61 2000/08/09 08:25:19 jj Exp $ +# $Id: Makefile,v 1.62 2000/08/12 08:35:53 ecd Exp $ # Makefile for the linux kernel. # # Note! Dependencies are done automagically by 'make dep', which also @@ -7,6 +7,8 @@ # # Note 2! The CFLAGS definitions are now in the main makefile... +SH = $(CONFIG_SHELL) + .S.s: $(CPP) $(AFLAGS) -ansi $< -o $*.s diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index e84155b067d9..181909080a9b 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -1,4 +1,4 @@ -/* $Id: sys_sparc32.c,v 1.159 2000/08/08 02:47:50 davem Exp $ +/* $Id: sys_sparc32.c,v 1.160 2000/08/12 13:25:41 davem Exp $ * sys_sparc32.c: Conversion between 32bit and 64bit native syscalls. 
* * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) @@ -1180,7 +1180,7 @@ struct readdir_callback32 { }; static int fillonedir(void * __buf, const char * name, int namlen, - off_t offset, ino_t ino) + off_t offset, ino_t ino, unsigned int d_type) { struct readdir_callback32 * buf = (struct readdir_callback32 *) __buf; struct old_linux_dirent32 * dirent; diff --git a/arch/sparc64/kernel/sys_sunos32.c b/arch/sparc64/kernel/sys_sunos32.c index 449f6fd8ba83..75d5c096eb0e 100644 --- a/arch/sparc64/kernel/sys_sunos32.c +++ b/arch/sparc64/kernel/sys_sunos32.c @@ -1,4 +1,4 @@ -/* $Id: sys_sunos32.c,v 1.53 2000/07/30 23:12:24 davem Exp $ +/* $Id: sys_sunos32.c,v 1.54 2000/08/12 13:25:41 davem Exp $ * sys_sunos32.c: SunOS binary compatability layer on sparc64. * * Copyright (C) 1995, 1996, 1997 David S. Miller (davem@caip.rutgers.edu) diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index c090ccac88f3..bb5066db4cf8 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -1,4 +1,4 @@ -/* $Id: systbls.S,v 1.74 2000/07/13 10:59:13 davem Exp $ +/* $Id: systbls.S,v 1.75 2000/08/12 13:25:42 davem Exp $ * systbls.S: System call entry point tables for OS compatibility. * The native Linux system call table lives here also. * diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index 8c9b7c9fd240..9ebe1f49468c 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -1,4 +1,4 @@ -/* $Id: fault.c,v 1.49 2000/08/09 00:00:15 davem Exp $ +/* $Id: fault.c,v 1.50 2000/08/11 03:00:13 davem Exp $ * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc. * * Copyright (C) 1996 David S. 
Miller (davem@caip.rutgers.edu) @@ -42,7 +42,7 @@ unsigned long __init prom_probe_memory (void) sp_banks[0].base_addr = base_paddr; sp_banks[0].num_bytes = bytes; - while (mlist->theres_more != (void *) 0){ + while (mlist->theres_more != (void *) 0) { i++; mlist = mlist->theres_more; bytes = mlist->num_bytes; @@ -68,7 +68,7 @@ unsigned long __init prom_probe_memory (void) /* Now mask all bank sizes on a page boundary, it is all we can * use anyways. */ - for(i=0; sp_banks[i].num_bytes != 0; i++) + for (i = 0; sp_banks[i].num_bytes != 0; i++) sp_banks[i].num_bytes &= PAGE_MASK; return tally; @@ -77,7 +77,7 @@ unsigned long __init prom_probe_memory (void) void unhandled_fault(unsigned long address, struct task_struct *tsk, struct pt_regs *regs) { - if((unsigned long) address < PAGE_SIZE) { + if ((unsigned long) address < PAGE_SIZE) { printk(KERN_ALERT "Unable to handle kernel NULL " "pointer dereference\n"); } else { @@ -100,17 +100,17 @@ static unsigned int get_user_insn(unsigned long tpc) unsigned long pa; u32 insn = 0; - if(pgd_none(*pgdp)) + if (pgd_none(*pgdp)) goto out; pmdp = pmd_offset(pgdp, tpc); - if(pmd_none(*pmdp)) + if (pmd_none(*pmdp)) goto out; ptep = pte_offset(pmdp, tpc); pte = *ptep; - if(!pte_present(pte)) + if (!pte_present(pte)) goto out; - pa = phys_base + (sparc64_pte_pagenr(pte) << PAGE_SHIFT); + pa = (pte_val(pte) & _PAGE_PADDR); pa += (tpc & ~PAGE_MASK); /* Use phys bypass so we don't pollute dtlb/dcache. 
*/ diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index f083a46a4ea9..b56c19dc8fc2 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -256,8 +256,7 @@ void blk_queue_pluggable (request_queue_t * q, plug_device_fn *plug) /** - * blk_queue_make_request - define an alternate make_request function for a - * device + * blk_queue_make_request - define an alternate make_request function for a device * @q: the request queue for the device to be affected * @mfn: the alternate make_request function * diff --git a/drivers/block/xor.c b/drivers/block/xor.c index 362ec6f8f793..eb99582de095 100644 --- a/drivers/block/xor.c +++ b/drivers/block/xor.c @@ -1175,7 +1175,7 @@ xor_block_VIS: wr %%g0, 0, %%fprs " : : "i" (&((struct buffer_head *)0)->b_data), - "i" (&((struct buffer_head *)0)->b_data), + "i" (&((struct buffer_head *)0)->b_size), "i" (FPRS_FEF|FPRS_DU), "i" (ASI_BLK_P), "i" (FPRS_FEF), "i" (VISenter)); } diff --git a/drivers/char/agp/agpgart_be.c b/drivers/char/agp/agpgart_be.c index e5fbba8b540c..b775446edd6d 100644 --- a/drivers/char/agp/agpgart_be.c +++ b/drivers/char/agp/agpgart_be.c @@ -67,14 +67,16 @@ static inline void flush_cache(void) { #if defined(__i386__) asm volatile ("wbinvd":::"memory"); -#elif defined(__alpha__) +#elif defined(__alpha__) || defined(__ia64__) /* ??? I wonder if we'll really need to flush caches, or if the core logic can manage to keep the system coherent. The ARM speaks only of using `cflush' to get things in memory in preparation for power failure. If we do need to call `cflush', we'll need a target page, - as we can only flush one page at a time. */ + as we can only flush one page at a time. + + Ditto for IA-64. --davidm 00/08/07 */ mb(); #else #error "Please define flush_cache." 
diff --git a/drivers/char/drm/agpsupport.c b/drivers/char/drm/agpsupport.c index 7ed234e153f4..8b04dc6e06d0 100644 --- a/drivers/char/drm/agpsupport.c +++ b/drivers/char/drm/agpsupport.c @@ -322,7 +322,7 @@ drm_agp_head_t *drm_agp_init(void) case ALI_M1541: head->chipset = "ALi M1541"; break; default: head->chipset = "Unknown"; break; } - DRM_INFO("AGP %d.%d on %s @ 0x%08lx %dMB\n", + DRM_INFO("AGP %d.%d on %s @ 0x%08lx %ZuMB\n", head->agp_info.version.major, head->agp_info.version.minor, head->chipset, diff --git a/drivers/char/drm/lists.c b/drivers/char/drm/lists.c index f62495aa21d2..a1ca196df913 100644 --- a/drivers/char/drm/lists.c +++ b/drivers/char/drm/lists.c @@ -153,6 +153,7 @@ int drm_freelist_put(drm_device_t *dev, drm_freelist_t *bl, drm_buf_t *buf) #endif buf->list = DRM_LIST_FREE; do { + /* XXX this is wrong due to the ABA problem! --davidm 00/08/07 */ old = bl->next; buf->next = old; prev = cmpxchg(&bl->next, old, buf); @@ -185,6 +186,7 @@ static drm_buf_t *drm_freelist_try(drm_freelist_t *bl) /* Get buffer */ do { + /* XXX this is wrong due to the ABA problem! 
--davidm 00/08/07 */ old = bl->next; if (!old) return NULL; new = bl->next->next; diff --git a/drivers/char/drm/vm.c b/drivers/char/drm/vm.c index d295529ba76d..7c5a24bc988a 100644 --- a/drivers/char/drm/vm.c +++ b/drivers/char/drm/vm.c @@ -250,7 +250,7 @@ int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma) vma->vm_start, vma->vm_end, VM_OFFSET(vma)); /* Length must match exact page count */ - if ((length >> PAGE_SHIFT) != dma->page_count) { + if (!dma || (length >> PAGE_SHIFT) != dma->page_count) { unlock_kernel(); return -EINVAL; } @@ -323,6 +323,9 @@ int drm_mmap(struct file *filp, struct vm_area_struct *vma) pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; pgprot_val(vma->vm_page_prot) &= ~_PAGE_PWT; } +#elif defined(__ia64__) + if (map->type != _DRM_AGP) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); #endif vma->vm_flags |= VM_IO; /* not in core dump */ } diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c index 9bb1748ae6a9..4639f34ebdc6 100644 --- a/drivers/char/efirtc.c +++ b/drivers/char/efirtc.c @@ -395,11 +395,10 @@ efi_rtc_init(void) return 0; } -static int __exit +static void __exit efi_rtc_exit(void) { /* not yet used */ - return 0; } module_init(efi_rtc_init); diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 4e8d4db7321d..252652c23e89 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1839,9 +1839,12 @@ void do_SAK( struct tty_struct *tty) read_lock(&tasklist_lock); for_each_task(p) { if ((p->tty == tty) || - ((session > 0) && (p->session == session))) + ((session > 0) && (p->session == session))) { send_sig(SIGKILL, p, 1); - else if (p->files) { + continue; + } + task_lock(p); + if (p->files) { read_lock(&p->files->file_lock); /* FIXME: p->files could change */ for (i=0; i < p->files->max_fds; i++) { @@ -1854,6 +1857,7 @@ void do_SAK( struct tty_struct *tty) } read_unlock(&p->files->file_lock); } + task_unlock(p); } read_unlock(&tasklist_lock); #endif diff --git a/drivers/net/eepro100.c 
b/drivers/net/eepro100.c index c343bc53c711..485852c0edca 100644 --- a/drivers/net/eepro100.c +++ b/drivers/net/eepro100.c @@ -23,6 +23,8 @@ Convert to new PCI driver interface 2000 Mar 24 Dragan Stancevic Disabled FC and ER, to avoid lockups when when we get FCP interrupts. + 2000 Jul 17 Goutham Rao + PCI DMA API fixes, adding pci_dma_sync_single calls where neccesary */ static const char *version = @@ -515,6 +517,7 @@ struct speedo_private { spinlock_t lock; /* Group with Tx control cache line. */ u32 tx_threshold; /* The value for txdesc.count. */ struct RxFD *last_rxf; /* Last filled RX buffer. */ + dma_addr_t last_rxf_dma; unsigned int cur_rx, dirty_rx; /* The next free ring entry */ long last_rx_time; /* Last Rx, in jiffies, to handle Rx hang. */ const char *product_name; @@ -1213,19 +1216,24 @@ speedo_init_rx_ring(struct net_device *dev) sp->rx_ring_dma[i] = pci_map_single(sp->pdev, rxf, PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); skb_reserve(skb, sizeof(struct RxFD)); - if (last_rxf) + if (last_rxf) { last_rxf->link = cpu_to_le32(sp->rx_ring_dma[i]); + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[i-1], sizeof(struct RxFD), PCI_DMA_TODEVICE); + } last_rxf = rxf; rxf->status = cpu_to_le32(0x00000001); /* '1' is flag value only. */ rxf->link = 0; /* None yet. */ /* This field unused by i82557. */ rxf->rx_buf_addr = 0xffffffff; rxf->count = cpu_to_le32(PKT_BUF_SZ << 16); + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[i], sizeof(struct RxFD), PCI_DMA_TODEVICE); } sp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); /* Mark the last entry as end-of-list. */ last_rxf->status = cpu_to_le32(0xC0000002); /* '2' is flag value only. 
*/ + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[RX_RING_SIZE-1], sizeof(struct RxFD), PCI_DMA_TODEVICE); sp->last_rxf = last_rxf; + sp->last_rxf_dma = sp->rx_ring_dma[RX_RING_SIZE-1]; } static void speedo_purge_tx(struct net_device *dev) @@ -1660,6 +1668,7 @@ static inline struct RxFD *speedo_rx_alloc(struct net_device *dev, int entry) skb->dev = dev; skb_reserve(skb, sizeof(struct RxFD)); rxf->rx_buf_addr = 0xffffffff; + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry], sizeof(struct RxFD), PCI_DMA_TODEVICE); return rxf; } @@ -1672,7 +1681,9 @@ static inline void speedo_rx_link(struct net_device *dev, int entry, rxf->count = cpu_to_le32(PKT_BUF_SZ << 16); sp->last_rxf->link = cpu_to_le32(rxf_dma); sp->last_rxf->status &= cpu_to_le32(~0xC0000000); + pci_dma_sync_single(sp->pdev, sp->last_rxf_dma, sizeof(struct RxFD), PCI_DMA_TODEVICE); sp->last_rxf = rxf; + sp->last_rxf_dma = rxf_dma; } static int speedo_refill_rx_buf(struct net_device *dev, int force) @@ -1738,9 +1749,17 @@ speedo_rx(struct net_device *dev) if (speedo_debug > 4) printk(KERN_DEBUG " In speedo_rx().\n"); /* If we own the next entry, it's a new packet. Send it up. */ - while (sp->rx_ringp[entry] != NULL && - (status = le32_to_cpu(sp->rx_ringp[entry]->status)) & RxComplete) { - int pkt_len = le32_to_cpu(sp->rx_ringp[entry]->count) & 0x3fff; + while (sp->rx_ringp[entry] != NULL) { + int pkt_len; + + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry], + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); + + if(!((status = le32_to_cpu(sp->rx_ringp[entry]->status)) & RxComplete)) { + break; + } + + pkt_len = le32_to_cpu(sp->rx_ringp[entry]->count) & 0x3fff; if (--rx_work_limit < 0) break; @@ -1782,7 +1801,8 @@ speedo_rx(struct net_device *dev) skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ /* 'skb_put()' points to the start of sk_buff data area. 
*/ pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry], - PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); + sizeof(struct RxFD) + pkt_len, PCI_DMA_FROMDEVICE); + #if 1 || USE_IP_CSUM /* Packet is in one chunk -- we can copy + cksum. */ eth_copy_and_sum(skb, sp->rx_skbuff[entry]->tail, pkt_len, 0); @@ -2166,6 +2186,8 @@ static void set_rx_mode(struct net_device *dev) mc_setup_frm->link = cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE)); + pci_dma_sync_single(sp->pdev, mc_blk->frame_dma, mc_blk->len, PCI_DMA_TODEVICE); + wait_for_cmd_done(ioaddr + SCBCmd); clear_suspend(last_cmd); /* Immediately trigger the command unit resume. */ diff --git a/drivers/net/fc/Makefile b/drivers/net/fc/Makefile index 87e599863f5b..a36b9f85576e 100644 --- a/drivers/net/fc/Makefile +++ b/drivers/net/fc/Makefile @@ -1,27 +1,15 @@ - +# # Makefile for linux/drivers/net/fc # -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). +# 9 Aug 2000, Christoph Hellwig +# Rewritten to use lists instead of if-statements. # -L_TARGET := fc.a -L_OBJS := -M_OBJS := -MX_OBJS := -FC_SRCS = $(wildcard $(L_OBJS:%.o=%.c)) +O_TARGET := fc.o -ifeq ($(CONFIG_IPHASE5526),y) -L_OBJS += iph5526.o -else - ifeq ($(CONFIG_IPHASE5526),m) - M_OBJS += iph5526.o - endif -endif +obj-$(CONFIG_IPHASE5526) += iph5526.o -include $(TOPDIR)/Rules.make - -clean: - rm *.o +O_OBJS := $(obj-y) +M_OBJS := $(obj-m) +include $(TOPDIR)/Rules.make diff --git a/drivers/net/hamradio/Makefile b/drivers/net/hamradio/Makefile index c2fb726c3e69..7ca3c1efdb6b 100644 --- a/drivers/net/hamradio/Makefile +++ b/drivers/net/hamradio/Makefile @@ -1,153 +1,55 @@ -# File: drivers/hamradio/Makefile # # Makefile for the Linux AX.25 and HFMODEM device drivers. # +# # 19971130 Moved the amateur radio related network drivers from # drivers/net/ to drivers/hamradio for easier maintainance. 
# Joerg Reuter DL1BKE +# +# 20000806 Rewritten to use lists instead of if-statements. +# Christoph Hellwig +# SUB_DIRS := MOD_SUB_DIRS := $(SUB_DIRS) -ALL_SUB_DIRS := $(SUB_DIRS) +ALL_SUB_DIRS := $(SUB_DIRS) soundmodem O_TARGET := hamradio.o -O_OBJS := -M_OBJS := - -# Need these to keep track of whether the hdlc module should -# really go in the kernel or a module. -CONFIG_HDLCDRV_BUILTIN := -CONFIG_HDLCDRV_MODULE := - -ifeq ($(CONFIG_DMASCC),y) -O_OBJS += dmascc.o -else - ifeq ($(CONFIG_DMASCC),m) - M_OBJS += dmascc.o - endif -endif - -ifeq ($(CONFIG_SCC),y) -O_OBJS += scc.o -else - ifeq ($(CONFIG_SCC),m) - M_OBJS += scc.o - endif -endif - -ifeq ($(CONFIG_MKISS),y) -O_OBJS += mkiss.o -else - ifeq ($(CONFIG_MKISS),m) - M_OBJS += mkiss.o - endif -endif - -ifeq ($(CONFIG_6PACK),y) -O_OBJS += 6pack.o -else - ifeq ($(CONFIG_6PACK),m) - M_OBJS += 6pack.o - endif -endif - -ifeq ($(CONFIG_YAM),y) -O_OBJS += yam.o -else - ifeq ($(CONFIG_YAM),m) - M_OBJS += yam.o - endif -endif - -ifeq ($(CONFIG_PI),y) -O_OBJS += pi2.o -else - ifeq ($(CONFIG_PI),m) - M_OBJS += pi2.o - endif -endif -ifeq ($(CONFIG_PT),y) -O_OBJS += pt.o -else - ifeq ($(CONFIG_PT),m) - M_OBJS += pt.o - endif -endif - -ifeq ($(CONFIG_BPQETHER),y) -O_OBJS += bpqether.o -else - ifeq ($(CONFIG_BPQETHER),m) - M_OBJS += bpqether.o - endif -endif +export-objs = hdlcdrv.o -ifeq ($(CONFIG_BAYCOM_SER_FDX),y) -O_OBJS += baycom_ser_fdx.o -CONFIG_HDLCDRV_BUILTIN = y -else - ifeq ($(CONFIG_BAYCOM_SER_FDX),m) - CONFIG_HDLCDRV_MODULE = y - M_OBJS += baycom_ser_fdx.o - endif -endif -ifeq ($(CONFIG_BAYCOM_SER_HDX),y) -O_OBJS += baycom_ser_hdx.o -CONFIG_HDLCDRV_BUILTIN = y -else - ifeq ($(CONFIG_BAYCOM_SER_HDX),m) - CONFIG_HDLCDRV_MODULE = y - M_OBJS += baycom_ser_hdx.o - endif -endif - -ifeq ($(CONFIG_BAYCOM_PAR),y) -O_OBJS += baycom_par.o -CONFIG_HDLCDRV_BUILTIN = y -else - ifeq ($(CONFIG_BAYCOM_PAR),m) - CONFIG_HDLCDRV_MODULE = y - M_OBJS += baycom_par.o - endif -endif - -ifeq ($(CONFIG_BAYCOM_EPP),y) -O_OBJS += baycom_epp.o 
-CONFIG_HDLCDRV_BUILTIN = y -else - ifeq ($(CONFIG_BAYCOM_EPP),m) - CONFIG_HDLCDRV_MODULE = y - M_OBJS += baycom_epp.o - endif -endif +obj-$(CONFIG_DMASCC) += dmascc.o +obj-$(CONFIG_SCC) += scc.o +obj-$(CONFIG_MKISS) += mkiss.o +obj-$(CONFIG_6PACK) += 6pack.o +obj-$(CONFIG_YAM) += yam.o +obj-$(CONFIG_PI) += pi2.o +obj-$(CONFIG_PT) += pt.o +obj-$(CONFIG_BPQETHER) += bpqether.o +obj-$(CONFIG_BAYCOM_SER_FDX) += baycom_ser_fdx.o hdlcdrv.o +obj-$(CONFIG_BAYCOM_SER_HDX) += baycom_ser_hdx.o hdlcdrv.o +obj-$(CONFIG_BAYCOM_PAR) += baycom_par.o hdlcdrv.o +obj-$(CONFIG_BAYCOM_EPP) += baycom_epp.o hdlcdrv.o +obj-$(CONFIG_SOUNDMODEM) += hdlcdrv.o ifeq ($(CONFIG_SOUNDMODEM),y) -ALL_SUB_DIRS += soundmodem SUB_DIRS += soundmodem O_OBJS += soundmodem/soundmodem.o -CONFIG_HDLCDRV_BUILTIN = y else ifeq ($(CONFIG_SOUNDMODEM),m) - CONFIG_HDLCDRV_MODULE = y - ALL_SUB_DIRS += soundmodem MOD_SUB_DIRS += soundmodem endif endif -# If anything built-in uses the hdlcdrv, then build it into the kernel also. -# If not, but a module uses it, build as a module. -ifdef CONFIG_HDLCDRV_BUILTIN -OX_OBJS += hdlcdrv.o -else - ifdef CONFIG_HDLCDRV_MODULE - MX_OBJS += hdlcdrv.o - endif -endif +# Files that are both resident and modular: remove from modular. +obj-m := $(filter-out $(obj-y), $(obj-m)) -include $(TOPDIR)/Rules.make +# Translate to Rules.make lists. +O_OBJS := $(filter-out $(export-objs), $(obj-y)) +OX_OBJS := $(filter $(export-objs), $(obj-y)) +M_OBJS := $(sort $(filter-out $(export-objs), $(obj-m))) +MX_OBJS := $(sort $(filter $(export-objs), $(obj-m))) -clean: - rm -f core *.o *.a *.s +include $(TOPDIR)/Rules.make diff --git a/drivers/net/irda/Makefile b/drivers/net/irda/Makefile index 53ee8867c114..f923bf0123d3 100644 --- a/drivers/net/irda/Makefile +++ b/drivers/net/irda/Makefile @@ -1,121 +1,40 @@ -# File: drivers/irda/Makefile # # Makefile for the Linux IrDA infrared port device drivers. # +# 9 Aug 2000, Christoph Hellwig +# Rewritten to use lists instead of if-statements. 
+# SUB_DIRS := MOD_SUB_DIRS := $(SUB_DIRS) ALL_SUB_DIRS := $(SUB_DIRS) -L_TARGET := irda_drivers.a -L_OBJS := -M_OBJS := - -ifeq ($(CONFIG_IRTTY_SIR),y) -L_OBJS += irtty.o -else - ifeq ($(CONFIG_IRTTY_SIR),m) - M_OBJS += irtty.o - endif -endif - -ifeq ($(CONFIG_IRPORT_SIR),y) -LX_OBJS += irport.o -else - ifeq ($(CONFIG_IRPORT_SIR),m) - MX_OBJS += irport.o - endif -endif - -ifeq ($(CONFIG_NSC_FIR),y) -L_OBJS += nsc-ircc.o -else - ifeq ($(CONFIG_NSC_FIR),m) - M_OBJS += nsc-ircc.o - endif -endif - -ifeq ($(CONFIG_WINBOND_FIR),y) -L_OBJS += w83977af_ir.o -else - ifeq ($(CONFIG_WINBOND_FIR),m) - M_OBJS += w83977af_ir.o - endif -endif - -ifeq ($(CONFIG_TOSHIBA_FIR),y) -L_OBJS += toshoboe.o -else - ifeq ($(CONFIG_TOSHIBA_FIR),m) - M_OBJS += toshoboe.o - endif -endif - -ifeq ($(CONFIG_SMC_IRCC_FIR),y) -L_OBJS += smc-ircc.o -LX_OBJS += irport.o -else - ifeq ($(CONFIG_SMC_IRCC_FIR),m) - M_OBJS += smc-ircc.o - MX_OBJS += irport.o - endif -endif +O_TARGET := irda.o -ifeq ($(CONFIG_ESI_DONGLE),y) -L_OBJS += esi.o -else - ifeq ($(CONFIG_ESI_DONGLE),m) - M_OBJS += esi.o - endif -endif +export-objs = irport.o -ifeq ($(CONFIG_TEKRAM_DONGLE),y) -L_OBJS += tekram.o -else - ifeq ($(CONFIG_TEKRAM_DONGLE),m) - M_OBJS += tekram.o - endif -endif -ifeq ($(CONFIG_ACTISYS_DONGLE),y) -L_OBJS += actisys.o -else - ifeq ($(CONFIG_ACTISYS_DONGLE),m) - M_OBJS += actisys.o - endif -endif +obj-$(CONFIG_IRTTY_SIR) += irtty.o +obj-$(CONFIG_IRPORT_SIR) += irport.o +obj-$(CONFIG_NSC_FIR) += nsc-ircc.o +obj-$(CONFIG_WINBOND_FIR) += w83977af_ir.o +obj-$(CONFIG_TOSHIBA_FIR) += toshoboe.o +obj-$(CONFIG_SMC_IRCC_FIR) += smc-ircc.o irport.o +obj-$(CONFIG_ESI_DONGLE) += esi.o +obj-$(CONFIG_TEKRAM_DONGLE) += tekram.o +obj-$(CONFIG_ACTISYS_DONGLE) += actisys.o +obj-$(CONFIG_GIRBIL_DONGLE) += girbil.o +obj-$(CONFIG_LITELINK_DONGLE) += litelink.o +obj-$(CONFIG_OLD_BELKIN_DONGLE) += old_belkin.o -ifeq ($(CONFIG_GIRBIL_DONGLE),y) -L_OBJS += girbil.o -else - ifeq ($(CONFIG_GIRBIL_DONGLE),m) - M_OBJS += girbil.o - 
endif -endif -ifeq ($(CONFIG_LITELINK_DONGLE),y) -L_OBJS += litelink.o -else - ifeq ($(CONFIG_LITELINK_DONGLE),m) - M_OBJS += litelink.o - endif -endif +# Files that are both resident and modular: remove from modular. +obj-m := $(filter-out $(obj-y), $(obj-m)) -ifeq ($(CONFIG_OLD_BELKIN_DONGLE),y) -L_OBJS += old_belkin.o -else - ifeq ($(CONFIG_OLD_BELKIN_DONGLE),m) - M_OBJS += old_belkin.o - endif -endif +# Translate to Rules.make lists. +O_OBJS := $(filter-out $(export-objs), $(obj-y)) +OX_OBJS := $(filter $(export-objs), $(obj-y)) +M_OBJS := $(sort $(filter-out $(export-objs), $(obj-m))) +MX_OBJS := $(sort $(filter $(export-objs), $(obj-m))) include $(TOPDIR)/Rules.make - -clean: - rm -f core *.o *.a *.s - - - - - - diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index f7f3fe58a2fd..ce7bfa2b91b9 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -1,269 +1,86 @@ -# File: drivers/net/wan/Makefile # # Makefile for the Linux network (wan) device drivers. # -# Note! Dependencies are done automagically by 'make dep', which also -# removes any old dependencies. DON'T put your own dependencies here -# unless it's something special (ie not a .c file). -# -# Note 2! The CFLAGS definition is now inherited from the -# parent makefile. +# 3 Aug 2000, Christoph Hellwig +# Rewritten to use lists instead of if-statements. # SUB_DIRS := MOD_SUB_DIRS := $(SUB_DIRS) ALL_SUB_DIRS := $(SUB_DIRS) lmc -L_TARGET := wan.a -L_OBJS := -M_OBJS := - -# Need these to keep track of whether the 82530 or SYNCPPP -# modules should really go in the kernel or a module. 
-CONFIG_85230_BUILTIN := -CONFIG_85230_MODULE := -CONFIG_SYNCPPP_BUILTIN := -CONFIG_SYNCPPP_MODULE := - -ifeq ($(CONFIG_HOSTESS_SV11),y) -L_OBJS += hostess_sv11.o -CONFIG_85230_BUILTIN = y -CONFIG_SYNCPPP_BUILTIN = y -else - ifeq ($(CONFIG_HOSTESS_SV11),m) - CONFIG_85230_MODULE = y - CONFIG_SYNCPPP_MODULE = y - M_OBJS += hostess_sv11.o - endif -endif - -ifeq ($(CONFIG_SEALEVEL_4021),y) -L_OBJS += sealevel.o -CONFIG_85230_BUILTIN = y -CONFIG_SYNCPPP_BUILTIN = y -else - ifeq ($(CONFIG_SEALEVEL_4021),m) - CONFIG_85230_MODULE = y - CONFIG_SYNCPPP_MODULE = y - M_OBJS += sealevel.o - endif -endif - -ifeq ($(CONFIG_SYNCLINK_SYNCPPP),y) -CONFIG_SYNCPPP_BUILTIN = y -else - ifeq ($(CONFIG_SYNCLINK_SYNCPPP),m) - CONFIG_SYNCPPP_MODULE = y - endif -endif - -ifeq ($(CONFIG_COMX),y) -LX_OBJS += comx.o -else - ifeq ($(CONFIG_COMX),m) - MX_OBJS += comx.o - endif -endif - -ifeq ($(CONFIG_COMX_HW_COMX),y) -L_OBJS += comx-hw-comx.o -else - ifeq ($(CONFIG_COMX_HW_COMX),m) - M_OBJS += comx-hw-comx.o - endif -endif - -ifeq ($(CONFIG_COMX_HW_LOCOMX),y) -L_OBJS += comx-hw-locomx.o -CONFIG_85230_BUILTIN=y -CONFIG_SYNCPPP_BUILTIN = y -else - ifeq ($(CONFIG_COMX_HW_LOCOMX),m) - M_OBJS += comx-hw-locomx.o - CONFIG_85230_MODULE=y - CONFIG_SYNCPPP_MODULE = y - endif -endif +O_TARGET := wan.a -ifeq ($(CONFIG_COMX_HW_MIXCOM),y) -L_OBJS += comx-hw-mixcom.o -else - ifeq ($(CONFIG_COMX_HW_MIXCOM),m) - M_OBJS += comx-hw-mixcom.o - endif -endif +export-objs = z85230.o syncppp.o comx.o sdladrv.o cycx_drv.o +list-multi = wanpipe.o cyclomx.o -ifeq ($(CONFIG_COMX_PROTO_PPP),y) -L_OBJS += comx-proto-ppp.o -CONFIG_SYNCPPP_BUILTIN = y -else - ifeq ($(CONFIG_COMX_PROTO_PPP),m) - M_OBJS += comx-proto-ppp.o - CONFIG_SYNCPPP_MODULE = y - endif -endif +wanpipe-objs = sdlamain.o $(wanpipe-y) +wanpipe-$(CONFIG_WANPIPE_X25) += sdla_x25.o +wanpipe-$(CONFIG_WANPIPE_FR) += sdla_fr.o +wanpipe-$(CONFIG_WANPIPE_CHDLC) += sdla_chdlc.o +wanpipe-$(CONFIG_WANPIPE_PPP) += sdla_ppp.o -ifeq ($(CONFIG_COMX_PROTO_LAPB),y) -L_OBJS 
+= comx-proto-lapb.o -else - ifeq ($(CONFIG_COMX_PROTO_LAPB),m) - M_OBJS += comx-proto-lapb.o - endif -endif +cyclomx-objs = cycx_main.o $(cyclomx-y) +cyclomx-$(CONFIG_CYCLOMX_X25) += cycx_x25.o -ifeq ($(CONFIG_COMX_PROTO_FR),y) -L_OBJS += comx-proto-fr.o -else - ifeq ($(CONFIG_COMX_PROTO_FR),m) - M_OBJS += comx-proto-fr.o - endif -endif -ifeq ($(CONFIG_COSA),y) -L_OBJS += cosa.o -CONFIG_SYNCPPP_BUILTIN = y -else - ifeq ($(CONFIG_COSA),m) - CONFIG_SYNCPPP_MODULE = y - M_OBJS += cosa.o - endif -endif +obj-$(CONFIG_HOSTESS_SV11) += z85230.o syncppp.o hostess_sv11.o +obj-$(CONFIG_SEALEVEL_4021) += z85230.o syncppp.o sealevel.o +obj-$(CONFIG_COMX) += comx.o +obj-$(CONFIG_COMX_HW_COMX) += comx-hw-comx.o +obj-$(CONFIG_COMX_HW_LOCOMX) += cz85230.o syncppp.o comx-hw-locomx.o +obj-$(CONFIG_COMX_HW_MIXCOM) += comx-hw-mixcom.o +obj-$(CONFIG_COMX_PROTO_PPP) += syncppp.o comx-proto-ppp.o +obj-$(CONFIG_COMX_PROTO_LAPB) += comx-proto-lapb.o +obj-$(CONFIG_COMX_PROTO_FR) += comx-proto-fr.o +obj-$(CONFIG_COSA) += syncppp.o cosa.o +obj-$(CONFIG_LANMEDIA) += syncppp.o ifeq ($(CONFIG_LANMEDIA),y) SUB_DIRS += lmc MOD_IN_SUB_DIRS += lmc - L_OBJS += lmc/lmc.o - CONFIG_SYNCPPP_BUILTIN = y + obj-y += lmc/lmc.o else ifeq ($(CONFIG_LANMEDIA),m) - CONFIG_SYNCPPP_MODULE = y MOD_IN_SUB_DIRS += lmc endif endif - -# If anything built-in uses syncppp, then build it into the kernel also. -# If not, but a module uses it, build as a module. - -ifdef CONFIG_SYNCPPP_BUILTIN -LX_OBJS += syncppp.o -else - ifdef CONFIG_SYNCPPP_MODULE - MX_OBJS += syncppp.o - endif -endif - -# If anything built-in uses Z85230, then build it into the kernel also. -# If not, but a module uses it, build as a module. 
- -ifdef CONFIG_85230_BUILTIN -LX_OBJS += z85230.o -else - ifdef CONFIG_85230_MODULE - MX_OBJS += z85230.o - endif -endif - -ifeq ($(CONFIG_DLCI),y) -L_OBJS += dlci.o -else - ifeq ($(CONFIG_DLCI),m) - M_OBJS += dlci.o - endif -endif - -ifeq ($(CONFIG_SDLA),y) - L_OBJS += sdla.o -else - ifeq ($(CONFIG_SDLA),m) - M_OBJS += sdla.o - endif -endif - -ifeq ($(CONFIG_VENDOR_SANGOMA),y) - LX_OBJS += sdladrv.o - L_OBJS += sdlamain.o - ifeq ($(CONFIG_WANPIPE_X25),y) - L_OBJS += sdla_x25.o - endif - ifeq ($(CONFIG_WANPIPE_CHDLC),y) - L_OBJS += sdla_chdlc.o - endif - ifeq ($(CONFIG_WANPIPE_FR),y) - L_OBJS += sdla_fr.o - endif - ifeq ($(CONFIG_WANPIPE_PPP),y) - L_OBJS += sdla_ppp.o - endif -endif - -ifeq ($(CONFIG_VENDOR_SANGOMA),m) - MX_OBJS += sdladrv.o - M_OBJS += wanpipe.o - WANPIPE_OBJS = sdlamain.o - ifeq ($(CONFIG_WANPIPE_X25),y) - WANPIPE_OBJS += sdla_x25.o - endif - ifeq ($(CONFIG_WANPIPE_FR),y) - WANPIPE_OBJS += sdla_fr.o - endif - ifeq ($(CONFIG_WANPIPE_CHDLC),y) - WANPIPE_OBJS += sdla_chdlc.o - endif - ifeq ($(CONFIG_WANPIPE_PPP),y) - WANPIPE_OBJS += sdla_ppp.o - endif -endif - -ifeq ($(CONFIG_CYCLADES_SYNC),y) - LX_OBJS += cycx_drv.o - L_OBJS += cycx_main.o - ifeq ($(CONFIG_CYCLOMX_X25),y) - L_OBJS += cycx_x25.o - endif -endif - -ifeq ($(CONFIG_CYCLADES_SYNC),m) - MX_OBJS += cycx_drv.o - M_OBJS += cyclomx.o - CYCLOMX_OBJS = cycx_main.o - ifeq ($(CONFIG_CYCLOMX_X25),y) - CYCLOMX_OBJS += cycx_x25.o - endif -endif - -ifeq ($(CONFIG_X25_ASY),y) -L_OBJS += x25_asy.o -else - ifeq ($(CONFIG_X25_ASY),m) - M_OBJS += x25_asy.o - endif -endif - -ifeq ($(CONFIG_LAPBETHER),y) -L_OBJS += lapbether.o -else - ifeq ($(CONFIG_LAPBETHER),m) - M_OBJS += lapbether.o - endif -endif - -ifeq ($(CONFIG_SBNI),y) -L_OBJS += sbni.o -else - ifeq ($(CONFIG_SBNI),m) - M_OBJS += sbni.o - endif -endif +obj-$(CONFIG_DLCI) += dlci.o +obj-$(CONFIG_SDLA) += sdla.o +obj-$(CONFIG_VENDOR_SANGOMA) += sdladrv.o wanpipe.o +obj-$(CONFIG_CYCLADES_SYNC) += cycx_drv.o cyclomx.o +obj-$(CONFIG_LAPBETHER) += 
lapbether.o +obj-$(CONFIG_SBNI) += sbni.o + + +# Extract lists of the multi-part drivers. +# The 'int-*' lists are the intermediate files used to build the multi's. +multi-y := $(filter $(list-multi), $(obj-y)) +multi-m := $(filter $(list-multi), $(obj-m)) +int-y := $(sort $(foreach m, $(multi-y), $($(basename $(m))-objs))) +int-m := $(sort $(foreach m, $(multi-m), $($(basename $(m))-objs))) + +# Files that are both resident and modular: remove from modular. +obj-m := $(filter-out $(obj-y), $(obj-m)) +int-m := $(filter-out $(int-y), $(int-m)) + +# Take multi-part drivers out of obj-y and put components in. +obj-y := $(filter-out $(list-multi), $(obj-y)) $(int-y) + +# Translate to Rules.make lists. +O_OBJS := $(filter-out $(export-objs), $(obj-y)) +OX_OBJS := $(filter $(export-objs), $(obj-y)) +M_OBJS := $(sort $(filter-out $(export-objs), $(obj-m))) +MX_OBJS := $(sort $(filter $(export-objs), $(obj-m))) +MI_OBJS := $(sort $(filter-out $(export-objs), $(int-m))) +MIX_OBJS := $(sort $(filter $(export-objs), $(int-m))) include $(TOPDIR)/Rules.make -clean: - rm -f core *.o *.a *.s - -wanpipe.o: $(WANPIPE_OBJS) - $(LD) -r -o $@ $(WANPIPE_OBJS) +wanpipe.o: $(wanpipe-objs) + $(LD) -r -o $@ $(wanpipe-objs) -cyclomx.o: $(CYCLOMX_OBJS) - $(LD) -r -o $@ $(CYCLOMX_OBJS) +cyclomx.o: $(cyclomx-objs) + $(LD) -r -o $@ $(cyclomx-objs) diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index 37ef27487b69..b4073b00f6cd 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -448,6 +448,7 @@ static int __init sbni_probe1(struct net_device *dev, int ioaddr) if(dev->priv == NULL) { DP( printk("%s: cannot allocate memory\n", dev->name); ) + free_irq(dev->irq, dev); return -ENOMEM; } diff --git a/drivers/pci/pci.ids b/drivers/pci/pci.ids index 62e06d38f88b..36ecd489e435 100644 --- a/drivers/pci/pci.ids +++ b/drivers/pci/pci.ids @@ -4635,7 +4635,12 @@ 84c4 450KX/GX [Orion] - 82454KX/GX PCI bridge 84c5 450KX/GX [Orion] - 82453KX/GX Memory controller 84ca 450NX - 
82451NX Memory & I/O Controller - 84cb 450NX - 82454NX PCI Expander Bridge + 84cb 450NX - 82454NX/84460GX PCI Expander Bridge + 84e0 460GX - 84460GX System Address Controller (SAC) + 84e1 460GX - 84460GX System Data Controller (SDC) + 84e2 460GX - 84460GX AGP Bridge (GXB) + 84e3 460GX - 84460GX Memory Address Controller (MAC) + 84e4 460GX - 84460GX Memory Data Controller (MDC) ffff 450NX/GX [Orion] - 82453KX/GX Memory controller [BUG] 8800 Trigem Computer Inc. 2008 Video assistent component diff --git a/drivers/sound/cmpci.c b/drivers/sound/cmpci.c index 6fcb896ac4b0..fbba125832d1 100644 --- a/drivers/sound/cmpci.c +++ b/drivers/sound/cmpci.c @@ -238,8 +238,6 @@ static const unsigned sample_shift[] = { 0, 1, 1, 2 }; #define FMODE_DMFM 0x10 -#define SND_DEV_DSP16 5 - /* --------------------------------------------------------------------- */ struct cm_state { diff --git a/drivers/sound/es1370.c b/drivers/sound/es1370.c index a5bb668f2c90..01f2cf46f9db 100644 --- a/drivers/sound/es1370.c +++ b/drivers/sound/es1370.c @@ -302,8 +302,6 @@ static const unsigned dac1_samplerate[] = { 5512, 11025, 22050, 44100 }; #define FMODE_MIDI_READ (FMODE_READ << FMODE_MIDI_SHIFT) #define FMODE_MIDI_WRITE (FMODE_WRITE << FMODE_MIDI_SHIFT) -#define SND_DEV_DSP16 5 - /* --------------------------------------------------------------------- */ struct es1370_state { diff --git a/drivers/sound/es1371.c b/drivers/sound/es1371.c index df362e8bb757..4e340c3cc4f9 100644 --- a/drivers/sound/es1371.c +++ b/drivers/sound/es1371.c @@ -364,8 +364,6 @@ static const unsigned sample_shift[] = { 0, 1, 1, 2 }; #define FMODE_MIDI_READ (FMODE_READ << FMODE_MIDI_SHIFT) #define FMODE_MIDI_WRITE (FMODE_WRITE << FMODE_MIDI_SHIFT) -#define SND_DEV_DSP16 5 - #define ES1371_MODULE_NAME "es1371" #define PFX ES1371_MODULE_NAME ": " diff --git a/drivers/sound/i810_audio.c b/drivers/sound/i810_audio.c index 27a1c243b300..15b469cf59a7 100644 --- a/drivers/sound/i810_audio.c +++ b/drivers/sound/i810_audio.c @@ -189,12 
+189,6 @@ enum { /* maxinum number of AC97 codecs connected, AC97 2.0 defined 4 */ #define NR_AC97 2 -/* minor number of /dev/dspW */ -#define SND_DEV_DSP8 1 - -/* minor number of /dev/dspW */ -#define SND_DEV_DSP16 1 - static const unsigned sample_size[] = { 1, 2, 2, 4 }; static const unsigned sample_shift[] = { 0, 1, 1, 2 }; diff --git a/drivers/sound/maestro.c b/drivers/sound/maestro.c index 6c664aea42a9..9d848e2a77e9 100644 --- a/drivers/sound/maestro.c +++ b/drivers/sound/maestro.c @@ -301,8 +301,6 @@ static int use_pm=2; /* set to 1 for force */ #define NR_DSPS (1< #include -/* much better to duplicate this value than include - * drivers/sound/sound_config.h just for this definition */ -#define SND_DEV_DSP16 5 - #undef VIA_DEBUG /* define to enable debugging output and checks */ #ifdef VIA_DEBUG diff --git a/fs/dcache.c b/fs/dcache.c index 0e547009d05c..214f0da2ec47 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1189,7 +1189,9 @@ static void __init dcache_init(unsigned long mempages) if (!dentry_cache) panic("Cannot create dentry cache"); +#if PAGE_SHIFT < 13 mempages >>= (13 - PAGE_SHIFT); +#endif mempages *= sizeof(struct list_head); for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++) ; diff --git a/fs/exec.c b/fs/exec.c index 7c3efb3667de..5f436f1e0c4d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -473,19 +473,28 @@ static inline void flush_old_files(struct files_struct * files) unsigned long j; j = 0; + write_lock(&files->file_lock); for (;;) { unsigned long set, i; i = j * __NFDBITS; if (i >= files->max_fds || i >= files->max_fdset) break; - set = xchg(&files->close_on_exec->fds_bits[j], 0); + set = files->close_on_exec->fds_bits[j]; + if (!set) + continue; + files->close_on_exec->fds_bits[j] = 0; j++; + write_unlock(&files->file_lock); for ( ; set ; i++,set >>= 1) { - if (set & 1) + if (set & 1) { sys_close(i); + } } + write_lock(&files->file_lock); + } + write_unlock(&files->file_lock); } int flush_old_exec(struct linux_binprm * bprm) diff 
--git a/fs/fcntl.c b/fs/fcntl.c index 1ff077f93b90..7a3bcefb89e8 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -232,13 +232,10 @@ static long do_fcntl(unsigned int fd, unsigned int cmd, } break; case F_GETFD: - err = FD_ISSET(fd, current->files->close_on_exec); + err = get_close_on_exec(fd); break; case F_SETFD: - if (arg&1) - FD_SET(fd, current->files->close_on_exec); - else - FD_CLR(fd, current->files->close_on_exec); + set_close_on_exec(fd, arg&1); break; case F_GETFL: err = filp->f_flags; diff --git a/fs/ioctl.c b/fs/ioctl.c index f02d766bd378..21af77a3a168 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -58,11 +58,11 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) lock_kernel(); switch (cmd) { case FIOCLEX: - FD_SET(fd, current->files->close_on_exec); + set_close_on_exec(fd, 1); break; case FIONCLEX: - FD_CLR(fd, current->files->close_on_exec); + set_close_on_exec(fd, 0); break; case FIONBIO: diff --git a/fs/nfs/write.c b/fs/nfs/write.c index cb97c40bc4a6..a5bda60ba649 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1291,7 +1291,7 @@ nfs_writeback_done(struct rpc_task *task) static void nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data) { - struct nfs_page *req; + struct nfs_page *first, *last; struct dentry *dentry; struct inode *inode; loff_t start, end, len; @@ -1299,32 +1299,28 @@ nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data) /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. 
*/ - end = 0; - start = ~0; - req = nfs_list_entry(head->next); - dentry = req->wb_dentry; - data->dentry = dentry; - data->cred = req->wb_cred; + list_splice(head, &data->pages); + INIT_LIST_HEAD(head); + first = nfs_list_entry(data->pages.next); + last = nfs_list_entry(data->pages.prev); + dentry = first->wb_dentry; inode = dentry->d_inode; - while (!list_empty(head)) { - struct nfs_page *req; - loff_t rqstart, rqend; - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_list_add_request(req, &data->pages); - rqstart = page_offset(req->wb_page) + req->wb_offset; - rqend = rqstart + req->wb_bytes; - if (rqstart < start) - start = rqstart; - if (rqend > end) - end = rqend; - } - data->args.fh = NFS_FH(dentry); - data->args.offset = start; + + /* + * Determine the offset range of requests in the COMMIT call. + * We rely on the fact that data->pages is an ordered list... + */ + start = page_offset(first->wb_page) + first->wb_offset; + end = page_offset(last->wb_page) + (last->wb_offset + last->wb_bytes); len = end - start; /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ - if (end >= inode->i_size || len > (~((u32)0) >> 1)) + if (end >= inode->i_size || len < 0 || len > (~((u32)0) >> 1)) len = 0; + + data->dentry = dentry; + data->cred = first->wb_cred; + data->args.fh = NFS_FH(dentry); + data->args.offset = start; data->res.count = data->args.count = (u32)len; data->res.fattr = &data->fattr; data->res.verf = &data->verf; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 6dcfe8ef120e..e264dd3d3a28 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -1,4 +1,4 @@ -/* $Id: inode.c,v 1.12 2000/07/13 08:06:42 davem Exp $ +/* $Id: inode.c,v 1.13 2000/08/12 13:25:46 davem Exp $ * openpromfs.c: /proc/openprom handling routines * * Copyright (C) 1996-1999 Jakub Jelinek (jakub@redhat.com) diff --git a/fs/proc/array.c b/fs/proc/array.c index 5d764084c20c..9b00883cdc53 100644 --- a/fs/proc/array.c +++ 
b/fs/proc/array.c @@ -301,12 +301,11 @@ int proc_pid_stat(struct task_struct *task, char * buffer) { unsigned long vsize, eip, esp, wchan; long priority, nice; - int tty_pgrp; + int tty_pgrp = -1, tty_nr = 0; sigset_t sigign, sigcatch; char state; int res; pid_t ppid; - int tty_nr; struct mm_struct *mm; state = *get_task_state(task); @@ -315,6 +314,10 @@ int proc_pid_stat(struct task_struct *task, char * buffer) mm = task->mm; if(mm) atomic_inc(&mm->mm_users); + if (task->tty) { + tty_pgrp = task->tty->pgrp; + tty_nr = kdev_t_to_nr(task->tty->device); + } task_unlock(task); if (mm) { struct vm_area_struct *vma; @@ -333,14 +336,6 @@ int proc_pid_stat(struct task_struct *task, char * buffer) collect_sigign_sigcatch(task, &sigign, &sigcatch); - task_lock(task); - if (task->tty) - tty_pgrp = task->tty->pgrp; - else - tty_pgrp = -1; - tty_nr = task->tty ? kdev_t_to_nr(task->tty->device) : 0; - task_unlock(task); - /* scale priority and nice values from timeslices to -20..20 */ /* to make it look like a "normal" Unix priority/nice value */ priority = task->counter; diff --git a/include/asm-ia64/acpi-ext.h b/include/asm-ia64/acpi-ext.h index c3999f1481b1..24f9822e6034 100644 --- a/include/asm-ia64/acpi-ext.h +++ b/include/asm-ia64/acpi-ext.h @@ -69,7 +69,7 @@ typedef struct { u8 eid; } acpi_entry_lsapic_t; -typedef struct { +typedef struct acpi_entry_iosapic { u8 type; u8 length; u16 reserved; diff --git a/include/asm-ia64/asmmacro.h b/include/asm-ia64/asmmacro.h index 4991bb26efef..614ca7e8e1b1 100644 --- a/include/asm-ia64/asmmacro.h +++ b/include/asm-ia64/asmmacro.h @@ -23,7 +23,7 @@ #endif #define ENTRY(name) \ - .align 16; \ + .align 32; \ .proc name; \ name: diff --git a/include/asm-ia64/efi.h b/include/asm-ia64/efi.h index 54313248629d..5d311d32edcf 100644 --- a/include/asm-ia64/efi.h +++ b/include/asm-ia64/efi.h @@ -226,6 +226,7 @@ efi_guidcmp (efi_guid_t left, efi_guid_t right) } extern void efi_init (void); +extern void efi_map_pal_code (void); extern void 
efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timeval *tv); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ diff --git a/include/asm-ia64/ia32.h b/include/asm-ia64/ia32.h index 80cc76755f83..c6732837c847 100644 --- a/include/asm-ia64/ia32.h +++ b/include/asm-ia64/ia32.h @@ -40,7 +40,6 @@ struct flock32 { __kernel_off_t32 l_start; __kernel_off_t32 l_len; __kernel_pid_t32 l_pid; - short __unused; }; @@ -105,11 +104,21 @@ typedef struct { } sigset32_t; struct sigaction32 { - unsigned int sa_handler; /* Really a pointer, but need to deal - with 32 bits */ + unsigned int sa_handler; /* Really a pointer, but need to deal + with 32 bits */ unsigned int sa_flags; - unsigned int sa_restorer; /* Another 32 bit pointer */ - sigset32_t sa_mask; /* A 32 bit mask */ + unsigned int sa_restorer; /* Another 32 bit pointer */ + sigset32_t sa_mask; /* A 32 bit mask */ +}; + +typedef unsigned int old_sigset32_t; /* at least 32 bits */ + +struct old_sigaction32 { + unsigned int sa_handler; /* Really a pointer, but need to deal + with 32 bits */ + old_sigset32_t sa_mask; /* A 32 bit mask */ + unsigned int sa_flags; + unsigned int sa_restorer; /* Another 32 bit pointer */ }; typedef struct sigaltstack_ia32 { diff --git a/include/asm-ia64/io.h b/include/asm-ia64/io.h index a371f1361c44..0740af45fd60 100644 --- a/include/asm-ia64/io.h +++ b/include/asm-ia64/io.h @@ -47,6 +47,10 @@ phys_to_virt(unsigned long address) return (void *) (address + PAGE_OFFSET); } +/* + * The following two macros are deprecated and scheduled for removal. + * Please use the PCI-DMA interface defined in instead. 
+ */ #define bus_to_virt phys_to_virt #define virt_to_bus virt_to_phys @@ -315,6 +319,7 @@ __writeq (unsigned long val, void *addr) #define writeq(v,a) __writeq((v), (void *) (a)) #define __raw_writeb writeb #define __raw_writew writew +#define __raw_writel writel #define __raw_writeq writeq #ifndef inb_p diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h index 890224329d48..3ac473f149a9 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h @@ -4,8 +4,8 @@ * Copyright (C) 1999 Silicon Graphics, Inc. * Copyright (C) Srinivasa Thirumalachar * Copyright (C) Vijay Chander - * Copyright (C) 1999 Hewlett-Packard Co. - * Copyright (C) David Mosberger-Tang + * Copyright (C) 1999-2000 Hewlett-Packard Co. + * Copyright (C) 1999-2000 David Mosberger-Tang */ #ifndef _ASM_IA64_MACHVEC_H #define _ASM_IA64_MACHVEC_H @@ -21,6 +21,7 @@ struct pt_regs; struct task_struct; struct timeval; struct vm_area_struct; +struct acpi_entry_iosapic; typedef void ia64_mv_setup_t (char **); typedef void ia64_mv_irq_init_t (void); @@ -30,15 +31,33 @@ typedef void ia64_mv_mca_init_t (void); typedef void ia64_mv_mca_handler_t (void); typedef void ia64_mv_cmci_handler_t (int, void *, struct pt_regs *); typedef void ia64_mv_log_print_t (void); +typedef void ia64_mv_register_iosapic_t (struct acpi_entry_iosapic *); + +extern void machvec_noop (void); # if defined (CONFIG_IA64_HP_SIM) # include # elif defined (CONFIG_IA64_DIG) # include # elif defined (CONFIG_IA64_SGI_SN1_SIM) -# include +# include # elif defined (CONFIG_IA64_GENERIC) +# ifdef MACHVEC_PLATFORM_HEADER +# include MACHVEC_PLATFORM_HEADER +# else +# define platform_name ia64_mv.name +# define platform_setup ia64_mv.setup +# define platform_irq_init ia64_mv.irq_init +# define platform_map_nr ia64_mv.map_nr +# define platform_mca_init ia64_mv.mca_init +# define platform_mca_handler ia64_mv.mca_handler +# define platform_cmci_handler ia64_mv.cmci_handler +# define platform_log_print ia64_mv.log_print +# 
define platform_pci_fixup ia64_mv.pci_fixup +# define platform_register_iosapic ia64_mv.register_iosapic +# endif + struct ia64_machine_vector { const char *name; ia64_mv_setup_t *setup; @@ -49,6 +68,7 @@ struct ia64_machine_vector { ia64_mv_mca_handler_t *mca_handler; ia64_mv_cmci_handler_t *cmci_handler; ia64_mv_log_print_t *log_print; + ia64_mv_register_iosapic_t *register_iosapic; }; #define MACHVEC_INIT(name) \ @@ -61,22 +81,12 @@ struct ia64_machine_vector { platform_mca_init, \ platform_mca_handler, \ platform_cmci_handler, \ - platform_log_print \ + platform_log_print, \ + platform_register_iosapic \ } -# ifndef MACHVEC_INHIBIT_RENAMING -# define platform_name ia64_mv.name -# define platform_setup ia64_mv.setup -# define platform_irq_init ia64_mv.irq_init -# define platform_map_nr ia64_mv.map_nr -# define platform_mca_init ia64_mv.mca_init -# define platform_mca_handler ia64_mv.mca_handler -# define platform_cmci_handler ia64_mv.cmci_handler -# define platform_log_print ia64_mv.log_print -# endif - extern struct ia64_machine_vector ia64_mv; -extern void machvec_noop (void); +extern void machvec_init (const char *name); # else # error Unknown configuration. Update asm-ia64/machvec.h. 
@@ -104,5 +114,11 @@ extern void machvec_noop (void); #ifndef platform_log_print # define platform_log_print ((ia64_mv_log_print_t *) machvec_noop) #endif +#ifndef platform_pci_fixup +# define platform_pci_fixup ((ia64_mv_pci_fixup_t *) machvec_noop) +#endif +#ifndef platform_register_iosapic +# define platform_register_iosapic ((ia64_mv_register_iosapic_t *) machvec_noop) +#endif #endif /* _ASM_IA64_MACHVEC_H */ diff --git a/include/asm-ia64/machvec_dig.h b/include/asm-ia64/machvec_dig.h index a63e586c80e6..dedf37cdd3dc 100644 --- a/include/asm-ia64/machvec_dig.h +++ b/include/asm-ia64/machvec_dig.h @@ -5,6 +5,7 @@ extern ia64_mv_setup_t dig_setup; extern ia64_mv_irq_init_t dig_irq_init; extern ia64_mv_pci_fixup_t dig_pci_fixup; extern ia64_mv_map_nr_t map_nr_dense; +extern ia64_mv_register_iosapic_t dig_register_iosapic; /* * This stuff has dual use! @@ -18,5 +19,6 @@ extern ia64_mv_map_nr_t map_nr_dense; #define platform_irq_init dig_irq_init #define platform_pci_fixup dig_pci_fixup #define platform_map_nr map_nr_dense +#define platform_register_iosapic dig_register_iosapic #endif /* _ASM_IA64_MACHVEC_DIG_h */ diff --git a/include/asm-ia64/machvec_init.h b/include/asm-ia64/machvec_init.h index 60859418a2e9..2cae5accf24c 100644 --- a/include/asm-ia64/machvec_init.h +++ b/include/asm-ia64/machvec_init.h @@ -1,4 +1,6 @@ -#define MACHVEC_INHIBIT_RENAMING +#define __MACHVEC_HDR(n) +#define __MACHVEC_EXPAND(n) __MACHVEC_HDR(n) +#define MACHVEC_PLATFORM_HEADER __MACHVEC_EXPAND(MACHVEC_PLATFORM_NAME) #include @@ -7,3 +9,5 @@ = MACHVEC_INIT(name); #define MACHVEC_DEFINE(name) MACHVEC_HELPER(name) + +MACHVEC_DEFINE(MACHVEC_PLATFORM_NAME) diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index a223e9bbdeef..c50eacaf0068 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -2,12 +2,13 @@ #define _ASM_IA64_MMU_CONTEXT_H /* - * Copyright (C) 1998, 1999 Hewlett-Packard Co - * Copyright (C) 1998, 1999 David 
Mosberger-Tang + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang */ #include #include +#include #include @@ -26,21 +27,6 @@ * architecture manual guarantees this number to be in the range * 18-24. * - * A context number has the following format: - * - * +--------------------+---------------------+ - * | generation number | region id | - * +--------------------+---------------------+ - * - * A context number of 0 is considered "invalid". - * - * The generation number is incremented whenever we end up having used - * up all available region ids. At that point with flush the entire - * TLB and reuse the first region id. The new generation number - * ensures that when we context switch back to an old process, we do - * not inadvertently end up using its possibly reused region id. - * Instead, we simply allocate a new region id for that process. - * * Copyright (C) 1998 David Mosberger-Tang */ @@ -56,9 +42,15 @@ #define IA64_HW_CONTEXT_MASK ((1UL << IA64_HW_CONTEXT_BITS) - 1) -extern unsigned long ia64_next_context; +struct ia64_ctx { + spinlock_t lock; + unsigned int next; /* next context number to use */ + unsigned int limit; /* next >= limit => must call wrap_mmu_context() */ +}; + +extern struct ia64_ctx ia64_ctx; -extern void get_new_mmu_context (struct mm_struct *mm); +extern void wrap_mmu_context (struct mm_struct *mm); static inline void enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) @@ -75,13 +67,25 @@ ia64_rid (unsigned long context, unsigned long region_addr) # endif } +extern inline void +get_new_mmu_context (struct mm_struct *mm) +{ + spin_lock(&ia64_ctx.lock); + { + if (ia64_ctx.next >= ia64_ctx.limit) + wrap_mmu_context(mm); + mm->context = ia64_ctx.next++; + } + spin_unlock(&ia64_ctx.lock); + +} + extern inline void get_mmu_context (struct mm_struct *mm) { /* check if our ASN is of an older generation and thus invalid: */ - if (((mm->context ^ ia64_next_context) & 
~IA64_HW_CONTEXT_MASK) != 0) { + if (mm->context == 0) get_new_mmu_context(mm); - } } extern inline int @@ -104,7 +108,7 @@ reload_context (struct mm_struct *mm) unsigned long rid_incr = 0; unsigned long rr0, rr1, rr2, rr3, rr4; - rid = (mm->context & IA64_HW_CONTEXT_MASK); + rid = mm->context; #ifndef CONFIG_IA64_TLB_CHECKS_REGION_NUMBER rid <<= 3; /* make space for encoding the region number */ diff --git a/include/asm-ia64/offsets.h b/include/asm-ia64/offsets.h index c5c02dedffd3..88cad88d52fc 100644 --- a/include/asm-ia64/offsets.h +++ b/include/asm-ia64/offsets.h @@ -11,10 +11,10 @@ #define PT_PTRACED_BIT 0 #define PT_TRACESYS_BIT 1 -#define IA64_TASK_SIZE 2768 /* 0xad0 */ +#define IA64_TASK_SIZE 2864 /* 0xb30 */ #define IA64_PT_REGS_SIZE 400 /* 0x190 */ #define IA64_SWITCH_STACK_SIZE 560 /* 0x230 */ -#define IA64_SIGINFO_SIZE 136 /* 0x88 */ +#define IA64_SIGINFO_SIZE 128 /* 0x80 */ #define UNW_FRAME_INFO_SIZE 448 /* 0x1c0 */ #define IA64_TASK_PTRACE_OFFSET 48 /* 0x30 */ @@ -23,7 +23,7 @@ #define IA64_TASK_PROCESSOR_OFFSET 100 /* 0x64 */ #define IA64_TASK_THREAD_OFFSET 896 /* 0x380 */ #define IA64_TASK_THREAD_KSP_OFFSET 896 /* 0x380 */ -#define IA64_TASK_THREAD_SIGMASK_OFFSET 2648 /* 0xa58 */ +#define IA64_TASK_THREAD_SIGMASK_OFFSET 2744 /* 0xab8 */ #define IA64_TASK_PID_OFFSET 188 /* 0xbc */ #define IA64_TASK_MM_OFFSET 88 /* 0x58 */ #define IA64_PT_REGS_CR_IPSR_OFFSET 0 /* 0x0 */ diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h index f046dad8643b..62881b53849c 100644 --- a/include/asm-ia64/page.h +++ b/include/asm-ia64/page.h @@ -100,13 +100,14 @@ typedef unsigned long pgprot_t; #define MAP_NR_SN1(addr) (((unsigned long) (addr) - PAGE_OFFSET) >> PAGE_SHIFT) #ifdef CONFIG_IA64_GENERIC -# define virt_to_page(kaddr) (mem_map + platform_map_nr(kaddr)) +# include +# define virt_to_page(kaddr) (mem_map + platform_map_nr(kaddr)) #elif defined (CONFIG_IA64_SN_SN1_SIM) -# define virt_to_page(kaddr) (mem_map + MAP_NR_SN1(kaddr)) +# define 
virt_to_page(kaddr) (mem_map + MAP_NR_SN1(kaddr)) #else -# define virt_to_page(kaddr) (mem_map + MAP_NR_DENSE(kaddr)) +# define virt_to_page(kaddr) (mem_map + MAP_NR_DENSE(kaddr)) #endif -#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) +#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) # endif /* __KERNEL__ */ diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h index 5169b3f82756..d55b1625316d 100644 --- a/include/asm-ia64/pal.h +++ b/include/asm-ia64/pal.h @@ -18,7 +18,8 @@ * 00/03/07 davidm Updated pal_cache_flush() to be in sync with PAL v2.6. * 00/03/23 cfleck Modified processor min-state save area to match updated PAL & SAL info * 00/05/24 eranian Updated to latest PAL spec, fix structures bugs, added - * 00/05/25 eranian Support for stack calls, and statis physical calls + * 00/05/25 eranian Support for stack calls, and static physical calls + * 00/06/18 eranian Support for stacked physical calls */ /* @@ -646,10 +647,12 @@ struct ia64_pal_retval { extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64); extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64); extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64); #define PAL_CALL(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_static(a0, a1, a2, a3) #define PAL_CALL_STK(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_stacked(a0, a1, a2, a3) #define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_phys_static(a0, a1, a2, a3) +#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3) typedef int (*ia64_pal_handler) (u64, ...); extern ia64_pal_handler ia64_pal; @@ -951,7 +954,7 @@ typedef union pal_power_mgmt_info_u { /* Return information about processor's optional power management capabilities. 
*/ extern inline s64 ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf) -{ +{ struct ia64_pal_retval iprv; PAL_CALL_STK(iprv, PAL_HALT_INFO, (unsigned long) power_buf, 0, 0); return iprv.status; @@ -1370,17 +1373,17 @@ typedef union pal_itr_valid_u { dirty_bit_valid : 1, mem_attr_valid : 1, reserved : 60; - } pal_itr_valid_s; -} pal_itr_valid_u_t; + } pal_tr_valid_s; +} pal_tr_valid_u_t; /* Read a translation register */ extern inline s64 -ia64_pal_vm_tr_read (u64 reg_num, u64 tr_type, u64 tr_buffer, pal_itr_valid_u_t *itr_valid) -{ +ia64_pal_tr_read (u64 reg_num, u64 tr_type, u64 *tr_buffer, pal_tr_valid_u_t *tr_valid) +{ struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_VM_TR_READ, reg_num, tr_type, tr_buffer); - if (itr_valid) - itr_valid->piv_val = iprv.v0; + PAL_CALL_PHYS_STK(iprv, PAL_VM_TR_READ, reg_num, tr_type,(u64)__pa(tr_buffer)); + if (tr_valid) + tr_valid->piv_val = iprv.v0; return iprv.status; } diff --git a/include/asm-ia64/param.h b/include/asm-ia64/param.h index e93d4756a0ce..f892ad0446f8 100644 --- a/include/asm-ia64/param.h +++ b/include/asm-ia64/param.h @@ -10,23 +10,13 @@ #include -#ifdef CONFIG_IA64_HP_SIM +#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_SOFTSDV_HACKS) /* * Yeah, simulating stuff is slow, so let us catch some breath between * timer interrupts... 
*/ # define HZ 20 -#endif - -#ifdef CONFIG_IA64_DIG -# ifdef CONFIG_IA64_SOFTSDV_HACKS -# define HZ 20 -# else -# define HZ 100 -# endif -#endif - -#ifndef HZ +#else # define HZ 1024 #endif diff --git a/include/asm-ia64/pci.h b/include/asm-ia64/pci.h index 0c40b0e6befc..14a87544dcf0 100644 --- a/include/asm-ia64/pci.h +++ b/include/asm-ia64/pci.h @@ -1,6 +1,15 @@ #ifndef _ASM_IA64_PCI_H #define _ASM_IA64_PCI_H +#include +#include +#include +#include +#include + +#include +#include + /* * Can be used to override the logic in pci_scan_bus for skipping * already-configured bus numbers - to be used for buggy BIOSes or @@ -11,6 +20,8 @@ #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 +struct pci_dev; + extern inline void pcibios_set_master(struct pci_dev *dev) { /* No special bus mastering setup handling */ @@ -23,18 +34,8 @@ extern inline void pcibios_penalize_isa_irq(int irq) /* * Dynamic DMA mapping API. - * IA-64 has everything mapped statically. */ -#include -#include -#include - -#include -#include - -struct pci_dev; - /* * Allocate and map kernel buffer using consistent mode DMA for a device. * hwdev should be valid struct pci_dev pointer for PCI devices, @@ -64,13 +65,7 @@ extern void pci_free_consistent (struct pci_dev *hwdev, size_t size, * Once the device is given the dma address, the device owns this memory * until either pci_unmap_single or pci_dma_sync_single is performed. */ -extern inline dma_addr_t -pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - return virt_to_bus(ptr); -} +extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction); /* * Unmap a single streaming mode DMA translation. The dma_addr and size @@ -80,13 +75,7 @@ pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) * After this call, reads by the cpu to the buffer are guarenteed to see * whatever the device wrote there. 
*/ -extern inline void -pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} +extern void pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction); /* * Map a set of buffers described by scatterlist in streaming @@ -104,26 +93,14 @@ pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int d * Device ownership issues as mentioned above for pci_map_single are * the same here. */ -extern inline int -pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - return nents; -} +extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction); /* * Unmap a set of streaming mode DMA translations. * Again, cpu read rules concerning calls here are the same as for * pci_unmap_single() above. */ -extern inline void -pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} +extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction); /* * Make physical memory consistent for a single @@ -135,13 +112,7 @@ pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int dire * next point you give the PCI dma address back to the card, the * device again owns the buffer. 
*/ -extern inline void -pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} +extern void pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction); /* * Make physical memory consistent for a set of streaming mode DMA @@ -150,20 +121,15 @@ pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, * The same as pci_dma_sync_single but for a scatter-gather list, * same rules and usage. */ -extern inline void -pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} +extern void pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction); /* Return whether the given PCI device DMA address mask can * be supported properly. For example, if your device can * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int +pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) { return 1; } diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index 9963ebb731ad..6771f0192ac4 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -111,6 +111,7 @@ #include #include +#include #include /* @@ -286,7 +287,17 @@ extern pmd_t *ia64_bad_pagetable (void); * contains the memory attribute bits, dirty bits, and various other * bits as well. */ -#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC) +#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC) + +/* + * Macro to make mark a page protection value as "write-combining". 
+ * Note that "protection" is really a misnomer here as the protection + * value contains the memory attribute bits, dirty bits, and various + * other bits as well. Accesses through a write-combining translation + * bypass the caches, but do allow for consecutive writes to + * be combined into single (but larger) write transactions. + */ +#define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC) /* * Return the region index for virtual address ADDRESS. diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 5024801ae90d..c37fc76b18b1 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -19,6 +19,7 @@ #include #define IA64_NUM_DBG_REGS 8 +#define IA64_NUM_PM_REGS 4 /* * TASK_SIZE really is a mis-named. It really is the maximum user @@ -152,12 +153,13 @@ #define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */ #define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */ -#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 2) /* don't log unaligned accesses */ -#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 3) /* generate SIGBUS on unaligned acc. */ -#define IA64_THREAD_KRBS_SYNCED (__IA64_UL(1) << 4) /* krbs synced with process vm? */ +#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ +#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ +#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */ +#define IA64_THREAD_KRBS_SYNCED (__IA64_UL(1) << 5) /* krbs synced with process vm? */ #define IA64_KERNEL_DEATH (__IA64_UL(1) << 63) /* see die_if_kernel()... 
*/ -#define IA64_THREAD_UAC_SHIFT 2 +#define IA64_THREAD_UAC_SHIFT 3 #define IA64_THREAD_UAC_MASK (IA64_THREAD_UAC_NOPRINT | IA64_THREAD_UAC_SIGBUS) #ifndef __ASSEMBLY__ @@ -285,6 +287,14 @@ struct thread_struct { struct ia64_fpreg fph[96]; /* saved/loaded on demand */ __u64 dbr[IA64_NUM_DBG_REGS]; __u64 ibr[IA64_NUM_DBG_REGS]; +#ifdef CONFIG_PERFMON + __u64 pmc[IA64_NUM_PM_REGS]; + __u64 pmd[IA64_NUM_PM_REGS]; + __u64 pmod[IA64_NUM_PM_REGS]; +# define INIT_THREAD_PM {0, }, {0, }, {0, }, +#else +# define INIT_THREAD_PM +#endif __u64 map_base; /* base address for mmap() */ #ifdef CONFIG_IA32_SUPPORT __u64 eflag; /* IA32 EFLAGS reg */ @@ -316,6 +326,7 @@ struct thread_struct { {{{{0}}}, }, /* fph */ \ {0, }, /* dbr */ \ {0, }, /* ibr */ \ + INIT_THREAD_PM \ 0x2000000000000000 /* map_base */ \ INIT_THREAD_IA32, \ 0 /* siginfo */ \ @@ -338,8 +349,12 @@ struct thread_struct { struct mm_struct; struct task_struct; -/* Free all resources held by a thread. */ -extern void release_thread (struct task_struct *); +/* + * Free all resources held by a thread. This is called after the + * parent of DEAD_TASK has collected the exit status of the task via + * wait(). This is a no-op on IA-64. + */ +#define release_thread(dead_task) /* * This is the mechanism for creating a new kernel thread. 
@@ -392,6 +407,18 @@ ia64_set_fpu_owner (struct task_struct *t) extern void __ia64_init_fpu (void); extern void __ia64_save_fpu (struct ia64_fpreg *fph); extern void __ia64_load_fpu (struct ia64_fpreg *fph); +extern void ia64_save_debug_regs (unsigned long *save_area); +extern void ia64_load_debug_regs (unsigned long *save_area); + +#ifdef CONFIG_IA32_SUPPORT +extern void ia32_save_state (struct thread_struct *thread); +extern void ia32_load_state (struct thread_struct *thread); +#endif + +#ifdef CONFIG_PERFMON +extern void ia64_save_pm_regs (struct thread_struct *thread); +extern void ia64_load_pm_regs (struct thread_struct *thread); +#endif #define ia64_fph_enable() __asm__ __volatile__ (";; rsm psr.dfh;; srlz.d;;" ::: "memory"); #define ia64_fph_disable() __asm__ __volatile__ (";; ssm psr.dfh;; srlz.d;;" ::: "memory"); diff --git a/include/asm-ia64/scatterlist.h b/include/asm-ia64/scatterlist.h index 5a119b6c8e1f..192eef92e94d 100644 --- a/include/asm-ia64/scatterlist.h +++ b/include/asm-ia64/scatterlist.h @@ -13,6 +13,7 @@ struct scatterlist { * indirection buffer, NULL otherwise: */ char *alt_address; + char *orig_address; /* Save away the original buffer address (used by pci-dma.c) */ unsigned int length; /* buffer length */ }; diff --git a/include/asm-ia64/siginfo.h b/include/asm-ia64/siginfo.h index 7222fb285df9..a54312e12b96 100644 --- a/include/asm-ia64/siginfo.h +++ b/include/asm-ia64/siginfo.h @@ -14,12 +14,13 @@ typedef union sigval { } sigval_t; #define SI_MAX_SIZE 128 -#define SI_PAD_SIZE ((SI_MAX_SIZE/sizeof(int)) - 3) +#define SI_PAD_SIZE ((SI_MAX_SIZE/sizeof(int)) - 4) typedef struct siginfo { int si_signo; int si_errno; int si_code; + int __pad0; union { int _pad[SI_PAD_SIZE]; @@ -212,7 +213,7 @@ typedef struct siginfo { #define SIGEV_THREAD 2 /* deliver via thread creation */ #define SIGEV_MAX_SIZE 64 -#define SIGEV_PAD_SIZE ((SIGEV_MAX_SIZE/sizeof(int)) - 3) +#define SIGEV_PAD_SIZE ((SIGEV_MAX_SIZE/sizeof(int)) - 4) typedef struct sigevent { 
sigval_t sigev_value; diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h index 6175de538e8a..0788865fcd73 100644 --- a/include/asm-ia64/smp.h +++ b/include/asm-ia64/smp.h @@ -99,5 +99,9 @@ hard_smp_processor_id(void) extern void __init init_smp_config (void); extern void smp_do_timer (struct pt_regs *regs); +extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info, + int retry, int wait); + + #endif /* CONFIG_SMP */ #endif /* _ASM_IA64_SMP_H */ diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h index fedd8f8c678b..24b85b4d6305 100644 --- a/include/asm-ia64/spinlock.h +++ b/include/asm-ia64/spinlock.h @@ -15,8 +15,11 @@ #include #include +#undef NEW_LOCK + +#ifdef NEW_LOCK typedef struct { - volatile unsigned int lock; + volatile unsigned char lock; } spinlock_t; #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } @@ -26,44 +29,86 @@ typedef struct { * Streamlined test_and_set_bit(0, (x)). We use test-and-test-and-set * rather than a simple xchg to avoid writing the cache-line when * there is contention. + * + * XXX Fix me: instead of preserving ar.pfs, we should just mark it + * XXX as "clobbered". Unfortunately, the Mar 2000 release of the compiler + * XXX doesn't let us do that. The August release fixes that. */ -#if 1 /* Bad code generation? 
*/ -#define spin_lock(x) __asm__ __volatile__ ( \ - "mov ar.ccv = r0\n" \ - "mov r29 = 1\n" \ - ";;\n" \ - "1:\n" \ - "ld4 r2 = %0\n" \ - ";;\n" \ - "cmp4.eq p0,p7 = r0,r2\n" \ - "(p7) br.cond.spnt.few 1b \n" \ - "cmpxchg4.acq r2 = %0, r29, ar.ccv\n" \ - ";;\n" \ - "cmp4.eq p0,p7 = r0, r2\n" \ - "(p7) br.cond.spnt.few 1b\n" \ - ";;\n" \ - :: "m" __atomic_fool_gcc((x)) : "r2", "r29", "memory") - -#else -#define spin_lock(x) \ -{ \ - spinlock_t *__x = (x); \ - \ - do { \ - while (__x->lock); \ - } while (cmpxchg_acq(&__x->lock, 0, 1)); \ +#define spin_lock(x) \ +{ \ + register char *addr __asm__ ("r31") = (char *) &(x)->lock; \ + long saved_pfs; \ + \ + __asm__ __volatile__ ( \ + "mov r30=1\n" \ + "mov ar.ccv=r0\n" \ + ";;\n" \ + IA64_SEMFIX"cmpxchg1.acq r30=[%1],r30,ar.ccv\n" \ + ";;\n" \ + "cmp.ne p15,p0=r30,r0\n" \ + "mov %0=ar.pfs\n" \ + "(p15) br.call.spnt.few b7=ia64_spinlock_contention\n" \ + ";;\n" \ + "1: (p15) mov ar.pfs=%0;;\n" /* force a new bundle */ \ + : "=&r"(saved_pfs) : "r"(addr) \ + : "p15", "r28", "r29", "r30", "memory"); \ } -#endif + +#define spin_trylock(x) \ +({ \ + register char *addr __asm__ ("r31") = (char *) &(x)->lock; \ + register long result; \ + \ + __asm__ __volatile__ ( \ + "mov r30=1\n" \ + "mov ar.ccv=r0\n" \ + ";;\n" \ + IA64_SEMFIX"cmpxchg1.acq %0=[%1],r30,ar.ccv\n" \ + : "=r"(result) : "r"(addr) : "r30", "memory"); \ + (result == 0); \ +}) #define spin_is_locked(x) ((x)->lock != 0) +#define spin_unlock(x) ({((spinlock_t *) x)->lock = 0;}) +#define spin_unlock_wait(x) ({ while ((x)->lock); }) -#define spin_unlock(x) ({((spinlock_t *) x)->lock = 0; barrier();}) +#else /* !NEW_LOCK */ -/* Streamlined !test_and_set_bit(0, (x)) */ -#define spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0) +typedef struct { + volatile unsigned int lock; +} spinlock_t; + +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } +#define spin_lock_init(x) ((x)->lock = 0) +/* + * Streamlined test_and_set_bit(0, (x)). 
We use test-and-test-and-set + * rather than a simple xchg to avoid writing the cache-line when + * there is contention. + */ +#define spin_lock(x) __asm__ __volatile__ ( \ + "mov ar.ccv = r0\n" \ + "mov r29 = 1\n" \ + ";;\n" \ + "1:\n" \ + "ld4 r2 = %0\n" \ + ";;\n" \ + "cmp4.eq p0,p7 = r0,r2\n" \ + "(p7) br.cond.spnt.few 1b \n" \ + IA64_SEMFIX"cmpxchg4.acq r2 = %0, r29, ar.ccv\n" \ + ";;\n" \ + "cmp4.eq p0,p7 = r0, r2\n" \ + "(p7) br.cond.spnt.few 1b\n" \ + ";;\n" \ + :: "m" __atomic_fool_gcc((x)) : "r2", "r29", "memory") + +#define spin_is_locked(x) ((x)->lock != 0) +#define spin_unlock(x) ({((spinlock_t *) x)->lock = 0; barrier();}) +#define spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0) #define spin_unlock_wait(x) ({ do { barrier(); } while ((x)->lock); }) +#endif /* !NEW_LOCK */ + typedef struct { volatile int read_counter:31; volatile int write_lock:1; @@ -73,12 +118,12 @@ typedef struct { #define read_lock(rw) \ do { \ int tmp = 0; \ - __asm__ __volatile__ ("1:\tfetchadd4.acq %0 = %1, 1\n" \ + __asm__ __volatile__ ("1:\t"IA64_SEMFIX"fetchadd4.acq %0 = %1, 1\n" \ ";;\n" \ "tbit.nz p6,p0 = %0, 31\n" \ "(p6) br.cond.sptk.few 2f\n" \ ".section .text.lock,\"ax\"\n" \ - "2:\tfetchadd4.rel %0 = %1, -1\n" \ + "2:\t"IA64_SEMFIX"fetchadd4.rel %0 = %1, -1\n" \ ";;\n" \ "3:\tld4.acq %0 = %1\n" \ ";;\n" \ @@ -94,7 +139,7 @@ do { \ #define read_unlock(rw) \ do { \ int tmp = 0; \ - __asm__ __volatile__ ("fetchadd4.rel %0 = %1, -1\n" \ + __asm__ __volatile__ (IA64_SEMFIX"fetchadd4.rel %0 = %1, -1\n" \ : "=r" (tmp) \ : "m" (__atomic_fool_gcc(rw)) \ : "memory"); \ diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index abffefa016a9..25438c18e9c9 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -27,6 +27,15 @@ #define GATE_ADDR (0xa000000000000000 + PAGE_SIZE) +#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) + /* Workaround for Errata 97. 
*/ +# define IA64_SEMFIX_INSN mf; +# define IA64_SEMFIX "mf;" +#else +# define IA64_SEMFIX_INSN +# define IA64_SEMFIX "" +#endif + #ifndef __ASSEMBLY__ #include @@ -231,13 +240,13 @@ extern unsigned long __bad_increment_for_ia64_fetch_and_add (void); ({ \ switch (sz) { \ case 4: \ - __asm__ __volatile__ ("fetchadd4.rel %0=%1,%3" \ + __asm__ __volatile__ (IA64_SEMFIX"fetchadd4.rel %0=%1,%3" \ : "=r"(tmp), "=m"(__atomic_fool_gcc(v)) \ : "m" (__atomic_fool_gcc(v)), "i"(n)); \ break; \ \ case 8: \ - __asm__ __volatile__ ("fetchadd8.rel %0=%1,%3" \ + __asm__ __volatile__ (IA64_SEMFIX"fetchadd8.rel %0=%1,%3" \ : "=r"(tmp), "=m"(__atomic_fool_gcc(v)) \ : "m" (__atomic_fool_gcc(v)), "i"(n)); \ break; \ @@ -280,22 +289,22 @@ __xchg (unsigned long x, volatile void *ptr, int size) switch (size) { case 1: - __asm__ __volatile ("xchg1 %0=%1,%2" : "=r" (result) + __asm__ __volatile (IA64_SEMFIX"xchg1 %0=%1,%2" : "=r" (result) : "m" (*(char *) ptr), "r" (x) : "memory"); return result; case 2: - __asm__ __volatile ("xchg2 %0=%1,%2" : "=r" (result) + __asm__ __volatile (IA64_SEMFIX"xchg2 %0=%1,%2" : "=r" (result) : "m" (*(short *) ptr), "r" (x) : "memory"); return result; case 4: - __asm__ __volatile ("xchg4 %0=%1,%2" : "=r" (result) + __asm__ __volatile (IA64_SEMFIX"xchg4 %0=%1,%2" : "=r" (result) : "m" (*(int *) ptr), "r" (x) : "memory"); return result; case 8: - __asm__ __volatile ("xchg8 %0=%1,%2" : "=r" (result) + __asm__ __volatile (IA64_SEMFIX"xchg8 %0=%1,%2" : "=r" (result) : "m" (*(long *) ptr), "r" (x) : "memory"); return result; } @@ -305,7 +314,6 @@ __xchg (unsigned long x, volatile void *ptr, int size) #define xchg(ptr,x) \ ((__typeof__(*(ptr))) __xchg ((unsigned long) (x), (ptr), sizeof(*(ptr)))) -#define tas(ptr) (xchg ((ptr), 1)) /* * Atomic compare and exchange. 
Compare OLD with MEM, if identical, @@ -324,50 +332,50 @@ extern long __cmpxchg_called_with_bad_pointer(void); struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) (*(struct __xchg_dummy *)(x)) -#define ia64_cmpxchg(sem,ptr,old,new,size) \ -({ \ - __typeof__(ptr) _p_ = (ptr); \ - __typeof__(new) _n_ = (new); \ - __u64 _o_, _r_; \ - \ - switch (size) { \ - case 1: _o_ = (__u8 ) (old); break; \ - case 2: _o_ = (__u16) (old); break; \ - case 4: _o_ = (__u32) (old); break; \ - case 8: _o_ = (__u64) (old); break; \ - default: \ - } \ - __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "rO"(_o_)); \ - switch (size) { \ - case 1: \ - __asm__ __volatile__ ("cmpxchg1."sem" %0=%2,%3,ar.ccv" \ - : "=r"(_r_), "=m"(__xg(_p_)) \ - : "m"(__xg(_p_)), "r"(_n_)); \ - break; \ - \ - case 2: \ - __asm__ __volatile__ ("cmpxchg2."sem" %0=%2,%3,ar.ccv" \ - : "=r"(_r_), "=m"(__xg(_p_)) \ - : "m"(__xg(_p_)), "r"(_n_)); \ - break; \ - \ - case 4: \ - __asm__ __volatile__ ("cmpxchg4."sem" %0=%2,%3,ar.ccv" \ - : "=r"(_r_), "=m"(__xg(_p_)) \ - : "m"(__xg(_p_)), "r"(_n_)); \ - break; \ - \ - case 8: \ - __asm__ __volatile__ ("cmpxchg8."sem" %0=%2,%3,ar.ccv" \ - : "=r"(_r_), "=m"(__xg(_p_)) \ - : "m"(__xg(_p_)), "r"(_n_)); \ - break; \ - \ - default: \ - _r_ = __cmpxchg_called_with_bad_pointer(); \ - break; \ - } \ - (__typeof__(old)) _r_; \ +#define ia64_cmpxchg(sem,ptr,old,new,size) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(new) _n_ = (new); \ + __u64 _o_, _r_; \ + \ + switch (size) { \ + case 1: _o_ = (__u8 ) (long) (old); break; \ + case 2: _o_ = (__u16) (long) (old); break; \ + case 4: _o_ = (__u32) (long) (old); break; \ + case 8: _o_ = (__u64) (long) (old); break; \ + default: \ + } \ + __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "rO"(_o_)); \ + switch (size) { \ + case 1: \ + __asm__ __volatile__ (IA64_SEMFIX"cmpxchg1."sem" %0=%2,%3,ar.ccv" \ + : "=r"(_r_), "=m"(__xg(_p_)) \ + : "m"(__xg(_p_)), "r"(_n_)); \ + break; \ + \ + case 2: \ + __asm__ __volatile__ 
(IA64_SEMFIX"cmpxchg2."sem" %0=%2,%3,ar.ccv" \ + : "=r"(_r_), "=m"(__xg(_p_)) \ + : "m"(__xg(_p_)), "r"(_n_)); \ + break; \ + \ + case 4: \ + __asm__ __volatile__ (IA64_SEMFIX"cmpxchg4."sem" %0=%2,%3,ar.ccv" \ + : "=r"(_r_), "=m"(__xg(_p_)) \ + : "m"(__xg(_p_)), "r"(_n_)); \ + break; \ + \ + case 8: \ + __asm__ __volatile__ (IA64_SEMFIX"cmpxchg8."sem" %0=%2,%3,ar.ccv" \ + : "=r"(_r_), "=m"(__xg(_p_)) \ + : "m"(__xg(_p_)), "r"(_n_)); \ + break; \ + \ + default: \ + _r_ = __cmpxchg_called_with_bad_pointer(); \ + break; \ + } \ + (__typeof__(old)) _r_; \ }) #define cmpxchg_acq(ptr,o,n) ia64_cmpxchg("acq", (ptr), (o), (n), sizeof(*(ptr))) @@ -418,15 +426,15 @@ extern struct task_struct *ia64_switch_to (void *next_task); extern void ia64_save_extra (struct task_struct *task); extern void ia64_load_extra (struct task_struct *task); -#define __switch_to(prev,next,last) do { \ - if (((prev)->thread.flags & IA64_THREAD_DBG_VALID) \ - || IS_IA32_PROCESS(ia64_task_regs(prev))) \ - ia64_save_extra(prev); \ - if (((next)->thread.flags & IA64_THREAD_DBG_VALID) \ - || IS_IA32_PROCESS(ia64_task_regs(next))) \ - ia64_load_extra(next); \ - ia64_psr(ia64_task_regs(next))->dfh = (ia64_get_fpu_owner() != (next)); \ - (last) = ia64_switch_to((next)); \ +#define __switch_to(prev,next,last) do { \ + if (((prev)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)) \ + || IS_IA32_PROCESS(ia64_task_regs(prev))) \ + ia64_save_extra(prev); \ + if (((next)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)) \ + || IS_IA32_PROCESS(ia64_task_regs(next))) \ + ia64_load_extra(next); \ + ia64_psr(ia64_task_regs(next))->dfh = (ia64_get_fpu_owner() != (next)); \ + (last) = ia64_switch_to((next)); \ } while (0) #ifdef CONFIG_SMP @@ -444,6 +452,7 @@ extern void ia64_load_extra (struct task_struct *task); */ # define switch_to(prev,next,last) do { \ if (ia64_get_fpu_owner() == (prev) && ia64_psr(ia64_task_regs(prev))->mfh) { \ + ia64_psr(ia64_task_regs(prev))->mfh = 0; \ 
(prev)->thread.flags |= IA64_THREAD_FPH_VALID; \ __ia64_save_fpu((prev)->thread.fph); \ } \ diff --git a/include/asm-sparc/bitops.h b/include/asm-sparc/bitops.h index 16daf7a3aa4c..8ab8bdaf024c 100644 --- a/include/asm-sparc/bitops.h +++ b/include/asm-sparc/bitops.h @@ -1,4 +1,4 @@ -/* $Id: bitops.h,v 1.59 2000/07/13 01:51:50 davem Exp $ +/* $Id: bitops.h,v 1.60 2000/08/10 23:49:16 davem Exp $ * bitops.h: Bit string operations on the Sparc. * * Copyright 1995 David S. Miller (davem@caip.rutgers.edu) @@ -232,6 +232,8 @@ extern __inline__ unsigned long find_next_zero_bit(void *addr, unsigned long siz found_first: tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ found_middle: return result + ffz(tmp); } @@ -379,7 +381,11 @@ extern __inline__ unsigned long find_next_zero_le_bit(void *addr, unsigned long tmp = *p; found_first: - return result + ffz(__swab32(tmp) | (~0UL << size)); + tmp = __swab32(tmp) | (~0UL << size); + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. 
*/ + return result + ffz(tmp); + found_middle: return result + ffz(__swab32(tmp)); } diff --git a/include/asm-sparc/md.h b/include/asm-sparc/md.h deleted file mode 100644 index e0d0e85a5539..000000000000 --- a/include/asm-sparc/md.h +++ /dev/null @@ -1,13 +0,0 @@ -/* $Id: md.h,v 1.1 1997/12/15 15:12:39 jj Exp $ - * md.h: High speed xor_block operation for RAID4/5 - * - */ - -#ifndef __ASM_MD_H -#define __ASM_MD_H - -/* #define HAVE_ARCH_XORBLOCK */ - -#define MD_XORBLOCK_ALIGNMENT sizeof(long) - -#endif /* __ASM_MD_H */ diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h index 756d3a57d416..e6dac360b1f4 100644 --- a/include/asm-sparc/unistd.h +++ b/include/asm-sparc/unistd.h @@ -1,4 +1,4 @@ -/* $Id: unistd.h,v 1.66 2000/01/29 17:57:25 jj Exp $ */ +/* $Id: unistd.h,v 1.67 2000/08/12 13:25:51 davem Exp $ */ #ifndef _SPARC_UNISTD_H #define _SPARC_UNISTD_H diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h index 0f42cad91d46..e297d6f96e88 100644 --- a/include/asm-sparc64/bitops.h +++ b/include/asm-sparc64/bitops.h @@ -1,4 +1,4 @@ -/* $Id: bitops.h,v 1.29 2000/06/30 10:18:39 davem Exp $ +/* $Id: bitops.h,v 1.30 2000/08/10 23:49:16 davem Exp $ * bitops.h: Bit string operations on the V9. * * Copyright 1996, 1997 David S. Miller (davem@caip.rutgers.edu) @@ -158,6 +158,8 @@ extern __inline__ unsigned long find_next_zero_bit(void *addr, unsigned long siz found_first: tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ found_middle: return result + ffz(tmp); } @@ -217,6 +219,8 @@ extern __inline__ unsigned long find_next_zero_le_bit(void *addr, unsigned long tmp = __swab64p(p); found_first: tmp |= (~0UL << size); + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. 
*/ found_middle: return result + ffz(tmp); } diff --git a/include/asm-sparc64/fcntl.h b/include/asm-sparc64/fcntl.h index 8c7935dece80..00d73f44418c 100644 --- a/include/asm-sparc64/fcntl.h +++ b/include/asm-sparc64/fcntl.h @@ -1,4 +1,4 @@ -/* $Id: fcntl.h,v 1.8 2000/07/06 01:41:45 davem Exp $ */ +/* $Id: fcntl.h,v 1.9 2000/08/12 13:25:53 davem Exp $ */ #ifndef _SPARC64_FCNTL_H #define _SPARC64_FCNTL_H @@ -73,4 +73,8 @@ struct flock32 { }; #endif +#ifdef __KERNEL__ +#define flock64 flock +#endif + #endif /* !(_SPARC64_FCNTL_H) */ diff --git a/include/asm-sparc64/kdebug.h b/include/asm-sparc64/kdebug.h index 4b61cc432a2f..65862f7c4dc3 100644 --- a/include/asm-sparc64/kdebug.h +++ b/include/asm-sparc64/kdebug.h @@ -1,77 +1,9 @@ -/* $Id: kdebug.h,v 1.3 1997/12/14 23:24:47 ecd Exp $ - * kdebug.h: Defines and definitions for debugging the Linux kernel - * under various kernel debuggers. - * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - */ #ifndef _SPARC64_KDEBUG_H #define _SPARC64_KDEBUG_H -#include - -/* XXX This needs to all be fixed for Ultra/V9 -DaveM */ - -/* The debugger lives in 1MB of virtual address space right underneath - * the boot prom. - */ - -#define DEBUG_FIRSTVADDR 0xffc00000 -#define DEBUG_LASTVADDR LINUX_OPPROM_BEGVM - -/* Breakpoints are enter through trap table entry 126. So in sparc assembly - * if you want to drop into the debugger you do: - * - * t DEBUG_BP_TRAP - */ - -#define DEBUG_BP_TRAP 126 - -#ifndef __ASSEMBLY__ -/* The debug vector is passed in %o1 at boot time. It is a pointer to - * a structure in the debuggers address space. Here is its format. +/* + * No kernel debugger on sparc64. Kept here because drivers/sbus/char/ + * includes it for sparc32 sake. */ -typedef unsigned int (*debugger_funct)(void); - -struct kernel_debug { - /* First the entry point into the debugger. You jump here - * to give control over to the debugger. - */ - unsigned long kdebug_entry; - unsigned long kdebug_trapme; /* Figure out later... 
*/ - /* The following is the number of pages that the debugger has - * taken from to total pool. - */ - unsigned long *kdebug_stolen_pages; - /* Ok, after you remap yourself and/or change the trap table - * from what you were left with at boot time you have to call - * this synchronization function so the debugger can check out - * what you have done. - */ - debugger_funct teach_debugger; -}; /* I think that is it... */ - -extern struct kernel_debug *linux_dbvec; - -/* Use this macro in C-code to enter the debugger. */ -extern __inline__ void sp_enter_debugger(void) -{ - __asm__ __volatile__("jmpl %0, %%o7\n\t" - "nop\n\t" : : - "r" (linux_dbvec) : "o7", "memory"); -} - -#define SP_ENTER_DEBUGGER do { \ - if((linux_dbvec!=0) && ((*(short *)linux_dbvec)!=-1)) \ - sp_enter_debugger(); \ - } while(0) - -#endif /* !(__ASSEMBLY__) */ - -/* Some nice offset defines for assembler code. */ -#define KDEBUG_ENTRY_OFF 0x0 -#define KDEBUG_DUNNO_OFF 0x4 -#define KDEBUG_DUNNO2_OFF 0x8 -#define KDEBUG_TEACH_OFF 0xc - -#endif /* !(_SPARC64_KDEBUG_H) */ +#endif diff --git a/include/asm-sparc64/md.h b/include/asm-sparc64/md.h deleted file mode 100644 index 373c7674420c..000000000000 --- a/include/asm-sparc64/md.h +++ /dev/null @@ -1,91 +0,0 @@ -/* $Id: md.h,v 1.3 1999/05/25 16:53:28 jj Exp $ - * md.h: High speed xor_block operation for RAID4/5 - * utilizing the UltraSparc Visual Instruction Set. 
- * - * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#ifndef __ASM_MD_H -#define __ASM_MD_H - -#include -#include - -#undef HAVE_ARCH_XORBLOCK - -#define MD_XORBLOCK_ALIGNMENT 64 - -/* void __xor_block (char *dest, char *src, long len) - * { - * while (len--) *dest++ ^= *src++; - * } - * - * Requirements: - * !(((long)dest | (long)src) & (MD_XORBLOCK_ALIGNMENT - 1)) && - * !(len & 127) && len >= 256 - */ - -static inline void __xor_block (char *dest, char *src, long len) -{ - __asm__ __volatile__ (" - wr %%g0, %3, %%fprs - wr %%g0, %4, %%asi - membar #LoadStore|#StoreLoad|#StoreStore - sub %2, 128, %2 - ldda [%0] %4, %%f0 - ldda [%1] %4, %%f16 -1: ldda [%0 + 64] %%asi, %%f32 - fxor %%f0, %%f16, %%f16 - fxor %%f2, %%f18, %%f18 - fxor %%f4, %%f20, %%f20 - fxor %%f6, %%f22, %%f22 - fxor %%f8, %%f24, %%f24 - fxor %%f10, %%f26, %%f26 - fxor %%f12, %%f28, %%f28 - fxor %%f14, %%f30, %%f30 - stda %%f16, [%0] %4 - ldda [%1 + 64] %%asi, %%f48 - ldda [%0 + 128] %%asi, %%f0 - fxor %%f32, %%f48, %%f48 - fxor %%f34, %%f50, %%f50 - add %0, 128, %0 - fxor %%f36, %%f52, %%f52 - add %1, 128, %1 - fxor %%f38, %%f54, %%f54 - subcc %2, 128, %2 - fxor %%f40, %%f56, %%f56 - fxor %%f42, %%f58, %%f58 - fxor %%f44, %%f60, %%f60 - fxor %%f46, %%f62, %%f62 - stda %%f48, [%0 - 64] %%asi - bne,pt %%xcc, 1b - ldda [%1] %4, %%f16 - ldda [%0 + 64] %%asi, %%f32 - fxor %%f0, %%f16, %%f16 - fxor %%f2, %%f18, %%f18 - fxor %%f4, %%f20, %%f20 - fxor %%f6, %%f22, %%f22 - fxor %%f8, %%f24, %%f24 - fxor %%f10, %%f26, %%f26 - fxor %%f12, %%f28, %%f28 - fxor %%f14, %%f30, %%f30 - stda %%f16, [%0] %4 - ldda [%1 + 64] %%asi, %%f48 - membar #Sync - fxor %%f32, %%f48, %%f48 - fxor %%f34, %%f50, %%f50 - fxor %%f36, %%f52, %%f52 - fxor %%f38, %%f54, %%f54 - fxor %%f40, %%f56, %%f56 - fxor %%f42, %%f58, %%f58 - fxor %%f44, %%f60, %%f60 - fxor %%f46, %%f62, %%f62 - stda %%f48, [%0 + 64] %%asi - membar #Sync|#StoreStore|#StoreLoad - wr %%g0, 0, %%fprs - " : : - "r" (dest), "r" (src), "r" (len), 
"i" (FPRS_FEF), "i" (ASI_BLK_P) : - "cc", "memory"); -} - -#endif /* __ASM_MD_H */ diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index 558df48efde4..6c5e894b8715 100644 --- a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -1,4 +1,4 @@ -/* $Id: mmu_context.h,v 1.43 2000/08/09 08:04:45 davem Exp $ */ +/* $Id: mmu_context.h,v 1.45 2000/08/12 13:25:52 davem Exp $ */ #ifndef __SPARC64_MMU_CONTEXT_H #define __SPARC64_MMU_CONTEXT_H @@ -100,30 +100,36 @@ do { \ /* Switch the current MM context. */ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk, int cpu) { - long dirty; + unsigned long ctx_valid; spin_lock(&mm->page_table_lock); if (CTX_VALID(mm->context)) - dirty = 0; - else - dirty = 1; - if (dirty || (old_mm != mm)) { - unsigned long vm_mask; + ctx_valid = 1; + else + ctx_valid = 0; - if (dirty) + if (!ctx_valid || (old_mm != mm)) { + if (!ctx_valid) get_new_mmu_context(mm); - vm_mask = (1UL << cpu); - if (!(mm->cpu_vm_mask & vm_mask)) { - mm->cpu_vm_mask |= vm_mask; - dirty = 1; - } - load_secondary_context(mm); - if (dirty != 0) - clean_secondary_context(); reload_tlbmiss_state(tsk, mm); } + + { + unsigned long vm_mask = (1UL << cpu); + + /* Even if (mm == old_mm) we _must_ check + * the cpu_vm_mask. If we do not we could + * corrupt the TLB state because of how + * smp_flush_tlb_{page,range,mm} on sparc64 + * and lazy tlb switches work. 
-DaveM + */ + if (!ctx_valid || !(mm->cpu_vm_mask & vm_mask)) { + mm->cpu_vm_mask |= vm_mask; + clean_secondary_context(); + } + } spin_unlock(&mm->page_table_lock); } diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index c47527736d84..070fb31dbcb4 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -1,4 +1,4 @@ -/* $Id: pgtable.h,v 1.130 2000/08/09 00:00:17 davem Exp $ +/* $Id: pgtable.h,v 1.131 2000/08/11 03:00:14 davem Exp $ * pgtable.h: SpitFire page table operations. * * Copyright 1996,1997 David S. Miller (davem@caip.rutgers.edu) @@ -174,7 +174,6 @@ extern inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) (pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> 11UL)) #define pgd_set(pgdp, pmdp) \ (pgd_val(*(pgdp)) = (__pa((unsigned long) (pmdp)) >> 11UL)) -#define sparc64_pte_pagenr(pte) (((unsigned long) ((pte_val(pte)&~PAGE_OFFSET)-phys_base)>>PAGE_SHIFT)) #define pmd_page(pmd) ((unsigned long) __va((pmd_val(pmd)<<11UL))) #define pgd_page(pgd) ((unsigned long) __va((pgd_val(pgd)<<11UL))) #define pte_none(pte) (!pte_val(pte)) @@ -206,7 +205,7 @@ extern inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot) #define __page_address(page) ((page)->virtual) #define page_address(page) ({ __page_address(page); }) -#define pte_page(x) (mem_map+sparc64_pte_pagenr(x)) +#define pte_page(x) (mem_map+(((pte_val(x)&_PAGE_PADDR)-phys_base)>>PAGE_SHIFT)) /* Be very careful when you change these three, they are delicate. 
*/ #define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_ACCESSED | _PAGE_R)) diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h index a7848350fbd7..9781003bd5ea 100644 --- a/include/asm-sparc64/unistd.h +++ b/include/asm-sparc64/unistd.h @@ -1,4 +1,4 @@ -/* $Id: unistd.h,v 1.44 2000/05/16 16:42:33 jj Exp $ */ +/* $Id: unistd.h,v 1.45 2000/08/12 13:25:52 davem Exp $ */ #ifndef _SPARC64_UNISTD_H #define _SPARC64_UNISTD_H diff --git a/include/linux/file.h b/include/linux/file.h index a2ba66eb9dbe..f596f78e90d8 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -7,6 +7,27 @@ extern void FASTCALL(fput(struct file *)); extern struct file * FASTCALL(fget(unsigned int fd)); + +static inline int get_close_on_exec(unsigned int fd) +{ + struct files_struct *files = current->files; + int res; + write_lock(&files->file_lock); + res = FD_ISSET(fd, files->close_on_exec); + write_unlock(&files->file_lock); + return res; +} + +static inline void set_close_on_exec(unsigned int fd, int flag) +{ + struct files_struct *files = current->files; + write_lock(&files->file_lock); + if (flag) + FD_SET(fd, files->close_on_exec); + else + FD_CLR(fd, files->close_on_exec); + write_unlock(&files->file_lock); +} static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f0f19148312d..ebe8428da0fa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -402,7 +402,6 @@ struct packet_type extern struct net_device loopback_dev; /* The loopback */ extern struct net_device *dev_base; /* All devices */ extern rwlock_t dev_base_lock; /* Device list lock */ -extern struct netdev_boot_setup dev_boot_setup[]; extern int netdev_boot_setup_add(char *name, struct ifmap *map); extern int netdev_boot_setup_check(struct net_device *dev); diff --git a/include/linux/sound.h b/include/linux/sound.h index 4921b90e2cbb..18a67332039a 100644 --- 
a/include/linux/sound.h +++ b/include/linux/sound.h @@ -1,3 +1,28 @@ + +/* + * Minor numbers for the sound driver. + */ + +#define SND_DEV_CTL 0 /* Control port /dev/mixer */ +#define SND_DEV_SEQ 1 /* Sequencer output /dev/sequencer (FM + synthesizer and MIDI output) */ +#define SND_DEV_MIDIN 2 /* Raw midi access */ +#define SND_DEV_DSP 3 /* Digitized voice /dev/dsp */ +#define SND_DEV_AUDIO 4 /* Sparc compatible /dev/audio */ +#define SND_DEV_DSP16 5 /* Like /dev/dsp but 16 bits/sample */ +/* #define SND_DEV_STATUS 6 */ /* /dev/sndstat (obsolete) */ +#define SND_DEV_UNUSED 6 +#define SND_DEV_AWFM 7 /* Reserved */ +#define SND_DEV_SEQ2 8 /* /dev/sequencer, level 2 interface */ +/* #define SND_DEV_SNDPROC 9 */ /* /dev/sndproc for programmable devices (not used) */ +/* #define SND_DEV_DMMIDI 9 */ +#define SND_DEV_SYNTH 9 /* Raw synth access /dev/synth (same as /dev/dmfm) */ +#define SND_DEV_DMFM 10 /* Raw synth access /dev/dmfm */ +#define SND_DEV_UNKNOWN11 11 +#define SND_DEV_ADSP 12 /* Like /dev/dsp (obsolete) */ +#define SND_DEV_AMIDI 13 /* Like /dev/midi (obsolete) */ +#define SND_DEV_ADMMIDI 14 /* Like /dev/dmmidi (obsolete) */ + /* * Sound core interface functions */ diff --git a/include/net/sock.h b/include/net/sock.h index 291e20361f13..0e4ff35725d8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -272,8 +272,6 @@ struct tcp_opt { __u32 lrcvtime; /* timestamp of last received data packet*/ __u16 last_seg_size; /* Size of last incoming segment */ __u16 rcv_mss; /* MSS used for delayed ACK decisions */ - __u16 rcv_small; /* Number of not ACKed small segments */ - __u16 rcv_thresh; /* Peer doing TCP_NODELAY */ } ack; /* Data for direct copy to user */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d92de90b57cf..ff6e603e321e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -19,13 +19,13 @@ #define _TCP_H #define TCP_DEBUG 1 -#define FASTRETRANS_DEBUG 2 +#define FASTRETRANS_DEBUG 1 /* Be paranoid about data immediately beyond right 
edge of window. */ #undef TCP_FORMAL_WINDOW /* Cancel timers, when they are not required. */ -#undef TCP_CLEAR_TIMER +#undef TCP_CLEAR_TIMERS #include #include @@ -624,14 +624,21 @@ extern int tcp_rcv_established(struct sock *sk, struct tcphdr *th, unsigned len); +enum tcp_ack_state_t +{ + TCP_ACK_SCHED = 1, + TCP_ACK_TIMER = 2, + TCP_ACK_PUSHED= 4 +}; + static inline void tcp_schedule_ack(struct tcp_opt *tp) { - tp->ack.pending |= 1; + tp->ack.pending |= TCP_ACK_SCHED; } static inline int tcp_ack_scheduled(struct tcp_opt *tp) { - return tp->ack.pending&1; + return tp->ack.pending&TCP_ACK_SCHED; } static __inline__ void tcp_dec_quickack_mode(struct tcp_opt *tp) @@ -851,7 +858,7 @@ here: break; case TCP_TIME_DACK: - tp->ack.pending |= 2; + tp->ack.pending |= TCP_ACK_TIMER; tp->ack.timeout = jiffies+when; if (!mod_timer(&tp->delack_timer, tp->ack.timeout)) sock_hold(sk); diff --git a/kernel/fork.c b/kernel/fork.c index e84aa43fd259..7c55a76e21c1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -311,17 +311,17 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk) tsk->mm = mm; tsk->active_mm = mm; - /* - * child gets a private LDT (if there was an LDT in the parent) - */ - copy_segments(tsk, mm); - down(¤t->mm->mmap_sem); retval = dup_mmap(mm); up(¤t->mm->mmap_sem); if (retval) goto free_pt; + /* + * child gets a private LDT (if there was an LDT in the parent) + */ + copy_segments(tsk, mm); + if (init_new_context(tsk,mm)) goto free_pt; diff --git a/lib/cmdline.c b/lib/cmdline.c index e147bc0b1d27..653287396036 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -85,12 +85,12 @@ char *get_options (char *str, int nints, int *ints) * @ptr: Where parse begins * @retptr: (output) Pointer to next char after parse completes * - * Parses a string into a number. The number stored - * at @ptr is potentially suffixed with %K (for - * kilobytes, or 1024 bytes) or suffixed with %M (for - * megabytes, or 1048576 bytes). 
If the number is suffixed - * with K or M, then the return value is the number - * multiplied by one kilobyte, or one megabyte, respectively. + * Parses a string into a number. The number stored at @ptr is + * potentially suffixed with %K (for kilobytes, or 1024 bytes), + * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or + * 1073741824). If the number is suffixed with K, M, or G, then + * the return value is the number multiplied by one kilobyte, one + * megabyte, or one gigabyte, respectively. */ unsigned long memparse (char *ptr, char **retptr) @@ -98,6 +98,9 @@ unsigned long memparse (char *ptr, char **retptr) unsigned long ret = simple_strtoul (ptr, retptr, 0); switch (**retptr) { + case 'G': + case 'g': + ret <<= 10; case 'M': case 'm': ret <<= 10; diff --git a/net/core/dev.c b/net/core/dev.c index 79cb7013b32f..184723cfca7d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -258,7 +258,7 @@ void dev_remove_pack(struct packet_type *pt) *******************************************************************************/ /* Boot time configuration table */ -struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; +static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; /** * netdev_boot_setup_add - add new setup entry diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c index 82bafe19f85d..f4ecf6fcc8ad 100644 --- a/net/ipv4/netfilter/ipt_owner.c +++ b/net/ipv4/netfilter/ipt_owner.c @@ -15,18 +15,29 @@ static int match_pid(const struct sk_buff *skb, pid_t pid) { struct task_struct *p; + struct files_struct *files; int i; read_lock(&tasklist_lock); p = find_task_by_pid(pid); - if(p && p->files) { - for (i=0; i < p->files->max_fds; i++) { - if (fcheck_files(p->files, i) == skb->sk->socket->file) { + if (!p) + goto out; + task_lock(p); + files = p->files; + if(files) { + read_lock(&files->file_lock); + for (i=0; i < files->max_fds; i++) { + if (fcheck_files(files, i) == skb->sk->socket->file) { + 
read_unlock(&files->file_lock); + task_unlock(p); read_unlock(&tasklist_lock); return 1; } - } + } + read_unlock(&files->file_lock); } + task_unlock(p); +out: read_unlock(&tasklist_lock); return 0; } @@ -35,19 +46,28 @@ static int match_sid(const struct sk_buff *skb, pid_t sid) { struct task_struct *p; + struct file *file = skb->sk->socket->file; int i, found=0; read_lock(&tasklist_lock); for_each_task(p) { - if ((p->session != sid) || !p->files) + struct file *files; + if (p->session != sid) continue; - for (i=0; i < p->files->max_fds; i++) { - if (fcheck_files(p->files, i) == skb->sk->socket->file) { - found = 1; - break; + task_lock(p); + files = p->files; + if (files) { + read_lock(&files->file_lock); + for (i=0; i < files->max_fds; i++) { + if (fcheck_files(files, i) == file) { + found = 1; + break; + } } + read_unlock(&files->file_lock); } + task_unlock(p); if(found) break; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8745fde60151..06e13f583d11 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.171 2000/08/09 11:59:04 davem Exp $ + * Version: $Id: tcp.c,v 1.172 2000/08/11 00:13:36 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -1222,7 +1222,7 @@ static void cleanup_rbuf(struct sock *sk, int copied) * in queue. */ || (copied > 0 && - tp->ack.rcv_small > tp->ack.rcv_thresh && + (tp->ack.pending&TCP_ACK_PUSHED) && !tp->ack.pingpong && atomic_read(&sk->rmem_alloc) == 0)) { time_to_ack = 1; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9a85476c1362..57a4837a3a5c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_input.c,v 1.195 2000/08/10 01:21:14 davem Exp $ + * Version: $Id: tcp_input.c,v 1.197 2000/08/12 13:37:58 davem Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -100,7 +100,7 @@ int sysctl_tcp_max_orphans = NR_FILE; #define IsReno(tp) ((tp)->sack_ok == 0) #define IsFack(tp) ((tp)->sack_ok & 2) - +#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) /* Adapt the MSS value used to make delayed ack decision to the * real world. @@ -118,9 +118,10 @@ static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *s len = skb->len; if (len >= tp->ack.rcv_mss) { tp->ack.rcv_mss = len; + /* Dubious? Rather, it is final cut. 8) */ + if (tcp_flag_word(skb->h.th)&TCP_REMNANT) + tp->ack.pending |= TCP_ACK_PUSHED; } else { - tp->ack.rcv_small++; - /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. * @@ -133,7 +134,6 @@ static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *s * This observation (if it is correct 8)) allows * to handle super-low mtu links fairly. */ -#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) (len >= TCP_MIN_MSS + sizeof(struct tcphdr) && !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) { /* Subtract also invariant (if peer is RFC compliant), @@ -141,13 +141,13 @@ static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *s * Resulting "len" is MSS free of SACK jitter. 
*/ len -= tp->tcp_header_len; + tp->ack.last_seg_size = len; if (len == lss) { tp->ack.rcv_mss = len; - tp->ack.rcv_small = 0; - tp->ack.rcv_thresh = 0; + return; } - tp->ack.last_seg_size = len; } + tp->ack.pending |= TCP_ACK_PUSHED; } } @@ -1395,9 +1395,10 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag) if (tp->retrans_out == 0) tp->retrans_stamp = 0; - if (flag&FLAG_ECE) { + if (flag&FLAG_ECE) tcp_enter_cwr(tp); - } else if (tp->ca_state != TCP_CA_CWR) { + + if (tp->ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; if (tp->left_out || @@ -1409,8 +1410,10 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag) tp->ca_state = state; tp->high_seq = tp->snd_nxt; } + tcp_moderate_cwnd(tp); + } else { + tcp_cwnd_down(tp); } - tcp_moderate_cwnd(tp); } /* Process an event, which can update packets-in-flight not trivially. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6cb75d7309e5..fb89c6ca522c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_output.c,v 1.125 2000/08/09 11:59:04 davem Exp $ + * Version: $Id: tcp_output.c,v 1.126 2000/08/11 00:13:36 davem Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -136,14 +136,6 @@ static __inline__ void tcp_event_ack_sent(struct sock *sk) tcp_dec_quickack_mode(tp); tcp_clear_xmit_timer(sk, TCP_TIME_DACK); - - /* If we ever saw N>1 small segments from peer, it has - * enough of send buffer to send N packets and does not nagle. - * Hence, we may delay acks more aggresively. - */ - if (tp->ack.rcv_small > tp->ack.rcv_thresh+1) - tp->ack.rcv_thresh = tp->ack.rcv_small-1; - tp->ack.rcv_small = 0; } /* Chose a new window to advertise, update state in tcp_opt for the @@ -1204,8 +1196,10 @@ void tcp_send_delayed_ack(struct sock *sk) unsigned long timeout; if (ato > TCP_DELACK_MIN) { - int max_ato = (tp->ack.pingpong || tp->ack.rcv_small) ? 
- TCP_DELACK_MAX : (HZ/2); + int max_ato = HZ/2; + + if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED)) + max_ato = TCP_DELACK_MAX; /* Slow path, intersegment interval is "high". */ @@ -1227,7 +1221,7 @@ void tcp_send_delayed_ack(struct sock *sk) timeout = jiffies + ato; /* Use new timeout only if there wasn't a older one earlier. */ - if (tp->ack.pending&2) { + if (tp->ack.pending&TCP_ACK_TIMER) { /* If delack timer was blocked or is about to expire, * send ACK now. */ @@ -1239,7 +1233,7 @@ void tcp_send_delayed_ack(struct sock *sk) if (!time_before(timeout, tp->ack.timeout)) timeout = tp->ack.timeout; } - tp->ack.pending = 3; + tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER; tp->ack.timeout = timeout; if (!mod_timer(&tp->delack_timer, timeout)) sock_hold(sk); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 52c39a6b5aee..d98376840eb6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_timer.c,v 1.78 2000/08/09 11:59:04 davem Exp $ + * Version: $Id: tcp_timer.c,v 1.79 2000/08/11 00:13:36 davem Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -224,7 +224,7 @@ static void tcp_delack_timer(unsigned long data) tcp_mem_reclaim(sk); - if (sk->state == TCP_CLOSE || !(tp->ack.pending&2)) + if (sk->state == TCP_CLOSE || !(tp->ack.pending&TCP_ACK_TIMER)) goto out; if ((long)(tp->ack.timeout - jiffies) > 0) { @@ -232,7 +232,7 @@ static void tcp_delack_timer(unsigned long data) sock_hold(sk); goto out; } - tp->ack.pending &= ~2; + tp->ack.pending &= ~TCP_ACK_TIMER; if (skb_queue_len(&tp->ucopy.prequeue)) { struct sk_buff *skb; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index abf0bac9105e..4983e898e5db 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -67,7 +67,7 @@ struct gred_sched_data u32 limit; /* HARD maximal queue length */ u32 qth_min; /* Min average length threshold: A scaled */ u32 qth_max; /* Max average length threshold: A scaled */ - u32 DP; /* the drop pramaters */ + u32 DP; /* the drop pramaters */ char Wlog; /* log(W) */ char Plog; /* random number bits */ u32 Scell_max; @@ -146,8 +146,8 @@ gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) } - q->packetsin++; - q->bytesin+=skb->len; + q->packetsin++; + q->bytesin+=skb->len; if (t->eqp && t->grio) { qave=0; @@ -218,7 +218,7 @@ gred_requeue(struct sk_buff *skb, struct Qdisc* sch) { struct gred_sched_data *q; struct gred_sched *t= (struct gred_sched *)sch->data; - q= t->tab[(skb->tc_index&0xf)]; + q= t->tab[(skb->tc_index&0xf)]; /* error checking here -- probably unnecessary */ PSCHED_SET_PASTPERFECT(q->qidlestart); @@ -308,6 +308,7 @@ static void gred_reset(struct Qdisc* sch) while((skb=__skb_dequeue(&sch->q))!=NULL) kfree_skb(skb); + sch->stats.backlog = 0; for (i=0;iDPs;i++) { @@ -329,27 +330,27 @@ static int gred_change(struct Qdisc *sch, struct rtattr *opt) { struct gred_sched *table = (struct gred_sched *)sch->data; struct gred_sched_data *q; - struct tc_gred_qopt *ctl; - struct tc_gred_sopt *sopt; - struct rtattr *tb[TCA_GRED_STAB]; - struct rtattr *tb2[TCA_GRED_STAB]; + struct tc_gred_qopt *ctl; + 
struct tc_gred_sopt *sopt; + struct rtattr *tb[TCA_GRED_STAB]; + struct rtattr *tb2[TCA_GRED_STAB]; int i; - if (opt == NULL || - rtattr_parse(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ) - return -EINVAL; + if (opt == NULL || + rtattr_parse(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ) + return -EINVAL; if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0 && tb[TCA_GRED_DPS-1] != 0) { rtattr_parse(tb2, TCA_GRED_DPS, RTA_DATA(opt), RTA_PAYLOAD(opt)); - sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); - table->DPs=sopt->DPs; - table->def=sopt->def_DP; - table->grio=sopt->grio; + sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); + table->DPs=sopt->DPs; + table->def=sopt->def_DP; + table->grio=sopt->grio; table->initd=0; - /* probably need to clear all the table DP entries as well */ + /* probably need to clear all the table DP entries as well */ MOD_INC_USE_COUNT; return 0; } @@ -361,7 +362,7 @@ static int gred_change(struct Qdisc *sch, struct rtattr *opt) return -EINVAL; ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]); - if (ctl->DP > MAX_DPs-1 || ctl->DP <0) { + if (ctl->DP > MAX_DPs-1 ) { /* misbehaving is punished! Put in the default drop probability */ DPRINTK("\nGRED: DP %u not in the proper range fixed. 
New DP " "set to default at %d\n",ctl->DP,table->def); @@ -371,6 +372,8 @@ static int gred_change(struct Qdisc *sch, struct rtattr *opt) if (table->tab[ctl->DP] == NULL) { table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL); + if (NULL == table->tab[ctl->DP]) + return -ENOMEM; memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data))); } q= table->tab[ctl->DP]; @@ -378,13 +381,13 @@ static int gred_change(struct Qdisc *sch, struct rtattr *opt) if (table->grio) { if (ctl->prio <=0) { if (table->def && table->tab[table->def]) { - DPRINTK("\nGRED: DP %u does not have a prio setting " - "default to %d\n",ctl->DP, + DPRINTK("\nGRED: DP %u does not have a prio" + "setting default to %d\n",ctl->DP, table->tab[table->def]->prio); q->prio=table->tab[table->def]->prio; } else { - DPRINTK("\nGRED: DP %u does not have a prio setting " - "default to 8\n",ctl->DP); + DPRINTK("\nGRED: DP %u does not have a prio" + " setting default to 8\n",ctl->DP); q->prio=8; } } else { @@ -392,7 +395,7 @@ static int gred_change(struct Qdisc *sch, struct rtattr *opt) } } else { q->prio=8; - } + } q->DP=ctl->DP; @@ -437,10 +440,13 @@ static int gred_change(struct Qdisc *sch, struct rtattr *opt) if (table->tab[table->def] == NULL) { table->tab[table->def]= - kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL); + kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL); + if (NULL == table->tab[table->def]) + return -ENOMEM; + memset(table->tab[table->def], 0, (sizeof(struct gred_sched_data))); - } + } q= table->tab[table->def]; q->DP=table->def; q->Wlog = ctl->Wlog; @@ -452,45 +458,44 @@ static int gred_change(struct Qdisc *sch, struct rtattr *opt) q->qth_min = ctl->qth_min<Wlog; q->qth_max = ctl->qth_max<Wlog; - if (table->grio) q->prio=table->tab[ctl->DP]->prio; - else q->prio=8; q->qcount = -1; PSCHED_SET_PASTPERFECT(q->qidlestart); memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256); } - return 0; + return 0; } static int gred_init(struct Qdisc *sch, 
struct rtattr *opt) { struct gred_sched *table = (struct gred_sched *)sch->data; - struct tc_gred_sopt *sopt; - struct rtattr *tb[TCA_GRED_STAB]; - struct rtattr *tb2[TCA_GRED_STAB]; + struct tc_gred_sopt *sopt; + struct rtattr *tb[TCA_GRED_STAB]; + struct rtattr *tb2[TCA_GRED_STAB]; - if (opt == NULL || - rtattr_parse(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ) - return -EINVAL; + if (opt == NULL || + rtattr_parse(tb, TCA_GRED_STAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ) + return -EINVAL; if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0 && tb[TCA_GRED_DPS-1] != 0) { - rtattr_parse(tb2, TCA_GRED_DPS, RTA_DATA(opt), - RTA_PAYLOAD(opt)); + rtattr_parse(tb2, TCA_GRED_DPS, RTA_DATA(opt),RTA_PAYLOAD(opt)); - sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); - table->DPs=sopt->DPs; - table->def=sopt->def_DP; - table->grio=sopt->grio; + sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]); + table->DPs=sopt->DPs; + table->def=sopt->def_DP; + table->grio=sopt->grio; table->initd=0; MOD_INC_USE_COUNT; return 0; } - DPRINTK("\n GRED_INIT error!\n"); + DPRINTK("\n GRED_INIT error!\n"); return -EINVAL; } diff --git a/net/socket.c b/net/socket.c index f0565ed3e259..bb8b98bca9be 100644 --- a/net/socket.c +++ b/net/socket.c @@ -440,6 +440,7 @@ struct socket *sock_alloc(void) struct socket * sock; inode = get_empty_inode(); + inode->i_sb = sock_mnt->mnt_sb; if (!inode) return NULL;