From c72adc34d37449446195965a9e3834dfe9c50805 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:11:03 -0500 Subject: [PATCH] Import pre2.0.9 --- Documentation/ioctl-number.txt | 2 +- MAINTAINERS | 6 + Makefile | 2 +- arch/i386/kernel/time.c | 179 ++++++++++++---- arch/ppc/boot/mk_type41.c | 4 +- arch/ppc/kernel/include/elf/ChangeLog | 4 +- arch/ppc/kernel/ppc_machine.h | 2 +- arch/ppc/kernel/process.c | 2 +- arch/ppc/kernel/support.c | 2 +- arch/ppc/kernel/syscalls.c | 2 +- drivers/char/rtc.c | 7 +- drivers/net/eexpress.c | 23 ++- drivers/net/ne.c | 12 +- drivers/net/ppp.c | 2 +- drivers/scsi/scsi.c | 13 +- drivers/scsi/scsi_ioctl.c | 2 +- fs/isofs/util.c | 2 +- fs/nfs/dir.c | 8 +- fs/read_write.c | 4 +- include/asm-alpha/bitops.h | 6 +- include/asm-i386/checksum.h | 2 +- include/asm-i386/system.h | 15 +- include/asm-ppc/posix_types.h | 4 +- include/linux/mc146818rtc.h | 40 ++-- kernel/ksyms.c | 1 - mm/filemap.c | 286 ++++++++++++++++---------- mm/kmalloc.c | 41 +++- mm/page_io.c | 8 +- mm/swapfile.c | 23 ++- net/ipv4/ip_fw.c | 8 +- 30 files changed, 478 insertions(+), 234 deletions(-) diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt index 0c8d9e048779..5ca0c696e48e 100644 --- a/Documentation/ioctl-number.txt +++ b/Documentation/ioctl-number.txt @@ -55,7 +55,6 @@ This table is current to Linux 1.3.98. Ioctl Include File Comments ======================================================== 0x00 linux/fs.h only FIBMAP, FIGETBSZ -0x00 linux/mc146818rtc.h conflict! 0x02 linux/fd.h 0x03 linux/hdreg.h 0x04 linux/umsdos_fs.h @@ -90,6 +89,7 @@ Ioctl Include File Comments 'm' linux/mtio.h conflict! 'm' linux/soundcard.h conflict! 
'n' linux/ncp_fs.h +'p' linux/mc146818rtc.h 'r' linux/msdos_fs.h 's' linux/cdk.h 't' linux/if_ppp.h no conflict diff --git a/MAINTAINERS b/MAINTAINERS index 83cc2b95bc81..100bbca09915 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -68,6 +68,12 @@ S: Status, one of the following: it has been replaced by a better system and you should be using that. +EXT2 FILE SYSTEM +P: Remy Card +M: Remy.Card@linux.org +L: linux-kernel@vger.rutgers.edu +S: Maintained + 3C501 NETWORK DRIVER P: Alan Cox M: net-patches@lxorguk.ukuu.org.uk diff --git a/Makefile b/Makefile index b3ba1c4638f1..b00bfd888d4e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 1 PATCHLEVEL = 99 -SUBLEVEL = 8 +SUBLEVEL = 9 ARCH = i386 diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 1b75c30d49b0..e2d268971959 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -10,6 +10,8 @@ * 1995-03-26 Markus Kuhn * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 * precision CMOS clock update + * 1996-05-03 Ingo Molnar + * fixed time warps in do_[slow|fast]_gettimeoffset() */ #include #include @@ -31,57 +33,102 @@ extern int setup_x86_irq(int, struct irqaction *); #ifndef CONFIG_APM /* cycle counter may be unreliable */ /* Cycle counter value at the previous timer interrupt.. 
*/ -static unsigned long long last_timer_cc = 0; -static unsigned long long init_timer_cc = 0; +static struct { + unsigned long low; + unsigned long high; +} init_timer_cc, last_timer_cc; +/* + * This is more assembly than C, but it's also rather + * timing-critical and we have to use assembler to get + * reasonable 64-bit arithmetic + */ static unsigned long do_fast_gettimeoffset(void) { - unsigned long time_low, time_high; - unsigned long quotient, remainder; - - /* Get last timer tick in absolute kernel time */ - __asm__("subl %2,%0\n\t" - "sbbl %3,%1" - :"=r" (time_low), "=r" (time_high) - :"m" (*(0+(long *)&init_timer_cc)), - "m" (*(1+(long *)&init_timer_cc)), - "0" (*(0+(long *)&last_timer_cc)), - "1" (*(1+(long *)&last_timer_cc))); - /* - * Divide the 64-bit time with the 32-bit jiffy counter, - * getting the quotient in clocks. - * - * Giving quotient = "average internal clocks per jiffy" - */ - __asm__("divl %2" - :"=a" (quotient), "=d" (remainder) - :"r" (jiffies), - "0" (time_low), "1" (time_high)); + register unsigned long eax asm("ax"); + register unsigned long edx asm("dx"); + unsigned long tmp, quotient, low_timer, missing_time; + + /* Last jiffie when do_fast_gettimeoffset() was called.. */ + static unsigned long last_jiffies=0; + + /* Cached "clocks per usec" value.. 
*/ + static unsigned long cached_quotient=0; + + /* The "clocks per usec" value is calculated once each jiffie */ + tmp = jiffies; + quotient = cached_quotient; + low_timer = last_timer_cc.low; + missing_time = 0; + if (last_jiffies != tmp) { + last_jiffies = tmp; + /* + * test for hanging bottom handler (this means xtime is not + * updated yet) + */ + if (test_bit(TIMER_BH, &bh_active) ) + { + missing_time = 997670/HZ; + } + + /* Get last timer tick in absolute kernel time */ + eax = low_timer; + edx = last_timer_cc.high; + __asm__("subl "SYMBOL_NAME_STR(init_timer_cc)",%0\n\t" + "sbbl "SYMBOL_NAME_STR(init_timer_cc)"+4,%1" + :"=a" (eax), "=d" (edx) + :"0" (eax), "1" (edx)); + + /* + * Divide the 64-bit time with the 32-bit jiffy counter, + * getting the quotient in clocks. + * + * Giving quotient = "average internal clocks per usec" + */ + __asm__("divl %2" + :"=a" (eax), "=d" (edx) + :"r" (tmp), + "0" (eax), "1" (edx)); + + edx = 997670/HZ; + tmp = eax; + eax = 0; + + __asm__("divl %2" + :"=a" (eax), "=d" (edx) + :"r" (tmp), + "0" (eax), "1" (edx)); + cached_quotient = eax; + quotient = eax; + } /* Read the time counter */ __asm__(".byte 0x0f,0x31" - :"=a" (time_low), "=d" (time_high)); + :"=a" (eax), "=d" (edx)); /* .. relative to previous jiffy (32 bits is enough) */ - time_low -= (unsigned long) last_timer_cc; + edx = 0; + eax -= low_timer; /* - * Time offset = (1000000/HZ * remainder) / quotient. + * Time offset = (997670/HZ * time_low) / quotient. */ - __asm__("mull %1\n\t" - "divl %2" - :"=a" (quotient), "=d" (remainder) + + __asm__("mull %2" + :"=a" (eax), "=d" (edx) :"r" (quotient), - "0" (time_low), "1" (1000000/HZ)); + "0" (eax), "1" (edx)); /* - * Due to rounding errors (and jiffies inconsistencies), + * Due to rounding errors (and jiffies inconsistencies), * we need to check the result so that we'll get a timer * that is monotonous. 
*/ - if (quotient >= 1000000/HZ) - quotient = 1000000/HZ-1; - return quotient; + if (edx >= 997670/HZ) + edx = 997670/HZ-1; + + eax = edx + missing_time; + return eax; } #endif @@ -122,21 +169,63 @@ static unsigned long do_fast_gettimeoffset(void) static unsigned long do_slow_gettimeoffset(void) { int count; + static int count_p = 0; unsigned long offset = 0; + static unsigned long jiffies_p = 0; + + /* + * cache volatile jiffies temporarily; we have IRQs turned off. + */ + unsigned long jiffies_t; /* timer count may underflow right here */ outb_p(0x00, 0x43); /* latch the count ASAP */ count = inb_p(0x40); /* read the latched count */ count |= inb(0x40) << 8; - /* we know probability of underflow is always MUCH less than 1% */ - if (count > (LATCH - LATCH/100)) { - /* check for pending timer interrupt */ - outb_p(0x0a, 0x20); - if (inb(0x20) & 1) - offset = TICK_SIZE; - } + + jiffies_t = jiffies; + + /* + * avoiding timer inconsistencies (they are rare, but they happen)... + * there are three kinds of problems that must be avoided here: + * 1. the timer counter underflows + * 2. hardware problem with the timer, not giving us continuous time, + * the counter does small "jumps" upwards on some Pentium systems, + * thus causes time warps + * 3. we are after the timer interrupt, but the bottom half handler + * hasn't executed yet. + */ + if( count > count_p ) { + if( jiffies_t == jiffies_p ) { + if( count > LATCH-LATCH/100 ) + offset = TICK_SIZE; + else + /* + * argh, the timer is misbehaving; we can do nothing + * but return the previous clock value. + */ + count = count_p; + } else { + if( test_bit(TIMER_BH, &bh_active) ) { + /* + * we have detected a counter underflow. 
+ */ + offset = TICK_SIZE; + count_p = count; + } else { + count_p = count; + jiffies_p = jiffies_t; + } + } + } else { + count_p = count; + jiffies_p = jiffies_t; + } + + count = ((LATCH-1) - count) * TICK_SIZE; count = (count + LATCH/2) / LATCH; + return offset + count; } @@ -283,8 +372,8 @@ static void pentium_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { /* read Pentium cycle counter */ __asm__(".byte 0x0f,0x31" - :"=a" (((unsigned long *) &last_timer_cc)[0]), - "=d" (((unsigned long *) &last_timer_cc)[1])); + :"=a" (last_timer_cc.low), + "=d" (last_timer_cc.high)); timer_interrupt(irq, NULL, regs); } #endif @@ -375,8 +464,8 @@ void time_init(void) do_gettimeoffset = do_fast_gettimeoffset; /* read Pentium cycle counter */ __asm__(".byte 0x0f,0x31" - :"=a" (((unsigned long *) &init_timer_cc)[0]), - "=d" (((unsigned long *) &init_timer_cc)[1])); + :"=a" (init_timer_cc.low), + "=d" (init_timer_cc.high)); irq0.handler = pentium_timer_interrupt; } #endif diff --git a/arch/ppc/boot/mk_type41.c b/arch/ppc/boot/mk_type41.c index aeb574cddf27..a67bf961b601 100644 --- a/arch/ppc/boot/mk_type41.c +++ b/arch/ppc/boot/mk_type41.c @@ -40,7 +40,7 @@ main(int argc, char *argv[]) } if ((out_fd = creat(argv[2], 0666)) < 0) { - fprintf(stderr, "Can't create outpue file: '%s': %s\n", argv[2], strerror(errno)); + fprintf(stderr, "Can't create output file: '%s': %s\n", argv[2], strerror(errno)); exit(2); } if (fstat(in_fd, &info) < 0) @@ -185,7 +185,7 @@ write_prep_boot_partition(int out_fd) * the next two. * - size of the diskette is (assumed to be) * (2 tracks/cylinder)(18 sectors/tracks)(80 cylinders/diskette) - * - unlike the above sector nunbers, the beginning sector is zero-based! + * - unlike the above sector numbers, the beginning sector is zero-based! 
*/ #if 0 pe->beginning_sector = LeDword(1); diff --git a/arch/ppc/kernel/include/elf/ChangeLog b/arch/ppc/kernel/include/elf/ChangeLog index 4573e49dd3f3..b4e7bd3adde8 100644 --- a/arch/ppc/kernel/include/elf/ChangeLog +++ b/arch/ppc/kernel/include/elf/ChangeLog @@ -9,7 +9,7 @@ Wed Mar 8 18:14:37 1995 Michael Meissner Tue Feb 14 13:59:13 1995 Michael Meissner - * common.h (EM_PPC): Use offical value of 20, not 17. + * common.h (EM_PPC): Use official value of 20, not 17. (EM_PPC_OLD): Define this to be the old value of EM_PPC. @@ -108,7 +108,7 @@ Thu Apr 29 12:12:20 1993 Ken Raeburn (raeburn@deneb.cygnus.com) * common.h (EM_HPPA, NT_VERSION, STN_UNDEF, DT_*): New macros. * external.h (Elf_External_Dyn): New type. - * internal.h (Elf_Intenral_Shdr): New field `size'. + * internal.h (Elf_Internal_Shdr): New field `size'. (Elf_Internal_Dyn): New type. Tue Apr 20 16:03:45 1993 Fred Fish (fnf@cygnus.com) diff --git a/arch/ppc/kernel/ppc_machine.h b/arch/ppc/kernel/ppc_machine.h index 8bdb2bf78821..2891e5a4f1fa 100644 --- a/arch/ppc/kernel/ppc_machine.h +++ b/arch/ppc/kernel/ppc_machine.h @@ -10,7 +10,7 @@ #define MSR_TGPR (1<<17) /* TLB Update registers in use */ #define MSR_ILE (1<<16) /* Interrupt Little-Endian enable */ #define MSR_EE (1<<15) /* External Interrupt enable */ -#define MSR_PR (1<<14) /* Supervisor/User privelege */ +#define MSR_PR (1<<14) /* Supervisor/User privilege */ #define MSR_FP (1<<13) /* Floating Point enable */ #define MSR_ME (1<<12) /* Machine Check enable */ #define MSR_FE0 (1<<11) /* Floating Exception mode 0 */ diff --git a/arch/ppc/kernel/process.c b/arch/ppc/kernel/process.c index f75b65d3d7e1..0b85d43577df 100644 --- a/arch/ppc/kernel/process.c +++ b/arch/ppc/kernel/process.c @@ -2,7 +2,7 @@ * linux/arch/ppc/kernel/process.c * * Copyright (C) 1995 Linus Torvalds - * Adapted for PowerPC by Gary THomas + * Adapted for PowerPC by Gary Thomas */ /* diff --git a/arch/ppc/kernel/support.c b/arch/ppc/kernel/support.c index 
fb944a49c9fc..cd2b58b8a180 100644 --- a/arch/ppc/kernel/support.c +++ b/arch/ppc/kernel/support.c @@ -1,5 +1,5 @@ /* - * Miscallaneous support routines + * Miscellaneous support routines */ #include diff --git a/arch/ppc/kernel/syscalls.c b/arch/ppc/kernel/syscalls.c index 2ccb4b6d93cd..7dd34510cc8f 100644 --- a/arch/ppc/kernel/syscalls.c +++ b/arch/ppc/kernel/syscalls.c @@ -19,7 +19,7 @@ /* * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix tranditionally does this, though. + * a pipe. It's not the way unix traditionally does this, though. */ asmlinkage int sys_pipe(unsigned long * fildes) { diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index 3813edc5677e..4a6ab6f9b8f2 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -30,11 +30,10 @@ * */ -#define RTC_VERSION "1.06" +#define RTC_VERSION "1.07" #define RTC_IRQ 8 /* Can't see this changing soon. */ -#define RTC_IO_BASE 0x70 /* Or this... */ -#define RTC_IO_EXTENT 0x10 /* Only really 0x70 to 0x71, but... */ +#define RTC_IO_EXTENT 0x10 /* Only really two ports, but... */ /* * Note that *all* calls to CMOS_READ and CMOS_WRITE are done with @@ -539,7 +538,7 @@ int rtc_init(void) } misc_register(&rtc_dev); /* Check region? Naaah! Just snarf it up. 
*/ - request_region(RTC_IO_BASE, RTC_IO_EXTENT, "rtc"); + request_region(RTC_PORT(0), RTC_IO_EXTENT, "rtc"); init_timer(&rtc_irq_timer); rtc_irq_timer.function = rtc_dropped_irq; rtc_wait = NULL; diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index ff1e11d123aa..25bccfc20e46 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -1,4 +1,4 @@ -/* $Id: eexpress.c,v 1.12 1996/04/15 17:27:30 phil Exp $ +/* $Id: eexpress.c,v 1.13 1996/05/19 15:59:51 phil Exp $ * * Intel EtherExpress device driver for Linux * @@ -86,7 +86,7 @@ static char version[] = "eexpress.c: v0.10 04-May-95 John Sullivan \n" -" v0.13 10-Apr-96 Philip Blundell \n"; +" v0.14 19-May-96 Philip Blundell \n"; #include @@ -103,6 +103,7 @@ static char version[] = #include #include #include +#include #include #include @@ -1060,15 +1061,25 @@ static void eexp_hw_init586(struct device *dev) printk("%s: eexp_hw_init586()\n", dev->name); #endif - PRIV(dev)->started = 0; + lp->started = 0; set_loopback; outb(SIRQ_dis|irqrmap[dev->irq],ioaddr+SET_IRQ); outb_p(i586_RST,ioaddr+EEPROM_Ctrl); + udelay(2000); /* delay 2ms */ + { + unsigned short ofs, i; + for (ofs = 0; ofs < lp->rx_buf_end; ofs += 32) { + outw_p(ofs, ioaddr+SM_PTR); + for (i = 0; i < 16; i++) { + outw_p(0, ioaddr+SM_ADDR(i<<1)); + } + } + } outw_p(lp->rx_buf_end,ioaddr+WRITE_PTR); start_code[28] = (dev->flags & IFF_PROMISC)?(start_code[28] | 1):(start_code[28] & ~1); - PRIV(dev)->promisc = dev->flags & IFF_PROMISC; + lp->promisc = dev->flags & IFF_PROMISC; /* We may die here */ outsw(ioaddr, start_code, sizeof(start_code)>>1); outw(CONF_HW_ADDR,ioaddr+WRITE_PTR); @@ -1205,8 +1216,8 @@ static char namelist[NAMELEN * EEXP_MAX_CARDS] = { 0, }; static struct device dev_eexp[EEXP_MAX_CARDS] = { - NULL, /* will allocate dynamically */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, express_probe + { NULL, /* will allocate dynamically */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, express_probe }, }; int irq[EEXP_MAX_CARDS] = {0, }; diff --git 
a/drivers/net/ne.c b/drivers/net/ne.c index 7d319f3ae037..6ac4c2aa6dce 100644 --- a/drivers/net/ne.c +++ b/drivers/net/ne.c @@ -229,6 +229,12 @@ static int ne_probe1(struct device *dev, int ioaddr) } } + /* We should have a "dev" from Space.c or the static module table. */ + if (dev == NULL) { + printk(KERN_ERR "ne.c: Passed a NULL device.\n"); + dev = init_etherdev(0, 0); + } + if (ei_debug && version_printed++ == 0) printk(version); @@ -346,12 +352,6 @@ static int ne_probe1(struct device *dev, int ioaddr) } - /* We should have a "dev" from Space.c or the static module table. */ - if (dev == NULL) { - printk("ne.c: Passed a NULL device.\n"); - dev = init_etherdev(0, 0); - } - if (pci_irq_line) { dev->irq = pci_irq_line; } diff --git a/drivers/net/ppp.c b/drivers/net/ppp.c index e918642cb8fa..c58dced821d1 100644 --- a/drivers/net/ppp.c +++ b/drivers/net/ppp.c @@ -6,7 +6,7 @@ * Dynamic PPP devices by Jim Freeman . * ppp_tty_receive ``noisy-raise-bug'' fixed by Ove Ewerlid * - * ==FILEVERSION 960303== + * ==FILEVERSION 960528== * * NOTE TO MAINTAINERS: * If you modify this file at all, please set the number above to the diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index f04f3f5cf374..c5a1bf7abd7e 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -580,7 +580,7 @@ int scan_scsis_single (int channel, int dev, int lun, int *max_dev_lun, printk("\n"); #endif -if (host_byte(SCpnt->result) != DID_OK) { + if (SCpnt->result) { if (((driver_byte (SCpnt->result) & DRIVER_SENSE) || (status_byte (SCpnt->result) & CHECK_CONDITION)) && ((SCpnt->sense_buffer[0] & 0x70) >> 4) == 7) { @@ -1569,9 +1569,15 @@ static void scsi_done (Scsi_Cmnd * SCpnt) case SUGGEST_IS_OK: break; case SUGGEST_REMAP: +#ifdef DEBUG + printk("SENSE SUGGEST REMAP - status = FINISHED\n"); +#endif + status = FINISHED; + exit = DRIVER_SENSE | SUGGEST_ABORT; + break; case SUGGEST_RETRY: #ifdef DEBUG - printk("SENSE SUGGEST REMAP or SUGGEST RETRY - status = MAYREDO\n"); + printk("SENSE SUGGEST 
RETRY - status = MAYREDO\n"); #endif status = MAYREDO; exit = DRIVER_SENSE | SUGGEST_RETRY; @@ -1606,6 +1612,9 @@ static void scsi_done (Scsi_Cmnd * SCpnt) status = REDO; break; case SUGGEST_REMAP: + status = FINISHED; + exit = DRIVER_SENSE | SUGGEST_ABORT; + break; case SUGGEST_RETRY: status = MAYREDO; exit = DRIVER_SENSE | SUGGEST_RETRY; diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index 42da5ecc237f..20a5fe289a1d 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -150,7 +150,7 @@ static int ioctl_internal_command(Scsi_Device *dev, char * cmd) result = SCpnt->result; SCpnt->request.rq_status = RQ_INACTIVE; - if(SCpnt->device->scsi_request_fn) + if (!SCpnt->device->was_reset && SCpnt->device->scsi_request_fn) (*SCpnt->device->scsi_request_fn)(); wake_up(&SCpnt->device->device_wait); diff --git a/fs/isofs/util.c b/fs/isofs/util.c index 9e6ae074dbff..b080406cda2b 100644 --- a/fs/isofs/util.c +++ b/fs/isofs/util.c @@ -6,7 +6,7 @@ * convert numbers according to section 7.3.3, etc. * * isofs special functions. This file was lifted in its entirety from - * the bsd386 iso9660 filesystem, by Pace Williamson. + * the 386bsd iso9660 filesystem, by Pace Willisson . 
*/ int diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bd36ea1e5f3f..f4d4d41d8c82 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -476,8 +476,12 @@ static int nfs_mkdir(struct inode *dir, const char *name, int len, int mode) sattr.atime.seconds = sattr.mtime.seconds = (unsigned) -1; error = nfs_proc_mkdir(NFS_SERVER(dir), NFS_FH(dir), name, &sattr, &fhandle, &fattr); - if (!error) - nfs_lookup_cache_add(dir, name, &fhandle, &fattr); + if (!error) { + if (fattr.fileid == dir->i_ino) + printk("Sony NewsOS 4.1R buggy nfs server?\n"); + else + nfs_lookup_cache_add(dir, name, &fhandle, &fattr); + } iput(dir); return error; } diff --git a/fs/read_write.c b/fs/read_write.c index 0aeae406a3b4..d0db64f7a274 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -100,7 +100,7 @@ asmlinkage int sys_llseek(unsigned int fd, unsigned long offset_high, return 0; } -asmlinkage int sys_read(unsigned int fd,char * buf,unsigned int count) +asmlinkage int sys_read(unsigned int fd,char * buf,int count) { int error; struct file * file; @@ -112,7 +112,7 @@ asmlinkage int sys_read(unsigned int fd,char * buf,unsigned int count) return -EBADF; if (!file->f_op || !file->f_op->read) return -EINVAL; - if (!count) + if (count <= 0) return 0; error = locks_verify_area(FLOCK_VERIFY_READ,inode,file,file->f_pos,count); if (error) diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h index febbca5ce46f..29e4bcab062b 100644 --- a/include/asm-alpha/bitops.h +++ b/include/asm-alpha/bitops.h @@ -31,7 +31,7 @@ extern __inline__ unsigned long set_bit(unsigned long nr, void * addr) :"=&r" (temp), "=m" (*m), "=&r" (oldbit) - :"r" (1UL << (nr & 31)), + :"Ir" (1UL << (nr & 31)), "m" (*m)); return oldbit != 0; } @@ -54,7 +54,7 @@ extern __inline__ unsigned long clear_bit(unsigned long nr, void * addr) :"=&r" (temp), "=m" (*m), "=&r" (oldbit) - :"r" (1UL << (nr & 31)), + :"Ir" (1UL << (nr & 31)), "m" (*m)); return oldbit != 0; } @@ -75,7 +75,7 @@ extern __inline__ unsigned long change_bit(unsigned 
long nr, void * addr) :"=&r" (temp), "=m" (*m), "=&r" (oldbit) - :"r" (1UL << (nr & 31)), + :"Ir" (1UL << (nr & 31)), "m" (*m)); return oldbit != 0; } diff --git a/include/asm-i386/checksum.h b/include/asm-i386/checksum.h index cbc2615d0e07..ac49b14007d4 100644 --- a/include/asm-i386/checksum.h +++ b/include/asm-i386/checksum.h @@ -46,7 +46,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl) { unsigned int sum; - __asm__(" + __asm__ __volatile__(" movl (%1), %0 subl $4, %2 jbe 2f diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index b383781bd95f..fd63a471621f 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -199,18 +199,21 @@ static inline unsigned long __xchg(unsigned long x, void * ptr, int size) switch (size) { case 1: __asm__("xchgb %b0,%1" - :"=&q" (x), "=m" (*__xg(ptr)) - :"0" (x), "m" (*__xg(ptr))); + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); break; case 2: __asm__("xchgw %w0,%1" - :"=&r" (x), "=m" (*__xg(ptr)) - :"0" (x), "m" (*__xg(ptr))); + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); break; case 4: __asm__("xchgl %0,%1" - :"=&r" (x), "=m" (*__xg(ptr)) - :"0" (x), "m" (*__xg(ptr))); + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); break; } return x; diff --git a/include/asm-ppc/posix_types.h b/include/asm-ppc/posix_types.h index 7af5c0a77507..03cb50ce2003 100644 --- a/include/asm-ppc/posix_types.h +++ b/include/asm-ppc/posix_types.h @@ -1,5 +1,5 @@ #ifndef _PPC_POSIX_TYPES_H -#define _PPc_POSIX_TYPES_H +#define _PPC_POSIX_TYPES_H /* * This file is generally used by user-level software, so you need to @@ -95,4 +95,4 @@ static __inline__ void __FD_ZERO(__kernel_fd_set *p) #endif /* __GNUC__ */ -#endif /* _PPc_POSIX_TYPES_H */ +#endif /* _PPC_POSIX_TYPES_H */ diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h index 2d438925d071..e870910c4958 100644 --- a/include/linux/mc146818rtc.h +++ b/include/linux/mc146818rtc.h @@ -107,25 +107,7 @@ 
outb_p((val),RTC_PORT(1)); \ #endif /* - * ioctl calls that are permitted to the /dev/rtc interface, if - * CONFIG_RTC was enabled. - */ - -#define RTC_AIE_ON 0x01 /* Alarm int. enable on */ -#define RTC_AIE_OFF 0x02 /* ... off */ -#define RTC_UIE_ON 0x03 /* Update int. enable on */ -#define RTC_UIE_OFF 0x04 /* ... off */ -#define RTC_PIE_ON 0x05 /* Periodic int. enable on */ -#define RTC_PIE_OFF 0x06 /* ... off */ -#define RTC_ALM_SET 0x07 /* Set alarm (struct tm) */ -#define RTC_ALM_READ 0x08 /* Read alarm (struct tm) */ -#define RTC_RD_TIME 0x09 /* Read RTC time (struct tm) */ -#define RTC_SET_TIME 0x0a /* Set time of RTC (not used) */ -#define RTC_IRQP_READ 0x0b /* Read periodic IRQ rate (Hz) */ -#define RTC_IRQP_SET 0x0c /* Set periodic IRQ rate (Hz) */ - -/* - * The struct used to pass data via the above ioctl. Similar to the + * The struct used to pass data via the following ioctl. Similar to the * struct tm in , but it needs to be here so that the kernel * source is self contained, allowing cross-compiles, etc. etc. */ @@ -142,4 +124,24 @@ struct rtc_time { int tm_isdst; }; +/* + * ioctl calls that are permitted to the /dev/rtc interface, if + * CONFIG_RTC was enabled. + */ + +#define RTC_AIE_ON _IO('p', 0x01) /* Alarm int. enable on */ +#define RTC_AIE_OFF _IO('p', 0x02) /* ... off */ +#define RTC_UIE_ON _IO('p', 0x03) /* Update int. enable on */ +#define RTC_UIE_OFF _IO('p', 0x04) /* ... off */ +#define RTC_PIE_ON _IO('p', 0x05) /* Periodic int. enable on */ +#define RTC_PIE_OFF _IO('p', 0x06) /* ... 
off */ + +#define RTC_ALM_SET _IOW('p', 0x07, struct rtc_time) /* Set alarm time */ +#define RTC_ALM_READ _IOR('p', 0x08, struct rtc_time) /* Read alarm time */ +#define RTC_RD_TIME _IOR('p', 0x09, struct rtc_time) /* Read RTC time */ +#define RTC_SET_TIME _IOW('p', 0x0a, struct rtc_time) /* Set RTC time */ +#define RTC_IRQP_READ _IOR('p', 0x0b, unsigned long) /* Read IRQ rate */ +#define RTC_IRQP_SET _IOW('p', 0x0c, unsigned long) /* Set IRQ rate */ + + #endif /* _MC146818RTC_H */ diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 1e9c0973ba64..acbd730bd963 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -163,7 +163,6 @@ struct symbol_table symbol_table = { X(__bforget), X(ll_rw_block), X(__wait_on_buffer), - X(__wait_on_page), X(mark_buffer_uptodate), X(unlock_buffer), X(dcache_lookup), diff --git a/mm/filemap.c b/mm/filemap.c index d8a7a023c528..c7c686217387 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -41,6 +41,19 @@ struct page * page_hash_table[PAGE_HASH_SIZE]; * Simple routines for both non-shared and shared mappings. */ +/* + * This is a special fast page-free routine that _only_ works + * on page-cache pages that we are currently using. We can + * just decrement the page count, because we know that the page + * has a count > 1 (the page cache itself counts as one, and + * we're currently using it counts as one). So we don't need + * the full free_page() stuff.. 
+ */ +static inline void release_page(struct page * page) +{ + atomic_dec(&page->count); +} + /* * Invalidate the pages of an inode, removing all pages that aren't * locked down (those are sure to be up-to-date anyway, so we shouldn't @@ -228,12 +241,9 @@ void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, len = count; page = find_page(inode, pos); if (page) { - unsigned long addr; - wait_on_page(page); - addr = page_address(page); - memcpy((void *) (offset + addr), buf, len); - free_page(addr); + memcpy((void *) (offset + page_address(page)), buf, len); + release_page(page); } count -= len; buf += len; @@ -273,7 +283,7 @@ static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offse #if 1 page = find_page(inode, offset); if (page) { - page->count--; + release_page(page); return page_cache; } /* @@ -291,12 +301,15 @@ static unsigned long try_to_read_ahead(struct inode * inode, unsigned long offse /* * Wait for IO to complete on a locked page. + * + * This must be called with the caller "holding" the page, + * ie with increased "page->count" so that the page won't + * go away during the wait.. */ void __wait_on_page(struct page *page) { struct wait_queue wait = { current, NULL }; - page->count++; add_wait_queue(&page->wait, &wait); repeat: run_task_queue(&tq_disk); @@ -306,7 +319,6 @@ repeat: goto repeat; } remove_wait_queue(&page->wait, &wait); - page->count--; current->state = TASK_RUNNING; } @@ -558,9 +570,6 @@ int generic_file_read(struct inode * inode, struct file * filp, char * buf, int unsigned long pos, ppos, page_cache; int reada_ok; - if (count <= 0) - return 0; - error = 0; read = 0; page_cache = 0; @@ -608,45 +617,18 @@ int generic_file_read(struct inode * inode, struct file * filp, char * buf, int for (;;) { struct page *page; - unsigned long offset, addr, nr; if (pos >= inode->i_size) break; - offset = pos & ~PAGE_MASK; - nr = PAGE_SIZE - offset; - /* - * Try to find the data in the page cache.. 
- */ - page = find_page(inode, pos & PAGE_MASK); - if (page) - goto found_page; - - /* - * Ok, it wasn't cached, so we need to create a new - * page.. - */ - if (page_cache) - goto new_page; - - error = -ENOMEM; - page_cache = __get_free_page(GFP_KERNEL); - if (!page_cache) - break; - error = 0; /* - * That could have slept, so we need to check again.. + * Try to find the data in the page cache.. */ - if (pos >= inode->i_size) - break; page = find_page(inode, pos & PAGE_MASK); if (!page) - goto new_page; + goto no_cached_page; found_page: - addr = page_address(page); - if (nr > count) - nr = count; /* * Try to read ahead only if the current page is filled or being filled. * Otherwise, if we were reading ahead, decrease max read ahead size to @@ -659,15 +641,27 @@ found_page: else if (reada_ok && filp->f_ramax > MIN_READAHEAD) filp->f_ramax = MIN_READAHEAD; - if (PageLocked(page)) - __wait_on_page(page); + wait_on_page(page); if (!PageUptodate(page)) - goto read_page; + goto page_read_error; + +success: + /* + * Ok, we have the page, it's up-to-date and ok, + * so now we can finally copy it to user space... + */ + { + unsigned long offset, nr; + offset = pos & ~PAGE_MASK; + nr = PAGE_SIZE - offset; + if (nr > count) + nr = count; + if (nr > inode->i_size - pos) nr = inode->i_size - pos; - memcpy_tofs(buf, (void *) (addr + offset), nr); - free_page(addr); + memcpy_tofs(buf, (void *) (page_address(page) + offset), nr); + release_page(page); buf += nr; pos += nr; read += nr; @@ -675,13 +669,28 @@ found_page: if (count) continue; break; - + } + +no_cached_page: + /* + * Ok, it wasn't cached, so we need to create a new + * page.. + */ + if (!page_cache) { + page_cache = __get_free_page(GFP_KERNEL); + /* + * That could have slept, so go around to the + * very beginning.. + */ + if (page_cache) + continue; + error = -ENOMEM; + break; + } -new_page: /* * Ok, add the new page to the hash-queues... 
*/ - addr = page_cache; page = mem_map + MAP_NR(page_cache); page_cache = 0; add_to_page_cache(page, inode, pos & PAGE_MASK); @@ -694,7 +703,6 @@ new_page: * identity of the reader can decide if we can read the * page or not.. */ -read_page: /* * We have to read the page. * If we were reading ahead, we had previously tried to read this page, @@ -706,12 +714,25 @@ read_page: filp->f_ramax = MIN_READAHEAD; error = inode->i_op->readpage(inode, page); + if (!error) + goto found_page; + release_page(page); + break; + +page_read_error: + /* + * We found the page, but it wasn't up-to-date. + * Try to re-read it _once_. We do this synchronously, + * because this happens only if there were errors. + */ + error = inode->i_op->readpage(inode, page); if (!error) { - if (!PageError(page)) - goto found_page; - error = -EIO; + wait_on_page(page); + if (PageUptodate(page) && !PageError(page)) + goto success; + error = -EIO; /* Some unspecified error occurred.. */ } - free_page(addr); + release_page(page); break; } @@ -729,78 +750,125 @@ read_page: } /* - * Find a cached page and wait for it to become up-to-date, return - * the page address. Increments the page count. + * Semantics for shared and private memory areas are different past the end + * of the file. A shared mapping past the last page of the file is an error + * and results in a SIGBUS, while a private mapping just maps in a zero page. + * + * The goto's are kind of ugly, but this streamlines the normal case of having + * it in the page cache, and handles the special cases reasonably without + * having a lot of duplicated code. 
*/ -static inline unsigned long fill_page(struct inode * inode, unsigned long offset) +static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share) { + unsigned long offset; struct page * page; - unsigned long new_page; + struct inode * inode = area->vm_inode; + unsigned long old_page, new_page; + + new_page = 0; + offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset; + if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm) + goto no_page; + /* + * Do we have something in the page cache already? + */ page = find_page(inode, offset); - if (page) - goto found_page_dont_free; + if (!page) + goto no_cached_page; + +found_page: + /* + * Ok, found a page in the page cache, now we need to check + * that it's up-to-date + */ + wait_on_page(page); + if (!PageUptodate(page)) + goto page_read_error; + +success: + /* + * Found the page, need to check sharing and possibly + * copy it over to another page.. + */ + old_page = page_address(page); + if (!no_share) { + /* + * Ok, we can share the cached page directly.. Get rid + * of any potential extra pages. + */ + if (new_page) + free_page(new_page); + + flush_page_to_ram(old_page); + return old_page; + } + + /* + * Check that we have another page to copy it over to.. + */ + if (!new_page) { + new_page = __get_free_page(GFP_KERNEL); + if (!new_page) + goto failure; + } + memcpy((void *) new_page, (void *) old_page, PAGE_SIZE); + flush_page_to_ram(new_page); + release_page(page); + return new_page; + +no_cached_page: new_page = __get_free_page(GFP_KERNEL); + if (!new_page) + goto no_page; + + /* + * During getting the above page we might have slept, + * so we need to re-check the situation with the page + * cache.. The page we just got may be useful if we + * can't share, so don't get rid of it here. 
+ */ page = find_page(inode, offset); if (page) goto found_page; - if (!new_page) - goto failure; + + /* + * Now, create a new page-cache page from the page we got + */ page = mem_map + MAP_NR(new_page); new_page = 0; add_to_page_cache(page, inode, offset); - inode->i_op->readpage(inode, page); + + if (inode->i_op->readpage(inode, page) != 0) + goto failure; + + /* + * Do a very limited read-ahead if appropriate + */ if (PageLocked(page)) new_page = try_to_read_ahead(inode, offset + PAGE_SIZE, 0); -found_page: - if (new_page) - free_page(new_page); -found_page_dont_free: - wait_on_page(page); - if (PageUptodate(page)) { -success: - return page_address(page); - } - /* If not marked as error, try _once_ to read it again */ - if (!PageError(page)) { - inode->i_op->readpage(inode, page); - wait_on_page(page); - if (PageUptodate(page)) - goto success; - } - page->count--; -failure: - return 0; -} + goto found_page; -/* - * Semantics for shared and private memory areas are different past the end - * of the file. A shared mapping past the last page of the file is an error - * and results in a SIGBUS, while a private mapping just maps in a zero page. - */ -static unsigned long filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share) -{ - unsigned long offset; - struct inode * inode = area->vm_inode; - unsigned long page; - - offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset; - if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm) - return 0; +page_read_error: + /* + * Umm, take care of errors if the page isn't up-to-date. + * Try to re-read it _once_. 
+ */ + if (inode->i_op->readpage(inode, page) != 0) + goto failure; + if (PageError(page)) + goto failure; + if (PageUptodate(page)) + goto success; - page = fill_page(inode, offset); - if (page && no_share) { - unsigned long new_page = __get_free_page(GFP_KERNEL); - if (new_page) { - memcpy((void *) new_page, (void *) page, PAGE_SIZE); - flush_page_to_ram(new_page); - } - free_page(page); - return new_page; - } - if (page) - flush_page_to_ram(page); - return page; + /* + * Uhhuh.. Things didn't work out. Return zero to tell the + * mm layer so, possibly freeing the page cache page first. + */ +failure: + release_page(page); +no_page: + return 0; } /* diff --git a/mm/kmalloc.c b/mm/kmalloc.c index ea122b769862..fba1e260d244 100644 --- a/mm/kmalloc.c +++ b/mm/kmalloc.c @@ -171,6 +171,38 @@ struct size_descriptor sizes[] = #define BLOCKSIZE(order) (blocksize[order]) #define AREASIZE(order) (PAGE_SIZE<<(sizes[order].gfporder)) +/* + * Create a small cache of page allocations: this helps a bit with + * those pesky 8kB+ allocations for NFS when we're temporarily + * out of memory.. + * + * This is a _truly_ small cache, we just cache one single page + * order (for orders 0, 1 and 2, that is 4, 8 and 16kB on x86). 
+ */ +#define MAX_CACHE_ORDER 3 +struct page_descriptor * kmalloc_cache[MAX_CACHE_ORDER]; + +static inline struct page_descriptor * get_kmalloc_pages(unsigned long priority, + unsigned long order, int dma) +{ + struct page_descriptor * tmp; + + tmp = (struct page_descriptor *) __get_free_pages(priority, order, dma); + if (!tmp && !dma && order < MAX_CACHE_ORDER) + tmp = xchg(kmalloc_cache+order, tmp); + return tmp; +} + +static inline void free_kmalloc_pages(struct page_descriptor * page, + unsigned long order, int dma) +{ + if (!dma && order < MAX_CACHE_ORDER) { + page = xchg(kmalloc_cache+order, page); + if (!page) + return; + } + free_pages((unsigned long) page, order); +} long kmalloc_init(long start_mem, long end_mem) { @@ -260,8 +292,7 @@ void *kmalloc(size_t size, int priority) /* sz is the size of the blocks we're dealing with */ sz = BLOCKSIZE(order); - page = (struct page_descriptor *) __get_free_pages(priority, - sizes[order].gfporder, dma); + page = get_kmalloc_pages(priority, sizes[order].gfporder, dma); if (!page) goto no_free_page; @@ -322,7 +353,7 @@ not_free_on_freelist: void kfree(void *ptr) { - int size; + int size, dma; unsigned long flags; int order; register struct block_header *p; @@ -331,10 +362,12 @@ void kfree(void *ptr) if (!ptr) return; p = ((struct block_header *) ptr) - 1; + dma = 0; page = PAGE_DESC(p); order = page->order; pg = &sizes[order].firstfree; if (p->bh_flags == MF_DMA) { + dma = 1; p->bh_flags = MF_USED; pg = &sizes[order].dmafree; } @@ -378,7 +411,7 @@ void kfree(void *ptr) pg = &tmp->next; } sizes[order].npages--; - free_pages((long) page, sizes[order].gfporder); + free_kmalloc_pages(page, sizes[order].gfporder, dma); } sizes[order].nfrees++; sizes[order].nbytesmalloced -= size; diff --git a/mm/page_io.c b/mm/page_io.c index 5e1dcf8fcf35..7c65b080622f 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -76,10 +76,10 @@ void rw_swap_page(int rw, unsigned long entry, char * buf, int wait) else kstat.pswpout++; page = mem_map + 
MAP_NR(buf); + atomic_inc(&page->count); wait_on_page(page); if (p->swap_device) { if (!wait) { - page->count++; set_bit(PG_free_after, &page->flags); set_bit(PG_decr_after, &page->flags); set_bit(PG_swap_unlock_after, &page->flags); @@ -87,6 +87,11 @@ void rw_swap_page(int rw, unsigned long entry, char * buf, int wait) nr_async_pages++; } ll_rw_page(rw,p->swap_device,offset,buf); + /* + * NOTE! We don't decrement the page count if we + * don't wait - that will happen asynchronously + * when the IO completes. + */ if (!wait) return; wait_on_page(page); @@ -130,6 +135,7 @@ void rw_swap_page(int rw, unsigned long entry, char * buf, int wait) ll_rw_swap_file(rw,swapf->i_dev, zones, i,buf); } else printk("rw_swap_page: no swap file or device\n"); + atomic_dec(&page->count); if (offset && !clear_bit(offset,p->swap_lockmap)) printk("rw_swap_page: lock already cleared\n"); wake_up(&lock_queue); diff --git a/mm/swapfile.c b/mm/swapfile.c index 52f671555908..34ac7f7ff1a3 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -318,12 +318,13 @@ asmlinkage int sys_swapoff(const char * specialfile) struct inode * inode; struct file filp; int i, type, prev; + int err; if (!suser()) return -EPERM; - i = namei(specialfile,&inode); - if (i) - return i; + err = namei(specialfile,&inode); + if (err) + return err; prev = -1; for (type = swap_list.head; type >= 0; type = swap_info[type].next) { p = swap_info + type; @@ -353,13 +354,21 @@ asmlinkage int sys_swapoff(const char * specialfile) swap_list.next = swap_list.head; } p->flags = SWP_USED; - i = try_to_unuse(type); - if (i) { + err = try_to_unuse(type); + if (err) { iput(inode); + /* re-insert swap space back into swap_list */ + for (prev = -1, i = swap_list.head; i >= 0; prev = i, i = swap_info[i].next) + if (p->prio >= swap_info[i].prio) + break; + p->next = i; + if (prev < 0) + swap_list.head = swap_list.next = p - swap_info; + else + swap_info[prev].next = p - swap_info; p->flags = SWP_WRITEOK; - return i; + return err; } - 
if(p->swap_device){ memset(&filp, 0, sizeof(filp)); filp.f_inode = inode; diff --git a/net/ipv4/ip_fw.c b/net/ipv4/ip_fw.c index 63d0bca55c68..4e8f8e2c3791 100644 --- a/net/ipv4/ip_fw.c +++ b/net/ipv4/ip_fw.c @@ -541,7 +541,13 @@ int ip_fw_chk(struct iphdr *ip, struct device *rif, __u16 *redirport, struct ip_ #ifdef CONFIG_IP_TRANSPARENT_PROXY if (policy&IP_FW_F_REDIR) { if (redirport) - *redirport = htons(f->fw_pts[f->fw_nsp+f->fw_ndp]); + if ((*redirport = htons(f->fw_pts[f->fw_nsp+f->fw_ndp])) == 0) { + /* Wildcard redirection. + * Note that redirport will become + * 0xFFFF for non-TCP/UDP packets. + */ + *redirport = dst_port; + } answer = FW_REDIRECT; } else #endif -- 2.39.5