From 58cf0ac4320a67b6fa00950c2d375a816ccf3b56 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:27:47 -0500 Subject: [PATCH] Import 2.3.23pre1 --- Documentation/pci.txt | 77 ++++--- Makefile | 2 +- arch/alpha/mm/init.c | 1 + arch/arm/mm/init.c | 1 + arch/i386/kernel/pci-i386.c | 54 ++--- arch/i386/kernel/pci-i386.h | 5 +- arch/i386/kernel/pci-pc.c | 77 +++++-- arch/i386/kernel/pci-visws.c | 5 + arch/i386/mm/init.c | 1 + arch/m68k/mm/init.c | 1 + arch/mips/mm/init.c | 1 + arch/ppc/mm/init.c | 1 + arch/sparc/mm/init.c | 1 + arch/sparc64/mm/init.c | 1 + drivers/block/ll_rw_blk.c | 13 +- drivers/char/Config.in | 36 ++-- drivers/pci/pci.c | 58 ++++++ drivers/pci/setup.c | 6 + drivers/sgi/char/sgiserial.c | 1 + drivers/usb/uhci.c | 2 + fs/binfmt_aout.c | 65 ++---- fs/buffer.c | 385 +++++++++++++++++++++++------------ fs/super.c | 4 +- include/asm-i386/pgtable.h | 2 +- include/linux/fs.h | 3 + include/linux/pci.h | 11 +- include/linux/pci_ids.h | 5 + kernel/ksyms.c | 6 + mm/page_io.c | 32 ++- 29 files changed, 569 insertions(+), 288 deletions(-) diff --git a/Documentation/pci.txt b/Documentation/pci.txt index 4536c87da1ce..86d0a58c43b2 100644 --- a/Documentation/pci.txt +++ b/Documentation/pci.txt @@ -4,10 +4,19 @@ "What should you avoid when writing PCI drivers" - by Martin Mares on 17-Jun-1999 + by Martin Mares on 09-Oct-1999 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +0. Structure of PCI drivers +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A typical PCI device driver needs to perform the following actions: + + 1. Find PCI devices it's able to handle + 2. Enable them + 3. Access their configuration space + 4. Discover addresses and IRQ numbers + 1. How to find PCI devices ~~~~~~~~~~~~~~~~~~~~~~~~~~ In case your driver wants to search for all devices with given vendor/device @@ -19,6 +28,9 @@ ID, it should use: For class-based search, use pci_find_class(CLASS_ID, dev). 
+ If you need to match by subsystem vendor/device ID, use +pci_find_subsys(VENDOR_ID, DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev). + You can use the constant PCI_ANY_ID as a wildcard replacement for VENDOR_ID or DEVICE_ID. This allows searching for any device from a specific vendor, for example. @@ -26,44 +38,57 @@ specific vendor, for example. In case you want to do some complex matching, look at pci_devices -- it's a linked list of pci_dev structures for all PCI devices in the system. - All these methods return a pointer to a pci_dev structure which is used as a -parameter for many other PCI functions. The rest of them accept bus and -device/function numbers which can be found in pci_dev->bus->number and -pci_dev->devfn. Feel free to use all other fields of the pci_dev structure, but -don't modify them. + The `struct pci_dev *' pointer serves as an identification of a PCI device +and is passed to all other functions operating on PCI devices. + +2. Enabling devices +~~~~~~~~~~~~~~~~~~~ + Before you do anything with the device you've found, you need to enable +it by calling pci_enable_device() which enables I/O and memory regions of +the device, assigns missing resources if needed and wakes up the device +if it was in suspended state. Please note that this function can fail. - The pci_present() function can be used to test presence of PCI in the -machine. + If you want to use the device in bus mastering mode, call pci_set_master() +which enables the bus master bit in PCI_COMMAND register and also fixes +the latency timer value if it's set to something bogus by the BIOS. -2. How to access PCI config space +3. How to access PCI config space ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can use pci_(read|write)_config_(byte|word|dword) to access the config -space of a device represented by pci_dev. All these functions return 0 when -successful or an error code (PCIBIOS_...) which can be translated to a text +space of a device represented by struct pci_dev *. 
All these functions return 0 +when successful or an error code (PCIBIOS_...) which can be translated to a text string by pcibios_strerror. Most drivers expect that accesses to valid PCI devices don't fail. - In case you want to address the devices by bus/device/function numbers, -use pcibios_(read_write)_config_(byte|word|dword). - If you access fields in the standard portion of the config header, please use symbolic names of locations and bits declared in . -3. Addresses and interrupts + If you need to access Extended PCI Capability registers, just call +pci_find_capability() for the particular capability and it will find the +corresponding register block for you. + +4. Addresses and interrupts ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Memory and port addresses and interrupt numbers should NOT be read from the config space. You should use the values in the pci_dev structure as they might have been remapped by the kernel. -4. Obsolete functions -~~~~~~~~~~~~~~~~~~~~~ - is obsolete and should not be included in new code. +5. Other interesting functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +pci_find_slot() Find pci_dev corresponding to given bus and + slot numbers. +pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3) -pcibios_find_(device|class) are also obsolete and should be replaced by -pci_find_(device|class). - -5. Bus mastering -~~~~~~~~~~~~~~~~ - If you need to setup a bus-mastering card, just call pci_set_master(). It -should set PCI_COMMAND_MASTER in the command register and adjust the latency -timer if needed. +6. Obsolete functions +~~~~~~~~~~~~~~~~~~~~~ +There are several functions kept only for compatibility with old drivers +not updated to the new PCI interface. Please don't use them in new code. + +pcibios_present() Since ages, you don't need to test presence + of PCI subsystem when trying to talk with it. + If it's not there, the list of PCI devices + is empty and all functions for searching for + devices just return NULL. 
+pcibios_(read|write)_* Superseded by their pci_(read|write)_* + counterparts. +pcibios_find_* Superseded by their pci_find_* counterparts. diff --git a/Makefile b/Makefile index 0735b7cfee74..2b862c3a1af4 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 3 -SUBLEVEL = 22 +SUBLEVEL = 23 EXTRAVERSION = ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 86cf4c925f17..0ec65b409000 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -166,6 +166,7 @@ show_mem(void) printk("%ld pages shared\n",shared); printk("%ld pages swap cached\n",cached); printk("%ld pages in page table cache\n",pgtable_cache_size); + show_buffers(); #ifdef CONFIG_NET show_net_buffers(); #endif diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 4fa1ca4988d9..50dc887cdb48 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -115,6 +115,7 @@ void show_mem(void) printk("%d free pages\n",free); printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); + show_buffers(); #ifdef CONFIG_NET show_net_buffers(); #endif diff --git a/arch/i386/kernel/pci-i386.c b/arch/i386/kernel/pci-i386.c index af362611d71e..8e609ec36186 100644 --- a/arch/i386/kernel/pci-i386.c +++ b/arch/i386/kernel/pci-i386.c @@ -177,7 +177,7 @@ static int __init pcibios_assign_resource(struct pci_dev *dev, int i) * (4) Assign new addresses to resources which were either * not configured at all or misconfigured. If explicitly * requested by the user, configure expansion ROM address - * as well. Finally enable the I/O and Memory bits. + * as well. 
*/ static void __init pcibios_allocate_bus_resources(struct pci_bus *bus) @@ -252,21 +252,18 @@ static void __init pcibios_allocate_resources(int pass) static void __init pcibios_assign_resources(void) { struct pci_dev *dev; - u16 cmd, old_cmd; int idx; - int fault = 0; struct resource *r; for(dev=pci_devices; dev; dev=dev->next) { - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; for(idx=0; idx<6; idx++) { r = &dev->resource[idx]; if (((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && idx < 4) || - ((dev->class >> 8) == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO))) + ((dev->class >> 8) == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)) || + !dev->class || (dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) /* * Don't touch IDE controllers and I/O ports of video cards! - * Neither enable anything in their command registers. + * Also avoid classless devices and host bridges. */ continue; if (!r->start && r->end) { @@ -275,24 +272,9 @@ static void __init pcibios_assign_resources(void) * the BIOS forgot to do so or because we have decided the old * address was unusable for some reason. 
*/ - if (pcibios_assign_resource(dev, idx) < 0) - fault = 1; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - - if (cmd != old_cmd) { - if (fault) - printk("PCI: Not enabling device %s because of resource collisions\n", dev->slot_name); - else { - printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); + pcibios_assign_resource(dev, idx); } } - if (pci_probe & PCI_ASSIGN_ROMS) { r = &dev->resource[PCI_ROM_RESOURCE]; r->end -= r->start; @@ -310,3 +292,29 @@ void __init pcibios_resource_survey(void) pcibios_allocate_resources(1); pcibios_assign_resources(); } + +int pcibios_enable_resources(struct pci_dev *dev) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for(idx=0; idx<6; idx++) { + r = &dev->resource[idx]; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} diff --git a/arch/i386/kernel/pci-i386.h b/arch/i386/kernel/pci-i386.h index 41ac2b8567f8..1be988b6c399 100644 --- a/arch/i386/kernel/pci-i386.h +++ b/arch/i386/kernel/pci-i386.h @@ -18,12 +18,13 @@ #define PCI_NO_SORT 0x100 #define PCI_BIOS_SORT 0x200 #define PCI_NO_CHECKS 0x400 -#define PCI_NO_PEER_FIXUP 0x800 +#define PCI_PEER_FIXUP 0x800 #define PCI_ASSIGN_ROMS 0x1000 -#define PCI_NO_IRQ_SCAN 0x2000 +#define PCI_BIOS_IRQ_SCAN 0x2000 extern unsigned int pci_probe; /* pci-i386.c */ void pcibios_resource_survey(void); +int pcibios_enable_resources(struct pci_dev *); diff --git 
a/arch/i386/kernel/pci-pc.c b/arch/i386/kernel/pci-pc.c index 8ce187d3f921..61d13af555ab 100644 --- a/arch/i386/kernel/pci-pc.c +++ b/arch/i386/kernel/pci-pc.c @@ -688,7 +688,7 @@ static struct irq_routing_table * __init pcibios_get_irq_routing_table(void) struct irq_routing_table *rt; int ret, map; - if (pci_probe & PCI_NO_IRQ_SCAN) + if (!(pci_probe & PCI_BIOS_IRQ_SCAN)) return NULL; pcibios_irq_page = __get_free_page(GFP_KERNEL); if (!pcibios_irq_page) @@ -868,7 +868,30 @@ static void __init pci_fixup_i450nx(struct pci_dev *d) if (suba < subb) pci_scan_bus(suba+1, pci_root->ops, NULL); /* Bus B */ } - pci_probe |= PCI_NO_PEER_FIXUP; +} + +static void __init pci_fixup_rcc(struct pci_dev *d) +{ + /* + * RCC host bridges -- Find and scan all secondary buses. + * Register 0x44 contains first, 0x45 last bus number routed there. + */ + u8 busno; + pci_read_config_byte(d, 0x44, &busno); + printk("PCI: RCC host bridge: secondary bus %02x\n", busno); + pci_scan_bus(busno, pci_root->ops, NULL); +} + +static void __init pci_fixup_compaq(struct pci_dev *d) +{ + /* + * Compaq host bridges -- Find and scan all secondary buses. + * This time registers 0xc8 and 0xc9. 
+ */ + u8 busno; + pci_read_config_byte(d, 0xc8, &busno); + printk("PCI: Compaq host bridge: secondary bus %02x\n", busno); + pci_scan_bus(busno, pci_root->ops, NULL); } static void __init pci_fixup_umc_ide(struct pci_dev *d) @@ -905,6 +928,9 @@ static void __init pci_fixup_ide_bases(struct pci_dev *d) struct pci_fixup pcibios_fixups[] = { { PCI_FIXUP_HEADER, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_RCC, PCI_DEVICE_ID_RCC_HE, pci_fixup_rcc }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_RCC, PCI_DEVICE_ID_RCC_LE, pci_fixup_rcc }, + { PCI_FIXUP_HEADER, PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_6010, pci_fixup_compaq }, { PCI_FIXUP_HEADER, PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide }, { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases }, { 0 } @@ -919,6 +945,8 @@ extern int skip_ioapic_setup; #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) #define PIRQ_VERSION 0x0100 +static struct irq_routing_table *pirq_table; + /* * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table. */ @@ -974,7 +1002,6 @@ static void __init pcibios_irq_peer_trick(struct irq_routing_table *rt) */ if (busmap[i] && pci_scan_bus(i, pci_root->ops, NULL)) printk("PCI: Discovered primary peer bus %02x [IRQ]\n", i); - pci_probe |= PCI_NO_PEER_FIXUP; } /* @@ -982,7 +1009,7 @@ static void __init pcibios_irq_peer_trick(struct irq_routing_table *rt) * table, but unfortunately we have to know the interrupt router chip. 
*/ -static char * __init pcibios_lookup_irq(struct pci_dev *dev, struct irq_routing_table *rt, int pin) +static char *pcibios_lookup_irq(struct pci_dev *dev, struct irq_routing_table *rt, int pin, int assign) { struct irq_info *q; struct pci_dev *router; @@ -1012,9 +1039,9 @@ static char * __init pcibios_lookup_irq(struct pci_dev *dev, struct irq_routing_ return NULL; } DBG(" -> PIRQ %02x, mask %04x", pirq, mask); - if ((dev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + if (!assign || (dev->class >> 8) == PCI_CLASS_DISPLAY_VGA) newirq = 0; - else for(newirq = 15; newirq && !(mask & (1 << newirq)); newirq--) + else for(newirq = 13; newirq && !(mask & (1 << newirq)); newirq--) ; if (!(router = pci_find_slot(rt->rtr_bus, rt->rtr_devfn))) { DBG(" -> router not found\n"); @@ -1068,7 +1095,7 @@ static void __init pcibios_fixup_irqs(void) struct pci_dev *dev; u8 pin; - rtable = pcibios_find_irq_routing_table(); + rtable = pirq_table = pcibios_find_irq_routing_table(); #ifdef CONFIG_PCI_BIOS if (!rtable && pci_bios_present) rtable = pcibios_get_irq_routing_table(); @@ -1106,7 +1133,7 @@ static void __init pcibios_fixup_irqs(void) dev->irq = irq; } } - rtable = NULL; /* Avoid IRQ assignment below */ + pirq_table = NULL; /* Avoid automatic IRQ assignment */ } #endif /* @@ -1114,10 +1141,10 @@ static void __init pcibios_fixup_irqs(void) */ if (dev->irq >= NR_IRQS) dev->irq = 0; - if (pin && !dev->irq && rtable && rtable->version) { - char *msg = pcibios_lookup_irq(dev, rtable, pin); + if (pin && !dev->irq && pirq_table) { + char *msg = pcibios_lookup_irq(dev, pirq_table, pin, 0); if (msg) - printk("PCI: Assigned IRQ %d to device %s [%s]\n", dev->irq, dev->slot_name, msg); + printk("PCI: Found IRQ %d for device %s [%s]\n", dev->irq, dev->slot_name, msg); } } @@ -1173,7 +1200,7 @@ void __init pcibios_init(void) pci_scan_bus(0, ops, NULL); pcibios_fixup_irqs(); - if (!(pci_probe & PCI_NO_PEER_FIXUP)) + if (pci_probe & PCI_PEER_FIXUP) pcibios_fixup_peer_bridges(); 
pcibios_resource_survey(); @@ -1199,8 +1226,8 @@ char * __init pcibios_setup(char *str) } else if (!strcmp(str, "nosort")) { pci_probe |= PCI_NO_SORT; return NULL; - } else if (!strcmp(str, "noirq")) { - pci_probe |= PCI_NO_IRQ_SCAN; + } else if (!strcmp(str, "biosirq")) { + pci_probe |= PCI_BIOS_IRQ_SCAN; return NULL; } #endif @@ -1214,8 +1241,8 @@ char * __init pcibios_setup(char *str) return NULL; } #endif - else if (!strcmp(str, "nopeer")) { - pci_probe |= PCI_NO_PEER_FIXUP; + else if (!strcmp(str, "peer")) { + pci_probe |= PCI_PEER_FIXUP; return NULL; } else if (!strcmp(str, "rom")) { pci_probe |= PCI_ASSIGN_ROMS; @@ -1223,3 +1250,21 @@ char * __init pcibios_setup(char *str) } return str; } + +int pcibios_enable_device(struct pci_dev *dev) +{ + int err; + + if ((err = pcibios_enable_resources(dev)) < 0) + return err; + if (!dev->irq && pirq_table) { + u8 pin; + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); + if (pin) { + char *msg = pcibios_lookup_irq(dev, pirq_table, pin, 1); + if (msg) + printk("PCI: Assigned IRQ %d to device %s [%s]\n", dev->irq, dev->slot_name, msg); + } + } + return 0; +} diff --git a/arch/i386/kernel/pci-visws.c b/arch/i386/kernel/pci-visws.c index 31a767a2226a..63620c635fac 100644 --- a/arch/i386/kernel/pci-visws.c +++ b/arch/i386/kernel/pci-visws.c @@ -129,3 +129,8 @@ char * __init pcibios_setup(char *str) { return str; } + +int pcibios_enable_device(struct pci_dev *dev) +{ + return pcibios_enable_resources(dev); +} diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index b1140f892ab7..984488089738 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -180,6 +180,7 @@ void show_mem(void) printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); printk("%ld pages in page table cache\n",pgtable_cache_size); + show_buffers(); #ifdef CONFIG_NET show_net_buffers(); #endif diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index c22dccfc58ed..3442c1cfab38 100644 --- a/arch/m68k/mm/init.c +++ 
b/arch/m68k/mm/init.c @@ -112,6 +112,7 @@ void show_mem(void) printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); printk("%ld pages in page table cache\n",pgtable_cache_size); + show_buffers(); #ifdef CONFIG_NET show_net_buffers(); #endif diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 0f65b95585ed..45a8c87c6944 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -265,6 +265,7 @@ void show_mem(void) printk("%d pages swap cached\n",cached); printk("%ld pages in page table cache\n",pgtable_cache_size); printk("%d free pages\n", free); + show_buffers(); #ifdef CONFIG_NET show_net_buffers(); #endif diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c index eb158fb1ea0f..a708c59426bf 100644 --- a/arch/ppc/mm/init.c +++ b/arch/ppc/mm/init.c @@ -289,6 +289,7 @@ void show_mem(void) printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); printk("%d pages in page table cache\n",(int)pgtable_cache_size); + show_buffers(); #ifdef CONFIG_NET show_net_buffers(); #endif diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c index 69c69d212eb0..40aab1d66398 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init.c @@ -106,6 +106,7 @@ void show_mem(void) printk("%ld page tables cached\n",pgtable_cache_size); if (sparc_cpu_model == sun4m || sparc_cpu_model == sun4d) printk("%ld page dirs cached\n", pgd_cache_size); + show_buffers(); #ifdef CONFIG_NET show_net_buffers(); #endif diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index c1d8d24aeb4f..6df374b4e8e1 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -150,6 +150,7 @@ void show_mem(void) #ifndef __SMP__ printk("%d entries in page dir cache\n",pgd_cache_size); #endif + show_buffers(); #ifdef CONFIG_NET show_net_buffers(); #endif diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 2bb1610829dc..e4d844e48998 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -416,10 +416,6 @@ void 
make_request(int major,int rw, struct buffer_head * bh) count = bh->b_size >> 9; sector = bh->b_rsector; - /* We'd better have a real physical mapping! */ - if (!buffer_mapped(bh)) - BUG(); - /* It had better not be a new buffer by the time we see it */ if (buffer_new(bh)) BUG(); @@ -480,6 +476,13 @@ void make_request(int major,int rw, struct buffer_head * bh) goto end_io; } + /* We'd better have a real physical mapping! + Check this bit only if the buffer was dirty and just locked + down by us so at this point flushpage will block and + won't clear the mapped bit under us. */ + if (!buffer_mapped(bh)) + BUG(); + /* look for a free request. */ /* Loop uses two requests, 1 for loop and 1 for the real device. * Cut max_req in half to avoid running out and deadlocking. */ @@ -694,7 +697,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head * bh[]) sorry: for (i = 0; i < nr; i++) { - clear_bit(BH_Dirty, &bh[i]->b_state); + mark_buffer_clean(bh[i]); /* remember to refile it */ clear_bit(BH_Uptodate, &bh[i]->b_state); bh[i]->b_end_io(bh[i], 0); } diff --git a/drivers/char/Config.in b/drivers/char/Config.in index fed33b13b59e..a81cc8b586a5 100644 --- a/drivers/char/Config.in +++ b/drivers/char/Config.in @@ -10,15 +10,15 @@ if [ "$CONFIG_VT" = "y" ]; then fi tristate 'Standard/generic (dumb) serial support' CONFIG_SERIAL if [ "$CONFIG_SERIAL" = "y" ]; then - bool ' Support for console on serial port' CONFIG_SERIAL_CONSOLE + bool ' Support for console on serial port' CONFIG_SERIAL_CONSOLE fi bool 'Extended dumb serial driver options' CONFIG_SERIAL_EXTENDED if [ "$CONFIG_SERIAL_EXTENDED" = "y" ]; then - bool ' Support more than 4 serial ports' CONFIG_SERIAL_MANY_PORTS - bool ' Support for sharing serial interrupts' CONFIG_SERIAL_SHARE_IRQ - bool ' Autodetect IRQ on standard ports (unsafe)' CONFIG_SERIAL_DETECT_IRQ - bool ' Support special multiport boards' CONFIG_SERIAL_MULTIPORT - bool ' Support the Bell Technologies HUB6 card' CONFIG_HUB6 + bool ' Support more than 4 
serial ports' CONFIG_SERIAL_MANY_PORTS + bool ' Support for sharing serial interrupts' CONFIG_SERIAL_SHARE_IRQ + bool ' Autodetect IRQ on standard ports (unsafe)' CONFIG_SERIAL_DETECT_IRQ + bool ' Support special multiport boards' CONFIG_SERIAL_MULTIPORT + bool ' Support the Bell Technologies HUB6 card' CONFIG_HUB6 fi bool 'Non-standard serial port support' CONFIG_SERIAL_NONSTANDARD if [ "$CONFIG_SERIAL_NONSTANDARD" = "y" ]; then @@ -86,31 +86,31 @@ tristate 'QIC-02 tape support' CONFIG_QIC02_TAPE if [ "$CONFIG_QIC02_TAPE" != "n" ]; then bool 'Do you want runtime configuration for QIC-02' CONFIG_QIC02_DYNCONF if [ "$CONFIG_QIC02_DYNCONF" != "y" ]; then - comment ' Edit configuration parameters in ./include/linux/tpqic02.h!' + comment ' Edit configuration parameters in ./include/linux/tpqic02.h!' else - comment ' Setting runtime QIC-02 configuration is done with qic02conf' - comment ' from the tpqic02-support package. It is available at' - comment ' metalab.unc.edu or ftp://titus.cfw.com/pub/Linux/util/' + comment ' Setting runtime QIC-02 configuration is done with qic02conf' + comment ' from the tpqic02-support package. 
It is available at' + comment ' metalab.unc.edu or ftp://titus.cfw.com/pub/Linux/util/' fi dep_tristate 'Zoran ZR36057/36060 support' CONFIG_VIDEO_ZORAN $CONFIG_VIDEO_DEV - dep_tristate ' Include support for Iomega Buz' CONFIG_VIDEO_BUZ $CONFIG_VIDEO_ZORAN + dep_tristate ' Include support for Iomega Buz' CONFIG_VIDEO_BUZ $CONFIG_VIDEO_ZORAN fi bool 'Watchdog Timer Support' CONFIG_WATCHDOG if [ "$CONFIG_WATCHDOG" != "n" ]; then mainmenu_option next_comment comment 'Watchdog Cards' - bool ' Disable watchdog shutdown on close' CONFIG_WATCHDOG_NOWAYOUT - tristate ' WDT Watchdog timer' CONFIG_WDT + bool ' Disable watchdog shutdown on close' CONFIG_WATCHDOG_NOWAYOUT + tristate ' WDT Watchdog timer' CONFIG_WDT if [ "$CONFIG_WDT" != "n" ]; then - bool ' WDT501 features' CONFIG_WDT_501 + bool ' WDT501 features' CONFIG_WDT_501 if [ "$CONFIG_WDT_501" = "y" ]; then - bool ' Fan Tachometer' CONFIG_WDT_501_FAN + bool ' Fan Tachometer' CONFIG_WDT_501_FAN fi fi - tristate ' Software Watchdog' CONFIG_SOFT_WATCHDOG - tristate ' Berkshire Products PC Watchdog' CONFIG_PCWATCHDOG - tristate ' Acquire SBC Watchdog Timer' CONFIG_ACQUIRE_WDT + tristate ' Software Watchdog' CONFIG_SOFT_WATCHDOG + tristate ' Berkshire Products PC Watchdog' CONFIG_PCWATCHDOG + tristate ' Acquire SBC Watchdog Timer' CONFIG_ACQUIRE_WDT endmenu fi diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 5cf99152198e..7d55ba550e14 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -140,6 +140,64 @@ pci_find_parent_resource(struct pci_dev *dev, struct resource *res) return best; } +/* + * Set power management state of a device. For transitions from state D3 + * it isn't as straightforward as one could assume since many devices forget + * their configuration space during wakeup. Returns old power state. 
+ */ +int +pci_set_power_state(struct pci_dev *dev, int new_state) +{ + u32 base[5], romaddr; + u16 pci_command, pwr_command; + u8 pci_latency, pci_cacheline; + int i, old_state; + int pm = pci_find_capability(dev, PCI_CAP_ID_PM); + + if (!pm) + return 0; + pci_read_config_word(dev, pm + PCI_PM_CTRL, &pwr_command); + old_state = pwr_command & PCI_PM_CTRL_STATE_MASK; + if (old_state == new_state) + return old_state; + DBG("PCI: %s goes from D%d to D%d\n", dev->slot_name, old_state, new_state); + if (old_state == 3) { + pci_read_config_word(dev, PCI_COMMAND, &pci_command); + pci_write_config_word(dev, PCI_COMMAND, pci_command & ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)); + for (i = 0; i < 5; i++) + pci_read_config_dword(dev, PCI_BASE_ADDRESS_0 + i*4, &base[i]); + pci_read_config_dword(dev, PCI_ROM_ADDRESS, &romaddr); + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &pci_latency); + pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &pci_cacheline); + pci_write_config_word(dev, pm + PCI_PM_CTRL, new_state); + for (i = 0; i < 5; i++) + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + i*4, base[i]); + pci_write_config_dword(dev, PCI_ROM_ADDRESS, romaddr); + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, pci_cacheline); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, pci_latency); + pci_write_config_word(dev, PCI_COMMAND, pci_command); + } else + pci_write_config_word(dev, pm + PCI_PM_CTRL, (pwr_command & ~PCI_PM_CTRL_STATE_MASK) | new_state); + return old_state; +} + +/* + * Initialize device before it's used by a driver. Ask low-level code + * to enable I/O and memory. Wake up the device if it was suspended. + * Beware, this function can fail. 
+ */ +int +pci_enable_device(struct pci_dev *dev) +{ + int err; + + if ((err = pcibios_enable_device(dev)) < 0) + return err; + pci_set_power_state(dev, 0); + return 0; +} + /* * This interrupt-safe spinlock protects all accesses to PCI diff --git a/drivers/pci/setup.c b/drivers/pci/setup.c index 9c752d0defa8..930484b16c72 100644 --- a/drivers/pci/setup.c +++ b/drivers/pci/setup.c @@ -323,3 +323,9 @@ pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *), for (dev = pci_devices; dev; dev = dev->next) pdev_fixup_irq(dev, swizzle, map_irq); } + +int +pcibios_enable_device(struct pci_dev *dev) +{ + return 0; +} diff --git a/drivers/sgi/char/sgiserial.c b/drivers/sgi/char/sgiserial.c index 4d2d8867cfe9..877acef3e032 100644 --- a/drivers/sgi/char/sgiserial.c +++ b/drivers/sgi/char/sgiserial.c @@ -408,6 +408,7 @@ static _INLINE_ void receive_chars(struct sgi_serial *info, struct pt_regs *regs show_state(); return; } else if (ch == 2) { + show_buffers(); return; } /* It is a 'keyboard interrupt' ;-) */ diff --git a/drivers/usb/uhci.c b/drivers/usb/uhci.c index ae2b4ec73e34..57cf20ff6d7e 100644 --- a/drivers/usb/uhci.c +++ b/drivers/usb/uhci.c @@ -2169,6 +2169,8 @@ static int start_uhci(struct pci_dev *dev) /* disable legacy emulation */ pci_write_config_word(dev, USBLEGSUP, USBLEGSUP_DEFAULT); + pci_enable_device(dev); + return found_uhci(dev->irq, io_addr, io_size); } return -1; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index cc72f4e18ae0..ca5d8e8cb446 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -270,7 +270,6 @@ static inline int do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs unsigned long fd_offset; unsigned long rlim; int retval; - static unsigned long error_time=0; ex = *((struct exec *) bprm->buf); /* exec-header */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && @@ -282,29 +281,6 @@ static inline int do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs fd_offset = N_TXTOFF(ex); -#ifdef __i386__ - if (N_MAGIC(ex) == 
ZMAGIC && fd_offset != BLOCK_SIZE) { - if((jiffies-error_time) >5) - { - printk(KERN_NOTICE "N_TXTOFF != BLOCK_SIZE. See a.out.h.\n"); - error_time=jiffies; - } - return -ENOEXEC; - } - - if (N_MAGIC(ex) == ZMAGIC && ex.a_text && - bprm->dentry->d_inode->i_op && - bprm->dentry->d_inode->i_op->get_block && - (fd_offset < bprm->dentry->d_inode->i_sb->s_blocksize)) { - if((jiffies-error_time) >5) - { - printk(KERN_NOTICE "N_TXTOFF < BLOCK_SIZE. Please convert binary.\n"); - error_time=jiffies; - } - return -ENOEXEC; - } -#endif - /* Check initial limits. This avoids letting people circumvent * size limits imposed on them by creating programs with large * arrays in the data or bss. @@ -364,26 +340,32 @@ static inline int do_load_aout_binary(struct linux_binprm * bprm, struct pt_regs flush_icache_range((unsigned long) 0, (unsigned long) ex.a_text+ex.a_data); } else { + static unsigned long error_time, error_time2; if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && - (N_MAGIC(ex) != NMAGIC)) + (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) + { printk(KERN_NOTICE "executable not page aligned\n"); + error_time2 = jiffies; + } fd = open_dentry(bprm->dentry, O_RDONLY); if (fd < 0) return fd; file = fget(fd); - if ((fd_offset & ~PAGE_MASK) != 0) { + if ((fd_offset & ~PAGE_MASK) != 0 && + (jiffies-error_time) > 5*HZ) + { printk(KERN_WARNING "fd_offset is not page aligned. 
Please convert program: %s\n", - file->f_dentry->d_name.name - ); + file->f_dentry->d_name.name); + error_time = jiffies; } if (!file->f_op || !file->f_op->mmap || ((fd_offset & ~PAGE_MASK) != 0)) { fput(file); sys_close(fd); - do_brk(0, ex.a_text+ex.a_data); + do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); read_exec(bprm->dentry, fd_offset, (char *) N_TXTADDR(ex), ex.a_text+ex.a_data, 0); flush_icache_range((unsigned long) N_TXTADDR(ex), @@ -493,12 +475,6 @@ do_load_aout_library(int fd) goto out_putf; } - if (N_MAGIC(ex) == ZMAGIC && N_TXTOFF(ex) && - (N_TXTOFF(ex) < inode->i_sb->s_blocksize)) { - printk("N_TXTOFF < BLOCK_SIZE. Please convert library\n"); - goto out_putf; - } - if (N_FLAGS(ex)) goto out_putf; @@ -508,14 +484,17 @@ do_load_aout_library(int fd) start_addr = ex.a_entry & 0xfffff000; if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { - printk(KERN_WARNING - "N_TXTOFF is not page aligned. Please convert library: %s\n", - file->f_dentry->d_name.name - ); - - do_mmap(NULL, start_addr & PAGE_MASK, ex.a_text + ex.a_data + ex.a_bss, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED| MAP_PRIVATE, 0); + static unsigned long error_time; + + if ((jiffies-error_time) > 5*HZ) + { + printk(KERN_WARNING + "N_TXTOFF is not page aligned. Please convert library: %s\n", + file->f_dentry->d_name.name); + error_time = jiffies; + } + + do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); read_exec(file->f_dentry, N_TXTOFF(ex), (char *)start_addr, ex.a_text + ex.a_data, 0); diff --git a/fs/buffer.c b/fs/buffer.c index c43c54a36b45..f3c3f11b7b1c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -26,6 +26,8 @@ /* Thread it... 
-DaveM */ +/* async buffer flushing, 1999 Andrea Arcangeli */ + #include #include #include @@ -41,6 +43,7 @@ #include #include #include +#include #include #include @@ -76,6 +79,7 @@ static rwlock_t hash_table_lock = RW_LOCK_UNLOCKED; static struct buffer_head *lru_list[NR_LIST]; static spinlock_t lru_list_lock = SPIN_LOCK_UNLOCKED; static int nr_buffers_type[NR_LIST] = {0,}; +static unsigned long size_buffers_type[NR_LIST] = {0,}; static struct buffer_head * unused_list = NULL; static int nr_unused_buffer_heads = 0; @@ -114,18 +118,18 @@ union bdflush_param { each time we call refill */ int nref_dirt; /* Dirty buffer threshold for activating bdflush when trying to refill buffers. */ - int dummy1; /* unused */ + int interval; /* jiffies delay between kupdate flushes */ int age_buffer; /* Time for normal buffer to age before we flush it */ int age_super; /* Time for superblock to age before we flush it */ int dummy2; /* unused */ int dummy3; /* unused */ } b_un; unsigned int data[N_PARAM]; -} bdf_prm = {{40, 500, 64, 256, 15, 30*HZ, 5*HZ, 1884, 2}}; +} bdf_prm = {{40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ, 1884, 2}}; /* These are the min and max parameter values that we will allow to be assigned */ int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 1*HZ, 1*HZ, 1, 1}; -int bdflush_max[N_PARAM] = {100,50000, 20000, 20000,1000, 6000*HZ, 6000*HZ, 2047, 5}; +int bdflush_max[N_PARAM] = {100,50000, 20000, 20000,600*HZ, 6000*HZ, 6000*HZ, 2047, 5}; void wakeup_bdflush(int); @@ -482,6 +486,7 @@ static void __insert_into_lru_list(struct buffer_head * bh, int blist) (*bhp)->b_prev_free->b_next_free = bh; (*bhp)->b_prev_free = bh; nr_buffers_type[blist]++; + size_buffers_type[blist] += bh->b_size; } static void __remove_from_lru_list(struct buffer_head * bh, int blist) @@ -495,6 +500,7 @@ static void __remove_from_lru_list(struct buffer_head * bh, int blist) lru_list[blist] = NULL; bh->b_next_free = bh->b_prev_free = NULL; nr_buffers_type[blist]--; + size_buffers_type[blist] -= bh->b_size; 
} } @@ -813,6 +819,27 @@ out: return bh; } +/* -1 -> no need to flush + 0 -> async flush + 1 -> sync flush (wait for I/O completation) */ +static int balance_dirty_state(kdev_t dev) +{ + unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit; + + dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT; + tot = nr_lru_pages + nr_free_pages - nr_free_bigpages; + hard_dirty_limit = tot * bdf_prm.b_un.nfract / 100; + soft_dirty_limit = hard_dirty_limit >> 1; + + if (dirty > soft_dirty_limit) + { + if (dirty > hard_dirty_limit) + return 1; + return 0; + } + return -1; +} + /* * if a new dirty buffer is created we need to balance bdflush. * @@ -820,23 +847,13 @@ out: * pressures on different devices - thus the (currently unused) * 'dev' parameter. */ -static int too_many_dirty_buffers; - void balance_dirty(kdev_t dev) { - int dirty = nr_buffers_type[BUF_DIRTY]; - int ndirty = bdf_prm.b_un.ndirty; - - if (dirty > ndirty) { - if (dirty > 2*ndirty) { - too_many_dirty_buffers = 1; - wakeup_bdflush(1); - return; - } - wakeup_bdflush(0); - } - too_many_dirty_buffers = 0; - return; + int state = balance_dirty_state(dev); + + if (state < 0) + return; + wakeup_bdflush(state); } static inline void __mark_dirty(struct buffer_head *bh, int flag) @@ -1364,6 +1381,7 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long unsigned long bbits, blocks, i, len; struct buffer_head *bh, *head; char * target_buf; + int need_balance_dirty; target_buf = (char *)page_address(page) + offset; @@ -1403,6 +1421,7 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long i = 0; bh = head; partial = 0; + need_balance_dirty = 0; do { if (!bh) BUG(); @@ -1473,8 +1492,7 @@ int block_write_partial_page(struct file *file, struct page *page, unsigned long set_bit(BH_Uptodate, &bh->b_state); if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { __mark_dirty(bh, 0); - if (too_many_dirty_buffers) - balance_dirty(bh->b_dev); + need_balance_dirty = 1; } if (err) 
{ @@ -1488,6 +1506,9 @@ skip: bh = bh->b_this_page; } while (bh != head); + if (need_balance_dirty) + balance_dirty(bh->b_dev); + /* * is this a partial write that happened to make all buffers * uptodate then we can optimize away a bogus readpage() for @@ -1519,6 +1540,7 @@ int block_write_cont_page(struct file *file, struct page *page, unsigned long of struct buffer_head *bh, *head; char * target_buf, *target_data; unsigned long data_offset = offset; + int need_balance_dirty; offset = inode->i_size - page->offset; if (page->offset>inode->i_size) @@ -1566,6 +1588,7 @@ int block_write_cont_page(struct file *file, struct page *page, unsigned long of i = 0; bh = head; partial = 0; + need_balance_dirty = 0; do { if (!bh) BUG(); @@ -1644,8 +1667,7 @@ int block_write_cont_page(struct file *file, struct page *page, unsigned long of set_bit(BH_Uptodate, &bh->b_state); if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { __mark_dirty(bh, 0); - if (too_many_dirty_buffers) - balance_dirty(bh->b_dev); + need_balance_dirty = 1; } if (err) { @@ -1659,6 +1681,9 @@ skip: bh = bh->b_this_page; } while (bh != head); + if (need_balance_dirty) + balance_dirty(bh->b_dev); + /* * is this a partial write that happened to make all buffers * uptodate then we can optimize away a bogus readpage() for @@ -2168,12 +2193,52 @@ out: busy_buffer_page: /* Uhhuh, start writeback so that we don't end up with all dirty pages */ - too_many_dirty_buffers = 1; wakeup_bdflush(0); ret = 0; goto out; } +/* ================== Debugging =================== */ + +void show_buffers(void) +{ + struct buffer_head * bh; + int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0; + int protected = 0; + int nlist; + static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY" }; + + printk("Buffer memory: %6dkB\n", atomic_read(&buffermem) >> 10); + +#ifdef __SMP__ /* trylock does nothing on UP and so we could deadlock */ + if (!spin_trylock(&lru_list_lock)) + return; + for(nlist = 0; nlist < NR_LIST; nlist++) { + 
found = locked = dirty = used = lastused = protected = 0; + bh = lru_list[nlist]; + if(!bh) continue; + + do { + found++; + if (buffer_locked(bh)) + locked++; + if (buffer_protected(bh)) + protected++; + if (buffer_dirty(bh)) + dirty++; + if (atomic_read(&bh->b_count)) + used++, lastused = found; + bh = bh->b_next_free; + } while (bh != lru_list[nlist]); + printk("%8s: %d buffers, %d used (last=%d), " + "%d locked, %d protected, %d dirty\n", + buf_types[nlist], found, used, lastused, + locked, protected, dirty); + } + spin_unlock(&lru_list_lock); +#endif +} + /* ===================== Init ======================= */ /* @@ -2246,21 +2311,92 @@ void __init buffer_init(unsigned long memory_size) * response to dirty buffers. Once this process is activated, we write back * a limited number of buffers to the disks and then go back to sleep again. */ -static DECLARE_WAIT_QUEUE_HEAD(bdflush_wait); static DECLARE_WAIT_QUEUE_HEAD(bdflush_done); struct task_struct *bdflush_tsk = 0; -void wakeup_bdflush(int wait) +void wakeup_bdflush(int block) { + DECLARE_WAITQUEUE(wait, current); + if (current == bdflush_tsk) return; - if (wait) - run_task_queue(&tq_disk); - wake_up(&bdflush_wait); - if (wait) - sleep_on(&bdflush_done); + + if (!block) + { + wake_up_process(bdflush_tsk); + return; + } + + /* kflushd can wakeup us before we have a chance to + go to sleep so we must be smart in handling + this wakeup event from kflushd to avoid deadlocking in SMP + (we are not holding any lock anymore in these two paths). */ + __set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(&bdflush_done, &wait); + + wake_up_process(bdflush_tsk); + schedule(); + + remove_wait_queue(&bdflush_done, &wait); + __set_current_state(TASK_RUNNING); } +/* This is the _only_ function that deals with flushing async writes + to disk. + NOTENOTENOTENOTE: we _only_ need to browse the DIRTY lru list + as all dirty buffers lives _only_ in the DIRTY lru list. 
+ As we never browse the LOCKED and CLEAN lru lists they are infact + completly useless. */ +static void flush_dirty_buffers(int check_flushtime) +{ + struct buffer_head * bh, *next; + int flushed = 0, i; + + restart: + spin_lock(&lru_list_lock); + bh = lru_list[BUF_DIRTY]; + if (!bh) + goto out_unlock; + for (i = nr_buffers_type[BUF_DIRTY]; i-- > 0; bh = next) + { + next = bh->b_next_free; + + if (!buffer_dirty(bh)) + { + __refile_buffer(bh); + continue; + } + if (buffer_locked(bh)) + continue; + + if (check_flushtime) + { + /* The dirty lru list is chronogical ordered so + if the current bh is not yet timed out, + then also all the following bhs + will be too young. */ + if (time_before(jiffies, bh->b_flushtime)) + goto out_unlock; + } + else + { + if (++flushed > bdf_prm.b_un.ndirty) + goto out_unlock; + } + + /* OK, now we are committed to write it out. */ + atomic_inc(&bh->b_count); + spin_unlock(&lru_list_lock); + ll_rw_block(WRITE, 1, &bh); + atomic_dec(&bh->b_count); + + if (current->need_resched) + schedule(); + goto restart; + } + out_unlock: + spin_unlock(&lru_list_lock); +} /* * Here we attempt to write back old buffers. We also try to flush inodes @@ -2272,47 +2408,13 @@ void wakeup_bdflush(int wait) static int sync_old_buffers(void) { - int nlist; - lock_kernel(); sync_supers(0); sync_inodes(0); unlock_kernel(); - for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++) { - struct buffer_head *bh; - repeat: - spin_lock(&lru_list_lock); - bh = lru_list[nlist]; - if(bh) { - struct buffer_head *next; - int i; - for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) { - next = bh->b_next_free; - - /* If the buffer is not on the proper list, - * then refile it. - */ - if ((nlist == BUF_DIRTY && - (!buffer_dirty(bh) && !buffer_locked(bh))) || - (nlist == BUF_LOCKED && !buffer_locked(bh))) { - __refile_buffer(bh); - continue; - } - - if (buffer_locked(bh) || !buffer_dirty(bh)) - continue; - - /* OK, now we are committed to write it out. 
*/ - atomic_inc(&bh->b_count); - spin_unlock(&lru_list_lock); - ll_rw_block(WRITE, 1, &bh); - atomic_dec(&bh->b_count); - goto repeat; - } - } - spin_unlock(&lru_list_lock); - } + flush_dirty_buffers(1); + /* must really sync all the active I/O request to disk here */ run_task_queue(&tq_disk); return 0; } @@ -2328,6 +2430,10 @@ asmlinkage long sys_bdflush(int func, long data) return -EPERM; if (func == 1) { + /* do_exit directly and let kupdate to do its work alone. */ + do_exit(0); +#if 0 /* left here as it's the only example of lazy-mm-stuff used from + a syscall that doesn't care about the current mm context. */ int error; struct mm_struct *user_mm; @@ -2341,6 +2447,7 @@ asmlinkage long sys_bdflush(int func, long data) error = sync_old_buffers(); end_lazy_tlb(user_mm); return error; +#endif } /* Basically func 1 means read param 1, 2 means write param 1, etc */ @@ -2383,85 +2490,103 @@ int bdflush(void * unused) sprintf(current->comm, "kflushd"); bdflush_tsk = current; - for (;;) { - int nlist; + /* avoid getting signals */ + spin_lock_irq(¤t->sigmask_lock); + flush_signals(current); + sigfillset(¤t->blocked); + recalc_sigpending(current); + spin_unlock_irq(¤t->sigmask_lock); + for (;;) { CHECK_EMERGENCY_SYNC - for(nlist = BUF_LOCKED; nlist <= BUF_DIRTY; nlist++) { - int nr, major, written = 0; - struct buffer_head *next; - - repeat: - spin_lock(&lru_list_lock); - next = lru_list[nlist]; - nr = nr_buffers_type[nlist]; - while (nr-- > 0) { - struct buffer_head *bh = next; - - next = next->b_next_free; - - /* If the buffer is not on the correct list, - * then refile it. - */ - if ((nlist == BUF_DIRTY && - (!buffer_dirty(bh) && !buffer_locked(bh))) || - (nlist == BUF_LOCKED && !buffer_locked(bh))) { - __refile_buffer(bh); - continue; - } - - /* If we aren't in panic mode, don't write out too much - * at a time. Also, don't write out buffers we don't - * really have to write out yet.. 
- */ - if (!too_many_dirty_buffers) { - if (written > bdf_prm.b_un.ndirty) - break; - if (time_before(jiffies, bh->b_flushtime)) - continue; - } - - if (buffer_locked(bh) || !buffer_dirty(bh)) - continue; - - major = MAJOR(bh->b_dev); - written++; - - /* - * For the loop major we can try to do asynchronous writes, - * but we have to guarantee that we're making some progress.. - */ - atomic_inc(&bh->b_count); - spin_unlock(&lru_list_lock); - ll_rw_block(WRITE, 1, &bh); - atomic_dec(&bh->b_count); - goto repeat; - } - spin_unlock(&lru_list_lock); - } - run_task_queue(&tq_disk); + flush_dirty_buffers(0); + + /* If wakeup_bdflush will wakeup us + after our bdflush_done wakeup, then + we must make sure to not sleep + in schedule_timeout otherwise + wakeup_bdflush may wait for our + bdflush_done wakeup that would never arrive + (as we would be sleeping) and so it would + deadlock in SMP. */ + __set_current_state(TASK_INTERRUPTIBLE); wake_up(&bdflush_done); - /* * If there are still a lot of dirty buffers around, * skip the sleep and flush some more. Otherwise, we - * sleep for a while and mark us as not being in panic - * mode.. + * sleep for a while. */ - if (!too_many_dirty_buffers || nr_buffers_type[BUF_DIRTY] < bdf_prm.b_un.ndirty) { - too_many_dirty_buffers = 0; - spin_lock_irq(¤t->sigmask_lock); - flush_signals(current); - spin_unlock_irq(¤t->sigmask_lock); - interruptible_sleep_on_timeout(&bdflush_wait, 5*HZ); + if (balance_dirty_state(NODEV) < 0) + schedule_timeout(5*HZ); + /* Remember to mark us as running otherwise + the next schedule will block. */ + __set_current_state(TASK_RUNNING); + } +} + +/* + * This is the kernel update daemon. It was used to live in userspace + * but since it's need to run safely we want it unkillable by mistake. + * You don't need to change your userspace configuration since + * the userspace `update` will do_exit(0) at the first sys_bdflush(). 
+ */ +int kupdate(void * unused) +{ + struct task_struct * tsk = current; + int interval; + + tsk->session = 1; + tsk->pgrp = 1; + strcpy(tsk->comm, "kupdate"); + + /* sigstop and sigcont will stop and wakeup kupdate */ + spin_lock_irq(&tsk->sigmask_lock); + sigfillset(&tsk->blocked); + siginitsetinv(¤t->blocked, sigmask(SIGCONT) | sigmask(SIGSTOP)); + recalc_sigpending(tsk); + spin_unlock_irq(&tsk->sigmask_lock); + + for (;;) { + /* update interval */ + interval = bdf_prm.b_un.interval; + if (interval) + { + tsk->state = TASK_INTERRUPTIBLE; + schedule_timeout(interval); + } + else + { + stop_kupdate: + tsk->state = TASK_STOPPED; + schedule(); /* wait for SIGCONT */ } + /* check for sigstop */ + if (signal_pending(tsk)) + { + int stopped = 0; + spin_lock_irq(&tsk->sigmask_lock); + if (sigismember(&tsk->signal, SIGSTOP)) + { + sigdelset(&tsk->signal, SIGSTOP); + stopped = 1; + } + recalc_sigpending(tsk); + spin_unlock_irq(&tsk->sigmask_lock); + if (stopped) + goto stop_kupdate; + } +#ifdef DEBUG + printk("kupdate() activated...\n"); +#endif + sync_old_buffers(); } } static int __init bdflush_init(void) { kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + kernel_thread(kupdate, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); return 0; } diff --git a/fs/super.c b/fs/super.c index 693017eee0b6..3b58d13cc0c1 100644 --- a/fs/super.c +++ b/fs/super.c @@ -135,7 +135,7 @@ out: return lptr; } -static void remove_vfsmnt(kdev_t dev) +void remove_vfsmnt(kdev_t dev) { struct vfsmount *lptr, *tofree; @@ -508,7 +508,7 @@ out: /* * Find a super_block with no device assigned. 
*/ -static struct super_block *get_empty_super(void) +struct super_block *get_empty_super(void) { struct super_block *s; diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 36303437b527..aea0cd14b9c4 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -44,7 +44,7 @@ extern pgd_t swapper_pg_dir[1024]; do { unsigned long tmpreg; __asm__ __volatile__("movl %%cr3,%0\n\tmovl %0,%%cr3":"=r" (tmpreg) : :"memory"); } while (0) #ifndef CONFIG_X86_INVLPG -#define __flush_tlb_one(addr) flush_tlb() +#define __flush_tlb_one(addr) __flush_tlb() #else #define __flush_tlb_one(addr) \ __asm__ __volatile__("invlpg %0": :"m" (*(char *) addr)) diff --git a/include/linux/fs.h b/include/linux/fs.h index 6d88414eaf48..47e954a8e863 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -934,11 +934,14 @@ extern void do_generic_file_read(struct file * filp, loff_t *ppos, read_descript extern struct super_block *get_super(kdev_t); +struct super_block *get_empty_super(void); +void remove_vfsmnt(kdev_t dev); extern void put_super(kdev_t); unsigned long generate_cluster(kdev_t, int b[], int); unsigned long generate_cluster_swab32(kdev_t, int b[], int); extern kdev_t ROOT_DEV; +extern void show_buffers(void); extern void mount_root(void); #ifdef CONFIG_BLK_DEV_INITRD diff --git a/include/linux/pci.h b/include/linux/pci.h index 2fe52c2b4c37..a71f4c1be6dc 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -406,6 +406,7 @@ struct pci_ops { void pcibios_init(void); void pcibios_fixup_bus(struct pci_bus *); +int pcibios_enable_device(struct pci_dev *); char *pcibios_setup (char *str); void pcibios_update_resource(struct pci_dev *, struct resource *, @@ -460,9 +461,11 @@ int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val); int pci_write_config_byte(struct pci_dev *dev, int where, u8 val); int pci_write_config_word(struct pci_dev *dev, int where, u16 val); int pci_write_config_dword(struct pci_dev *dev, int where, u32 val); 
+int pci_enable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); +int pci_set_power_state(struct pci_dev *dev, int state); -/* Helper functions (drivers/pci/setup.c) */ +/* Helper functions for low-level code (drivers/pci/setup.c) */ int pci_claim_resource(struct pci_dev *, int); void pci_assign_unassigned_resources(u32 min_io, u32 min_mem); @@ -471,7 +474,7 @@ void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(struct pci_dev *, u8, u8)); /* - * simple PCI probing for drivers + * simple PCI probing for drivers (drivers/pci/helper.c) */ struct pci_simple_probe_entry; @@ -524,8 +527,8 @@ extern inline struct pci_dev *pci_find_subsys(unsigned int vendor, unsigned int unsigned int ss_vendor, unsigned int ss_device, struct pci_dev *from) { return NULL; } -extern inline void pci_set_master(struct pci_dev *dev) -{ return; } +extern inline void pci_set_master(struct pci_dev *dev) { } +extern inline int pci_enable_device(struct pci_dev *dev) { return 0; } extern inline int pci_simple_probe (struct pci_simple_probe_entry *list, size_t match_limit, pci_simple_probe_callback cb, void *drvr_data) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 98a32ffb76d6..e96bf31f0904 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -100,6 +100,7 @@ #define PCI_DEVICE_ID_COMPAQ_TOKENRING 0x0508 #define PCI_DEVICE_ID_COMPAQ_1280 0x3033 #define PCI_DEVICE_ID_COMPAQ_TRIFLEX 0x4000 +#define PCI_DEVICE_ID_COMPAQ_6010 0x6010 #define PCI_DEVICE_ID_COMPAQ_SMART2P 0xae10 #define PCI_DEVICE_ID_COMPAQ_NETEL100 0xae32 #define PCI_DEVICE_ID_COMPAQ_NETEL10 0xae34 @@ -728,6 +729,10 @@ #define PCI_DEVICE_ID_RENDITION_VERITE 0x0001 #define PCI_DEVICE_ID_RENDITION_VERITE2100 0x2000 +#define PCI_VENDOR_ID_RCC 0x1166 +#define PCI_DEVICE_ID_RCC_HE 0x0008 +#define PCI_DEVICE_ID_RCC_LE 0x0009 + #define PCI_VENDOR_ID_TOSHIBA 0x1179 #define PCI_DEVICE_ID_TOSHIBA_601 0x0601 #define PCI_DEVICE_ID_TOSHIBA_TOPIC95 0x060a diff --git a/kernel/ksyms.c 
b/kernel/ksyms.c index 7664406d8aa3..5c4d369ec744 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,8 @@ EXPORT_SYMBOL(in_group_p); EXPORT_SYMBOL(update_atime); EXPORT_SYMBOL(get_super); EXPORT_SYMBOL(get_fs_type); +EXPORT_SYMBOL(get_empty_super); +EXPORT_SYMBOL(remove_vfsmnt); EXPORT_SYMBOL(getname); EXPORT_SYMBOL(_fput); EXPORT_SYMBOL(igrab); @@ -140,6 +143,9 @@ EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(__mark_buffer_dirty); EXPORT_SYMBOL(__mark_inode_dirty); +EXPORT_SYMBOL(free_kiovec); +EXPORT_SYMBOL(brw_kiovec); +EXPORT_SYMBOL(alloc_kiovec); EXPORT_SYMBOL(get_empty_filp); EXPORT_SYMBOL(init_private_file); EXPORT_SYMBOL(filp_open); diff --git a/mm/page_io.c b/mm/page_io.c index c89416bf9d78..97516e77dcfa 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -33,7 +33,7 @@ * that shared pages stay shared while being swapped. */ -static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait) +static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait) { unsigned long type, offset; struct swap_info_struct * p; @@ -52,7 +52,7 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in type = SWP_TYPE(entry); if (type >= nr_swapfiles) { printk("Internal error: bad swap-device\n"); - return; + return 0; } /* Don't allow too many pending pages in flight.. */ @@ -63,23 +63,18 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in offset = SWP_OFFSET(entry); if (offset >= p->max) { printk("rw_swap_page: weirdness\n"); - return; + return 0; } if (p->swap_map && !p->swap_map[offset]) { printk(KERN_ERR "rw_swap_page: " "Trying to %s unallocated swap (%08lx)\n", (rw == READ) ? 
"read" : "write", entry); - return; + return 0; } if (!(p->flags & SWP_USED)) { printk(KERN_ERR "rw_swap_page: " "Trying to swap to unused swap-device\n"); - return; - } - - if (!PageLocked(page)) { - printk(KERN_ERR "VM: swap page is unlocked\n"); - return; + return 0; } if (rw == READ) { @@ -104,13 +99,13 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size) if (!(zones[i] = bmap(swapf,block++))) { printk("rw_swap_page: bad swap file\n"); - return; + return 0; } zones_used = i; dev = swapf->i_dev; } else { printk(KERN_ERR "rw_swap_page: no swap file or device\n"); - return; + return 0; } if (!wait) { set_bit(PG_decr_after, &page->flags); @@ -124,9 +119,9 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in * decrementing the page count, and unlocking the page in the * swap lock map - in the IO completion handler. */ - if (!wait) { - return; - } + if (!wait) + return 1; + wait_on_page(page); /* This shouldn't happen, but check to be sure. */ if (page_count(page) == 0) @@ -138,6 +133,7 @@ static void rw_swap_page_base(int rw, unsigned long entry, struct page *page, in (char *) page_address(page), page_count(page)); #endif + return 1; } /* @@ -157,7 +153,8 @@ void rw_swap_page(int rw, struct page *page, int wait) PAGE_BUG(page); if (page->inode != &swapper_inode) PAGE_BUG(page); - rw_swap_page_base(rw, entry, page, wait); + if (!rw_swap_page_base(rw, entry, page, wait)) + UnlockPage(page); } /* @@ -173,5 +170,6 @@ void rw_swap_page_nolock(int rw, unsigned long entry, char *buf, int wait) PAGE_BUG(page); if (PageSwapCache(page)) PAGE_BUG(page); - rw_swap_page_base(rw, entry, page, wait); + if (!rw_swap_page_base(rw, entry, page, wait)) + UnlockPage(page); } -- 2.39.5